Timusgeorge committed on
Commit
3e795c9
·
verified ·
1 Parent(s): 15706a5

Remove $0 compute — app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -70,7 +70,7 @@ def make_reward_plot():
70
 
71
  ax.set_xlabel('Training Step', color='#8b949e', fontsize=11)
72
  ax.set_ylabel('Mean Reward', color='#8b949e', fontsize=11)
73
- ax.set_title('GRPO 200-Step Reward Curve — Qwen2.5-3B-Instruct | 4-bit LoRA | Tesla T4 | $0 Compute',
74
  color='#f0f6fc', fontsize=12, fontweight='bold', pad=10)
75
  ax.legend(fontsize=9, facecolor='#161b22', edgecolor='#30363d', labelcolor='#c9d1d9')
76
  ax.set_xlim(0.5, 200.5)
@@ -153,7 +153,7 @@ def build_app():
153
  <h1>🩺 SynthAudit.Env</h1>
154
  <p>Multi-Agent Clinical AI Oversight — 200-Step GRPO Reinforcement Learning</p>
155
  <p style="margin-top: 8px; color: #58a6ff !important; font-size: 0.95em;">
156
- AI that watches AI • $0 compute • 283% improvement over baseline
157
  </p>
158
  <p style="margin-top: 14px;">
159
  <a href="https://github.com/sumitsaraswat362/SynthAudit.Env" target="_blank" style="color: #58a6ff; text-decoration: none; margin: 0 10px;">📦 GitHub</a> |
@@ -167,7 +167,7 @@ def build_app():
167
  gr.HTML('<div class="stat-card"><h3>+283%</h3><p>Improvement Over Base</p></div>')
168
  gr.HTML('<div class="stat-card"><h3>0.506</h3><p>Peak GRPO Reward</p></div>')
169
  gr.HTML('<div class="stat-card"><h3>200</h3><p>Training Steps</p></div>')
170
- gr.HTML('<div class="stat-card"><h3>$0</h3><p>Compute Cost</p></div>')
171
  gr.HTML('<div class="stat-card"><h3>4×</h3><p>More Errors Caught</p></div>')
172
 
173
  with gr.Tabs():
@@ -185,7 +185,7 @@ def build_app():
185
  | **Quantization** | 4-bit QLoRA (Unsloth) | | **Algorithm** | GRPO (TRL) |
186
  | **GPU** | Tesla T4 (free Colab) | | **Training Time** | 2h 20m |
187
  | **Steps** | 200 | | **Peak Reward** | **0.506** (Step 157) |
188
- | **Compute Cost** | **$0** | | **Final Reward** | 0.346 |
189
 
190
  ### What The Model Learned (Zero Supervised Data)
191
 
@@ -422,7 +422,7 @@ An **Oversight Agent** trained with GRPO learns to catch errors from an **Actor
422
  ### Key Results
423
  - **283% improvement** over untrained baseline
424
  - **4× more clinical errors** correctly detected
425
- - **$0 compute cost** — trained on free Google Colab T4
426
  - **200 GRPO steps** in 2 hours 20 minutes
427
 
428
  ### Links
 
70
 
71
  ax.set_xlabel('Training Step', color='#8b949e', fontsize=11)
72
  ax.set_ylabel('Mean Reward', color='#8b949e', fontsize=11)
73
+ ax.set_title('GRPO 200-Step Reward Curve — Qwen2.5-3B-Instruct | 4-bit QLoRA | Tesla T4',
74
  color='#f0f6fc', fontsize=12, fontweight='bold', pad=10)
75
  ax.legend(fontsize=9, facecolor='#161b22', edgecolor='#30363d', labelcolor='#c9d1d9')
76
  ax.set_xlim(0.5, 200.5)
 
153
  <h1>🩺 SynthAudit.Env</h1>
154
  <p>Multi-Agent Clinical AI Oversight — 200-Step GRPO Reinforcement Learning</p>
155
  <p style="margin-top: 8px; color: #58a6ff !important; font-size: 0.95em;">
156
+ AI that watches AI • Colab T4 GPU • 283% improvement over baseline
157
  </p>
158
  <p style="margin-top: 14px;">
159
  <a href="https://github.com/sumitsaraswat362/SynthAudit.Env" target="_blank" style="color: #58a6ff; text-decoration: none; margin: 0 10px;">📦 GitHub</a> |
 
167
  gr.HTML('<div class="stat-card"><h3>+283%</h3><p>Improvement Over Base</p></div>')
168
  gr.HTML('<div class="stat-card"><h3>0.506</h3><p>Peak GRPO Reward</p></div>')
169
  gr.HTML('<div class="stat-card"><h3>200</h3><p>Training Steps</p></div>')
170
+ gr.HTML('<div class="stat-card"><h3>8</h3><p>Oversight Tools</p></div>')
171
  gr.HTML('<div class="stat-card"><h3>4×</h3><p>More Errors Caught</p></div>')
172
 
173
  with gr.Tabs():
 
185
  | **Quantization** | 4-bit QLoRA (Unsloth) | | **Algorithm** | GRPO (TRL) |
186
  | **GPU** | Tesla T4 (free Colab) | | **Training Time** | 2h 20m |
187
  | **Steps** | 200 | | **Peak Reward** | **0.506** (Step 157) |
188
+ | **Hardware** | **Free Colab T4** | | **Final Reward** | 0.346 |
189
 
190
  ### What The Model Learned (Zero Supervised Data)
191
 
 
422
  ### Key Results
423
  - **283% improvement** over untrained baseline
424
  - **4× more clinical errors** correctly detected
425
+ - **Free Colab T4** — trained in 2h 20m on 15.6 GB VRAM
426
  - **200 GRPO steps** in 2 hours 20 minutes
427
 
428
  ### Links