Timusgeorge committed on
Commit
f095b05
·
verified ·
1 Parent(s): 4369d27

Honest labeling: Environment Simulator, not fake LLM demo

Browse files
Files changed (1) hide show
  1. app.py +9 -4
app.py CHANGED
@@ -278,9 +278,14 @@ def build_app():
278
  - **Wrong rule application**: Applies Stage IV exception without checking overrides
279
  """)
280
 
281
- # Tab 4: Interactive Audit
282
- with gr.Tab("🔍 Live Audit Demo"):
283
- gr.Markdown("### 🔬 Interactive Audit Simulator\nClick **Generate Scenario** to create a random clinical trial. Then click **Run Oversight Audit** to watch the trained agent detect errors step-by-step.")
 
 
 
 
 
284
 
285
  with gr.Row():
286
  seed_input = gr.Slider(1, 999, value=42, step=1, label="Random Seed")
@@ -288,7 +293,7 @@ def build_app():
288
  gen_btn = gr.Button("🎲 Generate Scenario", variant="primary")
289
 
290
  scenario_out = gr.Markdown(value="*Click 'Generate Scenario' to begin*")
291
- audit_btn = gr.Button("🩺 Run Oversight Audit", variant="secondary", interactive=False)
292
  audit_out = gr.Markdown(value="")
293
 
294
  def generate_scenario(seed, difficulty):
 
278
  - **Wrong rule application**: Applies Stage IV exception without checking overrides
279
  """)
280
 
281
+ # Tab 4: Interactive Environment Simulator
282
+ with gr.Tab("🔬 Environment Simulator"):
283
+ gr.Markdown("""### 🔬 SynthAudit Environment Simulator
284
+ **This demonstrates the environment**, not the LLM. It shows how SynthAudit.Env generates adversarial clinical scenarios, injects hidden errors, and scores agent actions using our dense reward model.
285
+
286
+ The **actual trained model's results** (real LLM inference) are in the ⚔️ Base vs Trained tab.
287
+
288
+ 👇 Try generating different scenarios to see the variety of adversarial cases our environment produces:""")
289
 
290
  with gr.Row():
291
  seed_input = gr.Slider(1, 999, value=42, step=1, label="Random Seed")
 
293
  gen_btn = gr.Button("🎲 Generate Scenario", variant="primary")
294
 
295
  scenario_out = gr.Markdown(value="*Click 'Generate Scenario' to begin*")
296
+ audit_btn = gr.Button(" Simulate Perfect Agent (shows reward scoring)", variant="secondary", interactive=False)
297
  audit_out = gr.Markdown(value="")
298
 
299
  def generate_scenario(seed, difficulty):