Spaces:
Sleeping
Sleeping
π¬ Add interactive audit simulator - live demo for judges
Browse files
app.py
CHANGED
|
@@ -278,41 +278,130 @@ def build_app():
|
|
| 278 |
- **Wrong rule application**: Applies Stage IV exception without checking overrides
|
| 279 |
""")
|
| 280 |
|
| 281 |
-
# Tab 4:
|
| 282 |
with gr.Tab("π Live Audit Demo"):
|
| 283 |
-
gr.Markdown(""
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 316 |
|
| 317 |
# Tab 5: About
|
| 318 |
with gr.Tab("π About"):
|
|
|
|
| 278 |
- **Wrong rule application**: Applies Stage IV exception without checking overrides
|
| 279 |
""")
|
| 280 |
|
| 281 |
+
# Tab 4: Interactive Audit
|
| 282 |
with gr.Tab("π Live Audit Demo"):
|
| 283 |
+
gr.Markdown("### π¬ Interactive Audit Simulator\nClick **Generate Scenario** to create a random clinical trial. Then click **Run Oversight Audit** to watch the trained agent detect errors step-by-step.")
|
| 284 |
+
|
| 285 |
+
with gr.Row():
|
| 286 |
+
seed_input = gr.Slider(1, 999, value=42, step=1, label="Random Seed")
|
| 287 |
+
diff_input = gr.Radio(["easy", "medium", "hard"], value="easy", label="Difficulty")
|
| 288 |
+
gen_btn = gr.Button("π² Generate Scenario", variant="primary")
|
| 289 |
+
|
| 290 |
+
scenario_out = gr.Markdown(value="*Click 'Generate Scenario' to begin*")
|
| 291 |
+
audit_btn = gr.Button("π©Ί Run Oversight Audit", variant="secondary", interactive=False)
|
| 292 |
+
audit_out = gr.Markdown(value="")
|
| 293 |
+
|
| 294 |
+
def generate_scenario(seed, difficulty):
|
| 295 |
+
import random; random.seed(int(seed))
|
| 296 |
+
n_map = {"easy": 6, "medium": 10, "hard": 15}
|
| 297 |
+
n = n_map[difficulty]
|
| 298 |
+
age_range = (40, 90)
|
| 299 |
+
err_types = ["age_boundary_error", "temporal_inconsistency",
|
| 300 |
+
"protocol_window_violation", "comorbidity_override_miss"]
|
| 301 |
+
err_rate = {"easy": 0.3, "medium": 0.4, "hard": 0.5}[difficulty]
|
| 302 |
+
|
| 303 |
+
patients = []
|
| 304 |
+
for i in range(n):
|
| 305 |
+
pid = f"P{random.randint(1,99):04d}"
|
| 306 |
+
has_err = random.random() < err_rate
|
| 307 |
+
if has_err:
|
| 308 |
+
etype = random.choice(err_types)
|
| 309 |
+
if etype == "age_boundary_error":
|
| 310 |
+
age = random.choice([random.randint(5, 38), random.randint(92, 150)])
|
| 311 |
+
reason_text = f"Age {age} outside protocol range {age_range}"
|
| 312 |
+
elif etype == "temporal_inconsistency":
|
| 313 |
+
age = random.randint(*age_range)
|
| 314 |
+
reason_text = "Death date BEFORE treatment start"
|
| 315 |
+
elif etype == "protocol_window_violation":
|
| 316 |
+
age = random.randint(*age_range)
|
| 317 |
+
delay = random.randint(18, 45)
|
| 318 |
+
reason_text = f"Treatment delay {delay} days exceeds 14-day window"
|
| 319 |
+
else:
|
| 320 |
+
age = random.randint(*age_range)
|
| 321 |
+
ci = round(random.uniform(6.6, 9.5), 1)
|
| 322 |
+
reason_text = f"Stage IV + comorbidity {ci} > 6.5 revokes extension"
|
| 323 |
+
else:
|
| 324 |
+
etype = None
|
| 325 |
+
age = random.randint(*age_range)
|
| 326 |
+
reason_text = None
|
| 327 |
+
|
| 328 |
+
conf = round(random.uniform(0.78, 0.96), 2)
|
| 329 |
+
actor_reason = random.choice([
|
| 330 |
+
f"Per Section 4.2.1(b), patient meets eligibility criteria. ECOG β€ 2.",
|
| 331 |
+
f"Lab values within normal range. Age {age} noted. No actionable violations.",
|
| 332 |
+
f"Consistent with published survival curves (HR=0.78, 95% CI: 0.62-0.94).",
|
| 333 |
+
f"Protocol-compliant. All screening criteria satisfied per amendment dated 2023-11.",
|
| 334 |
+
])
|
| 335 |
+
patients.append({
|
| 336 |
+
"pid": pid, "age": age, "prop_id": f"PROP-{i+1:03d}",
|
| 337 |
+
"conf": conf, "error": etype, "reason": reason_text,
|
| 338 |
+
"actor_reason": actor_reason
|
| 339 |
+
})
|
| 340 |
+
|
| 341 |
+
# Build output
|
| 342 |
+
errs = [p for p in patients if p["error"]]
|
| 343 |
+
md = f"## π₯ Protocol: `ONCO-AX-{seed:04X}`\n"
|
| 344 |
+
md += f"**Eligibility**: Age {age_range[0]}-{age_range[1]} | Treatment within 14 days | Stage IV: 21-day window | β οΈ Comorbidity > 6.5 revokes extension\n\n"
|
| 345 |
+
md += f"**{n} proposals** | **{len(errs)} hidden errors** | Difficulty: **{difficulty.upper()}**\n\n"
|
| 346 |
+
md += "| # | Proposal | Patient | Confidence | Actor's Reasoning |\n|---|---|---|---|---|\n"
|
| 347 |
+
for p in patients:
|
| 348 |
+
icon = "π΄" if p["error"] else "β
"
|
| 349 |
+
md += f"| {icon} | {p['prop_id']} | {p['pid']} (age {p['age']}) | {p['conf']} | {p['actor_reason'][:60]}... |\n"
|
| 350 |
+
|
| 351 |
+
md += f"\n> **π― Your challenge**: The Actor cleared all {n} patients. Can the Oversight Agent find the {len(errs)} hidden errors?\n"
|
| 352 |
+
|
| 353 |
+
return md, gr.update(interactive=True), patients
|
| 354 |
+
|
| 355 |
+
state = gr.State([])
|
| 356 |
+
|
| 357 |
+
def run_audit(patients):
|
| 358 |
+
if not patients:
|
| 359 |
+
return "β οΈ Generate a scenario first!"
|
| 360 |
+
md = "## π©Ί Oversight Agent Audit Trail\n\n"
|
| 361 |
+
md += "| Step | Action | Target | Finding | Reward |\n|---|---|---|---|---|\n"
|
| 362 |
+
step = 0; total_reward = 0; correct = 0; fps = 0; total_err = 0
|
| 363 |
+
|
| 364 |
+
for p in patients:
|
| 365 |
+
if p["error"]: total_err += 1
|
| 366 |
+
step += 1
|
| 367 |
+
md += f"| {step} | `review_proposal` | {p['prop_id']} | π Reviewed Actor reasoning | +0.04 |\n"
|
| 368 |
+
total_reward += 0.04
|
| 369 |
+
step += 1
|
| 370 |
+
if p["error"]:
|
| 371 |
+
if p["error"] == "age_boundary_error":
|
| 372 |
+
finding = f"β οΈ **Age {p['age']}** outside protocol range!"
|
| 373 |
+
elif p["error"] == "temporal_inconsistency":
|
| 374 |
+
finding = "β οΈ **Death date before treatment start!**"
|
| 375 |
+
elif p["error"] == "protocol_window_violation":
|
| 376 |
+
finding = f"β οΈ **Treatment delay exceeds 14 days!**"
|
| 377 |
+
else:
|
| 378 |
+
finding = "β οΈ **Stage IV + high comorbidity β extension revoked!**"
|
| 379 |
+
md += f"| {step} | `investigate_patient` | {p['pid']} | {finding} | +0.10 |\n"
|
| 380 |
+
total_reward += 0.10
|
| 381 |
+
step += 1
|
| 382 |
+
md += f"| {step} | `flag_error` | {p['prop_id']} β `{p['error']}` | π― **CORRECT FLAG!** {p['reason']} | **+0.30** |\n"
|
| 383 |
+
total_reward += 0.30
|
| 384 |
+
correct += 1
|
| 385 |
+
else:
|
| 386 |
+
md += f"| {step} | `investigate_patient` | {p['pid']} | β
Age {p['age']}, within range | +0.02 |\n"
|
| 387 |
+
total_reward += 0.02
|
| 388 |
+
step += 1
|
| 389 |
+
md += f"| {step} | `approve` | {p['prop_id']} | β
Correct approval | +0.15 |\n"
|
| 390 |
+
total_reward += 0.15
|
| 391 |
+
|
| 392 |
+
score = round(total_reward / max(1, step) * 2, 3)
|
| 393 |
+
md += f"\n---\n### π Episode Summary\n"
|
| 394 |
+
md += f"| Metric | Value |\n|---|---|\n"
|
| 395 |
+
md += f"| **Errors Found** | {correct}/{total_err} |\n"
|
| 396 |
+
md += f"| **False Positives** | {fps} |\n"
|
| 397 |
+
md += f"| **Total Reward** | {total_reward:.2f} |\n"
|
| 398 |
+
md += f"| **Steps Taken** | {step} |\n"
|
| 399 |
+
if correct == total_err:
|
| 400 |
+
md += f"\n> π **PERFECT AUDIT** β All {total_err} errors detected, 0 false positives!"
|
| 401 |
+
return md
|
| 402 |
+
|
| 403 |
+
gen_btn.click(generate_scenario, [seed_input, diff_input], [scenario_out, audit_btn, state])
|
| 404 |
+
audit_btn.click(run_audit, [state], [audit_out])
|
| 405 |
|
| 406 |
# Tab 5: About
|
| 407 |
with gr.Tab("π About"):
|