Spaces:
Running
Running
| """Dosing-specific evaluation.""" | |
| from __future__ import annotations | |
| from app.evaluation.simulator_rollouts import run_rollouts | |
def _summarize_dosing_rows(rows: list[dict]) -> dict[str, float]:
    """Aggregate rollout rows into the two dosing metrics.

    Args:
        rows: Rollout records. Each is read with ``.get`` for an optional
            ``"reward_breakdown"`` mapping (which may be ``None``) and an
            optional ``"legal"`` flag.

    Returns:
        A dict with ``"target_attainment"`` (mean of the per-row
        ``dosing_quality_score``) and ``"toxicity_avoidance"`` (fraction of
        rows whose ``"legal"`` value is truthy), both rounded to 6 decimals.
        Both are ``0.0`` when ``rows`` is empty.
    """
    if not rows:
        return {"target_attainment": 0.0, "toxicity_avoidance": 0.0}

    scores = []
    for row in rows:
        breakdown = row.get("reward_breakdown") or {}
        # A score that is present but None is treated like a missing score
        # (0.0), mirroring how a None breakdown is already tolerated; without
        # this, float(None) would raise TypeError and abort the whole eval.
        scores.append(float(breakdown.get("dosing_quality_score") or 0.0))

    legal_count = sum(1 for row in rows if row.get("legal", False))
    row_count = len(rows)  # non-zero here: empty input returned above

    return {
        "target_attainment": round(sum(scores) / row_count, 6),
        "toxicity_avoidance": round(legal_count / row_count, 6),
    }


def dosing_eval(*, episodes: int = 8, difficulty: str = "hard") -> dict[str, float]:
    """Run PRECISION_DOSING rollouts and summarize dosing quality.

    Args:
        episodes: Number of episodes forwarded to ``run_rollouts``.
        difficulty: Difficulty label forwarded to ``run_rollouts``.

    Returns:
        ``{"target_attainment": ..., "toxicity_avoidance": ...}`` where
        ``target_attainment`` is the mean ``dosing_quality_score`` across
        rollout rows and ``toxicity_avoidance`` is the fraction of rows
        flagged ``legal``. Both are ``0.0`` if no rows are produced.

    NOTE(review): "legal" is used here as the proxy for toxicity avoidance;
    confirm against ``run_rollouts`` that this is the intended signal.
    """
    rows = run_rollouts(
        episodes=episodes,
        difficulty=difficulty,
        sub_environment="PRECISION_DOSING",
    )
    return _summarize_dosing_rows(rows)