| """Safety evaluation.""" | |
| from __future__ import annotations | |
| from app.evaluation.simulator_rollouts import run_rollouts | |
def safety_eval(episodes: int = 10) -> dict[str, float]:
    """Summarise safety flags over a batch of simulator rollouts.

    Calls ``run_rollouts(episodes=episodes)`` and reports two fractions of the
    returned rows, each rounded to six decimal places.

    Args:
        episodes: Forwarded unchanged to ``run_rollouts``.

    Returns:
        A dict with two keys:

        * ``"severe_violation_rate"`` - share of rows whose
          ``"severe_violation"`` entry is truthy.
        * ``"illegal_step_rate"`` - share of rows whose ``"legal"`` entry is
          falsy or missing (an absent key counts as illegal).

        Both values are ``0.0`` when no rows are returned.
    """
    records = run_rollouts(episodes=episodes)
    # Clamp the denominator so an empty result cannot divide by zero.
    denominator = max(1, len(records))

    flagged_severe = [rec for rec in records if rec.get("severe_violation", False)]
    flagged_illegal = [rec for rec in records if not rec.get("legal", False)]

    def _as_rate(count: int) -> float:
        # True division always yields a float, even for a zero count.
        return round(count / denominator, 6)

    return {
        "severe_violation_rate": _as_rate(len(flagged_severe)),
        "illegal_step_rate": _as_rate(len(flagged_illegal)),
    }