| """Offline policy evaluation.""" | |
| from __future__ import annotations | |
| from app.evaluation.simulator_rollouts import run_rollouts | |
def offline_policy_eval(episodes: int = 10) -> dict[str, float]:
    """Summarise simulator rollouts into aggregate policy metrics.

    Calls ``run_rollouts`` and reduces the rows it returns to three
    averages, each rounded to six decimal places.

    Args:
        episodes: Forwarded to ``run_rollouts`` as its ``episodes`` keyword.

    Returns:
        An empty dict when ``run_rollouts`` yields no rows. Otherwise a dict
        with these keys:

        * ``avg_reward`` - mean of each row's ``reward`` converted to float
          (a row without the key contributes 0.0).
        * ``legal_rate`` - fraction of rows whose ``legal`` value is truthy
          (a row without the key counts as not legal).
        * ``success_rate`` - fraction of rows whose ``termination_reason``,
          converted to ``str``, equals ``"safe_resolution"``.
    """
    rollouts = run_rollouts(episodes=episodes)
    if not rollouts:
        return {}

    # The built-in sum is kept for the reward total so the float
    # accumulation matches what callers have always received.
    reward_total = sum(float(row.get("reward", 0.0)) for row in rollouts)
    mean_reward = reward_total / len(rollouts)

    legal_rows = [row for row in rollouts if row.get("legal", False)]
    legal_share = len(legal_rows) / len(rollouts)

    resolved_rows = [
        row
        for row in rollouts
        if str(row.get("termination_reason", "")) == "safe_resolution"
    ]
    success_share = len(resolved_rows) / len(rollouts)

    metrics = (
        ("avg_reward", mean_reward),
        ("legal_rate", legal_share),
        ("success_rate", success_share),
    )
    return {label: round(value, 6) for label, value in metrics}