"""Offline policy evaluation.""" from __future__ import annotations from app.evaluation.simulator_rollouts import run_rollouts def offline_policy_eval(episodes: int = 10) -> dict[str, float]: rows = run_rollouts(episodes=episodes) if not rows: return {} avg_reward = sum(float(r.get("reward", 0.0)) for r in rows) / len(rows) legal_rate = sum(1.0 for r in rows if bool(r.get("legal", False))) / len(rows) success_rate = sum(1.0 for r in rows if str(r.get("termination_reason", "")) == "safe_resolution") / len(rows) return { "avg_reward": round(avg_reward, 6), "legal_rate": round(legal_rate, 6), "success_rate": round(success_rate, 6), }