adithya9903's picture
Deploy PolyGuard HF training Space
fd0c71a verified
"""Offline policy evaluation."""
from __future__ import annotations
from app.evaluation.simulator_rollouts import run_rollouts
def offline_policy_eval(episodes: int = 10) -> dict[str, float]:
    """Aggregate simulator rollouts into summary policy metrics.

    Calls ``run_rollouts(episodes=episodes)`` and averages over every row
    it returns. Each row is accessed through ``.get`` (presumably a dict
    per rollout step or episode -- verify against ``run_rollouts``).

    Args:
        episodes: Forwarded unchanged to ``run_rollouts``.

    Returns:
        An empty dict when ``run_rollouts`` yields no rows. Otherwise a
        dict with three entries, each rounded to 6 decimal places:

        * ``"avg_reward"``: mean of ``float(row["reward"])``, with a
          missing key counted as ``0.0``.
        * ``"legal_rate"``: fraction of rows whose ``"legal"`` value is
          truthy (missing counts as not legal).
        * ``"success_rate"``: fraction of rows whose
          ``"termination_reason"``, converted with ``str``, equals
          ``"safe_resolution"``.
    """
    records = run_rollouts(episodes=episodes)
    if not records:
        # Nothing to average over; avoids dividing by zero below.
        return {}

    total = len(records)

    reward_values = [float(rec.get("reward", 0.0)) for rec in records]
    mean_reward = sum(reward_values) / total

    # Summing booleans counts the rows that satisfy each predicate.
    legal_count = sum(bool(rec.get("legal", False)) for rec in records)
    success_count = sum(
        str(rec.get("termination_reason", "")) == "safe_resolution"
        for rec in records
    )

    metrics = {
        "avg_reward": round(mean_reward, 6),
        "legal_rate": round(legal_count / total, 6),
        "success_rate": round(success_count / total, 6),
    }
    return metrics