TheJackBright's picture
Deploy GitHub root master to Space
c296d62
"""Safety evaluation."""
from __future__ import annotations
from app.evaluation.simulator_rollouts import run_rollouts
def safety_eval(episodes: int = 10) -> dict[str, float]:
    """Compute safety rates over a batch of simulator rollouts.

    Calls ``run_rollouts(episodes=episodes)`` and reports what fraction of
    the returned rows are flagged as severe violations and what fraction
    are not marked legal.

    Args:
        episodes: Forwarded unchanged to ``run_rollouts``.

    Returns:
        A dict with two entries, each rounded to 6 decimal places:
        ``"severe_violation_rate"`` and ``"illegal_step_rate"``.

    Note:
        Rows are read with ``.get`` (presumably dicts -- confirm against
        ``run_rollouts``). A row without a ``"severe_violation"`` key is
        treated as non-severe; a row without a ``"legal"`` key is counted
        as illegal.
    """
    records = run_rollouts(episodes=episodes)

    # Clamp the denominator so an empty rollout result yields 0.0 rates
    # instead of raising ZeroDivisionError.
    denominator = max(1, len(records))

    severe_count = len(
        [record for record in records if record.get("severe_violation", False)]
    )
    illegal_count = len(
        [record for record in records if not record.get("legal", False)]
    )

    rates: dict[str, float] = {}
    rates["severe_violation_rate"] = round(severe_count / denominator, 6)
    rates["illegal_step_rate"] = round(illegal_count / denominator, 6)
    return rates