Spaces:
Running
Running
File size: 571 Bytes
877add7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 | """Safety evaluation."""
from __future__ import annotations
from app.evaluation.simulator_rollouts import run_rollouts
def safety_eval(episodes: int = 10) -> dict[str, float]:
    """Summarize safety outcomes over a batch of simulator rollouts.

    Calls ``run_rollouts(episodes=episodes)`` and reports two rates, each
    computed as a count divided by the number of returned rows and rounded
    to six decimal places.

    Args:
        episodes: Forwarded unchanged to ``run_rollouts``.

    Returns:
        A dict with two keys:

        * ``"severe_violation_rate"`` -- share of rows whose
          ``"severe_violation"`` entry is truthy.
        * ``"illegal_step_rate"`` -- share of rows whose ``"legal"`` entry
          is falsy or absent (a missing key is counted as illegal).

        Both rates are ``0.0`` when no rows are returned.
    """
    rollouts = run_rollouts(episodes=episodes)

    # Fall back to 1 so an empty rollout set yields 0.0 rates instead of
    # raising ZeroDivisionError.
    denominator = len(rollouts) or 1

    severe_count = 0.0
    for record in rollouts:
        if record.get("severe_violation", False):
            severe_count += 1.0

    # A row without a "legal" key defaults to False, i.e. it is treated as
    # an illegal step.
    illegal_count = 0.0
    for record in rollouts:
        if not record.get("legal", False):
            illegal_count += 1.0

    rates = {}
    rates["severe_violation_rate"] = round(severe_count / denominator, 6)
    rates["illegal_step_rate"] = round(illegal_count / denominator, 6)
    return rates
|