Spaces:
Running
Running
File size: 596 Bytes
"""Uncertainty calibration evaluation."""
from __future__ import annotations
from app.evaluation.simulator_rollouts import run_rollouts
def calibration_eval(
    *, episodes: int = 8, difficulty: str = "medium"
) -> dict[str, float]:
    """Compute a proxy calibration error from simulator rollouts.

    Calls ``run_rollouts`` and averages the ``uncertainty_calibration_score``
    found under each row's ``reward_breakdown``. The proxy is one minus that
    mean, floored at 0.0 and rounded to six decimal places.

    Args:
        episodes: Forwarded to ``run_rollouts`` as ``episodes``.
        difficulty: Forwarded to ``run_rollouts`` as ``difficulty``.

    Returns:
        A dict with the single key ``"ece_proxy"``. The value is ``1.0`` when
        ``run_rollouts`` returns nothing (or any falsy value).
    """
    rows = run_rollouts(episodes=episodes, difficulty=difficulty)
    if not rows:
        return {"ece_proxy": 1.0}

    calibration_scores: list[float] = []
    for row in rows:
        # A row may carry an explicit None breakdown; treat it as empty.
        breakdown = row.get("reward_breakdown") or {}
        score = breakdown.get("uncertainty_calibration_score")
        # A missing or None score counts as 0.0 rather than crashing float().
        calibration_scores.append(0.0 if score is None else float(score))

    # max(1, ...) keeps the division safe if rows was truthy but yielded nothing.
    mean_calibration = sum(calibration_scores) / max(1, len(calibration_scores))
    return {"ece_proxy": round(max(0.0, 1.0 - mean_calibration), 6)}
|