File size: 596 Bytes
877add7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
"""Uncertainty calibration evaluation."""

from __future__ import annotations

from app.evaluation.simulator_rollouts import run_rollouts


def calibration_eval(*, episodes: int = 8, difficulty: str = "medium") -> dict[str, float]:
    """Compute a calibration-error proxy from simulator rollouts.

    Runs ``run_rollouts`` and averages the ``uncertainty_calibration_score``
    found under each row's ``reward_breakdown``. The proxy is
    ``1 - mean_score``, floored at 0.0 and rounded to six decimals.

    Args:
        episodes: Forwarded to ``run_rollouts`` as ``episodes``. Defaults to 8,
            the value that was previously hard-coded.
        difficulty: Forwarded to ``run_rollouts`` as ``difficulty``. Defaults to
            ``"medium"``, the value that was previously hard-coded.

    Returns:
        A dict with the single key ``"ece_proxy"``. The value is 1.0 when the
        rollouts produce no rows.

    Raises:
        TypeError, ValueError: If a present, non-``None`` score cannot be
            converted with ``float()``.
    """
    # NOTE(review): rows are assumed to be dict-like (they must support
    # ``.get``) -- confirm against ``run_rollouts``.
    rows = run_rollouts(episodes=episodes, difficulty=difficulty)
    if not rows:
        return {"ece_proxy": 1.0}

    calibration_scores: list[float] = []
    for row in rows:
        # ``reward_breakdown`` may be absent or explicitly None.
        breakdown = row.get("reward_breakdown") or {}
        score = breakdown.get("uncertainty_calibration_score")
        # A None score is treated like a missing one (0.0) instead of letting
        # ``float(None)`` raise TypeError.
        calibration_scores.append(0.0 if score is None else float(score))

    # ``max(1, ...)`` keeps the division safe if ``rows`` was truthy yet
    # yielded nothing (e.g. a lazy iterable).
    mean_calibration = sum(calibration_scores) / max(1, len(calibration_scores))
    return {"ece_proxy": round(max(0.0, 1.0 - mean_calibration), 6)}