File size: 718 Bytes
21c7db9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
"""Process-fidelity evaluation."""

from __future__ import annotations

from app.evaluation.simulator_rollouts import run_rollouts


def process_eval() -> dict[str, float]:
    """Summarize process fidelity over a fixed batch of simulator rollouts.

    Calls ``run_rollouts(episodes=8, difficulty="medium")`` and averages two
    per-row values: ``reward_breakdown["process_fidelity_score"]`` and
    ``invalid_action_count``. Missing or ``None`` values count as zero.

    Returns:
        A dict with the keys ``"process_fidelity"`` and
        ``"avg_invalid_actions"``, each a mean rounded to 6 decimal places.
        Both keys are always present; both are ``0.0`` when no rollouts are
        returned, so callers can index either key unconditionally.
    """
    rows = run_rollouts(episodes=8, difficulty="medium")
    if not rows:
        # Same shape as the normal path, so consumers never hit a KeyError.
        return {"process_fidelity": 0.0, "avg_invalid_actions": 0.0}

    fidelity_scores: list[float] = []
    invalid_actions: list[float] = []
    for row in rows:
        # ``or`` fallbacks treat an explicit None the same as a missing key;
        # float(None) would otherwise raise TypeError.
        breakdown = row.get("reward_breakdown") or {}
        fidelity_scores.append(float(breakdown.get("process_fidelity_score") or 0.0))
        invalid_actions.append(float(row.get("invalid_action_count") or 0))

    # max(1, ...) keeps the division safe even if ``rows`` was truthy yet
    # yielded no items.
    count = max(1, len(fidelity_scores))
    return {
        "process_fidelity": round(sum(fidelity_scores) / count, 6),
        "avg_invalid_actions": round(sum(invalid_actions) / count, 6),
    }