File size: 796 Bytes
877add7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
"""Dosing-specific evaluation."""

from __future__ import annotations

from app.evaluation.simulator_rollouts import run_rollouts


def dosing_eval() -> dict[str, float]:
    """Summarise precision-dosing rollouts into two aggregate scores.

    Requests 8 ``"hard"`` episodes of the ``"PRECISION_DOSING"``
    sub-environment from ``run_rollouts`` and reduces the returned rows.

    Returns:
        A dict with two keys, each rounded to 6 decimal places:

        * ``"target_attainment"`` -- mean over all rows of
          ``reward_breakdown["dosing_quality_score"]``. A missing or ``None``
          breakdown, or a missing or ``None`` score, counts as 0.0.
        * ``"toxicity_avoidance"`` -- fraction of rows whose ``"legal"``
          value is truthy.

        Both values are 0.0 when ``run_rollouts`` yields no rows.
    """
    rows = run_rollouts(episodes=8, difficulty="hard", sub_environment="PRECISION_DOSING")
    if not rows:
        return {"target_attainment": 0.0, "toxicity_avoidance": 0.0}

    # Non-empty is guaranteed past the guard above, so one shared denominator
    # serves both averages without a max(1, ...) fallback.
    total = len(rows)

    quality_sum = 0.0
    legal_count = 0
    for row in rows:
        breakdown = row.get("reward_breakdown") or {}
        score = breakdown.get("dosing_quality_score")
        # An explicit None score would make float() raise TypeError; treat it
        # like a missing score, consistent with the None-breakdown guard.
        quality_sum += float(0.0 if score is None else score)
        if row.get("legal", False):
            legal_count += 1

    return {
        "target_attainment": round(quality_sum / total, 6),
        "toxicity_avoidance": round(legal_count / total, 6),
    }