| { | |
| "offline_policy_eval": { | |
| "avg_reward": 0.772833, | |
| "legal_rate": 1.0, | |
| "success_rate": 0.0 | |
| }, | |
| "safety_eval": { | |
| "severe_violation_rate": 0.0, | |
| "illegal_step_rate": 0.0 | |
| }, | |
| "dosing_eval": { | |
| "target_attainment": 0.75, | |
| "toxicity_avoidance": 1.0 | |
| }, | |
| "robustness_eval": { | |
| "missing_labs_safety_rate": 0.666667, | |
| "noisy_dose_info_safety_rate": 1.0, | |
| "conflicting_meds_safety_rate": 1.0, | |
| "alias_noise_safety_rate": 1.0, | |
| "hidden_duplicate_detection_rate": 1.0, | |
| "wrong_candidate_id_resilience": 1.0, | |
| "stale_evidence_safety_rate": 1.0, | |
| "delayed_ade_manifestation_safety_rate": 1.0 | |
| }, | |
| "calibration_eval": { | |
| "ece_proxy": 0.08625 | |
| }, | |
| "abstention_eval": { | |
| "appropriate_abstention_rate": 0.0 | |
| }, | |
| "process_eval": { | |
| "process_fidelity": 0.92, | |
| "avg_invalid_actions": 0.333333 | |
| }, | |
| "subgroup_eval": { | |
| "renal_compromise": { | |
| "avg_reward": 0.774, | |
| "legal_rate": 1.0 | |
| }, | |
| "hepatic_compromise": { | |
| "avg_reward": 0.779333, | |
| "legal_rate": 1.0 | |
| }, | |
| "frail": { | |
| "avg_reward": 0.781667, | |
| "legal_rate": 1.0 | |
| } | |
| }, | |
| "explainability_eval": { | |
| "grounding_rate": 0.8 | |
| } | |
| } |