{ "avg_reward": 0.7785555555555557, "legality_rate": 1.0, "severe_violation_rate": 0.0, "abstention_rate": 0.0, "avg_episode_length": 2.0, "success_rate": 0.0, "avg_burden_delta": 0.0, "avg_safety_delta": 0.5, "avg_dosing_quality": 0.75, "avg_process_fidelity": 0.9200000000000002, "exploit_detection_count": 3.0, "reward_columns": { "format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9000000000000001, "dosing_quality_score": 0.75, "abstention_quality_score": 0.56, "efficiency_score": 0.77, "process_fidelity_score": 0.9200000000000002, "explanation_grounding_score": 0.7999999999999999, "anti_cheat_score": 0.6663333333333333, "uncertainty_calibration_score": 0.87 } }