{ "avg_reward": 0.7348571428571429, "legality_rate": 1.0, "severe_violation_rate": 0.0, "abstention_rate": 0.0, "avg_episode_length": 1.8571428571428572, "success_rate": 0.0, "avg_burden_delta": 0.0, "avg_safety_delta": 0.5, "avg_dosing_quality": 0.5, "avg_process_fidelity": 0.92, "exploit_detection_count": 3.0, "reward_columns": { "format_compliance_score": 0.9989999999999999, "candidate_alignment_score": 0.9989999999999999, "legality_score": 0.9989999999999999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9000000000000001, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.6427142857142858, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.7999999999999999, "anti_cheat_score": 0.5712857142857143, "uncertainty_calibration_score": 0.6142857142857144 } }