{ "status": "ok", "ablations": { "bandit_only": { "avg_reward": 0.779625, "legality_rate": 1.0, "severe_violation_rate": 0.0, "abstention_rate": 0.0, "avg_episode_length": 2.8125, "success_rate": 0.0, "avg_burden_delta": 0.0, "avg_safety_delta": 0.483125, "avg_dosing_quality": 0.75, "avg_process_fidelity": 0.9056250000000008, "exploit_detection_count": 2.0, "timeout_rate": 0.0, "failure_visible_rate": 0.0625, "avg_invalid_actions": 0.0625, "reward_columns": { "format_compliance_score": 0.9989999999999996, "candidate_alignment_score": 0.9989999999999996, "legality_score": 0.9989999999999996, "safety_delta_score": 0.483125, "burden_improvement_score": 0.5, "disease_stability_score": 0.8999999999999995, "dosing_quality_score": 0.75, "abstention_quality_score": 0.5600000000000002, "efficiency_score": 0.5855625, "process_fidelity_score": 0.9056250000000008, "explanation_grounding_score": 0.8000000000000004, "anti_cheat_score": 0.9366249999999997, "uncertainty_calibration_score": 0.8531250000000004 }, "primary_reward_channels": { "safety_legality": 0.9469062499999998, "clinical_improvement": 0.6273749999999997, "dosing_quality": 0.6550000000000001, "process_integrity": 0.8225937500000001 }, "policy_stack": "bandit-only", "failure_mining": { "total_rows": 32, "failure_rows": 2, "top_failure_reasons": [ { "reason": "repeated_action_loop", "count": 2 } ] } }, "llm_only": { "avg_reward": 0.7723913043478261, "legality_rate": 1.0, "severe_violation_rate": 0.0, "abstention_rate": 0.0, "avg_episode_length": 1.9565217391304348, "success_rate": 0.0, "avg_burden_delta": 0.0, "avg_safety_delta": 0.4882608695652174, "avg_dosing_quality": 0.75, "avg_process_fidelity": 0.9000000000000005, "exploit_detection_count": 7.0, "timeout_rate": 0.0, "failure_visible_rate": 0.30434782608695654, "avg_invalid_actions": 0.30434782608695654, "reward_columns": { "format_compliance_score": 0.9989999999999999, "candidate_alignment_score": 0.9989999999999999, "legality_score": 0.9989999999999999, "safety_delta_score": 0.4882608695652174, "burden_improvement_score": 0.5, "disease_stability_score": 0.8999999999999998, "dosing_quality_score": 0.75, "abstention_quality_score": 0.5600000000000004, "efficiency_score": 0.7027826086956522, "process_fidelity_score": 0.9000000000000005, "explanation_grounding_score": 0.8000000000000003, "anti_cheat_score": 0.6952608695652175, "uncertainty_calibration_score": 0.8482608695652176 }, "primary_reward_channels": { "safety_legality": 0.8853478260869562, "clinical_improvement": 0.6290869565217388, "dosing_quality": 0.6549999999999998, "process_integrity": 0.8504782608695656 }, "policy_stack": "llm-only", "failure_mining": { "total_rows": 23, "failure_rows": 7, "top_failure_reasons": [ { "reason": "repeated_action_loop", "count": 7 } ] } }, "llm_bandit": { "avg_reward": 0.7647391304347826, "legality_rate": 1.0, "severe_violation_rate": 0.0, "abstention_rate": 0.0, "avg_episode_length": 1.9565217391304348, "success_rate": 0.0, "avg_burden_delta": 0.0, "avg_safety_delta": 0.48982608695652174, "avg_dosing_quality": 0.717391304347826, "avg_process_fidelity": 0.9000000000000005, "exploit_detection_count": 7.0, "timeout_rate": 0.0, "failure_visible_rate": 0.30434782608695654, "avg_invalid_actions": 0.30434782608695654, "reward_columns": { "format_compliance_score": 0.9989999999999999, "candidate_alignment_score": 0.9989999999999999, "legality_score": 0.9989999999999999, "safety_delta_score": 0.48982608695652174, "burden_improvement_score": 0.5043478260869565, "disease_stability_score": 0.8582608695652173, "dosing_quality_score": 0.717391304347826, "abstention_quality_score": 0.5600000000000004, "efficiency_score": 0.7027826086956522, "process_fidelity_score": 0.9000000000000005, "explanation_grounding_score": 0.8000000000000003, "anti_cheat_score": 0.6952608695652175, "uncertainty_calibration_score": 0.8126086956521739 }, "primary_reward_channels": { "safety_legality": 0.8765217391304347, "clinical_improvement": 0.6171739130434781, "dosing_quality": 0.6386956521739129, "process_integrity": 0.8504782608695656 }, "policy_stack": "llm+bandit", "failure_mining": { "total_rows": 23, "failure_rows": 7, "top_failure_reasons": [ { "reason": "repeated_action_loop", "count": 7 } ] } } } }