| { |
| "status": "ok", |
| "ablations": { |
| "bandit_only": { |
| "avg_reward": 0.779625, |
| "legality_rate": 1.0, |
| "severe_violation_rate": 0.0, |
| "abstention_rate": 0.0, |
| "avg_episode_length": 2.8125, |
| "success_rate": 0.0, |
| "avg_burden_delta": 0.0, |
| "avg_safety_delta": 0.483125, |
| "avg_dosing_quality": 0.75, |
| "avg_process_fidelity": 0.9056250000000008, |
| "exploit_detection_count": 2.0, |
| "timeout_rate": 0.0, |
| "failure_visible_rate": 0.0625, |
| "avg_invalid_actions": 0.0625, |
| "reward_columns": { |
| "format_compliance_score": 0.9989999999999996, |
| "candidate_alignment_score": 0.9989999999999996, |
| "legality_score": 0.9989999999999996, |
| "safety_delta_score": 0.483125, |
| "burden_improvement_score": 0.5, |
| "disease_stability_score": 0.8999999999999995, |
| "dosing_quality_score": 0.75, |
| "abstention_quality_score": 0.5600000000000002, |
| "efficiency_score": 0.5855625, |
| "process_fidelity_score": 0.9056250000000008, |
| "explanation_grounding_score": 0.8000000000000004, |
| "anti_cheat_score": 0.9366249999999997, |
| "uncertainty_calibration_score": 0.8531250000000004 |
| }, |
| "primary_reward_channels": { |
| "safety_legality": 0.9469062499999998, |
| "clinical_improvement": 0.6273749999999997, |
| "dosing_quality": 0.6550000000000001, |
| "process_integrity": 0.8225937500000001 |
| }, |
| "policy_stack": "bandit-only", |
| "failure_mining": { |
| "total_rows": 32, |
| "failure_rows": 2, |
| "top_failure_reasons": [ |
| { |
| "reason": "repeated_action_loop", |
| "count": 2 |
| } |
| ] |
| } |
| }, |
| "llm_only": { |
| "avg_reward": 0.7723913043478261, |
| "legality_rate": 1.0, |
| "severe_violation_rate": 0.0, |
| "abstention_rate": 0.0, |
| "avg_episode_length": 1.9565217391304348, |
| "success_rate": 0.0, |
| "avg_burden_delta": 0.0, |
| "avg_safety_delta": 0.4882608695652174, |
| "avg_dosing_quality": 0.75, |
| "avg_process_fidelity": 0.9000000000000005, |
| "exploit_detection_count": 7.0, |
| "timeout_rate": 0.0, |
| "failure_visible_rate": 0.30434782608695654, |
| "avg_invalid_actions": 0.30434782608695654, |
| "reward_columns": { |
| "format_compliance_score": 0.9989999999999999, |
| "candidate_alignment_score": 0.9989999999999999, |
| "legality_score": 0.9989999999999999, |
| "safety_delta_score": 0.4882608695652174, |
| "burden_improvement_score": 0.5, |
| "disease_stability_score": 0.8999999999999998, |
| "dosing_quality_score": 0.75, |
| "abstention_quality_score": 0.5600000000000004, |
| "efficiency_score": 0.7027826086956522, |
| "process_fidelity_score": 0.9000000000000005, |
| "explanation_grounding_score": 0.8000000000000003, |
| "anti_cheat_score": 0.6952608695652175, |
| "uncertainty_calibration_score": 0.8482608695652176 |
| }, |
| "primary_reward_channels": { |
| "safety_legality": 0.8853478260869562, |
| "clinical_improvement": 0.6290869565217388, |
| "dosing_quality": 0.6549999999999998, |
| "process_integrity": 0.8504782608695656 |
| }, |
| "policy_stack": "llm-only", |
| "failure_mining": { |
| "total_rows": 23, |
| "failure_rows": 7, |
| "top_failure_reasons": [ |
| { |
| "reason": "repeated_action_loop", |
| "count": 7 |
| } |
| ] |
| } |
| }, |
| "llm_bandit": { |
| "avg_reward": 0.7647391304347826, |
| "legality_rate": 1.0, |
| "severe_violation_rate": 0.0, |
| "abstention_rate": 0.0, |
| "avg_episode_length": 1.9565217391304348, |
| "success_rate": 0.0, |
| "avg_burden_delta": 0.0, |
| "avg_safety_delta": 0.48982608695652174, |
| "avg_dosing_quality": 0.717391304347826, |
| "avg_process_fidelity": 0.9000000000000005, |
| "exploit_detection_count": 7.0, |
| "timeout_rate": 0.0, |
| "failure_visible_rate": 0.30434782608695654, |
| "avg_invalid_actions": 0.30434782608695654, |
| "reward_columns": { |
| "format_compliance_score": 0.9989999999999999, |
| "candidate_alignment_score": 0.9989999999999999, |
| "legality_score": 0.9989999999999999, |
| "safety_delta_score": 0.48982608695652174, |
| "burden_improvement_score": 0.5043478260869565, |
| "disease_stability_score": 0.8582608695652173, |
| "dosing_quality_score": 0.717391304347826, |
| "abstention_quality_score": 0.5600000000000004, |
| "efficiency_score": 0.7027826086956522, |
| "process_fidelity_score": 0.9000000000000005, |
| "explanation_grounding_score": 0.8000000000000003, |
| "anti_cheat_score": 0.6952608695652175, |
| "uncertainty_calibration_score": 0.8126086956521739 |
| }, |
| "primary_reward_channels": { |
| "safety_legality": 0.8765217391304347, |
| "clinical_improvement": 0.6171739130434781, |
| "dosing_quality": 0.6386956521739129, |
| "process_integrity": 0.8504782608695656 |
| }, |
| "policy_stack": "llm+bandit", |
| "failure_mining": { |
| "total_rows": 23, |
| "failure_rows": 7, |
| "top_failure_reasons": [ |
| { |
| "reason": "repeated_action_loop", |
| "count": 7 |
| } |
| ] |
| } |
| } |
| } |
| } |