Upload PolyGuard artifact folder: qwen-qwen2-5-3b-instruct checkpoints after policy_ablation
Browse files- checkpoints/sweeps/qwen-qwen2-5-3b-instruct/bandit-only_failures.json +64 -0
- checkpoints/sweeps/qwen-qwen2-5-3b-instruct/bandit-only_replay.jsonl +32 -0
- checkpoints/sweeps/qwen-qwen2-5-3b-instruct/llm-only_failures.json +219 -0
- checkpoints/sweeps/qwen-qwen2-5-3b-instruct/llm-only_replay.jsonl +23 -0
- checkpoints/sweeps/qwen-qwen2-5-3b-instruct/llm_bandit_failures.json +219 -0
- checkpoints/sweeps/qwen-qwen2-5-3b-instruct/llm_bandit_replay.jsonl +23 -0
checkpoints/sweeps/qwen-qwen2-5-3b-instruct/bandit-only_failures.json
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"policy_stack": "bandit-only",
|
| 4 |
+
"episode": 4,
|
| 5 |
+
"step": 3,
|
| 6 |
+
"reward": 0.729,
|
| 7 |
+
"final_action": {
|
| 8 |
+
"mode": "DOSE_OPT",
|
| 9 |
+
"action_type": "DOSE_HOLD",
|
| 10 |
+
"target_drug": "nsaid_like",
|
| 11 |
+
"replacement_drug": null,
|
| 12 |
+
"dose_bucket": "NA",
|
| 13 |
+
"taper_days": null,
|
| 14 |
+
"monitoring_plan": "repeat_labs_72h",
|
| 15 |
+
"evidence_query": null,
|
| 16 |
+
"new_drug_name": null,
|
| 17 |
+
"candidate_components": [],
|
| 18 |
+
"candidate_id": "cand_05",
|
| 19 |
+
"confidence": 0.72,
|
| 20 |
+
"rationale_brief": "Bandit-only policy selected top contextual candidate."
|
| 21 |
+
},
|
| 22 |
+
"termination_reason": "exploit_detection",
|
| 23 |
+
"failure_reasons": [
|
| 24 |
+
"repeated_action_loop"
|
| 25 |
+
],
|
| 26 |
+
"primary_reward_channels": {
|
| 27 |
+
"safety_legality": 0.73,
|
| 28 |
+
"clinical_improvement": 0.633,
|
| 29 |
+
"dosing_quality": 0.655,
|
| 30 |
+
"process_integrity": 0.823
|
| 31 |
+
}
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"policy_stack": "bandit-only",
|
| 35 |
+
"episode": 6,
|
| 36 |
+
"step": 3,
|
| 37 |
+
"reward": 0.739,
|
| 38 |
+
"final_action": {
|
| 39 |
+
"mode": "DOSE_OPT",
|
| 40 |
+
"action_type": "ORDER_MONITORING_AND_WAIT",
|
| 41 |
+
"target_drug": null,
|
| 42 |
+
"replacement_drug": null,
|
| 43 |
+
"dose_bucket": "NA",
|
| 44 |
+
"taper_days": null,
|
| 45 |
+
"monitoring_plan": "vitals_labs_7d",
|
| 46 |
+
"evidence_query": null,
|
| 47 |
+
"new_drug_name": null,
|
| 48 |
+
"candidate_components": [],
|
| 49 |
+
"candidate_id": "cand_08",
|
| 50 |
+
"confidence": 0.74,
|
| 51 |
+
"rationale_brief": "Bandit-only policy selected top contextual candidate."
|
| 52 |
+
},
|
| 53 |
+
"termination_reason": "exploit_detection",
|
| 54 |
+
"failure_reasons": [
|
| 55 |
+
"repeated_action_loop"
|
| 56 |
+
],
|
| 57 |
+
"primary_reward_channels": {
|
| 58 |
+
"safety_legality": 0.735,
|
| 59 |
+
"clinical_improvement": 0.633,
|
| 60 |
+
"dosing_quality": 0.655,
|
| 61 |
+
"process_integrity": 0.861
|
| 62 |
+
}
|
| 63 |
+
}
|
| 64 |
+
]
|
checkpoints/sweeps/qwen-qwen2-5-3b-instruct/bandit-only_replay.jsonl
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"policy_stack": "bandit-only", "episode": 0, "step": 1, "reward": 0.792, "final_action": {"mode": "DOSE_OPT", "action_type": "ORDER_MONITORING_AND_WAIT", "target_drug": null, "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "vitals_labs_7d", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_08", "confidence": 0.74, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.934, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.867}}
|
| 2 |
+
{"policy_stack": "bandit-only", "episode": 0, "step": 2, "reward": 0.774, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "benzodiazepine_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.917, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.805}}
|
| 3 |
+
{"policy_stack": "bandit-only", "episode": 0, "step": 3, "reward": 0.762, "final_action": {"mode": "DOSE_OPT", "action_type": "ORDER_MONITORING_AND_WAIT", "target_drug": null, "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "vitals_labs_7d", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_08", "confidence": 0.74, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "step_budget_exhausted", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.934, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.742}}
|
| 4 |
+
{"policy_stack": "bandit-only", "episode": 1, "step": 1, "reward": 0.796, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "statin_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.917, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}}
|
| 5 |
+
{"policy_stack": "bandit-only", "episode": 1, "step": 2, "reward": 0.787, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "statin_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.917, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.858}}
|
| 6 |
+
{"policy_stack": "bandit-only", "episode": 1, "step": 3, "reward": 0.781, "final_action": {"mode": "DOSE_OPT", "action_type": "ORDER_MONITORING_AND_WAIT", "target_drug": null, "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "vitals_labs_7d", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_08", "confidence": 0.74, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.934, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.823}}
|
| 7 |
+
{"policy_stack": "bandit-only", "episode": 1, "step": 4, "reward": 0.77, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "statin_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.917, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.787}}
|
| 8 |
+
{"policy_stack": "bandit-only", "episode": 1, "step": 5, "reward": 0.764, "final_action": {"mode": "DOSE_OPT", "action_type": "ORDER_MONITORING_AND_WAIT", "target_drug": null, "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "vitals_labs_7d", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_08", "confidence": 0.74, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.934, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.751}}
|
| 9 |
+
{"policy_stack": "bandit-only", "episode": 1, "step": 6, "reward": 0.756, "final_action": {"mode": "DOSE_OPT", "action_type": "ORDER_MONITORING_AND_WAIT", "target_drug": null, "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "vitals_labs_7d", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_08", "confidence": 0.74, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "step_budget_exhausted", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.934, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.716}}
|
| 10 |
+
{"policy_stack": "bandit-only", "episode": 2, "step": 1, "reward": 0.806, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "nsaid_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}}
|
| 11 |
+
{"policy_stack": "bandit-only", "episode": 2, "step": 2, "reward": 0.797, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "nsaid_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.858}}
|
| 12 |
+
{"policy_stack": "bandit-only", "episode": 2, "step": 3, "reward": 0.787, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "opioid_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "safety_veto_threshold", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.823}}
|
| 13 |
+
{"policy_stack": "bandit-only", "episode": 3, "step": 1, "reward": 0.804, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "statin_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}}
|
| 14 |
+
{"policy_stack": "bandit-only", "episode": 3, "step": 2, "reward": 0.798, "final_action": {"mode": "DOSE_OPT", "action_type": "ORDER_MONITORING_AND_WAIT", "target_drug": null, "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "vitals_labs_7d", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_08", "confidence": 0.74, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.984, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.858}}
|
| 15 |
+
{"policy_stack": "bandit-only", "episode": 3, "step": 3, "reward": 0.787, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "statin_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.823}}
|
| 16 |
+
{"policy_stack": "bandit-only", "episode": 3, "step": 4, "reward": 0.781, "final_action": {"mode": "DOSE_OPT", "action_type": "ORDER_MONITORING_AND_WAIT", "target_drug": null, "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "vitals_labs_7d", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_08", "confidence": 0.74, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.984, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.787}}
|
| 17 |
+
{"policy_stack": "bandit-only", "episode": 3, "step": 5, "reward": 0.769, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "statin_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.751}}
|
| 18 |
+
{"policy_stack": "bandit-only", "episode": 3, "step": 6, "reward": 0.764, "final_action": {"mode": "DOSE_OPT", "action_type": "ORDER_MONITORING_AND_WAIT", "target_drug": null, "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "vitals_labs_7d", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_08", "confidence": 0.74, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "step_budget_exhausted", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.984, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.716}}
|
| 19 |
+
{"policy_stack": "bandit-only", "episode": 4, "step": 1, "reward": 0.806, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "nsaid_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}}
|
| 20 |
+
{"policy_stack": "bandit-only", "episode": 4, "step": 2, "reward": 0.797, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "nsaid_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.858}}
|
| 21 |
+
{"policy_stack": "bandit-only", "episode": 4, "step": 3, "reward": 0.729, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "nsaid_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "exploit_detection", "failure_reasons": ["repeated_action_loop"], "primary_reward_channels": {"safety_legality": 0.73, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.823}}
|
| 22 |
+
{"policy_stack": "bandit-only", "episode": 5, "step": 1, "reward": 0.79, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "opioid_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.603, "dosing_quality": 0.655, "process_integrity": 0.894}}
|
| 23 |
+
{"policy_stack": "bandit-only", "episode": 5, "step": 2, "reward": 0.782, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "opioid_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.603, "dosing_quality": 0.655, "process_integrity": 0.858}}
|
| 24 |
+
{"policy_stack": "bandit-only", "episode": 5, "step": 3, "reward": 0.776, "final_action": {"mode": "DOSE_OPT", "action_type": "ORDER_MONITORING_AND_WAIT", "target_drug": null, "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "vitals_labs_7d", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_08", "confidence": 0.74, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.984, "clinical_improvement": 0.603, "dosing_quality": 0.655, "process_integrity": 0.823}}
|
| 25 |
+
{"policy_stack": "bandit-only", "episode": 5, "step": 4, "reward": 0.764, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "opioid_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.603, "dosing_quality": 0.655, "process_integrity": 0.787}}
|
| 26 |
+
{"policy_stack": "bandit-only", "episode": 5, "step": 5, "reward": 0.759, "final_action": {"mode": "DOSE_OPT", "action_type": "ORDER_MONITORING_AND_WAIT", "target_drug": null, "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "vitals_labs_7d", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_08", "confidence": 0.74, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.984, "clinical_improvement": 0.603, "dosing_quality": 0.655, "process_integrity": 0.751}}
|
| 27 |
+
{"policy_stack": "bandit-only", "episode": 5, "step": 6, "reward": 0.747, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "opioid_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "step_budget_exhausted", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.603, "dosing_quality": 0.655, "process_integrity": 0.716}}
|
| 28 |
+
{"policy_stack": "bandit-only", "episode": 6, "step": 1, "reward": 0.81, "final_action": {"mode": "DOSE_OPT", "action_type": "ORDER_MONITORING_AND_WAIT", "target_drug": null, "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "vitals_labs_7d", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_08", "confidence": 0.74, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.984, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.907}}
|
| 29 |
+
{"policy_stack": "bandit-only", "episode": 6, "step": 2, "reward": 0.804, "final_action": {"mode": "DOSE_OPT", "action_type": "ORDER_MONITORING_AND_WAIT", "target_drug": null, "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "vitals_labs_7d", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_08", "confidence": 0.74, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.984, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.884}}
|
| 30 |
+
{"policy_stack": "bandit-only", "episode": 6, "step": 3, "reward": 0.739, "final_action": {"mode": "DOSE_OPT", "action_type": "ORDER_MONITORING_AND_WAIT", "target_drug": null, "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "vitals_labs_7d", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_08", "confidence": 0.74, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "exploit_detection", "failure_reasons": ["repeated_action_loop"], "primary_reward_channels": {"safety_legality": 0.735, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.861}}
|
| 31 |
+
{"policy_stack": "bandit-only", "episode": 7, "step": 1, "reward": 0.79, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "opioid_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.831}}
|
| 32 |
+
{"policy_stack": "bandit-only", "episode": 7, "step": 2, "reward": 0.78, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "opioid_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "safety_veto_threshold", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.789}}
|
checkpoints/sweeps/qwen-qwen2-5-3b-instruct/llm-only_failures.json
ADDED
|
@@ -0,0 +1,219 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"policy_stack": "llm-only",
|
| 4 |
+
"episode": 0,
|
| 5 |
+
"step": 3,
|
| 6 |
+
"reward": 0.699,
|
| 7 |
+
"final_action": {
|
| 8 |
+
"mode": "DOSE_OPT",
|
| 9 |
+
"action_type": "REDUCE_DOSE_BUCKET",
|
| 10 |
+
"target_drug": "benzodiazepine_like",
|
| 11 |
+
"replacement_drug": null,
|
| 12 |
+
"dose_bucket": "LOW",
|
| 13 |
+
"taper_days": null,
|
| 14 |
+
"monitoring_plan": null,
|
| 15 |
+
"evidence_query": null,
|
| 16 |
+
"new_drug_name": null,
|
| 17 |
+
"candidate_components": [],
|
| 18 |
+
"candidate_id": "cand_03",
|
| 19 |
+
"confidence": 0.67,
|
| 20 |
+
"rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."
|
| 21 |
+
},
|
| 22 |
+
"termination_reason": "exploit_detection",
|
| 23 |
+
"failure_reasons": [
|
| 24 |
+
"repeated_action_loop"
|
| 25 |
+
],
|
| 26 |
+
"primary_reward_channels": {
|
| 27 |
+
"safety_legality": 0.667,
|
| 28 |
+
"clinical_improvement": 0.633,
|
| 29 |
+
"dosing_quality": 0.655,
|
| 30 |
+
"process_integrity": 0.742
|
| 31 |
+
}
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"policy_stack": "llm-only",
|
| 35 |
+
"episode": 1,
|
| 36 |
+
"step": 3,
|
| 37 |
+
"reward": 0.719,
|
| 38 |
+
"final_action": {
|
| 39 |
+
"mode": "DOSE_OPT",
|
| 40 |
+
"action_type": "REDUCE_DOSE_BUCKET",
|
| 41 |
+
"target_drug": "opioid_like",
|
| 42 |
+
"replacement_drug": null,
|
| 43 |
+
"dose_bucket": "LOW",
|
| 44 |
+
"taper_days": null,
|
| 45 |
+
"monitoring_plan": null,
|
| 46 |
+
"evidence_query": null,
|
| 47 |
+
"new_drug_name": null,
|
| 48 |
+
"candidate_components": [],
|
| 49 |
+
"candidate_id": "cand_03",
|
| 50 |
+
"confidence": 0.67,
|
| 51 |
+
"rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."
|
| 52 |
+
},
|
| 53 |
+
"termination_reason": "exploit_detection",
|
| 54 |
+
"failure_reasons": [
|
| 55 |
+
"repeated_action_loop"
|
| 56 |
+
],
|
| 57 |
+
"primary_reward_channels": {
|
| 58 |
+
"safety_legality": 0.667,
|
| 59 |
+
"clinical_improvement": 0.633,
|
| 60 |
+
"dosing_quality": 0.655,
|
| 61 |
+
"process_integrity": 0.823
|
| 62 |
+
}
|
| 63 |
+
},
|
| 64 |
+
{
|
| 65 |
+
"policy_stack": "llm-only",
|
| 66 |
+
"episode": 2,
|
| 67 |
+
"step": 3,
|
| 68 |
+
"reward": 0.727,
|
| 69 |
+
"final_action": {
|
| 70 |
+
"mode": "DOSE_OPT",
|
| 71 |
+
"action_type": "REDUCE_DOSE_BUCKET",
|
| 72 |
+
"target_drug": "warfarin_like",
|
| 73 |
+
"replacement_drug": null,
|
| 74 |
+
"dose_bucket": "LOW",
|
| 75 |
+
"taper_days": null,
|
| 76 |
+
"monitoring_plan": null,
|
| 77 |
+
"evidence_query": null,
|
| 78 |
+
"new_drug_name": null,
|
| 79 |
+
"candidate_components": [],
|
| 80 |
+
"candidate_id": "cand_03",
|
| 81 |
+
"confidence": 0.67,
|
| 82 |
+
"rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."
|
| 83 |
+
},
|
| 84 |
+
"termination_reason": "exploit_detection",
|
| 85 |
+
"failure_reasons": [
|
| 86 |
+
"repeated_action_loop"
|
| 87 |
+
],
|
| 88 |
+
"primary_reward_channels": {
|
| 89 |
+
"safety_legality": 0.717,
|
| 90 |
+
"clinical_improvement": 0.633,
|
| 91 |
+
"dosing_quality": 0.655,
|
| 92 |
+
"process_integrity": 0.823
|
| 93 |
+
}
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"policy_stack": "llm-only",
|
| 97 |
+
"episode": 3,
|
| 98 |
+
"step": 3,
|
| 99 |
+
"reward": 0.729,
|
| 100 |
+
"final_action": {
|
| 101 |
+
"mode": "DOSE_OPT",
|
| 102 |
+
"action_type": "DOSE_HOLD",
|
| 103 |
+
"target_drug": "nsaid_like",
|
| 104 |
+
"replacement_drug": null,
|
| 105 |
+
"dose_bucket": "NA",
|
| 106 |
+
"taper_days": null,
|
| 107 |
+
"monitoring_plan": "repeat_labs_72h",
|
| 108 |
+
"evidence_query": null,
|
| 109 |
+
"new_drug_name": null,
|
| 110 |
+
"candidate_components": [],
|
| 111 |
+
"candidate_id": "cand_05",
|
| 112 |
+
"confidence": 0.72,
|
| 113 |
+
"rationale_brief": "Transformers fallback selected cand_05 via local ranker; active_model_enabled=False; active_model_available=False."
|
| 114 |
+
},
|
| 115 |
+
"termination_reason": "exploit_detection",
|
| 116 |
+
"failure_reasons": [
|
| 117 |
+
"repeated_action_loop"
|
| 118 |
+
],
|
| 119 |
+
"primary_reward_channels": {
|
| 120 |
+
"safety_legality": 0.73,
|
| 121 |
+
"clinical_improvement": 0.633,
|
| 122 |
+
"dosing_quality": 0.655,
|
| 123 |
+
"process_integrity": 0.823
|
| 124 |
+
}
|
| 125 |
+
},
|
| 126 |
+
{
|
| 127 |
+
"policy_stack": "llm-only",
|
| 128 |
+
"episode": 4,
|
| 129 |
+
"step": 3,
|
| 130 |
+
"reward": 0.729,
|
| 131 |
+
"final_action": {
|
| 132 |
+
"mode": "DOSE_OPT",
|
| 133 |
+
"action_type": "DOSE_HOLD",
|
| 134 |
+
"target_drug": "benzodiazepine_like",
|
| 135 |
+
"replacement_drug": null,
|
| 136 |
+
"dose_bucket": "NA",
|
| 137 |
+
"taper_days": null,
|
| 138 |
+
"monitoring_plan": "repeat_labs_72h",
|
| 139 |
+
"evidence_query": null,
|
| 140 |
+
"new_drug_name": null,
|
| 141 |
+
"candidate_components": [],
|
| 142 |
+
"candidate_id": "cand_05",
|
| 143 |
+
"confidence": 0.72,
|
| 144 |
+
"rationale_brief": "Transformers fallback selected cand_05 via local ranker; active_model_enabled=False; active_model_available=False."
|
| 145 |
+
},
|
| 146 |
+
"termination_reason": "exploit_detection",
|
| 147 |
+
"failure_reasons": [
|
| 148 |
+
"repeated_action_loop"
|
| 149 |
+
],
|
| 150 |
+
"primary_reward_channels": {
|
| 151 |
+
"safety_legality": 0.73,
|
| 152 |
+
"clinical_improvement": 0.633,
|
| 153 |
+
"dosing_quality": 0.655,
|
| 154 |
+
"process_integrity": 0.823
|
| 155 |
+
}
|
| 156 |
+
},
|
| 157 |
+
{
|
| 158 |
+
"policy_stack": "llm-only",
|
| 159 |
+
"episode": 5,
|
| 160 |
+
"step": 3,
|
| 161 |
+
"reward": 0.715,
|
| 162 |
+
"final_action": {
|
| 163 |
+
"mode": "DOSE_OPT",
|
| 164 |
+
"action_type": "DOSE_HOLD",
|
| 165 |
+
"target_drug": "nsaid_like",
|
| 166 |
+
"replacement_drug": null,
|
| 167 |
+
"dose_bucket": "NA",
|
| 168 |
+
"taper_days": null,
|
| 169 |
+
"monitoring_plan": "repeat_labs_72h",
|
| 170 |
+
"evidence_query": null,
|
| 171 |
+
"new_drug_name": null,
|
| 172 |
+
"candidate_components": [],
|
| 173 |
+
"candidate_id": "cand_05",
|
| 174 |
+
"confidence": 0.72,
|
| 175 |
+
"rationale_brief": "Transformers fallback selected cand_05 via local ranker; active_model_enabled=False; active_model_available=False."
|
| 176 |
+
},
|
| 177 |
+
"termination_reason": "exploit_detection",
|
| 178 |
+
"failure_reasons": [
|
| 179 |
+
"repeated_action_loop"
|
| 180 |
+
],
|
| 181 |
+
"primary_reward_channels": {
|
| 182 |
+
"safety_legality": 0.73,
|
| 183 |
+
"clinical_improvement": 0.603,
|
| 184 |
+
"dosing_quality": 0.655,
|
| 185 |
+
"process_integrity": 0.823
|
| 186 |
+
}
|
| 187 |
+
},
|
| 188 |
+
{
|
| 189 |
+
"policy_stack": "llm-only",
|
| 190 |
+
"episode": 6,
|
| 191 |
+
"step": 3,
|
| 192 |
+
"reward": 0.738,
|
| 193 |
+
"final_action": {
|
| 194 |
+
"mode": "DOSE_OPT",
|
| 195 |
+
"action_type": "DOSE_HOLD",
|
| 196 |
+
"target_drug": "opioid_like",
|
| 197 |
+
"replacement_drug": null,
|
| 198 |
+
"dose_bucket": "NA",
|
| 199 |
+
"taper_days": null,
|
| 200 |
+
"monitoring_plan": "repeat_labs_72h",
|
| 201 |
+
"evidence_query": null,
|
| 202 |
+
"new_drug_name": null,
|
| 203 |
+
"candidate_components": [],
|
| 204 |
+
"candidate_id": "cand_05",
|
| 205 |
+
"confidence": 0.72,
|
| 206 |
+
"rationale_brief": "Transformers fallback selected cand_05 via local ranker; active_model_enabled=False; active_model_available=False."
|
| 207 |
+
},
|
| 208 |
+
"termination_reason": "exploit_detection",
|
| 209 |
+
"failure_reasons": [
|
| 210 |
+
"repeated_action_loop"
|
| 211 |
+
],
|
| 212 |
+
"primary_reward_channels": {
|
| 213 |
+
"safety_legality": 0.73,
|
| 214 |
+
"clinical_improvement": 0.633,
|
| 215 |
+
"dosing_quality": 0.655,
|
| 216 |
+
"process_integrity": 0.861
|
| 217 |
+
}
|
| 218 |
+
}
|
| 219 |
+
]
|
checkpoints/sweeps/qwen-qwen2-5-3b-instruct/llm-only_replay.jsonl
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"policy_stack": "llm-only", "episode": 0, "step": 1, "reward": 0.789, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "benzodiazepine_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.917, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.867}}
|
| 2 |
+
{"policy_stack": "llm-only", "episode": 0, "step": 2, "reward": 0.774, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "benzodiazepine_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.917, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.805}}
|
| 3 |
+
{"policy_stack": "llm-only", "episode": 0, "step": 3, "reward": 0.699, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "benzodiazepine_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "exploit_detection", "failure_reasons": ["repeated_action_loop"], "primary_reward_channels": {"safety_legality": 0.667, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.742}}
|
| 4 |
+
{"policy_stack": "llm-only", "episode": 1, "step": 1, "reward": 0.796, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "opioid_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.917, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}}
|
| 5 |
+
{"policy_stack": "llm-only", "episode": 1, "step": 2, "reward": 0.787, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "opioid_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.917, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.858}}
|
| 6 |
+
{"policy_stack": "llm-only", "episode": 1, "step": 3, "reward": 0.719, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "opioid_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "exploit_detection", "failure_reasons": ["repeated_action_loop"], "primary_reward_channels": {"safety_legality": 0.667, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.823}}
|
| 7 |
+
{"policy_stack": "llm-only", "episode": 2, "step": 1, "reward": 0.804, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "warfarin_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}}
|
| 8 |
+
{"policy_stack": "llm-only", "episode": 2, "step": 2, "reward": 0.795, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "warfarin_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.858}}
|
| 9 |
+
{"policy_stack": "llm-only", "episode": 2, "step": 3, "reward": 0.727, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "warfarin_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "exploit_detection", "failure_reasons": ["repeated_action_loop"], "primary_reward_channels": {"safety_legality": 0.717, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.823}}
|
| 10 |
+
{"policy_stack": "llm-only", "episode": 3, "step": 1, "reward": 0.806, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "nsaid_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Transformers fallback selected cand_05 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}}
|
| 11 |
+
{"policy_stack": "llm-only", "episode": 3, "step": 2, "reward": 0.797, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "nsaid_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Transformers fallback selected cand_05 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.858}}
|
| 12 |
+
{"policy_stack": "llm-only", "episode": 3, "step": 3, "reward": 0.729, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "nsaid_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Transformers fallback selected cand_05 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "exploit_detection", "failure_reasons": ["repeated_action_loop"], "primary_reward_channels": {"safety_legality": 0.73, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.823}}
|
| 13 |
+
{"policy_stack": "llm-only", "episode": 4, "step": 1, "reward": 0.806, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "benzodiazepine_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Transformers fallback selected cand_05 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}}
|
| 14 |
+
{"policy_stack": "llm-only", "episode": 4, "step": 2, "reward": 0.797, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "benzodiazepine_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Transformers fallback selected cand_05 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.858}}
|
| 15 |
+
{"policy_stack": "llm-only", "episode": 4, "step": 3, "reward": 0.729, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "benzodiazepine_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Transformers fallback selected cand_05 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "exploit_detection", "failure_reasons": ["repeated_action_loop"], "primary_reward_channels": {"safety_legality": 0.73, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.823}}
|
| 16 |
+
{"policy_stack": "llm-only", "episode": 5, "step": 1, "reward": 0.792, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "nsaid_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Transformers fallback selected cand_05 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.603, "dosing_quality": 0.655, "process_integrity": 0.894}}
|
| 17 |
+
{"policy_stack": "llm-only", "episode": 5, "step": 2, "reward": 0.784, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "nsaid_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Transformers fallback selected cand_05 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.603, "dosing_quality": 0.655, "process_integrity": 0.858}}
|
| 18 |
+
{"policy_stack": "llm-only", "episode": 5, "step": 3, "reward": 0.715, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "nsaid_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Transformers fallback selected cand_05 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "exploit_detection", "failure_reasons": ["repeated_action_loop"], "primary_reward_channels": {"safety_legality": 0.73, "clinical_improvement": 0.603, "dosing_quality": 0.655, "process_integrity": 0.823}}
|
| 19 |
+
{"policy_stack": "llm-only", "episode": 6, "step": 1, "reward": 0.809, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "opioid_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Transformers fallback selected cand_05 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.907}}
|
| 20 |
+
{"policy_stack": "llm-only", "episode": 6, "step": 2, "reward": 0.803, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "opioid_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Transformers fallback selected cand_05 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.884}}
|
| 21 |
+
{"policy_stack": "llm-only", "episode": 6, "step": 3, "reward": 0.738, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "opioid_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Transformers fallback selected cand_05 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "exploit_detection", "failure_reasons": ["repeated_action_loop"], "primary_reward_channels": {"safety_legality": 0.73, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.861}}
|
| 22 |
+
{"policy_stack": "llm-only", "episode": 7, "step": 1, "reward": 0.79, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "benzodiazepine_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Transformers fallback selected cand_05 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.831}}
|
| 23 |
+
{"policy_stack": "llm-only", "episode": 7, "step": 2, "reward": 0.78, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "benzodiazepine_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Transformers fallback selected cand_05 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "safety_veto_threshold", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.789}}
|
checkpoints/sweeps/qwen-qwen2-5-3b-instruct/llm_bandit_failures.json
ADDED
|
@@ -0,0 +1,219 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"policy_stack": "llm+bandit",
|
| 4 |
+
"episode": 0,
|
| 5 |
+
"step": 3,
|
| 6 |
+
"reward": 0.699,
|
| 7 |
+
"final_action": {
|
| 8 |
+
"mode": "DOSE_OPT",
|
| 9 |
+
"action_type": "REDUCE_DOSE_BUCKET",
|
| 10 |
+
"target_drug": "beta_blocker_like",
|
| 11 |
+
"replacement_drug": null,
|
| 12 |
+
"dose_bucket": "LOW",
|
| 13 |
+
"taper_days": null,
|
| 14 |
+
"monitoring_plan": null,
|
| 15 |
+
"evidence_query": null,
|
| 16 |
+
"new_drug_name": null,
|
| 17 |
+
"candidate_components": [],
|
| 18 |
+
"candidate_id": "cand_03",
|
| 19 |
+
"confidence": 0.67,
|
| 20 |
+
"rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."
|
| 21 |
+
},
|
| 22 |
+
"termination_reason": "exploit_detection",
|
| 23 |
+
"failure_reasons": [
|
| 24 |
+
"repeated_action_loop"
|
| 25 |
+
],
|
| 26 |
+
"primary_reward_channels": {
|
| 27 |
+
"safety_legality": 0.667,
|
| 28 |
+
"clinical_improvement": 0.633,
|
| 29 |
+
"dosing_quality": 0.655,
|
| 30 |
+
"process_integrity": 0.742
|
| 31 |
+
}
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"policy_stack": "llm+bandit",
|
| 35 |
+
"episode": 1,
|
| 36 |
+
"step": 3,
|
| 37 |
+
"reward": 0.663,
|
| 38 |
+
"final_action": {
|
| 39 |
+
"mode": "REGIMEN_OPT",
|
| 40 |
+
"action_type": "STOP_DRUG",
|
| 41 |
+
"target_drug": "ppi_like",
|
| 42 |
+
"replacement_drug": null,
|
| 43 |
+
"dose_bucket": "NA",
|
| 44 |
+
"taper_days": null,
|
| 45 |
+
"monitoring_plan": null,
|
| 46 |
+
"evidence_query": null,
|
| 47 |
+
"new_drug_name": null,
|
| 48 |
+
"candidate_components": [],
|
| 49 |
+
"candidate_id": "cand_02",
|
| 50 |
+
"confidence": 0.58,
|
| 51 |
+
"rationale_brief": "Transformers fallback selected cand_02 via local ranker; active_model_enabled=False; active_model_available=False."
|
| 52 |
+
},
|
| 53 |
+
"termination_reason": "exploit_detection",
|
| 54 |
+
"failure_reasons": [
|
| 55 |
+
"repeated_action_loop"
|
| 56 |
+
],
|
| 57 |
+
"primary_reward_channels": {
|
| 58 |
+
"safety_legality": 0.645,
|
| 59 |
+
"clinical_improvement": 0.527,
|
| 60 |
+
"dosing_quality": 0.53,
|
| 61 |
+
"process_integrity": 0.823
|
| 62 |
+
}
|
| 63 |
+
},
|
| 64 |
+
{
|
| 65 |
+
"policy_stack": "llm+bandit",
|
| 66 |
+
"episode": 2,
|
| 67 |
+
"step": 3,
|
| 68 |
+
"reward": 0.729,
|
| 69 |
+
"final_action": {
|
| 70 |
+
"mode": "DOSE_OPT",
|
| 71 |
+
"action_type": "DOSE_HOLD",
|
| 72 |
+
"target_drug": "nsaid_like",
|
| 73 |
+
"replacement_drug": null,
|
| 74 |
+
"dose_bucket": "NA",
|
| 75 |
+
"taper_days": null,
|
| 76 |
+
"monitoring_plan": "repeat_labs_72h",
|
| 77 |
+
"evidence_query": null,
|
| 78 |
+
"new_drug_name": null,
|
| 79 |
+
"candidate_components": [],
|
| 80 |
+
"candidate_id": "cand_05",
|
| 81 |
+
"confidence": 0.72,
|
| 82 |
+
"rationale_brief": "Transformers fallback selected cand_05 via local ranker; active_model_enabled=False; active_model_available=False."
|
| 83 |
+
},
|
| 84 |
+
"termination_reason": "exploit_detection",
|
| 85 |
+
"failure_reasons": [
|
| 86 |
+
"repeated_action_loop"
|
| 87 |
+
],
|
| 88 |
+
"primary_reward_channels": {
|
| 89 |
+
"safety_legality": 0.73,
|
| 90 |
+
"clinical_improvement": 0.633,
|
| 91 |
+
"dosing_quality": 0.655,
|
| 92 |
+
"process_integrity": 0.823
|
| 93 |
+
}
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"policy_stack": "llm+bandit",
|
| 97 |
+
"episode": 3,
|
| 98 |
+
"step": 3,
|
| 99 |
+
"reward": 0.727,
|
| 100 |
+
"final_action": {
|
| 101 |
+
"mode": "DOSE_OPT",
|
| 102 |
+
"action_type": "REDUCE_DOSE_BUCKET",
|
| 103 |
+
"target_drug": "benzodiazepine_like",
|
| 104 |
+
"replacement_drug": null,
|
| 105 |
+
"dose_bucket": "LOW",
|
| 106 |
+
"taper_days": null,
|
| 107 |
+
"monitoring_plan": null,
|
| 108 |
+
"evidence_query": null,
|
| 109 |
+
"new_drug_name": null,
|
| 110 |
+
"candidate_components": [],
|
| 111 |
+
"candidate_id": "cand_03",
|
| 112 |
+
"confidence": 0.67,
|
| 113 |
+
"rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."
|
| 114 |
+
},
|
| 115 |
+
"termination_reason": "exploit_detection",
|
| 116 |
+
"failure_reasons": [
|
| 117 |
+
"repeated_action_loop"
|
| 118 |
+
],
|
| 119 |
+
"primary_reward_channels": {
|
| 120 |
+
"safety_legality": 0.717,
|
| 121 |
+
"clinical_improvement": 0.633,
|
| 122 |
+
"dosing_quality": 0.655,
|
| 123 |
+
"process_integrity": 0.823
|
| 124 |
+
}
|
| 125 |
+
},
|
| 126 |
+
{
|
| 127 |
+
"policy_stack": "llm+bandit",
|
| 128 |
+
"episode": 4,
|
| 129 |
+
"step": 3,
|
| 130 |
+
"reward": 0.727,
|
| 131 |
+
"final_action": {
|
| 132 |
+
"mode": "DOSE_OPT",
|
| 133 |
+
"action_type": "REDUCE_DOSE_BUCKET",
|
| 134 |
+
"target_drug": "benzodiazepine_like",
|
| 135 |
+
"replacement_drug": null,
|
| 136 |
+
"dose_bucket": "LOW",
|
| 137 |
+
"taper_days": null,
|
| 138 |
+
"monitoring_plan": null,
|
| 139 |
+
"evidence_query": null,
|
| 140 |
+
"new_drug_name": null,
|
| 141 |
+
"candidate_components": [],
|
| 142 |
+
"candidate_id": "cand_03",
|
| 143 |
+
"confidence": 0.67,
|
| 144 |
+
"rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."
|
| 145 |
+
},
|
| 146 |
+
"termination_reason": "exploit_detection",
|
| 147 |
+
"failure_reasons": [
|
| 148 |
+
"repeated_action_loop"
|
| 149 |
+
],
|
| 150 |
+
"primary_reward_channels": {
|
| 151 |
+
"safety_legality": 0.717,
|
| 152 |
+
"clinical_improvement": 0.633,
|
| 153 |
+
"dosing_quality": 0.655,
|
| 154 |
+
"process_integrity": 0.823
|
| 155 |
+
}
|
| 156 |
+
},
|
| 157 |
+
{
|
| 158 |
+
"policy_stack": "llm+bandit",
|
| 159 |
+
"episode": 5,
|
| 160 |
+
"step": 3,
|
| 161 |
+
"reward": 0.713,
|
| 162 |
+
"final_action": {
|
| 163 |
+
"mode": "DOSE_OPT",
|
| 164 |
+
"action_type": "REDUCE_DOSE_BUCKET",
|
| 165 |
+
"target_drug": "opioid_like",
|
| 166 |
+
"replacement_drug": null,
|
| 167 |
+
"dose_bucket": "LOW",
|
| 168 |
+
"taper_days": null,
|
| 169 |
+
"monitoring_plan": null,
|
| 170 |
+
"evidence_query": null,
|
| 171 |
+
"new_drug_name": null,
|
| 172 |
+
"candidate_components": [],
|
| 173 |
+
"candidate_id": "cand_03",
|
| 174 |
+
"confidence": 0.67,
|
| 175 |
+
"rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."
|
| 176 |
+
},
|
| 177 |
+
"termination_reason": "exploit_detection",
|
| 178 |
+
"failure_reasons": [
|
| 179 |
+
"repeated_action_loop"
|
| 180 |
+
],
|
| 181 |
+
"primary_reward_channels": {
|
| 182 |
+
"safety_legality": 0.717,
|
| 183 |
+
"clinical_improvement": 0.603,
|
| 184 |
+
"dosing_quality": 0.655,
|
| 185 |
+
"process_integrity": 0.823
|
| 186 |
+
}
|
| 187 |
+
},
|
| 188 |
+
{
|
| 189 |
+
"policy_stack": "llm+bandit",
|
| 190 |
+
"episode": 6,
|
| 191 |
+
"step": 3,
|
| 192 |
+
"reward": 0.736,
|
| 193 |
+
"final_action": {
|
| 194 |
+
"mode": "DOSE_OPT",
|
| 195 |
+
"action_type": "REDUCE_DOSE_BUCKET",
|
| 196 |
+
"target_drug": "benzodiazepine_like",
|
| 197 |
+
"replacement_drug": null,
|
| 198 |
+
"dose_bucket": "LOW",
|
| 199 |
+
"taper_days": null,
|
| 200 |
+
"monitoring_plan": null,
|
| 201 |
+
"evidence_query": null,
|
| 202 |
+
"new_drug_name": null,
|
| 203 |
+
"candidate_components": [],
|
| 204 |
+
"candidate_id": "cand_03",
|
| 205 |
+
"confidence": 0.67,
|
| 206 |
+
"rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."
|
| 207 |
+
},
|
| 208 |
+
"termination_reason": "exploit_detection",
|
| 209 |
+
"failure_reasons": [
|
| 210 |
+
"repeated_action_loop"
|
| 211 |
+
],
|
| 212 |
+
"primary_reward_channels": {
|
| 213 |
+
"safety_legality": 0.717,
|
| 214 |
+
"clinical_improvement": 0.633,
|
| 215 |
+
"dosing_quality": 0.655,
|
| 216 |
+
"process_integrity": 0.861
|
| 217 |
+
}
|
| 218 |
+
}
|
| 219 |
+
]
|
checkpoints/sweeps/qwen-qwen2-5-3b-instruct/llm_bandit_replay.jsonl
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"policy_stack": "llm+bandit", "episode": 0, "step": 1, "reward": 0.789, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "beta_blocker_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.917, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.867}}
|
| 2 |
+
{"policy_stack": "llm+bandit", "episode": 0, "step": 2, "reward": 0.774, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "beta_blocker_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.917, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.805}}
|
| 3 |
+
{"policy_stack": "llm+bandit", "episode": 0, "step": 3, "reward": 0.699, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "beta_blocker_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "exploit_detection", "failure_reasons": ["repeated_action_loop"], "primary_reward_channels": {"safety_legality": 0.667, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.742}}
|
| 4 |
+
{"policy_stack": "llm+bandit", "episode": 1, "step": 1, "reward": 0.747, "final_action": {"mode": "REGIMEN_OPT", "action_type": "STOP_DRUG", "target_drug": "beta_blocker_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_02", "confidence": 0.58, "rationale_brief": "Transformers fallback selected cand_02 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.894, "clinical_improvement": 0.549, "dosing_quality": 0.53, "process_integrity": 0.894}}
|
| 5 |
+
{"policy_stack": "llm+bandit", "episode": 1, "step": 2, "reward": 0.738, "final_action": {"mode": "REGIMEN_OPT", "action_type": "STOP_DRUG", "target_drug": "ace_inhibitor_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_02", "confidence": 0.58, "rationale_brief": "Transformers fallback selected cand_02 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.894, "clinical_improvement": 0.549, "dosing_quality": 0.53, "process_integrity": 0.858}}
|
| 6 |
+
{"policy_stack": "llm+bandit", "episode": 1, "step": 3, "reward": 0.663, "final_action": {"mode": "REGIMEN_OPT", "action_type": "STOP_DRUG", "target_drug": "ppi_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_02", "confidence": 0.58, "rationale_brief": "Transformers fallback selected cand_02 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "exploit_detection", "failure_reasons": ["repeated_action_loop"], "primary_reward_channels": {"safety_legality": 0.645, "clinical_improvement": 0.527, "dosing_quality": 0.53, "process_integrity": 0.823}}
|
| 7 |
+
{"policy_stack": "llm+bandit", "episode": 2, "step": 1, "reward": 0.806, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "nsaid_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Transformers fallback selected cand_05 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}}
|
| 8 |
+
{"policy_stack": "llm+bandit", "episode": 2, "step": 2, "reward": 0.797, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "nsaid_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Transformers fallback selected cand_05 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.858}}
|
| 9 |
+
{"policy_stack": "llm+bandit", "episode": 2, "step": 3, "reward": 0.729, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "nsaid_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Transformers fallback selected cand_05 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "exploit_detection", "failure_reasons": ["repeated_action_loop"], "primary_reward_channels": {"safety_legality": 0.73, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.823}}
|
| 10 |
+
{"policy_stack": "llm+bandit", "episode": 3, "step": 1, "reward": 0.804, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "benzodiazepine_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}}
|
| 11 |
+
{"policy_stack": "llm+bandit", "episode": 3, "step": 2, "reward": 0.795, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "benzodiazepine_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.858}}
|
| 12 |
+
{"policy_stack": "llm+bandit", "episode": 3, "step": 3, "reward": 0.727, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "benzodiazepine_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "exploit_detection", "failure_reasons": ["repeated_action_loop"], "primary_reward_channels": {"safety_legality": 0.717, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.823}}
|
| 13 |
+
{"policy_stack": "llm+bandit", "episode": 4, "step": 1, "reward": 0.804, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "benzodiazepine_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}}
|
| 14 |
+
{"policy_stack": "llm+bandit", "episode": 4, "step": 2, "reward": 0.795, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "benzodiazepine_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.858}}
|
| 15 |
+
{"policy_stack": "llm+bandit", "episode": 4, "step": 3, "reward": 0.727, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "benzodiazepine_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "exploit_detection", "failure_reasons": ["repeated_action_loop"], "primary_reward_channels": {"safety_legality": 0.717, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.823}}
|
| 16 |
+
{"policy_stack": "llm+bandit", "episode": 5, "step": 1, "reward": 0.79, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "opioid_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.603, "dosing_quality": 0.655, "process_integrity": 0.894}}
|
| 17 |
+
{"policy_stack": "llm+bandit", "episode": 5, "step": 2, "reward": 0.782, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "opioid_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.603, "dosing_quality": 0.655, "process_integrity": 0.858}}
|
| 18 |
+
{"policy_stack": "llm+bandit", "episode": 5, "step": 3, "reward": 0.713, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "opioid_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "exploit_detection", "failure_reasons": ["repeated_action_loop"], "primary_reward_channels": {"safety_legality": 0.717, "clinical_improvement": 0.603, "dosing_quality": 0.655, "process_integrity": 0.823}}
|
| 19 |
+
{"policy_stack": "llm+bandit", "episode": 6, "step": 1, "reward": 0.807, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "benzodiazepine_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.907}}
|
| 20 |
+
{"policy_stack": "llm+bandit", "episode": 6, "step": 2, "reward": 0.801, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "benzodiazepine_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.884}}
|
| 21 |
+
{"policy_stack": "llm+bandit", "episode": 6, "step": 3, "reward": 0.736, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "benzodiazepine_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "exploit_detection", "failure_reasons": ["repeated_action_loop"], "primary_reward_channels": {"safety_legality": 0.717, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.861}}
|
| 22 |
+
{"policy_stack": "llm+bandit", "episode": 7, "step": 1, "reward": 0.788, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "opioid_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.831}}
|
| 23 |
+
{"policy_stack": "llm+bandit", "episode": 7, "step": 2, "reward": 0.778, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "opioid_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "safety_veto_threshold", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.789}}
|