adithya9903 commited on
Commit
b14e428
·
verified ·
1 Parent(s): 9f2d200

Upload PolyGuard artifact folder: qwen-qwen2-5-3b-instruct checkpoints after policy_ablation

Browse files
checkpoints/sweeps/qwen-qwen2-5-3b-instruct/bandit-only_failures.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "policy_stack": "bandit-only",
4
+ "episode": 4,
5
+ "step": 3,
6
+ "reward": 0.729,
7
+ "final_action": {
8
+ "mode": "DOSE_OPT",
9
+ "action_type": "DOSE_HOLD",
10
+ "target_drug": "nsaid_like",
11
+ "replacement_drug": null,
12
+ "dose_bucket": "NA",
13
+ "taper_days": null,
14
+ "monitoring_plan": "repeat_labs_72h",
15
+ "evidence_query": null,
16
+ "new_drug_name": null,
17
+ "candidate_components": [],
18
+ "candidate_id": "cand_05",
19
+ "confidence": 0.72,
20
+ "rationale_brief": "Bandit-only policy selected top contextual candidate."
21
+ },
22
+ "termination_reason": "exploit_detection",
23
+ "failure_reasons": [
24
+ "repeated_action_loop"
25
+ ],
26
+ "primary_reward_channels": {
27
+ "safety_legality": 0.73,
28
+ "clinical_improvement": 0.633,
29
+ "dosing_quality": 0.655,
30
+ "process_integrity": 0.823
31
+ }
32
+ },
33
+ {
34
+ "policy_stack": "bandit-only",
35
+ "episode": 6,
36
+ "step": 3,
37
+ "reward": 0.739,
38
+ "final_action": {
39
+ "mode": "DOSE_OPT",
40
+ "action_type": "ORDER_MONITORING_AND_WAIT",
41
+ "target_drug": null,
42
+ "replacement_drug": null,
43
+ "dose_bucket": "NA",
44
+ "taper_days": null,
45
+ "monitoring_plan": "vitals_labs_7d",
46
+ "evidence_query": null,
47
+ "new_drug_name": null,
48
+ "candidate_components": [],
49
+ "candidate_id": "cand_08",
50
+ "confidence": 0.74,
51
+ "rationale_brief": "Bandit-only policy selected top contextual candidate."
52
+ },
53
+ "termination_reason": "exploit_detection",
54
+ "failure_reasons": [
55
+ "repeated_action_loop"
56
+ ],
57
+ "primary_reward_channels": {
58
+ "safety_legality": 0.735,
59
+ "clinical_improvement": 0.633,
60
+ "dosing_quality": 0.655,
61
+ "process_integrity": 0.861
62
+ }
63
+ }
64
+ ]
checkpoints/sweeps/qwen-qwen2-5-3b-instruct/bandit-only_replay.jsonl ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"policy_stack": "bandit-only", "episode": 0, "step": 1, "reward": 0.792, "final_action": {"mode": "DOSE_OPT", "action_type": "ORDER_MONITORING_AND_WAIT", "target_drug": null, "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "vitals_labs_7d", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_08", "confidence": 0.74, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.934, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.867}}
2
+ {"policy_stack": "bandit-only", "episode": 0, "step": 2, "reward": 0.774, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "benzodiazepine_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.917, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.805}}
3
+ {"policy_stack": "bandit-only", "episode": 0, "step": 3, "reward": 0.762, "final_action": {"mode": "DOSE_OPT", "action_type": "ORDER_MONITORING_AND_WAIT", "target_drug": null, "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "vitals_labs_7d", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_08", "confidence": 0.74, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "step_budget_exhausted", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.934, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.742}}
4
+ {"policy_stack": "bandit-only", "episode": 1, "step": 1, "reward": 0.796, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "statin_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.917, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}}
5
+ {"policy_stack": "bandit-only", "episode": 1, "step": 2, "reward": 0.787, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "statin_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.917, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.858}}
6
+ {"policy_stack": "bandit-only", "episode": 1, "step": 3, "reward": 0.781, "final_action": {"mode": "DOSE_OPT", "action_type": "ORDER_MONITORING_AND_WAIT", "target_drug": null, "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "vitals_labs_7d", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_08", "confidence": 0.74, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.934, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.823}}
7
+ {"policy_stack": "bandit-only", "episode": 1, "step": 4, "reward": 0.77, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "statin_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.917, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.787}}
8
+ {"policy_stack": "bandit-only", "episode": 1, "step": 5, "reward": 0.764, "final_action": {"mode": "DOSE_OPT", "action_type": "ORDER_MONITORING_AND_WAIT", "target_drug": null, "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "vitals_labs_7d", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_08", "confidence": 0.74, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.934, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.751}}
9
+ {"policy_stack": "bandit-only", "episode": 1, "step": 6, "reward": 0.756, "final_action": {"mode": "DOSE_OPT", "action_type": "ORDER_MONITORING_AND_WAIT", "target_drug": null, "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "vitals_labs_7d", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_08", "confidence": 0.74, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "step_budget_exhausted", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.934, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.716}}
10
+ {"policy_stack": "bandit-only", "episode": 2, "step": 1, "reward": 0.806, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "nsaid_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}}
11
+ {"policy_stack": "bandit-only", "episode": 2, "step": 2, "reward": 0.797, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "nsaid_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.858}}
12
+ {"policy_stack": "bandit-only", "episode": 2, "step": 3, "reward": 0.787, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "opioid_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "safety_veto_threshold", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.823}}
13
+ {"policy_stack": "bandit-only", "episode": 3, "step": 1, "reward": 0.804, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "statin_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}}
14
+ {"policy_stack": "bandit-only", "episode": 3, "step": 2, "reward": 0.798, "final_action": {"mode": "DOSE_OPT", "action_type": "ORDER_MONITORING_AND_WAIT", "target_drug": null, "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "vitals_labs_7d", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_08", "confidence": 0.74, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.984, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.858}}
15
+ {"policy_stack": "bandit-only", "episode": 3, "step": 3, "reward": 0.787, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "statin_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.823}}
16
+ {"policy_stack": "bandit-only", "episode": 3, "step": 4, "reward": 0.781, "final_action": {"mode": "DOSE_OPT", "action_type": "ORDER_MONITORING_AND_WAIT", "target_drug": null, "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "vitals_labs_7d", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_08", "confidence": 0.74, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.984, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.787}}
17
+ {"policy_stack": "bandit-only", "episode": 3, "step": 5, "reward": 0.769, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "statin_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.751}}
18
+ {"policy_stack": "bandit-only", "episode": 3, "step": 6, "reward": 0.764, "final_action": {"mode": "DOSE_OPT", "action_type": "ORDER_MONITORING_AND_WAIT", "target_drug": null, "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "vitals_labs_7d", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_08", "confidence": 0.74, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "step_budget_exhausted", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.984, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.716}}
19
+ {"policy_stack": "bandit-only", "episode": 4, "step": 1, "reward": 0.806, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "nsaid_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}}
20
+ {"policy_stack": "bandit-only", "episode": 4, "step": 2, "reward": 0.797, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "nsaid_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.858}}
21
+ {"policy_stack": "bandit-only", "episode": 4, "step": 3, "reward": 0.729, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "nsaid_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "exploit_detection", "failure_reasons": ["repeated_action_loop"], "primary_reward_channels": {"safety_legality": 0.73, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.823}}
22
+ {"policy_stack": "bandit-only", "episode": 5, "step": 1, "reward": 0.79, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "opioid_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.603, "dosing_quality": 0.655, "process_integrity": 0.894}}
23
+ {"policy_stack": "bandit-only", "episode": 5, "step": 2, "reward": 0.782, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "opioid_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.603, "dosing_quality": 0.655, "process_integrity": 0.858}}
24
+ {"policy_stack": "bandit-only", "episode": 5, "step": 3, "reward": 0.776, "final_action": {"mode": "DOSE_OPT", "action_type": "ORDER_MONITORING_AND_WAIT", "target_drug": null, "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "vitals_labs_7d", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_08", "confidence": 0.74, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.984, "clinical_improvement": 0.603, "dosing_quality": 0.655, "process_integrity": 0.823}}
25
+ {"policy_stack": "bandit-only", "episode": 5, "step": 4, "reward": 0.764, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "opioid_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.603, "dosing_quality": 0.655, "process_integrity": 0.787}}
26
+ {"policy_stack": "bandit-only", "episode": 5, "step": 5, "reward": 0.759, "final_action": {"mode": "DOSE_OPT", "action_type": "ORDER_MONITORING_AND_WAIT", "target_drug": null, "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "vitals_labs_7d", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_08", "confidence": 0.74, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.984, "clinical_improvement": 0.603, "dosing_quality": 0.655, "process_integrity": 0.751}}
27
+ {"policy_stack": "bandit-only", "episode": 5, "step": 6, "reward": 0.747, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "opioid_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "step_budget_exhausted", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.603, "dosing_quality": 0.655, "process_integrity": 0.716}}
28
+ {"policy_stack": "bandit-only", "episode": 6, "step": 1, "reward": 0.81, "final_action": {"mode": "DOSE_OPT", "action_type": "ORDER_MONITORING_AND_WAIT", "target_drug": null, "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "vitals_labs_7d", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_08", "confidence": 0.74, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.984, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.907}}
29
+ {"policy_stack": "bandit-only", "episode": 6, "step": 2, "reward": 0.804, "final_action": {"mode": "DOSE_OPT", "action_type": "ORDER_MONITORING_AND_WAIT", "target_drug": null, "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "vitals_labs_7d", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_08", "confidence": 0.74, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.984, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.884}}
30
+ {"policy_stack": "bandit-only", "episode": 6, "step": 3, "reward": 0.739, "final_action": {"mode": "DOSE_OPT", "action_type": "ORDER_MONITORING_AND_WAIT", "target_drug": null, "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "vitals_labs_7d", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_08", "confidence": 0.74, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "exploit_detection", "failure_reasons": ["repeated_action_loop"], "primary_reward_channels": {"safety_legality": 0.735, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.861}}
31
+ {"policy_stack": "bandit-only", "episode": 7, "step": 1, "reward": 0.79, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "opioid_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.831}}
32
+ {"policy_stack": "bandit-only", "episode": 7, "step": 2, "reward": 0.78, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "opioid_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Bandit-only policy selected top contextual candidate."}, "termination_reason": "safety_veto_threshold", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.789}}
checkpoints/sweeps/qwen-qwen2-5-3b-instruct/llm-only_failures.json ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "policy_stack": "llm-only",
4
+ "episode": 0,
5
+ "step": 3,
6
+ "reward": 0.699,
7
+ "final_action": {
8
+ "mode": "DOSE_OPT",
9
+ "action_type": "REDUCE_DOSE_BUCKET",
10
+ "target_drug": "benzodiazepine_like",
11
+ "replacement_drug": null,
12
+ "dose_bucket": "LOW",
13
+ "taper_days": null,
14
+ "monitoring_plan": null,
15
+ "evidence_query": null,
16
+ "new_drug_name": null,
17
+ "candidate_components": [],
18
+ "candidate_id": "cand_03",
19
+ "confidence": 0.67,
20
+ "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."
21
+ },
22
+ "termination_reason": "exploit_detection",
23
+ "failure_reasons": [
24
+ "repeated_action_loop"
25
+ ],
26
+ "primary_reward_channels": {
27
+ "safety_legality": 0.667,
28
+ "clinical_improvement": 0.633,
29
+ "dosing_quality": 0.655,
30
+ "process_integrity": 0.742
31
+ }
32
+ },
33
+ {
34
+ "policy_stack": "llm-only",
35
+ "episode": 1,
36
+ "step": 3,
37
+ "reward": 0.719,
38
+ "final_action": {
39
+ "mode": "DOSE_OPT",
40
+ "action_type": "REDUCE_DOSE_BUCKET",
41
+ "target_drug": "opioid_like",
42
+ "replacement_drug": null,
43
+ "dose_bucket": "LOW",
44
+ "taper_days": null,
45
+ "monitoring_plan": null,
46
+ "evidence_query": null,
47
+ "new_drug_name": null,
48
+ "candidate_components": [],
49
+ "candidate_id": "cand_03",
50
+ "confidence": 0.67,
51
+ "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."
52
+ },
53
+ "termination_reason": "exploit_detection",
54
+ "failure_reasons": [
55
+ "repeated_action_loop"
56
+ ],
57
+ "primary_reward_channels": {
58
+ "safety_legality": 0.667,
59
+ "clinical_improvement": 0.633,
60
+ "dosing_quality": 0.655,
61
+ "process_integrity": 0.823
62
+ }
63
+ },
64
+ {
65
+ "policy_stack": "llm-only",
66
+ "episode": 2,
67
+ "step": 3,
68
+ "reward": 0.727,
69
+ "final_action": {
70
+ "mode": "DOSE_OPT",
71
+ "action_type": "REDUCE_DOSE_BUCKET",
72
+ "target_drug": "warfarin_like",
73
+ "replacement_drug": null,
74
+ "dose_bucket": "LOW",
75
+ "taper_days": null,
76
+ "monitoring_plan": null,
77
+ "evidence_query": null,
78
+ "new_drug_name": null,
79
+ "candidate_components": [],
80
+ "candidate_id": "cand_03",
81
+ "confidence": 0.67,
82
+ "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."
83
+ },
84
+ "termination_reason": "exploit_detection",
85
+ "failure_reasons": [
86
+ "repeated_action_loop"
87
+ ],
88
+ "primary_reward_channels": {
89
+ "safety_legality": 0.717,
90
+ "clinical_improvement": 0.633,
91
+ "dosing_quality": 0.655,
92
+ "process_integrity": 0.823
93
+ }
94
+ },
95
+ {
96
+ "policy_stack": "llm-only",
97
+ "episode": 3,
98
+ "step": 3,
99
+ "reward": 0.729,
100
+ "final_action": {
101
+ "mode": "DOSE_OPT",
102
+ "action_type": "DOSE_HOLD",
103
+ "target_drug": "nsaid_like",
104
+ "replacement_drug": null,
105
+ "dose_bucket": "NA",
106
+ "taper_days": null,
107
+ "monitoring_plan": "repeat_labs_72h",
108
+ "evidence_query": null,
109
+ "new_drug_name": null,
110
+ "candidate_components": [],
111
+ "candidate_id": "cand_05",
112
+ "confidence": 0.72,
113
+ "rationale_brief": "Transformers fallback selected cand_05 via local ranker; active_model_enabled=False; active_model_available=False."
114
+ },
115
+ "termination_reason": "exploit_detection",
116
+ "failure_reasons": [
117
+ "repeated_action_loop"
118
+ ],
119
+ "primary_reward_channels": {
120
+ "safety_legality": 0.73,
121
+ "clinical_improvement": 0.633,
122
+ "dosing_quality": 0.655,
123
+ "process_integrity": 0.823
124
+ }
125
+ },
126
+ {
127
+ "policy_stack": "llm-only",
128
+ "episode": 4,
129
+ "step": 3,
130
+ "reward": 0.729,
131
+ "final_action": {
132
+ "mode": "DOSE_OPT",
133
+ "action_type": "DOSE_HOLD",
134
+ "target_drug": "benzodiazepine_like",
135
+ "replacement_drug": null,
136
+ "dose_bucket": "NA",
137
+ "taper_days": null,
138
+ "monitoring_plan": "repeat_labs_72h",
139
+ "evidence_query": null,
140
+ "new_drug_name": null,
141
+ "candidate_components": [],
142
+ "candidate_id": "cand_05",
143
+ "confidence": 0.72,
144
+ "rationale_brief": "Transformers fallback selected cand_05 via local ranker; active_model_enabled=False; active_model_available=False."
145
+ },
146
+ "termination_reason": "exploit_detection",
147
+ "failure_reasons": [
148
+ "repeated_action_loop"
149
+ ],
150
+ "primary_reward_channels": {
151
+ "safety_legality": 0.73,
152
+ "clinical_improvement": 0.633,
153
+ "dosing_quality": 0.655,
154
+ "process_integrity": 0.823
155
+ }
156
+ },
157
+ {
158
+ "policy_stack": "llm-only",
159
+ "episode": 5,
160
+ "step": 3,
161
+ "reward": 0.715,
162
+ "final_action": {
163
+ "mode": "DOSE_OPT",
164
+ "action_type": "DOSE_HOLD",
165
+ "target_drug": "nsaid_like",
166
+ "replacement_drug": null,
167
+ "dose_bucket": "NA",
168
+ "taper_days": null,
169
+ "monitoring_plan": "repeat_labs_72h",
170
+ "evidence_query": null,
171
+ "new_drug_name": null,
172
+ "candidate_components": [],
173
+ "candidate_id": "cand_05",
174
+ "confidence": 0.72,
175
+ "rationale_brief": "Transformers fallback selected cand_05 via local ranker; active_model_enabled=False; active_model_available=False."
176
+ },
177
+ "termination_reason": "exploit_detection",
178
+ "failure_reasons": [
179
+ "repeated_action_loop"
180
+ ],
181
+ "primary_reward_channels": {
182
+ "safety_legality": 0.73,
183
+ "clinical_improvement": 0.603,
184
+ "dosing_quality": 0.655,
185
+ "process_integrity": 0.823
186
+ }
187
+ },
188
+ {
189
+ "policy_stack": "llm-only",
190
+ "episode": 6,
191
+ "step": 3,
192
+ "reward": 0.738,
193
+ "final_action": {
194
+ "mode": "DOSE_OPT",
195
+ "action_type": "DOSE_HOLD",
196
+ "target_drug": "opioid_like",
197
+ "replacement_drug": null,
198
+ "dose_bucket": "NA",
199
+ "taper_days": null,
200
+ "monitoring_plan": "repeat_labs_72h",
201
+ "evidence_query": null,
202
+ "new_drug_name": null,
203
+ "candidate_components": [],
204
+ "candidate_id": "cand_05",
205
+ "confidence": 0.72,
206
+ "rationale_brief": "Transformers fallback selected cand_05 via local ranker; active_model_enabled=False; active_model_available=False."
207
+ },
208
+ "termination_reason": "exploit_detection",
209
+ "failure_reasons": [
210
+ "repeated_action_loop"
211
+ ],
212
+ "primary_reward_channels": {
213
+ "safety_legality": 0.73,
214
+ "clinical_improvement": 0.633,
215
+ "dosing_quality": 0.655,
216
+ "process_integrity": 0.861
217
+ }
218
+ }
219
+ ]
checkpoints/sweeps/qwen-qwen2-5-3b-instruct/llm-only_replay.jsonl ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"policy_stack": "llm-only", "episode": 0, "step": 1, "reward": 0.789, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "benzodiazepine_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.917, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.867}}
2
+ {"policy_stack": "llm-only", "episode": 0, "step": 2, "reward": 0.774, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "benzodiazepine_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.917, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.805}}
3
+ {"policy_stack": "llm-only", "episode": 0, "step": 3, "reward": 0.699, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "benzodiazepine_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "exploit_detection", "failure_reasons": ["repeated_action_loop"], "primary_reward_channels": {"safety_legality": 0.667, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.742}}
4
+ {"policy_stack": "llm-only", "episode": 1, "step": 1, "reward": 0.796, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "opioid_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.917, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}}
5
+ {"policy_stack": "llm-only", "episode": 1, "step": 2, "reward": 0.787, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "opioid_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.917, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.858}}
6
+ {"policy_stack": "llm-only", "episode": 1, "step": 3, "reward": 0.719, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "opioid_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "exploit_detection", "failure_reasons": ["repeated_action_loop"], "primary_reward_channels": {"safety_legality": 0.667, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.823}}
7
+ {"policy_stack": "llm-only", "episode": 2, "step": 1, "reward": 0.804, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "warfarin_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}}
8
+ {"policy_stack": "llm-only", "episode": 2, "step": 2, "reward": 0.795, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "warfarin_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.858}}
9
+ {"policy_stack": "llm-only", "episode": 2, "step": 3, "reward": 0.727, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "warfarin_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "exploit_detection", "failure_reasons": ["repeated_action_loop"], "primary_reward_channels": {"safety_legality": 0.717, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.823}}
10
+ {"policy_stack": "llm-only", "episode": 3, "step": 1, "reward": 0.806, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "nsaid_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Transformers fallback selected cand_05 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}}
11
+ {"policy_stack": "llm-only", "episode": 3, "step": 2, "reward": 0.797, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "nsaid_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Transformers fallback selected cand_05 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.858}}
12
+ {"policy_stack": "llm-only", "episode": 3, "step": 3, "reward": 0.729, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "nsaid_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Transformers fallback selected cand_05 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "exploit_detection", "failure_reasons": ["repeated_action_loop"], "primary_reward_channels": {"safety_legality": 0.73, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.823}}
13
+ {"policy_stack": "llm-only", "episode": 4, "step": 1, "reward": 0.806, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "benzodiazepine_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Transformers fallback selected cand_05 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}}
14
+ {"policy_stack": "llm-only", "episode": 4, "step": 2, "reward": 0.797, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "benzodiazepine_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Transformers fallback selected cand_05 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.858}}
15
+ {"policy_stack": "llm-only", "episode": 4, "step": 3, "reward": 0.729, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "benzodiazepine_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Transformers fallback selected cand_05 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "exploit_detection", "failure_reasons": ["repeated_action_loop"], "primary_reward_channels": {"safety_legality": 0.73, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.823}}
16
+ {"policy_stack": "llm-only", "episode": 5, "step": 1, "reward": 0.792, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "nsaid_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Transformers fallback selected cand_05 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.603, "dosing_quality": 0.655, "process_integrity": 0.894}}
17
+ {"policy_stack": "llm-only", "episode": 5, "step": 2, "reward": 0.784, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "nsaid_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Transformers fallback selected cand_05 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.603, "dosing_quality": 0.655, "process_integrity": 0.858}}
18
+ {"policy_stack": "llm-only", "episode": 5, "step": 3, "reward": 0.715, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "nsaid_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Transformers fallback selected cand_05 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "exploit_detection", "failure_reasons": ["repeated_action_loop"], "primary_reward_channels": {"safety_legality": 0.73, "clinical_improvement": 0.603, "dosing_quality": 0.655, "process_integrity": 0.823}}
19
+ {"policy_stack": "llm-only", "episode": 6, "step": 1, "reward": 0.809, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "opioid_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Transformers fallback selected cand_05 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.907}}
20
+ {"policy_stack": "llm-only", "episode": 6, "step": 2, "reward": 0.803, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "opioid_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Transformers fallback selected cand_05 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.884}}
21
+ {"policy_stack": "llm-only", "episode": 6, "step": 3, "reward": 0.738, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "opioid_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Transformers fallback selected cand_05 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "exploit_detection", "failure_reasons": ["repeated_action_loop"], "primary_reward_channels": {"safety_legality": 0.73, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.861}}
22
+ {"policy_stack": "llm-only", "episode": 7, "step": 1, "reward": 0.79, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "benzodiazepine_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Transformers fallback selected cand_05 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.831}}
23
+ {"policy_stack": "llm-only", "episode": 7, "step": 2, "reward": 0.78, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "benzodiazepine_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Transformers fallback selected cand_05 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "safety_veto_threshold", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.789}}
checkpoints/sweeps/qwen-qwen2-5-3b-instruct/llm_bandit_failures.json ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "policy_stack": "llm+bandit",
4
+ "episode": 0,
5
+ "step": 3,
6
+ "reward": 0.699,
7
+ "final_action": {
8
+ "mode": "DOSE_OPT",
9
+ "action_type": "REDUCE_DOSE_BUCKET",
10
+ "target_drug": "beta_blocker_like",
11
+ "replacement_drug": null,
12
+ "dose_bucket": "LOW",
13
+ "taper_days": null,
14
+ "monitoring_plan": null,
15
+ "evidence_query": null,
16
+ "new_drug_name": null,
17
+ "candidate_components": [],
18
+ "candidate_id": "cand_03",
19
+ "confidence": 0.67,
20
+ "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."
21
+ },
22
+ "termination_reason": "exploit_detection",
23
+ "failure_reasons": [
24
+ "repeated_action_loop"
25
+ ],
26
+ "primary_reward_channels": {
27
+ "safety_legality": 0.667,
28
+ "clinical_improvement": 0.633,
29
+ "dosing_quality": 0.655,
30
+ "process_integrity": 0.742
31
+ }
32
+ },
33
+ {
34
+ "policy_stack": "llm+bandit",
35
+ "episode": 1,
36
+ "step": 3,
37
+ "reward": 0.663,
38
+ "final_action": {
39
+ "mode": "REGIMEN_OPT",
40
+ "action_type": "STOP_DRUG",
41
+ "target_drug": "ppi_like",
42
+ "replacement_drug": null,
43
+ "dose_bucket": "NA",
44
+ "taper_days": null,
45
+ "monitoring_plan": null,
46
+ "evidence_query": null,
47
+ "new_drug_name": null,
48
+ "candidate_components": [],
49
+ "candidate_id": "cand_02",
50
+ "confidence": 0.58,
51
+ "rationale_brief": "Transformers fallback selected cand_02 via local ranker; active_model_enabled=False; active_model_available=False."
52
+ },
53
+ "termination_reason": "exploit_detection",
54
+ "failure_reasons": [
55
+ "repeated_action_loop"
56
+ ],
57
+ "primary_reward_channels": {
58
+ "safety_legality": 0.645,
59
+ "clinical_improvement": 0.527,
60
+ "dosing_quality": 0.53,
61
+ "process_integrity": 0.823
62
+ }
63
+ },
64
+ {
65
+ "policy_stack": "llm+bandit",
66
+ "episode": 2,
67
+ "step": 3,
68
+ "reward": 0.729,
69
+ "final_action": {
70
+ "mode": "DOSE_OPT",
71
+ "action_type": "DOSE_HOLD",
72
+ "target_drug": "nsaid_like",
73
+ "replacement_drug": null,
74
+ "dose_bucket": "NA",
75
+ "taper_days": null,
76
+ "monitoring_plan": "repeat_labs_72h",
77
+ "evidence_query": null,
78
+ "new_drug_name": null,
79
+ "candidate_components": [],
80
+ "candidate_id": "cand_05",
81
+ "confidence": 0.72,
82
+ "rationale_brief": "Transformers fallback selected cand_05 via local ranker; active_model_enabled=False; active_model_available=False."
83
+ },
84
+ "termination_reason": "exploit_detection",
85
+ "failure_reasons": [
86
+ "repeated_action_loop"
87
+ ],
88
+ "primary_reward_channels": {
89
+ "safety_legality": 0.73,
90
+ "clinical_improvement": 0.633,
91
+ "dosing_quality": 0.655,
92
+ "process_integrity": 0.823
93
+ }
94
+ },
95
+ {
96
+ "policy_stack": "llm+bandit",
97
+ "episode": 3,
98
+ "step": 3,
99
+ "reward": 0.727,
100
+ "final_action": {
101
+ "mode": "DOSE_OPT",
102
+ "action_type": "REDUCE_DOSE_BUCKET",
103
+ "target_drug": "benzodiazepine_like",
104
+ "replacement_drug": null,
105
+ "dose_bucket": "LOW",
106
+ "taper_days": null,
107
+ "monitoring_plan": null,
108
+ "evidence_query": null,
109
+ "new_drug_name": null,
110
+ "candidate_components": [],
111
+ "candidate_id": "cand_03",
112
+ "confidence": 0.67,
113
+ "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."
114
+ },
115
+ "termination_reason": "exploit_detection",
116
+ "failure_reasons": [
117
+ "repeated_action_loop"
118
+ ],
119
+ "primary_reward_channels": {
120
+ "safety_legality": 0.717,
121
+ "clinical_improvement": 0.633,
122
+ "dosing_quality": 0.655,
123
+ "process_integrity": 0.823
124
+ }
125
+ },
126
+ {
127
+ "policy_stack": "llm+bandit",
128
+ "episode": 4,
129
+ "step": 3,
130
+ "reward": 0.727,
131
+ "final_action": {
132
+ "mode": "DOSE_OPT",
133
+ "action_type": "REDUCE_DOSE_BUCKET",
134
+ "target_drug": "benzodiazepine_like",
135
+ "replacement_drug": null,
136
+ "dose_bucket": "LOW",
137
+ "taper_days": null,
138
+ "monitoring_plan": null,
139
+ "evidence_query": null,
140
+ "new_drug_name": null,
141
+ "candidate_components": [],
142
+ "candidate_id": "cand_03",
143
+ "confidence": 0.67,
144
+ "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."
145
+ },
146
+ "termination_reason": "exploit_detection",
147
+ "failure_reasons": [
148
+ "repeated_action_loop"
149
+ ],
150
+ "primary_reward_channels": {
151
+ "safety_legality": 0.717,
152
+ "clinical_improvement": 0.633,
153
+ "dosing_quality": 0.655,
154
+ "process_integrity": 0.823
155
+ }
156
+ },
157
+ {
158
+ "policy_stack": "llm+bandit",
159
+ "episode": 5,
160
+ "step": 3,
161
+ "reward": 0.713,
162
+ "final_action": {
163
+ "mode": "DOSE_OPT",
164
+ "action_type": "REDUCE_DOSE_BUCKET",
165
+ "target_drug": "opioid_like",
166
+ "replacement_drug": null,
167
+ "dose_bucket": "LOW",
168
+ "taper_days": null,
169
+ "monitoring_plan": null,
170
+ "evidence_query": null,
171
+ "new_drug_name": null,
172
+ "candidate_components": [],
173
+ "candidate_id": "cand_03",
174
+ "confidence": 0.67,
175
+ "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."
176
+ },
177
+ "termination_reason": "exploit_detection",
178
+ "failure_reasons": [
179
+ "repeated_action_loop"
180
+ ],
181
+ "primary_reward_channels": {
182
+ "safety_legality": 0.717,
183
+ "clinical_improvement": 0.603,
184
+ "dosing_quality": 0.655,
185
+ "process_integrity": 0.823
186
+ }
187
+ },
188
+ {
189
+ "policy_stack": "llm+bandit",
190
+ "episode": 6,
191
+ "step": 3,
192
+ "reward": 0.736,
193
+ "final_action": {
194
+ "mode": "DOSE_OPT",
195
+ "action_type": "REDUCE_DOSE_BUCKET",
196
+ "target_drug": "benzodiazepine_like",
197
+ "replacement_drug": null,
198
+ "dose_bucket": "LOW",
199
+ "taper_days": null,
200
+ "monitoring_plan": null,
201
+ "evidence_query": null,
202
+ "new_drug_name": null,
203
+ "candidate_components": [],
204
+ "candidate_id": "cand_03",
205
+ "confidence": 0.67,
206
+ "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."
207
+ },
208
+ "termination_reason": "exploit_detection",
209
+ "failure_reasons": [
210
+ "repeated_action_loop"
211
+ ],
212
+ "primary_reward_channels": {
213
+ "safety_legality": 0.717,
214
+ "clinical_improvement": 0.633,
215
+ "dosing_quality": 0.655,
216
+ "process_integrity": 0.861
217
+ }
218
+ }
219
+ ]
checkpoints/sweeps/qwen-qwen2-5-3b-instruct/llm_bandit_replay.jsonl ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"policy_stack": "llm+bandit", "episode": 0, "step": 1, "reward": 0.789, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "beta_blocker_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.917, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.867}}
2
+ {"policy_stack": "llm+bandit", "episode": 0, "step": 2, "reward": 0.774, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "beta_blocker_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.917, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.805}}
3
+ {"policy_stack": "llm+bandit", "episode": 0, "step": 3, "reward": 0.699, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "beta_blocker_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "exploit_detection", "failure_reasons": ["repeated_action_loop"], "primary_reward_channels": {"safety_legality": 0.667, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.742}}
4
+ {"policy_stack": "llm+bandit", "episode": 1, "step": 1, "reward": 0.747, "final_action": {"mode": "REGIMEN_OPT", "action_type": "STOP_DRUG", "target_drug": "beta_blocker_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_02", "confidence": 0.58, "rationale_brief": "Transformers fallback selected cand_02 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.894, "clinical_improvement": 0.549, "dosing_quality": 0.53, "process_integrity": 0.894}}
5
+ {"policy_stack": "llm+bandit", "episode": 1, "step": 2, "reward": 0.738, "final_action": {"mode": "REGIMEN_OPT", "action_type": "STOP_DRUG", "target_drug": "ace_inhibitor_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_02", "confidence": 0.58, "rationale_brief": "Transformers fallback selected cand_02 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.894, "clinical_improvement": 0.549, "dosing_quality": 0.53, "process_integrity": 0.858}}
6
+ {"policy_stack": "llm+bandit", "episode": 1, "step": 3, "reward": 0.663, "final_action": {"mode": "REGIMEN_OPT", "action_type": "STOP_DRUG", "target_drug": "ppi_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_02", "confidence": 0.58, "rationale_brief": "Transformers fallback selected cand_02 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "exploit_detection", "failure_reasons": ["repeated_action_loop"], "primary_reward_channels": {"safety_legality": 0.645, "clinical_improvement": 0.527, "dosing_quality": 0.53, "process_integrity": 0.823}}
7
+ {"policy_stack": "llm+bandit", "episode": 2, "step": 1, "reward": 0.806, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "nsaid_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Transformers fallback selected cand_05 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}}
8
+ {"policy_stack": "llm+bandit", "episode": 2, "step": 2, "reward": 0.797, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "nsaid_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Transformers fallback selected cand_05 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.858}}
9
+ {"policy_stack": "llm+bandit", "episode": 2, "step": 3, "reward": 0.729, "final_action": {"mode": "DOSE_OPT", "action_type": "DOSE_HOLD", "target_drug": "nsaid_like", "replacement_drug": null, "dose_bucket": "NA", "taper_days": null, "monitoring_plan": "repeat_labs_72h", "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_05", "confidence": 0.72, "rationale_brief": "Transformers fallback selected cand_05 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "exploit_detection", "failure_reasons": ["repeated_action_loop"], "primary_reward_channels": {"safety_legality": 0.73, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.823}}
10
+ {"policy_stack": "llm+bandit", "episode": 3, "step": 1, "reward": 0.804, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "benzodiazepine_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}}
11
+ {"policy_stack": "llm+bandit", "episode": 3, "step": 2, "reward": 0.795, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "benzodiazepine_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.858}}
12
+ {"policy_stack": "llm+bandit", "episode": 3, "step": 3, "reward": 0.727, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "benzodiazepine_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "exploit_detection", "failure_reasons": ["repeated_action_loop"], "primary_reward_channels": {"safety_legality": 0.717, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.823}}
13
+ {"policy_stack": "llm+bandit", "episode": 4, "step": 1, "reward": 0.804, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "benzodiazepine_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}}
14
+ {"policy_stack": "llm+bandit", "episode": 4, "step": 2, "reward": 0.795, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "benzodiazepine_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.858}}
15
+ {"policy_stack": "llm+bandit", "episode": 4, "step": 3, "reward": 0.727, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "benzodiazepine_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "exploit_detection", "failure_reasons": ["repeated_action_loop"], "primary_reward_channels": {"safety_legality": 0.717, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.823}}
16
+ {"policy_stack": "llm+bandit", "episode": 5, "step": 1, "reward": 0.79, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "opioid_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.603, "dosing_quality": 0.655, "process_integrity": 0.894}}
17
+ {"policy_stack": "llm+bandit", "episode": 5, "step": 2, "reward": 0.782, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "opioid_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.603, "dosing_quality": 0.655, "process_integrity": 0.858}}
18
+ {"policy_stack": "llm+bandit", "episode": 5, "step": 3, "reward": 0.713, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "opioid_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "exploit_detection", "failure_reasons": ["repeated_action_loop"], "primary_reward_channels": {"safety_legality": 0.717, "clinical_improvement": 0.603, "dosing_quality": 0.655, "process_integrity": 0.823}}
19
+ {"policy_stack": "llm+bandit", "episode": 6, "step": 1, "reward": 0.807, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "benzodiazepine_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.907}}
20
+ {"policy_stack": "llm+bandit", "episode": 6, "step": 2, "reward": 0.801, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "benzodiazepine_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.884}}
21
+ {"policy_stack": "llm+bandit", "episode": 6, "step": 3, "reward": 0.736, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "benzodiazepine_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "exploit_detection", "failure_reasons": ["repeated_action_loop"], "primary_reward_channels": {"safety_legality": 0.717, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.861}}
22
+ {"policy_stack": "llm+bandit", "episode": 7, "step": 1, "reward": 0.788, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "opioid_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "ongoing", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.831}}
23
+ {"policy_stack": "llm+bandit", "episode": 7, "step": 2, "reward": 0.778, "final_action": {"mode": "DOSE_OPT", "action_type": "REDUCE_DOSE_BUCKET", "target_drug": "opioid_like", "replacement_drug": null, "dose_bucket": "LOW", "taper_days": null, "monitoring_plan": null, "evidence_query": null, "new_drug_name": null, "candidate_components": [], "candidate_id": "cand_03", "confidence": 0.67, "rationale_brief": "Transformers fallback selected cand_03 via local ranker; active_model_enabled=False; active_model_available=False."}, "termination_reason": "safety_veto_threshold", "failure_reasons": [], "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.789}}