File size: 3,268 Bytes
21c7db9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
{
  "no_change": {
    "mode": "REGIMEN_OPT",
    "action_type": "KEEP_REGIMEN",
    "target_drug": null,
    "replacement_drug": null,
    "dose_bucket": "NA",
    "taper_days": null,
    "monitoring_plan": null,
    "evidence_query": null,
    "new_drug_name": null,
    "candidate_components": [],
    "candidate_id": "cand_01",
    "confidence": 0.8,
    "rationale_brief": "Baseline no-change policy."
  },
  "rules_only": {
    "mode": "REGIMEN_OPT",
    "action_type": "SUBSTITUTE_WITHIN_CLASS",
    "target_drug": "opioid_like",
    "replacement_drug": "non_opioid_analgesic",
    "dose_bucket": "NA",
    "taper_days": null,
    "monitoring_plan": null,
    "evidence_query": null,
    "new_drug_name": null,
    "candidate_components": [],
    "candidate_id": "cand_04",
    "confidence": 0.75,
    "rationale_brief": "Rules-only selected top legal candidate."
  },
  "greedy": {
    "mode": "REGIMEN_OPT",
    "action_type": "SUBSTITUTE_WITHIN_CLASS",
    "target_drug": "opioid_like",
    "replacement_drug": "non_opioid_analgesic",
    "dose_bucket": "NA",
    "taper_days": null,
    "monitoring_plan": null,
    "evidence_query": null,
    "new_drug_name": null,
    "candidate_components": [],
    "candidate_id": "cand_04",
    "confidence": 0.72,
    "rationale_brief": "Greedy safety/burden improvement baseline."
  },
  "contextual_bandit": {
    "mode": "REGIMEN_OPT",
    "action_type": "SUBSTITUTE_WITHIN_CLASS",
    "target_drug": "opioid_like",
    "replacement_drug": "non_opioid_analgesic",
    "dose_bucket": "NA",
    "taper_days": null,
    "monitoring_plan": null,
    "evidence_query": null,
    "new_drug_name": null,
    "candidate_components": [],
    "candidate_id": "cand_04",
    "confidence": 0.68,
    "rationale_brief": "Contextual bandit selected candidate."
  },
  "contextual_bandit_topk": [
    {
      "candidate_id": "cand_09",
      "score": 1.1532307878304324,
      "exploration_bonus": 1.1532307878304324,
      "algorithm": "linucb"
    },
    {
      "candidate_id": "cand_10",
      "score": 1.1489735636645433,
      "exploration_bonus": 1.1489735636645433,
      "algorithm": "linucb"
    },
    {
      "candidate_id": "cand_08",
      "score": 1.1447401451857973,
      "exploration_bonus": 1.1447401451857973,
      "algorithm": "linucb"
    }
  ],
  "beam_search": {
    "mode": "REGIMEN_OPT",
    "action_type": "SUBSTITUTE_WITHIN_CLASS",
    "target_drug": "opioid_like",
    "replacement_drug": "non_opioid_analgesic",
    "dose_bucket": "NA",
    "taper_days": null,
    "monitoring_plan": null,
    "evidence_query": null,
    "new_drug_name": null,
    "candidate_components": [],
    "candidate_id": "cand_04",
    "confidence": 0.74,
    "rationale_brief": "Beam-search(3) top candidate."
  },
  "baseline_policy": "no_change_candidate",
  "episodes": 8,
  "avg_reward": 0.747,
  "legality_rate": 1.0,
  "success_rate": 0.0,
  "policy_stack_ablations": {
    "bandit-only": {
      "avg_reward": 0.7616666666666667,
      "legality_rate": 1.0,
      "steps": 3.0
    },
    "llm-only": {
      "avg_reward": 0.7753333333333333,
      "legality_rate": 1.0,
      "steps": 3.0
    },
    "llm+bandit": {
      "avg_reward": 0.7753333333333333,
      "legality_rate": 1.0,
      "steps": 3.0
    }
  }
}