adithya9903 committed on
Commit
e543908
·
1 Parent(s): b451b97

fix: monorepo

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .dockerignore +1 -1
  2. openenv-polypharmacy/.env.example → .env.example +0 -0
  3. .gitignore +1 -5
  4. .gitignore copy +0 -35
  5. Dockerfile +9 -9
  6. PROMPT.md +571 -0
  7. README.MD +28 -17
  8. {openenv-polypharmacy/backend → backend}/Dockerfile +0 -0
  9. {openenv-polypharmacy/backend → backend}/__init__.py +0 -0
  10. {openenv-polypharmacy/backend → backend}/main.py +0 -0
  11. {openenv-polypharmacy/backend → backend}/requirements.txt +0 -0
  12. {openenv-polypharmacy/backend → backend}/src/polypharmacy_env/__init__.py +0 -0
  13. {openenv-polypharmacy/backend → backend}/src/polypharmacy_env/api/__init__.py +0 -0
  14. {openenv-polypharmacy/backend → backend}/src/polypharmacy_env/api/app.py +0 -0
  15. {openenv-polypharmacy/backend → backend}/src/polypharmacy_env/api/routes/__init__.py +0 -0
  16. {openenv-polypharmacy/backend → backend}/src/polypharmacy_env/api/routes/agent.py +0 -0
  17. {openenv-polypharmacy/backend → backend}/src/polypharmacy_env/api/server.py +0 -0
  18. {openenv-polypharmacy/backend → backend}/src/polypharmacy_env/baselines/__init__.py +0 -0
  19. {openenv-polypharmacy/backend → backend}/src/polypharmacy_env/baselines/heuristic_agent.py +0 -0
  20. {openenv-polypharmacy/backend → backend}/src/polypharmacy_env/baselines/random_agent.py +0 -0
  21. {openenv-polypharmacy/backend → backend}/src/polypharmacy_env/client.py +0 -0
  22. {openenv-polypharmacy/backend → backend}/src/polypharmacy_env/config.py +0 -0
  23. {openenv-polypharmacy/backend → backend}/src/polypharmacy_env/data_loader.py +0 -0
  24. {openenv-polypharmacy/backend → backend}/src/polypharmacy_env/ddi_simulator.py +0 -0
  25. {openenv-polypharmacy/backend → backend}/src/polypharmacy_env/env_core.py +0 -0
  26. {openenv-polypharmacy/backend → backend}/src/polypharmacy_env/graders.py +0 -0
  27. {openenv-polypharmacy/backend → backend}/src/polypharmacy_env/models.py +0 -0
  28. {openenv-polypharmacy/backend → backend}/src/polypharmacy_env/rewards.py +0 -0
  29. {openenv-polypharmacy/backend → backend}/src/polypharmacy_env/services/__init__.py +0 -0
  30. {openenv-polypharmacy/backend → backend}/src/polypharmacy_env/services/groq_agent.py +0 -0
  31. {openenv-polypharmacy/backend → backend}/src/polypharmacy_env/tasks.py +0 -0
  32. {openenv-polypharmacy/backend → backend}/src/polypharmacy_env/tests/__init__.py +0 -0
  33. {openenv-polypharmacy/backend → backend}/src/polypharmacy_env/tests/test_api.py +0 -0
  34. {openenv-polypharmacy/backend → backend}/src/polypharmacy_env/tests/test_env_core.py +0 -0
  35. {openenv-polypharmacy/data → data}/lookups/beers_criteria.csv +0 -0
  36. {openenv-polypharmacy/data → data}/lookups/ddi_rules.csv +0 -0
  37. {openenv-polypharmacy/data → data}/lookups/drug_metadata.csv +0 -0
  38. {openenv-polypharmacy/data → data}/processed/patients_polypharmacy.csv +0 -0
  39. openenv-polypharmacy/docker-compose.yml → docker-compose.yml +0 -0
  40. {openenv-polypharmacy/frontend → frontend}/Dockerfile +0 -0
  41. {openenv-polypharmacy/frontend → frontend}/index.html +0 -0
  42. {openenv-polypharmacy/frontend → frontend}/package-lock.json +0 -0
  43. {openenv-polypharmacy/frontend → frontend}/package.json +0 -0
  44. {openenv-polypharmacy/frontend → frontend}/src/App.jsx +0 -0
  45. {openenv-polypharmacy/frontend → frontend}/src/main.jsx +0 -0
  46. {openenv-polypharmacy/frontend → frontend}/src/styles.css +0 -0
  47. {openenv-polypharmacy/frontend → frontend}/vite.config.js +0 -0
  48. inference.py +188 -0
  49. openenv-polypharmacy/.dockerignore +0 -8
  50. openenv-polypharmacy/Dockerfile +0 -39
.dockerignore CHANGED
@@ -9,4 +9,4 @@
9
  **/dist
10
  **/.env
11
  **/.env.*
12
- !openenv-polypharmacy/.env.example
 
9
  **/dist
10
  **/.env
11
  **/.env.*
12
+ !.env.example
openenv-polypharmacy/.env.example → .env.example RENAMED
File without changes
.gitignore CHANGED
@@ -4,7 +4,7 @@ venv/
4
  env/
5
  .env
6
  .env.*
7
- !openenv-polypharmacy/.env.example
8
  *.py[cod]
9
  __pycache__/
10
  .pytest_cache/
@@ -29,7 +29,3 @@ pnpm-debug.log*
29
  *.swp
30
  .DS_Store
31
 
32
- # --- Project-specific nested paths ---
33
- openenv-polypharmacy/frontend/node_modules/
34
- openenv-polypharmacy/frontend/dist/
35
- openenv-polypharmacy/.pytest_cache/
 
4
  env/
5
  .env
6
  .env.*
7
+ !.env.example
8
  *.py[cod]
9
  __pycache__/
10
  .pytest_cache/
 
29
  *.swp
30
  .DS_Store
31
 
 
 
 
 
.gitignore copy DELETED
@@ -1,35 +0,0 @@
1
- # --- Python ---
2
- venv/
3
- .venv/
4
- env/
5
- .env
6
- .env.*
7
- !openenv-polypharmacy/.env.example
8
- *.py[cod]
9
- __pycache__/
10
- .pytest_cache/
11
- .mypy_cache/
12
- .ruff_cache/
13
- .coverage
14
- coverage.xml
15
-
16
- # --- Node / frontend ---
17
- node_modules/
18
- **/node_modules/
19
- frontend/dist/
20
- **/dist/
21
- npm-debug.log*
22
- yarn-debug.log*
23
- yarn-error.log*
24
- pnpm-debug.log*
25
-
26
- # --- Build / temp ---
27
- *.log
28
- *.tmp
29
- *.swp
30
- .DS_Store
31
-
32
- # --- Project-specific nested paths ---
33
- openenv-polypharmacy/frontend/node_modules/
34
- openenv-polypharmacy/frontend/dist/
35
- openenv-polypharmacy/.pytest_cache/
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Dockerfile CHANGED
@@ -1,8 +1,8 @@
1
  FROM node:20-alpine AS frontend-builder
2
  WORKDIR /app/frontend
3
- COPY openenv-polypharmacy/frontend/package*.json ./
4
  RUN npm ci
5
- COPY openenv-polypharmacy/frontend/ ./
6
  RUN npm run build
7
 
8
  FROM python:3.11-slim
@@ -13,15 +13,15 @@ RUN apt-get update && \
13
 
14
  WORKDIR /app
15
 
16
- COPY openenv-polypharmacy/backend/requirements.txt /app/backend/requirements.txt
17
  RUN pip install --no-cache-dir -r /app/backend/requirements.txt
18
 
19
- COPY openenv-polypharmacy/backend /app/backend
20
- COPY openenv-polypharmacy/data /app/data
21
- COPY openenv-polypharmacy/scripts /app/scripts
22
- COPY openenv-polypharmacy/openenv.yaml /app/openenv.yaml
23
- COPY openenv-polypharmacy/.env.example /app/.env.example
24
- COPY openenv-polypharmacy/inference.py /app/inference.py
25
 
26
  COPY --from=frontend-builder /app/frontend/dist /app/frontend/dist
27
 
 
1
  FROM node:20-alpine AS frontend-builder
2
  WORKDIR /app/frontend
3
+ COPY frontend/package*.json ./
4
  RUN npm ci
5
+ COPY frontend/ ./
6
  RUN npm run build
7
 
8
  FROM python:3.11-slim
 
13
 
14
  WORKDIR /app
15
 
16
+ COPY backend/requirements.txt /app/backend/requirements.txt
17
  RUN pip install --no-cache-dir -r /app/backend/requirements.txt
18
 
19
+ COPY backend /app/backend
20
+ COPY data /app/data
21
+ COPY scripts /app/scripts
22
+ COPY openenv.yaml /app/openenv.yaml
23
+ COPY .env.example /app/.env.example
24
+ COPY inference.py /app/inference.py
25
 
26
  COPY --from=frontend-builder /app/frontend/dist /app/frontend/dist
27
 
PROMPT.md ADDED
@@ -0,0 +1,571 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ You are an expert Python backend, ML, and infrastructure engineer.
2
+ Your task is to implement a complete, production-ready OpenEnv environment called **PolypharmacyEnv** for training and evaluating agentic RL policies that act as an "elderly polypharmacy safety agent" (clinical pharmacist assistant).
3
+
4
+ The deliverable MUST satisfy all of the following:
5
+ - Fully compliant with the OpenEnv spec (typed models, `step()` / `reset()` / `state()`, `openenv.yaml`, HTTP server, Dockerfile).
6
+ - Simulates a realistic healthcare workflow around elderly polypharmacy and dangerous drug combinations.
7
+ - Defines at least **3 tasks** (easy → medium → hard) with deterministic agent graders producing scores in [0.0, 1.0].
8
+ - Provides shaped rewards over the trajectory (not just sparse terminal rewards).
9
+ - Includes a baseline LLM-based inference script `inference.py` in the repo root, following the evaluation requirements:
10
+ - Uses the OpenAI Python client.
11
+ - Reads `OPENAI_API_KEY`, `API_BASE_URL`, `MODEL_NAME`, and `HF_TOKEN` from the environment.
12
+ - Emits structured stdout logs in the exact `[START]`, `[STEP]`, `[END]` format from the OpenEnv sample inference script.
13
+ - Is containerized and deployable as a **Hugging Face Space** tagged with `openenv` that responds to OpenEnv-style `reset` / `step` / `state` HTTP calls.
14
+
15
+ Implement everything described below.
16
+
17
+ =================================================
18
+ 1. Repository and folder structure
19
+ =================================================
20
+
21
+ Create a Python package repository with this structure (names are important unless clearly labeled as examples):
22
+
23
+ - `openenv-polypharmacy/`
24
+ - `openenv.yaml`
25
+ - `README.md`
26
+ - `requirements.txt`
27
+ - `Dockerfile`
28
+ - `inference.py` # baseline LLM agent per spec
29
+ - `pyproject.toml` or `setup.cfg` (optional but recommended)
30
+ - `src/`
31
+ - `polypharmacy_env/`
32
+ - `__init__.py`
33
+ - `config.py`
34
+ - `models.py` # Action, Observation, State, helper models
35
+ - `env_core.py` # PolypharmacyEnv implementation
36
+ - `tasks.py` # task setup utilities
37
+ - `graders.py` # deterministic graders for each task
38
+ - `rewards.py` # reward shaping logic
39
+ - `data_loader.py` # load/preprocess patient and lookup data
40
+ - `ddi_simulator.py` # local DDI / guideline simulator
41
+ - `api/`
42
+ - `__init__.py`
43
+ - `schemas.py` # HTTP request/response schemas
44
+ - `server.py` # FastAPI app exposing OpenEnv endpoints
45
+ - `baselines/`
46
+ - `__init__.py`
47
+ - `heuristic_agent.py` # simple rule-based baseline agent
48
+ - `random_agent.py` # trivial random baseline (optional)
49
+ - `tests/`
50
+ - `__init__.py`
51
+ - `test_env_core.py`
52
+ - `test_api.py`
53
+ - `data/`
54
+ - `raw/` # placeholder for real/synthetic source data
55
+ - `processed/`
56
+ - `lookups/`
57
+ - `ddi_rules.csv`
58
+ - `beers_criteria.csv`
59
+ - `drug_metadata.csv`
60
+ - `scripts/`
61
+ - `preprocess_data.py`
62
+ - `run_validation.sh` # optional; runs OpenEnv validator, tests, etc.
63
+
64
+ Use Python 3.10+ with full type hints, and keep the code black/isort-compatible.
65
+
66
+ =================================================
67
+ 2. Domain, data, and clinical abstraction
68
+ =================================================
69
+
70
+ 2.1. Core scenario
71
+
72
+ Model an elderly patient (age ≥ 65) with:
73
+ - Demographics: age, sex.
74
+ - Comorbidities: e.g., hypertension, diabetes, heart failure, CKD, dementia.
75
+ - Basic labs: kidney function (eGFR category), liver function category.
76
+ - A current medication list (polypharmacy, e.g., 3–15 drugs depending on task).
77
+
78
+ Each **episode** is one medication-review session where the agent:
79
+ - Observes patient info and current meds.
80
+ - Optionally **queries** a DDI/guideline tool for specific drug pairs.
81
+ - Proposes **interventions**:
82
+ - `stop`: discontinue a drug.
83
+ - `dose_reduce`: lower dose of a drug.
84
+ - `substitute`: swap to a safer alternative.
85
+ - `add_monitoring`: keep the drug but flag extra monitoring.
86
+ - Calls `finish_review` when it decides the regimen is acceptable or budgets are exhausted.
87
+
88
+ No external PHI, EHRs, or online APIs: all data is **synthetic** or de-identified and local to the container (CSV files).
89
+
90
+ 2.2. Data files and CSV schemas
91
+
92
+ Implement local CSVs under `data/lookups/`:
93
+
94
+ **`drug_metadata.csv`**
95
+ - `drug_id` (string; unique key)
96
+ - `generic_name` (string)
97
+ - `atc_class` (string)
98
+ - `is_high_risk_elderly` (0/1)
99
+ - `default_dose_mg` (float)
100
+ - `min_dose_mg` (float)
101
+ - `max_dose_mg` (float)
102
+
103
+ **`beers_criteria.csv`**
104
+ - `drug_id` (string)
105
+ - `criterion_type` (enum string: `avoid`, `caution`, `dose_adjust`, `avoid_in_condition`)
106
+ - `condition` (nullable string; e.g., `CKD`, `dementia`)
107
+ - `rationale` (brief text)
108
+
109
+ **`ddi_rules.csv`**
110
+ - `drug_id_1` (string; normalized so `drug_id_1 < drug_id_2` lexicographically)
111
+ - `drug_id_2` (string)
112
+ - `severity` (enum string: `mild`, `moderate`, `severe`)
113
+ - `mechanism` (short text)
114
+ - `recommendation` (enum string: `avoid_combination`, `monitor_closely`, `dose_adjust`, `no_action`)
115
+ - `base_risk_score` (float in [0.0, 1.0])
116
+
117
+ Implement a synthetic patient-episode dataset under `data/processed/`:
118
+
119
+ **`patients_polypharmacy.csv`**
120
+ - `episode_id` (string)
121
+ - `age` (int)
122
+ - `sex` (enum: `M`, `F`, `O`)
123
+ - `conditions` (semicolon-separated; e.g., `HTN;DM;CKD`)
124
+ - `eGFR_category` (enum: `normal`, `mild`, `moderate`, `severe`)
125
+ - `liver_function_category` (enum: `normal`, `impaired`)
126
+ - `medication_ids` (semicolon-separated list of `drug_id`)
127
+ - `baseline_risk_score` (float in [0.0, 1.0])
128
+
129
+ 2.3. Preprocessing script
130
+
131
+ In `scripts/preprocess_data.py`:
132
+ - If real data is not provided, procedurally generate synthetic but plausible data using:
133
+ - Random combinations of conditions and drugs constrained by simple rules (e.g., CKD + renally-cleared drugs).
134
+ - Controlled distribution of high-risk DDIs and Beers violations.
135
+ - Explicitly tag episodes as easy/medium/hard (e.g., via number of drugs, number/severity of DDIs, and number of Beers issues).
136
+ - Save `patients_polypharmacy.csv` ready for the environment to consume.
137
+
138
+ =================================================
139
+ 3. OpenEnv models and environment implementation
140
+ =================================================
141
+
142
+ 3.1. Models
143
+
144
+ In `models.py`, define dataclasses or Pydantic models that extend the appropriate OpenEnv base types (`Action`, `Observation`, `State`) and are JSON-compatible.
145
+
146
+ Auxiliary models:
147
+
148
+ **`MedicationEntry`**
149
+ - `drug_id: str`
150
+ - `generic_name: str`
151
+ - `atc_class: str`
152
+ - `dose_mg: float`
153
+ - `frequency: str` # e.g., `qd`, `bid`
154
+ - `route: str` # e.g., `po`
155
+ - `is_high_risk_elderly: bool`
156
+ - `beers_flags: list[str]` # e.g., `["avoid", "dose_adjust_CKD"]`
157
+
158
+ **`InteractionQueryRecord`**
159
+ - `drug_id_1: str`
160
+ - `drug_id_2: str`
161
+ - `severity: str | None`
162
+ - `recommendation: str | None`
163
+ - `risk_score: float | None`
164
+ - `step_index: int`
165
+
166
+ **`InterventionRecord`**
167
+ - `target_drug_id: str`
168
+ - `action_type: Literal["stop", "dose_reduce", "substitute", "add_monitoring"]`
169
+ - `proposed_new_drug_id: str | None`
170
+ - `rationale: str`
171
+ - `step_index: int`
172
+
173
+ Core wire models:
174
+
175
+ **`PolypharmacyObservation`** (extends OpenEnv `Observation`)
176
+ - `episode_id: str`
177
+ - `task_id: Literal["easy_screening", "budgeted_screening", "complex_tradeoff"]`
178
+ - `age: int`
179
+ - `sex: str`
180
+ - `conditions: list[str]`
181
+ - `eGFR_category: str`
182
+ - `liver_function_category: str`
183
+ - `current_medications: list[MedicationEntry]`
184
+ - `interaction_queries: list[InteractionQueryRecord]`
185
+ - `interventions: list[InterventionRecord]`
186
+ - `step_index: int`
187
+ - `remaining_query_budget: int`
188
+ - `remaining_intervention_budget: int`
189
+ - `shaped_reward: float` # reward from last step
190
+ - `done: bool`
191
+
192
+ **`PolypharmacyAction`** (extends OpenEnv `Action`)
193
+ - `action_type: Literal["query_ddi", "propose_intervention", "finish_review"]`
194
+ - `drug_id_1: str | None` # for DDI queries or some interventions
195
+ - `drug_id_2: str | None` # for DDI queries
196
+ - `target_drug_id: str | None` # for interventions
197
+ - `intervention_type: Literal["stop", "dose_reduce", "substitute", "add_monitoring", "none"] | None`
198
+ - `proposed_new_drug_id: str | None`
199
+ - `rationale: str | None`
200
+
201
+ **`PolypharmacyState`** (extends OpenEnv `State`)
202
+ - `episode_id: str`
203
+ - `task_id: str`
204
+ - `step_count: int`
205
+ - `max_steps: int`
206
+ - `num_query_actions: int`
207
+ - `num_interventions: int`
208
+
209
+ 3.2. Environment core
210
+
211
+ In `env_core.py`, implement `PolypharmacyEnv` extending the appropriate OpenEnv environment base class. It must implement:
212
+
213
+ **`reset(task_id: str | None = None) -> PolypharmacyObservation`**
214
+ - If `task_id` is `None`, default to medium (`budgeted_screening`).
215
+ - Sample an episode from `patients_polypharmacy.csv` filtered by difficulty.
216
+ - Initialize:
217
+ - `episode_id`
218
+ - `step_count = 0`
219
+ - task-specific budgets (query, interventions, max_steps)
220
+ - baseline regimen and risk
221
+ - empty `interaction_queries` and `interventions`
222
+ - Return the initial `PolypharmacyObservation` with:
223
+ - `step_index = 0`
224
+ - `shaped_reward = 0.0`
225
+ - `done = False`
226
+
227
+ **`step(action: PolypharmacyAction) -> dict`**
228
+ - Validate the action; if invalid:
229
+ - Apply a negative reward.
230
+ - Do not modify regimen, but log error in `info`.
231
+ - If `action_type == "query_ddi"`:
232
+ - If query budget exhausted, apply penalty and do not query.
233
+ - Else:
234
+ - Use `ddi_simulator.lookup_ddi(drug_id_1, drug_id_2)` to get severity, recommendation, base_risk_score.
235
+ - Append an `InteractionQueryRecord`.
236
+ - Apply a small negative reward for query cost.
237
+ - If `action_type == "propose_intervention"`:
238
+ - If intervention budget exhausted, apply penalty and ignore change.
239
+ - Else:
240
+ - Update `current_medications` according to `intervention_type`:
241
+ - `stop`: remove medication.
242
+ - `dose_reduce`: adjust dose downward within [min_dose_mg, default_dose_mg].
243
+ - `substitute`: replace with a safer alternative from same `atc_class`.
244
+ - `add_monitoring`: keep drug but tag in internal state.
245
+ - Append an `InterventionRecord`.
246
+ - Recompute current regimen risk using the risk model (see 3.3).
247
+ - Compute shaped reward = (previous_risk - new_risk) - small intervention cost.
248
+ - If `action_type == "finish_review"`:
249
+ - Mark `done = True`.
250
+ - Call the task’s grader to get episode-level score in [0.0, 1.0].
251
+ - Add this as a terminal bonus to the current step reward.
252
+
253
+ - In all cases:
254
+ - Increment `step_count`.
255
+ - Check `max_steps`; if exceeded, auto-terminate:
256
+ - `done = True`
257
+ - apply time-out penalty
258
+ - call grader with current trajectory for a final score if appropriate.
259
+ - Construct next `PolypharmacyObservation` with updated fields.
260
+ - Return a dict:
261
+ - `observation`: `PolypharmacyObservation`
262
+ - `reward`: float shaped reward for this step
263
+ - `done`: bool
264
+ - `info`: dict with fields like `current_risk`, `baseline_risk`, `grader_score_if_terminal`, and debug flags.
265
+
266
+ **`state` property**
267
+ - Returns `PolypharmacyState` reflecting the current internal state.
268
+
269
+ 3.3. DDI simulator and risk model
270
+
271
+ In `ddi_simulator.py`:
272
+ - Load `ddi_rules.csv` once via `data_loader`.
273
+ - Implement `lookup_ddi(drug_id_1, drug_id_2) -> tuple[severity, recommendation, base_risk_score]`:
274
+ - Normalize the pair ordering.
275
+ - Look up row; if missing, return:
276
+ - severity = `"none"`
277
+ - recommendation = `"no_action"`
278
+ - base_risk_score = 0.0
279
+
280
+ In `rewards.py` (or a dedicated module), implement:
281
+ - `compute_regimen_risk(current_drug_ids, patient_context, ddi_rules, beers_rules, drug_metadata) -> float`
282
+ - Aggregate contributions from:
283
+ - Beers violations (weighted by `criterion_type` and relevant conditions).
284
+ - DDI base risk scores for all present drug pairs.
285
+ - High-risk elderly drugs.
286
+ - Normalize and clip to [0.0, 1.0].
287
+
288
+ Use this function to compute:
289
+ - `baseline_risk` at episode start.
290
+ - Risk after each intervention step.
291
+
292
+ Also implement:
293
+ - `compute_shaped_reward(previous_risk, new_risk, action, context, partial_metrics) -> float`
294
+ - Positive component: `previous_risk - new_risk`.
295
+ - Negative components: per-query cost, per-intervention cost, invalid-action penalty, time-out penalty.
296
+
297
+ =================================================
298
+ 4. Tasks and graders (3 difficulty levels)
299
+ =================================================
300
+
301
+ Define three task IDs and semantics in `tasks.py` and `graders.py`:
302
+
303
+ Task IDs:
304
+ - `easy_screening`
305
+ - `budgeted_screening`
306
+ - `complex_tradeoff`
307
+
308
+ 4.1. `easy_screening` (easy)
309
+
310
+ - Small regimen: 3–5 drugs.
311
+ - Exactly one **severe** DDI pair and possibly one simple Beers violation.
312
+ - Budgets:
313
+ - query_budget ≈ 4
314
+ - intervention_budget ≈ 2
315
+ - max_steps ≈ 10
316
+
317
+ Grader:
318
+ - Input: full trajectory, baseline risk, final risk, list of interventions.
319
+ - Compute:
320
+ - `risk_reduction = max(0.0, baseline_risk - final_risk) / max(baseline_risk, ε)` (normalized).
321
+ - `targeted_intervention_flag = 1.0` if at least one intervention affects one of the drugs in the known severe DDI pair, else 0.0.
322
+ - Score:
323
+ - `score = 0.5 * risk_reduction + 0.5 * targeted_intervention_flag`
324
+ - Clip to [0.0, 1.0].
325
+
326
+ 4.2. `budgeted_screening` (medium)
327
+
328
+ - Medium regimen: 6–10 drugs.
329
+ - Multiple DDIs (mild/moderate/severe) and multiple Beers issues.
330
+ - Budgets:
331
+ - query_budget ≈ 8
331
+ - intervention_budget ≈ 3
332
+ - max_steps ≈ 20
334
+
335
+ Grader:
336
+ - Compute:
337
+ - `risk_reduction_score` as normalized risk drop.
338
+ - `intervention_precision_score` = fraction of interventions that actually reduce risk or fix guideline violations.
339
+ - `query_efficiency_score` = (number of severe/moderate DDIs discovered) / (number of queries used), normalized.
340
+ - Weighted score, for example:
341
+ - `score = 0.5 * risk_reduction_score + 0.3 * intervention_precision_score + 0.2 * query_efficiency_score`
342
+ - Clip to [0.0, 1.0].
343
+
344
+ 4.3. `complex_tradeoff` (hard)
345
+
346
+ - Larger regimen: 10–15 drugs.
347
+ - Some drugs are **clinically critical** (e.g., anticoagulants, insulin analogues) and encoded as such in `drug_metadata` or a small internal map.
348
+ - Episodes contain:
349
+ - multiple DDIs and Beers issues, including ones involving critical drugs.
350
+ - safer substitutes for some risky drugs.
351
+
352
+ Budgets:
353
+ - query_budget ≈ 12
353
+ - intervention_budget ≈ 5
354
+ - max_steps ≈ 30
356
+
357
+ Grader adds a **regimen disruption penalty** component:
358
+ - Metrics:
359
+ - `risk_reduction_score` (as above).
360
+ - `critical_drug_penalty` = penalty if a critical drug is stopped without substitution to another suitable agent.
361
+ - `total_drug_changes` = number of drugs stopped or substituted.
362
+ - `regimen_disruption_penalty` derived from `total_drug_changes` and `critical_drug_penalty`.
363
+
364
+ Example scoring:
365
+ - `base = risk_reduction_score`
366
+ - `penalty = α * regimen_disruption_penalty`
367
+ - `score = clamp(base - penalty, 0.0, 1.0)`
368
+
369
+ 4.4. Reward shaping
370
+
371
+ In `rewards.py`, define a consistent shaping scheme:
372
+ - On each query:
373
+ - Small negative reward (e.g., −0.01) plus any small bonus if it discovers a severe DDI, if desired.
374
+ - On each intervention:
375
+ - Reward ≈ (previous_risk - new_risk) − small intervention cost.
376
+ - On invalid actions:
377
+ - Larger negative reward (e.g., −0.1) and no state change.
378
+ - On `finish_review`:
379
+ - Add the task-level `score` ∈ [0.0, 1.0] from the corresponding grader to that step’s shaped reward.
380
+
381
+ Ensure the sum of step rewards per episode remains in a reasonable numeric range (e.g., roughly -5 to +5) while still allowing meaningful differentiation by graders.
382
+
383
+ =================================================
384
+ 5. HTTP API server and openenv.yaml
385
+ =================================================
386
+
387
+ 5.1. HTTP server (FastAPI)
388
+
389
+ In `api/server.py`:
390
+ - Implement a FastAPI app that maintains a `PolypharmacyEnv` instance (or a multiplexing scheme if needed).
391
+ - Endpoints:
392
+ - `POST /reset`:
393
+ - Request body: may include `task_id` (string).
394
+ - Response: serialized `PolypharmacyObservation`.
395
+ - `POST /step`:
396
+ - Request body: serialized `PolypharmacyAction`.
397
+ - Response: dict with:
398
+ - `observation`: `PolypharmacyObservation`
399
+ - `reward`: float
400
+ - `done`: bool
401
+ - `info`: dict
402
+ - `GET /state`:
403
+ - Response: `PolypharmacyState`.
404
+
405
+ Provide a module-level `app = FastAPI(...)` object for use with uvicorn and Hugging Face Spaces. Ensure the JSON schema is consistent with OpenEnv clients (simple, flat JSON for observation/action/state).
406
+
407
+ 5.2. `openenv.yaml`
408
+
409
+ At repo root, define `openenv.yaml` consistent with the latest OpenEnv spec. At minimum, include:
410
+ - `name`: `polypharmacy_env`
411
+ - `version`: e.g., `0.1.0`
412
+ - `description`: human-readable description.
413
+ - `author`: your details.
414
+ - `tags`: e.g., `["healthcare", "polypharmacy", "openenv"]`
415
+ - `tasks`:
416
+ - One entry per task:
417
+ - `id`: `"easy_screening"` / `"budgeted_screening"` / `"complex_tradeoff"`
418
+ - `description`: one-line description
419
+ - `difficulty`: `"easy"`, `"medium"`, `"hard"`
420
+
421
+ Ensure `openenv validate` (or equivalent validator) passes once implemented.
422
+
423
+ =================================================
424
+ 6. Baseline heuristic (non-LLM) agent
425
+ =================================================
426
+
427
+ In `baselines/heuristic_agent.py`, implement a simple, deterministic baseline agent that:
428
+
429
+ For each episode:
430
+ - Iterates through all unordered medication pairs within query budget:
431
+ - Calls `query_ddi` via the environment for each pair until the query budget is exhausted or all pairs are examined.
432
+ - Records severe and moderate interactions.
433
+ - After querying:
434
+ - For each severe DDI pair:
435
+ - Try `substitute` one of the drugs using `drug_metadata`:
436
+ - Prefer substitute within same `atc_class` that:
437
+ - is not marked high-risk elderly.
438
+ - does not participate in known severe DDIs with the rest of the regimen.
439
+ - If no substitute exists, propose `stop` for the higher-risk drug.
440
+ - Respect intervention budget limits.
441
+ - Finally, call `finish_review`.
442
+
443
+ This baseline should be callable as a simple Python function that interacts with `PolypharmacyEnv` directly (without HTTP).
444
+
445
+ =================================================
446
+ 7. Baseline LLM inference script (inference.py)
447
+ =================================================
448
+
449
+ At repo root, create `inference.py` that:
450
+
451
+ 7.1. Uses the OpenAI Python client
452
+
453
+ - Import and configure the official OpenAI Python client.
454
+ - Read environment variables:
455
+ - `OPENAI_API_KEY` (required).
456
+ - `API_BASE_URL` (base URL for LLM; default to OpenAI standard if not set).
457
+ - `MODEL_NAME` (e.g., `gpt-4.1` or similar).
458
+ - `HF_TOKEN` (if needed for HF auth; do not hardcode).
459
+ - Read `POLYPHARMACY_ENV_URL` (or similar) for the environment’s HTTP base URL.
460
+
461
+ 7.2. Implements the required logging format
462
+
463
+ - For each **run** across all tasks:
464
+ - Emit a `[START]` line with a JSON payload exactly matching the evaluation specification:
465
+ - Fields such as `run_id`, `task_id`, `model`, etc., in the same order and naming as the sample OpenEnv inference script.
466
+ - For each **step** in an episode:
467
+ - Emit a `[STEP]` line with JSON fields including:
468
+ - `run_id`
469
+ - `task_id`
470
+ - `episode_id`
471
+ - `step_index`
472
+ - `observation_summary` (brief, machine-readable summary)
473
+ - `action_payload` (the action sent to the env)
474
+ - `reward`
475
+ - `done`
476
+ - After finishing an episode for a task:
477
+ - Emit an `[END]` line summarizing:
478
+ - `run_id`
479
+ - `task_id`
480
+ - per-episode statistics (e.g., total reward, grader score from last step’s `info`).
481
+ - The stdout format MUST follow the sample exactly:
482
+ - Same tags: `[START]`, `[STEP]`, `[END]`.
483
+ - Same JSON field names and ordering as the provided reference.
484
+ - No extra prints except these structured logs (and necessary error messages to stderr).
485
+
486
+ 7.3. LLM agent loop
487
+
488
+ - For each task (`easy_screening`, `budgeted_screening`, `complex_tradeoff`):
489
+ - Run a fixed small number of episodes (e.g., 5–10 per task) for baseline scoring.
490
+ - For each episode:
491
+ - Call `/reset` with the task id.
492
+ - At each step:
493
+ - Summarize the observation into a concise prompt for the LLM:
494
+ - Include age, sex, conditions, high-risk flags, budgets, and a compressed view of meds and previous actions.
495
+ - Ask the model to output a **strict JSON** representing `PolypharmacyAction` fields.
496
+ - Parse and validate the JSON; if invalid, fall back to a safe default (e.g., `finish_review` or a no-op) and penalize in evaluation.
497
+ - Send this action to `/step` and log `[STEP]`.
498
+ - End when `done=True` or max_steps is reached.
499
+ - At the end, print aggregate scores per task and overall.
500
+
501
+ Make sure runtime < 20 minutes and that the script can run within 2 vCPUs and 8 GB RAM.
502
+
503
+ =================================================
504
+ 8. Dockerfile and Hugging Face Space
505
+ =================================================
506
+
507
+ 8.1. Dockerfile
508
+
509
+ Create a `Dockerfile` that:
510
+ - Starts from a slim Python image (e.g., `python:3.11-slim`).
511
+ - Installs system dependencies as needed (e.g., `build-essential`, `curl`).
512
+ - Copies the project into the container.
513
+ - Installs Python dependencies from `requirements.txt`.
514
+ - Sets appropriate environment variables for the app (e.g., `PORT=7860`).
515
+ - Exposes port 7860.
516
+ - Uses a `CMD` or `ENTRYPOINT` that runs the FastAPI server, for example:
517
+ - `uvicorn polypharmacy_env.api.server:app --host 0.0.0.0 --port 7860`
518
+
519
+ 8.2. Hugging Face Space
520
+
521
+ Ensure the repository is ready to be used as a Hugging Face Space:
522
+ - Space type: `docker`.
523
+ - Tag: `openenv`.
524
+ - On container start, the server must listen on the correct port and respond to:
525
+ - `POST /reset`
526
+ - `POST /step`
527
+ - `GET /state`
528
+ - The environment must start cleanly with `docker build` + `docker run` locally.
529
+
530
+ =================================================
531
+ 9. README and documentation
532
+ =================================================
533
+
534
+ In `README.md`, include:
535
+
536
+ - **Environment description & motivation**:
537
+ - What PolypharmacyEnv simulates.
538
+ - Why elderly polypharmacy safety matters.
539
+ - **Action and observation spaces**:
540
+ - Describe `PolypharmacyAction`, `PolypharmacyObservation`, and `PolypharmacyState` fields and semantics.
541
+ - **Task descriptions**:
542
+ - `easy_screening`, `budgeted_screening`, `complex_tradeoff`, their difficulty and goals.
543
+ - **Reward structure**:
544
+ - Summarize shaping and terminal rewards.
545
+ - **Setup & usage**:
546
+ - How to install dependencies.
547
+ - How to run the API server locally (uvicorn command).
548
+ - How to run the heuristic baseline.
549
+ - How to run `inference.py` with environment variables.
550
+ - **Baseline scores**:
551
+ - Document reproducible baseline scores for each task (heuristic agent, and LLM baseline if available).
552
+
553
+ =================================================
554
+ 10. Validation and quality gates
555
+ =================================================
556
+
557
+ - Ensure:
558
+ - `openenv.yaml` and the HTTP server pass the OpenEnv validation script.
559
+ - `docker build` and `docker run` work without errors.
560
+ - `inference.py` completes under 20 minutes, within 2 vCPUs / 8 GB RAM.
561
+ - All graders:
562
+ - Are deterministic.
563
+ - Return scores strictly in [0.0, 1.0].
564
+ - No grader returns a constant score irrespective of behavior.
565
+
566
+ Aim for clean, well-structured, well-documented code with clear separation of concerns between:
567
+ - Data loading,
568
+ - Environment state & dynamics,
569
+ - Reward/grade logic,
570
+ - HTTP serving,
571
+ - Baseline agents and inference.
README.MD CHANGED
@@ -1,3 +1,12 @@
 
 
 
 
 
 
 
 
 
1
  # PolypharmacyEnv
2
 
3
  Monorepo for an OpenEnv-compatible medication safety environment with:
@@ -12,8 +21,7 @@ Monorepo for an OpenEnv-compatible medication safety environment with:
12
  ## Repository Structure
13
 
14
  ```text
15
- openenv-polypharmacy/
16
- backend/
17
  main.py # ASGI entrypoint (uvicorn target)
18
  requirements.txt # Backend dependencies
19
  Dockerfile # Backend container
@@ -32,22 +40,22 @@ openenv-polypharmacy/
32
  graders.py # Task graders
33
  tasks.py # Task/episode selection
34
  tests/ # Backend tests
35
- frontend/
36
  src/ # React UI code
37
  package.json
38
  Dockerfile # Frontend container
39
- data/
40
  lookups/ # drug_metadata.csv, ddi_rules.csv, beers_criteria.csv
41
  processed/ # patients_polypharmacy.csv
42
- scripts/
43
  preprocess_data.py # Synthetic data generation
44
  dev_backend.sh # Local backend run helper
45
  dev_frontend.sh # Local frontend run helper
46
  run_validation.sh # Tests + baseline validation
47
- docker-compose.yml # Full stack orchestration
48
- openenv.yaml # OpenEnv manifest
49
- inference.py # Optional CLI inference baseline
50
- .env.example # Environment template
51
  ```
52
 
53
  ---
@@ -85,11 +93,7 @@ Create `.env`:
85
  cp .env.example .env
86
  ```
87
 
88
- Set values:
89
-
90
- - `GROQ_API_KEY=...` (required)
91
- - `GROQ_BASE_URL=https://api.groq.com/openai/v1` (recommended)
92
- - `GROQ_MODEL_NAME=llama-3.3-70b-versatile` (recommended)
93
 
94
  ---
95
 
@@ -173,9 +177,9 @@ This repo now includes a **root `Dockerfile`** that builds frontend + backend in
173
 
174
  In Space Settings -> Variables and Secrets:
175
 
176
- - Secret: `GROQ_API_KEY`
177
- - Variable: `GROQ_BASE_URL=https://api.groq.com/openai/v1`
178
- - Variable: `GROQ_MODEL_NAME=llama-3.3-70b-versatile`
179
 
180
  ### 3) Push this repository to the Space
181
 
@@ -225,6 +229,13 @@ Or run validation script:
225
  ./scripts/run_validation.sh
226
  ```
227
 
 
 
 
 
 
 
 
228
  ---
229
 
230
  ## Notes
 
1
+ ---
2
+ title: Polypharmacy
3
+ emoji: 📉
4
+ colorFrom: yellow
5
+ colorTo: blue
6
+ sdk: docker
7
+ pinned: false
8
+ ---
9
+
10
  # PolypharmacyEnv
11
 
12
  Monorepo for an OpenEnv-compatible medication safety environment with:
 
21
  ## Repository Structure
22
 
23
  ```text
24
+ backend/
 
25
  main.py # ASGI entrypoint (uvicorn target)
26
  requirements.txt # Backend dependencies
27
  Dockerfile # Backend container
 
40
  graders.py # Task graders
41
  tasks.py # Task/episode selection
42
  tests/ # Backend tests
43
+ frontend/
44
  src/ # React UI code
45
  package.json
46
  Dockerfile # Frontend container
47
+ data/
48
  lookups/ # drug_metadata.csv, ddi_rules.csv, beers_criteria.csv
49
  processed/ # patients_polypharmacy.csv
50
+ scripts/
51
  preprocess_data.py # Synthetic data generation
52
  dev_backend.sh # Local backend run helper
53
  dev_frontend.sh # Local frontend run helper
54
  run_validation.sh # Tests + baseline validation
55
+ docker-compose.yml # Full stack orchestration
56
+ openenv.yaml # OpenEnv manifest
57
+ inference.py # Baseline inference script (required at root)
58
+ .env.example # Environment template
59
  ```
60
 
61
  ---
 
93
  cp .env.example .env
94
  ```
95
 
96
+ Set values for local backend integrations as needed.
 
 
 
 
97
 
98
  ---
99
 
 
177
 
178
  In Space Settings -> Variables and Secrets:
179
 
180
+ - Secret: `HF_TOKEN`
181
+ - Variable: `API_BASE_URL=https://router.huggingface.co/v1`
182
+ - Variable: `MODEL_NAME=Qwen/Qwen2.5-72B-Instruct`
183
 
184
  ### 3) Push this repository to the Space
185
 
 
229
  ./scripts/run_validation.sh
230
  ```
231
 
232
+ ### Submission validation
233
+
234
+ ```bash
235
+ openenv validate
236
+ python inference.py
237
+ ```
238
+
239
  ---
240
 
241
  ## Notes
{openenv-polypharmacy/backend → backend}/Dockerfile RENAMED
File without changes
{openenv-polypharmacy/backend → backend}/__init__.py RENAMED
File without changes
{openenv-polypharmacy/backend → backend}/main.py RENAMED
File without changes
{openenv-polypharmacy/backend → backend}/requirements.txt RENAMED
File without changes
{openenv-polypharmacy/backend → backend}/src/polypharmacy_env/__init__.py RENAMED
File without changes
{openenv-polypharmacy/backend → backend}/src/polypharmacy_env/api/__init__.py RENAMED
File without changes
{openenv-polypharmacy/backend → backend}/src/polypharmacy_env/api/app.py RENAMED
File without changes
{openenv-polypharmacy/backend → backend}/src/polypharmacy_env/api/routes/__init__.py RENAMED
File without changes
{openenv-polypharmacy/backend → backend}/src/polypharmacy_env/api/routes/agent.py RENAMED
File without changes
{openenv-polypharmacy/backend → backend}/src/polypharmacy_env/api/server.py RENAMED
File without changes
{openenv-polypharmacy/backend → backend}/src/polypharmacy_env/baselines/__init__.py RENAMED
File without changes
{openenv-polypharmacy/backend → backend}/src/polypharmacy_env/baselines/heuristic_agent.py RENAMED
File without changes
{openenv-polypharmacy/backend → backend}/src/polypharmacy_env/baselines/random_agent.py RENAMED
File without changes
{openenv-polypharmacy/backend → backend}/src/polypharmacy_env/client.py RENAMED
File without changes
{openenv-polypharmacy/backend → backend}/src/polypharmacy_env/config.py RENAMED
File without changes
{openenv-polypharmacy/backend → backend}/src/polypharmacy_env/data_loader.py RENAMED
File without changes
{openenv-polypharmacy/backend → backend}/src/polypharmacy_env/ddi_simulator.py RENAMED
File without changes
{openenv-polypharmacy/backend → backend}/src/polypharmacy_env/env_core.py RENAMED
File without changes
{openenv-polypharmacy/backend → backend}/src/polypharmacy_env/graders.py RENAMED
File without changes
{openenv-polypharmacy/backend → backend}/src/polypharmacy_env/models.py RENAMED
File without changes
{openenv-polypharmacy/backend → backend}/src/polypharmacy_env/rewards.py RENAMED
File without changes
{openenv-polypharmacy/backend → backend}/src/polypharmacy_env/services/__init__.py RENAMED
File without changes
{openenv-polypharmacy/backend → backend}/src/polypharmacy_env/services/groq_agent.py RENAMED
File without changes
{openenv-polypharmacy/backend → backend}/src/polypharmacy_env/tasks.py RENAMED
File without changes
{openenv-polypharmacy/backend → backend}/src/polypharmacy_env/tests/__init__.py RENAMED
File without changes
{openenv-polypharmacy/backend → backend}/src/polypharmacy_env/tests/test_api.py RENAMED
File without changes
{openenv-polypharmacy/backend → backend}/src/polypharmacy_env/tests/test_env_core.py RENAMED
File without changes
{openenv-polypharmacy/data → data}/lookups/beers_criteria.csv RENAMED
File without changes
{openenv-polypharmacy/data → data}/lookups/ddi_rules.csv RENAMED
File without changes
{openenv-polypharmacy/data → data}/lookups/drug_metadata.csv RENAMED
File without changes
{openenv-polypharmacy/data → data}/processed/patients_polypharmacy.csv RENAMED
File without changes
openenv-polypharmacy/docker-compose.yml → docker-compose.yml RENAMED
File without changes
{openenv-polypharmacy/frontend → frontend}/Dockerfile RENAMED
File without changes
{openenv-polypharmacy/frontend → frontend}/index.html RENAMED
File without changes
{openenv-polypharmacy/frontend → frontend}/package-lock.json RENAMED
File without changes
{openenv-polypharmacy/frontend → frontend}/package.json RENAMED
File without changes
{openenv-polypharmacy/frontend → frontend}/src/App.jsx RENAMED
File without changes
{openenv-polypharmacy/frontend → frontend}/src/main.jsx RENAMED
File without changes
{openenv-polypharmacy/frontend → frontend}/src/styles.css RENAMED
File without changes
{openenv-polypharmacy/frontend → frontend}/vite.config.js RENAMED
File without changes
inference.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Submission inference script for Polypharmacy OpenEnv environment.
3
+
4
+ Required environment variables:
5
+ API_BASE_URL OpenAI-compatible base URL
6
+ MODEL_NAME Model identifier
7
+ HF_TOKEN API key/token
8
+
9
+ Optional:
10
+ POLYPHARMACY_ENV_URL Environment API base (default: http://localhost:7860)
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import json
16
+ import os
17
+ import re
18
+ from typing import Any, Dict, List
19
+
20
+ import requests
21
+ from openai import OpenAI
22
+
23
+ API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
24
+ MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct")
25
+ HF_TOKEN = os.getenv("HF_TOKEN", "")
26
+ ENV_URL = os.getenv("POLYPHARMACY_ENV_URL", "http://localhost:7860").rstrip("/")
27
+
28
+ BENCHMARK = "polypharmacy_env"
29
+ TASKS = ["easy_screening", "budgeted_screening", "complex_tradeoff"]
30
+ MAX_STEPS = 16
31
+ TEMPERATURE = 0.0
32
+ MAX_TOKENS = 220
33
+
34
+ SYSTEM_PROMPT = (
35
+ "You are a clinical-pharmacist agent. "
36
+ "Return one JSON action only with keys matching this schema: "
37
+ '{"action_type":"query_ddi|propose_intervention|finish_review",'
38
+ '"drug_id_1":"", "drug_id_2":"", "target_drug_id":"",'
39
+ '"intervention_type":"stop|dose_reduce|substitute|add_monitoring",'
40
+ '"proposed_new_drug_id":"", "rationale":""}. '
41
+ "Prefer safe, high-impact actions and finish when useful actions are exhausted."
42
+ )
43
+
44
+
45
+ def _b(v: bool) -> str:
46
+ return str(bool(v)).lower()
47
+
48
+
49
+ def _fmt_reward(v: float) -> str:
50
+ return f"{float(v):.2f}"
51
+
52
+
53
+ def _clamp01(v: float) -> float:
54
+ return max(0.0, min(1.0, float(v)))
55
+
56
+
57
+ def log_start(task: str) -> None:
58
+ print(f"[START] task={task} env={BENCHMARK} model={MODEL_NAME}", flush=True)
59
+
60
+
61
+ def log_step(step: int, action_str: str, reward: float, done: bool, error: str | None) -> None:
62
+ err = error if error else "null"
63
+ print(
64
+ f"[STEP] step={step} action={action_str} reward={_fmt_reward(reward)} "
65
+ f"done={_b(done)} error={err}",
66
+ flush=True,
67
+ )
68
+
69
+
70
+ def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
71
+ rewards_str = ",".join(_fmt_reward(r) for r in rewards)
72
+ print(
73
+ f"[END] success={_b(success)} steps={steps} score={_clamp01(score):.3f} rewards={rewards_str}",
74
+ flush=True,
75
+ )
76
+
77
+
78
+ def _safe_json(text: str) -> Dict[str, Any]:
79
+ text = text.strip()
80
+ if text.startswith("```"):
81
+ text = re.sub(r"^```[a-zA-Z]*\n?", "", text)
82
+ text = text.replace("```", "").strip()
83
+ try:
84
+ data = json.loads(text)
85
+ if isinstance(data, dict):
86
+ return data
87
+ except Exception:
88
+ pass
89
+ return {"action_type": "finish_review"}
90
+
91
+
92
+ def _llm_action(client: OpenAI, obs: Dict[str, Any]) -> Dict[str, Any]:
93
+ meds = obs.get("current_medications", [])
94
+ summary = {
95
+ "step_index": obs.get("step_index", 0),
96
+ "remaining_query_budget": obs.get("remaining_query_budget", 0),
97
+ "remaining_intervention_budget": obs.get("remaining_intervention_budget", 0),
98
+ "conditions": obs.get("conditions", []),
99
+ "current_medications": [
100
+ {
101
+ "drug_id": m.get("drug_id"),
102
+ "generic_name": m.get("generic_name"),
103
+ "dose_mg": m.get("dose_mg"),
104
+ "beers_flags": m.get("beers_flags", []),
105
+ }
106
+ for m in meds
107
+ ],
108
+ "interaction_queries": obs.get("interaction_queries", []),
109
+ "interventions": obs.get("interventions", []),
110
+ }
111
+ resp = client.chat.completions.create(
112
+ model=MODEL_NAME,
113
+ temperature=TEMPERATURE,
114
+ max_tokens=MAX_TOKENS,
115
+ messages=[
116
+ {"role": "system", "content": SYSTEM_PROMPT},
117
+ {"role": "user", "content": json.dumps(summary, separators=(",", ":"))},
118
+ ],
119
+ )
120
+ content = (resp.choices[0].message.content or "").strip()
121
+ return _safe_json(content)
122
+
123
+
124
+ def _reset(task_id: str) -> Dict[str, Any]:
125
+ r = requests.post(f"{ENV_URL}/reset", json={"task_id": task_id}, timeout=45)
126
+ r.raise_for_status()
127
+ return r.json()
128
+
129
+
130
+ def _step(action: Dict[str, Any]) -> Dict[str, Any]:
131
+ r = requests.post(f"{ENV_URL}/step", json={"action": action}, timeout=45)
132
+ r.raise_for_status()
133
+ return r.json()
134
+
135
+
136
+ def run_task(client: OpenAI, task_id: str) -> None:
137
+ rewards: List[float] = []
138
+ steps = 0
139
+ success = False
140
+ score = 0.0
141
+ log_start(task_id)
142
+ try:
143
+ reset_payload = _reset(task_id)
144
+ obs = reset_payload.get("observation", {})
145
+ done = bool(reset_payload.get("done", False))
146
+
147
+ for i in range(1, MAX_STEPS + 1):
148
+ if done:
149
+ break
150
+ action = _llm_action(client, obs)
151
+ action_str = json.dumps(action, separators=(",", ":"))
152
+ step_payload = _step(action)
153
+ obs = step_payload.get("observation", {})
154
+ reward = float(step_payload.get("reward") or 0.0)
155
+ done = bool(step_payload.get("done", False))
156
+ metadata = (obs or {}).get("metadata", {}) or {}
157
+ last_error = metadata.get("error")
158
+ rewards.append(reward)
159
+ steps = i
160
+ log_step(i, action_str, reward, done, str(last_error) if last_error else None)
161
+
162
+ if done:
163
+ raw_score = metadata.get("grader_score", None)
164
+ if raw_score is not None:
165
+ score = _clamp01(float(raw_score))
166
+ else:
167
+ score = _clamp01(sum(max(0.0, r) for r in rewards) / max(len(rewards), 1))
168
+ success = score > 0.0
169
+ break
170
+ except Exception:
171
+ # Still emit END to keep evaluator parser stable.
172
+ success = False
173
+ finally:
174
+ log_end(success=success, steps=steps, score=score, rewards=rewards)
175
+
176
+
177
+ def main() -> int:
178
+ if not HF_TOKEN:
179
+ print("HF_TOKEN is required", flush=True)
180
+ return 1
181
+ client = OpenAI(base_url=API_BASE_URL, api_key=HF_TOKEN)
182
+ for task in TASKS:
183
+ run_task(client, task)
184
+ return 0
185
+
186
+
187
+ if __name__ == "__main__":
188
+ raise SystemExit(main())
openenv-polypharmacy/.dockerignore DELETED
@@ -1,8 +0,0 @@
1
- .git
2
- .gitignore
3
- **/__pycache__/
4
- **/.pytest_cache/
5
- **/.DS_Store
6
- .env
7
- frontend/node_modules
8
- frontend/dist
 
 
 
 
 
 
 
 
 
openenv-polypharmacy/Dockerfile DELETED
@@ -1,39 +0,0 @@
1
- FROM node:20-alpine AS frontend-builder
2
- WORKDIR /app/frontend
3
- COPY frontend/package*.json ./
4
- RUN npm ci
5
- COPY frontend/ ./
6
- RUN npm run build
7
-
8
- FROM python:3.11-slim
9
-
10
- RUN apt-get update && \
11
- apt-get install -y --no-install-recommends build-essential curl && \
12
- rm -rf /var/lib/apt/lists/*
13
-
14
- WORKDIR /app
15
-
16
- COPY backend/requirements.txt /app/backend/requirements.txt
17
- RUN pip install --no-cache-dir -r /app/backend/requirements.txt
18
-
19
- COPY backend /app/backend
20
- COPY data /app/data
21
- COPY scripts /app/scripts
22
- COPY openenv.yaml /app/openenv.yaml
23
- COPY .env.example /app/.env.example
24
- COPY inference.py /app/inference.py
25
-
26
- COPY --from=frontend-builder /app/frontend/dist /app/frontend/dist
27
-
28
- RUN python3 /app/scripts/preprocess_data.py
29
-
30
- ENV PORT=7860
31
- ENV PYTHONPATH="/app/backend/src:${PYTHONPATH}"
32
- ENV PYTHONUNBUFFERED=1
33
-
34
- EXPOSE 7860
35
-
36
- HEALTHCHECK --interval=30s --timeout=3s --start-period=15s --retries=3 \
37
- CMD curl -f http://localhost:7860/health || exit 1
38
-
39
- CMD ["sh", "-c", "uvicorn backend.main:app --host 0.0.0.0 --port ${PORT:-7860}"]