Spaces:

TheJackBright
/

polypharmacy-env

Sleeping

App Files Files Community

TheJackBright committed on Apr 6

Commit

b42dbeb

1 Parent(s): 2f3edd0

Version 1 - VK

Browse files

Files changed (36) hide show

openenv-polypharmacy/Dockerfile +30 -0
openenv-polypharmacy/PROMPT.md +571 -0
openenv-polypharmacy/README.md +184 -0
openenv-polypharmacy/data/lookups/beers_criteria.csv +16 -0
openenv-polypharmacy/data/lookups/ddi_rules.csv +25 -0
openenv-polypharmacy/data/lookups/drug_metadata.csv +34 -0
openenv-polypharmacy/data/processed/patients_polypharmacy.csv +121 -0
openenv-polypharmacy/inference.py +214 -0
openenv-polypharmacy/openenv.yaml +30 -0
openenv-polypharmacy/pyproject.toml +39 -0
openenv-polypharmacy/requirements.txt +7 -0
openenv-polypharmacy/scripts/preprocess_data.py +301 -0
openenv-polypharmacy/scripts/run_validation.sh +15 -0
openenv-polypharmacy/src/polypharmacy_env.egg-info/PKG-INFO +15 -0
openenv-polypharmacy/src/polypharmacy_env.egg-info/SOURCES.txt +25 -0
openenv-polypharmacy/src/polypharmacy_env.egg-info/dependency_links.txt +1 -0
openenv-polypharmacy/src/polypharmacy_env.egg-info/requires.txt +11 -0
openenv-polypharmacy/src/polypharmacy_env.egg-info/top_level.txt +1 -0
openenv-polypharmacy/src/polypharmacy_env/__init__.py +1 -0
openenv-polypharmacy/src/polypharmacy_env/api/__init__.py +1 -0
openenv-polypharmacy/src/polypharmacy_env/api/schemas.py +36 -0
openenv-polypharmacy/src/polypharmacy_env/api/server.py +67 -0
openenv-polypharmacy/src/polypharmacy_env/baselines/__init__.py +1 -0
openenv-polypharmacy/src/polypharmacy_env/baselines/heuristic_agent.py +204 -0
openenv-polypharmacy/src/polypharmacy_env/baselines/random_agent.py +54 -0
openenv-polypharmacy/src/polypharmacy_env/config.py +79 -0
openenv-polypharmacy/src/polypharmacy_env/data_loader.py +142 -0
openenv-polypharmacy/src/polypharmacy_env/ddi_simulator.py +115 -0
openenv-polypharmacy/src/polypharmacy_env/env_core.py +413 -0
openenv-polypharmacy/src/polypharmacy_env/graders.py +98 -0
openenv-polypharmacy/src/polypharmacy_env/models.py +103 -0
openenv-polypharmacy/src/polypharmacy_env/rewards.py +92 -0
openenv-polypharmacy/src/polypharmacy_env/tasks.py +47 -0
openenv-polypharmacy/src/polypharmacy_env/tests/__init__.py +1 -0
openenv-polypharmacy/src/polypharmacy_env/tests/test_api.py +73 -0
openenv-polypharmacy/src/polypharmacy_env/tests/test_env_core.py +162 -0

openenv-polypharmacy/Dockerfile ADDED Viewed

	@@ -0,0 +1,30 @@

+FROM python:3.11-slim
+# System deps
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends build-essential curl && \
+    rm -rf /var/lib/apt/lists/*
+WORKDIR /app
+# Install Python deps first (layer caching)
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy project
+COPY . .
+# Generate data if not present
+RUN python3 scripts/preprocess_data.py
+# Environment
+ENV PORT=7860
+ENV PYTHONPATH="/app/src:${PYTHONPATH}"
+ENV PYTHONUNBUFFERED=1
+EXPOSE 7860
+HEALTHCHECK --interval=30s --timeout=3s --start-period=10s --retries=3 \
+    CMD curl -f http://localhost:7860/health || exit 1
+CMD ["uvicorn", "polypharmacy_env.api.server:app", "--host", "0.0.0.0", "--port", "7860"]

openenv-polypharmacy/PROMPT.md ADDED Viewed

	@@ -0,0 +1,571 @@

+You are an expert Python backend, ML, and infrastructure engineer.
+Your task is to implement a complete, production-ready OpenEnv environment called **PolypharmacyEnv** for training and evaluating agentic RL policies that act as an "elderly polypharmacy safety agent" (clinical pharmacist assistant).
+The deliverable MUST satisfy all of the following:
+- Fully compliant with the OpenEnv spec (typed models, `step()` / `reset()` / `state()`, `openenv.yaml`, HTTP server, Dockerfile).
+- Simulates a realistic healthcare workflow around elderly polypharmacy and dangerous drug combinations.
+- Defines at least **3 tasks** (easy → medium → hard) with deterministic agent graders producing scores in [0.0, 1.0].
+- Provides shaped rewards over the trajectory (not just sparse terminal rewards).
+- Includes a baseline LLM-based inference script `inference.py` in the repo root, following the evaluation requirements:
+  - Uses the OpenAI Python client.
+  - Reads `OPENAI_API_KEY`, `API_BASE_URL`, `MODEL_NAME`, and `HF_TOKEN` from the environment.
+  - Emits structured stdout logs in the exact `[START]`, `[STEP]`, `[END]` format from the OpenEnv sample inference script.
+- Is containerized and deployable as a **Hugging Face Space** tagged with `openenv` that responds to OpenEnv-style `reset` / `step` / `state` HTTP calls.
+Implement everything described below.
+=================================================
+1. Repository and folder structure
+=================================================
+Create a Python package repository with this structure (names are important unless clearly labeled as examples):
+- `openenv-polypharmacy/`
+  - `openenv.yaml`
+  - `README.md`
+  - `requirements.txt`
+  - `Dockerfile`
+  - `inference.py`                 # baseline LLM agent per spec
+  - `pyproject.toml` or `setup.cfg` (optional but recommended)
+  - `src/`
+    - `polypharmacy_env/`
+      - `__init__.py`
+      - `config.py`
+      - `models.py`                # Action, Observation, State, helper models
+      - `env_core.py`              # PolypharmacyEnv implementation
+      - `tasks.py`                 # task setup utilities
+      - `graders.py`               # deterministic graders for each task
+      - `rewards.py`               # reward shaping logic
+      - `data_loader.py`           # load/preprocess patient and lookup data
+      - `ddi_simulator.py`         # local DDI / guideline simulator
+      - `api/`
+        - `__init__.py`
+        - `schemas.py`            # HTTP request/response schemas
+        - `server.py`             # FastAPI app exposing OpenEnv endpoints
+      - `baselines/`
+        - `__init__.py`
+        - `heuristic_agent.py`    # simple rule-based baseline agent
+        - `random_agent.py`       # trivial random baseline (optional)
+      - `tests/`
+        - `__init__.py`
+        - `test_env_core.py`
+        - `test_api.py`
+  - `data/`
+    - `raw/`                      # placeholder for real/synthetic source data
+    - `processed/`
+    - `lookups/`
+      - `ddi_rules.csv`
+      - `beers_criteria.csv`
+      - `drug_metadata.csv`
+  - `scripts/`
+    - `preprocess_data.py`
+    - `run_validation.sh`         # optional; runs OpenEnv validator, tests, etc.
+Use Python 3.10+ with full type hints, and keep the code black/isort-compatible.
+=================================================
+2. Domain, data, and clinical abstraction
+=================================================
+2.1. Core scenario
+Model an elderly patient (age ≥ 65) with:
+- Demographics: age, sex.
+- Comorbidities: e.g., hypertension, diabetes, heart failure, CKD, dementia.
+- Basic labs: kidney function (eGFR category), liver function category.
+- A current medication list (polypharmacy, e.g., 3–15 drugs depending on task).
+Each **episode** is one medication-review session where the agent:
+- Observes patient info and current meds.
+- Optionally **queries** a DDI/guideline tool for specific drug pairs.
+- Proposes **interventions**:
+  - `stop`: discontinue a drug.
+  - `dose_reduce`: lower dose of a drug.
+  - `substitute`: swap to a safer alternative.
+  - `add_monitoring`: keep the drug but flag extra monitoring.
+- Calls `finish_review` when it decides the regimen is acceptable or budgets are exhausted.
+No external PHI, EHRs, or online APIs: all data is **synthetic** or de-identified and local to the container (CSV files).
+2.2. Data files and CSV schemas
+Implement local CSVs under `data/lookups/`:
+**`drug_metadata.csv`**
+- `drug_id` (string; unique key)
+- `generic_name` (string)
+- `atc_class` (string)
+- `is_high_risk_elderly` (0/1)
+- `default_dose_mg` (float)
+- `min_dose_mg` (float)
+- `max_dose_mg` (float)
+**`beers_criteria.csv`**
+- `drug_id` (string)
+- `criterion_type` (enum string: `avoid`, `caution`, `dose_adjust`, `avoid_in_condition`)
+- `condition` (nullable string; e.g., `CKD`, `dementia`)
+- `rationale` (brief text)
+**`ddi_rules.csv`**
+- `drug_id_1` (string; normalized so `drug_id_1 < drug_id_2` lexicographically)
+- `drug_id_2` (string)
+- `severity` (enum string: `mild`, `moderate`, `severe`)
+- `mechanism` (short text)
+- `recommendation` (enum string: `avoid_combination`, `monitor_closely`, `dose_adjust`, `no_action`)
+- `base_risk_score` (float in [0.0, 1.0])
+Implement a synthetic patient-episode dataset under `data/processed/`:
+**`patients_polypharmacy.csv`**
+- `episode_id` (string)
+- `age` (int)
+- `sex` (enum: `M`, `F`, `O`)
+- `conditions` (semicolon-separated; e.g., `HTN;DM;CKD`)
+- `eGFR_category` (enum: `normal`, `mild`, `moderate`, `severe`)
+- `liver_function_category` (enum: `normal`, `impaired`)
+- `medication_ids` (semicolon-separated list of `drug_id`)
+- `baseline_risk_score` (float in [0.0, 1.0])
+2.3. Preprocessing script
+In `scripts/preprocess_data.py`:
+- If real data is not provided, procedurally generate synthetic but plausible data using:
+  - Random combinations of conditions and drugs constrained by simple rules (e.g., CKD + renally-cleared drugs).
+  - Controlled distribution of high-risk DDIs and Beers violations.
+- Explicitly tag episodes as easy/medium/hard (e.g., via number of drugs, number/severity of DDIs, and number of Beers issues).
+- Save `patients_polypharmacy.csv` ready for the environment to consume.
+=================================================
+3. OpenEnv models and environment implementation
+=================================================
+3.1. Models
+In `models.py`, define dataclasses or Pydantic models that extend the appropriate OpenEnv base types (`Action`, `Observation`, `State`) and are JSON-compatible.
+Auxiliary models:
+**`MedicationEntry`**
+- `drug_id: str`
+- `generic_name: str`
+- `atc_class: str`
+- `dose_mg: float`
+- `frequency: str`          # e.g., `qd`, `bid`
+- `route: str`              # e.g., `po`
+- `is_high_risk_elderly: bool`
+- `beers_flags: list[str]`  # e.g., `["avoid", "dose_adjust_CKD"]`
+**`InteractionQueryRecord`**
+- `drug_id_1: str`
+- `drug_id_2: str`
+- `severity: str | None`
+- `recommendation: str | None`
+- `risk_score: float | None`
+- `step_index: int`
+**`InterventionRecord`**
+- `target_drug_id: str`
+- `action_type: Literal["stop", "dose_reduce", "substitute", "add_monitoring"]`
+- `proposed_new_drug_id: str | None`
+- `rationale: str`
+- `step_index: int`
+Core wire models:
+**`PolypharmacyObservation`** (extends OpenEnv `Observation`)
+- `episode_id: str`
+- `task_id: Literal["easy_screening", "budgeted_screening", "complex_tradeoff"]`
+- `age: int`
+- `sex: str`
+- `conditions: list[str]`
+- `eGFR_category: str`
+- `liver_function_category: str`
+- `current_medications: list[MedicationEntry]`
+- `interaction_queries: list[InteractionQueryRecord]`
+- `interventions: list[InterventionRecord]`
+- `step_index: int`
+- `remaining_query_budget: int`
+- `remaining_intervention_budget: int`
+- `shaped_reward: float`  # reward from last step
+- `done: bool`
+**`PolypharmacyAction`** (extends OpenEnv `Action`)
+- `action_type: Literal["query_ddi", "propose_intervention", "finish_review"]`
+- `drug_id_1: str | None`        # for DDI queries or some interventions
+- `drug_id_2: str | None`        # for DDI queries
+- `target_drug_id: str | None`   # for interventions
+- `intervention_type: Literal["stop", "dose_reduce", "substitute", "add_monitoring", "none"] | None`
+- `proposed_new_drug_id: str | None`
+- `rationale: str | None`
+**`PolypharmacyState`** (extends OpenEnv `State`)
+- `episode_id: str`
+- `task_id: str`
+- `step_count: int`
+- `max_steps: int`
+- `num_query_actions: int`
+- `num_interventions: int`
+3.2. Environment core
+In `env_core.py`, implement `PolypharmacyEnv` extending the appropriate OpenEnv environment base class. It must implement:
+**`reset(task_id: str | None = None) -> PolypharmacyObservation`**
+- If `task_id` is `None`, default to medium (`budgeted_screening`).
+- Sample an episode from `patients_polypharmacy.csv` filtered by difficulty.
+- Initialize:
+  - `episode_id`
+  - `step_count = 0`
+  - task-specific budgets (query, interventions, max_steps)
+  - baseline regime and risk
+  - empty `interaction_queries` and `interventions`
+- Return the initial `PolypharmacyObservation` with:
+  - `step_index = 0`
+  - `shaped_reward = 0.0`
+  - `done = False`
+**`step(action: PolypharmacyAction) -> dict`**
+- Validate the action; if invalid:
+  - Apply a negative reward.
+  - Do not modify regimen, but log error in `info`.
+- If `action_type == "query_ddi"`:
+  - If query budget exhausted, apply penalty and do not query.
+  - Else:
+    - Use `ddi_simulator.lookup_ddi(drug_id_1, drug_id_2)` to get severity, recommendation, base_risk_score.
+    - Append an `InteractionQueryRecord`.
+    - Apply a small negative reward for query cost.
+- If `action_type == "propose_intervention"`:
+  - If intervention budget exhausted, apply penalty and ignore change.
+  - Else:
+    - Update `current_medications` according to `intervention_type`:
+      - `stop`: remove medication.
+      - `dose_reduce`: adjust dose downward within [min_dose_mg, default_dose_mg].
+      - `substitute`: replace with a safer alternative from same `atc_class`.
+      - `add_monitoring`: keep drug but tag in internal state.
+    - Append an `InterventionRecord`.
+    - Recompute current regimen risk using the risk model (see 3.3).
+    - Compute shaped reward = (previous_risk - new_risk) - small intervention cost.
+- If `action_type == "finish_review"`:
+  - Mark `done = True`.
+  - Call the task’s grader to get episode-level score in [0.0, 1.0].
+  - Add this as a terminal bonus to the current step reward.
+- In all cases:
+  - Increment `step_count`.
+  - Check `max_steps`; if exceeded, auto-terminate:
+    - `done = True`
+    - apply time-out penalty
+    - call grader with current trajectory for a final score if appropriate.
+  - Construct next `PolypharmacyObservation` with updated fields.
+  - Return a dict:
+    - `observation`: `PolypharmacyObservation`
+    - `reward`: float shaped reward for this step
+    - `done`: bool
+    - `info`: dict with fields like `current_risk`, `baseline_risk`, `grader_score_if_terminal`, and debug flags.
+**`state` property**
+- Returns `PolypharmacyState` reflecting the current internal state.
+3.3. DDI simulator and risk model
+In `ddi_simulator.py`:
+- Load `ddi_rules.csv` once via `data_loader`.
+- Implement `lookup_ddi(drug_id_1, drug_id_2) -> tuple[severity, recommendation, base_risk_score]`:
+  - Normalize the pair ordering.
+  - Look up row; if missing, return:
+    - severity = `"none"`
+    - recommendation = `"no_action"`
+    - base_risk_score = 0.0
+In `rewards.py` (or a dedicated module), implement:
+- `compute_regimen_risk(current_drug_ids, patient_context, ddi_rules, beers_rules, drug_metadata) -> float`
+  - Aggregate contributions from:
+    - Beers violations (weighted by `criterion_type` and relevant conditions).
+    - DDI base risk scores for all present drug pairs.
+    - High-risk elderly drugs.
+  - Normalize and clip to [0.0, 1.0].
+Use this function to compute:
+- `baseline_risk` at episode start.
+- Risk after each intervention step.
+Also implement:
+- `compute_shaped_reward(previous_risk, new_risk, action, context, partial_metrics) -> float`
+  - Positive component: `previous_risk - new_risk`.
+  - Negative components: per-query cost, per-intervention cost, invalid-action penalty, time-out penalty.
+=================================================
+4. Tasks and graders (3 difficulty levels)
+=================================================
+Define three task IDs and semantics in `tasks.py` and `graders.py`:
+Task IDs:
+- `easy_screening`
+- `budgeted_screening`
+- `complex_tradeoff`
+4.1. `easy_screening` (easy)
+- Small regimen: 3–5 drugs.
+- Exactly one **severe** DDI pair and possibly one simple Beers violation.
+- Budgets:
+  - query_budget ≈ 4
+  - intervention_budget ≈ 2
+  - max_steps ≈ 10
+Grader:
+- Input: full trajectory, baseline risk, final risk, list of interventions.
+- Compute:
+  - `risk_reduction = max(0.0, baseline_risk - final_risk) / max(baseline_risk, ε)` (normalized).
+  - `targeted_intervention_flag = 1.0` if at least one intervention affects one of the drugs in the known severe DDI pair, else 0.0.
+- Score:
+  - `score = 0.5 * risk_reduction + 0.5 * targeted_intervention_flag`
+  - Clip to [0.0, 1.0].
+4.2. `budgeted_screening` (medium)
+- Medium regimen: 6–10 drugs.
+- Multiple DDIs (mild/moderate/severe) and multiple Beers issues.
+- Budgets:
+  - query_budget ≈ 8
+  - intervention_budget ≈ 3
+  - max_steps ≈ 20
+Grader:
+- Compute:
+  - `risk_reduction_score` as normalized risk drop.
+  - `intervention_precision_score` = fraction of interventions that actually reduce risk or fix guideline violations.
+  - `query_efficiency_score` = (number of severe/moderate DDIs discovered) / (number of queries used), normalized.
+- Weighted score, for example:
+  - `score = 0.5 * risk_reduction_score + 0.3 * intervention_precision_score + 0.2 * query_efficiency_score`
+  - Clip to [0.0, 1.0].
+4.3. `complex_tradeoff` (hard)
+- Larger regimen: 10–15 drugs.
+- Some drugs are **clinically critical** (e.g., anticoagulants, insulin analogues) and encoded as such in `drug_metadata` or a small internal map.
+- Episodes contain:
+  - multiple DDIs and Beers issues, including ones involving critical drugs.
+  - safer substitutes for some risky drugs.
+Budgets:
+- query_budget ≈ 12
+- intervention_budget ≈ 5
+- max_steps ≈ 30
+Grader adds a **regimen disruption penalty** component:
+- Metrics:
+  - `risk_reduction_score` (as above).
+  - `critical_drug_penalty` = penalty if a critical drug is stopped without substitution to another suitable agent.
+  - `total_drug_changes` = number of drugs stopped or substituted.
+  - `regimen_disruption_penalty` derived from `total_drug_changes` and `critical_drug_penalty`.
+Example scoring:
+- `base = risk_reduction_score`
+- `penalty = α * regimen_disruption_penalty`
+- `score = clamp(base - penalty, 0.0, 1.0)`
+4.4. Reward shaping
+In `rewards.py`, define a consistent shaping scheme:
+- On each query:
+  - Small negative reward (e.g., −0.01) plus any small bonus if it discovers a severe DDI, if desired.
+- On each intervention:
+  - Reward ≈ (previous_risk - new_risk) − small intervention cost.
+- On invalid actions:
+  - Larger negative reward (e.g., −0.1) and no state change.
+- On `finish_review`:
+  - Add the task-level `score` ∈ [0.0, 1.0] from the corresponding grader to that step’s shaped reward.
+Ensure the sum of step rewards per episode remains in a reasonable numeric range (e.g., roughly -5 to +5) while still allowing meaningful differentiation by graders.
+=================================================
+5. HTTP API server and openenv.yaml
+=================================================
+5.1. HTTP server (FastAPI)
+In `api/server.py`:
+- Implement a FastAPI app that maintains a `PolypharmacyEnv` instance (or a multiplexing scheme if needed).
+- Endpoints:
+  - `POST /reset`:
+    - Request body: may include `task_id` (string).
+    - Response: serialized `PolypharmacyObservation`.
+  - `POST /step`:
+    - Request body: serialized `PolypharmacyAction`.
+    - Response: dict with:
+      - `observation`: `PolypharmacyObservation`
+      - `reward`: float
+      - `done`: bool
+      - `info`: dict
+  - `GET /state`:
+    - Response: `PolypharmacyState`.
+Provide a module-level `app = FastAPI(...)` object for use with uvicorn and Hugging Face Spaces. Ensure the JSON schema is consistent with OpenEnv clients (simple, flat JSON for observation/action/state).
+5.2. `openenv.yaml`
+At repo root, define `openenv.yaml` consistent with the latest OpenEnv spec. At minimum, include:
+- `name`: `polypharmacy_env`
+- `version`: e.g., `0.1.0`
+- `description`: human-readable description.
+- `author`: your details.
+- `tags`: e.g., `["healthcare", "polypharmacy", "openenv"]`
+- `tasks`:
+  - One entry per task:
+    - `id`: `"easy_screening"` / `"budgeted_screening"` / `"complex_tradeoff"`
+    - `description`: one-line description
+    - `difficulty`: `"easy"`, `"medium"`, `"hard"`
+Ensure `openenv validate` (or equivalent validator) passes once implemented.
+=================================================
+6. Baseline heuristic (non-LLM) agent
+=================================================
+In `baselines/heuristic_agent.py`, implement a simple, deterministic baseline agent that:
+For each episode:
+- Iterates through all unordered medication pairs within query budget:
+  - Calls `query_ddi` via the environment for each pair until the query budget is exhausted or all pairs are examined.
+  - Records severe and moderate interactions.
+- After querying:
+  - For each severe DDI pair:
+    - Try `substitute` one of the drugs using `drug_metadata`:
+      - Prefer substitute within same `atc_class` that:
+        - is not marked high-risk elderly.
+        - does not participate in known severe DDIs with the rest of the regimen.
+    - If no substitute exists, propose `stop` for the higher-risk drug.
+  - Respect intervention budget limits.
+- Finally, call `finish_review`.
+This baseline should be callable as a simple Python function that interacts with `PolypharmacyEnv` directly (without HTTP).
+=================================================
+7. Baseline LLM inference script (inference.py)
+=================================================
+At repo root, create `inference.py` that:
+7.1. Uses the OpenAI Python client
+- Import and configure the official OpenAI Python client.
+- Read environment variables:
+  - `OPENAI_API_KEY` (required).
+  - `API_BASE_URL` (base URL for LLM; default to OpenAI standard if not set).
+  - `MODEL_NAME` (e.g., `gpt-4.1` or similar).
+  - `HF_TOKEN` (if needed for HF auth; do not hardcode).
+- Read `POLYPHARMACY_ENV_URL` (or similar) for the environment’s HTTP base URL.
+7.2. Implements the required logging format
+- For each **run** across all tasks:
+  - Emit a `[START]` line with a JSON payload exactly matching the evaluation specification:
+    - Fields such as `run_id`, `task_id`, `model`, etc., in the same order and naming as the sample OpenEnv inference script.
+- For each **step** in an episode:
+  - Emit a `[STEP]` line with JSON fields including:
+    - `run_id`
+    - `task_id`
+    - `episode_id`
+    - `step_index`
+    - `observation_summary` (brief, machine-readable summary)
+    - `action_payload` (the action sent to the env)
+    - `reward`
+    - `done`
+- After finishing an episode for a task:
+  - Emit an `[END]` line summarizing:
+    - `run_id`
+    - `task_id`
+    - per-episode statistics (e.g., total reward, grader score from last step’s `info`).
+- The stdout format MUST follow the sample exactly:
+  - Same tags: `[START]`, `[STEP]`, `[END]`.
+  - Same JSON field names and ordering as the provided reference.
+  - No extra prints except these structured logs (and necessary error messages to stderr).
+7.3. LLM agent loop
+- For each task (`easy_screening`, `budgeted_screening`, `complex_tradeoff`):
+  - Run a fixed small number of episodes (e.g., 5–10 per task) for baseline scoring.
+  - For each episode:
+    - Call `/reset` with the task id.
+    - At each step:
+      - Summarize the observation into a concise prompt for the LLM:
+        - Include age, sex, conditions, high-risk flags, budgets, and a compressed view of meds and previous actions.
+      - Ask the model to output a **strict JSON** representing `PolypharmacyAction` fields.
+      - Parse and validate the JSON; if invalid, fall back to a safe default (e.g., `finish_review` or a no-op) and penalize in evaluation.
+      - Send this action to `/step` and log `[STEP]`.
+    - End when `done=True` or max_steps is reached.
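+- At the end, print aggregate scores per task and overall (to stderr, so that stdout contains only the structured `[START]` / `[STEP]` / `[END]` logs required in 7.2).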
+Make sure runtime < 20 minutes and that the script can run within 2 vCPUs and 8 GB RAM.
+=================================================
+8. Dockerfile and Hugging Face Space
+=================================================
+8.1. Dockerfile
+Create a `Dockerfile` that:
+- Starts from a slim Python image (e.g., `python:3.11-slim`).
+- Installs system dependencies as needed (e.g., `build-essential`, `curl`).
+- Copies the project into the container.
+- Installs Python dependencies from `requirements.txt`.
+- Sets appropriate environment variables for the app (e.g., `PORT=7860`).
+- Exposes port 7860.
+- Uses a `CMD` or `ENTRYPOINT` that runs the FastAPI server, for example:
+  - `uvicorn polypharmacy_env.api.server:app --host 0.0.0.0 --port 7860`
+8.2. Hugging Face Space
+Ensure the repository is ready to be used as a Hugging Face Space:
+- Space type: `docker`.
+- Tag: `openenv`.
+- On container start, the server must listen on the correct port and respond to:
+  - `POST /reset`
+  - `POST /step`
+  - `GET /state`
+- The environment must start cleanly with `docker build` + `docker run` locally.
+=================================================
+9. README and documentation
+=================================================
+In `README.md`, include:
+- **Environment description & motivation**:
+  - What PolypharmacyEnv simulates.
+  - Why elderly polypharmacy safety matters.
+- **Action and observation spaces**:
+  - Describe `PolypharmacyAction`, `PolypharmacyObservation`, and `PolypharmacyState` fields and semantics.
+- **Task descriptions**:
+  - `easy_screening`, `budgeted_screening`, `complex_tradeoff`, their difficulty and goals.
+- **Reward structure**:
+  - Summarize shaping and terminal rewards.
+- **Setup & usage**:
+  - How to install dependencies.
+  - How to run the API server locally (uvicorn command).
+  - How to run the heuristic baseline.
+  - How to run `inference.py` with environment variables.
+- **Baseline scores**:
+  - Document reproducible baseline scores for each task (heuristic agent, and LLM baseline if available).
+=================================================
+10. Validation and quality gates
+=================================================
+- Ensure:
+  - `openenv.yaml` and the HTTP server pass the OpenEnv validation script.
+  - `docker build` and `docker run` work without errors.
+  - `inference.py` completes under 20 minutes, within 2 vCPUs / 8 GB RAM.
+  - All graders:
+    - Are deterministic.
+    - Return scores strictly in [0.0, 1.0].
+  - No grader returns a constant score irrespective of behavior.
+Aim for clean, well-structured, well-documented code with clear separation of concerns between:
+- Data loading,
+- Environment state & dynamics,
+- Reward/grade logic,
+- HTTP serving,
+- Baseline agents and inference.

openenv-polypharmacy/README.md ADDED Viewed

	@@ -0,0 +1,184 @@

+# PolypharmacyEnv
+An [OpenEnv](https://github.com/meta-pytorch/OpenEnv)-compliant reinforcement-learning environment that simulates **elderly polypharmacy medication review**. An RL agent acts as a clinical pharmacist assistant, identifying dangerous drug-drug interactions (DDIs), Beers-criteria violations, and proposing safe interventions.
+---
+## Motivation
+Polypharmacy (concurrent use of multiple medications) is extremely common in elderly patients (age >= 65) and carries significant risks:
+- **Drug-drug interactions** can cause adverse events, hospitalisation, and death.
+- **Beers-criteria violations** flag medications that are inappropriate or require dose adjustments in older adults.
+- Stopping critical medications (anticoagulants, insulin) without proper substitution can be equally dangerous.
+This environment lets RL and LLM-based agents learn to **balance risk reduction against regimen stability**.
+---
+## Action Space
+Each step, the agent sends a `PolypharmacyAction` with one of three action types:
+| `action_type` | Required fields | Description |
+|---|---|---|
+| `query_ddi` | `drug_id_1`, `drug_id_2` | Query the DDI database for an interaction between two drugs |
+| `propose_intervention` | `target_drug_id`, `intervention_type` | Propose changing a medication (`stop`, `dose_reduce`, `substitute`, `add_monitoring`) |
+| `finish_review` | — | End the review and trigger final grading |
+Optional fields: `proposed_new_drug_id`, `rationale`.
+## Observation Space
+`PolypharmacyObservation` includes:
+- **Patient demographics**: `age`, `sex`, `conditions`, `eGFR_category`, `liver_function_category`
+- **Medications**: list of `MedicationEntry` (drug_id, name, class, dose, high-risk flags, Beers flags)
+- **History**: `interaction_queries` (past DDI query results), `interventions` (past actions)
+- **Budgets**: `remaining_query_budget`, `remaining_intervention_budget`
+- **Reward signals**: `shaped_reward`, `done`
+## State
+`PolypharmacyState`: `episode_id`, `task_id`, `step_count`, `max_steps`, `num_query_actions`, `num_interventions`.
+---
+## Tasks
+| Task ID | Difficulty | Drugs | Query Budget | Intervention Budget | Max Steps | Description |
+|---|---|---|---|---|---|---|
+| `easy_screening` | Easy | 3-5 | 4 | 2 | 10 | One severe DDI, simple resolution |
+| `budgeted_screening` | Medium | 6-10 | 8 | 3 | 20 | Multiple DDIs + Beers issues, limited budgets |
+| `complex_tradeoff` | Hard | 10-15 | 12 | 5 | 30 | Critical drugs, trade-off between risk and regimen stability |
+---
+## Reward Structure
+**Per-step shaped rewards:**
+| Event | Reward |
+|---|---|
+| DDI query | -0.01 (cost) + 0.03 bonus if severe DDI discovered |
+| Successful intervention | +(previous_risk - new_risk) - 0.02 cost |
+| Invalid action | -0.10 penalty |
+| Timeout (max steps exceeded) | -0.20 penalty |
+| `finish_review` | + grader score (0.0 to 1.0) |
+**Terminal grader scoring:**
+- **Easy**: 50% risk reduction + 50% targeted intervention flag
+- **Medium**: 50% risk reduction + 30% intervention precision + 20% query efficiency
+- **Hard**: risk reduction - regimen disruption penalty - critical drug penalty
+---
+## Setup & Usage
+### Install dependencies
+```bash
+pip install -r requirements.txt
+```
+### Generate synthetic data
+```bash
+python3 scripts/preprocess_data.py
+```
+### Run the API server locally
+```bash
+PYTHONPATH=src uvicorn polypharmacy_env.api.server:app --host 0.0.0.0 --port 7860
+```
+### Run the heuristic baseline
+```bash
+PYTHONPATH=src python3 -m polypharmacy_env.baselines.heuristic_agent
+```
+### Run tests
+```bash
+PYTHONPATH=src python3 -m pytest src/polypharmacy_env/tests/ -v
+```
+### Run `inference.py` (LLM baseline)
+```bash
+# Start the server first, then in another terminal:
+export OPENAI_API_KEY="sk-..."
+export MODEL_NAME="gpt-4.1"
+export POLYPHARMACY_ENV_URL="http://localhost:7860"
+python3 inference.py
+```
+### Docker
+```bash
+docker build -t polypharmacy-env .
+docker run -p 7860:7860 polypharmacy-env
+```
+---
+## Hugging Face Space
+This repo is ready for deployment as a HF Space:
+- **Space type**: `docker`
+- **Tag**: `openenv`
+- The container listens on port 7860 and exposes `/reset`, `/step`, `/state`, `/health`.
+---
+## Baseline Scores
+### Heuristic Agent (deterministic, rule-based)
+| Task | Avg Score | Avg Reward |
+|---|---|---|
+| `easy_screening` | ~0.96 | ~1.30 |
+| `budgeted_screening` | ~0.48 | ~0.45 |
+| `complex_tradeoff` | ~0.24 | ~0.11 |
+*(Scores vary by seed; run `scripts/run_validation.sh` for exact numbers.)*
+---
+## Project Structure
+```
+openenv-polypharmacy/
+  openenv.yaml              # OpenEnv manifest
+  Dockerfile                # Container image
+  inference.py              # LLM baseline script
+  requirements.txt
+  pyproject.toml
+  src/polypharmacy_env/
+    config.py               # Constants, task configs
+    models.py               # Pydantic action/observation/state models
+    env_core.py             # PolypharmacyEnv implementation
+    tasks.py                # Task selection utilities
+    graders.py              # Deterministic graders (3 difficulty levels)
+    rewards.py              # Reward shaping logic
+    data_loader.py          # CSV data loading
+    ddi_simulator.py        # Drug interaction lookup engine
+    api/
+      server.py             # FastAPI HTTP server
+      schemas.py            # Request/response schemas
+    baselines/
+      heuristic_agent.py    # Rule-based baseline
+      random_agent.py       # Random baseline
+    tests/
+      test_env_core.py
+      test_api.py
+  data/
+    lookups/                # Drug metadata, DDI rules, Beers criteria CSVs
+    processed/              # Synthetic patient episodes
+  scripts/
+    preprocess_data.py      # Synthetic data generator
+    run_validation.sh       # Run tests + baseline
+```

openenv-polypharmacy/data/lookups/beers_criteria.csv ADDED Viewed

	@@ -0,0 +1,16 @@

+drug_id,criterion_type,condition,rationale
+DRUG_DIAZEPAM,avoid,,"Long-acting benzodiazepine: falls, fractures, cognitive impairment in elderly"
+DRUG_ALPRAZOLAM,avoid,,"Benzodiazepine: falls, fractures, cognitive impairment in elderly"
+DRUG_AMITRIPTYLINE,avoid,,"Strongly anticholinergic TCA: sedation, confusion, urinary retention in elderly"
+DRUG_GLIPIZIDE,caution,,Sulfonylurea: hypoglycemia risk higher in elderly
+DRUG_NAPROXEN,avoid,CKD,"NSAID contraindicated in CKD – renal deterioration, fluid retention"
+DRUG_IBUPROFEN,avoid,CKD,"NSAID contraindicated in CKD – renal deterioration, fluid retention"
+DRUG_NAPROXEN,caution,,NSAID: GI bleeding and renal risk in elderly
+DRUG_IBUPROFEN,caution,,NSAID: GI bleeding and renal risk in elderly
+DRUG_DIGOXIN,dose_adjust,,Avoid doses > 0.125 mg/day in elderly – toxicity risk
+DRUG_TRAMADOL,avoid,,"Opioid: CNS depression, falls, constipation in elderly"
+DRUG_METFORMIN,dose_adjust,CKD,Reduce dose or avoid if eGFR < 30 – lactic acidosis risk
+DRUG_INSULIN_GLARGINE,caution,,Tight glycemic control increases hypoglycemia risk in elderly
+DRUG_PREDNISONE,avoid_in_condition,DM,Corticosteroid worsens glycemic control in diabetes
+DRUG_DONEPEZIL,avoid_in_condition,dementia,"Limited benefit, GI side effects; reassess regularly"
+DRUG_CIPROFLOXACIN,caution,,"Fluoroquinolone: tendon rupture, QT prolongation risk in elderly"

openenv-polypharmacy/data/lookups/ddi_rules.csv ADDED Viewed

	@@ -0,0 +1,25 @@

+drug_id_1,drug_id_2,severity,mechanism,recommendation,base_risk_score
+DRUG_NAPROXEN,DRUG_WARFARIN,severe,Increased bleeding risk – NSAID platelet inhibition + anticoagulant,avoid_combination,0.9
+DRUG_IBUPROFEN,DRUG_WARFARIN,severe,Increased bleeding risk – NSAID + anticoagulant synergy,avoid_combination,0.88
+DRUG_ASPIRIN,DRUG_WARFARIN,moderate,Additive antiplatelet + anticoagulant bleeding risk,monitor_closely,0.55
+DRUG_FLUOXETINE,DRUG_WARFARIN,moderate,SSRI impairs platelet serotonin uptake and may potentiate bleeding,monitor_closely,0.45
+DRUG_CIPROFLOXACIN,DRUG_WARFARIN,moderate,CYP1A2 inhibition raises warfarin levels,dose_adjust,0.5
+DRUG_APIXABAN,DRUG_NAPROXEN,severe,DOAC + NSAID – high bleeding risk,avoid_combination,0.85
+DRUG_APIXABAN,DRUG_ASPIRIN,moderate,Additive bleeding risk with antiplatelet,monitor_closely,0.5
+DRUG_AMIODARONE,DRUG_DIGOXIN,severe,Amiodarone increases digoxin levels – toxicity risk,dose_adjust,0.8
+DRUG_DIGOXIN,DRUG_SPIRONOLACTONE,moderate,Spironolactone may raise digoxin levels,monitor_closely,0.4
+DRUG_CIPROFLOXACIN,DRUG_METFORMIN,moderate,Fluoroquinolone may cause dysglycemia with metformin,monitor_closely,0.35
+DRUG_DIAZEPAM,DRUG_TRAMADOL,severe,CNS depression – benzodiazepine + opioid,avoid_combination,0.92
+DRUG_ALPRAZOLAM,DRUG_TRAMADOL,severe,CNS depression – benzodiazepine + opioid,avoid_combination,0.91
+DRUG_LISINOPRIL,DRUG_SPIRONOLACTONE,moderate,Hyperkalemia risk – ACE-I + K-sparing diuretic,monitor_closely,0.48
+DRUG_LISINOPRIL,DRUG_NAPROXEN,moderate,"NSAID reduces ACE-I efficacy, renal risk",monitor_closely,0.42
+DRUG_AMLODIPINE,DRUG_SIMVASTATIN,moderate,CYP3A4 interaction increases statin exposure,dose_adjust,0.38
+DRUG_ATORVASTATIN,DRUG_CIPROFLOXACIN,mild,Minor CYP interaction raising statin levels,no_action,0.15
+DRUG_CLOPIDOGREL,DRUG_OMEPRAZOLE,moderate,PPI reduces clopidogrel activation via CYP2C19,dose_adjust,0.45
+DRUG_GLIPIZIDE,DRUG_INSULIN_GLARGINE,moderate,Additive hypoglycemia risk,monitor_closely,0.5
+DRUG_FLUOXETINE,DRUG_TRAMADOL,severe,Serotonin syndrome risk – SSRI + serotonergic opioid,avoid_combination,0.82
+DRUG_AMITRIPTYLINE,DRUG_TRAMADOL,severe,Serotonin syndrome + CNS depression,avoid_combination,0.85
+DRUG_DIGOXIN,DRUG_METOPROLOL,moderate,Additive bradycardia,monitor_closely,0.4
+DRUG_DIGOXIN,DRUG_FUROSEMIDE,moderate,Loop diuretic causes hypokalemia increasing digoxin toxicity risk,monitor_closely,0.45
+DRUG_NAPROXEN,DRUG_PREDNISONE,moderate,GI bleeding risk – corticosteroid + NSAID,monitor_closely,0.5
+DRUG_PREDNISONE,DRUG_WARFARIN,mild,Corticosteroid may alter INR,monitor_closely,0.25

openenv-polypharmacy/data/lookups/drug_metadata.csv ADDED Viewed

	@@ -0,0 +1,34 @@

+drug_id,generic_name,atc_class,is_high_risk_elderly,default_dose_mg,min_dose_mg,max_dose_mg
+DRUG_WARFARIN,warfarin,B01AA,1,5.0,1.0,10.0
+DRUG_APIXABAN,apixaban,B01AF,1,5.0,2.5,10.0
+DRUG_METFORMIN,metformin,A10BA,0,1000,500,2000
+DRUG_GLIPIZIDE,glipizide,A10BB,1,5.0,2.5,20.0
+DRUG_LISINOPRIL,lisinopril,C09AA,0,10.0,2.5,40.0
+DRUG_AMLODIPINE,amlodipine,C08CA,0,5.0,2.5,10.0
+DRUG_METOPROLOL,metoprolol,C07AB,0,50.0,25.0,200.0
+DRUG_DIGOXIN,digoxin,C01AA,1,0.25,0.0625,0.5
+DRUG_FUROSEMIDE,furosemide,C03CA,0,40.0,20.0,160.0
+DRUG_SPIRONOLACTONE,spironolactone,C03DA,0,25.0,12.5,50.0
+DRUG_ATORVASTATIN,atorvastatin,C10AA,0,20.0,10.0,80.0
+DRUG_SIMVASTATIN,simvastatin,C10AA,0,20.0,10.0,40.0
+DRUG_OMEPRAZOLE,omeprazole,A02BC,0,20.0,10.0,40.0
+DRUG_DIAZEPAM,diazepam,N05BA,1,5.0,2.0,10.0
+DRUG_ALPRAZOLAM,alprazolam,N05BA,1,0.5,0.25,2.0
+DRUG_AMITRIPTYLINE,amitriptyline,N06AA,1,25.0,10.0,75.0
+DRUG_INSULIN_GLARGINE,insulin glargine,A10AE,1,20.0,10.0,60.0
+DRUG_PREDNISONE,prednisone,H02AB,0,10.0,5.0,60.0
+DRUG_NAPROXEN,naproxen,M01AE,1,500,250,1000
+DRUG_IBUPROFEN,ibuprofen,M01AE,1,400,200,800
+DRUG_CLOPIDOGREL,clopidogrel,B01AC,0,75.0,75.0,75.0
+DRUG_ASPIRIN,aspirin,B01AC,0,81.0,81.0,325.0
+DRUG_HYDROCHLOROTHIAZIDE,HCTZ,C03AA,0,25.0,12.5,50.0
+DRUG_DONEPEZIL,donepezil,N06DA,0,5.0,5.0,10.0
+DRUG_GABAPENTIN,gabapentin,N03AX,0,300,100,1200
+DRUG_TRAMADOL,tramadol,N02AX,1,50.0,25.0,200.0
+DRUG_FLUOXETINE,fluoxetine,N06AB,0,20.0,10.0,60.0
+DRUG_SERTRALINE,sertraline,N06AB,0,50.0,25.0,200.0
+DRUG_CIPROFLOXACIN,ciprofloxacin,J01MA,0,500,250,750
+DRUG_TAMSULOSIN,tamsulosin,G04CA,0,0.4,0.4,0.8
+DRUG_CELECOXIB,celecoxib,M01AE,0,200,100,400
+DRUG_NORTRIPTYLINE,nortriptyline,N06AA,0,25.0,10.0,75.0
+DRUG_LOSARTAN,losartan,C09AA,0,50.0,25.0,100.0

openenv-polypharmacy/data/processed/patients_polypharmacy.csv ADDED Viewed

	@@ -0,0 +1,121 @@

+episode_id,age,sex,conditions,eGFR_category,liver_function_category,medication_ids,baseline_risk_score,difficulty
+EP_0001,72,F,HTN,moderate,normal,DRUG_WARFARIN;DRUG_FUROSEMIDE;DRUG_LISINOPRIL;DRUG_AMLODIPINE;DRUG_NAPROXEN,0.264,easy
+EP_0002,67,M,OA;COPD;neuropathy,normal,normal,DRUG_IBUPROFEN;DRUG_TRAMADOL;DRUG_AMITRIPTYLINE,0.2833,easy
+EP_0003,73,F,HTN;HF,normal,normal,DRUG_IBUPROFEN;DRUG_WARFARIN;DRUG_FUROSEMIDE,0.2933,easy
+EP_0004,74,M,CKD,mild,impaired,DRUG_TRAMADOL;DRUG_AMLODIPINE;DRUG_DIAZEPAM,0.3067,easy
+EP_0005,76,F,OA;neuropathy;CKD,mild,normal,DRUG_IBUPROFEN;DRUG_GABAPENTIN;DRUG_TRAMADOL;DRUG_NAPROXEN;DRUG_AMITRIPTYLINE,0.17,easy
+EP_0006,74,M,HTN;OA,normal,impaired,DRUG_IBUPROFEN;DRUG_WARFARIN;DRUG_LISINOPRIL;DRUG_FUROSEMIDE;DRUG_NAPROXEN,0.44,easy
+EP_0007,90,M,BPH;OA,moderate,normal,DRUG_DIGOXIN;DRUG_TAMSULOSIN;DRUG_GABAPENTIN;DRUG_NAPROXEN;DRUG_AMIODARONE,0.16,easy
+EP_0008,77,F,CKD;OA;depression,mild,normal,DRUG_AMITRIPTYLINE;DRUG_IBUPROFEN;DRUG_SERTRALINE;DRUG_TRAMADOL;DRUG_FUROSEMIDE,0.17,easy
+EP_0009,67,M,COPD;GERD;BPH,mild,normal,DRUG_TRAMADOL;DRUG_FLUOXETINE;DRUG_OMEPRAZOLE;DRUG_TAMSULOSIN,0.205,easy
+EP_0010,75,M,dementia;HTN;depression,normal,impaired,DRUG_TRAMADOL;DRUG_DIAZEPAM;DRUG_SERTRALINE;DRUG_AMITRIPTYLINE,0.4425,easy
+EP_0011,83,F,AF,moderate,normal,DRUG_TRAMADOL;DRUG_WARFARIN;DRUG_DIGOXIN;DRUG_ALPRAZOLAM,0.2275,easy
+EP_0012,71,F,HTN;GERD;depression,normal,normal,DRUG_LISINOPRIL;DRUG_FLUOXETINE;DRUG_APIXABAN;DRUG_AMLODIPINE;DRUG_NAPROXEN,0.254,easy
+EP_0013,70,F,HF;HTN;AF,mild,normal,DRUG_TRAMADOL;DRUG_FUROSEMIDE;DRUG_ALPRAZOLAM,0.3033,easy
+EP_0014,82,F,dementia,normal,normal,DRUG_DONEPEZIL;DRUG_NAPROXEN;DRUG_APIXABAN;DRUG_SPIRONOLACTONE;DRUG_FUROSEMIDE,0.17,easy
+EP_0015,84,F,dementia;neuropathy,normal,normal,DRUG_DONEPEZIL;DRUG_GABAPENTIN;DRUG_AMITRIPTYLINE;DRUG_CELECOXIB;DRUG_TRAMADOL,0.17,easy
+EP_0016,83,M,HTN,normal,normal,DRUG_TRAMADOL;DRUG_METOPROLOL;DRUG_ALPRAZOLAM,0.3033,easy
+EP_0017,83,F,CKD,severe,normal,DRUG_APIXABAN;DRUG_AMLODIPINE;DRUG_NAPROXEN,0.2833,easy
+EP_0018,70,F,CKD;HF;HTN,mild,normal,DRUG_SPIRONOLACTONE;DRUG_ALPRAZOLAM;DRUG_TRAMADOL;DRUG_AMLODIPINE;DRUG_METOPROLOL,0.182,easy
+EP_0019,84,M,DM;depression,normal,normal,DRUG_GLIPIZIDE;DRUG_FLUOXETINE;DRUG_TRAMADOL;DRUG_INSULIN_GLARGINE;DRUG_DIAZEPAM,0.448,easy
+EP_0020,90,F,neuropathy;BPH;AF,normal,normal,DRUG_WARFARIN;DRUG_NAPROXEN;DRUG_TAMSULOSIN,0.3,easy
+EP_0021,87,M,HTN;BPH;HF,normal,normal,DRUG_TRAMADOL;DRUG_AMITRIPTYLINE;DRUG_AMLODIPINE;DRUG_SPIRONOLACTONE,0.2125,easy
+EP_0022,90,M,AF;GERD;DM,normal,impaired,DRUG_APIXABAN;DRUG_NAPROXEN;DRUG_METOPROLOL;DRUG_OMEPRAZOLE,0.2125,easy
+EP_0023,90,F,HF,normal,normal,DRUG_APIXABAN;DRUG_NAPROXEN;DRUG_METOPROLOL,0.2833,easy
+EP_0024,71,F,OA,mild,normal,DRUG_IBUPROFEN;DRUG_GABAPENTIN;DRUG_TRAMADOL;DRUG_NAPROXEN;DRUG_APIXABAN,0.17,easy
+EP_0025,71,M,COPD;AF;neuropathy,mild,normal,DRUG_GABAPENTIN;DRUG_WARFARIN;DRUG_NAPROXEN,0.3,easy
+EP_0026,88,M,GERD;dementia,severe,normal,DRUG_TRAMADOL;DRUG_AMITRIPTYLINE;DRUG_DONEPEZIL;DRUG_OMEPRAZOLE,0.2125,easy
+EP_0027,76,M,AF,normal,normal,DRUG_DIGOXIN;DRUG_METOPROLOL;DRUG_WARFARIN;DRUG_APIXABAN;DRUG_NAPROXEN,0.43,easy
+EP_0028,73,F,CKD,moderate,normal,DRUG_AMLODIPINE;DRUG_FUROSEMIDE;DRUG_METFORMIN;DRUG_AMITRIPTYLINE;DRUG_TRAMADOL,0.17,easy
+EP_0029,70,F,CKD;OA,mild,normal,DRUG_IBUPROFEN;DRUG_TRAMADOL;DRUG_GABAPENTIN;DRUG_AMLODIPINE;DRUG_DIAZEPAM,0.184,easy
+EP_0030,87,F,dementia;HF;depression,normal,normal,DRUG_WARFARIN;DRUG_DONEPEZIL;DRUG_FLUOXETINE;DRUG_FUROSEMIDE;DRUG_NAPROXEN,0.27,easy
+EP_0031,69,M,HF,severe,normal,DRUG_WARFARIN;DRUG_SPIRONOLACTONE;DRUG_LISINOPRIL;DRUG_FUROSEMIDE;DRUG_NAPROXEN,0.36,easy
+EP_0032,89,F,neuropathy,mild,normal,DRUG_AMITRIPTYLINE;DRUG_GABAPENTIN;DRUG_PREDNISONE;DRUG_TRAMADOL,0.2125,easy
+EP_0033,68,F,dementia,mild,impaired,DRUG_DONEPEZIL;DRUG_OMEPRAZOLE;DRUG_SPIRONOLACTONE;DRUG_TRAMADOL;DRUG_ALPRAZOLAM,0.182,easy
+EP_0034,84,F,CKD;HF;HTN,moderate,normal,DRUG_HYDROCHLOROTHIAZIDE;DRUG_DIGOXIN;DRUG_AMIODARONE,0.2667,easy
+EP_0035,74,M,HTN;DM,normal,impaired,DRUG_IBUPROFEN;DRUG_GLIPIZIDE;DRUG_WARFARIN;DRUG_HYDROCHLOROTHIAZIDE;DRUG_METOPROLOL,0.176,easy
+EP_0036,80,F,DM;neuropathy;HTN,severe,normal,DRUG_WARFARIN;DRUG_AMLODIPINE;DRUG_AMITRIPTYLINE;DRUG_NAPROXEN,0.225,easy
+EP_0037,78,M,HF,normal,normal,DRUG_TRAMADOL;DRUG_FUROSEMIDE;DRUG_DIAZEPAM;DRUG_LISINOPRIL,0.23,easy
+EP_0038,89,F,HTN;AF,moderate,normal,DRUG_TRAMADOL;DRUG_FUROSEMIDE;DRUG_DIAZEPAM,0.3067,easy
+EP_0039,78,F,OA;depression,moderate,normal,DRUG_GABAPENTIN;DRUG_FLUOXETINE;DRUG_TRAMADOL;DRUG_SERTRALINE,0.205,easy
+EP_0040,72,F,neuropathy;COPD;BPH,normal,normal,DRUG_TRAMADOL;DRUG_ALPRAZOLAM;DRUG_AMITRIPTYLINE;DRUG_TAMSULOSIN,0.44,easy
+EP_0041,89,F,AF;BPH;DM;HF;HTN,mild,normal,DRUG_GLIPIZIDE;DRUG_DIGOXIN;DRUG_METOPROLOL;DRUG_WARFARIN;DRUG_METFORMIN;DRUG_AMLODIPINE;DRUG_INSULIN_GLARGINE;DRUG_HYDROCHLOROTHIAZIDE;DRUG_APIXABAN,0.1,medium
+EP_0042,66,F,HTN;AF;CKD,moderate,normal,DRUG_METOPROLOL;DRUG_AMLODIPINE;DRUG_LISINOPRIL;DRUG_DIGOXIN;DRUG_FUROSEMIDE;DRUG_WARFARIN;DRUG_APIXABAN;DRUG_HYDROCHLOROTHIAZIDE;DRUG_IBUPROFEN;DRUG_SERTRALINE,0.173,medium
+EP_0043,70,F,OA;HTN;dementia,moderate,normal,DRUG_TRAMADOL;DRUG_HYDROCHLOROTHIAZIDE;DRUG_GABAPENTIN;DRUG_IBUPROFEN;DRUG_DONEPEZIL;DRUG_FUROSEMIDE;DRUG_NAPROXEN;DRUG_LISINOPRIL;DRUG_METOPROLOL,0.0467,medium
+EP_0044,77,M,HF;HTN;GERD;COPD;neuropathy,normal,normal,DRUG_OMEPRAZOLE;DRUG_AMLODIPINE;DRUG_PREDNISONE;DRUG_LISINOPRIL;DRUG_SPIRONOLACTONE;DRUG_METOPROLOL;DRUG_GABAPENTIN;DRUG_HYDROCHLOROTHIAZIDE;DRUG_DIGOXIN,0.1422,medium
+EP_0045,78,M,CKD;depression;dementia;GERD;OA,severe,normal,DRUG_FLUOXETINE;DRUG_NAPROXEN;DRUG_GABAPENTIN;DRUG_IBUPROFEN;DRUG_OMEPRAZOLE;DRUG_TRAMADOL;DRUG_SERTRALINE;DRUG_DONEPEZIL;DRUG_FUROSEMIDE;DRUG_AMITRIPTYLINE,0.167,medium
+EP_0046,82,M,BPH;DM;CKD;dementia;HF,moderate,normal,DRUG_GLIPIZIDE;DRUG_INSULIN_GLARGINE;DRUG_METOPROLOL;DRUG_METFORMIN;DRUG_DIGOXIN;DRUG_AMLODIPINE;DRUG_TAMSULOSIN;DRUG_SPIRONOLACTONE;DRUG_DONEPEZIL;DRUG_LISINOPRIL,0.178,medium
+EP_0047,83,F,depression;HTN;BPH;neuropathy;AF,normal,impaired,DRUG_SERTRALINE;DRUG_GABAPENTIN;DRUG_HYDROCHLOROTHIAZIDE;DRUG_METOPROLOL;DRUG_LISINOPRIL;DRUG_APIXABAN;DRUG_TAMSULOSIN;DRUG_AMLODIPINE;DRUG_FUROSEMIDE,0.0,medium
+EP_0048,85,F,AF;DM;OA,severe,impaired,DRUG_WARFARIN;DRUG_GLIPIZIDE;DRUG_INSULIN_GLARGINE;DRUG_TRAMADOL;DRUG_NAPROXEN;DRUG_GABAPENTIN;DRUG_DIGOXIN;DRUG_METFORMIN;DRUG_IBUPROFEN;DRUG_METOPROLOL,0.268,medium
+EP_0049,65,F,BPH;COPD;neuropathy,normal,normal,DRUG_TAMSULOSIN;DRUG_GABAPENTIN;DRUG_AMITRIPTYLINE;DRUG_PREDNISONE;DRUG_WARFARIN;DRUG_FLUOXETINE;DRUG_AMLODIPINE;DRUG_TRAMADOL,0.2963,medium
+EP_0050,86,M,dementia;depression;OA;neuropathy,mild,impaired,DRUG_AMITRIPTYLINE;DRUG_GABAPENTIN;DRUG_SERTRALINE;DRUG_NAPROXEN;DRUG_DONEPEZIL;DRUG_IBUPROFEN;DRUG_TRAMADOL,0.1214,medium
+EP_0051,90,M,OA;HF;HTN;DM,normal,normal,DRUG_FUROSEMIDE;DRUG_DIGOXIN;DRUG_HYDROCHLOROTHIAZIDE;DRUG_IBUPROFEN;DRUG_GLIPIZIDE;DRUG_TRAMADOL;DRUG_METOPROLOL;DRUG_AMLODIPINE;DRUG_METFORMIN;DRUG_NAPROXEN,0.085,medium
+EP_0052,70,M,AF;depression;GERD,moderate,normal,DRUG_FLUOXETINE;DRUG_METOPROLOL;DRUG_SERTRALINE;DRUG_AMITRIPTYLINE;DRUG_OMEPRAZOLE;DRUG_APIXABAN;DRUG_WARFARIN;DRUG_DIGOXIN,0.1063,medium
+EP_0053,65,F,HF;DM;GERD;neuropathy;BPH,moderate,impaired,DRUG_INSULIN_GLARGINE;DRUG_GLIPIZIDE;DRUG_OMEPRAZOLE;DRUG_TAMSULOSIN;DRUG_FUROSEMIDE;DRUG_METFORMIN;DRUG_SPIRONOLACTONE;DRUG_AMITRIPTYLINE;DRUG_LISINOPRIL;DRUG_GABAPENTIN,0.098,medium
+EP_0054,82,F,OA;neuropathy;AF;DM,mild,normal,DRUG_GABAPENTIN;DRUG_TRAMADOL;DRUG_APIXABAN;DRUG_INSULIN_GLARGINE;DRUG_IBUPROFEN;DRUG_NAPROXEN,0.1417,medium
+EP_0055,74,M,GERD;HTN;CKD,moderate,normal,DRUG_LISINOPRIL;DRUG_AMLODIPINE;DRUG_HYDROCHLOROTHIAZIDE;DRUG_OMEPRAZOLE;DRUG_METOPROLOL;DRUG_FUROSEMIDE;DRUG_SPIRONOLACTONE;DRUG_INSULIN_GLARGINE,0.06,medium
+EP_0056,67,F,HTN;GERD;COPD;AF,moderate,normal,DRUG_PREDNISONE;DRUG_FUROSEMIDE;DRUG_DIGOXIN;DRUG_OMEPRAZOLE;DRUG_LISINOPRIL;DRUG_METOPROLOL;DRUG_APIXABAN;DRUG_WARFARIN;DRUG_AMLODIPINE,0.1222,medium
+EP_0057,74,F,DM;HTN;BPH,normal,normal,DRUG_INSULIN_GLARGINE;DRUG_LISINOPRIL;DRUG_AMLODIPINE;DRUG_GLIPIZIDE;DRUG_METFORMIN;DRUG_FUROSEMIDE;DRUG_TAMSULOSIN;DRUG_METOPROLOL;DRUG_HYDROCHLOROTHIAZIDE;DRUG_APIXABAN,0.05,medium
+EP_0058,90,F,AF;OA;BPH,moderate,normal,DRUG_DIGOXIN;DRUG_METOPROLOL;DRUG_GABAPENTIN;DRUG_WARFARIN;DRUG_IBUPROFEN;DRUG_NAPROXEN;DRUG_TAMSULOSIN;DRUG_TRAMADOL;DRUG_APIXABAN;DRUG_CLOPIDOGREL,0.303,medium
+EP_0059,85,F,BPH;HTN;depression;dementia;COPD,mild,normal,DRUG_PREDNISONE;DRUG_FUROSEMIDE;DRUG_AMLODIPINE;DRUG_SERTRALINE;DRUG_FLUOXETINE;DRUG_HYDROCHLOROTHIAZIDE;DRUG_TAMSULOSIN;DRUG_AMITRIPTYLINE,0.0,medium
+EP_0060,80,F,HF;CKD;neuropathy;HTN,moderate,normal,DRUG_AMLODIPINE;DRUG_HYDROCHLOROTHIAZIDE;DRUG_SPIRONOLACTONE;DRUG_GABAPENTIN;DRUG_LISINOPRIL;DRUG_DIGOXIN;DRUG_AMITRIPTYLINE;DRUG_METOPROLOL;DRUG_FUROSEMIDE;DRUG_ATORVASTATIN,0.173,medium
+EP_0061,90,M,GERD;BPH;HF;HTN;CKD,mild,normal,DRUG_DIGOXIN;DRUG_AMLODIPINE;DRUG_LISINOPRIL;DRUG_METOPROLOL;DRUG_TAMSULOSIN;DRUG_FUROSEMIDE,0.1417,medium
+EP_0062,65,M,neuropathy;COPD;GERD;BPH;AF,moderate,normal,DRUG_PREDNISONE;DRUG_TAMSULOSIN;DRUG_DIGOXIN;DRUG_METOPROLOL;DRUG_OMEPRAZOLE;DRUG_GABAPENTIN;DRUG_AMITRIPTYLINE;DRUG_WARFARIN;DRUG_APIXABAN,0.0722,medium
+EP_0063,76,M,depression;COPD;OA,mild,normal,DRUG_IBUPROFEN;DRUG_TRAMADOL;DRUG_NAPROXEN;DRUG_AMITRIPTYLINE;DRUG_FLUOXETINE;DRUG_SERTRALINE;DRUG_GABAPENTIN,0.2386,medium
+EP_0064,88,M,BPH;GERD;COPD,mild,normal,DRUG_OMEPRAZOLE;DRUG_PREDNISONE;DRUG_TAMSULOSIN;DRUG_METOPROLOL;DRUG_DIGOXIN;DRUG_SIMVASTATIN;DRUG_AMLODIPINE;DRUG_ATORVASTATIN;DRUG_ALPRAZOLAM,0.0867,medium
+EP_0065,75,M,HTN;HF;AF,mild,normal,DRUG_FUROSEMIDE;DRUG_WARFARIN;DRUG_METOPROLOL;DRUG_HYDROCHLOROTHIAZIDE;DRUG_APIXABAN;DRUG_SPIRONOLACTONE;DRUG_DIGOXIN,0.1786,medium
+EP_0066,66,M,HF;dementia;GERD;OA;DM,moderate,normal,DRUG_INSULIN_GLARGINE;DRUG_FUROSEMIDE;DRUG_DIGOXIN;DRUG_TRAMADOL;DRUG_NAPROXEN;DRUG_METFORMIN;DRUG_DONEPEZIL;DRUG_OMEPRAZOLE,0.0563,medium
+EP_0067,70,F,CKD;HTN;AF;HF,moderate,normal,DRUG_APIXABAN;DRUG_HYDROCHLOROTHIAZIDE;DRUG_LISINOPRIL;DRUG_METOPROLOL;DRUG_FUROSEMIDE;DRUG_WARFARIN;DRUG_AMLODIPINE,0.0,medium
+EP_0068,85,M,depression;dementia;neuropathy;HF,normal,impaired,DRUG_FLUOXETINE;DRUG_SERTRALINE;DRUG_GABAPENTIN;DRUG_AMITRIPTYLINE;DRUG_SPIRONOLACTONE;DRUG_DONEPEZIL;DRUG_LISINOPRIL;DRUG_METOPROLOL;DRUG_DIGOXIN;DRUG_FUROSEMIDE,0.173,medium
+EP_0069,74,F,OA;CKD;AF,mild,normal,DRUG_IBUPROFEN;DRUG_FUROSEMIDE;DRUG_GABAPENTIN;DRUG_NAPROXEN;DRUG_AMLODIPINE;DRUG_APIXABAN;DRUG_METOPROLOL;DRUG_TRAMADOL;DRUG_DIGOXIN,0.1889,medium
+EP_0070,75,F,dementia;GERD;COPD;OA,mild,normal,DRUG_DONEPEZIL;DRUG_PREDNISONE;DRUG_OMEPRAZOLE;DRUG_IBUPROFEN;DRUG_TRAMADOL;DRUG_GABAPENTIN,0.0,medium
+EP_0071,68,M,BPH;DM;COPD;neuropathy,normal,normal,DRUG_AMITRIPTYLINE;DRUG_INSULIN_GLARGINE;DRUG_METFORMIN;DRUG_TAMSULOSIN;DRUG_GABAPENTIN;DRUG_PREDNISONE;DRUG_GLIPIZIDE,0.0714,medium
+EP_0072,92,F,CKD;BPH;COPD;AF,normal,normal,DRUG_PREDNISONE;DRUG_FUROSEMIDE;DRUG_DIGOXIN;DRUG_AMLODIPINE;DRUG_TAMSULOSIN;DRUG_METOPROLOL;DRUG_APIXABAN,0.1214,medium
+EP_0073,88,F,OA;GERD;HTN;depression,mild,normal,DRUG_AMITRIPTYLINE;DRUG_OMEPRAZOLE;DRUG_NAPROXEN;DRUG_METOPROLOL;DRUG_IBUPROFEN;DRUG_FUROSEMIDE;DRUG_LISINOPRIL;DRUG_SERTRALINE,0.0525,medium
+EP_0074,80,F,neuropathy;OA;CKD;depression,mild,normal,DRUG_AMLODIPINE;DRUG_SERTRALINE;DRUG_IBUPROFEN;DRUG_FUROSEMIDE;DRUG_TRAMADOL;DRUG_AMITRIPTYLINE;DRUG_NAPROXEN,0.1214,medium
+EP_0075,68,F,dementia;AF;COPD;HTN;neuropathy,mild,normal,DRUG_GABAPENTIN;DRUG_AMLODIPINE;DRUG_DIGOXIN;DRUG_FUROSEMIDE;DRUG_DONEPEZIL;DRUG_METOPROLOL;DRUG_WARFARIN;DRUG_HYDROCHLOROTHIAZIDE;DRUG_LISINOPRIL,0.0944,medium
+EP_0076,71,M,HF;DM;dementia,severe,normal,DRUG_SPIRONOLACTONE;DRUG_DONEPEZIL;DRUG_INSULIN_GLARGINE;DRUG_LISINOPRIL;DRUG_DIGOXIN;DRUG_METFORMIN;DRUG_GLIPIZIDE;DRUG_FUROSEMIDE,0.2287,medium
+EP_0077,75,F,AF;BPH;dementia,mild,impaired,DRUG_METOPROLOL;DRUG_TAMSULOSIN;DRUG_APIXABAN;DRUG_DONEPEZIL;DRUG_DIGOXIN;DRUG_WARFARIN;DRUG_SPIRONOLACTONE,0.1143,medium
+EP_0078,81,F,OA;depression;DM;neuropathy;CKD,normal,normal,DRUG_GLIPIZIDE;DRUG_NAPROXEN;DRUG_FUROSEMIDE;DRUG_AMLODIPINE;DRUG_SERTRALINE;DRUG_INSULIN_GLARGINE;DRUG_IBUPROFEN,0.0714,medium
+EP_0079,74,F,DM;OA;GERD;CKD,mild,impaired,DRUG_GLIPIZIDE;DRUG_AMLODIPINE;DRUG_INSULIN_GLARGINE;DRUG_METFORMIN;DRUG_IBUPROFEN;DRUG_GABAPENTIN;DRUG_NAPROXEN,0.0714,medium
+EP_0080,72,M,GERD;HF;OA;CKD,normal,normal,DRUG_AMLODIPINE;DRUG_TRAMADOL;DRUG_LISINOPRIL;DRUG_OMEPRAZOLE;DRUG_FUROSEMIDE;DRUG_DIGOXIN;DRUG_METOPROLOL;DRUG_IBUPROFEN,0.1063,medium
+EP_0081,84,M,neuropathy;CKD;depression;OA,normal,impaired,DRUG_FLUOXETINE;DRUG_AMLODIPINE;DRUG_IBUPROFEN;DRUG_NAPROXEN;DRUG_SERTRALINE;DRUG_GABAPENTIN;DRUG_FUROSEMIDE;DRUG_AMITRIPTYLINE;DRUG_TRAMADOL;DRUG_METOPROLOL;DRUG_SIMVASTATIN;DRUG_SPIRONOLACTONE;DRUG_DIGOXIN;DRUG_WARFARIN,0.395,hard
+EP_0082,75,M,OA;COPD;neuropathy;CKD;GERD;HTN;depression,severe,impaired,DRUG_SERTRALINE;DRUG_FUROSEMIDE;DRUG_METOPROLOL;DRUG_FLUOXETINE;DRUG_LISINOPRIL;DRUG_NAPROXEN;DRUG_GABAPENTIN;DRUG_OMEPRAZOLE;DRUG_AMLODIPINE;DRUG_AMITRIPTYLINE;DRUG_TRAMADOL;DRUG_HYDROCHLOROTHIAZIDE;DRUG_IBUPROFEN;DRUG_PREDNISONE;DRUG_DIGOXIN,0.2293,hard
+EP_0083,82,F,DM;dementia;OA;HF;neuropathy;COPD,moderate,normal,DRUG_SPIRONOLACTONE;DRUG_LISINOPRIL;DRUG_DONEPEZIL;DRUG_TRAMADOL;DRUG_AMITRIPTYLINE;DRUG_FUROSEMIDE;DRUG_GLIPIZIDE;DRUG_PREDNISONE;DRUG_IBUPROFEN;DRUG_GABAPENTIN;DRUG_INSULIN_GLARGINE;DRUG_DIGOXIN;DRUG_WARFARIN,0.2931,hard
+EP_0084,80,F,CKD;neuropathy;COPD;BPH;dementia;HTN;OA,moderate,normal,DRUG_GABAPENTIN;DRUG_METOPROLOL;DRUG_HYDROCHLOROTHIAZIDE;DRUG_PREDNISONE;DRUG_TRAMADOL;DRUG_NAPROXEN;DRUG_LISINOPRIL;DRUG_AMLODIPINE;DRUG_AMITRIPTYLINE;DRUG_TAMSULOSIN;DRUG_DIGOXIN;DRUG_WARFARIN,0.2767,hard
+EP_0085,79,F,OA;DM;CKD;GERD;BPH;HF;neuropathy,severe,normal,DRUG_OMEPRAZOLE;DRUG_AMLODIPINE;DRUG_SPIRONOLACTONE;DRUG_METFORMIN;DRUG_TRAMADOL;DRUG_METOPROLOL;DRUG_IBUPROFEN;DRUG_FUROSEMIDE;DRUG_GABAPENTIN;DRUG_LISINOPRIL;DRUG_DIGOXIN;DRUG_WARFARIN;DRUG_INSULIN_GLARGINE,0.2008,hard
+EP_0086,82,M,AF;DM;GERD;COPD;OA,moderate,impaired,DRUG_PREDNISONE;DRUG_METFORMIN;DRUG_APIXABAN;DRUG_WARFARIN;DRUG_DIGOXIN;DRUG_OMEPRAZOLE;DRUG_INSULIN_GLARGINE;DRUG_NAPROXEN;DRUG_GABAPENTIN;DRUG_GLIPIZIDE;DRUG_METOPROLOL;DRUG_TRAMADOL;DRUG_IBUPROFEN;DRUG_LOSARTAN,0.3057,hard
+EP_0087,90,M,HTN;GERD;DM;AF;CKD,mild,impaired,DRUG_APIXABAN;DRUG_FUROSEMIDE;DRUG_GLIPIZIDE;DRUG_OMEPRAZOLE;DRUG_WARFARIN;DRUG_METOPROLOL;DRUG_INSULIN_GLARGINE;DRUG_LISINOPRIL;DRUG_AMLODIPINE;DRUG_HYDROCHLOROTHIAZIDE,0.05,hard
+EP_0088,86,F,HF;AF;COPD;HTN;OA;GERD,normal,impaired,DRUG_PREDNISONE;DRUG_AMLODIPINE;DRUG_METOPROLOL;DRUG_APIXABAN;DRUG_IBUPROFEN;DRUG_OMEPRAZOLE;DRUG_SPIRONOLACTONE;DRUG_NAPROXEN;DRUG_HYDROCHLOROTHIAZIDE;DRUG_WARFARIN;DRUG_LISINOPRIL;DRUG_FUROSEMIDE;DRUG_TRAMADOL;DRUG_DIGOXIN;DRUG_INSULIN_GLARGINE,0.3687,hard
+EP_0089,75,M,DM;CKD;HTN;HF;BPH;neuropathy;GERD,mild,normal,DRUG_HYDROCHLOROTHIAZIDE;DRUG_AMLODIPINE;DRUG_METFORMIN;DRUG_OMEPRAZOLE;DRUG_METOPROLOL;DRUG_LISINOPRIL;DRUG_AMITRIPTYLINE;DRUG_GLIPIZIDE;DRUG_GABAPENTIN;DRUG_TAMSULOSIN;DRUG_WARFARIN;DRUG_INSULIN_GLARGINE,0.0417,hard
+EP_0090,87,M,AF;depression;DM;COPD;OA,mild,normal,DRUG_NAPROXEN;DRUG_FLUOXETINE;DRUG_APIXABAN;DRUG_SERTRALINE;DRUG_WARFARIN;DRUG_DIGOXIN;DRUG_INSULIN_GLARGINE;DRUG_TRAMADOL;DRUG_PREDNISONE;DRUG_GLIPIZIDE;DRUG_GABAPENTIN;DRUG_IBUPROFEN;DRUG_METFORMIN;DRUG_METOPROLOL;DRUG_AMITRIPTYLINE,0.4267,hard
+EP_0091,83,M,OA;dementia;GERD;depression;DM;HF,severe,normal,DRUG_GLIPIZIDE;DRUG_DIGOXIN;DRUG_IBUPROFEN;DRUG_SPIRONOLACTONE;DRUG_METFORMIN;DRUG_FLUOXETINE;DRUG_FUROSEMIDE;DRUG_SERTRALINE;DRUG_OMEPRAZOLE;DRUG_AMITRIPTYLINE;DRUG_INSULIN_GLARGINE;DRUG_NAPROXEN;DRUG_METOPROLOL;DRUG_WARFARIN,0.2843,hard
+EP_0092,78,F,CKD;OA;AF;COPD;depression,severe,normal,DRUG_METOPROLOL;DRUG_TRAMADOL;DRUG_SERTRALINE;DRUG_NAPROXEN;DRUG_AMITRIPTYLINE;DRUG_APIXABAN;DRUG_AMLODIPINE;DRUG_IBUPROFEN;DRUG_DIGOXIN;DRUG_WARFARIN;DRUG_FUROSEMIDE;DRUG_GABAPENTIN;DRUG_FLUOXETINE;DRUG_PREDNISONE,0.4536,hard
+EP_0093,93,F,HTN;DM;BPH;OA;dementia,severe,impaired,DRUG_LISINOPRIL;DRUG_DONEPEZIL;DRUG_METFORMIN;DRUG_FUROSEMIDE;DRUG_INSULIN_GLARGINE;DRUG_GABAPENTIN;DRUG_IBUPROFEN;DRUG_METOPROLOL;DRUG_GLIPIZIDE;DRUG_HYDROCHLOROTHIAZIDE;DRUG_TAMSULOSIN;DRUG_DIGOXIN,0.1125,hard
+EP_0094,94,F,BPH;GERD;COPD;HF,moderate,impaired,DRUG_LISINOPRIL;DRUG_DIGOXIN;DRUG_TAMSULOSIN;DRUG_PREDNISONE;DRUG_FUROSEMIDE;DRUG_OMEPRAZOLE;DRUG_METOPROLOL;DRUG_SPIRONOLACTONE;DRUG_HYDROCHLOROTHIAZIDE;DRUG_WARFARIN,0.198,hard
+EP_0095,90,M,HF;neuropathy;COPD;BPH;dementia;DM;CKD,normal,normal,DRUG_GABAPENTIN;DRUG_METFORMIN;DRUG_INSULIN_GLARGINE;DRUG_DIGOXIN;DRUG_AMITRIPTYLINE;DRUG_FUROSEMIDE;DRUG_GLIPIZIDE;DRUG_AMLODIPINE;DRUG_LISINOPRIL;DRUG_DONEPEZIL;DRUG_PREDNISONE;DRUG_SPIRONOLACTONE;DRUG_TAMSULOSIN;DRUG_METOPROLOL;DRUG_SERTRALINE,0.1487,hard
+EP_0096,85,F,DM;COPD;HTN;CKD;depression;dementia,severe,impaired,DRUG_AMITRIPTYLINE;DRUG_INSULIN_GLARGINE;DRUG_PREDNISONE;DRUG_METOPROLOL;DRUG_HYDROCHLOROTHIAZIDE;DRUG_FUROSEMIDE;DRUG_AMLODIPINE;DRUG_FLUOXETINE;DRUG_LISINOPRIL;DRUG_DONEPEZIL;DRUG_DIGOXIN,0.0773,hard
+EP_0097,70,M,BPH;COPD;neuropathy;CKD;GERD;depression,severe,normal,DRUG_PREDNISONE;DRUG_AMITRIPTYLINE;DRUG_AMLODIPINE;DRUG_FLUOXETINE;DRUG_GABAPENTIN;DRUG_TAMSULOSIN;DRUG_FUROSEMIDE;DRUG_SERTRALINE;DRUG_OMEPRAZOLE;DRUG_IBUPROFEN;DRUG_LOSARTAN;DRUG_INSULIN_GLARGINE;DRUG_DIGOXIN;DRUG_WARFARIN,0.145,hard
+EP_0098,81,F,COPD;depression;GERD;BPH;OA,severe,impaired,DRUG_GABAPENTIN;DRUG_FLUOXETINE;DRUG_OMEPRAZOLE;DRUG_NAPROXEN;DRUG_SERTRALINE;DRUG_IBUPROFEN;DRUG_TRAMADOL;DRUG_TAMSULOSIN;DRUG_PREDNISONE;DRUG_AMITRIPTYLINE;DRUG_NORTRIPTYLINE;DRUG_INSULIN_GLARGINE;DRUG_DIGOXIN;DRUG_LOSARTAN;DRUG_HYDROCHLOROTHIAZIDE,0.1447,hard
+EP_0099,91,M,neuropathy;OA;AF;CKD,mild,normal,DRUG_TRAMADOL;DRUG_APIXABAN;DRUG_WARFARIN;DRUG_AMLODIPINE;DRUG_METOPROLOL;DRUG_IBUPROFEN;DRUG_AMITRIPTYLINE;DRUG_GABAPENTIN;DRUG_FUROSEMIDE;DRUG_DIGOXIN;DRUG_NAPROXEN;DRUG_SIMVASTATIN;DRUG_FLUOXETINE;DRUG_INSULIN_GLARGINE,0.4271,hard
+EP_0100,90,F,DM;OA;HTN;HF,mild,impaired,DRUG_INSULIN_GLARGINE;DRUG_SPIRONOLACTONE;DRUG_METFORMIN;DRUG_HYDROCHLOROTHIAZIDE;DRUG_METOPROLOL;DRUG_AMLODIPINE;DRUG_DIGOXIN;DRUG_GLIPIZIDE;DRUG_LISINOPRIL;DRUG_FUROSEMIDE;DRUG_NAPROXEN;DRUG_IBUPROFEN;DRUG_GABAPENTIN;DRUG_TRAMADOL;DRUG_AMITRIPTYLINE,0.2333,hard
+EP_0101,88,M,BPH;neuropathy;dementia;OA;CKD;DM;HF,severe,impaired,DRUG_AMLODIPINE;DRUG_TAMSULOSIN;DRUG_TRAMADOL;DRUG_GLIPIZIDE;DRUG_FUROSEMIDE;DRUG_AMITRIPTYLINE;DRUG_LISINOPRIL;DRUG_INSULIN_GLARGINE;DRUG_DIGOXIN;DRUG_METFORMIN;DRUG_IBUPROFEN;DRUG_SPIRONOLACTONE;DRUG_METOPROLOL;DRUG_GABAPENTIN;DRUG_NAPROXEN,0.2333,hard
+EP_0102,74,M,BPH;HF;dementia;CKD;DM;GERD,moderate,normal,DRUG_OMEPRAZOLE;DRUG_LISINOPRIL;DRUG_INSULIN_GLARGINE;DRUG_METOPROLOL;DRUG_FUROSEMIDE;DRUG_GLIPIZIDE;DRUG_DONEPEZIL;DRUG_SPIRONOLACTONE;DRUG_DIGOXIN;DRUG_AMLODIPINE;DRUG_METFORMIN;DRUG_TAMSULOSIN;DRUG_WARFARIN,0.1715,hard
+EP_0103,70,F,AF;GERD;depression;CKD,mild,impaired,DRUG_DIGOXIN;DRUG_AMLODIPINE;DRUG_FLUOXETINE;DRUG_FUROSEMIDE;DRUG_APIXABAN;DRUG_METOPROLOL;DRUG_WARFARIN;DRUG_SERTRALINE;DRUG_OMEPRAZOLE;DRUG_AMITRIPTYLINE;DRUG_LOSARTAN;DRUG_GLIPIZIDE;DRUG_DIAZEPAM;DRUG_TRAMADOL;DRUG_HYDROCHLOROTHIAZIDE,0.2593,hard
+EP_0104,84,F,OA;DM;dementia;AF;GERD;COPD;BPH,mild,normal,DRUG_METOPROLOL;DRUG_DONEPEZIL;DRUG_METFORMIN;DRUG_GABAPENTIN;DRUG_APIXABAN;DRUG_OMEPRAZOLE;DRUG_TAMSULOSIN;DRUG_NAPROXEN;DRUG_GLIPIZIDE;DRUG_TRAMADOL;DRUG_IBUPROFEN;DRUG_INSULIN_GLARGINE;DRUG_DIGOXIN;DRUG_WARFARIN,0.2521,hard
+EP_0105,88,F,neuropathy;BPH;HTN;COPD;DM,severe,impaired,DRUG_AMITRIPTYLINE;DRUG_AMLODIPINE;DRUG_METOPROLOL;DRUG_LISINOPRIL;DRUG_FUROSEMIDE;DRUG_PREDNISONE;DRUG_GABAPENTIN;DRUG_INSULIN_GLARGINE;DRUG_METFORMIN;DRUG_HYDROCHLOROTHIAZIDE;DRUG_TAMSULOSIN;DRUG_WARFARIN,0.0208,hard
+EP_0106,73,M,HTN;DM;CKD;OA;depression,normal,normal,DRUG_LISINOPRIL;DRUG_NAPROXEN;DRUG_AMITRIPTYLINE;DRUG_SERTRALINE;DRUG_HYDROCHLOROTHIAZIDE;DRUG_METFORMIN;DRUG_FLUOXETINE;DRUG_INSULIN_GLARGINE;DRUG_AMLODIPINE;DRUG_GLIPIZIDE;DRUG_TRAMADOL;DRUG_METOPROLOL;DRUG_IBUPROFEN;DRUG_WARFARIN,0.3443,hard
+EP_0107,82,M,HTN;GERD;BPH;depression;AF,mild,impaired,DRUG_WARFARIN;DRUG_LISINOPRIL;DRUG_FUROSEMIDE;DRUG_SERTRALINE;DRUG_FLUOXETINE;DRUG_OMEPRAZOLE;DRUG_HYDROCHLOROTHIAZIDE;DRUG_TAMSULOSIN;DRUG_METOPROLOL;DRUG_AMITRIPTYLINE;DRUG_AMLODIPINE;DRUG_DIGOXIN;DRUG_APIXABAN;DRUG_ATORVASTATIN;DRUG_CLOPIDOGREL,0.1167,hard
+EP_0108,72,F,DM;dementia;GERD;BPH;neuropathy;OA;depression,severe,normal,DRUG_FLUOXETINE;DRUG_GABAPENTIN;DRUG_TAMSULOSIN;DRUG_AMITRIPTYLINE;DRUG_NAPROXEN;DRUG_DONEPEZIL;DRUG_OMEPRAZOLE;DRUG_METFORMIN;DRUG_SERTRALINE;DRUG_IBUPROFEN;DRUG_INSULIN_GLARGINE;DRUG_GLIPIZIDE;DRUG_TRAMADOL;DRUG_NORTRIPTYLINE;DRUG_WARFARIN,0.2933,hard
+EP_0109,91,M,COPD;dementia;HF;OA;HTN;DM,mild,normal,DRUG_DONEPEZIL;DRUG_GLIPIZIDE;DRUG_LISINOPRIL;DRUG_DIGOXIN;DRUG_INSULIN_GLARGINE;DRUG_SPIRONOLACTONE;DRUG_AMLODIPINE;DRUG_FUROSEMIDE;DRUG_NAPROXEN;DRUG_METFORMIN;DRUG_WARFARIN,0.2864,hard
+EP_0110,84,M,HF;GERD;dementia;CKD;COPD,severe,impaired,DRUG_DONEPEZIL;DRUG_SPIRONOLACTONE;DRUG_AMLODIPINE;DRUG_FUROSEMIDE;DRUG_LISINOPRIL;DRUG_PREDNISONE;DRUG_OMEPRAZOLE;DRUG_METOPROLOL;DRUG_DIGOXIN;DRUG_AMITRIPTYLINE;DRUG_ASPIRIN;DRUG_NORTRIPTYLINE;DRUG_WARFARIN;DRUG_INSULIN_GLARGINE,0.1807,hard
+EP_0111,91,F,dementia;CKD;DM;AF;BPH,normal,impaired,DRUG_GLIPIZIDE;DRUG_DONEPEZIL;DRUG_TAMSULOSIN;DRUG_WARFARIN;DRUG_APIXABAN;DRUG_FUROSEMIDE;DRUG_METFORMIN;DRUG_AMLODIPINE;DRUG_METOPROLOL;DRUG_DIGOXIN;DRUG_INSULIN_GLARGINE;DRUG_CELECOXIB;DRUG_AMITRIPTYLINE;DRUG_ATORVASTATIN;DRUG_IBUPROFEN,0.1487,hard
+EP_0112,87,F,AF;HF;CKD;neuropathy;HTN;depression,moderate,impaired,DRUG_HYDROCHLOROTHIAZIDE;DRUG_GABAPENTIN;DRUG_WARFARIN;DRUG_FLUOXETINE;DRUG_AMLODIPINE;DRUG_SPIRONOLACTONE;DRUG_FUROSEMIDE;DRUG_METOPROLOL;DRUG_AMITRIPTYLINE;DRUG_APIXABAN;DRUG_SERTRALINE;DRUG_DIGOXIN;DRUG_INSULIN_GLARGINE,0.1308,hard
+EP_0113,92,M,OA;HF;COPD;dementia;neuropathy;CKD,moderate,normal,DRUG_AMLODIPINE;DRUG_DONEPEZIL;DRUG_LISINOPRIL;DRUG_GABAPENTIN;DRUG_METOPROLOL;DRUG_IBUPROFEN;DRUG_SPIRONOLACTONE;DRUG_AMITRIPTYLINE;DRUG_DIGOXIN;DRUG_NAPROXEN;DRUG_FUROSEMIDE;DRUG_WARFARIN;DRUG_INSULIN_GLARGINE,0.3023,hard
+EP_0114,72,M,depression;COPD;neuropathy;dementia;AF,moderate,normal,DRUG_DIGOXIN;DRUG_APIXABAN;DRUG_SERTRALINE;DRUG_METOPROLOL;DRUG_FLUOXETINE;DRUG_DONEPEZIL;DRUG_AMITRIPTYLINE;DRUG_PREDNISONE;DRUG_WARFARIN;DRUG_GABAPENTIN;DRUG_LISINOPRIL;DRUG_NORTRIPTYLINE;DRUG_AMLODIPINE,0.0846,hard
+EP_0115,75,M,HTN;OA;dementia;HF;depression;CKD;AF,mild,normal,DRUG_WARFARIN;DRUG_SERTRALINE;DRUG_METOPROLOL;DRUG_SPIRONOLACTONE;DRUG_FUROSEMIDE;DRUG_TRAMADOL;DRUG_AMLODIPINE;DRUG_NAPROXEN;DRUG_GABAPENTIN;DRUG_IBUPROFEN;DRUG_AMITRIPTYLINE;DRUG_DIGOXIN,0.3233,hard
+EP_0116,76,F,OA;depression;neuropathy;HF,severe,impaired,DRUG_FLUOXETINE;DRUG_NAPROXEN;DRUG_FUROSEMIDE;DRUG_METOPROLOL;DRUG_SPIRONOLACTONE;DRUG_AMITRIPTYLINE;DRUG_TRAMADOL;DRUG_LISINOPRIL;DRUG_GABAPENTIN;DRUG_DIGOXIN;DRUG_IBUPROFEN;DRUG_SERTRALINE;DRUG_WARFARIN;DRUG_INSULIN_GLARGINE,0.4321,hard
+EP_0117,85,M,depression;GERD;neuropathy;HTN,normal,normal,DRUG_GABAPENTIN;DRUG_SERTRALINE;DRUG_OMEPRAZOLE;DRUG_HYDROCHLOROTHIAZIDE;DRUG_FUROSEMIDE;DRUG_AMLODIPINE;DRUG_AMITRIPTYLINE;DRUG_METOPROLOL;DRUG_FLUOXETINE;DRUG_LISINOPRIL;DRUG_SPIRONOLACTONE;DRUG_IBUPROFEN;DRUG_NAPROXEN;DRUG_WARFARIN;DRUG_INSULIN_GLARGINE,0.2087,hard
+EP_0118,70,F,DM;depression;HTN;HF;AF;neuropathy;CKD,severe,impaired,DRUG_FUROSEMIDE;DRUG_HYDROCHLOROTHIAZIDE;DRUG_METFORMIN;DRUG_AMLODIPINE;DRUG_SPIRONOLACTONE;DRUG_GABAPENTIN;DRUG_FLUOXETINE;DRUG_GLIPIZIDE;DRUG_WARFARIN;DRUG_METOPROLOL;DRUG_INSULIN_GLARGINE;DRUG_DIGOXIN,0.1833,hard
+EP_0119,86,F,AF;HTN;HF;OA;dementia,normal,impaired,DRUG_APIXABAN;DRUG_HYDROCHLOROTHIAZIDE;DRUG_IBUPROFEN;DRUG_SPIRONOLACTONE;DRUG_GABAPENTIN;DRUG_NAPROXEN;DRUG_DONEPEZIL;DRUG_LISINOPRIL;DRUG_FUROSEMIDE;DRUG_DIGOXIN;DRUG_INSULIN_GLARGINE,0.2364,hard
+EP_0120,94,F,HF;COPD;dementia;HTN;CKD,mild,normal,DRUG_AMLODIPINE;DRUG_METOPROLOL;DRUG_SPIRONOLACTONE;DRUG_DONEPEZIL;DRUG_HYDROCHLOROTHIAZIDE;DRUG_DIGOXIN;DRUG_LISINOPRIL;DRUG_PREDNISONE;DRUG_FUROSEMIDE;DRUG_AMITRIPTYLINE;DRUG_INSULIN_GLARGINE;DRUG_WARFARIN,0.165,hard

openenv-polypharmacy/inference.py ADDED Viewed

	@@ -0,0 +1,214 @@

+#!/usr/bin/env python3
+"""Baseline LLM inference script for the PolypharmacyEnv.
+Uses the OpenAI Python client to drive an LLM agent through the
+PolypharmacyEnv HTTP API.  Emits structured stdout logs in the
+[START], [STEP], [END] format required by the OpenEnv evaluation spec.
+Environment variables:
+  OPENAI_API_KEY        – required
+  API_BASE_URL          – LLM endpoint (default: https://api.openai.com/v1)
+  MODEL_NAME            – model to use (default: gpt-4.1)
+  HF_TOKEN              – HuggingFace token (optional)
+  POLYPHARMACY_ENV_URL  – environment HTTP base URL (default: http://localhost:7860)
+"""
+from __future__ import annotations
+import json
+import os
+import sys
+import time
+import uuid
+from typing import Any, Dict, List
+import requests
+from openai import OpenAI
+# ── Configuration ────────────────────────────────────────────────────────────
+# Credentials and endpoints are read once, at import time, from the environment.
+API_KEY = os.environ.get("OPENAI_API_KEY", "")
+API_BASE = os.environ.get("API_BASE_URL", "https://api.openai.com/v1")
+MODEL = os.environ.get("MODEL_NAME", "gpt-4.1")
+# NOTE(review): HF_TOKEN is read here but not referenced again in this script.
+HF_TOKEN = os.environ.get("HF_TOKEN", "")
+ENV_URL = os.environ.get("POLYPHARMACY_ENV_URL", "http://localhost:7860")
+# Task IDs evaluated in this order, and the number of episodes played for each.
+TASKS = ["easy_screening", "budgeted_screening", "complex_tradeoff"]
+EPISODES_PER_TASK = 5
+# Built at import time, i.e. before main() checks that API_KEY is non-empty.
+client = OpenAI(api_key=API_KEY, base_url=API_BASE)
+# ── Logging helpers ──────────────────────────────────────────────────────────
+def _log(tag: str, payload: Dict[str, Any]) -> None:
+    print(f"[{tag}] {json.dumps(payload, default=str)}", flush=True)
+def _err(msg: str) -> None:
+    print(msg, file=sys.stderr, flush=True)
+# ── Environment HTTP helpers ─────────────────────────────────────────────────
+def env_reset(task_id: str) -> Dict[str, Any]:
+    resp = requests.post(f"{ENV_URL}/reset", json={"task_id": task_id}, timeout=30)
+    resp.raise_for_status()
+    return resp.json()
+def env_step(action: Dict[str, Any]) -> Dict[str, Any]:
+    resp = requests.post(f"{ENV_URL}/step", json={"action": action}, timeout=30)
+    resp.raise_for_status()
+    return resp.json()
+# ── Observation → prompt ─────────────────────────────────────────────────────
+# System message sent with every LLM request: describes the reviewer role and the
+# three action shapes (query_ddi, propose_intervention, finish_review) as JSON.
+SYSTEM_PROMPT = """\
+You are a clinical pharmacist AI assistant reviewing an elderly patient's medication regimen.
+You must reduce drug-interaction risk and address Beers-criteria violations while minimising
+unnecessary medication changes.
+Available actions (respond with STRICT JSON, no extra text):
+1. Query a drug pair for interactions:
+   {"action_type": "query_ddi", "drug_id_1": "...", "drug_id_2": "..."}
+2. Propose an intervention:
+   {"action_type": "propose_intervention", "target_drug_id": "...",
+    "intervention_type": "stop|dose_reduce|substitute|add_monitoring",
+    "proposed_new_drug_id": "...(optional)", "rationale": "..."}
+3. Finish the review:
+   {"action_type": "finish_review"}
+Respond with EXACTLY ONE JSON object per turn. No markdown, no explanation outside JSON.
+"""
+def _summarise_obs(obs: Dict[str, Any]) -> str:
+    meds = obs.get("current_medications", [])
+    med_summary = "; ".join(
+        f"{m['drug_id']}({m['generic_name']},{m['dose_mg']}mg)"
+        for m in meds
+    )
+    queries = obs.get("interaction_queries", [])
+    q_summary = "; ".join(
+        f"{q['drug_id_1']}+{q['drug_id_2']}={q.get('severity','?')}"
+        for q in queries
+    )
+    interventions = obs.get("interventions", [])
+    iv_summary = "; ".join(
+        f"{iv['action_type']}({iv['target_drug_id']})"
+        for iv in interventions
+    )
+    return (
+        f"Patient: age={obs.get('age')}, sex={obs.get('sex')}, "
+        f"conditions={obs.get('conditions')}, "
+        f"eGFR={obs.get('eGFR_category')}, liver={obs.get('liver_function_category')}\n"
+        f"Medications: {med_summary}\n"
+        f"Queries so far: {q_summary or 'none'}\n"
+        f"Interventions so far: {iv_summary or 'none'}\n"
+        f"Remaining query budget: {obs.get('remaining_query_budget')}\n"
+        f"Remaining intervention budget: {obs.get('remaining_intervention_budget')}\n"
+        f"Step: {obs.get('step_index')}"
+    )
+# ── LLM call ─────────────────────────────────────────────────────────────────
+def _ask_llm(obs_summary: str) -> Dict[str, Any]:
+    """Call the LLM and parse a PolypharmacyAction JSON."""
+    try:
+        resp = client.chat.completions.create(
+            model=MODEL,
+            messages=[
+                {"role": "system", "content": SYSTEM_PROMPT},
+                {"role": "user", "content": obs_summary},
+            ],
+            temperature=0.2,
+            max_tokens=256,
+        )
+        text = resp.choices[0].message.content or ""
+        # Strip markdown fences if present
+        text = text.strip()
+        if text.startswith("```"):
+            text = text.split("\n", 1)[-1]
+        if text.endswith("```"):
+            text = text.rsplit("```", 1)[0]
+        text = text.strip()
+        return json.loads(text)
+    except Exception as e:
+        _err(f"LLM parse error: {e}")
+        return {"action_type": "finish_review"}
+# ── Main loop ────────────────────────────────────────────────────────────────
+def main() -> None:
+    if not API_KEY:
+        _err("OPENAI_API_KEY is required")
+        sys.exit(1)
+    run_id = str(uuid.uuid4())[:8]
+    for task_id in TASKS:
+        task_scores: List[float] = []
+        task_rewards: List[float] = []
+        _log("START", {
+            "run_id": run_id,
+            "task_id": task_id,
+            "model": MODEL,
+            "api_base": API_BASE,
+            "episodes": EPISODES_PER_TASK,
+        })
+        for ep_idx in range(EPISODES_PER_TASK):
+            reset_resp = env_reset(task_id)
+            obs = reset_resp["observation"]
+            done = reset_resp.get("done", False)
+            episode_id = obs.get("episode_id", f"ep_{ep_idx}")
+            total_reward = 0.0
+            step_idx = 0
+            while not done:
+                obs_summary = _summarise_obs(obs)
+                action_payload = _ask_llm(obs_summary)
+                step_resp = env_step(action_payload)
+                obs = step_resp["observation"]
+                reward = step_resp.get("reward", 0.0)
+                done = step_resp.get("done", False)
+                total_reward += reward
+                _log("STEP", {
+                    "run_id": run_id,
+                    "task_id": task_id,
+                    "episode_id": episode_id,
+                    "step_index": step_idx,
+                    "observation_summary": obs_summary[:200],
+                    "action_payload": action_payload,
+                    "reward": reward,
+                    "done": done,
+                })
+                step_idx += 1
+            grader_score = step_resp.get("info", {}).get("grader_score", 0.0)
+            task_scores.append(grader_score)
+            task_rewards.append(total_reward)
+        _log("END", {
+            "run_id": run_id,
+            "task_id": task_id,
+            "episodes": EPISODES_PER_TASK,
+            "avg_grader_score": sum(task_scores) / max(len(task_scores), 1),
+            "avg_total_reward": sum(task_rewards) / max(len(task_rewards), 1),
+            "per_episode_scores": task_scores,
+        })
+    _err("Inference complete.")
+if __name__ == "__main__":
+    main()

openenv-polypharmacy/openenv.yaml ADDED Viewed

	@@ -0,0 +1,30 @@

+spec_version: 1
+name: polypharmacy_env
+version: "0.1.0"
+description: >
+  An OpenEnv environment that simulates elderly polypharmacy medication review.
+  An RL agent acts as a clinical pharmacist assistant, identifying dangerous
+  drug–drug interactions, Beers-criteria violations, and proposing safe
+  interventions (stop, dose-reduce, substitute, monitor).
+author: "PolypharmacyEnv Team"
+tags:
+  - healthcare
+  - polypharmacy
+  - openenv
+type: space
+runtime: fastapi
+app: src.polypharmacy_env.api.server:app
+port: 7860
+tasks:
+  - id: easy_screening
+    description: "Small regimen (3-5 drugs) with one severe DDI. Identify and resolve it."
+    difficulty: easy
+  - id: budgeted_screening
+    description: "Medium regimen (6-10 drugs) with multiple DDIs and Beers issues under query/intervention budgets."
+    difficulty: medium
+  - id: complex_tradeoff
+    description: "Large regimen (10-15 drugs) including critical drugs. Balance risk reduction against regimen disruption."
+    difficulty: hard

openenv-polypharmacy/pyproject.toml ADDED Viewed

	@@ -0,0 +1,39 @@

+[build-system]
+requires = ["setuptools>=68.0", "wheel"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "polypharmacy-env"
+version = "0.1.0"
+description = "OpenEnv environment for elderly polypharmacy medication-review safety"
+requires-python = ">=3.10"
+dependencies = [
+    "fastapi>=0.104.0",
+    "uvicorn>=0.24.0",
+    "pydantic>=2.0.0",
+    "requests>=2.31.0",
+    "openai>=1.0.0",
+]
+[project.optional-dependencies]
+dev = [
+    "pytest>=7.0.0",
+    "httpx>=0.25.0",
+    "black",
+    "isort",
+]
+[tool.setuptools.packages.find]
+where = ["src"]
+[tool.pytest.ini_options]
+testpaths = ["src/polypharmacy_env/tests"]
+pythonpath = ["src"]
+[tool.black]
+line-length = 99
+target-version = ["py310"]
+[tool.isort]
+profile = "black"
+line_length = 99

openenv-polypharmacy/requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+fastapi>=0.104.0
+uvicorn>=0.24.0
+pydantic>=2.0.0
+requests>=2.31.0
+openai>=1.0.0
+httpx>=0.25.0
+pytest>=7.0.0

openenv-polypharmacy/scripts/preprocess_data.py ADDED Viewed

	@@ -0,0 +1,301 @@

+"""Synthetic data generator for the PolypharmacyEnv.
+Generates:
+  - data/lookups/drug_metadata.csv
+  - data/lookups/ddi_rules.csv
+  - data/lookups/beers_criteria.csv
+  - data/processed/patients_polypharmacy.csv
+"""
+from __future__ import annotations
+import csv
+import random
+import sys
+from itertools import combinations
+from pathlib import Path
+# Two levels up from this file: the directory that contains scripts/ and data/.
+ROOT = Path(__file__).resolve().parents[1]
+# Output directories for the lookup tables and the generated patient episodes.
+LOOKUPS = ROOT / "data" / "lookups"
+PROCESSED = ROOT / "data" / "processed"
+# ── Drug catalogue ───────────────────────────────────────────────────────────
+# One tuple per drug; column order matches the header row written by
+# _gen_drug_metadata (high_risk is 1/0, doses are in mg).
+# NOTE(review): DRUG_LOSARTAN and DRUG_CELECOXIB reuse the ATC classes of the
+# ACE inhibitors (C09AA) and propionic-acid NSAIDs (M01AE); the WHO ATC index
+# appears to list them under C09CA and M01AH respectively - confirm before
+# anything relies on atc_class.
+DRUGS = [
+    # drug_id, generic_name, atc_class, high_risk, default, min, max
+    ("DRUG_WARFARIN",        "warfarin",         "B01AA",  1, 5.0,  1.0, 10.0),
+    ("DRUG_APIXABAN",        "apixaban",         "B01AF",  1, 5.0,  2.5, 10.0),
+    ("DRUG_METFORMIN",       "metformin",        "A10BA",  0, 1000, 500, 2000),
+    ("DRUG_GLIPIZIDE",       "glipizide",        "A10BB",  1, 5.0,  2.5, 20.0),
+    ("DRUG_LISINOPRIL",      "lisinopril",       "C09AA",  0, 10.0, 2.5, 40.0),
+    ("DRUG_AMLODIPINE",      "amlodipine",       "C08CA",  0, 5.0,  2.5, 10.0),
+    ("DRUG_METOPROLOL",      "metoprolol",       "C07AB",  0, 50.0, 25.0,200.0),
+    ("DRUG_DIGOXIN",         "digoxin",          "C01AA",  1, 0.25, 0.0625,0.5),
+    ("DRUG_FUROSEMIDE",      "furosemide",       "C03CA",  0, 40.0, 20.0,160.0),
+    ("DRUG_SPIRONOLACTONE",  "spironolactone",   "C03DA",  0, 25.0, 12.5, 50.0),
+    ("DRUG_ATORVASTATIN",    "atorvastatin",     "C10AA",  0, 20.0, 10.0, 80.0),
+    ("DRUG_SIMVASTATIN",     "simvastatin",      "C10AA",  0, 20.0, 10.0, 40.0),
+    ("DRUG_OMEPRAZOLE",      "omeprazole",       "A02BC",  0, 20.0, 10.0, 40.0),
+    ("DRUG_DIAZEPAM",        "diazepam",         "N05BA",  1, 5.0,  2.0, 10.0),
+    ("DRUG_ALPRAZOLAM",      "alprazolam",       "N05BA",  1, 0.5,  0.25, 2.0),
+    ("DRUG_AMITRIPTYLINE",   "amitriptyline",    "N06AA",  1, 25.0, 10.0, 75.0),
+    ("DRUG_INSULIN_GLARGINE","insulin glargine", "A10AE",  1, 20.0, 10.0, 60.0),
+    ("DRUG_PREDNISONE",      "prednisone",       "H02AB",  0, 10.0, 5.0,  60.0),
+    ("DRUG_NAPROXEN",        "naproxen",         "M01AE",  1, 500,  250, 1000),
+    ("DRUG_IBUPROFEN",       "ibuprofen",        "M01AE",  1, 400,  200,  800),
+    ("DRUG_CLOPIDOGREL",     "clopidogrel",      "B01AC",  0, 75.0, 75.0, 75.0),
+    ("DRUG_ASPIRIN",         "aspirin",          "B01AC",  0, 81.0, 81.0, 325.0),
+    ("DRUG_HYDROCHLOROTHIAZIDE","HCTZ",          "C03AA",  0, 25.0, 12.5, 50.0),
+    ("DRUG_DONEPEZIL",       "donepezil",        "N06DA",  0, 5.0,  5.0,  10.0),
+    ("DRUG_GABAPENTIN",      "gabapentin",       "N03AX",  0, 300,  100, 1200),
+    ("DRUG_TRAMADOL",        "tramadol",         "N02AX",  1, 50.0, 25.0, 200.0),
+    ("DRUG_FLUOXETINE",      "fluoxetine",       "N06AB",  0, 20.0, 10.0, 60.0),
+    ("DRUG_SERTRALINE",      "sertraline",       "N06AB",  0, 50.0, 25.0, 200.0),
+    ("DRUG_CIPROFLOXACIN",   "ciprofloxacin",    "J01MA",  0, 500,  250, 750),
+    ("DRUG_TAMSULOSIN",      "tamsulosin",       "G04CA",  0, 0.4,  0.4,  0.8),
+    ("DRUG_CELECOXIB",       "celecoxib",        "M01AE",  0, 200,  100,  400),
+    ("DRUG_NORTRIPTYLINE",   "nortriptyline",    "N06AA",  0, 25.0, 10.0, 75.0),
+    ("DRUG_LOSARTAN",        "losartan",         "C09AA",  0, 50.0, 25.0, 100.0),
+]
+# ── DDI rules ────────────────────────────────────────────────────────────────
+# Pairwise interaction rules.  Pair order here is arbitrary; _gen_ddi_rules
+# sorts each pair with _normalise_pair before writing it out.
+# NOTE(review): "DRUG_AMIODARONE" (digoxin rule below) has no entry in DRUGS,
+# so that rule refers to a drug outside the catalogue - add the drug or drop
+# the rule.
+DDI_PAIRS: list[tuple[str, str, str, str, str, float]] = [
+    # id1, id2, severity, mechanism, recommendation, base_risk_score
+    ("DRUG_WARFARIN",   "DRUG_NAPROXEN",    "severe",   "Increased bleeding risk – NSAID inhibits platelet + anticoagulant",  "avoid_combination",  0.90),
+    ("DRUG_WARFARIN",   "DRUG_IBUPROFEN",   "severe",   "Increased bleeding risk – NSAID + anticoagulant synergy",            "avoid_combination",  0.88),
+    ("DRUG_WARFARIN",   "DRUG_ASPIRIN",     "moderate", "Additive antiplatelet + anticoagulant bleeding risk",                 "monitor_closely",    0.55),
+    ("DRUG_WARFARIN",   "DRUG_FLUOXETINE",  "moderate", "SSRI increases serotonin and may potentiate bleeding",               "monitor_closely",    0.45),
+    ("DRUG_WARFARIN",   "DRUG_CIPROFLOXACIN","moderate","CYP1A2 inhibition raises warfarin levels",                            "dose_adjust",        0.50),
+    ("DRUG_APIXABAN",   "DRUG_NAPROXEN",    "severe",   "DOAC + NSAID – high bleeding risk",                                  "avoid_combination",  0.85),
+    ("DRUG_APIXABAN",   "DRUG_ASPIRIN",     "moderate", "Additive bleeding risk with antiplatelet",                            "monitor_closely",    0.50),
+    ("DRUG_DIGOXIN",    "DRUG_AMIODARONE",  "severe",   "Amiodarone increases digoxin levels – toxicity risk",                 "dose_adjust",        0.80),
+    ("DRUG_DIGOXIN",    "DRUG_SPIRONOLACTONE","moderate","Spironolactone may raise digoxin levels",                             "monitor_closely",    0.40),
+    ("DRUG_METFORMIN",  "DRUG_CIPROFLOXACIN","moderate","Fluoroquinolone may cause dysglycemia with metformin",                "monitor_closely",    0.35),
+    ("DRUG_DIAZEPAM",   "DRUG_TRAMADOL",    "severe",   "CNS depression – benzodiazepine + opioid",                           "avoid_combination",  0.92),
+    ("DRUG_ALPRAZOLAM",  "DRUG_TRAMADOL",   "severe",   "CNS depression – benzodiazepine + opioid",                           "avoid_combination",  0.91),
+    ("DRUG_LISINOPRIL", "DRUG_SPIRONOLACTONE","moderate","Hyperkalemia risk – ACE-I + K-sparing diuretic",                     "monitor_closely",    0.48),
+    ("DRUG_LISINOPRIL", "DRUG_NAPROXEN",    "moderate", "NSAID reduces ACE-I efficacy, renal risk",                            "monitor_closely",    0.42),
+    ("DRUG_SIMVASTATIN","DRUG_AMLODIPINE",  "moderate", "CYP3A4 interaction increases statin exposure",                        "dose_adjust",        0.38),
+    ("DRUG_ATORVASTATIN","DRUG_CIPROFLOXACIN","mild",   "Minor CYP interaction raising statin levels",                         "no_action",          0.15),
+    ("DRUG_CLOPIDOGREL","DRUG_OMEPRAZOLE",  "moderate", "PPI reduces clopidogrel activation via CYP2C19",                     "dose_adjust",        0.45),
+    ("DRUG_INSULIN_GLARGINE","DRUG_GLIPIZIDE","moderate","Additive hypoglycemia risk",                                         "monitor_closely",    0.50),
+    ("DRUG_FLUOXETINE", "DRUG_TRAMADOL",    "severe",   "Serotonin syndrome risk – SSRI + serotonergic opioid",               "avoid_combination",  0.82),
+    ("DRUG_AMITRIPTYLINE","DRUG_TRAMADOL",  "severe",   "Serotonin syndrome + CNS depression",                                "avoid_combination",  0.85),
+    ("DRUG_METOPROLOL", "DRUG_DIGOXIN",     "moderate", "Additive bradycardia",                                               "monitor_closely",    0.40),
+    ("DRUG_FUROSEMIDE", "DRUG_DIGOXIN",     "moderate", "Loop diuretic causes hypokalemia increasing digoxin toxicity risk",   "monitor_closely",    0.45),
+    ("DRUG_PREDNISONE", "DRUG_NAPROXEN",    "moderate", "GI bleeding risk – corticosteroid + NSAID",                           "monitor_closely",    0.50),
+    ("DRUG_PREDNISONE", "DRUG_WARFARIN",    "mild",     "Corticosteroid may alter INR",                                       "monitor_closely",    0.25),
+]
+# ── Beers criteria ───────────────────────────────────────────────────────────
+# Beers-style flags per drug.  A condition of None is written to the CSV as an
+# empty string by _gen_beers; a drug may appear more than once (e.g. naproxen).
+# NOTE(review): condition-specific rows use two spellings of the criterion -
+# "avoid" with a condition (naproxen/ibuprofen + CKD) and "avoid_in_condition"
+# (prednisone, donepezil) - confirm consumers treat both as intended.
+# NOTE(review): the donepezil row names "dementia" as the condition to avoid it
+# in, while CONDITION_DRUG_MAP prescribes donepezil for dementia - verify.
+BEERS_ENTRIES: list[tuple[str, str, str | None, str]] = [
+    # drug_id, criterion_type, condition, rationale
+    ("DRUG_DIAZEPAM",       "avoid",              None,       "Long-acting benzodiazepine: falls, fractures, cognitive impairment in elderly"),
+    ("DRUG_ALPRAZOLAM",     "avoid",              None,       "Benzodiazepine: falls, fractures, cognitive impairment in elderly"),
+    ("DRUG_AMITRIPTYLINE",  "avoid",              None,       "Strongly anticholinergic TCA: sedation, confusion, urinary retention in elderly"),
+    ("DRUG_GLIPIZIDE",      "caution",            None,       "Sulfonylurea: hypoglycemia risk higher in elderly"),
+    ("DRUG_NAPROXEN",       "avoid",              "CKD",      "NSAID contraindicated in CKD – renal deterioration, fluid retention"),
+    ("DRUG_IBUPROFEN",      "avoid",              "CKD",      "NSAID contraindicated in CKD – renal deterioration, fluid retention"),
+    ("DRUG_NAPROXEN",       "caution",            None,       "NSAID: GI bleeding and renal risk in elderly"),
+    ("DRUG_IBUPROFEN",      "caution",            None,       "NSAID: GI bleeding and renal risk in elderly"),
+    ("DRUG_DIGOXIN",        "dose_adjust",        None,       "Avoid doses > 0.125 mg/day in elderly – toxicity risk"),
+    ("DRUG_TRAMADOL",       "avoid",              None,       "Opioid: CNS depression, falls, constipation in elderly"),
+    ("DRUG_METFORMIN",      "dose_adjust",        "CKD",      "Reduce dose or avoid if eGFR < 30 – lactic acidosis risk"),
+    ("DRUG_INSULIN_GLARGINE","caution",           None,       "Tight glycemic control increases hypoglycemia risk in elderly"),
+    ("DRUG_PREDNISONE",     "avoid_in_condition", "DM",       "Corticosteroid worsens glycemic control in diabetes"),
+    ("DRUG_DONEPEZIL",      "avoid_in_condition", "dementia", "Limited benefit, GI side effects; reassess regularly"),
+    ("DRUG_CIPROFLOXACIN",  "caution",            None,       "Fluoroquinolone: tendon rupture, QT prolongation risk in elderly"),
+]
+# ── Conditions pool & constraints ────────────────────────────────────────────
+# Condition labels that _gen_patients samples from for each synthetic patient.
+ALL_CONDITIONS = ["HTN", "DM", "HF", "CKD", "AF", "COPD", "OA", "depression", "dementia", "GERD", "BPH", "neuropathy"]
+# Category values sampled (with per-difficulty weights) in _gen_patients.
+EGFR_CATS = ["normal", "mild", "moderate", "severe"]
+LIVER_CATS = ["normal", "impaired"]
+# Drugs that make clinical sense per condition
+# (the candidate pool regimens are drawn from before random padding).
+CONDITION_DRUG_MAP: dict[str, list[str]] = {
+    "HTN":        ["DRUG_LISINOPRIL", "DRUG_AMLODIPINE", "DRUG_METOPROLOL", "DRUG_HYDROCHLOROTHIAZIDE", "DRUG_FUROSEMIDE"],
+    "DM":         ["DRUG_METFORMIN", "DRUG_GLIPIZIDE", "DRUG_INSULIN_GLARGINE"],
+    "HF":         ["DRUG_FUROSEMIDE", "DRUG_SPIRONOLACTONE", "DRUG_METOPROLOL", "DRUG_LISINOPRIL", "DRUG_DIGOXIN"],
+    "CKD":        ["DRUG_FUROSEMIDE", "DRUG_AMLODIPINE"],
+    "AF":         ["DRUG_WARFARIN", "DRUG_APIXABAN", "DRUG_METOPROLOL", "DRUG_DIGOXIN"],
+    "COPD":       ["DRUG_PREDNISONE"],
+    "OA":         ["DRUG_NAPROXEN", "DRUG_IBUPROFEN", "DRUG_TRAMADOL", "DRUG_GABAPENTIN"],
+    "depression": ["DRUG_FLUOXETINE", "DRUG_SERTRALINE", "DRUG_AMITRIPTYLINE"],
+    "dementia":   ["DRUG_DONEPEZIL"],
+    "GERD":       ["DRUG_OMEPRAZOLE"],
+    "BPH":        ["DRUG_TAMSULOSIN"],
+    "neuropathy": ["DRUG_GABAPENTIN", "DRUG_AMITRIPTYLINE"],
+}
+def _normalise_pair(a: str, b: str) -> tuple[str, str]:
+    return (a, b) if a < b else (b, a)
+def _gen_drug_metadata(out: Path) -> None:
+    out.parent.mkdir(parents=True, exist_ok=True)
+    with open(out, "w", newline="") as f:
+        w = csv.writer(f)
+        w.writerow(["drug_id", "generic_name", "atc_class", "is_high_risk_elderly",
+                     "default_dose_mg", "min_dose_mg", "max_dose_mg"])
+        for row in DRUGS:
+            w.writerow(row)
+def _gen_ddi_rules(out: Path) -> None:
+    out.parent.mkdir(parents=True, exist_ok=True)
+    with open(out, "w", newline="") as f:
+        w = csv.writer(f)
+        w.writerow(["drug_id_1", "drug_id_2", "severity", "mechanism",
+                     "recommendation", "base_risk_score"])
+        for pair in DDI_PAIRS:
+            a, b = _normalise_pair(pair[0], pair[1])
+            w.writerow([a, b, pair[2], pair[3], pair[4], pair[5]])
+def _gen_beers(out: Path) -> None:
+    out.parent.mkdir(parents=True, exist_ok=True)
+    with open(out, "w", newline="") as f:
+        w = csv.writer(f)
+        w.writerow(["drug_id", "criterion_type", "condition", "rationale"])
+        for row in BEERS_ENTRIES:
+            w.writerow([row[0], row[1], row[2] or "", row[3]])
+def _gen_patients(out: Path, n_easy: int = 40, n_med: int = 40, n_hard: int = 40) -> None:
+    """Generate synthetic patient episodes tagged by difficulty."""
+    out.parent.mkdir(parents=True, exist_ok=True)
+    rng = random.Random(42)
+    drug_ids = [d[0] for d in DRUGS]
+    # Build severity lookup for quick reference
+    severe_pairs: set[tuple[str, str]] = set()
+    for pair in DDI_PAIRS:
+        if pair[2] == "severe":
+            severe_pairs.add(_normalise_pair(pair[0], pair[1]))
+    rows: list[list[str]] = []
+    ep_counter = 0
+    def _pick_conditions(n: int) -> list[str]:
+        return rng.sample(ALL_CONDITIONS, min(n, len(ALL_CONDITIONS)))
+    def _drugs_for_conditions(conds: list[str], target_n: int) -> list[str]:
+        pool: list[str] = []
+        for c in conds:
+            pool.extend(CONDITION_DRUG_MAP.get(c, []))
+        pool = list(dict.fromkeys(pool))  # deduplicate preserving order
+        rng.shuffle(pool)
+        selected = pool[:target_n]
+        # Pad with random drugs if needed
+        remaining = [d for d in drug_ids if d not in selected]
+        while len(selected) < target_n and remaining:
+            pick = rng.choice(remaining)
+            remaining.remove(pick)
+            selected.append(pick)
+        return selected
+    def _count_severe(meds: list[str]) -> int:
+        count = 0
+        for a, b in combinations(meds, 2):
+            if _normalise_pair(a, b) in severe_pairs:
+                count += 1
+        return count
+    def _baseline_risk(meds: list[str]) -> float:
+        risk = 0.0
+        for pair in DDI_PAIRS:
+            a, b = _normalise_pair(pair[0], pair[1])
+            if a in meds and b in meds:
+                risk += pair[5]
+        return min(risk / max(len(meds), 1), 1.0)
+    # Easy episodes: 3-5 drugs, exactly 1 severe DDI
+    for _ in range(n_easy):
+        ep_counter += 1
+        n_drugs = rng.randint(3, 5)
+        conds = _pick_conditions(rng.randint(1, 3))
+        # Ensure at least one severe DDI pair is present
+        for attempt in range(50):
+            meds = _drugs_for_conditions(conds, n_drugs)
+            if _count_severe(meds) >= 1:
+                break
+        else:
+            # Force a known severe pair
+            sp = rng.choice(list(severe_pairs))
+            meds = list(set(meds[:n_drugs - 2]) | {sp[0], sp[1]})[:n_drugs]
+        age = rng.randint(65, 90)
+        sex = rng.choice(["M", "F"])
+        egfr = rng.choices(EGFR_CATS, weights=[4, 3, 2, 1])[0]
+        liver = rng.choices(LIVER_CATS, weights=[8, 2])[0]
+        br = round(_baseline_risk(meds), 4)
+        rows.append([
+            f"EP_{ep_counter:04d}", str(age), sex, ";".join(conds),
+            egfr, liver, ";".join(meds), str(br), "easy",
+        ])
+    # Medium episodes: 6-10 drugs, multiple DDIs
+    for _ in range(n_med):
+        ep_counter += 1
+        n_drugs = rng.randint(6, 10)
+        conds = _pick_conditions(rng.randint(3, 5))
+        meds = _drugs_for_conditions(conds, n_drugs)
+        age = rng.randint(65, 92)
+        sex = rng.choice(["M", "F"])
+        egfr = rng.choices(EGFR_CATS, weights=[3, 3, 3, 1])[0]
+        liver = rng.choices(LIVER_CATS, weights=[7, 3])[0]
+        br = round(_baseline_risk(meds), 4)
+        rows.append([
+            f"EP_{ep_counter:04d}", str(age), sex, ";".join(conds),
+            egfr, liver, ";".join(meds), str(br), "medium",
+        ])
+    # Hard episodes: 10-15 drugs, many issues, include critical drugs
+    for _ in range(n_hard):
+        ep_counter += 1
+        n_drugs = rng.randint(10, 15)
+        conds = _pick_conditions(rng.randint(4, 7))
+        meds = _drugs_for_conditions(conds, n_drugs)
+        # Ensure some critical drugs are present
+        critical = ["DRUG_WARFARIN", "DRUG_INSULIN_GLARGINE", "DRUG_DIGOXIN"]
+        for cd in rng.sample(critical, min(2, len(critical))):
+            if cd not in meds and len(meds) < 15:
+                meds.append(cd)
+        age = rng.randint(70, 95)
+        sex = rng.choice(["M", "F"])
+        egfr = rng.choices(EGFR_CATS, weights=[2, 2, 3, 3])[0]
+        liver = rng.choices(LIVER_CATS, weights=[6, 4])[0]
+        br = round(_baseline_risk(meds), 4)
+        rows.append([
+            f"EP_{ep_counter:04d}", str(age), sex, ";".join(conds),
+            egfr, liver, ";".join(meds), str(br), "hard",
+        ])
+    with open(out, "w", newline="") as f:
+        w = csv.writer(f)
+        w.writerow(["episode_id", "age", "sex", "conditions", "eGFR_category",
+                     "liver_function_category", "medication_ids",
+                     "baseline_risk_score", "difficulty"])
+        for r in rows:
+            w.writerow(r)
+def main() -> None:
+    print("Generating drug_metadata.csv …")
+    _gen_drug_metadata(LOOKUPS / "drug_metadata.csv")
+    print("Generating ddi_rules.csv …")
+    _gen_ddi_rules(LOOKUPS / "ddi_rules.csv")
+    print("Generating beers_criteria.csv …")
+    _gen_beers(LOOKUPS / "beers_criteria.csv")
+    print("Generating patients_polypharmacy.csv …")
+    _gen_patients(PROCESSED / "patients_polypharmacy.csv")
+    print("Done.")
+if __name__ == "__main__":
+    main()

openenv-polypharmacy/scripts/run_validation.sh ADDED Viewed

	@@ -0,0 +1,15 @@

+#!/usr/bin/env bash
+# Run validation: tests, server smoke test, and heuristic baseline
+set -euo pipefail
+cd "$(dirname "$0")/.."
+echo "=== Running unit tests ==="
+PYTHONPATH=src python3 -m pytest src/polypharmacy_env/tests/ -v
+echo ""
+echo "=== Running heuristic baseline ==="
+PYTHONPATH=src python3 -m polypharmacy_env.baselines.heuristic_agent
+echo ""
+echo "=== Validation complete ==="

openenv-polypharmacy/src/polypharmacy_env.egg-info/PKG-INFO ADDED Viewed

	@@ -0,0 +1,15 @@

+Metadata-Version: 2.4
+Name: polypharmacy-env
+Version: 0.1.0
+Summary: OpenEnv environment for elderly polypharmacy medication-review safety
+Requires-Python: >=3.10
+Requires-Dist: fastapi>=0.104.0
+Requires-Dist: uvicorn>=0.24.0
+Requires-Dist: pydantic>=2.0.0
+Requires-Dist: requests>=2.31.0
+Requires-Dist: openai>=1.0.0
+Provides-Extra: dev
+Requires-Dist: pytest>=7.0.0; extra == "dev"
+Requires-Dist: httpx>=0.25.0; extra == "dev"
+Requires-Dist: black; extra == "dev"
+Requires-Dist: isort; extra == "dev"

openenv-polypharmacy/src/polypharmacy_env.egg-info/SOURCES.txt ADDED Viewed

	@@ -0,0 +1,25 @@

+README.md
+pyproject.toml
+src/polypharmacy_env/__init__.py
+src/polypharmacy_env/config.py
+src/polypharmacy_env/data_loader.py
+src/polypharmacy_env/ddi_simulator.py
+src/polypharmacy_env/env_core.py
+src/polypharmacy_env/graders.py
+src/polypharmacy_env/models.py
+src/polypharmacy_env/rewards.py
+src/polypharmacy_env/tasks.py
+src/polypharmacy_env.egg-info/PKG-INFO
+src/polypharmacy_env.egg-info/SOURCES.txt
+src/polypharmacy_env.egg-info/dependency_links.txt
+src/polypharmacy_env.egg-info/requires.txt
+src/polypharmacy_env.egg-info/top_level.txt
+src/polypharmacy_env/api/__init__.py
+src/polypharmacy_env/api/schemas.py
+src/polypharmacy_env/api/server.py
+src/polypharmacy_env/baselines/__init__.py
+src/polypharmacy_env/baselines/heuristic_agent.py
+src/polypharmacy_env/baselines/random_agent.py
+src/polypharmacy_env/tests/__init__.py
+src/polypharmacy_env/tests/test_api.py
+src/polypharmacy_env/tests/test_env_core.py

openenv-polypharmacy/src/polypharmacy_env.egg-info/dependency_links.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+

openenv-polypharmacy/src/polypharmacy_env.egg-info/requires.txt ADDED Viewed

	@@ -0,0 +1,11 @@

+fastapi>=0.104.0
+uvicorn>=0.24.0
+pydantic>=2.0.0
+requests>=2.31.0
+openai>=1.0.0
+[dev]
+pytest>=7.0.0
+httpx>=0.25.0
+black
+isort

openenv-polypharmacy/src/polypharmacy_env.egg-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ polypharmacy_env

openenv-polypharmacy/src/polypharmacy_env/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ """PolypharmacyEnv – an OpenEnv environment for elderly polypharmacy safety."""

openenv-polypharmacy/src/polypharmacy_env/api/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ """API package."""

openenv-polypharmacy/src/polypharmacy_env/api/schemas.py ADDED Viewed

	@@ -0,0 +1,36 @@

+"""HTTP request/response schemas for the OpenEnv-compliant API."""
+from __future__ import annotations
+from typing import Any, Dict, Optional
+from pydantic import BaseModel, Field
+class ResetRequest(BaseModel):
+    # Body of POST /reset.  Every field is optional and defaults to None.
+    task_id: Optional[str] = None
+    # When given, the seed must be >= 0.
+    seed: Optional[int] = Field(default=None, ge=0)
+    # When given, at most 255 characters.
+    episode_id: Optional[str] = Field(default=None, max_length=255)
+class StepRequest(BaseModel):
+    # Body of POST /step.
+    # Raw action payload; the server validates it by building a PolypharmacyAction.
+    action: Dict[str, Any]
+    # When given, must be > 0.
+    timeout_s: Optional[float] = Field(default=None, gt=0)
+    # When given, at most 255 characters.
+    request_id: Optional[str] = Field(default=None, max_length=255)
+class ResetResponse(BaseModel):
+    # Reply to POST /reset: the first observation of the new episode.
+    observation: Dict[str, Any]
+    reward: Optional[float] = None
+    done: bool = False
+class StepResponse(BaseModel):
+    # Reply to POST /step: next observation, step reward, done flag and extra info.
+    observation: Dict[str, Any]
+    # Optional: clients should be prepared for a null reward.
+    reward: Optional[float] = None
+    done: bool = False
+    # default_factory gives every response its own empty dict.
+    info: Dict[str, Any] = Field(default_factory=dict)
+class HealthResponse(BaseModel):
+    # Reply to GET /health; status defaults to "healthy".
+    status: str = "healthy"

openenv-polypharmacy/src/polypharmacy_env/api/server.py ADDED Viewed

	@@ -0,0 +1,67 @@

+"""FastAPI server exposing the PolypharmacyEnv via OpenEnv HTTP endpoints."""
+from __future__ import annotations
+from fastapi import FastAPI, HTTPException
+from ..env_core import PolypharmacyEnv
+from ..models import PolypharmacyAction, PolypharmacyState
+from .schemas import (
+    HealthResponse,
+    ResetRequest,
+    ResetResponse,
+    StepRequest,
+    StepResponse,
+)
+# FastAPI application object that the route decorators below register on.
+app = FastAPI(
+    title="PolypharmacyEnv",
+    description="OpenEnv environment for elderly polypharmacy medication-review safety.",
+    version="0.1.0",
+)
+# Module-level environment instance (single-session for simplicity)
+# Every request handler in this module reads and mutates this one object.
+_env = PolypharmacyEnv()
+@app.post("/reset", response_model=ResetResponse)
+def reset(req: ResetRequest | None = None) -> ResetResponse:
+    """Reset the environment and start a new episode."""
+    task_id = req.task_id if req else None
+    seed = req.seed if req else None
+    episode_id = req.episode_id if req else None
+    obs = _env.reset(task_id=task_id, seed=seed, episode_id=episode_id)
+    return ResetResponse(
+        observation=obs.model_dump(),
+        reward=0.0,
+        done=False,
+    )
+@app.post("/step", response_model=StepResponse)
+def step(req: StepRequest) -> StepResponse:
+    """Execute one step in the environment."""
+    try:
+        action = PolypharmacyAction(**req.action)
+    except Exception as e:
+        raise HTTPException(status_code=422, detail=f"Invalid action: {e}")
+    result = _env.step(action)
+    return StepResponse(
+        observation=result["observation"],
+        reward=result["reward"],
+        done=result["done"],
+        info=result["info"],
+    )
+@app.get("/state", response_model=PolypharmacyState)
+def state() -> PolypharmacyState:
+    """Return the current environment state snapshot."""
+    return _env.state
+@app.get("/health", response_model=HealthResponse)
+def health() -> HealthResponse:
+    return HealthResponse(status="healthy")

openenv-polypharmacy/src/polypharmacy_env/baselines/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ """Baseline agents."""

openenv-polypharmacy/src/polypharmacy_env/baselines/heuristic_agent.py ADDED Viewed

	@@ -0,0 +1,204 @@

+"""Deterministic heuristic baseline agent for PolypharmacyEnv.
+Strategy:
+1. Query all unordered medication pairs for DDIs (within budget),
+   prioritising high-risk elderly drugs first.
+2. For each severe DDI found, attempt substitution or stop.
+3. For each moderate DDI found, attempt substitution or stop.
+4. For remaining budget, address Beers-flagged "avoid" drugs.
+5. Call finish_review.
+"""
+from __future__ import annotations
+from itertools import combinations
+from typing import List, Tuple
+from ..env_core import PolypharmacyEnv
+from ..models import PolypharmacyAction, PolypharmacyObservation
+def run_heuristic_episode(
+    env: PolypharmacyEnv,
+    task_id: str = "budgeted_screening",
+    seed: int | None = None,
+) -> Tuple[float, float, int]:
+    """Run one episode with the heuristic agent.
+    Returns (total_reward, grader_score, steps).
+    """
+    obs = env.reset(task_id=task_id, seed=seed)
+    total_reward = 0.0
+    grader_score = 0.0
+    steps = 0
+    # Phase 1: Query DDIs between medication pairs, prioritising high-risk drugs
+    meds = obs.current_medications
+    # Sort: high-risk elderly drugs first, then by Beers flag count
+    meds_sorted = sorted(
+        meds,
+        key=lambda m: (not m.is_high_risk_elderly, -len(m.beers_flags), m.drug_id),
+    )
+    med_ids = [m.drug_id for m in meds_sorted]
+    pairs: List[Tuple[str, str]] = list(combinations(med_ids, 2))
+    severe_pairs: List[Tuple[str, str]] = []
+    moderate_pairs: List[Tuple[str, str]] = []
+    for a, b in pairs:
+        if obs.remaining_query_budget <= 0:
+            break
+        action = PolypharmacyAction(
+            action_type="query_ddi",
+            drug_id_1=a,
+            drug_id_2=b,
+        )
+        result = env.step(action)
+        obs = PolypharmacyObservation(**result["observation"])
+        total_reward += result["reward"]
+        steps += 1
+        if result["done"]:
+            grader_score = result["info"].get("grader_score", 0.0)
+            return total_reward, grader_score, steps
+        # Track severity
+        ddi_info = result["info"].get("ddi_result", {})
+        sev = ddi_info.get("severity", "none")
+        if sev == "severe":
+            severe_pairs.append((a, b))
+        elif sev == "moderate":
+            moderate_pairs.append((a, b))
+    # Phase 2: Intervene on severe DDI drugs first
+    current_ids = [m.drug_id for m in obs.current_medications]
+    intervened: set[str] = set()
+    def _try_intervene(
+        target: str,
+        rationale: str,
+    ) -> Tuple[bool, float, PolypharmacyObservation, int]:
+        """Try substitute then stop. Returns (success, total_reward, obs, steps)."""
+        nonlocal total_reward, steps
+        # Try substitute first
+        act = PolypharmacyAction(
+            action_type="propose_intervention",
+            target_drug_id=target,
+            intervention_type="substitute",
+            rationale=rationale,
+        )
+        res = env.step(act)
+        obs_new = PolypharmacyObservation(**res["observation"])
+        total_reward += res["reward"]
+        steps += 1
+        if res["done"]:
+            return True, total_reward, obs_new, steps
+        # If substitute failed, try stop
+        if res["info"].get("warning"):
+            if obs_new.remaining_intervention_budget <= 0:
+                return False, total_reward, obs_new, steps
+            act2 = PolypharmacyAction(
+                action_type="propose_intervention",
+                target_drug_id=target,
+                intervention_type="stop",
+                rationale=f"No substitute; {rationale}",
+            )
+            res2 = env.step(act2)
+            obs_new = PolypharmacyObservation(**res2["observation"])
+            total_reward += res2["reward"]
+            steps += 1
+            if res2["done"]:
+                return True, total_reward, obs_new, steps
+        return False, total_reward, obs_new, steps
+    # Intervene on severe pairs
+    for a, b in severe_pairs:
+        if obs.remaining_intervention_budget <= 0:
+            break
+        # Pick the drug to intervene on (prefer the one not yet intervened)
+        target = b if a in intervened else a
+        if target in intervened:
+            target = b
+        if target in intervened:
+            continue
+        intervened.add(target)
+        done, total_reward, obs, steps = _try_intervene(
+            target, f"Severe DDI between {a} and {b}"
+        )
+        if done:
+            grader_score = env._run_grader() if not done else 0.0
+            # grader_score was already computed in step
+            return total_reward, result["info"].get("grader_score", 0.0), steps
+    # Phase 2b: Intervene on moderate DDI drugs
+    for a, b in moderate_pairs:
+        if obs.remaining_intervention_budget <= 0:
+            break
+        target = b if a in intervened else a
+        if target in intervened:
+            target = b
+        if target in intervened:
+            continue
+        intervened.add(target)
+        done, total_reward, obs, steps = _try_intervene(
+            target, f"Moderate DDI between {a} and {b}"
+        )
+        if done:
+            return total_reward, result["info"].get("grader_score", 0.0), steps
+    # Phase 3: Address Beers-flagged "avoid" drugs
+    for med in meds_sorted:
+        if obs.remaining_intervention_budget <= 0:
+            break
+        if med.drug_id in intervened:
+            continue
+        if not med.beers_flags:
+            continue
+        if any("avoid" in f for f in med.beers_flags):
+            intervened.add(med.drug_id)
+            done, total_reward, obs, steps = _try_intervene(
+                med.drug_id, f"Beers criteria: {', '.join(med.beers_flags)}"
+            )
+            if done:
+                return total_reward, result["info"].get("grader_score", 0.0), steps
+    # Phase 4: Finish
+    action = PolypharmacyAction(action_type="finish_review")
+    result = env.step(action)
+    total_reward += result["reward"]
+    steps += 1
+    grader_score = result["info"].get("grader_score", 0.0)
+    return total_reward, grader_score, steps
+def run_heuristic_baseline(
+    n_episodes: int = 5,
+    task_ids: List[str] | None = None,
+) -> None:
+    """Run the heuristic agent across tasks and print results."""
+    if task_ids is None:
+        task_ids = ["easy_screening", "budgeted_screening", "complex_tradeoff"]
+    env = PolypharmacyEnv()
+    for tid in task_ids:
+        scores: list[float] = []
+        rewards: list[float] = []
+        for i in range(n_episodes):
+            total_r, score, steps = run_heuristic_episode(env, task_id=tid, seed=i)
+            scores.append(score)
+            rewards.append(total_r)
+            print(f"  [{tid}] ep={i} steps={steps} reward={total_r:.4f} score={score:.4f}")
+        avg_s = sum(scores) / len(scores) if scores else 0.0
+        avg_r = sum(rewards) / len(rewards) if rewards else 0.0
+        print(f"  [{tid}] avg_score={avg_s:.4f}  avg_reward={avg_r:.4f}\n")
+# Script entry point. This module uses package-relative imports, so it has to
+# be launched as a module of its package (``python -m ...``), not by file path.
+if __name__ == "__main__":
+    run_heuristic_baseline()

openenv-polypharmacy/src/polypharmacy_env/baselines/random_agent.py ADDED Viewed

	@@ -0,0 +1,54 @@

+"""Trivial random baseline agent for PolypharmacyEnv."""
+from __future__ import annotations
+import random
+from typing import List, Tuple
+from ..env_core import PolypharmacyEnv
+from ..models import PolypharmacyAction, PolypharmacyObservation
+def run_random_episode(
+    env: PolypharmacyEnv,
+    task_id: str = "budgeted_screening",
+    seed: int | None = None,
+) -> Tuple[float, float, int]:
+    rng = random.Random(seed)
+    obs = env.reset(task_id=task_id, seed=seed)
+    total_reward = 0.0
+    grader_score = 0.0
+    steps = 0
+    while not obs.done:
+        med_ids = [m.drug_id for m in obs.current_medications]
+        choice = rng.choice(["query_ddi", "propose_intervention", "finish_review"])
+        if choice == "query_ddi" and len(med_ids) >= 2 and obs.remaining_query_budget > 0:
+            pair = rng.sample(med_ids, 2)
+            action = PolypharmacyAction(
+                action_type="query_ddi",
+                drug_id_1=pair[0],
+                drug_id_2=pair[1],
+            )
+        elif choice == "propose_intervention" and med_ids and obs.remaining_intervention_budget > 0:
+            target = rng.choice(med_ids)
+            itype = rng.choice(["stop", "dose_reduce", "substitute", "add_monitoring"])
+            action = PolypharmacyAction(
+                action_type="propose_intervention",
+                target_drug_id=target,
+                intervention_type=itype,
+                rationale="random",
+            )
+        else:
+            action = PolypharmacyAction(action_type="finish_review")
+        result = env.step(action)
+        obs = PolypharmacyObservation(**result["observation"])
+        total_reward += result["reward"]
+        steps += 1
+        if result["done"]:
+            grader_score = result["info"].get("grader_score", 0.0)
+            break
+    return total_reward, grader_score, steps

openenv-polypharmacy/src/polypharmacy_env/config.py ADDED Viewed

	@@ -0,0 +1,79 @@

+"""Environment configuration constants and task parameter definitions."""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Dict
+# ── Paths ────────────────────────────────────────────────────────────────────
+# All data locations are derived from this file's resolved path, so they do
+# not depend on the process's working directory.
+# NOTE(review): parents[2] assumes the src/<package>/ source layout — confirm
+# the data directory is still reachable when the package is installed.
+PROJECT_ROOT = Path(__file__).resolve().parents[2]  # openenv-polypharmacy/
+DATA_DIR = PROJECT_ROOT / "data"
+LOOKUPS_DIR = DATA_DIR / "lookups"
+PROCESSED_DIR = DATA_DIR / "processed"
+DDI_RULES_CSV = LOOKUPS_DIR / "ddi_rules.csv"
+BEERS_CRITERIA_CSV = LOOKUPS_DIR / "beers_criteria.csv"
+DRUG_METADATA_CSV = LOOKUPS_DIR / "drug_metadata.csv"
+PATIENTS_CSV = PROCESSED_DIR / "patients_polypharmacy.csv"
+# ── Reward hyper-parameters ──────────────────────────────────────────────────
+# NOTE(review): presumably consumed by the reward-shaping code; the sign
+# convention (subtracted as a cost vs. added as a bonus) is not visible here —
+# confirm at the point of use.
+QUERY_COST: float = 0.01
+INTERVENTION_COST: float = 0.02
+INVALID_ACTION_PENALTY: float = 0.10
+TIMEOUT_PENALTY: float = 0.20
+SEVERE_DDI_DISCOVERY_BONUS: float = 0.03
+# ── Task parameters ─────────────────────────────────────────────────────────
+@dataclass(frozen=True)
+class TaskConfig:
+    """Immutable parameter bundle describing one task."""
+    task_id: str  # identifier; matches the key used in TASK_CONFIGS
+    difficulty: str  # difficulty label ("easy" / "medium" / "hard" in TASK_CONFIGS)
+    min_drugs: int  # NOTE(review): presumably lower bound on regimen size — confirm in task sampling
+    max_drugs: int  # NOTE(review): presumably upper bound on regimen size — confirm in task sampling
+    query_budget: int  # number of DDI queries available per episode
+    intervention_budget: int  # number of interventions available per episode
+    max_steps: int  # step limit per episode
+# Registry of built-in tasks, keyed by task_id. Budgets and the step limit
+# grow from the easy task to the hard one.
+TASK_CONFIGS: Dict[str, TaskConfig] = {
+    "easy_screening": TaskConfig(
+        task_id="easy_screening",
+        difficulty="easy",
+        min_drugs=3,
+        max_drugs=5,
+        query_budget=4,
+        intervention_budget=2,
+        max_steps=10,
+    ),
+    "budgeted_screening": TaskConfig(
+        task_id="budgeted_screening",
+        difficulty="medium",
+        min_drugs=6,
+        max_drugs=10,
+        query_budget=8,
+        intervention_budget=3,
+        max_steps=20,
+    ),
+    "complex_tradeoff": TaskConfig(
+        task_id="complex_tradeoff",
+        difficulty="hard",
+        min_drugs=10,
+        max_drugs=15,
+        query_budget=12,
+        intervention_budget=5,
+        max_steps=30,
+    ),
+}
+# Must be one of the keys of TASK_CONFIGS.
+DEFAULT_TASK = "budgeted_screening"
+# ── Critical drugs (must not be stopped without substitution) ────────────────
+# Membership is tested against the drug_id strings used throughout the data.
+CRITICAL_DRUG_IDS: set[str] = {
+    "DRUG_WARFARIN",
+    "DRUG_APIXABAN",
+    "DRUG_INSULIN_GLARGINE",
+    "DRUG_METOPROLOL",
+    "DRUG_DIGOXIN",
+}

openenv-polypharmacy/src/polypharmacy_env/data_loader.py ADDED Viewed

	@@ -0,0 +1,142 @@

+"""Load and cache CSV lookup data for the PolypharmacyEnv."""
+from __future__ import annotations
+import csv
+from dataclasses import dataclass, field
+from functools import lru_cache
+from pathlib import Path
+from typing import Dict, List, Optional, Tuple
+from .config import (
+    BEERS_CRITERIA_CSV,
+    DDI_RULES_CSV,
+    DRUG_METADATA_CSV,
+    PATIENTS_CSV,
+)
+# ── Row-level data classes ───────────────────────────────────────────────────
+@dataclass(frozen=True)
+class DrugMeta:
+    """Immutable record for one row of the drug-metadata CSV."""
+    drug_id: str  # unique key; used as the dict key by load_drug_metadata
+    generic_name: str
+    atc_class: str
+    is_high_risk_elderly: bool  # True when the CSV column holds the string "1"
+    default_dose_mg: float
+    min_dose_mg: float
+    max_dose_mg: float
+@dataclass(frozen=True)
+class DDIRule:
+    """Immutable record for one drug–drug interaction rule."""
+    # load_ddi_rules stores the two ids in normalised (sorted) order.
+    drug_id_1: str
+    drug_id_2: str
+    severity: str  # compared against "severe" / "moderate" by callers
+    mechanism: str
+    recommendation: str
+    base_risk_score: float
+@dataclass(frozen=True)
+class BeersCriterion:
+    """Immutable record for one row of the Beers-criteria CSV."""
+    drug_id: str
+    criterion_type: str  # avoid | caution | dose_adjust | avoid_in_condition
+    condition: Optional[str]  # None when the CSV cell is blank
+    rationale: str
+@dataclass
+class PatientEpisode:
+    """One patient scenario loaded from the processed patients CSV.
+    Unlike the lookup records in this module, this dataclass is not frozen.
+    """
+    episode_id: str
+    age: int
+    sex: str
+    conditions: List[str]  # parsed from a ";"-separated CSV cell
+    eGFR_category: str
+    liver_function_category: str
+    medication_ids: List[str]  # parsed from a ";"-separated CSV cell
+    baseline_risk_score: float
+    difficulty: str  # "medium" when the CSV has no difficulty column
+# ── Loaders (cached) ────────────────────────────────────────────────────────
+def _read_csv(path: Path) -> List[Dict[str, str]]:
+    with open(path, newline="") as f:
+        return list(csv.DictReader(f))
+@lru_cache(maxsize=1)
+def load_drug_metadata(path: Path = DRUG_METADATA_CSV) -> Dict[str, DrugMeta]:
+    out: Dict[str, DrugMeta] = {}
+    for row in _read_csv(path):
+        dm = DrugMeta(
+            drug_id=row["drug_id"],
+            generic_name=row["generic_name"],
+            atc_class=row["atc_class"],
+            is_high_risk_elderly=row["is_high_risk_elderly"] == "1",
+            default_dose_mg=float(row["default_dose_mg"]),
+            min_dose_mg=float(row["min_dose_mg"]),
+            max_dose_mg=float(row["max_dose_mg"]),
+        )
+        out[dm.drug_id] = dm
+    return out
+def _normalise_pair(a: str, b: str) -> Tuple[str, str]:
+    return (a, b) if a < b else (b, a)
+@lru_cache(maxsize=1)
+def load_ddi_rules(path: Path = DDI_RULES_CSV) -> Dict[Tuple[str, str], DDIRule]:
+    out: Dict[Tuple[str, str], DDIRule] = {}
+    for row in _read_csv(path):
+        key = _normalise_pair(row["drug_id_1"], row["drug_id_2"])
+        out[key] = DDIRule(
+            drug_id_1=key[0],
+            drug_id_2=key[1],
+            severity=row["severity"],
+            mechanism=row["mechanism"],
+            recommendation=row["recommendation"],
+            base_risk_score=float(row["base_risk_score"]),
+        )
+    return out
+@lru_cache(maxsize=1)
+def load_beers_criteria(path: Path = BEERS_CRITERIA_CSV) -> List[BeersCriterion]:
+    out: List[BeersCriterion] = []
+    for row in _read_csv(path):
+        cond = row["condition"].strip() or None
+        out.append(BeersCriterion(
+            drug_id=row["drug_id"],
+            criterion_type=row["criterion_type"],
+            condition=cond,
+            rationale=row["rationale"],
+        ))
+    return out
+def load_patients(
+    path: Path = PATIENTS_CSV,
+    difficulty: Optional[str] = None,
+) -> List[PatientEpisode]:
+    rows = _read_csv(path)
+    eps: List[PatientEpisode] = []
+    for row in rows:
+        d = row.get("difficulty", "medium")
+        if difficulty and d != difficulty:
+            continue
+        eps.append(PatientEpisode(
+            episode_id=row["episode_id"],
+            age=int(row["age"]),
+            sex=row["sex"],
+            conditions=[c.strip() for c in row["conditions"].split(";") if c.strip()],
+            eGFR_category=row["eGFR_category"],
+            liver_function_category=row["liver_function_category"],
+            medication_ids=[m.strip() for m in row["medication_ids"].split(";") if m.strip()],
+            baseline_risk_score=float(row["baseline_risk_score"]),
+            difficulty=d,
+        ))
+    return eps

openenv-polypharmacy/src/polypharmacy_env/ddi_simulator.py ADDED Viewed

	@@ -0,0 +1,115 @@

+"""Local DDI and guideline simulation using CSV lookup data."""
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Dict, List, Optional, Tuple
+from .data_loader import (
+    BeersCriterion,
+    DDIRule,
+    DrugMeta,
+    load_beers_criteria,
+    load_ddi_rules,
+    load_drug_metadata,
+)
+@dataclass(frozen=True)
+class DDIResult:
+    """Outcome of a single drug-pair interaction lookup."""
+    severity: str  # "none" when no rule exists for the pair
+    recommendation: str
+    base_risk_score: float
+# Shared sentinel returned for every pair that has no rule; safe to share
+# because DDIResult is frozen.
+_NO_INTERACTION = DDIResult(severity="none", recommendation="no_action", base_risk_score=0.0)
+class DDISimulator:
+    """Provides drug–drug interaction and Beers-criteria lookups."""
+    def __init__(self) -> None:
+        self._ddi_rules: Dict[Tuple[str, str], DDIRule] = load_ddi_rules()
+        self._drug_meta: Dict[str, DrugMeta] = load_drug_metadata()
+        self._beers: List[BeersCriterion] = load_beers_criteria()
+    @staticmethod
+    def _normalise_pair(a: str, b: str) -> Tuple[str, str]:
+        return (a, b) if a < b else (b, a)
+    def lookup_ddi(self, drug_id_1: str, drug_id_2: str) -> DDIResult:
+        key = self._normalise_pair(drug_id_1, drug_id_2)
+        rule = self._ddi_rules.get(key)
+        if rule is None:
+            return _NO_INTERACTION
+        return DDIResult(
+            severity=rule.severity,
+            recommendation=rule.recommendation,
+            base_risk_score=rule.base_risk_score,
+        )
+    def get_beers_flags(
+        self,
+        drug_id: str,
+        patient_conditions: List[str],
+    ) -> List[str]:
+        """Return list of Beers flags applicable to *drug_id* given patient conditions."""
+        flags: List[str] = []
+        for bc in self._beers:
+            if bc.drug_id != drug_id:
+                continue
+            if bc.condition is None:
+                flags.append(bc.criterion_type)
+            elif bc.condition in patient_conditions:
+                flags.append(f"{bc.criterion_type}_{bc.condition}")
+        return flags
+    def get_drug_meta(self, drug_id: str) -> Optional[DrugMeta]:
+        return self._drug_meta.get(drug_id)
+    def find_substitute(
+        self,
+        drug_id: str,
+        current_drug_ids: List[str],
+    ) -> Optional[str]:
+        """Find a safer same-class substitute not already in the regimen."""
+        meta = self._drug_meta.get(drug_id)
+        if meta is None:
+            return None
+        candidates = [
+            dm
+            for dm in self._drug_meta.values()
+            if (
+                dm.atc_class == meta.atc_class
+                and dm.drug_id != drug_id
+                and dm.drug_id not in current_drug_ids
+                and not dm.is_high_risk_elderly
+            )
+        ]
+        if not candidates:
+            return None
+        # Pick the candidate with fewest severe DDIs with current regimen
+        def _severe_count(cand: DrugMeta) -> int:
+            count = 0
+            for did in current_drug_ids:
+                if did == drug_id:
+                    continue
+                r = self.lookup_ddi(cand.drug_id, did)
+                if r.severity == "severe":
+                    count += 1
+            return count
+        candidates.sort(key=lambda c: (_severe_count(c), c.drug_id))
+        return candidates[0].drug_id
+    @property
+    def drug_metadata(self) -> Dict[str, DrugMeta]:
+        return self._drug_meta
+    @property
+    def ddi_rules(self) -> Dict[Tuple[str, str], DDIRule]:
+        return self._ddi_rules
+    @property
+    def beers_criteria(self) -> List[BeersCriterion]:
+        return self._beers

openenv-polypharmacy/src/polypharmacy_env/env_core.py ADDED Viewed

	@@ -0,0 +1,413 @@

+"""PolypharmacyEnv – core environment implementing OpenEnv step / reset / state."""
+from __future__ import annotations
+from copy import deepcopy
+from itertools import combinations
+from typing import Any, Dict, List, Optional, Tuple
+from .config import CRITICAL_DRUG_IDS, TaskConfig
+from .data_loader import PatientEpisode
+from .ddi_simulator import DDISimulator
+from .graders import (
+    grade_budgeted_screening,
+    grade_complex_tradeoff,
+    grade_easy_screening,
+)
+from .models import (
+    InteractionQueryRecord,
+    InterventionRecord,
+    MedicationEntry,
+    PolypharmacyAction,
+    PolypharmacyObservation,
+    PolypharmacyState,
+)
+from .rewards import compute_regimen_risk, compute_shaped_reward
+from .tasks import get_task_config, sample_episode
+class PolypharmacyEnv:
+    """OpenEnv-compliant environment for elderly polypharmacy medication review."""
+    def __init__(self) -> None:
+        """Create an idle environment; call ``reset`` before stepping.
+        Construction builds a ``DDISimulator`` and zeroes all per-episode
+        bookkeeping. ``_done`` starts as True, so ``step`` before the first
+        ``reset`` takes the already-finished path.
+        """
+        self._sim = DDISimulator()
+        self._task_cfg: Optional[TaskConfig] = None  # set by reset()
+        self._episode: Optional[PatientEpisode] = None  # set by reset()
+        self._medications: List[MedicationEntry] = []  # current (mutable) regimen
+        self._interaction_queries: List[InteractionQueryRecord] = []
+        self._interventions: List[InterventionRecord] = []
+        self._risk_deltas: List[float] = []  # per-intervention risk improvement
+        self._step_count: int = 0
+        self._done: bool = True
+        self._baseline_risk: float = 0.0  # regimen risk computed at reset
+        self._current_risk: float = 0.0  # regimen risk after the latest intervention
+        self._remaining_query_budget: int = 0
+        self._remaining_intervention_budget: int = 0
+        self._severe_moderate_discovered: int = 0  # queries that returned severe/moderate
+        self._total_drug_changes: int = 0  # stops plus successful substitutions
+        self._critical_stopped_without_sub: int = 0  # "stop" applied to a critical drug
+        self._last_reward: float = 0.0
+    # ── reset ────────────────────────────────────────────────────────────────
+    def reset(
+        self,
+        task_id: Optional[str] = None,
+        seed: Optional[int] = None,
+        episode_id: Optional[str] = None,
+    ) -> PolypharmacyObservation:
+        self._task_cfg = get_task_config(task_id)
+        self._episode = sample_episode(task_id, seed=seed, episode_id=episode_id)
+        # Build medication list
+        self._medications = []
+        for did in self._episode.medication_ids:
+            meta = self._sim.get_drug_meta(did)
+            if meta is None:
+                continue
+            flags = self._sim.get_beers_flags(did, self._episode.conditions)
+            self._medications.append(MedicationEntry(
+                drug_id=did,
+                generic_name=meta.generic_name,
+                atc_class=meta.atc_class,
+                dose_mg=meta.default_dose_mg,
+                is_high_risk_elderly=meta.is_high_risk_elderly,
+                beers_flags=flags,
+            ))
+        self._interaction_queries = []
+        self._interventions = []
+        self._risk_deltas = []
+        self._step_count = 0
+        self._done = False
+        self._remaining_query_budget = self._task_cfg.query_budget
+        self._remaining_intervention_budget = self._task_cfg.intervention_budget
+        self._severe_moderate_discovered = 0
+        self._total_drug_changes = 0
+        self._critical_stopped_without_sub = 0
+        self._last_reward = 0.0
+        # Compute baseline risk
+        self._baseline_risk = self._compute_risk()
+        self._current_risk = self._baseline_risk
+        return self._make_observation()
+    # ── step ─────────────────────────────────────────────────────────────────
+    def step(self, action: PolypharmacyAction) -> Dict[str, Any]:
+        if self._done:
+            return self._terminal_response("Episode already finished.")
+        assert self._task_cfg is not None
+        assert self._episode is not None
+        reward = 0.0
+        info: Dict[str, Any] = {}
+        # Validate basic action structure
+        valid, err = self._validate_action(action)
+        if not valid:
+            reward = compute_shaped_reward(
+                self._current_risk, self._current_risk,
+                action.action_type, is_invalid=True,
+            )
+            info["error"] = err
+            self._step_count += 1
+            return self._check_timeout_and_respond(reward, info)
+        if action.action_type == "query_ddi":
+            reward, info = self._handle_query(action)
+        elif action.action_type == "propose_intervention":
+            reward, info = self._handle_intervention(action)
+        elif action.action_type == "finish_review":
+            self._done = True
+            score = self._run_grader()
+            reward = score  # terminal bonus
+            info["grader_score"] = score
+        self._step_count += 1
+        return self._check_timeout_and_respond(reward, info)
+    # ── state property ───────────────────────────────────────────────────────
+    @property
+    def state(self) -> PolypharmacyState:
+        return PolypharmacyState(
+            episode_id=self._episode.episode_id if self._episode else None,
+            task_id=self._task_cfg.task_id if self._task_cfg else "",
+            step_count=self._step_count,
+            max_steps=self._task_cfg.max_steps if self._task_cfg else 0,
+            num_query_actions=len(self._interaction_queries),
+            num_interventions=len(self._interventions),
+        )
+    # ── Internal helpers ─────────────────────────────────────────────────────
+    def _compute_risk(self) -> float:
+        drug_ids = [m.drug_id for m in self._medications]
+        return compute_regimen_risk(
+            drug_ids,
+            self._episode.conditions if self._episode else [],
+            self._sim.ddi_rules,
+            self._sim.beers_criteria,
+            self._sim.drug_metadata,
+        )
+    def _validate_action(self, action: PolypharmacyAction) -> Tuple[bool, str]:
+        if action.action_type == "query_ddi":
+            if not action.drug_id_1 or not action.drug_id_2:
+                return False, "query_ddi requires drug_id_1 and drug_id_2"
+        elif action.action_type == "propose_intervention":
+            if not action.target_drug_id:
+                return False, "propose_intervention requires target_drug_id"
+            if action.intervention_type in (None, "none"):
+                return False, "propose_intervention requires a valid intervention_type"
+        return True, ""
+    def _handle_query(self, action: PolypharmacyAction) -> Tuple[float, Dict[str, Any]]:
+        info: Dict[str, Any] = {}
+        assert action.drug_id_1 and action.drug_id_2
+        if self._remaining_query_budget <= 0:
+            reward = compute_shaped_reward(
+                self._current_risk, self._current_risk,
+                "query_ddi", is_invalid=True,
+            )
+            info["error"] = "Query budget exhausted"
+            return reward, info
+        result = self._sim.lookup_ddi(action.drug_id_1, action.drug_id_2)
+        self._remaining_query_budget -= 1
+        self._interaction_queries.append(InteractionQueryRecord(
+            drug_id_1=action.drug_id_1,
+            drug_id_2=action.drug_id_2,
+            severity=result.severity,
+            recommendation=result.recommendation,
+            risk_score=result.base_risk_score,
+            step_index=self._step_count,
+        ))
+        discovered_severe = result.severity in ("severe", "moderate")
+        if discovered_severe:
+            self._severe_moderate_discovered += 1
+        reward = compute_shaped_reward(
+            self._current_risk, self._current_risk,
+            "query_ddi",
+            discovered_severe=(result.severity == "severe"),
+        )
+        info["ddi_result"] = {
+            "severity": result.severity,
+            "recommendation": result.recommendation,
+            "risk_score": result.base_risk_score,
+        }
+        return reward, info
+    def _handle_intervention(self, action: PolypharmacyAction) -> Tuple[float, Dict[str, Any]]:
+        """Apply a ``propose_intervention`` action and return ``(reward, info)``.
+        Rejected without any state change (scored as invalid, message in
+        ``info["error"]``) when the intervention budget is exhausted or the
+        target drug is not in the current regimen. Otherwise the regimen is
+        edited according to ``intervention_type``, the budget is decremented,
+        the risk is recomputed, and the risk delta plus an
+        ``InterventionRecord`` are appended to the episode history.
+        A substitution that cannot be carried out sets ``info["warning"]`` and
+        leaves the budget unchanged on net.
+        """
+        info: Dict[str, Any] = {}
+        assert action.target_drug_id
+        assert action.intervention_type and action.intervention_type != "none"
+        if self._remaining_intervention_budget <= 0:
+            reward = compute_shaped_reward(
+                self._current_risk, self._current_risk,
+                "propose_intervention", is_invalid=True,
+            )
+            info["error"] = "Intervention budget exhausted"
+            return reward, info
+        # Find target medication
+        target_idx: Optional[int] = None
+        for i, m in enumerate(self._medications):
+            if m.drug_id == action.target_drug_id:
+                target_idx = i
+                break
+        if target_idx is None:
+            reward = compute_shaped_reward(
+                self._current_risk, self._current_risk,
+                "propose_intervention", is_invalid=True,
+            )
+            info["error"] = f"Drug {action.target_drug_id} not in current medications"
+            return reward, info
+        previous_risk = self._current_risk
+        target_med = self._medications[target_idx]
+        if action.intervention_type == "stop":
+            # Remove the drug outright; stopping a critical drug is tallied
+            # separately for the grader.
+            self._medications.pop(target_idx)
+            self._total_drug_changes += 1
+            if action.target_drug_id in CRITICAL_DRUG_IDS:
+                self._critical_stopped_without_sub += 1
+        elif action.intervention_type == "dose_reduce":
+            # Halve the dose but never go below the drug's minimum. This is
+            # not counted in _total_drug_changes.
+            meta = self._sim.get_drug_meta(action.target_drug_id)
+            if meta:
+                new_dose = max(meta.min_dose_mg, target_med.dose_mg * 0.5)
+                self._medications[target_idx] = target_med.model_copy(
+                    update={"dose_mg": new_dose}
+                )
+        elif action.intervention_type == "substitute":
+            # NOTE(review): an explicitly proposed drug is not checked against
+            # the current regimen or the target itself — confirm duplicates
+            # are acceptable.
+            new_drug_id = action.proposed_new_drug_id
+            if not new_drug_id:
+                # Auto-find substitute
+                current_ids = [m.drug_id for m in self._medications]
+                new_drug_id = self._sim.find_substitute(action.target_drug_id, current_ids)
+            if new_drug_id:
+                new_meta = self._sim.get_drug_meta(new_drug_id)
+                if new_meta:
+                    flags = self._sim.get_beers_flags(
+                        new_drug_id,
+                        self._episode.conditions if self._episode else [],
+                    )
+                    # Replace in place so the regimen keeps its ordering.
+                    self._medications[target_idx] = MedicationEntry(
+                        drug_id=new_drug_id,
+                        generic_name=new_meta.generic_name,
+                        atc_class=new_meta.atc_class,
+                        dose_mg=new_meta.default_dose_mg,
+                        is_high_risk_elderly=new_meta.is_high_risk_elderly,
+                        beers_flags=flags,
+                    )
+                    self._total_drug_changes += 1
+                    # If critical drug was substituted, don't penalise
+                    if action.target_drug_id in CRITICAL_DRUG_IDS:
+                        pass  # substitution is acceptable
+                else:
+                    info["warning"] = f"Substitute {new_drug_id} not found in metadata"
+                    # Don't consume budget for a failed substitute
+                    # (pre-pays the unconditional decrement further down).
+                    self._remaining_intervention_budget += 1
+            else:
+                info["warning"] = "No suitable substitute found"
+                # Don't consume budget for a failed substitute
+                # (pre-pays the unconditional decrement further down).
+                self._remaining_intervention_budget += 1
+        elif action.intervention_type == "add_monitoring":
+            # Tag in metadata but don't change regimen
+            self._medications[target_idx] = target_med.model_copy(
+                update={"beers_flags": target_med.beers_flags + ["monitored"]}
+            )
+        # Runs on every path that reaches here, including failed substitutes.
+        self._remaining_intervention_budget -= 1
+        self._current_risk = self._compute_risk()
+        risk_delta = previous_risk - self._current_risk
+        # NOTE(review): a failed substitute still appends a risk delta and an
+        # InterventionRecord below — confirm the graders expect no-op entries.
+        self._risk_deltas.append(risk_delta)
+        # NOTE(review): the record stores the id from the action, which is
+        # empty when the substitute was auto-selected — confirm that is intended.
+        self._interventions.append(InterventionRecord(
+            target_drug_id=action.target_drug_id,
+            action_type=action.intervention_type,
+            proposed_new_drug_id=action.proposed_new_drug_id,
+            rationale=action.rationale or "",
+            step_index=self._step_count,
+        ))
+        reward = compute_shaped_reward(previous_risk, self._current_risk, "propose_intervention")
+        info["risk_delta"] = risk_delta
+        return reward, info
+    def _run_grader(self) -> float:
+        assert self._task_cfg is not None
+        tid = self._task_cfg.task_id
+        if tid == "easy_screening":
+            severe_pairs = self._get_severe_pairs()
+            return grade_easy_screening(
+                self._baseline_risk,
+                self._current_risk,
+                self._interventions,
+                severe_pairs,
+            )
+        elif tid == "budgeted_screening":
+            return grade_budgeted_screening(
+                self._baseline_risk,
+                self._current_risk,
+                self._interventions,
+                self._risk_deltas,
+                len(self._interaction_queries),
+                self._severe_moderate_discovered,
+            )
+        elif tid == "complex_tradeoff":
+            return grade_complex_tradeoff(
+                self._baseline_risk,
+                self._current_risk,
+                self._interventions,
+                self._total_drug_changes,
+                self._critical_stopped_without_sub,
+            )
+        return 0.0
+    def _get_severe_pairs(self) -> List[Tuple[str, str]]:
+        """Return all severe DDI pairs present in the *initial* medication list."""
+        if not self._episode:
+            return []
+        pairs: List[Tuple[str, str]] = []
+        med_ids = self._episode.medication_ids
+        for a, b in combinations(sorted(set(med_ids)), 2):
+            key = (a, b) if a < b else (b, a)
+            rule = self._sim.ddi_rules.get(key)
+            if rule and rule.severity == "severe":
+                pairs.append(key)
+        return pairs
+    def _check_timeout_and_respond(
+        self, reward: float, info: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        assert self._task_cfg is not None
+        if not self._done and self._step_count >= self._task_cfg.max_steps:
+            self._done = True
+            timeout_penalty = compute_shaped_reward(
+                self._current_risk, self._current_risk,
+                "finish_review", is_timeout=True,
+            )
+            score = self._run_grader()
+            reward += timeout_penalty + score
+            info["timeout"] = True
+            info["grader_score"] = score
+        self._last_reward = reward
+        info["current_risk"] = self._current_risk
+        info["baseline_risk"] = self._baseline_risk
+        obs = self._make_observation(reward=reward)
+        return {
+            "observation": obs.model_dump(),
+            "reward": reward,
+            "done": self._done,
+            "info": info,
+        }
+    def _terminal_response(self, msg: str) -> Dict[str, Any]:
+        obs = self._make_observation()
+        return {
+            "observation": obs.model_dump(),
+            "reward": 0.0,
+            "done": True,
+            "info": {"error": msg},
+        }
+    def _make_observation(self, reward: float = 0.0) -> PolypharmacyObservation:
+        ep = self._episode
+        cfg = self._task_cfg
+        return PolypharmacyObservation(
+            episode_id=ep.episode_id if ep else "",
+            task_id=cfg.task_id if cfg else "budgeted_screening",
+            age=ep.age if ep else 65,
+            sex=ep.sex if ep else "M",
+            conditions=ep.conditions if ep else [],
+            eGFR_category=ep.eGFR_category if ep else "normal",
+            liver_function_category=ep.liver_function_category if ep else "normal",
+            current_medications=deepcopy(self._medications),
+            interaction_queries=deepcopy(self._interaction_queries),
+            interventions=deepcopy(self._interventions),
+            step_index=self._step_count,
+            remaining_query_budget=self._remaining_query_budget,
+            remaining_intervention_budget=self._remaining_intervention_budget,
+            shaped_reward=reward,
+            done=self._done,
+            reward=reward,
+        )

openenv-polypharmacy/src/polypharmacy_env/graders.py ADDED Viewed

	@@ -0,0 +1,98 @@

+"""Deterministic graders for the three PolypharmacyEnv task difficulties."""
+from __future__ import annotations
+from itertools import combinations
+from typing import Dict, List, Tuple
+from .data_loader import DDIRule
+from .config import CRITICAL_DRUG_IDS
+from .models import InterventionRecord
+_EPS = 1e-8
+def _clip(x: float) -> float:
+    return max(0.0, min(x, 1.0))
+# ── Easy: easy_screening ─────────────────────────────────────────────────────
+def grade_easy_screening(
+    baseline_risk: float,
+    final_risk: float,
+    interventions: List[InterventionRecord],
+    severe_ddi_drug_ids: List[Tuple[str, str]],
+) -> float:
+    """Score ∈ [0, 1] for the easy task.
+    50 % risk reduction + 50 % targeted-intervention flag.
+    """
+    risk_reduction = max(0.0, baseline_risk - final_risk) / max(baseline_risk, _EPS)
+    targeted = 0.0
+    severe_drugs = set()
+    for a, b in severe_ddi_drug_ids:
+        severe_drugs.add(a)
+        severe_drugs.add(b)
+    for iv in interventions:
+        if iv.target_drug_id in severe_drugs:
+            targeted = 1.0
+            break
+    return _clip(0.5 * risk_reduction + 0.5 * targeted)
+# ── Medium: budgeted_screening ───────────────────────────────────────────────
+def grade_budgeted_screening(
+    baseline_risk: float,
+    final_risk: float,
+    interventions: List[InterventionRecord],
+    risk_deltas: List[float],
+    num_queries: int,
+    severe_moderate_discovered: int,
+) -> float:
+    """Score ∈ [0, 1] for the medium task.
+    50 % risk reduction + 30 % intervention precision + 20 % query efficiency.
+    """
+    risk_reduction = max(0.0, baseline_risk - final_risk) / max(baseline_risk, _EPS)
+    # Intervention precision: fraction of interventions that reduced risk
+    if interventions:
+        good = sum(1 for d in risk_deltas if d > 0)
+        precision = good / len(interventions)
+    else:
+        precision = 0.0
+    # Query efficiency
+    if num_queries > 0:
+        query_eff = min(severe_moderate_discovered / num_queries, 1.0)
+    else:
+        query_eff = 0.0
+    return _clip(0.5 * risk_reduction + 0.3 * precision + 0.2 * query_eff)
+# ── Hard: complex_tradeoff ───────────────────────────────────────────────────
+def grade_complex_tradeoff(
+    baseline_risk: float,
+    final_risk: float,
+    interventions: List[InterventionRecord],
+    total_drug_changes: int,
+    critical_drugs_stopped_without_sub: int,
+) -> float:
+    """Score ∈ [0, 1] for the hard task.
+    Base = risk reduction; penalty for regimen disruption and critical-drug stops.
+    """
+    risk_reduction = max(0.0, baseline_risk - final_risk) / max(baseline_risk, _EPS)
+    # Regimen disruption: penalise excessive changes
+    disruption = 0.05 * total_drug_changes
+    critical_penalty = 0.20 * critical_drugs_stopped_without_sub
+    return _clip(risk_reduction - disruption - critical_penalty)

openenv-polypharmacy/src/polypharmacy_env/models.py ADDED Viewed

	@@ -0,0 +1,103 @@

+"""Pydantic models for the PolypharmacyEnv environment.
+Extends OpenEnv base types (Action, Observation, State) and defines
+auxiliary records for medications, interactions, and interventions.
+"""
+from __future__ import annotations
+from typing import Any, Dict, List, Literal, Optional
+from pydantic import BaseModel, ConfigDict, Field
+# ── Auxiliary models ─────────────────────────────────────────────────────────
+class MedicationEntry(BaseModel):
+    """One drug entry in a medication list (see ``current_medications``)."""
+    # Unknown fields are rejected at validation time.
+    model_config = ConfigDict(extra="forbid")
+    drug_id: str
+    generic_name: str
+    atc_class: str
+    dose_mg: float
+    # Default frequency/route codes used when a record does not supply them.
+    frequency: str = "qd"
+    route: str = "po"
+    is_high_risk_elderly: bool = False
+    # Free-form flag strings; default_factory gives each instance its own list.
+    beers_flags: List[str] = Field(default_factory=list)
+class InteractionQueryRecord(BaseModel):
+    """Record of one interaction query between two drugs."""
+    # Unknown fields are rejected at validation time.
+    model_config = ConfigDict(extra="forbid")
+    drug_id_1: str
+    drug_id_2: str
+    # Result fields are optional and default to None.
+    severity: Optional[str] = None
+    recommendation: Optional[str] = None
+    risk_score: Optional[float] = None
+    step_index: int = 0
+class InterventionRecord(BaseModel):
+    """Record of one intervention applied to a target drug."""
+    # Unknown fields are rejected at validation time.
+    model_config = ConfigDict(extra="forbid")
+    target_drug_id: str
+    # Unlike PolypharmacyAction.intervention_type, "none" is not accepted here.
+    action_type: Literal["stop", "dose_reduce", "substitute", "add_monitoring"]
+    proposed_new_drug_id: Optional[str] = None
+    rationale: str = ""
+    step_index: int = 0
+# ── OpenEnv wire models ─────────────────────────────────────────────────────
+class PolypharmacyAction(BaseModel):
+    """Action sent by the agent each step.
+    ``action_type`` selects the kind of step; every other field is optional at
+    the schema level and defaults to None (``metadata`` defaults to an empty dict).
+    """
+    # Unknown fields are rejected, and assignments after construction are validated.
+    model_config = ConfigDict(extra="forbid", validate_assignment=True)
+    action_type: Literal["query_ddi", "propose_intervention", "finish_review"]
+    # Drug pair for an interaction query.
+    drug_id_1: Optional[str] = None
+    drug_id_2: Optional[str] = None
+    # Target and kind of a proposed intervention.
+    target_drug_id: Optional[str] = None
+    intervention_type: Optional[
+        Literal["stop", "dose_reduce", "substitute", "add_monitoring", "none"]
+    ] = None
+    proposed_new_drug_id: Optional[str] = None
+    rationale: Optional[str] = None
+    metadata: Dict[str, Any] = Field(default_factory=dict)
+class PolypharmacyObservation(BaseModel):
+    """Observation returned to the agent.
+    Every field has a default, so an empty observation can be constructed.
+    """
+    # Unknown fields are rejected, and assignments after construction are validated.
+    model_config = ConfigDict(extra="forbid", validate_assignment=True)
+    episode_id: str = ""
+    task_id: str = "budgeted_screening"
+    # Patient descriptors.
+    age: int = 65
+    sex: str = "M"
+    conditions: List[str] = Field(default_factory=list)
+    eGFR_category: str = "normal"
+    liver_function_category: str = "normal"
+    # Regimen and action history.
+    current_medications: List[MedicationEntry] = Field(default_factory=list)
+    interaction_queries: List[InteractionQueryRecord] = Field(default_factory=list)
+    interventions: List[InterventionRecord] = Field(default_factory=list)
+    # Progress counters and remaining budgets.
+    step_index: int = 0
+    remaining_query_budget: int = 0
+    remaining_intervention_budget: int = 0
+    # Reward for the step, plus the episode-finished flag.
+    shaped_reward: float = 0.0
+    done: bool = False
+    reward: Optional[float] = None
+    metadata: Dict[str, Any] = Field(default_factory=dict)
+class PolypharmacyState(BaseModel):
+    """Compact state snapshot for the /state endpoint."""
+    # Extra fields are allowed here, unlike the other models in this module.
+    model_config = ConfigDict(extra="allow", validate_assignment=True)
+    episode_id: Optional[str] = None
+    task_id: str = ""
+    # Must be non-negative (ge=0).
+    step_count: int = Field(default=0, ge=0)
+    max_steps: int = 0
+    num_query_actions: int = 0
+    num_interventions: int = 0

openenv-polypharmacy/src/polypharmacy_env/rewards.py ADDED Viewed

	@@ -0,0 +1,92 @@

+"""Reward shaping and regimen-risk computation."""
+from __future__ import annotations
+from itertools import combinations
+from typing import Dict, List, Optional, Tuple
+from .config import (
+    INTERVENTION_COST,
+    INVALID_ACTION_PENALTY,
+    QUERY_COST,
+    SEVERE_DDI_DISCOVERY_BONUS,
+    TIMEOUT_PENALTY,
+)
+from .data_loader import BeersCriterion, DDIRule, DrugMeta
+def compute_regimen_risk(
+    current_drug_ids: List[str],
+    patient_conditions: List[str],
+    ddi_rules: Dict[Tuple[str, str], DDIRule],
+    beers_criteria: List[BeersCriterion],
+    drug_metadata: Dict[str, DrugMeta],
+) -> float:
+    """Compute an aggregate risk score for the current medication regimen.
+    Returns a float clipped to [0.0, 1.0].
+    """
+    if not current_drug_ids:
+        return 0.0
+    risk = 0.0
+    drug_set = set(current_drug_ids)
+    # 1. DDI pairwise risk
+    for a, b in combinations(sorted(drug_set), 2):
+        key = (a, b) if a < b else (b, a)
+        rule = ddi_rules.get(key)
+        if rule is not None:
+            risk += rule.base_risk_score
+    # 2. Beers violations
+    beers_weight = {"avoid": 0.25, "caution": 0.10, "dose_adjust": 0.08, "avoid_in_condition": 0.20}
+    for bc in beers_criteria:
+        if bc.drug_id not in drug_set:
+            continue
+        if bc.condition is None:
+            risk += beers_weight.get(bc.criterion_type, 0.05)
+        elif bc.condition in patient_conditions:
+            risk += beers_weight.get(bc.criterion_type, 0.05)
+    # 3. High-risk elderly drugs
+    for did in drug_set:
+        dm = drug_metadata.get(did)
+        if dm and dm.is_high_risk_elderly:
+            risk += 0.05
+    # Normalise by regimen size to keep score comparable across difficulties
+    risk /= max(len(drug_set), 1)
+    return min(max(risk, 0.0), 1.0)
+def compute_shaped_reward(
+    previous_risk: float,
+    new_risk: float,
+    action_type: str,
+    *,
+    is_invalid: bool = False,
+    is_timeout: bool = False,
+    discovered_severe: bool = False,
+) -> float:
+    """Compute the step-level shaped reward."""
+    reward = 0.0
+    if is_invalid:
+        return -INVALID_ACTION_PENALTY
+    if is_timeout:
+        return -TIMEOUT_PENALTY
+    if action_type == "query_ddi":
+        reward -= QUERY_COST
+        if discovered_severe:
+            reward += SEVERE_DDI_DISCOVERY_BONUS
+    elif action_type == "propose_intervention":
+        reward += (previous_risk - new_risk)
+        reward -= INTERVENTION_COST
+    # finish_review terminal bonus is added by the caller after grading
+    return reward

openenv-polypharmacy/src/polypharmacy_env/tasks.py ADDED Viewed

	@@ -0,0 +1,47 @@

+"""Task setup utilities: select episodes and configure budgets per difficulty."""
+from __future__ import annotations
+import random
+from typing import Optional
+from .config import DEFAULT_TASK, TASK_CONFIGS, TaskConfig
+from .data_loader import PatientEpisode, load_patients
+# Map OpenEnv difficulty labels (task ids) to the CSV difficulty tags.
+# sample_episode() passes the mapped tag to load_patients(difficulty=...);
+# task ids missing from this map fall back to "medium" there.
+_DIFFICULTY_MAP = {
+    "easy_screening": "easy",
+    "budgeted_screening": "medium",
+    "complex_tradeoff": "hard",
+}
+def get_task_config(task_id: Optional[str] = None) -> TaskConfig:
+    tid = task_id or DEFAULT_TASK
+    cfg = TASK_CONFIGS.get(tid)
+    if cfg is None:
+        raise ValueError(f"Unknown task_id {tid!r}. Choose from {list(TASK_CONFIGS)}")
+    return cfg
+def sample_episode(
+    task_id: Optional[str] = None,
+    seed: Optional[int] = None,
+    episode_id: Optional[str] = None,
+) -> PatientEpisode:
+    """Return a single patient episode appropriate for *task_id*."""
+    tid = task_id or DEFAULT_TASK
+    difficulty = _DIFFICULTY_MAP.get(tid, "medium")
+    episodes = load_patients(difficulty=difficulty)
+    if not episodes:
+        raise RuntimeError(f"No episodes found for difficulty={difficulty!r}")
+    if episode_id:
+        for ep in episodes:
+            if ep.episode_id == episode_id:
+                return ep
+        raise ValueError(f"Episode {episode_id!r} not found for difficulty={difficulty!r}")
+    rng = random.Random(seed)
+    return rng.choice(episodes)

openenv-polypharmacy/src/polypharmacy_env/tests/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ """Tests."""

openenv-polypharmacy/src/polypharmacy_env/tests/test_api.py ADDED Viewed

	@@ -0,0 +1,73 @@

+"""Tests for the FastAPI HTTP server."""
+from __future__ import annotations
+import pytest
+from fastapi.testclient import TestClient
+from polypharmacy_env.api.server import app
+@pytest.fixture
+def client() -> TestClient:
+    return TestClient(app)
+class TestHealth:
+    def test_health(self, client: TestClient) -> None:
+        resp = client.get("/health")
+        assert resp.status_code == 200
+        assert resp.json()["status"] == "healthy"
+class TestReset:
+    def test_reset_default(self, client: TestClient) -> None:
+        resp = client.post("/reset", json={})
+        assert resp.status_code == 200
+        data = resp.json()
+        assert "observation" in data
+        assert data["done"] is False
+    def test_reset_with_task(self, client: TestClient) -> None:
+        resp = client.post("/reset", json={"task_id": "easy_screening"})
+        assert resp.status_code == 200
+        obs = resp.json()["observation"]
+        assert obs["task_id"] == "easy_screening"
+class TestStep:
+    def test_step_finish(self, client: TestClient) -> None:
+        client.post("/reset", json={"task_id": "easy_screening"})
+        resp = client.post("/step", json={"action": {"action_type": "finish_review"}})
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["done"] is True
+        assert "info" in data
+    def test_step_query(self, client: TestClient) -> None:
+        reset_resp = client.post("/reset", json={"task_id": "easy_screening", "seed": 0})
+        obs = reset_resp.json()["observation"]
+        meds = obs["current_medications"]
+        if len(meds) >= 2:
+            action = {
+                "action_type": "query_ddi",
+                "drug_id_1": meds[0]["drug_id"],
+                "drug_id_2": meds[1]["drug_id"],
+            }
+            resp = client.post("/step", json={"action": action})
+            assert resp.status_code == 200
+    def test_invalid_action(self, client: TestClient) -> None:
+        client.post("/reset", json={"task_id": "easy_screening"})
+        resp = client.post("/step", json={"action": {"action_type": "invalid_type"}})
+        assert resp.status_code == 422
+class TestState:
+    def test_state(self, client: TestClient) -> None:
+        client.post("/reset", json={"task_id": "easy_screening"})
+        resp = client.get("/state")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert "step_count" in data
+        assert data["task_id"] == "easy_screening"

openenv-polypharmacy/src/polypharmacy_env/tests/test_env_core.py ADDED Viewed

	@@ -0,0 +1,162 @@

+"""Tests for PolypharmacyEnv core logic."""
+from __future__ import annotations
+import pytest
+from polypharmacy_env.env_core import PolypharmacyEnv
+from polypharmacy_env.models import PolypharmacyAction, PolypharmacyObservation
+@pytest.fixture
+def env() -> PolypharmacyEnv:
+    return PolypharmacyEnv()
+class TestReset:
+    def test_reset_returns_observation(self, env: PolypharmacyEnv) -> None:
+        obs = env.reset(task_id="easy_screening", seed=0)
+        assert isinstance(obs, PolypharmacyObservation)
+        assert obs.done is False
+        assert obs.step_index == 0
+        assert len(obs.current_medications) >= 3
+    def test_reset_medium(self, env: PolypharmacyEnv) -> None:
+        obs = env.reset(task_id="budgeted_screening", seed=1)
+        assert obs.remaining_query_budget == 8
+        assert obs.remaining_intervention_budget == 3
+    def test_reset_hard(self, env: PolypharmacyEnv) -> None:
+        obs = env.reset(task_id="complex_tradeoff", seed=2)
+        assert obs.remaining_query_budget == 12
+        assert obs.remaining_intervention_budget == 5
+    def test_default_task(self, env: PolypharmacyEnv) -> None:
+        obs = env.reset()
+        assert obs.task_id == "budgeted_screening"
+class TestStep:
+    def test_query_ddi(self, env: PolypharmacyEnv) -> None:
+        obs = env.reset(task_id="easy_screening", seed=0)
+        meds = obs.current_medications
+        if len(meds) >= 2:
+            action = PolypharmacyAction(
+                action_type="query_ddi",
+                drug_id_1=meds[0].drug_id,
+                drug_id_2=meds[1].drug_id,
+            )
+            result = env.step(action)
+            assert "observation" in result
+            assert "reward" in result
+            assert result["done"] is False or result["done"] is True
+    def test_invalid_action_penalised(self, env: PolypharmacyEnv) -> None:
+        env.reset(task_id="easy_screening", seed=0)
+        action = PolypharmacyAction(
+            action_type="query_ddi",
+            drug_id_1=None,
+            drug_id_2=None,
+        )
+        result = env.step(action)
+        assert result["reward"] < 0
+    def test_finish_review(self, env: PolypharmacyEnv) -> None:
+        env.reset(task_id="easy_screening", seed=0)
+        action = PolypharmacyAction(action_type="finish_review")
+        result = env.step(action)
+        assert result["done"] is True
+        assert "grader_score" in result["info"]
+        score = result["info"]["grader_score"]
+        assert 0.0 <= score <= 1.0
+    def test_intervention_stop(self, env: PolypharmacyEnv) -> None:
+        obs = env.reset(task_id="easy_screening", seed=0)
+        if obs.current_medications:
+            target = obs.current_medications[0].drug_id
+            action = PolypharmacyAction(
+                action_type="propose_intervention",
+                target_drug_id=target,
+                intervention_type="stop",
+                rationale="test",
+            )
+            result = env.step(action)
+            new_obs = PolypharmacyObservation(**result["observation"])
+            drug_ids = [m.drug_id for m in new_obs.current_medications]
+            assert target not in drug_ids
+    def test_budget_exhaustion(self, env: PolypharmacyEnv) -> None:
+        obs = env.reset(task_id="easy_screening", seed=0)
+        # Exhaust query budget
+        meds = obs.current_medications
+        for _ in range(obs.remaining_query_budget + 1):
+            if len(meds) >= 2:
+                action = PolypharmacyAction(
+                    action_type="query_ddi",
+                    drug_id_1=meds[0].drug_id,
+                    drug_id_2=meds[1].drug_id,
+                )
+                result = env.step(action)
+                if result["done"]:
+                    break
+    def test_max_steps_timeout(self, env: PolypharmacyEnv) -> None:
+        obs = env.reset(task_id="easy_screening", seed=0)
+        meds = obs.current_medications
+        if len(meds) < 2:
+            return
+        for _ in range(20):  # more than max_steps=10
+            action = PolypharmacyAction(
+                action_type="query_ddi",
+                drug_id_1=meds[0].drug_id,
+                drug_id_2=meds[1].drug_id,
+            )
+            result = env.step(action)
+            if result["done"]:
+                assert "grader_score" in result["info"] or "timeout" in result["info"]
+                break
+class TestState:
+    def test_state_after_reset(self, env: PolypharmacyEnv) -> None:
+        env.reset(task_id="easy_screening", seed=0)
+        st = env.state
+        assert st.step_count == 0
+        assert st.task_id == "easy_screening"
+        assert st.episode_id is not None
+class TestGraderDeterminism:
+    def test_same_trajectory_same_score(self, env: PolypharmacyEnv) -> None:
+        """Run the same trajectory twice; grader must return the same score."""
+        scores = []
+        for _ in range(2):
+            env.reset(task_id="easy_screening", seed=42)
+            action = PolypharmacyAction(action_type="finish_review")
+            result = env.step(action)
+            scores.append(result["info"]["grader_score"])
+        assert scores[0] == scores[1]
+    def test_intervention_changes_score(self, env: PolypharmacyEnv) -> None:
+        """A meaningful intervention should change the grader score vs. no-op."""
+        # Score with no intervention
+        env.reset(task_id="easy_screening", seed=42)
+        r1 = env.step(PolypharmacyAction(action_type="finish_review"))
+        score_noop = r1["info"]["grader_score"]
+        # Score after stopping a high-risk drug
+        obs = env.reset(task_id="easy_screening", seed=42)
+        high_risk = [m for m in obs.current_medications if m.is_high_risk_elderly]
+        if high_risk:
+            env.step(PolypharmacyAction(
+                action_type="propose_intervention",
+                target_drug_id=high_risk[0].drug_id,
+                intervention_type="stop",
+                rationale="test",
+            ))
+            r2 = env.step(PolypharmacyAction(action_type="finish_review"))
+            score_with = r2["info"]["grader_score"]
+            # Scores should differ (not necessarily larger, depending on the drug)
+            # At minimum, grader is not constant
+            assert isinstance(score_with, float)
+            assert 0.0 <= score_with <= 1.0