diff --git a/.dockerignore b/.dockerignore index 7566d677dc0c4ad7c9c7bc6a176c6b7186e9c728..ea2205ebecdbc61a7ad350e8d4261a94ae2e7466 100644 --- a/.dockerignore +++ b/.dockerignore @@ -9,4 +9,4 @@ **/dist **/.env **/.env.* -!openenv-polypharmacy/.env.example +!.env.example diff --git a/openenv-polypharmacy/.env.example b/.env.example similarity index 100% rename from openenv-polypharmacy/.env.example rename to .env.example diff --git a/.gitignore b/.gitignore index a21ac13278960096fd010e674651b109be6c6cc4..f1beb4e73c68e8f3e03711363ed8909980cd05df 100644 --- a/.gitignore +++ b/.gitignore @@ -4,7 +4,7 @@ venv/ env/ .env .env.* -!openenv-polypharmacy/.env.example +!.env.example *.py[cod] __pycache__/ .pytest_cache/ @@ -29,7 +29,3 @@ pnpm-debug.log* *.swp .DS_Store -# --- Project-specific nested paths --- -openenv-polypharmacy/frontend/node_modules/ -openenv-polypharmacy/frontend/dist/ -openenv-polypharmacy/.pytest_cache/ diff --git a/.gitignore copy b/.gitignore copy deleted file mode 100644 index a21ac13278960096fd010e674651b109be6c6cc4..0000000000000000000000000000000000000000 --- a/.gitignore copy +++ /dev/null @@ -1,35 +0,0 @@ -# --- Python --- -venv/ -.venv/ -env/ -.env -.env.* -!openenv-polypharmacy/.env.example -*.py[cod] -__pycache__/ -.pytest_cache/ -.mypy_cache/ -.ruff_cache/ -.coverage -coverage.xml - -# --- Node / frontend --- -node_modules/ -**/node_modules/ -frontend/dist/ -**/dist/ -npm-debug.log* -yarn-debug.log* -yarn-error.log* -pnpm-debug.log* - -# --- Build / temp --- -*.log -*.tmp -*.swp -.DS_Store - -# --- Project-specific nested paths --- -openenv-polypharmacy/frontend/node_modules/ -openenv-polypharmacy/frontend/dist/ -openenv-polypharmacy/.pytest_cache/ diff --git a/Dockerfile b/Dockerfile index 10f199b9446b1375bce69953826c903f9ed43efe..68b69d986a780501f6c9461410b2add26413473e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,8 +1,8 @@ FROM node:20-alpine AS frontend-builder WORKDIR /app/frontend -COPY openenv-polypharmacy/frontend/package*.json ./ 
+COPY frontend/package*.json ./ RUN npm ci -COPY openenv-polypharmacy/frontend/ ./ +COPY frontend/ ./ RUN npm run build FROM python:3.11-slim @@ -13,15 +13,15 @@ RUN apt-get update && \ WORKDIR /app -COPY openenv-polypharmacy/backend/requirements.txt /app/backend/requirements.txt +COPY backend/requirements.txt /app/backend/requirements.txt RUN pip install --no-cache-dir -r /app/backend/requirements.txt -COPY openenv-polypharmacy/backend /app/backend -COPY openenv-polypharmacy/data /app/data -COPY openenv-polypharmacy/scripts /app/scripts -COPY openenv-polypharmacy/openenv.yaml /app/openenv.yaml -COPY openenv-polypharmacy/.env.example /app/.env.example -COPY openenv-polypharmacy/inference.py /app/inference.py +COPY backend /app/backend +COPY data /app/data +COPY scripts /app/scripts +COPY openenv.yaml /app/openenv.yaml +COPY .env.example /app/.env.example +COPY inference.py /app/inference.py COPY --from=frontend-builder /app/frontend/dist /app/frontend/dist diff --git a/PROMPT.md b/PROMPT.md new file mode 100644 index 0000000000000000000000000000000000000000..d7d5481b1ba6a83c8590ebcec024b911f5800982 --- /dev/null +++ b/PROMPT.md @@ -0,0 +1,571 @@ +You are an expert Python backend, ML, and infrastructure engineer. +Your task is to implement a complete, production-ready OpenEnv environment called **PolypharmacyEnv** for training and evaluating agentic RL policies that act as an "elderly polypharmacy safety agent" (clinical pharmacist assistant). + +The deliverable MUST satisfy all of the following: +- Fully compliant with the OpenEnv spec (typed models, `step()` / `reset()` / `state()`, `openenv.yaml`, HTTP server, Dockerfile). +- Simulates a realistic healthcare workflow around elderly polypharmacy and dangerous drug combinations. +- Defines at least **3 tasks** (easy → medium → hard) with deterministic agent graders producing scores in [0.0, 1.0]. +- Provides shaped rewards over the trajectory (not just sparse terminal rewards). 
+- Includes a baseline LLM-based inference script `inference.py` in the repo root, following the evaluation requirements: + - Uses the OpenAI Python client. + - Reads `OPENAI_API_KEY`, `API_BASE_URL`, `MODEL_NAME`, and `HF_TOKEN` from the environment. + - Emits structured stdout logs in the exact `[START]`, `[STEP]`, `[END]` format from the OpenEnv sample inference script. +- Is containerized and deployable as a **Hugging Face Space** tagged with `openenv` that responds to OpenEnv-style `reset` / `step` / `state` HTTP calls. + +Implement everything described below. + +================================================= +1. Repository and folder structure +================================================= + +Create a Python package repository with this structure (names are important unless clearly labeled as examples): + +- `openenv-polypharmacy/` + - `openenv.yaml` + - `README.md` + - `requirements.txt` + - `Dockerfile` + - `inference.py` # baseline LLM agent per spec + - `pyproject.toml` or `setup.cfg` (optional but recommended) + - `src/` + - `polypharmacy_env/` + - `__init__.py` + - `config.py` + - `models.py` # Action, Observation, State, helper models + - `env_core.py` # PolypharmacyEnv implementation + - `tasks.py` # task setup utilities + - `graders.py` # deterministic graders for each task + - `rewards.py` # reward shaping logic + - `data_loader.py` # load/preprocess patient and lookup data + - `ddi_simulator.py` # local DDI / guideline simulator + - `api/` + - `__init__.py` + - `schemas.py` # HTTP request/response schemas + - `server.py` # FastAPI app exposing OpenEnv endpoints + - `baselines/` + - `__init__.py` + - `heuristic_agent.py` # simple rule-based baseline agent + - `random_agent.py` # trivial random baseline (optional) + - `tests/` + - `__init__.py` + - `test_env_core.py` + - `test_api.py` + - `data/` + - `raw/` # placeholder for real/synthetic source data + - `processed/` + - `lookups/` + - `ddi_rules.csv` + - `beers_criteria.csv` + - 
`drug_metadata.csv` + - `scripts/` + - `preprocess_data.py` + - `run_validation.sh` # optional; runs OpenEnv validator, tests, etc. + +Use Python 3.10+ with full type hints, and keep the code black/isort-compatible. + +================================================= +2. Domain, data, and clinical abstraction +================================================= + +2.1. Core scenario + +Model an elderly patient (age ≥ 65) with: +- Demographics: age, sex. +- Comorbidities: e.g., hypertension, diabetes, heart failure, CKD, dementia. +- Basic labs: kidney function (eGFR category), liver function category. +- A current medication list (polypharmacy, e.g., 3–15 drugs depending on task). + +Each **episode** is one medication-review session where the agent: +- Observes patient info and current meds. +- Optionally **queries** a DDI/guideline tool for specific drug pairs. +- Proposes **interventions**: + - `stop`: discontinue a drug. + - `dose_reduce`: lower dose of a drug. + - `substitute`: swap to a safer alternative. + - `add_monitoring`: keep the drug but flag extra monitoring. +- Calls `finish_review` when it decides the regimen is acceptable or budgets are exhausted. + +No external PHI, EHRs, or online APIs: all data is **synthetic** or de-identified and local to the container (CSV files). + +2.2. 
Data files and CSV schemas + +Implement local CSVs under `data/lookups/`: + +**`drug_metadata.csv`** +- `drug_id` (string; unique key) +- `generic_name` (string) +- `atc_class` (string) +- `is_high_risk_elderly` (0/1) +- `default_dose_mg` (float) +- `min_dose_mg` (float) +- `max_dose_mg` (float) + +**`beers_criteria.csv`** +- `drug_id` (string) +- `criterion_type` (enum string: `avoid`, `caution`, `dose_adjust`, `avoid_in_condition`) +- `condition` (nullable string; e.g., `CKD`, `dementia`) +- `rationale` (brief text) + +**`ddi_rules.csv`** +- `drug_id_1` (string; normalized so `drug_id_1 < drug_id_2` lexicographically) +- `drug_id_2` (string) +- `severity` (enum string: `mild`, `moderate`, `severe`) +- `mechanism` (short text) +- `recommendation` (enum string: `avoid_combination`, `monitor_closely`, `dose_adjust`, `no_action`) +- `base_risk_score` (float in [0.0, 1.0]) + +Implement a synthetic patient-episode dataset under `data/processed/`: + +**`patients_polypharmacy.csv`** +- `episode_id` (string) +- `age` (int) +- `sex` (enum: `M`, `F`, `O`) +- `conditions` (semicolon-separated; e.g., `HTN;DM;CKD`) +- `eGFR_category` (enum: `normal`, `mild`, `moderate`, `severe`) +- `liver_function_category` (enum: `normal`, `impaired`) +- `medication_ids` (semicolon-separated list of `drug_id`) +- `baseline_risk_score` (float in [0.0, 1.0]) + +2.3. Preprocessing script + +In `scripts/preprocess_data.py`: +- If real data is not provided, procedurally generate synthetic but plausible data using: + - Random combinations of conditions and drugs constrained by simple rules (e.g., CKD + renally-cleared drugs). + - Controlled distribution of high-risk DDIs and Beers violations. +- Explicitly tag episodes as easy/medium/hard (e.g., via number of drugs, number/severity of DDIs, and number of Beers issues). +- Save `patients_polypharmacy.csv` ready for the environment to consume. + +================================================= +3. 
OpenEnv models and environment implementation +================================================= + +3.1. Models + +In `models.py`, define dataclasses or Pydantic models that extend the appropriate OpenEnv base types (`Action`, `Observation`, `State`) and are JSON-compatible. + +Auxiliary models: + +**`MedicationEntry`** +- `drug_id: str` +- `generic_name: str` +- `atc_class: str` +- `dose_mg: float` +- `frequency: str` # e.g., `qd`, `bid` +- `route: str` # e.g., `po` +- `is_high_risk_elderly: bool` +- `beers_flags: list[str]` # e.g., `["avoid", "dose_adjust_CKD"]` + +**`InteractionQueryRecord`** +- `drug_id_1: str` +- `drug_id_2: str` +- `severity: str | None` +- `recommendation: str | None` +- `risk_score: float | None` +- `step_index: int` + +**`InterventionRecord`** +- `target_drug_id: str` +- `action_type: Literal["stop", "dose_reduce", "substitute", "add_monitoring"]` +- `proposed_new_drug_id: str | None` +- `rationale: str` +- `step_index: int` + +Core wire models: + +**`PolypharmacyObservation`** (extends OpenEnv `Observation`) +- `episode_id: str` +- `task_id: Literal["easy_screening", "budgeted_screening", "complex_tradeoff"]` +- `age: int` +- `sex: str` +- `conditions: list[str]` +- `eGFR_category: str` +- `liver_function_category: str` +- `current_medications: list[MedicationEntry]` +- `interaction_queries: list[InteractionQueryRecord]` +- `interventions: list[InterventionRecord]` +- `step_index: int` +- `remaining_query_budget: int` +- `remaining_intervention_budget: int` +- `shaped_reward: float` # reward from last step +- `done: bool` + +**`PolypharmacyAction`** (extends OpenEnv `Action`) +- `action_type: Literal["query_ddi", "propose_intervention", "finish_review"]` +- `drug_id_1: str | None` # for DDI queries or some interventions +- `drug_id_2: str | None` # for DDI queries +- `target_drug_id: str | None` # for interventions +- `intervention_type: Literal["stop", "dose_reduce", "substitute", "add_monitoring", "none"] | None` +- 
`proposed_new_drug_id: str | None` +- `rationale: str | None` + +**`PolypharmacyState`** (extends OpenEnv `State`) +- `episode_id: str` +- `task_id: str` +- `step_count: int` +- `max_steps: int` +- `num_query_actions: int` +- `num_interventions: int` + +3.2. Environment core + +In `env_core.py`, implement `PolypharmacyEnv` extending the appropriate OpenEnv environment base class. It must implement: + +**`reset(task_id: str | None = None) -> PolypharmacyObservation`** +- If `task_id` is `None`, default to medium (`budgeted_screening`). +- Sample an episode from `patients_polypharmacy.csv` filtered by difficulty. +- Initialize: + - `episode_id` + - `step_count = 0` + - task-specific budgets (query, interventions, max_steps) + - baseline regimen and risk + - empty `interaction_queries` and `interventions` +- Return the initial `PolypharmacyObservation` with: + - `step_index = 0` + - `shaped_reward = 0.0` + - `done = False` + +**`step(action: PolypharmacyAction) -> dict`** +- Validate the action; if invalid: + - Apply a negative reward. + - Do not modify regimen, but log error in `info`. +- If `action_type == "query_ddi"`: + - If query budget exhausted, apply penalty and do not query. + - Else: + - Use `ddi_simulator.lookup_ddi(drug_id_1, drug_id_2)` to get severity, recommendation, base_risk_score. + - Append an `InteractionQueryRecord`. + - Apply a small negative reward for query cost. +- If `action_type == "propose_intervention"`: + - If intervention budget exhausted, apply penalty and ignore change. + - Else: + - Update `current_medications` according to `intervention_type`: + - `stop`: remove medication. + - `dose_reduce`: adjust dose downward within [min_dose_mg, default_dose_mg]. + - `substitute`: replace with a safer alternative from same `atc_class`. + - `add_monitoring`: keep drug but tag in internal state. + - Append an `InterventionRecord`. + - Recompute current regimen risk using the risk model (see 3.3). 
+ - Compute shaped reward = (previous_risk - new_risk) - small intervention cost. +- If `action_type == "finish_review"`: + - Mark `done = True`. + - Call the task’s grader to get episode-level score in [0.0, 1.0]. + - Add this as a terminal bonus to the current step reward. + +- In all cases: + - Increment `step_count`. + - Check `max_steps`; if exceeded, auto-terminate: + - `done = True` + - apply time-out penalty + - call grader with current trajectory for a final score if appropriate. + - Construct next `PolypharmacyObservation` with updated fields. + - Return a dict: + - `observation`: `PolypharmacyObservation` + - `reward`: float shaped reward for this step + - `done`: bool + - `info`: dict with fields like `current_risk`, `baseline_risk`, `grader_score_if_terminal`, and debug flags. + +**`state` property** +- Returns `PolypharmacyState` reflecting the current internal state. + +3.3. DDI simulator and risk model + +In `ddi_simulator.py`: +- Load `ddi_rules.csv` once via `data_loader`. +- Implement `lookup_ddi(drug_id_1, drug_id_2) -> tuple[severity, recommendation, base_risk_score]`: + - Normalize the pair ordering. + - Look up row; if missing, return: + - severity = `"none"` + - recommendation = `"no_action"` + - base_risk_score = 0.0 + +In `rewards.py` (or a dedicated module), implement: +- `compute_regimen_risk(current_drug_ids, patient_context, ddi_rules, beers_rules, drug_metadata) -> float` + - Aggregate contributions from: + - Beers violations (weighted by `criterion_type` and relevant conditions). + - DDI base risk scores for all present drug pairs. + - High-risk elderly drugs. + - Normalize and clip to [0.0, 1.0]. + +Use this function to compute: +- `baseline_risk` at episode start. +- Risk after each intervention step. + +Also implement: +- `compute_shaped_reward(previous_risk, new_risk, action, context, partial_metrics) -> float` + - Positive component: `previous_risk - new_risk`. 
+ - Negative components: per-query cost, per-intervention cost, invalid-action penalty, time-out penalty. + +================================================= +4. Tasks and graders (3 difficulty levels) +================================================= + +Define three task IDs and semantics in `tasks.py` and `graders.py`: + +Task IDs: +- `easy_screening` +- `budgeted_screening` +- `complex_tradeoff` + +4.1. `easy_screening` (easy) + +- Small regimen: 3–5 drugs. +- Exactly one **severe** DDI pair and possibly one simple Beers violation. +- Budgets: + - query_budget ≈ 4 + - intervention_budget ≈ 2 + - max_steps ≈ 10 + +Grader: +- Input: full trajectory, baseline risk, final risk, list of interventions. +- Compute: + - `risk_reduction = max(0.0, baseline_risk - final_risk) / max(baseline_risk, ε)` (normalized). + - `targeted_intervention_flag = 1.0` if at least one intervention affects one of the drugs in the known severe DDI pair, else 0.0. +- Score: + - `score = 0.5 * risk_reduction + 0.5 * targeted_intervention_flag` + - Clip to [0.0, 1.0]. + +4.2. `budgeted_screening` (medium) + +- Medium regimen: 6–10 drugs. +- Multiple DDIs (mild/moderate/severe) and multiple Beers issues. +- Budgets: + - query_budget ≈ 8 + - intervention_budget ≈ 3 + - max_steps ≈ 20 + +Grader: +- Compute: + - `risk_reduction_score` as normalized risk drop. + - `intervention_precision_score` = fraction of interventions that actually reduce risk or fix guideline violations. + - `query_efficiency_score` = (number of severe/moderate DDIs discovered) / (number of queries used), normalized. +- Weighted score, for example: + - `score = 0.5 * risk_reduction_score + 0.3 * intervention_precision_score + 0.2 * query_efficiency_score` + - Clip to [0.0, 1.0]. + +4.3. `complex_tradeoff` (hard) + +- Larger regimen: 10–15 drugs. +- Some drugs are **clinically critical** (e.g., anticoagulants, insulin analogues) and encoded as such in `drug_metadata` or a small internal map. 
+- Episodes contain: + - multiple DDIs and Beers issues, including ones involving critical drugs. + - safer substitutes for some risky drugs. + +Budgets: +- query_budget ≈ 12 +- intervention_budget ≈ 5 +- max_steps ≈ 30 + +Grader adds a **regimen disruption penalty** component: +- Metrics: + - `risk_reduction_score` (as above). + - `critical_drug_penalty` = penalty if a critical drug is stopped without substitution to another suitable agent. + - `total_drug_changes` = number of drugs stopped or substituted. + - `regimen_disruption_penalty` derived from `total_drug_changes` and `critical_drug_penalty`. + +Example scoring: +- `base = risk_reduction_score` +- `penalty = α * regimen_disruption_penalty` +- `score = clamp(base - penalty, 0.0, 1.0)` + +4.4. Reward shaping + +In `rewards.py`, define a consistent shaping scheme: +- On each query: + - Small negative reward (e.g., −0.01) plus any small bonus if it discovers a severe DDI, if desired. +- On each intervention: + - Reward ≈ (previous_risk - new_risk) − small intervention cost. +- On invalid actions: + - Larger negative reward (e.g., −0.1) and no state change. +- On `finish_review`: + - Add the task-level `score` ∈ [0.0, 1.0] from the corresponding grader to that step’s shaped reward. + +Ensure the sum of step rewards per episode remains in a reasonable numeric range (e.g., roughly -5 to +5) while still allowing meaningful differentiation by graders. + +================================================= +5. HTTP API server and openenv.yaml +================================================= + +5.1. HTTP server (FastAPI) + +In `api/server.py`: +- Implement a FastAPI app that maintains a `PolypharmacyEnv` instance (or a multiplexing scheme if needed). +- Endpoints: + - `POST /reset`: + - Request body: may include `task_id` (string). + - Response: serialized `PolypharmacyObservation`. + - `POST /step`: + - Request body: serialized `PolypharmacyAction`. 
+ - Response: dict with: + - `observation`: `PolypharmacyObservation` + - `reward`: float + - `done`: bool + - `info`: dict + - `GET /state`: + - Response: `PolypharmacyState`. + +Provide a module-level `app = FastAPI(...)` object for use with uvicorn and Hugging Face Spaces. Ensure the JSON schema is consistent with OpenEnv clients (simple, flat JSON for observation/action/state). + +5.2. `openenv.yaml` + +At repo root, define `openenv.yaml` consistent with the latest OpenEnv spec. At minimum, include: +- `name`: `polypharmacy_env` +- `version`: e.g., `0.1.0` +- `description`: human-readable description. +- `author`: your details. +- `tags`: e.g., `["healthcare", "polypharmacy", "openenv"]` +- `tasks`: + - One entry per task: + - `id`: `"easy_screening"` / `"budgeted_screening"` / `"complex_tradeoff"` + - `description`: one-line description + - `difficulty`: `"easy"`, `"medium"`, `"hard"` + +Ensure `openenv validate` (or equivalent validator) passes once implemented. + +================================================= +6. Baseline heuristic (non-LLM) agent +================================================= + +In `baselines/heuristic_agent.py`, implement a simple, deterministic baseline agent that: + +For each episode: +- Iterates through all unordered medication pairs within query budget: + - Calls `query_ddi` via the environment for each pair until the query budget is exhausted or all pairs are examined. + - Records severe and moderate interactions. +- After querying: + - For each severe DDI pair: + - Try `substitute` one of the drugs using `drug_metadata`: + - Prefer substitute within same `atc_class` that: + - is not marked high-risk elderly. + - does not participate in known severe DDIs with the rest of the regimen. + - If no substitute exists, propose `stop` for the higher-risk drug. + - Respect intervention budget limits. +- Finally, call `finish_review`. 
+ +This baseline should be callable as a simple Python function that interacts with `PolypharmacyEnv` directly (without HTTP). + +================================================= +7. Baseline LLM inference script (inference.py) +================================================= + +At repo root, create `inference.py` that: + +7.1. Uses the OpenAI Python client + +- Import and configure the official OpenAI Python client. +- Read environment variables: + - `OPENAI_API_KEY` (required). + - `API_BASE_URL` (base URL for LLM; default to OpenAI standard if not set). + - `MODEL_NAME` (e.g., `gpt-4.1` or similar). + - `HF_TOKEN` (if needed for HF auth; do not hardcode). +- Read `POLYPHARMACY_ENV_URL` (or similar) for the environment’s HTTP base URL. + +7.2. Implements the required logging format + +- For each **run** across all tasks: + - Emit a `[START]` line with a JSON payload exactly matching the evaluation specification: + - Fields such as `run_id`, `task_id`, `model`, etc., in the same order and naming as the sample OpenEnv inference script. +- For each **step** in an episode: + - Emit a `[STEP]` line with JSON fields including: + - `run_id` + - `task_id` + - `episode_id` + - `step_index` + - `observation_summary` (brief, machine-readable summary) + - `action_payload` (the action sent to the env) + - `reward` + - `done` +- After finishing an episode for a task: + - Emit an `[END]` line summarizing: + - `run_id` + - `task_id` + - per-episode statistics (e.g., total reward, grader score from last step’s `info`). +- The stdout format MUST follow the sample exactly: + - Same tags: `[START]`, `[STEP]`, `[END]`. + - Same JSON field names and ordering as the provided reference. + - No extra prints except these structured logs (and necessary error messages to stderr). + +7.3. LLM agent loop + +- For each task (`easy_screening`, `budgeted_screening`, `complex_tradeoff`): + - Run a fixed small number of episodes (e.g., 5–10 per task) for baseline scoring. 
+ - For each episode: + - Call `/reset` with the task id. + - At each step: + - Summarize the observation into a concise prompt for the LLM: + - Include age, sex, conditions, high-risk flags, budgets, and a compressed view of meds and previous actions. + - Ask the model to output a **strict JSON** representing `PolypharmacyAction` fields. + - Parse and validate the JSON; if invalid, fall back to a safe default (e.g., `finish_review` or a no-op) and penalize in evaluation. + - Send this action to `/step` and log `[STEP]`. + - End when `done=True` or max_steps is reached. +- At the end, print aggregate scores per task and overall. + +Make sure runtime < 20 minutes and that the script can run within 2 vCPUs and 8 GB RAM. + +================================================= +8. Dockerfile and Hugging Face Space +================================================= + +8.1. Dockerfile + +Create a `Dockerfile` that: +- Starts from a slim Python image (e.g., `python:3.11-slim`). +- Installs system dependencies as needed (e.g., `build-essential`, `curl`). +- Copies the project into the container. +- Installs Python dependencies from `requirements.txt`. +- Sets appropriate environment variables for the app (e.g., `PORT=7860`). +- Exposes port 7860. +- Uses a `CMD` or `ENTRYPOINT` that runs the FastAPI server, for example: + - `uvicorn polypharmacy_env.api.server:app --host 0.0.0.0 --port 7860` + +8.2. Hugging Face Space + +Ensure the repository is ready to be used as a Hugging Face Space: +- Space type: `docker`. +- Tag: `openenv`. +- On container start, the server must listen on the correct port and respond to: + - `POST /reset` + - `POST /step` + - `GET /state` +- The environment must start cleanly with `docker build` + `docker run` locally. + +================================================= +9. 
README and documentation +================================================= + +In `README.md`, include: + +- **Environment description & motivation**: + - What PolypharmacyEnv simulates. + - Why elderly polypharmacy safety matters. +- **Action and observation spaces**: + - Describe `PolypharmacyAction`, `PolypharmacyObservation`, and `PolypharmacyState` fields and semantics. +- **Task descriptions**: + - `easy_screening`, `budgeted_screening`, `complex_tradeoff`, their difficulty and goals. +- **Reward structure**: + - Summarize shaping and terminal rewards. +- **Setup & usage**: + - How to install dependencies. + - How to run the API server locally (uvicorn command). + - How to run the heuristic baseline. + - How to run `inference.py` with environment variables. +- **Baseline scores**: + - Document reproducible baseline scores for each task (heuristic agent, and LLM baseline if available). + +================================================= +10. Validation and quality gates +================================================= + +- Ensure: + - `openenv.yaml` and the HTTP server pass the OpenEnv validation script. + - `docker build` and `docker run` work without errors. + - `inference.py` completes under 20 minutes, within 2 vCPUs / 8 GB RAM. + - All graders: + - Are deterministic. + - Return scores strictly in [0.0, 1.0]. + - No grader returns a constant score irrespective of behavior. + +Aim for clean, well-structured, well-documented code with clear separation of concerns between: +- Data loading, +- Environment state & dynamics, +- Reward/grade logic, +- HTTP serving, +- Baseline agents and inference. 
\ No newline at end of file diff --git a/README.MD b/README.MD index dddbf4047eacd2e74adc50df3b8cf73f80f162f9..3bc9290d48e3b9b05d31a2d253526c654735804e 100644 --- a/README.MD +++ b/README.MD @@ -1,3 +1,12 @@ +--- +title: Polypharmacy +emoji: 📉 +colorFrom: yellow +colorTo: blue +sdk: docker +pinned: false +--- + # PolypharmacyEnv Monorepo for an OpenEnv-compatible medication safety environment with: @@ -12,8 +21,7 @@ Monorepo for an OpenEnv-compatible medication safety environment with: ## Repository Structure ```text -openenv-polypharmacy/ - backend/ +backend/ main.py # ASGI entrypoint (uvicorn target) requirements.txt # Backend dependencies Dockerfile # Backend container @@ -32,22 +40,22 @@ openenv-polypharmacy/ graders.py # Task graders tasks.py # Task/episode selection tests/ # Backend tests - frontend/ +frontend/ src/ # React UI code package.json Dockerfile # Frontend container - data/ +data/ lookups/ # drug_metadata.csv, ddi_rules.csv, beers_criteria.csv processed/ # patients_polypharmacy.csv - scripts/ +scripts/ preprocess_data.py # Synthetic data generation dev_backend.sh # Local backend run helper dev_frontend.sh # Local frontend run helper run_validation.sh # Tests + baseline validation - docker-compose.yml # Full stack orchestration - openenv.yaml # OpenEnv manifest - inference.py # Optional CLI inference baseline - .env.example # Environment template +docker-compose.yml # Full stack orchestration +openenv.yaml # OpenEnv manifest +inference.py # Baseline inference script (required at root) +.env.example # Environment template ``` --- @@ -85,11 +93,7 @@ Create `.env`: cp .env.example .env ``` -Set values: - -- `GROQ_API_KEY=...` (required) -- `GROQ_BASE_URL=https://api.groq.com/openai/v1` (recommended) -- `GROQ_MODEL_NAME=llama-3.3-70b-versatile` (recommended) +Set values for local backend integrations as needed. 
--- @@ -173,9 +177,9 @@ This repo now includes a **root `Dockerfile`** that builds frontend + backend in In Space Settings -> Variables and Secrets: -- Secret: `GROQ_API_KEY` -- Variable: `GROQ_BASE_URL=https://api.groq.com/openai/v1` -- Variable: `GROQ_MODEL_NAME=llama-3.3-70b-versatile` +- Secret: `HF_TOKEN` +- Variable: `API_BASE_URL=https://router.huggingface.co/v1` +- Variable: `MODEL_NAME=Qwen/Qwen2.5-72B-Instruct` ### 3) Push this repository to the Space @@ -225,6 +229,13 @@ Or run validation script: ./scripts/run_validation.sh ``` +### Submission validation + +```bash +openenv validate +python inference.py +``` + --- ## Notes diff --git a/openenv-polypharmacy/backend/Dockerfile b/backend/Dockerfile similarity index 100% rename from openenv-polypharmacy/backend/Dockerfile rename to backend/Dockerfile diff --git a/openenv-polypharmacy/backend/__init__.py b/backend/__init__.py similarity index 100% rename from openenv-polypharmacy/backend/__init__.py rename to backend/__init__.py diff --git a/openenv-polypharmacy/backend/main.py b/backend/main.py similarity index 100% rename from openenv-polypharmacy/backend/main.py rename to backend/main.py diff --git a/openenv-polypharmacy/backend/requirements.txt b/backend/requirements.txt similarity index 100% rename from openenv-polypharmacy/backend/requirements.txt rename to backend/requirements.txt diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/__init__.py b/backend/src/polypharmacy_env/__init__.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/__init__.py rename to backend/src/polypharmacy_env/__init__.py diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/api/__init__.py b/backend/src/polypharmacy_env/api/__init__.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/api/__init__.py rename to backend/src/polypharmacy_env/api/__init__.py diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/api/app.py 
b/backend/src/polypharmacy_env/api/app.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/api/app.py rename to backend/src/polypharmacy_env/api/app.py diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/api/routes/__init__.py b/backend/src/polypharmacy_env/api/routes/__init__.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/api/routes/__init__.py rename to backend/src/polypharmacy_env/api/routes/__init__.py diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/api/routes/agent.py b/backend/src/polypharmacy_env/api/routes/agent.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/api/routes/agent.py rename to backend/src/polypharmacy_env/api/routes/agent.py diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/api/server.py b/backend/src/polypharmacy_env/api/server.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/api/server.py rename to backend/src/polypharmacy_env/api/server.py diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/baselines/__init__.py b/backend/src/polypharmacy_env/baselines/__init__.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/baselines/__init__.py rename to backend/src/polypharmacy_env/baselines/__init__.py diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/baselines/heuristic_agent.py b/backend/src/polypharmacy_env/baselines/heuristic_agent.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/baselines/heuristic_agent.py rename to backend/src/polypharmacy_env/baselines/heuristic_agent.py diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/baselines/random_agent.py b/backend/src/polypharmacy_env/baselines/random_agent.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/baselines/random_agent.py rename to 
backend/src/polypharmacy_env/baselines/random_agent.py diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/client.py b/backend/src/polypharmacy_env/client.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/client.py rename to backend/src/polypharmacy_env/client.py diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/config.py b/backend/src/polypharmacy_env/config.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/config.py rename to backend/src/polypharmacy_env/config.py diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/data_loader.py b/backend/src/polypharmacy_env/data_loader.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/data_loader.py rename to backend/src/polypharmacy_env/data_loader.py diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/ddi_simulator.py b/backend/src/polypharmacy_env/ddi_simulator.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/ddi_simulator.py rename to backend/src/polypharmacy_env/ddi_simulator.py diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/env_core.py b/backend/src/polypharmacy_env/env_core.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/env_core.py rename to backend/src/polypharmacy_env/env_core.py diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/graders.py b/backend/src/polypharmacy_env/graders.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/graders.py rename to backend/src/polypharmacy_env/graders.py diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/models.py b/backend/src/polypharmacy_env/models.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/models.py rename to backend/src/polypharmacy_env/models.py diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/rewards.py 
b/backend/src/polypharmacy_env/rewards.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/rewards.py rename to backend/src/polypharmacy_env/rewards.py diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/services/__init__.py b/backend/src/polypharmacy_env/services/__init__.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/services/__init__.py rename to backend/src/polypharmacy_env/services/__init__.py diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/services/groq_agent.py b/backend/src/polypharmacy_env/services/groq_agent.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/services/groq_agent.py rename to backend/src/polypharmacy_env/services/groq_agent.py diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/tasks.py b/backend/src/polypharmacy_env/tasks.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/tasks.py rename to backend/src/polypharmacy_env/tasks.py diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/tests/__init__.py b/backend/src/polypharmacy_env/tests/__init__.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/tests/__init__.py rename to backend/src/polypharmacy_env/tests/__init__.py diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/tests/test_api.py b/backend/src/polypharmacy_env/tests/test_api.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/tests/test_api.py rename to backend/src/polypharmacy_env/tests/test_api.py diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/tests/test_env_core.py b/backend/src/polypharmacy_env/tests/test_env_core.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/tests/test_env_core.py rename to backend/src/polypharmacy_env/tests/test_env_core.py diff --git 
a/openenv-polypharmacy/data/lookups/beers_criteria.csv b/data/lookups/beers_criteria.csv similarity index 100% rename from openenv-polypharmacy/data/lookups/beers_criteria.csv rename to data/lookups/beers_criteria.csv diff --git a/openenv-polypharmacy/data/lookups/ddi_rules.csv b/data/lookups/ddi_rules.csv similarity index 100% rename from openenv-polypharmacy/data/lookups/ddi_rules.csv rename to data/lookups/ddi_rules.csv diff --git a/openenv-polypharmacy/data/lookups/drug_metadata.csv b/data/lookups/drug_metadata.csv similarity index 100% rename from openenv-polypharmacy/data/lookups/drug_metadata.csv rename to data/lookups/drug_metadata.csv diff --git a/openenv-polypharmacy/data/processed/patients_polypharmacy.csv b/data/processed/patients_polypharmacy.csv similarity index 100% rename from openenv-polypharmacy/data/processed/patients_polypharmacy.csv rename to data/processed/patients_polypharmacy.csv diff --git a/openenv-polypharmacy/docker-compose.yml b/docker-compose.yml similarity index 100% rename from openenv-polypharmacy/docker-compose.yml rename to docker-compose.yml diff --git a/openenv-polypharmacy/frontend/Dockerfile b/frontend/Dockerfile similarity index 100% rename from openenv-polypharmacy/frontend/Dockerfile rename to frontend/Dockerfile diff --git a/openenv-polypharmacy/frontend/index.html b/frontend/index.html similarity index 100% rename from openenv-polypharmacy/frontend/index.html rename to frontend/index.html diff --git a/openenv-polypharmacy/frontend/package-lock.json b/frontend/package-lock.json similarity index 100% rename from openenv-polypharmacy/frontend/package-lock.json rename to frontend/package-lock.json diff --git a/openenv-polypharmacy/frontend/package.json b/frontend/package.json similarity index 100% rename from openenv-polypharmacy/frontend/package.json rename to frontend/package.json diff --git a/openenv-polypharmacy/frontend/src/App.jsx b/frontend/src/App.jsx similarity index 100% rename from 
openenv-polypharmacy/frontend/src/App.jsx rename to frontend/src/App.jsx diff --git a/openenv-polypharmacy/frontend/src/main.jsx b/frontend/src/main.jsx similarity index 100% rename from openenv-polypharmacy/frontend/src/main.jsx rename to frontend/src/main.jsx diff --git a/openenv-polypharmacy/frontend/src/styles.css b/frontend/src/styles.css similarity index 100% rename from openenv-polypharmacy/frontend/src/styles.css rename to frontend/src/styles.css diff --git a/openenv-polypharmacy/frontend/vite.config.js b/frontend/vite.config.js similarity index 100% rename from openenv-polypharmacy/frontend/vite.config.js rename to frontend/vite.config.js diff --git a/inference.py b/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..a8a00389ea7c79f9c7c3e1ec5c0f21eda89ceaf2 --- /dev/null +++ b/inference.py @@ -0,0 +1,188 @@ +#!/usr/bin/env python3 +"""Submission inference script for Polypharmacy OpenEnv environment. + +Required environment variables: + API_BASE_URL OpenAI-compatible base URL + MODEL_NAME Model identifier + HF_TOKEN API key/token + +Optional: + POLYPHARMACY_ENV_URL Environment API base (default: http://localhost:7860) +""" + +from __future__ import annotations + +import json +import os +import re +from typing import Any, Dict, List + +import requests +from openai import OpenAI + +API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1") +MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct") +HF_TOKEN = os.getenv("HF_TOKEN", "") +ENV_URL = os.getenv("POLYPHARMACY_ENV_URL", "http://localhost:7860").rstrip("/") + +BENCHMARK = "polypharmacy_env" +TASKS = ["easy_screening", "budgeted_screening", "complex_tradeoff"] +MAX_STEPS = 16 +TEMPERATURE = 0.0 +MAX_TOKENS = 220 + +SYSTEM_PROMPT = ( + "You are a clinical-pharmacist agent. 
" + "Return one JSON action only with keys matching this schema: " + '{"action_type":"query_ddi|propose_intervention|finish_review",' + '"drug_id_1":"", "drug_id_2":"", "target_drug_id":"",' + '"intervention_type":"stop|dose_reduce|substitute|add_monitoring",' + '"proposed_new_drug_id":"", "rationale":""}. ' + "Prefer safe, high-impact actions and finish when useful actions are exhausted." +) + + +def _b(v: bool) -> str: + return str(bool(v)).lower() + + +def _fmt_reward(v: float) -> str: + return f"{float(v):.2f}" + + +def _clamp01(v: float) -> float: + return max(0.0, min(1.0, float(v))) + + +def log_start(task: str) -> None: + print(f"[START] task={task} env={BENCHMARK} model={MODEL_NAME}", flush=True) + + +def log_step(step: int, action_str: str, reward: float, done: bool, error: str | None) -> None: + err = error if error else "null" + print( + f"[STEP] step={step} action={action_str} reward={_fmt_reward(reward)} " + f"done={_b(done)} error={err}", + flush=True, + ) + + +def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None: + rewards_str = ",".join(_fmt_reward(r) for r in rewards) + print( + f"[END] success={_b(success)} steps={steps} score={_clamp01(score):.3f} rewards={rewards_str}", + flush=True, + ) + + +def _safe_json(text: str) -> Dict[str, Any]: + text = text.strip() + if text.startswith("```"): + text = re.sub(r"^```[a-zA-Z]*\n?", "", text) + text = text.replace("```", "").strip() + try: + data = json.loads(text) + if isinstance(data, dict): + return data + except Exception: + pass + return {"action_type": "finish_review"} + + +def _llm_action(client: OpenAI, obs: Dict[str, Any]) -> Dict[str, Any]: + meds = obs.get("current_medications", []) + summary = { + "step_index": obs.get("step_index", 0), + "remaining_query_budget": obs.get("remaining_query_budget", 0), + "remaining_intervention_budget": obs.get("remaining_intervention_budget", 0), + "conditions": obs.get("conditions", []), + "current_medications": [ + { + 
"drug_id": m.get("drug_id"), + "generic_name": m.get("generic_name"), + "dose_mg": m.get("dose_mg"), + "beers_flags": m.get("beers_flags", []), + } + for m in meds + ], + "interaction_queries": obs.get("interaction_queries", []), + "interventions": obs.get("interventions", []), + } + resp = client.chat.completions.create( + model=MODEL_NAME, + temperature=TEMPERATURE, + max_tokens=MAX_TOKENS, + messages=[ + {"role": "system", "content": SYSTEM_PROMPT}, + {"role": "user", "content": json.dumps(summary, separators=(",", ":"))}, + ], + ) + content = (resp.choices[0].message.content or "").strip() + return _safe_json(content) + + +def _reset(task_id: str) -> Dict[str, Any]: + r = requests.post(f"{ENV_URL}/reset", json={"task_id": task_id}, timeout=45) + r.raise_for_status() + return r.json() + + +def _step(action: Dict[str, Any]) -> Dict[str, Any]: + r = requests.post(f"{ENV_URL}/step", json={"action": action}, timeout=45) + r.raise_for_status() + return r.json() + + +def run_task(client: OpenAI, task_id: str) -> None: + rewards: List[float] = [] + steps = 0 + success = False + score = 0.0 + log_start(task_id) + try: + reset_payload = _reset(task_id) + obs = reset_payload.get("observation", {}) + done = bool(reset_payload.get("done", False)) + + for i in range(1, MAX_STEPS + 1): + if done: + break + action = _llm_action(client, obs) + action_str = json.dumps(action, separators=(",", ":")) + step_payload = _step(action) + obs = step_payload.get("observation", {}) + reward = float(step_payload.get("reward") or 0.0) + done = bool(step_payload.get("done", False)) + metadata = (obs or {}).get("metadata", {}) or {} + last_error = metadata.get("error") + rewards.append(reward) + steps = i + log_step(i, action_str, reward, done, str(last_error) if last_error else None) + + if done: + raw_score = metadata.get("grader_score", None) + if raw_score is not None: + score = _clamp01(float(raw_score)) + else: + score = _clamp01(sum(max(0.0, r) for r in rewards) / max(len(rewards), 1)) 
+ success = score > 0.0 + break + except Exception: + # Still emit END to keep evaluator parser stable. + success = False + finally: + log_end(success=success, steps=steps, score=score, rewards=rewards) + + +def main() -> int: + if not HF_TOKEN: + print("HF_TOKEN is required", flush=True) + return 1 + client = OpenAI(base_url=API_BASE_URL, api_key=HF_TOKEN) + for task in TASKS: + run_task(client, task) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/openenv-polypharmacy/.dockerignore b/openenv-polypharmacy/.dockerignore deleted file mode 100644 index 5007867e3a3b2c5514c4ff5bb18588b36951901f..0000000000000000000000000000000000000000 --- a/openenv-polypharmacy/.dockerignore +++ /dev/null @@ -1,8 +0,0 @@ -.git -.gitignore -**/__pycache__/ -**/.pytest_cache/ -**/.DS_Store -.env -frontend/node_modules -frontend/dist diff --git a/openenv-polypharmacy/Dockerfile b/openenv-polypharmacy/Dockerfile deleted file mode 100644 index 68b69d986a780501f6c9461410b2add26413473e..0000000000000000000000000000000000000000 --- a/openenv-polypharmacy/Dockerfile +++ /dev/null @@ -1,39 +0,0 @@ -FROM node:20-alpine AS frontend-builder -WORKDIR /app/frontend -COPY frontend/package*.json ./ -RUN npm ci -COPY frontend/ ./ -RUN npm run build - -FROM python:3.11-slim - -RUN apt-get update && \ - apt-get install -y --no-install-recommends build-essential curl && \ - rm -rf /var/lib/apt/lists/* - -WORKDIR /app - -COPY backend/requirements.txt /app/backend/requirements.txt -RUN pip install --no-cache-dir -r /app/backend/requirements.txt - -COPY backend /app/backend -COPY data /app/data -COPY scripts /app/scripts -COPY openenv.yaml /app/openenv.yaml -COPY .env.example /app/.env.example -COPY inference.py /app/inference.py - -COPY --from=frontend-builder /app/frontend/dist /app/frontend/dist - -RUN python3 /app/scripts/preprocess_data.py - -ENV PORT=7860 -ENV PYTHONPATH="/app/backend/src:${PYTHONPATH}" -ENV PYTHONUNBUFFERED=1 - -EXPOSE 7860 - -HEALTHCHECK 
--interval=30s --timeout=3s --start-period=15s --retries=3 \ - CMD curl -f http://localhost:7860/health || exit 1 - -CMD ["sh", "-c", "uvicorn backend.main:app --host 0.0.0.0 --port ${PORT:-7860}"] diff --git a/openenv-polypharmacy/inference.py b/openenv-polypharmacy/inference.py deleted file mode 100644 index a6809184af7dbc299ba4b8799eff00636647fb81..0000000000000000000000000000000000000000 --- a/openenv-polypharmacy/inference.py +++ /dev/null @@ -1,210 +0,0 @@ -#!/usr/bin/env python3 -"""Baseline LLM inference script for the PolypharmacyEnv. - -Uses Groq's OpenAI-compatible Chat Completions API to drive an LLM agent through the -PolypharmacyEnv HTTP API. Emits structured stdout logs in the -[START], [STEP], [END] format required by the OpenEnv evaluation spec. - -Environment variables: - GROQ_API_KEY – required - GROQ_BASE_URL – optional (default: https://api.groq.com/openai/v1) - GROQ_MODEL_NAME – model to use (default: llama-3.1-8b-instant) - POLYPHARMACY_ENV_URL – environment HTTP base URL (default: http://localhost:7860) -""" - -from __future__ import annotations - -import json -import os -import sys -import uuid -from typing import Any, Dict, List - -import requests -from openai import OpenAI - -# ── Configuration ──────────────────────────────────────────────────────────── - -MODEL = os.environ.get("GROQ_MODEL_NAME", "llama-3.1-8b-instant") -API_KEY = os.environ.get("GROQ_API_KEY", "") -API_BASE = os.environ.get("GROQ_BASE_URL", "https://api.groq.com/openai/v1") -ENV_URL = os.environ.get("POLYPHARMACY_ENV_URL", "http://localhost:7860") - -TASKS = ["easy_screening", "budgeted_screening", "complex_tradeoff"] -EPISODES_PER_TASK = 5 - -client = OpenAI(api_key=API_KEY, base_url=API_BASE) - -# ── Logging helpers ────────────────────────────────────────────────────────── - -def _log(tag: str, payload: Dict[str, Any]) -> None: - print(f"[{tag}] {json.dumps(payload, default=str)}", flush=True) - - -def _err(msg: str) -> None: - print(msg, file=sys.stderr, 
flush=True) - - -# ── Environment HTTP helpers ───────────────────────────────────────────────── - -def env_reset(task_id: str) -> Dict[str, Any]: - resp = requests.post(f"{ENV_URL}/reset", json={"task_id": task_id}, timeout=30) - resp.raise_for_status() - return resp.json() - - -def env_step(action: Dict[str, Any]) -> Dict[str, Any]: - resp = requests.post(f"{ENV_URL}/step", json={"action": action}, timeout=30) - resp.raise_for_status() - return resp.json() - - -# ── Observation → prompt ───────────────────────────────────────────────────── - -SYSTEM_PROMPT = """\ -You are a clinical pharmacist AI assistant reviewing an elderly patient's medication regimen. -You must reduce drug-interaction risk and address Beers-criteria violations while minimising -unnecessary medication changes. - -Available actions (respond with STRICT JSON, no extra text): -1. Query a drug pair for interactions: - {"action_type": "query_ddi", "drug_id_1": "...", "drug_id_2": "..."} - -2. Propose an intervention: - {"action_type": "propose_intervention", "target_drug_id": "...", - "intervention_type": "stop|dose_reduce|substitute|add_monitoring", - "proposed_new_drug_id": "...(optional)", "rationale": "..."} - -3. Finish the review: - {"action_type": "finish_review"} - -Respond with EXACTLY ONE JSON object per turn. No markdown, no explanation outside JSON. 
-""" - - -def _summarise_obs(obs: Dict[str, Any]) -> str: - meds = obs.get("current_medications", []) - med_summary = "; ".join( - f"{m['drug_id']}({m['generic_name']},{m['dose_mg']}mg)" - for m in meds - ) - queries = obs.get("interaction_queries", []) - q_summary = "; ".join( - f"{q['drug_id_1']}+{q['drug_id_2']}={q.get('severity','?')}" - for q in queries - ) - interventions = obs.get("interventions", []) - iv_summary = "; ".join( - f"{iv['action_type']}({iv['target_drug_id']})" - for iv in interventions - ) - return ( - f"Patient: age={obs.get('age')}, sex={obs.get('sex')}, " - f"conditions={obs.get('conditions')}, " - f"eGFR={obs.get('eGFR_category')}, liver={obs.get('liver_function_category')}\n" - f"Medications: {med_summary}\n" - f"Queries so far: {q_summary or 'none'}\n" - f"Interventions so far: {iv_summary or 'none'}\n" - f"Remaining query budget: {obs.get('remaining_query_budget')}\n" - f"Remaining intervention budget: {obs.get('remaining_intervention_budget')}\n" - f"Step: {obs.get('step_index')}" - ) - - -# ── LLM call ───────────────────────────────────────────────────────────────── - -def _ask_llm(obs_summary: str) -> Dict[str, Any]: - """Call the LLM and parse a PolypharmacyAction JSON.""" - try: - chat_resp = client.chat.completions.create( - model=MODEL, - messages=[ - {"role": "system", "content": SYSTEM_PROMPT}, - {"role": "user", "content": obs_summary}, - ], - max_tokens=256, - temperature=0.2, - ) - text = (chat_resp.choices[0].message.content or "").strip() - # Strip markdown fences if present - text = text.strip() - if text.startswith("```"): - text = text.split("\n", 1)[-1] - if text.endswith("```"): - text = text.rsplit("```", 1)[0] - text = text.strip() - return json.loads(text) - except Exception as e: - _err(f"LLM parse error: {e}") - return {"action_type": "finish_review"} - - -# ── Main loop ──────────────────────────────────────────────────────────────── - -def main() -> None: - if not API_KEY: - _err("GROQ_API_KEY is required") - 
sys.exit(1) - - run_id = str(uuid.uuid4())[:8] - - for task_id in TASKS: - task_scores: List[float] = [] - task_rewards: List[float] = [] - - _log("START", { - "run_id": run_id, - "task_id": task_id, - "model": MODEL, - "episodes": EPISODES_PER_TASK, - }) - - for ep_idx in range(EPISODES_PER_TASK): - reset_resp = env_reset(task_id) - obs = reset_resp["observation"] - done = reset_resp.get("done", False) - episode_id = obs.get("episode_id", f"ep_{ep_idx}") - total_reward = 0.0 - step_idx = 0 - - while not done: - obs_summary = _summarise_obs(obs) - action_payload = _ask_llm(obs_summary) - - step_resp = env_step(action_payload) - obs = step_resp["observation"] - reward = step_resp.get("reward", 0.0) - done = step_resp.get("done", False) - total_reward += reward - - _log("STEP", { - "run_id": run_id, - "task_id": task_id, - "episode_id": episode_id, - "step_index": step_idx, - "observation_summary": obs_summary[:200], - "action_payload": action_payload, - "reward": reward, - "done": done, - }) - - step_idx += 1 - - grader_score = step_resp.get("info", {}).get("grader_score", 0.0) - task_scores.append(grader_score) - task_rewards.append(total_reward) - - _log("END", { - "run_id": run_id, - "task_id": task_id, - "episodes": EPISODES_PER_TASK, - "avg_grader_score": sum(task_scores) / max(len(task_scores), 1), - "avg_total_reward": sum(task_rewards) / max(len(task_rewards), 1), - "per_episode_scores": task_scores, - }) - - _err("Inference complete.") - - -if __name__ == "__main__": - main() diff --git a/openenv-polypharmacy/openenv.yaml b/openenv.yaml similarity index 100% rename from openenv-polypharmacy/openenv.yaml rename to openenv.yaml diff --git a/openenv-polypharmacy/pyproject.toml b/pyproject.toml similarity index 94% rename from openenv-polypharmacy/pyproject.toml rename to pyproject.toml index 9bd219ea59455a8765851931c923b726ea32d1d9..43d98f7f65f20870aa09ee8ec9deb42e2888bd92 100644 --- a/openenv-polypharmacy/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,9 @@ 
dev = [ "isort", ] +[project.scripts] +server = "server.app:main" + [tool.setuptools.packages.find] where = ["backend/src"] diff --git a/openenv-polypharmacy/requirements.txt b/requirements.txt similarity index 100% rename from openenv-polypharmacy/requirements.txt rename to requirements.txt diff --git a/openenv-polypharmacy/scripts/dev_backend.sh b/scripts/dev_backend.sh similarity index 100% rename from openenv-polypharmacy/scripts/dev_backend.sh rename to scripts/dev_backend.sh diff --git a/openenv-polypharmacy/scripts/dev_frontend.sh b/scripts/dev_frontend.sh similarity index 100% rename from openenv-polypharmacy/scripts/dev_frontend.sh rename to scripts/dev_frontend.sh diff --git a/openenv-polypharmacy/scripts/preprocess_data.py b/scripts/preprocess_data.py similarity index 100% rename from openenv-polypharmacy/scripts/preprocess_data.py rename to scripts/preprocess_data.py diff --git a/openenv-polypharmacy/scripts/run_validation.sh b/scripts/run_validation.sh similarity index 100% rename from openenv-polypharmacy/scripts/run_validation.sh rename to scripts/run_validation.sh diff --git a/server/app.py b/server/app.py new file mode 100644 index 0000000000000000000000000000000000000000..ff76c2f132d0b3e4614b4ba0d037edc9f19b7b1d --- /dev/null +++ b/server/app.py @@ -0,0 +1,13 @@ +"""Validator compatibility entrypoint.""" + +from backend.main import app + + +def main(): + """Return ASGI app for validator multi-mode checks.""" + return app + + +if __name__ == "__main__": + main() + diff --git a/uv.lock b/uv.lock new file mode 100644 index 0000000000000000000000000000000000000000..82447185ad37365d8022f14b2c624b90fa70b688 --- /dev/null +++ b/uv.lock @@ -0,0 +1 @@ +# Generated for OpenEnv validator compatibility.