diff --git a/.dockerignore b/.dockerignore index 7566d677dc0c4ad7c9c7bc6a176c6b7186e9c728..ea2205ebecdbc61a7ad350e8d4261a94ae2e7466 100644 --- a/.dockerignore +++ b/.dockerignore @@ -9,4 +9,4 @@ **/dist **/.env **/.env.* -!openenv-polypharmacy/.env.example +!.env.example diff --git a/openenv-polypharmacy/.env.example b/.env.example similarity index 100% rename from openenv-polypharmacy/.env.example rename to .env.example diff --git a/.gitignore b/.gitignore index a21ac13278960096fd010e674651b109be6c6cc4..f1beb4e73c68e8f3e03711363ed8909980cd05df 100644 --- a/.gitignore +++ b/.gitignore @@ -4,7 +4,7 @@ venv/ env/ .env .env.* -!openenv-polypharmacy/.env.example +!.env.example *.py[cod] __pycache__/ .pytest_cache/ @@ -29,7 +29,3 @@ pnpm-debug.log* *.swp .DS_Store -# --- Project-specific nested paths --- -openenv-polypharmacy/frontend/node_modules/ -openenv-polypharmacy/frontend/dist/ -openenv-polypharmacy/.pytest_cache/ diff --git a/.gitignore copy b/.gitignore copy deleted file mode 100644 index a21ac13278960096fd010e674651b109be6c6cc4..0000000000000000000000000000000000000000 --- a/.gitignore copy +++ /dev/null @@ -1,35 +0,0 @@ -# --- Python --- -venv/ -.venv/ -env/ -.env -.env.* -!openenv-polypharmacy/.env.example -*.py[cod] -__pycache__/ -.pytest_cache/ -.mypy_cache/ -.ruff_cache/ -.coverage -coverage.xml - -# --- Node / frontend --- -node_modules/ -**/node_modules/ -frontend/dist/ -**/dist/ -npm-debug.log* -yarn-debug.log* -yarn-error.log* -pnpm-debug.log* - -# --- Build / temp --- -*.log -*.tmp -*.swp -.DS_Store - -# --- Project-specific nested paths --- -openenv-polypharmacy/frontend/node_modules/ -openenv-polypharmacy/frontend/dist/ -openenv-polypharmacy/.pytest_cache/ diff --git a/Dockerfile b/Dockerfile index 10f199b9446b1375bce69953826c903f9ed43efe..68b69d986a780501f6c9461410b2add26413473e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,8 +1,8 @@ FROM node:20-alpine AS frontend-builder WORKDIR /app/frontend -COPY openenv-polypharmacy/frontend/package*.json ./ 
+COPY frontend/package*.json ./ RUN npm ci -COPY openenv-polypharmacy/frontend/ ./ +COPY frontend/ ./ RUN npm run build FROM python:3.11-slim @@ -13,15 +13,15 @@ RUN apt-get update && \ WORKDIR /app -COPY openenv-polypharmacy/backend/requirements.txt /app/backend/requirements.txt +COPY backend/requirements.txt /app/backend/requirements.txt RUN pip install --no-cache-dir -r /app/backend/requirements.txt -COPY openenv-polypharmacy/backend /app/backend -COPY openenv-polypharmacy/data /app/data -COPY openenv-polypharmacy/scripts /app/scripts -COPY openenv-polypharmacy/openenv.yaml /app/openenv.yaml -COPY openenv-polypharmacy/.env.example /app/.env.example -COPY openenv-polypharmacy/inference.py /app/inference.py +COPY backend /app/backend +COPY data /app/data +COPY scripts /app/scripts +COPY openenv.yaml /app/openenv.yaml +COPY .env.example /app/.env.example +COPY inference.py /app/inference.py COPY --from=frontend-builder /app/frontend/dist /app/frontend/dist diff --git a/PROMPT.md b/PROMPT.md new file mode 100644 index 0000000000000000000000000000000000000000..d7d5481b1ba6a83c8590ebcec024b911f5800982 --- /dev/null +++ b/PROMPT.md @@ -0,0 +1,571 @@ +You are an expert Python backend, ML, and infrastructure engineer. +Your task is to implement a complete, production-ready OpenEnv environment called **PolypharmacyEnv** for training and evaluating agentic RL policies that act as an "elderly polypharmacy safety agent" (clinical pharmacist assistant). + +The deliverable MUST satisfy all of the following: +- Fully compliant with the OpenEnv spec (typed models, `step()` / `reset()` / `state()`, `openenv.yaml`, HTTP server, Dockerfile). +- Simulates a realistic healthcare workflow around elderly polypharmacy and dangerous drug combinations. +- Defines at least **3 tasks** (easy → medium → hard) with deterministic agent graders producing scores in [0.0, 1.0]. +- Provides shaped rewards over the trajectory (not just sparse terminal rewards). 
+- Includes a baseline LLM-based inference script `inference.py` in the repo root, following the evaluation requirements: + - Uses the OpenAI Python client. + - Reads `OPENAI_API_KEY`, `API_BASE_URL`, `MODEL_NAME`, and `HF_TOKEN` from the environment. + - Emits structured stdout logs in the exact `[START]`, `[STEP]`, `[END]` format from the OpenEnv sample inference script. +- Is containerized and deployable as a **Hugging Face Space** tagged with `openenv` that responds to OpenEnv-style `reset` / `step` / `state` HTTP calls. + +Implement everything described below. + +================================================= +1. Repository and folder structure +================================================= + +Create a Python package repository with this structure (names are important unless clearly labeled as examples): + +- `openenv-polypharmacy/` + - `openenv.yaml` + - `README.md` + - `requirements.txt` + - `Dockerfile` + - `inference.py` # baseline LLM agent per spec + - `pyproject.toml` or `setup.cfg` (optional but recommended) + - `src/` + - `polypharmacy_env/` + - `__init__.py` + - `config.py` + - `models.py` # Action, Observation, State, helper models + - `env_core.py` # PolypharmacyEnv implementation + - `tasks.py` # task setup utilities + - `graders.py` # deterministic graders for each task + - `rewards.py` # reward shaping logic + - `data_loader.py` # load/preprocess patient and lookup data + - `ddi_simulator.py` # local DDI / guideline simulator + - `api/` + - `__init__.py` + - `schemas.py` # HTTP request/response schemas + - `server.py` # FastAPI app exposing OpenEnv endpoints + - `baselines/` + - `__init__.py` + - `heuristic_agent.py` # simple rule-based baseline agent + - `random_agent.py` # trivial random baseline (optional) + - `tests/` + - `__init__.py` + - `test_env_core.py` + - `test_api.py` + - `data/` + - `raw/` # placeholder for real/synthetic source data + - `processed/` + - `lookups/` + - `ddi_rules.csv` + - `beers_criteria.csv` + - 
`drug_metadata.csv` + - `scripts/` + - `preprocess_data.py` + - `run_validation.sh` # optional; runs OpenEnv validator, tests, etc. + +Use Python 3.10+ with full type hints, and keep the code black/isort-compatible. + +================================================= +2. Domain, data, and clinical abstraction +================================================= + +2.1. Core scenario + +Model an elderly patient (age ≥ 65) with: +- Demographics: age, sex. +- Comorbidities: e.g., hypertension, diabetes, heart failure, CKD, dementia. +- Basic labs: kidney function (eGFR category), liver function category. +- A current medication list (polypharmacy, e.g., 3–15 drugs depending on task). + +Each **episode** is one medication-review session where the agent: +- Observes patient info and current meds. +- Optionally **queries** a DDI/guideline tool for specific drug pairs. +- Proposes **interventions**: + - `stop`: discontinue a drug. + - `dose_reduce`: lower dose of a drug. + - `substitute`: swap to a safer alternative. + - `add_monitoring`: keep the drug but flag extra monitoring. +- Calls `finish_review` when it decides the regimen is acceptable or budgets are exhausted. + +No external PHI, EHRs, or online APIs: all data is **synthetic** or de-identified and local to the container (CSV files). + +2.2. 
Data files and CSV schemas + +Implement local CSVs under `data/lookups/`: + +**`drug_metadata.csv`** +- `drug_id` (string; unique key) +- `generic_name` (string) +- `atc_class` (string) +- `is_high_risk_elderly` (0/1) +- `default_dose_mg` (float) +- `min_dose_mg` (float) +- `max_dose_mg` (float) + +**`beers_criteria.csv`** +- `drug_id` (string) +- `criterion_type` (enum string: `avoid`, `caution`, `dose_adjust`, `avoid_in_condition`) +- `condition` (nullable string; e.g., `CKD`, `dementia`) +- `rationale` (brief text) + +**`ddi_rules.csv`** +- `drug_id_1` (string; normalized so `drug_id_1 < drug_id_2` lexicographically) +- `drug_id_2` (string) +- `severity` (enum string: `mild`, `moderate`, `severe`) +- `mechanism` (short text) +- `recommendation` (enum string: `avoid_combination`, `monitor_closely`, `dose_adjust`, `no_action`) +- `base_risk_score` (float in [0.0, 1.0]) + +Implement a synthetic patient-episode dataset under `data/processed/`: + +**`patients_polypharmacy.csv`** +- `episode_id` (string) +- `age` (int) +- `sex` (enum: `M`, `F`, `O`) +- `conditions` (semicolon-separated; e.g., `HTN;DM;CKD`) +- `eGFR_category` (enum: `normal`, `mild`, `moderate`, `severe`) +- `liver_function_category` (enum: `normal`, `impaired`) +- `medication_ids` (semicolon-separated list of `drug_id`) +- `baseline_risk_score` (float in [0.0, 1.0]) + +2.3. Preprocessing script + +In `scripts/preprocess_data.py`: +- If real data is not provided, procedurally generate synthetic but plausible data using: + - Random combinations of conditions and drugs constrained by simple rules (e.g., CKD + renally-cleared drugs). + - Controlled distribution of high-risk DDIs and Beers violations. +- Explicitly tag episodes as easy/medium/hard (e.g., via number of drugs, number/severity of DDIs, and number of Beers issues). +- Save `patients_polypharmacy.csv` ready for the environment to consume. + +================================================= +3. 
OpenEnv models and environment implementation +================================================= + +3.1. Models + +In `models.py`, define dataclasses or Pydantic models that extend the appropriate OpenEnv base types (`Action`, `Observation`, `State`) and are JSON-compatible. + +Auxiliary models: + +**`MedicationEntry`** +- `drug_id: str` +- `generic_name: str` +- `atc_class: str` +- `dose_mg: float` +- `frequency: str` # e.g., `qd`, `bid` +- `route: str` # e.g., `po` +- `is_high_risk_elderly: bool` +- `beers_flags: list[str]` # e.g., `["avoid", "dose_adjust_CKD"]` + +**`InteractionQueryRecord`** +- `drug_id_1: str` +- `drug_id_2: str` +- `severity: str | None` +- `recommendation: str | None` +- `risk_score: float | None` +- `step_index: int` + +**`InterventionRecord`** +- `target_drug_id: str` +- `action_type: Literal["stop", "dose_reduce", "substitute", "add_monitoring"]` +- `proposed_new_drug_id: str | None` +- `rationale: str` +- `step_index: int` + +Core wire models: + +**`PolypharmacyObservation`** (extends OpenEnv `Observation`) +- `episode_id: str` +- `task_id: Literal["easy_screening", "budgeted_screening", "complex_tradeoff"]` +- `age: int` +- `sex: str` +- `conditions: list[str]` +- `eGFR_category: str` +- `liver_function_category: str` +- `current_medications: list[MedicationEntry]` +- `interaction_queries: list[InteractionQueryRecord]` +- `interventions: list[InterventionRecord]` +- `step_index: int` +- `remaining_query_budget: int` +- `remaining_intervention_budget: int` +- `shaped_reward: float` # reward from last step +- `done: bool` + +**`PolypharmacyAction`** (extends OpenEnv `Action`) +- `action_type: Literal["query_ddi", "propose_intervention", "finish_review"]` +- `drug_id_1: str | None` # for DDI queries or some interventions +- `drug_id_2: str | None` # for DDI queries +- `target_drug_id: str | None` # for interventions +- `intervention_type: Literal["stop", "dose_reduce", "substitute", "add_monitoring", "none"] | None` +- 
`proposed_new_drug_id: str | None` +- `rationale: str | None` + +**`PolypharmacyState`** (extends OpenEnv `State`) +- `episode_id: str` +- `task_id: str` +- `step_count: int` +- `max_steps: int` +- `num_query_actions: int` +- `num_interventions: int` + +3.2. Environment core + +In `env_core.py`, implement `PolypharmacyEnv` extending the appropriate OpenEnv environment base class. It must implement: + +**`reset(task_id: str | None = None) -> PolypharmacyObservation`** +- If `task_id` is `None`, default to medium (`budgeted_screening`). +- Sample an episode from `patients_polypharmacy.csv` filtered by difficulty. +- Initialize: + - `episode_id` + - `step_count = 0` + - task-specific budgets (query, interventions, max_steps) + - baseline regimen and risk + - empty `interaction_queries` and `interventions` +- Return the initial `PolypharmacyObservation` with: + - `step_index = 0` + - `shaped_reward = 0.0` + - `done = False` + +**`step(action: PolypharmacyAction) -> dict`** +- Validate the action; if invalid: + - Apply a negative reward. + - Do not modify regimen, but log error in `info`. +- If `action_type == "query_ddi"`: + - If query budget exhausted, apply penalty and do not query. + - Else: + - Use `ddi_simulator.lookup_ddi(drug_id_1, drug_id_2)` to get severity, recommendation, base_risk_score. + - Append an `InteractionQueryRecord`. + - Apply a small negative reward for query cost. +- If `action_type == "propose_intervention"`: + - If intervention budget exhausted, apply penalty and ignore change. + - Else: + - Update `current_medications` according to `intervention_type`: + - `stop`: remove medication. + - `dose_reduce`: adjust dose downward within [min_dose_mg, default_dose_mg]. + - `substitute`: replace with a safer alternative from same `atc_class`. + - `add_monitoring`: keep drug but tag in internal state. + - Append an `InterventionRecord`. + - Recompute current regimen risk using the risk model (see 3.3). 
+ - Compute shaped reward = (previous_risk - new_risk) - small intervention cost. +- If `action_type == "finish_review"`: + - Mark `done = True`. + - Call the task’s grader to get episode-level score in [0.0, 1.0]. + - Add this as a terminal bonus to the current step reward. + +- In all cases: + - Increment `step_count`. + - Check `max_steps`; if exceeded, auto-terminate: + - `done = True` + - apply time-out penalty + - call grader with current trajectory for a final score if appropriate. + - Construct next `PolypharmacyObservation` with updated fields. + - Return a dict: + - `observation`: `PolypharmacyObservation` + - `reward`: float shaped reward for this step + - `done`: bool + - `info`: dict with fields like `current_risk`, `baseline_risk`, `grader_score_if_terminal`, and debug flags. + +**`state` property** +- Returns `PolypharmacyState` reflecting the current internal state. + +3.3. DDI simulator and risk model + +In `ddi_simulator.py`: +- Load `ddi_rules.csv` once via `data_loader`. +- Implement `lookup_ddi(drug_id_1, drug_id_2) -> tuple[severity, recommendation, base_risk_score]`: + - Normalize the pair ordering. + - Look up row; if missing, return: + - severity = `"none"` + - recommendation = `"no_action"` + - base_risk_score = 0.0 + +In `rewards.py` (or a dedicated module), implement: +- `compute_regimen_risk(current_drug_ids, patient_context, ddi_rules, beers_rules, drug_metadata) -> float` + - Aggregate contributions from: + - Beers violations (weighted by `criterion_type` and relevant conditions). + - DDI base risk scores for all present drug pairs. + - High-risk elderly drugs. + - Normalize and clip to [0.0, 1.0]. + +Use this function to compute: +- `baseline_risk` at episode start. +- Risk after each intervention step. + +Also implement: +- `compute_shaped_reward(previous_risk, new_risk, action, context, partial_metrics) -> float` + - Positive component: `previous_risk - new_risk`. 
+ - Negative components: per-query cost, per-intervention cost, invalid-action penalty, time-out penalty. + +================================================= +4. Tasks and graders (3 difficulty levels) +================================================= + +Define three task IDs and semantics in `tasks.py` and `graders.py`: + +Task IDs: +- `easy_screening` +- `budgeted_screening` +- `complex_tradeoff` + +4.1. `easy_screening` (easy) + +- Small regimen: 3–5 drugs. +- Exactly one **severe** DDI pair and possibly one simple Beers violation. +- Budgets: + - query_budget ≈ 4 + - intervention_budget ≈ 2 + - max_steps ≈ 10 + +Grader: +- Input: full trajectory, baseline risk, final risk, list of interventions. +- Compute: + - `risk_reduction = max(0.0, baseline_risk - final_risk) / max(baseline_risk, ε)` (normalized). + - `targeted_intervention_flag = 1.0` if at least one intervention affects one of the drugs in the known severe DDI pair, else 0.0. +- Score: + - `score = 0.5 * risk_reduction + 0.5 * targeted_intervention_flag` + - Clip to [0.0, 1.0]. + +4.2. `budgeted_screening` (medium) + +- Medium regimen: 6–10 drugs. +- Multiple DDIs (mild/moderate/severe) and multiple Beers issues. +- Budgets: + - query_budget ≈ 8 + - intervention_budget ≈ 3 + - max_steps ≈ 20 + +Grader: +- Compute: + - `risk_reduction_score` as normalized risk drop. + - `intervention_precision_score` = fraction of interventions that actually reduce risk or fix guideline violations. + - `query_efficiency_score` = (number of severe/moderate DDIs discovered) / (number of queries used), normalized. +- Weighted score, for example: + - `score = 0.5 * risk_reduction_score + 0.3 * intervention_precision_score + 0.2 * query_efficiency_score` + - Clip to [0.0, 1.0]. + +4.3. `complex_tradeoff` (hard) + +- Larger regimen: 10–15 drugs. +- Some drugs are **clinically critical** (e.g., anticoagulants, insulin analogues) and encoded as such in `drug_metadata` or a small internal map. 
+- Episodes contain: + - multiple DDIs and Beers issues, including ones involving critical drugs. + - safer substitutes for some risky drugs. + +Budgets: +- query_budget ≈ 12 +- intervention_budget ≈ 5 +- max_steps ≈ 30 + +Grader adds a **regimen disruption penalty** component: +- Metrics: + - `risk_reduction_score` (as above). + - `critical_drug_penalty` = penalty if a critical drug is stopped without substitution to another suitable agent. + - `total_drug_changes` = number of drugs stopped or substituted. + - `regimen_disruption_penalty` derived from `total_drug_changes` and `critical_drug_penalty`. + +Example scoring: +- `base = risk_reduction_score` +- `penalty = α * regimen_disruption_penalty` +- `score = clamp(base - penalty, 0.0, 1.0)` + +4.4. Reward shaping + +In `rewards.py`, define a consistent shaping scheme: +- On each query: + - Small negative reward (e.g., −0.01) plus any small bonus if it discovers a severe DDI, if desired. +- On each intervention: + - Reward ≈ (previous_risk - new_risk) − small intervention cost. +- On invalid actions: + - Larger negative reward (e.g., −0.1) and no state change. +- On `finish_review`: + - Add the task-level `score` ∈ [0.0, 1.0] from the corresponding grader to that step’s shaped reward. + +Ensure the sum of step rewards per episode remains in a reasonable numeric range (e.g., roughly -5 to +5) while still allowing meaningful differentiation by graders. + +================================================= +5. HTTP API server and openenv.yaml +================================================= + +5.1. HTTP server (FastAPI) + +In `api/server.py`: +- Implement a FastAPI app that maintains a `PolypharmacyEnv` instance (or a multiplexing scheme if needed). +- Endpoints: + - `POST /reset`: + - Request body: may include `task_id` (string). + - Response: serialized `PolypharmacyObservation`. + - `POST /step`: + - Request body: serialized `PolypharmacyAction`. 
+ - Response: dict with: + - `observation`: `PolypharmacyObservation` + - `reward`: float + - `done`: bool + - `info`: dict + - `GET /state`: + - Response: `PolypharmacyState`. + +Provide a module-level `app = FastAPI(...)` object for use with uvicorn and Hugging Face Spaces. Ensure the JSON schema is consistent with OpenEnv clients (simple, flat JSON for observation/action/state). + +5.2. `openenv.yaml` + +At repo root, define `openenv.yaml` consistent with the latest OpenEnv spec. At minimum, include: +- `name`: `polypharmacy_env` +- `version`: e.g., `0.1.0` +- `description`: human-readable description. +- `author`: your details. +- `tags`: e.g., `["healthcare", "polypharmacy", "openenv"]` +- `tasks`: + - One entry per task: + - `id`: `"easy_screening"` / `"budgeted_screening"` / `"complex_tradeoff"` + - `description`: one-line description + - `difficulty`: `"easy"`, `"medium"`, `"hard"` + +Ensure `openenv validate` (or equivalent validator) passes once implemented. + +================================================= +6. Baseline heuristic (non-LLM) agent +================================================= + +In `baselines/heuristic_agent.py`, implement a simple, deterministic baseline agent that: + +For each episode: +- Iterates through all unordered medication pairs within query budget: + - Calls `query_ddi` via the environment for each pair until the query budget is exhausted or all pairs are examined. + - Records severe and moderate interactions. +- After querying: + - For each severe DDI pair: + - Try `substitute` one of the drugs using `drug_metadata`: + - Prefer substitute within same `atc_class` that: + - is not marked high-risk elderly. + - does not participate in known severe DDIs with the rest of the regimen. + - If no substitute exists, propose `stop` for the higher-risk drug. + - Respect intervention budget limits. +- Finally, call `finish_review`. 
+ +This baseline should be callable as a simple Python function that interacts with `PolypharmacyEnv` directly (without HTTP). + +================================================= +7. Baseline LLM inference script (inference.py) +================================================= + +At repo root, create `inference.py` that: + +7.1. Uses the OpenAI Python client + +- Import and configure the official OpenAI Python client. +- Read environment variables: + - `OPENAI_API_KEY` (required). + - `API_BASE_URL` (base URL for LLM; default to OpenAI standard if not set). + - `MODEL_NAME` (e.g., `gpt-4.1` or similar). + - `HF_TOKEN` (if needed for HF auth; do not hardcode). +- Read `POLYPHARMACY_ENV_URL` (or similar) for the environment’s HTTP base URL. + +7.2. Implements the required logging format + +- For each **run** across all tasks: + - Emit a `[START]` line with a JSON payload exactly matching the evaluation specification: + - Fields such as `run_id`, `task_id`, `model`, etc., in the same order and naming as the sample OpenEnv inference script. +- For each **step** in an episode: + - Emit a `[STEP]` line with JSON fields including: + - `run_id` + - `task_id` + - `episode_id` + - `step_index` + - `observation_summary` (brief, machine-readable summary) + - `action_payload` (the action sent to the env) + - `reward` + - `done` +- After finishing an episode for a task: + - Emit an `[END]` line summarizing: + - `run_id` + - `task_id` + - per-episode statistics (e.g., total reward, grader score from last step’s `info`). +- The stdout format MUST follow the sample exactly: + - Same tags: `[START]`, `[STEP]`, `[END]`. + - Same JSON field names and ordering as the provided reference. + - No extra prints except these structured logs (and necessary error messages to stderr). + +7.3. LLM agent loop + +- For each task (`easy_screening`, `budgeted_screening`, `complex_tradeoff`): + - Run a fixed small number of episodes (e.g., 5–10 per task) for baseline scoring. 
+ - For each episode: + - Call `/reset` with the task id. + - At each step: + - Summarize the observation into a concise prompt for the LLM: + - Include age, sex, conditions, high-risk flags, budgets, and a compressed view of meds and previous actions. + - Ask the model to output a **strict JSON** representing `PolypharmacyAction` fields. + - Parse and validate the JSON; if invalid, fall back to a safe default (e.g., `finish_review` or a no-op) and penalize in evaluation. + - Send this action to `/step` and log `[STEP]`. + - End when `done=True` or max_steps is reached. +- At the end, print aggregate scores per task and overall. + +Make sure runtime < 20 minutes and that the script can run within 2 vCPUs and 8 GB RAM. + +================================================= +8. Dockerfile and Hugging Face Space +================================================= + +8.1. Dockerfile + +Create a `Dockerfile` that: +- Starts from a slim Python image (e.g., `python:3.11-slim`). +- Installs system dependencies as needed (e.g., `build-essential`, `curl`). +- Copies the project into the container. +- Installs Python dependencies from `requirements.txt`. +- Sets appropriate environment variables for the app (e.g., `PORT=7860`). +- Exposes port 7860. +- Uses a `CMD` or `ENTRYPOINT` that runs the FastAPI server, for example: + - `uvicorn polypharmacy_env.api.server:app --host 0.0.0.0 --port 7860` + +8.2. Hugging Face Space + +Ensure the repository is ready to be used as a Hugging Face Space: +- Space type: `docker`. +- Tag: `openenv`. +- On container start, the server must listen on the correct port and respond to: + - `POST /reset` + - `POST /step` + - `GET /state` +- The environment must start cleanly with `docker build` + `docker run` locally. + +================================================= +9. 
README and documentation +================================================= + +In `README.md`, include: + +- **Environment description & motivation**: + - What PolypharmacyEnv simulates. + - Why elderly polypharmacy safety matters. +- **Action and observation spaces**: + - Describe `PolypharmacyAction`, `PolypharmacyObservation`, and `PolypharmacyState` fields and semantics. +- **Task descriptions**: + - `easy_screening`, `budgeted_screening`, `complex_tradeoff`, their difficulty and goals. +- **Reward structure**: + - Summarize shaping and terminal rewards. +- **Setup & usage**: + - How to install dependencies. + - How to run the API server locally (uvicorn command). + - How to run the heuristic baseline. + - How to run `inference.py` with environment variables. +- **Baseline scores**: + - Document reproducible baseline scores for each task (heuristic agent, and LLM baseline if available). + +================================================= +10. Validation and quality gates +================================================= + +- Ensure: + - `openenv.yaml` and the HTTP server pass the OpenEnv validation script. + - `docker build` and `docker run` work without errors. + - `inference.py` completes under 20 minutes, within 2 vCPUs / 8 GB RAM. + - All graders: + - Are deterministic. + - Return scores strictly in [0.0, 1.0]. + - No grader returns a constant score irrespective of behavior. + +Aim for clean, well-structured, well-documented code with clear separation of concerns between: +- Data loading, +- Environment state & dynamics, +- Reward/grade logic, +- HTTP serving, +- Baseline agents and inference. 
\ No newline at end of file diff --git a/README.MD b/README.MD index dddbf4047eacd2e74adc50df3b8cf73f80f162f9..3bc9290d48e3b9b05d31a2d253526c654735804e 100644 --- a/README.MD +++ b/README.MD @@ -1,3 +1,12 @@ +--- +title: Polypharmacy +emoji: 📉 +colorFrom: yellow +colorTo: blue +sdk: docker +pinned: false +--- + # PolypharmacyEnv Monorepo for an OpenEnv-compatible medication safety environment with: @@ -12,8 +21,7 @@ Monorepo for an OpenEnv-compatible medication safety environment with: ## Repository Structure ```text -openenv-polypharmacy/ - backend/ +backend/ main.py # ASGI entrypoint (uvicorn target) requirements.txt # Backend dependencies Dockerfile # Backend container @@ -32,22 +40,22 @@ openenv-polypharmacy/ graders.py # Task graders tasks.py # Task/episode selection tests/ # Backend tests - frontend/ +frontend/ src/ # React UI code package.json Dockerfile # Frontend container - data/ +data/ lookups/ # drug_metadata.csv, ddi_rules.csv, beers_criteria.csv processed/ # patients_polypharmacy.csv - scripts/ +scripts/ preprocess_data.py # Synthetic data generation dev_backend.sh # Local backend run helper dev_frontend.sh # Local frontend run helper run_validation.sh # Tests + baseline validation - docker-compose.yml # Full stack orchestration - openenv.yaml # OpenEnv manifest - inference.py # Optional CLI inference baseline - .env.example # Environment template +docker-compose.yml # Full stack orchestration +openenv.yaml # OpenEnv manifest +inference.py # Baseline inference script (required at root) +.env.example # Environment template ``` --- @@ -85,11 +93,7 @@ Create `.env`: cp .env.example .env ``` -Set values: - -- `GROQ_API_KEY=...` (required) -- `GROQ_BASE_URL=https://api.groq.com/openai/v1` (recommended) -- `GROQ_MODEL_NAME=llama-3.3-70b-versatile` (recommended) +Set values for local backend integrations as needed. 
--- @@ -173,9 +177,9 @@ This repo now includes a **root `Dockerfile`** that builds frontend + backend in In Space Settings -> Variables and Secrets: -- Secret: `GROQ_API_KEY` -- Variable: `GROQ_BASE_URL=https://api.groq.com/openai/v1` -- Variable: `GROQ_MODEL_NAME=llama-3.3-70b-versatile` +- Secret: `HF_TOKEN` +- Variable: `API_BASE_URL=https://router.huggingface.co/v1` +- Variable: `MODEL_NAME=Qwen/Qwen2.5-72B-Instruct` ### 3) Push this repository to the Space @@ -225,6 +229,13 @@ Or run validation script: ./scripts/run_validation.sh ``` +### Submission validation + +```bash +openenv validate +python inference.py +``` + --- ## Notes diff --git a/openenv-polypharmacy/backend/Dockerfile b/backend/Dockerfile similarity index 100% rename from openenv-polypharmacy/backend/Dockerfile rename to backend/Dockerfile diff --git a/openenv-polypharmacy/backend/__init__.py b/backend/__init__.py similarity index 100% rename from openenv-polypharmacy/backend/__init__.py rename to backend/__init__.py diff --git a/openenv-polypharmacy/backend/main.py b/backend/main.py similarity index 100% rename from openenv-polypharmacy/backend/main.py rename to backend/main.py diff --git a/openenv-polypharmacy/backend/requirements.txt b/backend/requirements.txt similarity index 100% rename from openenv-polypharmacy/backend/requirements.txt rename to backend/requirements.txt diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/__init__.py b/backend/src/polypharmacy_env/__init__.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/__init__.py rename to backend/src/polypharmacy_env/__init__.py diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/api/__init__.py b/backend/src/polypharmacy_env/api/__init__.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/api/__init__.py rename to backend/src/polypharmacy_env/api/__init__.py diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/api/app.py 
b/backend/src/polypharmacy_env/api/app.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/api/app.py rename to backend/src/polypharmacy_env/api/app.py diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/api/routes/__init__.py b/backend/src/polypharmacy_env/api/routes/__init__.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/api/routes/__init__.py rename to backend/src/polypharmacy_env/api/routes/__init__.py diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/api/routes/agent.py b/backend/src/polypharmacy_env/api/routes/agent.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/api/routes/agent.py rename to backend/src/polypharmacy_env/api/routes/agent.py diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/api/server.py b/backend/src/polypharmacy_env/api/server.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/api/server.py rename to backend/src/polypharmacy_env/api/server.py diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/baselines/__init__.py b/backend/src/polypharmacy_env/baselines/__init__.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/baselines/__init__.py rename to backend/src/polypharmacy_env/baselines/__init__.py diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/baselines/heuristic_agent.py b/backend/src/polypharmacy_env/baselines/heuristic_agent.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/baselines/heuristic_agent.py rename to backend/src/polypharmacy_env/baselines/heuristic_agent.py diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/baselines/random_agent.py b/backend/src/polypharmacy_env/baselines/random_agent.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/baselines/random_agent.py rename to 
backend/src/polypharmacy_env/baselines/random_agent.py diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/client.py b/backend/src/polypharmacy_env/client.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/client.py rename to backend/src/polypharmacy_env/client.py diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/config.py b/backend/src/polypharmacy_env/config.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/config.py rename to backend/src/polypharmacy_env/config.py diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/data_loader.py b/backend/src/polypharmacy_env/data_loader.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/data_loader.py rename to backend/src/polypharmacy_env/data_loader.py diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/ddi_simulator.py b/backend/src/polypharmacy_env/ddi_simulator.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/ddi_simulator.py rename to backend/src/polypharmacy_env/ddi_simulator.py diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/env_core.py b/backend/src/polypharmacy_env/env_core.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/env_core.py rename to backend/src/polypharmacy_env/env_core.py diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/graders.py b/backend/src/polypharmacy_env/graders.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/graders.py rename to backend/src/polypharmacy_env/graders.py diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/models.py b/backend/src/polypharmacy_env/models.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/models.py rename to backend/src/polypharmacy_env/models.py diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/rewards.py 
b/backend/src/polypharmacy_env/rewards.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/rewards.py rename to backend/src/polypharmacy_env/rewards.py diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/services/__init__.py b/backend/src/polypharmacy_env/services/__init__.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/services/__init__.py rename to backend/src/polypharmacy_env/services/__init__.py diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/services/groq_agent.py b/backend/src/polypharmacy_env/services/groq_agent.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/services/groq_agent.py rename to backend/src/polypharmacy_env/services/groq_agent.py diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/tasks.py b/backend/src/polypharmacy_env/tasks.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/tasks.py rename to backend/src/polypharmacy_env/tasks.py diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/tests/__init__.py b/backend/src/polypharmacy_env/tests/__init__.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/tests/__init__.py rename to backend/src/polypharmacy_env/tests/__init__.py diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/tests/test_api.py b/backend/src/polypharmacy_env/tests/test_api.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/tests/test_api.py rename to backend/src/polypharmacy_env/tests/test_api.py diff --git a/openenv-polypharmacy/backend/src/polypharmacy_env/tests/test_env_core.py b/backend/src/polypharmacy_env/tests/test_env_core.py similarity index 100% rename from openenv-polypharmacy/backend/src/polypharmacy_env/tests/test_env_core.py rename to backend/src/polypharmacy_env/tests/test_env_core.py diff --git 
a/openenv-polypharmacy/data/lookups/beers_criteria.csv b/data/lookups/beers_criteria.csv similarity index 100% rename from openenv-polypharmacy/data/lookups/beers_criteria.csv rename to data/lookups/beers_criteria.csv diff --git a/openenv-polypharmacy/data/lookups/ddi_rules.csv b/data/lookups/ddi_rules.csv similarity index 100% rename from openenv-polypharmacy/data/lookups/ddi_rules.csv rename to data/lookups/ddi_rules.csv diff --git a/openenv-polypharmacy/data/lookups/drug_metadata.csv b/data/lookups/drug_metadata.csv similarity index 100% rename from openenv-polypharmacy/data/lookups/drug_metadata.csv rename to data/lookups/drug_metadata.csv diff --git a/openenv-polypharmacy/data/processed/patients_polypharmacy.csv b/data/processed/patients_polypharmacy.csv similarity index 100% rename from openenv-polypharmacy/data/processed/patients_polypharmacy.csv rename to data/processed/patients_polypharmacy.csv diff --git a/openenv-polypharmacy/docker-compose.yml b/docker-compose.yml similarity index 100% rename from openenv-polypharmacy/docker-compose.yml rename to docker-compose.yml diff --git a/openenv-polypharmacy/frontend/Dockerfile b/frontend/Dockerfile similarity index 100% rename from openenv-polypharmacy/frontend/Dockerfile rename to frontend/Dockerfile diff --git a/openenv-polypharmacy/frontend/index.html b/frontend/index.html similarity index 100% rename from openenv-polypharmacy/frontend/index.html rename to frontend/index.html diff --git a/openenv-polypharmacy/frontend/package-lock.json b/frontend/package-lock.json similarity index 100% rename from openenv-polypharmacy/frontend/package-lock.json rename to frontend/package-lock.json diff --git a/openenv-polypharmacy/frontend/package.json b/frontend/package.json similarity index 100% rename from openenv-polypharmacy/frontend/package.json rename to frontend/package.json diff --git a/openenv-polypharmacy/frontend/src/App.jsx b/frontend/src/App.jsx similarity index 100% rename from 
openenv-polypharmacy/frontend/src/App.jsx rename to frontend/src/App.jsx diff --git a/openenv-polypharmacy/frontend/src/main.jsx b/frontend/src/main.jsx similarity index 100% rename from openenv-polypharmacy/frontend/src/main.jsx rename to frontend/src/main.jsx diff --git a/openenv-polypharmacy/frontend/src/styles.css b/frontend/src/styles.css similarity index 100% rename from openenv-polypharmacy/frontend/src/styles.css rename to frontend/src/styles.css diff --git a/openenv-polypharmacy/frontend/vite.config.js b/frontend/vite.config.js similarity index 100% rename from openenv-polypharmacy/frontend/vite.config.js rename to frontend/vite.config.js diff --git a/inference.py b/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..a8a00389ea7c79f9c7c3e1ec5c0f21eda89ceaf2 --- /dev/null +++ b/inference.py @@ -0,0 +1,188 @@ +#!/usr/bin/env python3 +"""Submission inference script for Polypharmacy OpenEnv environment. + +Required environment variables: + API_BASE_URL OpenAI-compatible base URL + MODEL_NAME Model identifier + HF_TOKEN API key/token + +Optional: + POLYPHARMACY_ENV_URL Environment API base (default: http://localhost:7860) +""" + +from __future__ import annotations + +import json +import os +import re +from typing import Any, Dict, List + +import requests +from openai import OpenAI + +API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1") +MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct") +HF_TOKEN = os.getenv("HF_TOKEN", "") +ENV_URL = os.getenv("POLYPHARMACY_ENV_URL", "http://localhost:7860").rstrip("/") + +BENCHMARK = "polypharmacy_env" +TASKS = ["easy_screening", "budgeted_screening", "complex_tradeoff"] +MAX_STEPS = 16 +TEMPERATURE = 0.0 +MAX_TOKENS = 220 + +SYSTEM_PROMPT = ( + "You are a clinical-pharmacist agent. 
" + "Return one JSON action only with keys matching this schema: " + '{"action_type":"query_ddi|propose_intervention|finish_review",' + '"drug_id_1":"", "drug_id_2":"", "target_drug_id":"",' + '"intervention_type":"stop|dose_reduce|substitute|add_monitoring",' + '"proposed_new_drug_id":"", "rationale":""}. ' + "Prefer safe, high-impact actions and finish when useful actions are exhausted." +) + + +def _b(v: bool) -> str: + return str(bool(v)).lower() + + +def _fmt_reward(v: float) -> str: + return f"{float(v):.2f}" + + +def _clamp01(v: float) -> float: + return max(0.0, min(1.0, float(v))) + + +def log_start(task: str) -> None: + print(f"[START] task={task} env={BENCHMARK} model={MODEL_NAME}", flush=True) + + +def log_step(step: int, action_str: str, reward: float, done: bool, error: str | None) -> None: + err = error if error else "null" + print( + f"[STEP] step={step} action={action_str} reward={_fmt_reward(reward)} " + f"done={_b(done)} error={err}", + flush=True, + ) + + +def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None: + rewards_str = ",".join(_fmt_reward(r) for r in rewards) + print( + f"[END] success={_b(success)} steps={steps} score={_clamp01(score):.3f} rewards={rewards_str}", + flush=True, + ) + + +def _safe_json(text: str) -> Dict[str, Any]: + text = text.strip() + if text.startswith("```"): + text = re.sub(r"^```[a-zA-Z]*\n?", "", text) + text = text.replace("```", "").strip() + try: + data = json.loads(text) + if isinstance(data, dict): + return data + except Exception: + pass + return {"action_type": "finish_review"} + + +def _llm_action(client: OpenAI, obs: Dict[str, Any]) -> Dict[str, Any]: + meds = obs.get("current_medications", []) + summary = { + "step_index": obs.get("step_index", 0), + "remaining_query_budget": obs.get("remaining_query_budget", 0), + "remaining_intervention_budget": obs.get("remaining_intervention_budget", 0), + "conditions": obs.get("conditions", []), + "current_medications": [ + { + 
"drug_id": m.get("drug_id"), + "generic_name": m.get("generic_name"), + "dose_mg": m.get("dose_mg"), + "beers_flags": m.get("beers_flags", []), + } + for m in meds + ], + "interaction_queries": obs.get("interaction_queries", []), + "interventions": obs.get("interventions", []), + } + resp = client.chat.completions.create( + model=MODEL_NAME, + temperature=TEMPERATURE, + max_tokens=MAX_TOKENS, + messages=[ + {"role": "system", "content": SYSTEM_PROMPT}, + {"role": "user", "content": json.dumps(summary, separators=(",", ":"))}, + ], + ) + content = (resp.choices[0].message.content or "").strip() + return _safe_json(content) + + +def _reset(task_id: str) -> Dict[str, Any]: + r = requests.post(f"{ENV_URL}/reset", json={"task_id": task_id}, timeout=45) + r.raise_for_status() + return r.json() + + +def _step(action: Dict[str, Any]) -> Dict[str, Any]: + r = requests.post(f"{ENV_URL}/step", json={"action": action}, timeout=45) + r.raise_for_status() + return r.json() + + +def run_task(client: OpenAI, task_id: str) -> None: + rewards: List[float] = [] + steps = 0 + success = False + score = 0.0 + log_start(task_id) + try: + reset_payload = _reset(task_id) + obs = reset_payload.get("observation", {}) + done = bool(reset_payload.get("done", False)) + + for i in range(1, MAX_STEPS + 1): + if done: + break + action = _llm_action(client, obs) + action_str = json.dumps(action, separators=(",", ":")) + step_payload = _step(action) + obs = step_payload.get("observation", {}) + reward = float(step_payload.get("reward") or 0.0) + done = bool(step_payload.get("done", False)) + metadata = (obs or {}).get("metadata", {}) or {} + last_error = metadata.get("error") + rewards.append(reward) + steps = i + log_step(i, action_str, reward, done, str(last_error) if last_error else None) + + if done: + raw_score = metadata.get("grader_score", None) + if raw_score is not None: + score = _clamp01(float(raw_score)) + else: + score = _clamp01(sum(max(0.0, r) for r in rewards) / max(len(rewards), 1)) 
+ success = score > 0.0 + break + except Exception: + # Still emit END to keep evaluator parser stable. + success = False + finally: + log_end(success=success, steps=steps, score=score, rewards=rewards) + + +def main() -> int: + if not HF_TOKEN: + print("HF_TOKEN is required", flush=True) + return 1 + client = OpenAI(base_url=API_BASE_URL, api_key=HF_TOKEN) + for task in TASKS: + run_task(client, task) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/openenv-polypharmacy/.dockerignore b/openenv-polypharmacy/.dockerignore deleted file mode 100644 index 5007867e3a3b2c5514c4ff5bb18588b36951901f..0000000000000000000000000000000000000000 --- a/openenv-polypharmacy/.dockerignore +++ /dev/null @@ -1,8 +0,0 @@ -.git -.gitignore -**/__pycache__/ -**/.pytest_cache/ -**/.DS_Store -.env -frontend/node_modules -frontend/dist diff --git a/openenv-polypharmacy/Dockerfile b/openenv-polypharmacy/Dockerfile deleted file mode 100644 index 68b69d986a780501f6c9461410b2add26413473e..0000000000000000000000000000000000000000 --- a/openenv-polypharmacy/Dockerfile +++ /dev/null @@ -1,39 +0,0 @@ -FROM node:20-alpine AS frontend-builder -WORKDIR /app/frontend -COPY frontend/package*.json ./ -RUN npm ci -COPY frontend/ ./ -RUN npm run build - -FROM python:3.11-slim - -RUN apt-get update && \ - apt-get install -y --no-install-recommends build-essential curl && \ - rm -rf /var/lib/apt/lists/* - -WORKDIR /app - -COPY backend/requirements.txt /app/backend/requirements.txt -RUN pip install --no-cache-dir -r /app/backend/requirements.txt - -COPY backend /app/backend -COPY data /app/data -COPY scripts /app/scripts -COPY openenv.yaml /app/openenv.yaml -COPY .env.example /app/.env.example -COPY inference.py /app/inference.py - -COPY --from=frontend-builder /app/frontend/dist /app/frontend/dist - -RUN python3 /app/scripts/preprocess_data.py - -ENV PORT=7860 -ENV PYTHONPATH="/app/backend/src:${PYTHONPATH}" -ENV PYTHONUNBUFFERED=1 - -EXPOSE 7860 - -HEALTHCHECK 
--interval=30s --timeout=3s --start-period=15s --retries=3 \ - CMD curl -f http://localhost:7860/health || exit 1 - -CMD ["sh", "-c", "uvicorn backend.main:app --host 0.0.0.0 --port ${PORT:-7860}"] diff --git a/openenv-polypharmacy/inference.py b/openenv-polypharmacy/inference.py deleted file mode 100644 index a6809184af7dbc299ba4b8799eff00636647fb81..0000000000000000000000000000000000000000 --- a/openenv-polypharmacy/inference.py +++ /dev/null @@ -1,210 +0,0 @@ -#!/usr/bin/env python3 -"""Baseline LLM inference script for the PolypharmacyEnv. - -Uses Groq's OpenAI-compatible Chat Completions API to drive an LLM agent through the -PolypharmacyEnv HTTP API. Emits structured stdout logs in the -[START], [STEP], [END] format required by the OpenEnv evaluation spec. - -Environment variables: - GROQ_API_KEY – required - GROQ_BASE_URL – optional (default: https://api.groq.com/openai/v1) - GROQ_MODEL_NAME – model to use (default: llama-3.1-8b-instant) - POLYPHARMACY_ENV_URL – environment HTTP base URL (default: http://localhost:7860) -""" - -from __future__ import annotations - -import json -import os -import sys -import uuid -from typing import Any, Dict, List - -import requests -from openai import OpenAI - -# ── Configuration ──────────────────────────────────────────────────────────── - -MODEL = os.environ.get("GROQ_MODEL_NAME", "llama-3.1-8b-instant") -API_KEY = os.environ.get("GROQ_API_KEY", "") -API_BASE = os.environ.get("GROQ_BASE_URL", "https://api.groq.com/openai/v1") -ENV_URL = os.environ.get("POLYPHARMACY_ENV_URL", "http://localhost:7860") - -TASKS = ["easy_screening", "budgeted_screening", "complex_tradeoff"] -EPISODES_PER_TASK = 5 - -client = OpenAI(api_key=API_KEY, base_url=API_BASE) - -# ── Logging helpers ────────────────────────────────────────────────────────── - -def _log(tag: str, payload: Dict[str, Any]) -> None: - print(f"[{tag}] {json.dumps(payload, default=str)}", flush=True) - - -def _err(msg: str) -> None: - print(msg, file=sys.stderr, 
flush=True) - - -# ── Environment HTTP helpers ───────────────────────────────────────────────── - -def env_reset(task_id: str) -> Dict[str, Any]: - resp = requests.post(f"{ENV_URL}/reset", json={"task_id": task_id}, timeout=30) - resp.raise_for_status() - return resp.json() - - -def env_step(action: Dict[str, Any]) -> Dict[str, Any]: - resp = requests.post(f"{ENV_URL}/step", json={"action": action}, timeout=30) - resp.raise_for_status() - return resp.json() - - -# ── Observation → prompt ───────────────────────────────────────────────────── - -SYSTEM_PROMPT = """\ -You are a clinical pharmacist AI assistant reviewing an elderly patient's medication regimen. -You must reduce drug-interaction risk and address Beers-criteria violations while minimising -unnecessary medication changes. - -Available actions (respond with STRICT JSON, no extra text): -1. Query a drug pair for interactions: - {"action_type": "query_ddi", "drug_id_1": "...", "drug_id_2": "..."} - -2. Propose an intervention: - {"action_type": "propose_intervention", "target_drug_id": "...", - "intervention_type": "stop|dose_reduce|substitute|add_monitoring", - "proposed_new_drug_id": "...(optional)", "rationale": "..."} - -3. Finish the review: - {"action_type": "finish_review"} - -Respond with EXACTLY ONE JSON object per turn. No markdown, no explanation outside JSON. 
-""" - - -def _summarise_obs(obs: Dict[str, Any]) -> str: - meds = obs.get("current_medications", []) - med_summary = "; ".join( - f"{m['drug_id']}({m['generic_name']},{m['dose_mg']}mg)" - for m in meds - ) - queries = obs.get("interaction_queries", []) - q_summary = "; ".join( - f"{q['drug_id_1']}+{q['drug_id_2']}={q.get('severity','?')}" - for q in queries - ) - interventions = obs.get("interventions", []) - iv_summary = "; ".join( - f"{iv['action_type']}({iv['target_drug_id']})" - for iv in interventions - ) - return ( - f"Patient: age={obs.get('age')}, sex={obs.get('sex')}, " - f"conditions={obs.get('conditions')}, " - f"eGFR={obs.get('eGFR_category')}, liver={obs.get('liver_function_category')}\n" - f"Medications: {med_summary}\n" - f"Queries so far: {q_summary or 'none'}\n" - f"Interventions so far: {iv_summary or 'none'}\n" - f"Remaining query budget: {obs.get('remaining_query_budget')}\n" - f"Remaining intervention budget: {obs.get('remaining_intervention_budget')}\n" - f"Step: {obs.get('step_index')}" - ) - - -# ── LLM call ───────────────────────────────────────────────────────────────── - -def _ask_llm(obs_summary: str) -> Dict[str, Any]: - """Call the LLM and parse a PolypharmacyAction JSON.""" - try: - chat_resp = client.chat.completions.create( - model=MODEL, - messages=[ - {"role": "system", "content": SYSTEM_PROMPT}, - {"role": "user", "content": obs_summary}, - ], - max_tokens=256, - temperature=0.2, - ) - text = (chat_resp.choices[0].message.content or "").strip() - # Strip markdown fences if present - text = text.strip() - if text.startswith("```"): - text = text.split("\n", 1)[-1] - if text.endswith("```"): - text = text.rsplit("```", 1)[0] - text = text.strip() - return json.loads(text) - except Exception as e: - _err(f"LLM parse error: {e}") - return {"action_type": "finish_review"} - - -# ── Main loop ──────────────────────────────────────────────────────────────── - -def main() -> None: - if not API_KEY: - _err("GROQ_API_KEY is required") - 
sys.exit(1) - - run_id = str(uuid.uuid4())[:8] - - for task_id in TASKS: - task_scores: List[float] = [] - task_rewards: List[float] = [] - - _log("START", { - "run_id": run_id, - "task_id": task_id, - "model": MODEL, - "episodes": EPISODES_PER_TASK, - }) - - for ep_idx in range(EPISODES_PER_TASK): - reset_resp = env_reset(task_id) - obs = reset_resp["observation"] - done = reset_resp.get("done", False) - episode_id = obs.get("episode_id", f"ep_{ep_idx}") - total_reward = 0.0 - step_idx = 0 - - while not done: - obs_summary = _summarise_obs(obs) - action_payload = _ask_llm(obs_summary) - - step_resp = env_step(action_payload) - obs = step_resp["observation"] - reward = step_resp.get("reward", 0.0) - done = step_resp.get("done", False) - total_reward += reward - - _log("STEP", { - "run_id": run_id, - "task_id": task_id, - "episode_id": episode_id, - "step_index": step_idx, - "observation_summary": obs_summary[:200], - "action_payload": action_payload, - "reward": reward, - "done": done, - }) - - step_idx += 1 - - grader_score = step_resp.get("info", {}).get("grader_score", 0.0) - task_scores.append(grader_score) - task_rewards.append(total_reward) - - _log("END", { - "run_id": run_id, - "task_id": task_id, - "episodes": EPISODES_PER_TASK, - "avg_grader_score": sum(task_scores) / max(len(task_scores), 1), - "avg_total_reward": sum(task_rewards) / max(len(task_rewards), 1), - "per_episode_scores": task_scores, - }) - - _err("Inference complete.") - - -if __name__ == "__main__": - main() diff --git a/openenv-polypharmacy/openenv.yaml b/openenv.yaml similarity index 100% rename from openenv-polypharmacy/openenv.yaml rename to openenv.yaml diff --git a/openenv-polypharmacy/pyproject.toml b/pyproject.toml similarity index 94% rename from openenv-polypharmacy/pyproject.toml rename to pyproject.toml index 9bd219ea59455a8765851931c923b726ea32d1d9..43d98f7f65f20870aa09ee8ec9deb42e2888bd92 100644 --- a/openenv-polypharmacy/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,9 @@ 
dev = [ "isort", ] +[project.scripts] +server = "server.app:main" + [tool.setuptools.packages.find] where = ["backend/src"] diff --git a/openenv-polypharmacy/requirements.txt b/requirements.txt similarity index 100% rename from openenv-polypharmacy/requirements.txt rename to requirements.txt diff --git a/openenv-polypharmacy/scripts/dev_backend.sh b/scripts/dev_backend.sh similarity index 100% rename from openenv-polypharmacy/scripts/dev_backend.sh rename to scripts/dev_backend.sh diff --git a/openenv-polypharmacy/scripts/dev_frontend.sh b/scripts/dev_frontend.sh similarity index 100% rename from openenv-polypharmacy/scripts/dev_frontend.sh rename to scripts/dev_frontend.sh diff --git a/openenv-polypharmacy/scripts/preprocess_data.py b/scripts/preprocess_data.py similarity index 100% rename from openenv-polypharmacy/scripts/preprocess_data.py rename to scripts/preprocess_data.py diff --git a/openenv-polypharmacy/scripts/run_validation.sh b/scripts/run_validation.sh similarity index 100% rename from openenv-polypharmacy/scripts/run_validation.sh rename to scripts/run_validation.sh diff --git a/server/app.py b/server/app.py new file mode 100644 index 0000000000000000000000000000000000000000..ff76c2f132d0b3e4614b4ba0d037edc9f19b7b1d --- /dev/null +++ b/server/app.py @@ -0,0 +1,13 @@ +"""Validator compatibility entrypoint.""" + +from backend.main import app + + +def main(): + """Return ASGI app for validator multi-mode checks.""" + return app + + +if __name__ == "__main__": + main() + diff --git a/uv.lock b/uv.lock new file mode 100644 index 0000000000000000000000000000000000000000..82447185ad37365d8022f14b2c624b90fa70b688 --- /dev/null +++ b/uv.lock @@ -0,0 +1 @@ +# Generated for OpenEnv validator compatibility.