Adhitya122 committed on
Commit bf9e424 · verified · 1 Parent(s): 09d18b3

Prepare MolForge OpenEnv Docker Space submission
.dockerignore ADDED
@@ -0,0 +1,23 @@
.git
.gitignore
.venv
__pycache__/
*.py[cod]
.DS_Store

artifacts/
adapters/
data/
openenv_molforge.egg-info/
*.egg-info/

qwen3_5_2b_lora_adapters*/
*.safetensors
*.pt
*.pth
*.bin

analysis_results.md
help_guide/
issue/*.ipynb
scripts/__pycache__/
.gitignore ADDED
@@ -0,0 +1,47 @@
# macOS / editor noise
.DS_Store

# Output runs
molforge_rl_runs/
molforge_grpo_*/
*.egg-info/

*.swp
*.swo

# Python caches and local environments
__pycache__/
*.py[cod]
.pytest_cache/
.mypy_cache/
.ruff_cache/
.venv/
venv/
env/

# Build/package outputs
build/
dist/
*.egg-info/

# Local secrets and notebooks
.env
.env.*
*.ipynb_checkpoints/

# Generated model/adapters/checkpoints
qwen3_5_2b_lora_adapters*/
artifacts/
outputs/
checkpoints/
*.safetensors
*.bin
*.pt
*.pth
*.ckpt

# Legacy/generated SFT artifacts. Keep the current v4 dataset in issue/.
data/*.jsonl
issue/molforge_sft_compact_policy_v3.jsonl
qwen3_5_2b_unsloth_sft.py
analysis_results.md
Dockerfile ADDED
@@ -0,0 +1,45 @@
ARG BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest
FROM ${BASE_IMAGE} AS builder
ARG INSTALL_TDC=0

WORKDIR /app

RUN apt-get update && \
    apt-get install -y --no-install-recommends git && \
    rm -rf /var/lib/apt/lists/*

COPY . /app/env
WORKDIR /app/env

RUN if ! command -v uv >/dev/null 2>&1; then \
        curl -LsSf https://astral.sh/uv/install.sh | sh && \
        mv /root/.local/bin/uv /usr/local/bin/uv && \
        mv /root/.local/bin/uvx /usr/local/bin/uvx; \
    fi

ENV UV_LINK_MODE=copy
RUN --mount=type=cache,target=/root/.cache/uv \
    if [ "$INSTALL_TDC" = "1" ]; then \
        uv sync --no-editable --extra tdc; \
    else \
        uv sync --no-editable; \
    fi

FROM ${BASE_IMAGE}

WORKDIR /app

RUN apt-get update && \
    apt-get install -y --no-install-recommends curl && \
    rm -rf /var/lib/apt/lists/*

COPY --from=builder /app/env/.venv /app/.venv
COPY --from=builder /app/env /app/env

ENV PATH="/app/.venv/bin:$PATH"
ENV PYTHONPATH="/app/env:$PYTHONPATH"

HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:8000/health || exit 1

CMD ["sh", "-c", "cd /app/env && uvicorn server.app:app --host 0.0.0.0 --port 8000"]
EVALUATION_PROTOCOL.md ADDED
@@ -0,0 +1,103 @@
# MolForge Evaluation Protocol

Use two reward settings for different purposes.

## 1. Training / RL Warmup

Use curriculum mode:

```bash
MOLFORGE_REWARD_MODE=curriculum
MOLFORGE_TRAINING_RANDOMIZATION=1
```

Track:

- mean episode reward;
- valid JSON/action rate;
- policy veto rate;
- evidence score;
- number of oracle calls;
- budget remaining at submit;
- submit rate;
- missed-nomination rate;
- strict terminal `submission_score`.

Curriculum reward is allowed to be generous because its purpose is learning.
It rewards useful evidence collection and evidence-supported submit timing.

## 2. Judge-Facing Evaluation

Use strict/default mode:

```bash
unset MOLFORGE_TRAINING_RANDOMIZATION
export MOLFORGE_REWARD_MODE=assay_gated
```

Report:

- `average_submission_score`;
- `average_final_score`;
- per-task `final_score`;
- per-task `submission_score`;
- `candidate_score`;
- `progress_score`;
- `constraint_margin_score`;
- `evidence_score`;
- `coordination_score`;
- `budget_score`;
- submitted vs not submitted;
- invalid action count;
- policy veto count.

The official score should not be the minimum number of steps. Real drug discovery
does not reward the fastest project if it skips necessary evidence. Instead,
MolForge rewards finishing within the available budget and decision horizon.
`final_score` is the single scalar to optimize and headline. It equals
`submission_score` for submitted episodes and gives only small, capped partial
credit to non-submitted episodes. `progress_score` is useful for debugging but
is not a substitute for `final_score` or `submission_score`: it is capped when
constraints fail, when the hard trap scenario is not restarted, or when the
model loops through repeated assays and vetoes.
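The selection rule above can be sketched in a few lines; the function and the `partial_credit_cap` value are illustrative assumptions, not the environment's actual constants:

```python
def final_score(submitted, submission_score, progress_score,
                partial_credit_cap=0.2):  # assumed cap, not the real constant
    """Sketch of the final_score rule: submitted episodes keep their
    submission_score; non-submitted episodes earn only small, capped
    partial credit derived from progress."""
    if submitted:
        return submission_score
    # Non-submitted episodes: progress counts, but only up to the cap.
    return min(progress_score, partial_credit_cap)
```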

## Budget And Step Interpretation

MolForge has both:

- `max_steps`: the project decision deadline;
- `remaining_budget`: the assay/resource budget.

The agent must finish inside both limits.

Budget effects:

- assays subtract from `remaining_budget`;
- over-budget assays are invalid;
- budget exhaustion terminates the episode;
- valid submissions receive a transition-level `budget_efficiency` reward;
- formal `submission_score` receives a small bonus for unused budget only when
  the submission has required evidence, passes constraints, and beats baseline;
- curriculum near-miss reward includes `budget_score`, but missed nomination is
  penalized if the evidence package was ready and the model failed to submit.

Step effects:

- reaching `max_steps` without submission ends the episode;
- there is a step-limit penalty;
- no extra score is given merely for fewer steps;
- faster is better only if the candidate is supported by evidence and budget is
  preserved.
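The two limits can be sketched as a termination check; the function and argument names are illustrative, not the environment's actual API:

```python
def episode_should_end(step_count, remaining_budget, submitted, max_steps):
    """An episode ends on submit, on budget exhaustion, or at the
    decision horizon -- the agent must stay inside both limits."""
    if submitted:
        return True
    if remaining_budget <= 0:
        return True
    return step_count >= max_steps
```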

## Recommended Comparison Table

For the README/demo, compare:

| Model | Reward mode | Submit rate | Avg final_score | Avg submission_score | Avg evidence_score | Avg budget_score | Veto rate |
| --- | --- | ---: | ---: | ---: | ---: | ---: | ---: |
| Base model | assay_gated | low | low | low | low/medium | variable | high |
| SFT v4 | assay_gated | better | better | better | better | variable | lower |
| SFT v4 + RL | assay_gated | best | best | best | high | healthy | low |

For training plots, show curriculum reward increasing, but always pair it with
strict `submission_score` before/after so the improvement is credible.
HF_RL_JOBS_NOTES.md ADDED
@@ -0,0 +1,92 @@
# Hugging Face RL Jobs Notes

This file tracks the remote RL training attempts for the MolForge OpenEnv GRPO run.

## Jobs Tried

| Job | Hardware | Result | Notes |
| --- | --- | --- | --- |
| `69ed7260d70108f37acdf4b8` | `a100-large` | Canceled | Stayed in `SCHEDULING`, so we canceled it before it used GPU time. |
| `69ed73d3d70108f37acdf4e1` | `l40sx1` | Failed | Started but exited during Python import, before model load or training. |
| `69ed74f6d70108f37acdf504` | `l40sx1` | Failed | `--with mergekit` caused an unsolvable pydantic conflict with `openenv-core`. |
| `69ed7be5d2c8bd8662bcef00` | `l40sx1` | Canceled | Incorrect CLI usage (missing image name). |
| `69ed9440d70108f37acdf83b` | `l40sx1` | Failed | `uv run` couldn't find the script path `issue/script.py`. |
| `69ed94add2c8bd8662bcf215` | `l40sx1` | Submitted | Fixed the script path to just the filename and used an explicit `python` call. |

## Failure History

### Job 2 (`69ed73d3`) — `ModuleNotFoundError: No module named 'mergekit'`

TRL internally imports `mergekit` for GRPO model-merging callbacks even though we don't use merging. The fix at the time was to add `--with mergekit`, which led to the next failure.

### Job 3 (`69ed74f6`) — pydantic version conflict (CURRENT)

Adding `--with mergekit` broke the resolver:

- `mergekit` (all versions) requires `pydantic < 2.11`
- `openenv-core==0.2.3` → `fastmcp>=3.0.0` → `pydantic >= 2.11.7`

**No version of pydantic satisfies both.** uv correctly refuses to resolve.

## Fix

**Do NOT pass `--with mergekit`** in the HF Jobs command. Instead, the script now installs mergekit at runtime with `--no-deps` before importing TRL:

```python
import subprocess
import sys

try:
    import mergekit
except ImportError:
    subprocess.check_call(
        [sys.executable, "-m", "pip", "install", "mergekit", "--no-deps", "-q"]
    )
```

This makes `mergekit` importable (satisfying TRL) without pulling in its conflicting pydantic constraint.
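The same runtime guard generalizes to a small helper; the name `ensure_importable` is ours, not part of the repo:

```python
import importlib.util
import subprocess
import sys

def ensure_importable(package):
    """Install `package` with --no-deps only if it cannot already be
    imported. Returns True when an install was triggered, False when the
    package was already available. --no-deps keeps the package's own pins
    (e.g. mergekit's pydantic<2.11) out of the resolved environment."""
    if importlib.util.find_spec(package) is not None:
        return False
    subprocess.check_call(
        [sys.executable, "-m", "pip", "install", package, "--no-deps", "-q"]
    )
    return True
```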

## Checkpoint and Artifact Persistence

The OpenEnv GRPO script saves the final trained adapter and tokenizer to:

```text
<run_dir>/adapters/
```

It also writes logs, metrics, plots, before/after evaluator JSON, and a zip archive under the run directory. When `HF_OUTPUT_REPO=Adhitya122/molforge-rl-runs` is set, the full run folder is uploaded to:

```text
hf://datasets/Adhitya122/molforge-rl-runs/<run_name>
```

## Safer Next Runs

Recommended env settings for the next HF Jobs command (NO `--with mergekit`):

```bash
--env RL_MAX_STEPS=20
--env RL_DATASET_SIZE=30
--env MAX_COMPLETION_LENGTH=1024
```

Use this as a smoke run first. Once it reaches at least one trainer log line and uploads artifacts, scale up to:

```bash
--env RL_MAX_STEPS=80
--env RL_DATASET_SIZE=120
--env MAX_COMPLETION_LENGTH=2048
```

Good hardware choices:

| Hardware | Use |
| --- | --- |
| `l40sx1` | Best next smoke test: 48 GB VRAM, cheaper than A100. |
| `a100-large` | Good full run if scheduling is available. |
| `h200` | Highest headroom, more expensive, useful if A100 scheduling stalls. |
| `a10g-large` | Cheap fallback, but may need a shorter completion length and fewer steps. |

## Monitoring Commands

```bash
hf jobs inspect <job_id>
hf jobs logs <job_id> --tail 200
```

Prefer `logs` over `inspect` when searching for the real traceback: `inspect` prints the full base64-encoded submitted script and buries the useful error.
README.md CHANGED
@@ -1,10 +1,163 @@
- title: Molforge
- emoji: 🚀
- colorFrom: purple
- colorTo: purple
- pinned: false
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

---
title: MolForge
emoji: 🧪
colorFrom: green
colorTo: indigo
sdk: docker
app_port: 8000
---

# MolForge

This repository implements an OpenEnv-compatible reinforcement learning environment for **medicinal chemistry lead optimization**. The agent never directly observes the true biological properties of the candidate molecule. Instead, a specialist team iteratively edits a KRAS G12C candidate under a limited assay budget, partial observability, and strict safety constraints, receives noisy simulated readouts, and is rewarded for discovering a potent, synthesizable, and safe drug candidate.

The environment is designed as a **partially observable Markov decision process (POMDP)** with:
- hidden ground-truth molecular properties and scenario constraints
- hidden target mutation traps (e.g. KRAS resistance panel shifts)
- visible task metadata, team communication, assay results, and remaining budget
- simulated `RDKit` descriptors and `TDC` (Therapeutics Data Commons) predictions (QED, SA_Score, LogP, TPSA)
- dense step-wise reward (in curriculum mode) plus a terminal reward for submission quality

At a high level, each episode looks like this:
1. `reset()` picks a biological scenario (e.g. `level_1_medium`) and seeds the simulator.
2. The agent receives a `MolForgeObservation` describing the task, the starting molecule scaffold, and the current visible state.
3. The agent (acting as different roles) submits a `MolForgeAction` such as `edit`, `run_assay`, `propose_nomination`, or `submit`.
4. The **Governance rule engine** checks whether the action is valid, requiring multi-agent consensus for final decisions.
5. The transition engine updates the molecule, spends the assay budget, and returns oracle readings.
6. The reward computer scores the step based on whether the action was invalid, vetoed, or successful.
7. The environment returns a new observation with updated history, assay readings, and reward.
8. The episode ends when the agent successfully submits the molecule, exhausts its budget, or reaches the maximum step horizon.

---

## Hidden state vs Visible state

### Hidden state
The simulator keeps ground-truth properties that the agent never directly sees:
- The true underlying scoring functions for `potency`, `safety`, and `synthesizability`.
- Sunk-cost traps and late-stage target mutations (e.g., in `level_2_hard`).
- The strict constraints required for a valid submission.
- The remaining hidden milestones for the scenario.

### Visible state
The agent only sees `MolForgeObservation`, which includes:
- The current `TaskSpec` and `scenario_id`.
- Pipeline history and previous actions.
- The current molecular scaffold (in SMILES format).
- The `budget_used` and `remaining_budget`.
- Responses from the `run_assay` oracle (TDC predictors and RDKit descriptors).
- The `GovernanceStatus` showing which specialist agents have approved or objected.
- The `step_reward_breakdown`.

This separation is what makes the environment a POMDP rather than a fully observed simulator.

---

## Repository files navigation

### `models.py`
Defines the Pydantic contracts that all other modules use:
- `MolForgeAction`: One structured step chosen by the agent. Fields include `action_type`, `acting_role`, `tool_name`, `slot`, `fragment`, and `rationale`.
- `MolForgeObservation`: What the agent can see after each step; includes `current_molecule`, `last_transition_summary`, `reward_breakdown`, and `governance_status`.
- `MolForgeState`: The internal tracked state, including `episode_id`, `step_count`, and `invalid_action_count`.

### `server/scenarios.py`
This is where episodes come from. It defines a curated library of three biological scenarios, each bundling a starting scaffold, a budget, and a specific molecular target:
- `level_0_easy`: Potency-first optimization with a generous budget and a starting scaffold that is one or two edits from success.
- `level_1_medium`: Multi-objective optimization with safety as a hard constraint and moderate budget pressure.
- `level_2_hard`: A sunk-cost trap plus a late target mutation. The initial scaffold family has a hidden liability, and the best policy is often to restart early.

### `server/actions.py` & `server/governance.py`
The rule engines that enforce scientific and procedural constraints before each action is applied:
- `run_assay`: Costs budget. Assembles the fragments into a valid `SMILES` string and evaluates the current molecule using `TDC` Oracles and `RDKit` logic (e.g. `MolLogP`, `TPSA`, `NumRotatableBonds`, `QED`).
- `edit`: Replaces a specific R-group slot (`warhead`, `hinge`, `solvent_tail`, `back_pocket`) with a new chemical fragment (e.g. `acrylamide`, `fluorophenyl`, `morpholine`). Clears previously gathered evidence.
- `submit`: Ends the episode. Triggers the final evaluation grader against the scenario's strict hard constraints (`potency_min`, `toxicity_max`, `synth_min`).
- **Governance**: Certain actions require multi-agent consensus. If the `Lead Chemist` tries to submit without the `Safety Specialist`'s approval, the action is vetoed.

### `server/molforge_environment.py`
This is the orchestration layer that ties everything together.

On `reset()` it:
- Generates a task scenario.
- Clears the message log and history, and resets the molecule to the default scaffold.

On `step()` it:
- Checks governance rules and validates the action.
- Executes the action (e.g. replacing an R-group fragment or running an assay).
- Computes the reward (via curriculum or assay-gated mode).
- Builds the next `MolForgeObservation`.

---

## What actually happens on one step
Here is the concrete order of operations for `env.step(action)`:
1. Increment the step counter.
2. Run validation checks. If the action format is invalid, return a failure report and a `-1.0` reward.
3. Assess **Governance**. If a required specialist agent vetoes the action, the action is blocked and penalized.
4. Execute the action (`edit`, `run_assay`, `submit`).
5. Deduct oracle budget if `run_assay` was called.
6. Compute the decomposed reward from the state transition (e.g., a penalty for redundant assays).
7. If the episode is ending (via `submit`, max steps, or zero budget), compute the terminal `submission_score`.
8. Return an observation that exposes the visible summary but not the hidden truth.
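The order of operations can be sketched as a dispatch function; the helper names and the veto penalty magnitude are illustrative assumptions, and only the `-1.0` invalid-action reward comes from the step list itself:

```python
from dataclasses import dataclass, field

@dataclass
class StepResult:
    reward: float
    done: bool
    notes: list = field(default_factory=list)

def step_sketch(state, action, validate, governance_veto, execute):
    """Illustrative order of operations for env.step(); names are ours,
    not the repo's actual internals."""
    state["step_count"] += 1                      # 1. count the step
    if not validate(action):                      # 2. schema/format check
        return StepResult(reward=-1.0, done=False, notes=["invalid action"])
    if governance_veto(state, action):            # 3. specialist veto (penalty assumed)
        return StepResult(reward=-0.5, done=False, notes=["vetoed"])
    reward, done = execute(state, action)         # 4-7. transition + reward
    return StepResult(reward=reward, done=done)   # 8. visible result only
```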

---

## Typical successful pipeline
Most scenarios reward a sensible experiment order similar to:
1. `run_assay` (assay potency and safety of the baseline molecule).
2. `edit` (swap an R-group fragment to improve a weak property).
3. `run_assay` (gather new evidence for the modified molecule).
4. `propose_nomination` (discuss the findings with the multi-agent review board).
5. `submit` (finalize the candidate).

The exact best sequence depends on the scenario. In `level_2_hard`, the best strategy is often to `restart` the entire scaffold immediately rather than wasting budget on a doomed trajectory.

---

## Reward Strategy & Episode termination

MolForge uses two distinct reward settings for different purposes:

**1. Training / RL Warmup (`MOLFORGE_REWARD_MODE=curriculum`)**
- Gives partial credit at the end of an episode even if the model didn't submit, provided it gathered useful evidence.
- Discourages reward hacking by penalizing assay-spamming and reserving large multipliers for successful submissions.

**2. Judge-Facing Evaluation (`MOLFORGE_REWARD_MODE=assay_gated`)**
- Strict OpenEnv hackathon rules.
- If the agent does not formally `submit` the candidate, the final score is `0.0`.
- No partial credit is given for just gathering evidence.

An episode ends when one of the following happens:
- The agent explicitly chooses `submit`.
- Resources (oracle budget) are exhausted.
- The environment reaches `MAX_STEPS`.

---

## Installation & Usage
The package requires Python ≥ 3.10.
```bash
pip install "openenv-core[core]>=0.2.3" pydantic transformers trl peft datasets
```

### 1. In-process environment
Use `MolForgeEnvironment` when you want direct Python access with full structured observations:
```python
from models import MolForgeAction
from server.molforge_environment import MolForgeEnvironment

env = MolForgeEnvironment()
obs = env.reset()

action = MolForgeAction(
    action_type="run_assay",
    acting_role="Lead Chemist",
    tool_name="potency_oracle",
    rationale="Need to gather baseline potency evidence.",
)
obs = env.step(action)
print(obs.reward)
print(obs.last_transition_summary)
```

### 2. RL Training Notebook
`issue/molforge_grpo_official_submission.ipynb` documents how to fine-tune a Qwen3.5 model with TRL's GRPO trainer directly against this OpenEnv environment.
REAL_WORLD_WORKFLOW_MAPPING.md ADDED
@@ -0,0 +1,221 @@
# MolForge Real-World Workflow Mapping

MolForge should feel like a compressed medicinal-chemistry lead-optimization
program, not a one-shot molecule generator.

The real-world pattern is:

1. A team starts with a scaffold.
2. Chemists propose edits based on structure-activity reasoning.
3. Assay teams spend limited budget to measure uncertain properties.
4. Safety and process specialists veto risky or impractical candidates.
5. The team decides whether to keep optimizing, restart, or nominate a lead.
6. Success depends on evidence, not only on the final molecule.

This is exactly the shape MolForge should copy.

## Real-World Loop

### 1. Design Hypothesis

Real teams do not mutate molecules randomly. A medicinal chemist proposes a
change with an intended purpose:

- improve potency;
- reduce toxicity;
- improve solubility or ADME;
- simplify synthesis;
- escape a known scaffold liability.

MolForge equivalent:

- `edit`
- `rationale`
- `expected_effects`
- `evidence`

The model should not only choose a fragment. It should say what scientific
pressure that edit is meant to address.

### 2. Cheap Triage Before Expensive Assays

Real projects usually run cheap computational or low-cost screens before
expensive experiments.

MolForge equivalent:

- `evaluate_properties`
- `search_literature`
- `estimate_synthesizability`
- `dock_target`

These should be useful but imperfect. They help the model decide where to spend
more serious assay budget.

### 3. Expensive Evidence Gates

Real lead candidates require stronger evidence before nomination:

- potency evidence;
- toxicity/safety evidence;
- synthesis or route feasibility evidence;
- sometimes post-mutation or resistance-panel evidence.

MolForge equivalent:

- `assay_toxicity`
- `dock_target`
- `estimate_synthesizability`
- hard evidence requirements in `submit`
- `evidence_score`

This is why `submission_score` should remain strict. A molecule that looks good
but was never properly assayed is not a real lead candidate.

### 4. Cross-Functional Decision Board

Real projects are not controlled by one chemist. A lead-optimization meeting
usually includes:

- medicinal chemistry;
- assay biology;
- toxicology/safety;
- process chemistry or manufacturability;
- project leadership.

MolForge equivalent:

- `lead_chemist`
- `assay_planner`
- `toxicologist`
- `process_chemist`
- governance messages
- hard vetoes
- `coordination_score`

This is one of MolForge's strongest environment-innovation points. The agent is
not just optimizing a molecule; it is coordinating a scientific team.

### 5. Stop, Submit, or Restart

Real teams must decide when to stop spending money. Sometimes the right answer
is to abandon a scaffold early because the series is a trap.

MolForge equivalent:

- `submit`
- `restart`
- budget limits
- max decision horizon
- hard scenario target shift
- sunk-cost trap in `level_2_hard`

This lets the environment test project judgment, not just local molecule edits.

## How To Use This In MolForge

### Keep Two Scores

Use two kinds of reward:

1. **Training reward**
   Helps the model learn the workflow.

2. **Formal submission score**
   Measures whether the agent actually nominated a valid candidate.

That means:

- `MOLFORGE_REWARD_MODE=curriculum` for early RL;
- default `assay_gated` mode for final reporting;
- `submission_score` stays `0.0` without a formal submit.

This mirrors the real world: a project can make progress without nominating a
lead, but it cannot claim lead success without a nomination package.

### Make Rewards Stage-Gated

A good real-world reward should not be only one giant final number.

Useful reward components:

- valid action/schema;
- useful design edit;
- useful first assay;
- evidence coverage;
- safety improvement;
- synthesis improvement;
- avoiding repeated assays;
- avoiding vetoed decisions;
- submitting only with enough support;
- restarting from a bad scaffold when appropriate.

This gives RL a learnable path while preserving strict final success.
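A stage-gated reward of this shape can be sketched as a weighted checklist; the component names and weights here are illustrative, not MolForge's actual values:

```python
def stage_gated_reward(events, weights=None):
    """Sum small shaping bonuses/penalties for workflow milestones.
    Component names and weights are illustrative, not MolForge's constants."""
    weights = weights or {
        "valid_schema": 0.05,       # action parsed and validated
        "useful_edit": 0.10,        # design edit with a stated purpose
        "first_assay": 0.10,        # bought evidence before editing blindly
        "evidence_coverage": 0.20,  # potency + safety + synthesis covered
        "repeated_assay": -0.10,    # penalty: redundant spend
        "vetoed_decision": -0.15,   # penalty: overrode a specialist
        "supported_submit": 0.40,   # submitted with enough support
    }
    return sum(w for name, w in weights.items() if events.get(name))
```

This keeps each milestone small relative to the strict terminal score, so shaping guides the workflow without replacing it.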

### Make The Demo Story Simple

Judges should understand this in one sentence:

> MolForge tests whether an LLM can run a miniature drug-discovery project:
> design molecules, buy assays, respect safety vetoes, manage budget, and
> nominate a candidate only when the evidence package is strong enough.

Then show:

- the baseline model repeats invalid or vetoed actions;
- the SFT model learns the action language;
- the RL model learns better evidence and submit timing;
- the final candidate report card shows potency, toxicity, synthesis, evidence,
  budget, and coordination.

## What We Already Have

MolForge already contains most of this real-world structure:

- molecule slot edits;
- RDKit/TDC-backed surrogate oracle path;
- limited assay budget;
- cheap and expensive tools;
- hidden true properties;
- visible assay estimates;
- toxicity and synthesis constraints;
- multi-agent specialist governance;
- safety vetoes;
- restart action;
- hard target-shift scenario;
- decomposed report card;
- strict terminal `submission_score`;
- curriculum reward mode for early RL.

## What To Strengthen Next

The next useful additions should make the environment feel even more like a
real project:

1. **Assay uncertainty**
   Repeated assays should narrow confidence intervals, but cost budget.

2. **Stage labels**
   Mark states as `design`, `triage`, `evidence_package`, `nomination`, or
   `no-go`.

3. **No-go decisions**
   Reward a model for stopping or restarting when the evidence says the series
   is unsafe or infeasible.

4. **Portfolio-style report**
   At terminal time, show why the candidate was nominated or rejected.

5. **Holdout variants**
   Randomize scaffold starts and budgets so the model cannot memorize only
   three paths.

For the hackathon, the best near-term path is:

```text
SFT v4 for action/workflow competence
-> curriculum RL for observable reward improvement
-> strict assay_gated evaluation for final submission_score
-> README/demo framed as a real drug-discovery decision board
```
RL_TRAINING_COLAB.md ADDED
@@ -0,0 +1,51 @@
# MolForge RL Training in Colab

Use [issue/molforge_grpo_colab_training.ipynb](issue/molforge_grpo_colab_training.ipynb) for the judge-rerunnable workflow.

The notebook trains from the Qwen3.5 2B SFT v4 adapter with TRL GRPO against the real MolForge environment reward. It uses the TRL/OpenEnv `environment_factory` pattern from the Wordle/Sudoku examples: MolForge exposes tool methods for `edit`, `run_assay`, `submit`, `restart`, and `defer`, and the reward functions read scores from the environment instances. It is set up for short evidence runs on A100/H100 rather than full convergence.

## Outputs

Each run writes to `/content/molforge_rl_runs/<run_name>/` and copies the same folder to `DRIVE_OUTPUT_DIR` when set.

Important artifacts:

- `logs/openenv_tool_rollouts.jsonl`: every tool call, reward, governance status, and score diagnostics.
- `logs/trainer_log_history.jsonl`: trainer loss, grad norm, learning rate, and step timing.
- `openenv_tool_metrics.csv`: spreadsheet-friendly tool rollout reward table.
- `eval_before_training.json`: full 3-task rollout before GRPO.
- `eval_after_training.json`: full 3-task rollout after GRPO.
- `plots/reward_curve.png`: completion reward curve and moving average.
- `plots/loss_curve.png`: trainer loss curve.
- `plots/eval_before_after.png`: before/after final_score comparison.
- `plots/action_distribution.png`: sampled action mix.
- `adapters/`: trained LoRA adapter checkpoint.
- `<run_name>.zip`: portable archive of the run outputs.
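A minimal sketch for post-processing these logs, assuming each `openenv_tool_rollouts.jsonl` line carries a top-level `reward` field (an unverified assumption about the log schema):

```python
import json
from pathlib import Path

def moving_average(values, window=10):
    """Trailing moving average, as plotted alongside reward_curve.png."""
    out = []
    for i in range(len(values)):
        chunk = values[i - window + 1 : i + 1] if i >= window - 1 else values[: i + 1]
        out.append(sum(chunk) / len(chunk))
    return out

def rollout_rewards(path):
    """Pull per-rollout rewards out of the JSONL log, skipping records
    without a top-level "reward" field."""
    rewards = []
    for line in Path(path).read_text().splitlines():
        record = json.loads(line)
        if "reward" in record:
            rewards.append(float(record["reward"]))
    return rewards
```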

## Fast Demo Settings

For a quick A100/H100 proof run:

```python
import os

os.environ["RL_MAX_STEPS"] = "80"
os.environ["NUM_GENERATIONS"] = "2"
os.environ["RL_DATASET_SIZE"] = "120"
os.environ["RL_BATCH_SIZE"] = "2"
os.environ["RL_GRAD_ACCUM"] = "4"
os.environ["RL_LEARNING_RATE"] = "2e-6"
```

For a stronger run, try `RL_MAX_STEPS=200` and `NUM_GENERATIONS=4` on H100.
If Colab runs out of memory, reduce `MAX_COMPLETION_LENGTH` to `1024`; keep `RL_BATCH_SIZE` divisible by `NUM_GENERATIONS`.

If TRL fails during import with `No module named 'mergekit'`, install `mergekit` in the same setup cell as `trl`.

## What to Show Judges

Use the before/after rollout JSON plus these plots:

- `reward_curve.png` for reward improvement during RL.
- `loss_curve.png` for actual training evidence.
- `eval_before_after.png` for task-level behavior change.

The official environment score remains `final_score`; `progress_score` and per-step rewards are debugging signals.
Requirements_before_submitting.md ADDED
@@ -0,0 +1,521 @@
+
+ Evaluation Criteria
+
+ Phase 1: Automated Validation
+
+ Pass/fail gate — HF Space deploys, OpenEnv spec compliance, Dockerfile builds, baseline reproduces, 3+ tasks with graders.
+
+ Phase 2: Agentic Evaluation
+
+ Scored — baseline agent re-run, a standard open LLM agent (e.g. Nemotron 3 Super) run against all environments, score variance check.
+
+ Phase 3: Human Review
+
+ Top submissions reviewed by Meta and Hugging Face engineers for real-world utility, creativity, and exploit checks.
+
+ Disqualification Criteria
+
+ Environment does not deploy or respond
+
+ Plagiarized or trivially modified existing environments
+
+ Graders that always return the same score
+
+ No baseline inference script
+
+ How Judging Works
+
+ Pre-Submission Checklist — all must pass or the submission is disqualified
+
+ HF Space deploys
+
+ Automated ping to the Space URL — must return 200 and respond to reset()
+
+ OpenEnv spec compliance
+
+ Validate openenv.yaml, typed models, and the step()/reset()/state() endpoints
+
+ Dockerfile builds
+
+ Automated docker build on the submitted repo
+
+ Baseline reproduces
+
+ Run the submitted inference script — it must complete without error and produce scores
+
+ 3+ tasks with graders
+
+ Enumerate tasks, run each grader, verify scores are in the 0.0–1.0 range
+
+ Additional Instructions
+
+ Before submitting, ensure the following variables are defined in your environment configuration:
+
+ API_BASE_URL The API endpoint for the LLM.
+
+ MODEL_NAME The model identifier to use for inference.
+
+ HF_TOKEN Your Hugging Face / API key.
+
+ The inference script must be named `inference.py` and placed in the root directory of the project.
+
+ Participants must use the OpenAI client for all LLM calls, configured with the variables above.
+
+ Infra Restrictions
+
+ The inference script must finish in under 20 minutes.
+
+ Make sure your environment and inference can run on a machine with 2 vCPUs and 8 GB of memory.
+
+ Validator
+
+ Run the pre-submission validation script before submitting.
+
+ Sample Inference Script
75
+
76
+ """
77
+ Inference Script Example
78
+ ===================================
79
+ MANDATORY
80
+ - Before submitting, ensure the following variables are defined in your environment configuration:
81
+ API_BASE_URL The API endpoint for the LLM.
82
+ MODEL_NAME The model identifier to use for inference.
83
+ HF_TOKEN Your Hugging Face / API key.
84
+
85
+ - The inference script must be named `inference.py` and placed in the root directory of the project
86
+ - Participants must use OpenAI Client for all LLM calls using above variables
87
+ """
88
+
89
+ import os
90
+ import re
91
+ import base64
92
+ import textwrap
93
+ from io import BytesIO
94
+ from typing import List, Optional, Dict
95
+
96
+ from openai import OpenAI
97
+ import numpy as np
98
+ from PIL import Image
99
+
100
+ from browsergym_env import BrowserGymAction, BrowserGymEnv
101
+
102
+ API_BASE_URL = os.getenv("API_BASE_URL") or "https://router.huggingface.co/v1"
103
+ API_KEY = os.getenv("HF_TOKEN") or os.getenv("API_KEY")
104
+ MODEL_NAME = os.getenv("MODEL_NAME")
105
+ MAX_STEPS = 8
106
+ MAX_DOM_CHARS = 3500
107
+ TEMPERATURE = 0.2
108
+ MAX_TOKENS = 200
109
+ FALLBACK_ACTION = "noop()"
110
+
111
+ DEBUG = True
112
+ ACTION_PREFIX_RE = re.compile(
113
+ r"^(action|next action)\s*[:\-]\s*",
114
+ re.IGNORECASE,
115
+ )
116
+ ACTION_PATTERN = re.compile(r"[A-Za-z_]+\s*\(.*\)", re.DOTALL)
117
+
118
+
119
+ SYSTEM_PROMPT = textwrap.dedent(
120
+ """
121
+ You control a web browser through BrowserGym.
122
+ Reply with exactly one action string.
123
+ The action must be a valid BrowserGym command such as:
124
+ - noop()
125
+ - click('<BID>')
126
+ - type('selector', 'text to enter')
127
+ - fill('selector', 'text to enter')
128
+ - send_keys('Enter')
129
+ - scroll('down')
130
+ Use single quotes around string arguments.
131
+ When clicking, use the BrowserGym element IDs (BIDs) listed in the user message.
132
+ If you are unsure, respond with noop().
133
+ Do not include explanations or additional text.
134
+ """
135
+ ).strip()
136
+
137
+
138
+ def build_history_lines(history: List[str]) -> str:
139
+ if not history:
140
+ return "None"
141
+ return "\n".join(history[-4:])
142
+
143
+
144
+ def extract_screenshot_uri(observation) -> Optional[str]:
145
+ if observation.screenshot is None:
146
+ return None
147
+ screen_array = np.array(observation.screenshot, dtype=np.uint8)
148
+ image = Image.fromarray(screen_array)
149
+ buffer = BytesIO()
150
+ image.save(buffer, format="PNG")
151
+ buffer.seek(0)
152
+ data_uri = base64.b64encode(buffer.read()).decode("utf-8")
153
+ return f"data:image/png;base64,{data_uri}"
154
+
155
+
156
+ def extract_clickable_elements(observation) -> List[Dict[str, str]]:
157
+ """Collect BrowserGym element IDs that can be clicked."""
158
+
159
+ metadata = getattr(observation, "metadata", {}) or {}
160
+ obs_dict = metadata.get("browsergym_obs", {}) or {}
161
+ extra_props = obs_dict.get("extra_element_properties", {}) or {}
162
+
163
+ clickables: List[Dict[str, str]] = []
164
+ for bid, props in extra_props.items():
165
+ if not props.get("clickable"):
166
+ continue
167
+
168
+ bbox = props.get("bbox") or []
169
+ bbox_str = ", ".join(str(v) for v in bbox) if bbox else "?"
170
+ clickables.append(
171
+ {
172
+ "bid": str(bid),
173
+ "bbox": bbox_str,
174
+ }
175
+ )
176
+
177
+ # Keep a stable ordering for readability
178
+ clickables.sort(key=lambda item: item["bid"])
179
+ return clickables
180
+
181
+
182
+ def build_user_prompt(step: int, observation, history: List[str]) -> str:
183
+ goal = observation.goal or "(not provided)"
184
+ url = observation.url or "(unknown)"
185
+ error_note = "Yes" if observation.last_action_error else "No"
186
+
187
+ clickables = extract_clickable_elements(observation)
188
+ if clickables:
189
+ actions_hint = "\n".join(
190
+ f" - {item['bid']} (bbox: {item['bbox']})" for item in clickables
191
+ )
192
+ else:
193
+ actions_hint = " (none detected)"
194
+
195
+ prompt = textwrap.dedent(
196
+ f"""
197
+ Step: {step}
198
+ Goal: {goal}
199
+ Current URL: {url}
200
+ Previous steps:
201
+ {build_history_lines(history)}
202
+ Last action error: {error_note}
203
+ Available clickable element IDs: {actions_hint}
204
+ Reply with exactly one BrowserGym action string.
205
+ """
206
+ ).strip()
207
+ return prompt
208
+
209
+
210
+ def parse_model_action(response_text: str) -> str:
211
+ if not response_text:
212
+ return FALLBACK_ACTION
213
+
214
+ # Prefer the first line that looks like an action string
215
+ lines = response_text.splitlines()
216
+ for raw_line in lines:
217
+ line = raw_line.strip()
218
+ if not line:
219
+ continue
220
+ line = ACTION_PREFIX_RE.sub("", line)
221
+ match = ACTION_PATTERN.search(line)
222
+ if match:
223
+ action = match.group(0).strip()
224
+ # Collapse internal whitespace
225
+ action = re.sub(r"\s+", " ", action)
226
228
+ return action
229
+
230
+ # Fall back to searching the whole response
231
+ match = ACTION_PATTERN.search(response_text)
232
+ if match:
233
+ action = match.group(0).strip()
234
+ action = re.sub(r"\s+", " ", action)
235
+ return action
236
+
237
+ return FALLBACK_ACTION
238
+
239
+
240
+ def main() -> None:
241
+ client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
242
+
243
+ env = BrowserGymEnv.from_docker_image(
244
+ image="browsergym-env:latest",
245
+ env_vars={
246
+ "BROWSERGYM_BENCHMARK": "miniwob",
247
+ "BROWSERGYM_TASK_NAME": "click-test",
248
+ },
249
+ )
250
+
251
+ history: List[str] = []
252
+
253
+ try:
254
+ result = env.reset()
255
+ observation = result.observation
256
+ print(f"Episode goal: {observation.goal}")
257
+
258
+ for step in range(1, MAX_STEPS + 1):
259
+ if result.done:
260
+ print("Environment signalled done. Stopping early.")
261
+ break
262
+
263
+ user_prompt = build_user_prompt(step, observation, history)
264
+ user_content = [{"type": "text", "text": user_prompt}]
265
+ screenshot_uri = extract_screenshot_uri(observation)
266
+ if screenshot_uri:
267
+ user_content.append(
268
+ {
269
+ "type": "image_url",
270
+ "image_url": {"url": screenshot_uri},
271
+ }
272
+ )
273
+
274
+ messages = [
275
+ {
276
+ "role": "system",
277
+ "content": [{"type": "text", "text": SYSTEM_PROMPT}],
278
+ },
279
+ {
280
+ "role": "user",
281
+ "content": user_content,
282
+ },
283
+ ]
284
+
285
+ try:
286
+ completion = client.chat.completions.create(
287
+ model=MODEL_NAME,
288
+ messages=messages,
289
+ temperature=TEMPERATURE,
290
+ max_tokens=MAX_TOKENS,
291
+ stream=False,
292
+ )
293
+ response_text = completion.choices[0].message.content or ""
294
+ # pylint: disable=broad-except
295
+ except Exception as exc: # noqa: BLE001
296
+ failure_msg = f"Model request failed ({exc}). Using fallback action."
297
+ print(failure_msg)
298
+ response_text = FALLBACK_ACTION
299
+
300
+ action_str = parse_model_action(response_text)
301
+ print(f"Step {step}: model suggested -> {action_str}")
302
+
303
+ result = env.step(BrowserGymAction(action_str=action_str))
304
+ observation = result.observation
305
+
306
+ reward = result.reward or 0.0
307
+ error_flag = " ERROR" if observation.last_action_error else ""
308
+ history_line = (
309
+ f"Step {step}: {action_str} -> reward {reward:+.2f}{error_flag}"
310
+ )
311
+ history.append(history_line)
312
+ print(
313
+ " Reward: "
314
+ f"{reward:+.2f} | Done: {result.done} | Last action error: "
315
+ f"{observation.last_action_error}"
316
+ )
317
+
318
+ if result.done:
319
+ print("Episode complete.")
320
+ break
321
+
322
+ else:
323
+ print(f"Reached max steps ({MAX_STEPS}).")
324
+
325
+ finally:
326
+ env.close()
327
+
328
+
329
+ if __name__ == "__main__":
330
+ main()
331
+
332
+
333
+ Pre Validation Script
334
+
335
+
336
+
337
+ #!/usr/bin/env bash
338
+ #
339
+ # validate-submission.sh — OpenEnv Submission Validator
340
+ #
341
+ # Checks that your HF Space is live, Docker image builds, and openenv validate passes.
342
+ #
343
+ # Prerequisites:
344
+ # - Docker: https://docs.docker.com/get-docker/
345
+ # - openenv-core: pip install openenv-core
346
+ # - curl (usually pre-installed)
347
+ #
348
+ # Run:
349
+ # curl -fsSL https://raw.githubusercontent.com/<owner>/<repo>/main/scripts/validate-submission.sh | bash -s -- <ping_url> [repo_dir]
350
+ #
351
+ # Or download and run locally:
352
+ # chmod +x validate-submission.sh
353
+ # ./validate-submission.sh <ping_url> [repo_dir]
354
+ #
355
+ # Arguments:
356
+ # ping_url Your HuggingFace Space URL (e.g. https://your-space.hf.space)
357
+ # repo_dir Path to your repo (default: current directory)
358
+ #
359
+ # Examples:
360
+ # ./validate-submission.sh https://my-team.hf.space
361
+ # ./validate-submission.sh https://my-team.hf.space ./my-repo
362
+ #
363
+
364
+ set -uo pipefail
365
+
366
+ DOCKER_BUILD_TIMEOUT=600
367
+ if [ -t 1 ]; then
368
+ RED='\033[0;31m'
369
+ GREEN='\033[0;32m'
370
+ YELLOW='\033[1;33m'
371
+ BOLD='\033[1m'
372
+ NC='\033[0m'
373
+ else
374
+ RED='' GREEN='' YELLOW='' BOLD='' NC=''
375
+ fi
376
+
377
+ run_with_timeout() {
378
+ local secs="$1"; shift
379
+ if command -v timeout &>/dev/null; then
380
+ timeout "$secs" "$@"
381
+ elif command -v gtimeout &>/dev/null; then
382
+ gtimeout "$secs" "$@"
383
+ else
384
+ "$@" &
385
+ local pid=$!
386
+ ( sleep "$secs" && kill "$pid" 2>/dev/null ) &
387
+ local watcher=$!
388
+ wait "$pid" 2>/dev/null
389
+ local rc=$?
390
+ kill "$watcher" 2>/dev/null
391
+ wait "$watcher" 2>/dev/null
392
+ return $rc
393
+ fi
394
+ }
395
+
396
+ portable_mktemp() {
397
+ local prefix="${1:-validate}"
398
+ mktemp "${TMPDIR:-/tmp}/${prefix}-XXXXXX" 2>/dev/null || mktemp
399
+ }
400
+
401
+ CLEANUP_FILES=()
402
+ cleanup() { rm -f "${CLEANUP_FILES[@]+"${CLEANUP_FILES[@]}"}"; }
403
+ trap cleanup EXIT
404
+
405
+ PING_URL="${1:-}"
406
+ REPO_DIR="${2:-.}"
407
+
408
+ if [ -z "$PING_URL" ]; then
409
+ printf "Usage: %s <ping_url> [repo_dir]\n" "$0"
410
+ printf "\n"
411
+ printf " ping_url Your HuggingFace Space URL (e.g. https://your-space.hf.space)\n"
412
+ printf " repo_dir Path to your repo (default: current directory)\n"
413
+ exit 1
414
+ fi
415
+
416
+ if ! REPO_DIR="$(cd "$REPO_DIR" 2>/dev/null && pwd)"; then
417
+ printf "Error: directory '%s' not found\n" "${2:-.}"
418
+ exit 1
419
+ fi
420
+ PING_URL="${PING_URL%/}"
421
+ export PING_URL
422
+ PASS=0
423
+
424
+ log() { printf "[%s] %b\n" "$(date -u +%H:%M:%S)" "$*"; }
425
+ pass() { log "${GREEN}PASSED${NC} -- $1"; PASS=$((PASS + 1)); }
426
+ fail() { log "${RED}FAILED${NC} -- $1"; }
427
+ hint() { printf " ${YELLOW}Hint:${NC} %b\n" "$1"; }
428
+ stop_at() {
429
+ printf "\n"
430
+ printf "${RED}${BOLD}Validation stopped at %s.${NC} Fix the above before continuing.\n" "$1"
431
+ exit 1
432
+ }
433
+
434
+ printf "\n"
435
+ printf "${BOLD}========================================${NC}\n"
436
+ printf "${BOLD} OpenEnv Submission Validator${NC}\n"
437
+ printf "${BOLD}========================================${NC}\n"
438
+ log "Repo: $REPO_DIR"
439
+ log "Ping URL: $PING_URL"
440
+ printf "\n"
441
+
442
+ log "${BOLD}Step 1/3: Pinging HF Space${NC} ($PING_URL/reset) ..."
443
+
444
+ CURL_OUTPUT=$(portable_mktemp "validate-curl")
445
+ CLEANUP_FILES+=("$CURL_OUTPUT")
446
+ HTTP_CODE=$(curl -s -o "$CURL_OUTPUT" -w "%{http_code}" -X POST \
447
+ -H "Content-Type: application/json" -d '{}' \
448
+ "$PING_URL/reset" --max-time 30 2>"$CURL_OUTPUT" || printf "000")
449
+
450
+ if [ "$HTTP_CODE" = "200" ]; then
451
+ pass "HF Space is live and responds to /reset"
452
+ elif [ "$HTTP_CODE" = "000" ]; then
453
+ fail "HF Space not reachable (connection failed or timed out)"
454
+ hint "Check your network connection and that the Space is running."
455
+ hint "Try: curl -s -o /dev/null -w '%%{http_code}' -X POST $PING_URL/reset"
456
+ stop_at "Step 1"
457
+ else
458
+ fail "HF Space /reset returned HTTP $HTTP_CODE (expected 200)"
459
+ hint "Make sure your Space is running and the URL is correct."
460
+ hint "Try opening $PING_URL in your browser first."
461
+ stop_at "Step 1"
462
+ fi
463
+
464
+ log "${BOLD}Step 2/3: Running docker build${NC} ..."
465
+
466
+ if ! command -v docker &>/dev/null; then
467
+ fail "docker command not found"
468
+ hint "Install Docker: https://docs.docker.com/get-docker/"
469
+ stop_at "Step 2"
470
+ fi
471
+
472
+ if [ -f "$REPO_DIR/Dockerfile" ]; then
473
+ DOCKER_CONTEXT="$REPO_DIR"
474
+ elif [ -f "$REPO_DIR/server/Dockerfile" ]; then
475
+ DOCKER_CONTEXT="$REPO_DIR/server"
476
+ else
477
+ fail "No Dockerfile found in repo root or server/ directory"
478
+ stop_at "Step 2"
479
+ fi
480
+
481
+ log " Found Dockerfile in $DOCKER_CONTEXT"
482
+
483
+ BUILD_OK=false
484
+ BUILD_OUTPUT=$(run_with_timeout "$DOCKER_BUILD_TIMEOUT" docker build "$DOCKER_CONTEXT" 2>&1) && BUILD_OK=true
485
+
486
+ if [ "$BUILD_OK" = true ]; then
487
+ pass "Docker build succeeded"
488
+ else
489
+ fail "Docker build failed (timeout=${DOCKER_BUILD_TIMEOUT}s)"
490
+ printf "%s\n" "$BUILD_OUTPUT" | tail -20
491
+ stop_at "Step 2"
492
+ fi
493
+
494
+ log "${BOLD}Step 3/3: Running openenv validate${NC} ..."
495
+
496
+ if ! command -v openenv &>/dev/null; then
497
+ fail "openenv command not found"
498
+ hint "Install it: pip install openenv-core"
499
+ stop_at "Step 3"
500
+ fi
501
+
502
+ VALIDATE_OK=false
503
+ VALIDATE_OUTPUT=$(cd "$REPO_DIR" && openenv validate 2>&1) && VALIDATE_OK=true
504
+
505
+ if [ "$VALIDATE_OK" = true ]; then
506
+ pass "openenv validate passed"
507
+ [ -n "$VALIDATE_OUTPUT" ] && log " $VALIDATE_OUTPUT"
508
+ else
509
+ fail "openenv validate failed"
510
+ printf "%s\n" "$VALIDATE_OUTPUT"
511
+ stop_at "Step 3"
512
+ fi
513
+
514
+ printf "\n"
515
+ printf "${BOLD}========================================${NC}\n"
516
+ printf "${GREEN}${BOLD} All 3/3 checks passed!${NC}\n"
517
+ printf "${GREEN}${BOLD} Your submission is ready to submit.${NC}\n"
518
+ printf "${BOLD}========================================${NC}\n"
519
+ printf "\n"
520
+
521
+ exit 0
TRAINING_INSTRUCTIONS.md ADDED
@@ -0,0 +1,253 @@
+ # MolForge Training Instructions
+
+ This guide is for training a small model against MolForge without teaching it to exploit the environment.
+
+ ## 1. Safety Defaults
+
+ MolForge now hides true internal molecule properties from public `state()` metadata by default. If you need to debug the environment manually, use:
+
+ ```bash
+ MOLFORGE_DEBUG_STATE=1 python inference.py
+ ```
+
+ Do not use `MOLFORGE_DEBUG_STATE=1` while collecting SFT data or running RL.
+
+ The chemistry oracle path uses RDKit descriptors by default and TDC molecule oracles when `pytdc` is available. TDC is kept as an optional extra because current PyTDC releases pull in a large, platform-sensitive ML stack; install it with `uv sync --extra tdc` on a compatible Python if you want the TDC SA/QED oracles active. RDKit remains active in the default Docker/HF deployment, and the environment records the active backend in observation metadata.
+
+ The default reward mode is `assay_gated`, which gives coarse edit feedback and leaves the strongest quality signal to assays and terminal graders. For early RL warmup, use the curriculum reward mode:
+
+ ```bash
+ MOLFORGE_REWARD_MODE=curriculum python inference.py
+ ```
+
+ Curriculum mode keeps the official `submission_score` strict, but gives bounded training reward for useful evidence collection, evidence-supported submit decisions, and non-submitted near-miss episodes. If the model reaches a strong evidence package and still fails to submit before the deadline, curriculum mode adds a small missed-nomination penalty. This prevents small models from seeing only zero terminal scores while they are still learning when to submit, without letting endless assay collection become the best behavior. Use this mode for initial GRPO curves, then switch back to `assay_gated` for final evaluation.
+
+ For curriculum experiments only, you can also restore the older dense edit reward:
+
+ ```bash
+ MOLFORGE_REWARD_MODE=dense python inference.py
+ ```
+
+ Use randomized training episodes when collecting data or training a policy:
+
+ ```bash
+ MOLFORGE_TRAINING_RANDOMIZATION=1 MOLFORGE_RANDOM_SEED=42 python inference.py
+ ```
+
+ Keep randomization off for judge-facing baseline runs so scores remain reproducible.
+
+ ## 2. Recommended Training Plan
+
+ Use a two-stage plan:
+
+ 1. Small SFT warm start
+ 2. RL with verifiable rewards
+
+ SFT is only for teaching the model the action schema and basic workflow. RL should do the real environment optimization.
+
+ ## 3. What SFT Should Teach
+
+ Include these example types:
+
+ - Valid JSON action formatting
+ - Correct `acting_role` for each action
+ - Short `rationale` values that explain the decision without chain-of-thought
+ - `evidence` lists that cite visible observation facts only
+ - `expected_effects` dictionaries with directional predictions, not hidden scores
+ - Specialist message bundles with proposal, approval, objection, assay request, or rejection
+ - Running cheap/necessary assays before risky submissions
+ - Editing toward safer fragments when toxicity risk is visible
+ - Restarting early in the hard sunk-cost scenario
+ - Submitting only when evidence covers the task constraints
+ - Handling noisy assay estimates without undoing a high-confidence final candidate at the last moment
+ - Recovering from low budget by choosing small actions or stopping
+
+ Avoid these example types:
+
+ - Any example that reads `state.metadata.debug_hidden_properties`
+ - Any answer that mentions exact hidden objective deltas
+ - Hidden chain-of-thought or long private reasoning transcripts
+ - Repetitive message spam just to collect coordination reward
+ - Premature submit actions without potency/safety evidence
+ - Examples where missing specialist messages are silently repaired by the runner
+
+ ## 4. Generate a Starter SFT Dataset
+
+ For the first schema warm start, use the strict curriculum dataset. It includes explicit JSON `null` fields, only the intended top-level action keys, all action types, all assay tools, all edit subtypes, and valid role/message permissions:
+
+ ```bash
+ python scripts/generate_sft_schema_strict_dataset.py \
+     --episodes 75 \
+     --output data/molforge_sft_schema_strict.jsonl
+
+ python scripts/validate_sft_traces.py data/molforge_sft_schema_strict.jsonl
+ ```
+
+ Use this file first for Qwen 2B-class SFT:
+
+ ```text
+ data/molforge_sft_schema_strict.jsonl
+ ```
+
+ The older trace generator is still useful after the model learns the exact schema, because it provides more policy-like trajectories. Run:
+
+ ```bash
+ python scripts/generate_sft_traces.py --episodes 80 --output data/molforge_sft_traces.jsonl
+ ```
+
+ For a more robust dataset:
+
+ ```bash
+ python scripts/generate_sft_traces.py \
+     --episodes 200 \
+     --randomized \
+     --output data/molforge_sft_traces_randomized.jsonl
+ ```
+
+ The generated records use chat-style JSONL:
+
+ ```json
+ {"messages":[{"role":"system","content":"..."},{"role":"user","content":"..."},{"role":"assistant","content":"..."}]}
+ ```
+
+ Before training, spot-check the JSONL:
+
+ ```bash
+ python - <<'PY'
+ import json
+ from pathlib import Path
+
+ path = Path("data/molforge_sft_traces.jsonl")
+ for i, line in zip(range(3), path.open()):
+     item = json.loads(line)
+     print(i, item.get("metadata"), item["messages"][-1]["content"][:300])
+ PY
+ ```
+
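Beyond spot-checking, the structure of each record can be verified programmatically. A minimal sketch; the helper is illustrative rather than part of the repo, and it assumes the system/user/assistant chat shape shown above:

```python
import json

def check_record(line: str) -> None:
    """Assert one JSONL record matches the expected chat shape."""
    item = json.loads(line)
    roles = [m["role"] for m in item["messages"]]
    # Records should start with the system prompt and end with the
    # assistant's action JSON.
    assert roles[0] == "system", roles
    assert roles[-1] == "assistant", roles
    assert all(isinstance(m["content"], str) for m in item["messages"])

check_record(
    '{"messages":[{"role":"system","content":"..."},'
    '{"role":"user","content":"..."},'
    '{"role":"assistant","content":"..."}]}'
)
```

Running it over every line before training catches malformed records that a three-line spot check would miss.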
+ ## 5. SFT Settings
+
+ Start small:
+
+ - Dataset size: 200 to 1,000 action examples
+ - Max sequence length: 2,048 or 4,096
+ - LoRA rank: 16 or 32
+ - Learning rate: `1e-4` to `2e-4`
+ - Epochs: 1 to 3
+ - Target modules: attention and MLP projection layers
+ - Save LoRA adapters first; test them before merging
+
+ Stop SFT once the model reliably emits valid `MolForgeAction` JSON. Do not overfit it into copying one fixed heuristic path.
+
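The settings above can be collected into one starting configuration. A sketch only: `lora_alpha` at 2x the rank is a common convention rather than a repo requirement, and the key names are illustrative, meant to map onto `peft.LoraConfig` or your trainer's arguments:

```python
# Starting SFT hyperparameters mirroring the list above.
sft_config = {
    "max_seq_length": 2048,        # or 4096
    "lora_r": 16,                  # LoRA rank: 16 or 32
    "lora_alpha": 32,              # common convention: 2x rank (assumption)
    "lora_target_modules": [       # attention + MLP projection layers
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    "learning_rate": 2e-4,         # 1e-4 to 2e-4
    "num_train_epochs": 2,         # 1 to 3
}
```

The module names assume a Qwen/Llama-style architecture; check your base model's layer names before reusing them.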
+ ## 6. RL Stage
+
+ After SFT, run RL/GRPO with MolForge as the verifier environment.
+
+ Use these environment settings:
+
+ ```bash
+ export MOLFORGE_TRAINING_RANDOMIZATION=1
+ export MOLFORGE_REWARD_MODE=curriculum
+ unset MOLFORGE_DEBUG_STATE
+ ```
+
+ Once the model starts submitting valid candidates, run a second RL/evaluation phase with:
+
+ ```bash
+ export MOLFORGE_REWARD_MODE=assay_gated
+ ```
+
+ Report both curves if possible:
+
+ - curriculum reward curve for early learning progress;
+ - strict terminal `submission_score` before/after for judge-facing task success.
+
+ Track these metrics separately:
+
+ - Average terminal `submission_score`
+ - Average terminal `candidate_score`
+ - Average terminal `budget_score`
+ - Budget remaining at valid submit
+ - Invalid action rate
+ - Policy veto rate
+ - Budget exhaustion rate
+ - Repeated assay count
+ - Loop penalty count
+ - Coordination score
+ - Evidence score
+ - Submitted-without-evidence count
+ - Constraint margin score
+ - Number of actions before submit
+
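Several of these averages and rates can be aggregated from per-episode terminal results in a few lines. A sketch only; the episode field names here are illustrative and should be mapped onto whatever your rollout logs actually record:

```python
from statistics import mean

def summarize_runs(episodes: list) -> dict:
    """Aggregate per-episode terminal metrics into one training report.

    Each episode dict is assumed to carry the terminal grader scores and
    counters tracked above (field names are illustrative).
    """
    return {
        "avg_submission_score": mean(e["submission_score"] for e in episodes),
        "avg_candidate_score": mean(e["candidate_score"] for e in episodes),
        "invalid_action_rate": mean(
            e["invalid_actions"] / max(e["actions_taken"], 1) for e in episodes
        ),
        "evidence_free_submits": sum(
            e["submitted_without_evidence"] for e in episodes
        ),
    }
```

Logging this summary every evaluation pass makes reward-hacking patterns (e.g. rising reward with rising evidence-free submits) visible early.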
+ Inspect generations every few hundred updates. A rising reward is not enough if the model learns to spam messages, submit without evidence, or memorize the three default scenarios.
+
+ ## 7. Evaluation Protocol
+
+ Use three evaluations:
+
+ 1. Deterministic public tasks
+    Run with randomization off and compare to `python inference.py`.
+ 2. Randomized training tasks
+    Run with `MOLFORGE_TRAINING_RANDOMIZATION=1`.
+ 3. Holdout tasks
+    Add new scenario configs or fragment perturbations not present in SFT traces.
+
+ A trained model should improve terminal submission score while keeping invalid actions and evidence-free submissions low.
+
+ For the full testing protocol, including how to compare curriculum reward against strict evaluation, see [EVALUATION_PROTOCOL.md](EVALUATION_PROTOCOL.md).
+
+ ## 8. Model Choice
+
+ Recommended starting point:
+
+ - `unsloth/Qwen3.5-2B` for the lightest serious iteration loop
+ - `unsloth/Qwen3-4B-Instruct-2507` if you can afford a little more VRAM and want stronger JSON/tool following
+
+ Why:
+
+ - Qwen3.5 has 0.8B, 2B, and 4B Unsloth fine-tuning support.
+ - The 2B class should be fast enough for repeated MolForge SFT/RL experiments.
+ - The 4B class is still lightweight, but should be more reliable for structured action generation.
+
+ Use `Qwen3.5-0.8B` only for plumbing tests. It is useful to verify the training loop, but likely too weak to judge the environment.
+
+ If you have more GPU budget:
+
+ - `unsloth/Qwen3-8B` or a current Qwen3/Qwen3.5 8B-class instruct model
+
+ If you specifically want alternate-family baselines:
+
+ - `unsloth/Llama-3.1-8B-Instruct`
+ - Gemma 3/4 small instruct models can be tested, but prefer Qwen first because the current Unsloth Qwen3.5 fine-tuning path is clearer for 2B/4B RL iteration.
+
+ For the hackathon, prefer faster iteration over maximum model size. A clean 4B model trained well against this environment is more useful than a larger model that only runs a few noisy experiments.
+
+ ## 9. Honest Inference Reporting
+
+ `inference.py` has no heuristic fallback. It requires a configured model and exits with an error if the model is missing, times out, or emits unparsable action JSON.
+
+ `local_inference.py` also has no heuristic policy fallback and does not patch missing team messages into model outputs. If a model omits reviewer communication, that weakness should appear as missing-review penalties and a lower `coordination_score`.
+
+ For real model evaluation, run:
+
+ ```bash
+ API_BASE_URL=https://router.huggingface.co/v1 \
+ MODEL_NAME=your-model \
+ HF_TOKEN=your-token \
+ python inference.py
+ ```
+
+ Use the deterministic trace policy only for SFT data generation, not for reporting model scores.
__init__.py ADDED
@@ -0,0 +1,11 @@
+ """MolForge OpenEnv package exports."""
+
+ from .client import MolForgeEnv
+ from .models import MolForgeAction, MolForgeObservation, MolForgeState
+
+ __all__ = [
+     "MolForgeAction",
+     "MolForgeEnv",
+     "MolForgeObservation",
+     "MolForgeState",
+ ]
client.py ADDED
@@ -0,0 +1,31 @@
+ """Synchronous and async client for the MolForge environment."""
+
+ from __future__ import annotations
+
+ from typing import Dict
+
+ from openenv.core import EnvClient
+ from openenv.core.client_types import StepResult
+
+ from .models import MolForgeAction, MolForgeObservation, MolForgeState
+
+
+ class MolForgeEnv(EnvClient[MolForgeAction, MolForgeObservation, MolForgeState]):
+     """OpenEnv client for the MolForge environment."""
+
+     def _step_payload(self, action: MolForgeAction) -> Dict:
+         return action.model_dump(exclude_none=True)
+
+     def _parse_result(self, payload: Dict) -> StepResult[MolForgeObservation]:
+         obs_data = dict(payload.get("observation", payload))
+         obs_data["done"] = payload.get("done", obs_data.get("done", False))
+         obs_data["reward"] = payload.get("reward", obs_data.get("reward"))
+         observation = MolForgeObservation(**obs_data)
+         return StepResult(
+             observation=observation,
+             reward=payload.get("reward"),
+             done=payload.get("done", False),
+         )
+
+     def _parse_state(self, payload: Dict) -> MolForgeState:
+         return MolForgeState(**payload)
inference.py ADDED
@@ -0,0 +1,209 @@
1
+ """Judge-facing baseline inference script for MolForge."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import os
7
+ from typing import Any, Optional, cast
8
+
9
+ from openai import OpenAI
10
+
11
+ from inference_common import (
12
+ COMPACT_SYSTEM_PROMPT,
13
+ SYSTEM_PROMPT,
14
+ build_model_payload,
15
+ extract_json,
16
+ )
17
+
18
+ try:
19
+ from molforge.models import MolForgeAction, MolForgeObservation
20
+ from molforge.server.molforge_environment import MolForgeEnvironment
21
+ except ImportError:
22
+ from models import MolForgeAction, MolForgeObservation
23
+ from server.molforge_environment import MolForgeEnvironment
24
+
25
+ API_BASE_URL = os.getenv("API_BASE_URL")
26
+ MODEL_NAME = os.getenv("MODEL_NAME")
27
+ API_KEY = os.getenv("API_KEY") or os.getenv("HF_TOKEN")
28
+ MAX_TURNS = 10
29
+ MODEL_TIMEOUT_S = float(os.getenv("MODEL_TIMEOUT_S", "35"))
30
+ MODEL_LONG_TIMEOUT_S = float(os.getenv("MODEL_LONG_TIMEOUT_S", "45"))
31
+ MODEL_RETRY_TIMEOUT_S = float(os.getenv("MODEL_RETRY_TIMEOUT_S", "15"))
32
+ MODEL_MAX_TOKENS = int(os.getenv("MODEL_MAX_TOKENS", "220"))
33
+ MIN_REPORTED_SCORE = 1e-6
34
+ MAX_REPORTED_SCORE = 1.0 - 1e-6
35
+
36
+
37
+ def main() -> None:
38
+ env = MolForgeEnvironment()
39
+ if not API_BASE_URL or not MODEL_NAME or not API_KEY:
40
+ raise RuntimeError(
41
+ "API_BASE_URL, MODEL_NAME, and API_KEY or HF_TOKEN are required. "
42
+ "No heuristic fallback is available."
43
+ )
44
+ client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
45
+
46
+ scores = []
47
+ raw_final_scores = []
48
+ submission_scores = []
49
+ progress_scores = []
50
+ model_action_count = 0
51
+ for episode_index in range(3):
52
+ observation = env.reset()
53
+ task_name = observation.scenario_id
54
+ episode_error = ""
55
+ print(
56
+ f"[START] task={task_name} difficulty={observation.difficulty} episode={episode_index + 1}",
57
+ flush=True,
58
+ )
59
+
60
+ for _ in range(MAX_TURNS):
61
+ if observation.done:
62
+ break
63
+ try:
64
+ action = choose_action(client, observation)
65
+ model_action_count += 1
66
+ observation = env.step(action)
67
+ except Exception as exc:
68
+ episode_error = f"{exc.__class__.__name__}:{exc}"
69
+ print(
70
+ f"[STEP] task={task_name} step={observation.step_index + 1} "
71
+ f"reward=0.000000 action=model_error status=failed",
72
+ flush=True,
73
+ )
74
+ break
75
+ print(
76
+ f"[STEP] task={task_name} step={observation.step_index} "
77
+ f"reward={observation.reward:.6f} action={action.action_type} "
78
+ f"actor={action.acting_role} status={observation.governance.status}",
79
+ flush=True,
80
+ )
81
+ if observation.done:
82
+ break
83
+
84
+ grader_scores = observation.metadata.get("terminal_grader_scores", {})
85
+ raw_final_score = float(grader_scores.get("final_score", grader_scores.get("submission_score", 0.0)))
86
+ final_score = reportable_score(raw_final_score)
87
+ submission_score = float(grader_scores.get("submission_score", 0.0))
88
+ progress_score = float(grader_scores.get("progress_score", 0.0))
89
+ scores.append(final_score)
90
+ raw_final_scores.append(raw_final_score)
91
+ submission_scores.append(submission_score)
92
+ progress_scores.append(progress_score)
93
+ end_line = (
94
+ f"[END] task={task_name} score={final_score:.6f} raw_score={raw_final_score:.6f} "
95
+ f"submission_score={submission_score:.6f} progress_score={progress_score:.6f} "
96
+ f"steps={observation.step_index}"
97
+ )
98
+ if episode_error:
99
+ end_line += f" error={json.dumps(episode_error)}"
100
+ print(end_line, flush=True)
101
+ if observation.report_card:
102
+ print(observation.report_card, flush=True)
103
+
104
+ average = sum(scores) / len(scores)
105
+ average_progress = sum(progress_scores) / len(progress_scores)
106
+ summary = {
107
+ "scores": scores,
108
+ "raw_final_scores": raw_final_scores,
109
+ "average_final_score": round(reportable_score(average), 6),
110
+ "submission_scores": submission_scores,
111
+ "average_submission_score": round(sum(submission_scores) / len(submission_scores), 4),
112
+ "progress_scores": progress_scores,
113
+ "average_progress_score": round(average_progress, 4),
114
+ "model_action_count": model_action_count,
115
+ "model_name": MODEL_NAME,
116
+ "api_base_url": API_BASE_URL,
117
+ "fallback_enabled": False,
118
+ }
119
+ print("[SUMMARY] " + json.dumps(summary, separators=(",", ":")), flush=True)
120
+
121
+
122
+ def reportable_score(score: float) -> float:
123
+ """Validator-facing scores must be strictly between 0 and 1."""
124
+
125
+ if score <= 0.0:
126
+ return MIN_REPORTED_SCORE
127
+ if score >= 1.0:
128
+ return MAX_REPORTED_SCORE
129
+ return score
130
+
131
+
132
+ def choose_action(client: OpenAI, observation: MolForgeObservation) -> MolForgeAction:
133
+ """Use the model and fail loudly when it cannot produce a valid action."""
134
+
135
+ action, error = ask_model(client, observation)
136
+ if action is None:
137
+ raise RuntimeError(f"Model action failed: {error}")
138
+ return action
139
+
140
+
141
+ def ask_model(client: OpenAI, observation: MolForgeObservation) -> tuple[Optional[MolForgeAction], str]:
142
+ """Request a structured team action from the model and parse it safely."""
143
+
144
+ errors = []
145
+ try:
146
+ full_payload = build_model_payload(observation, compact=False)
147
+ timeout_s = model_timeout_for_step(observation)
148
+ data = request_action_json(
149
+ client=client,
150
+ system_prompt=SYSTEM_PROMPT,
151
+ user_payload=full_payload,
152
+ timeout_s=timeout_s,
153
+ )
154
+ return MolForgeAction(**data), ""
155
+ except Exception as exc:
156
+ errors.append(f"full_prompt:{exc.__class__.__name__}:{exc}")
157
+ try:
158
+ compact_payload = build_model_payload(observation, compact=True)
159
+ data = request_action_json(
160
+ client=client,
161
+ system_prompt=COMPACT_SYSTEM_PROMPT,
162
+ user_payload=compact_payload,
163
+ timeout_s=MODEL_RETRY_TIMEOUT_S,
164
+ )
165
+ return MolForgeAction(**data), ""
166
+ except Exception as retry_exc:
167
+ errors.append(f"compact_prompt:{retry_exc.__class__.__name__}:{retry_exc}")
168
+ return None, " | ".join(errors)
169
+
170
+
171
+ def request_action_json(
172
+ *,
173
+ client: OpenAI,
174
+ system_prompt: str,
175
+ user_payload: dict[str, Any],
176
+ timeout_s: float,
177
+ ) -> dict[str, Any]:
178
+ """Call the remote model with a bounded timeout and parse a JSON action."""
179
+
180
+ configured_client = client.with_options(timeout=timeout_s)
181
+ completion = configured_client.chat.completions.create(
182
+ model=MODEL_NAME,
183
+ temperature=0.0,
184
+ max_tokens=MODEL_MAX_TOKENS,
185
+ messages=[
186
+ {"role": "system", "content": system_prompt},
187
+ {"role": "user", "content": json.dumps(user_payload, indent=2)},
188
+ ],
189
+ )
190
+ message_content = completion.choices[0].message.content
191
+ if isinstance(message_content, list):
192
+ text = "".join(part.get("text", "") for part in cast(list[dict[str, Any]], message_content))
193
+ else:
194
+ text = message_content or ""
195
+ return extract_json(text)
196
+
197
+
198
+ def model_timeout_for_step(observation: MolForgeObservation) -> float:
199
+ """Allow more time for high-value late-stage decisions without making every step unbounded."""
200
+
201
+ if observation.difficulty == "hard":
202
+ return MODEL_LONG_TIMEOUT_S
203
+ if observation.step_index >= observation.max_steps - 2:
204
+ return MODEL_LONG_TIMEOUT_S
205
+ return MODEL_TIMEOUT_S
206
+
207
+
208
+ if __name__ == "__main__":
209
+ main()
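The score clamping and timeout escalation used above can be checked in isolation. A minimal sketch, assuming `Obs` as a hypothetical stand-in for `MolForgeObservation` (the real class lives in `molforge.models`) and the script's default timeout constants:

```python
from dataclasses import dataclass

# Defaults mirrored from the script's environment-variable fallbacks.
MIN_REPORTED_SCORE = 1e-6
MAX_REPORTED_SCORE = 1.0 - 1e-6
MODEL_TIMEOUT_S = 35.0
MODEL_LONG_TIMEOUT_S = 45.0


def reportable_score(score: float) -> float:
    """Clamp validator-facing scores strictly inside (0, 1)."""
    if score <= 0.0:
        return MIN_REPORTED_SCORE
    if score >= 1.0:
        return MAX_REPORTED_SCORE
    return score


@dataclass
class Obs:
    """Hypothetical stand-in for MolForgeObservation (only the fields used here)."""
    difficulty: str
    step_index: int
    max_steps: int


def model_timeout_for_step(obs: Obs) -> float:
    # Hard scenarios and the final two steps get the longer timeout budget.
    if obs.difficulty == "hard" or obs.step_index >= obs.max_steps - 2:
        return MODEL_LONG_TIMEOUT_S
    return MODEL_TIMEOUT_S
```

This keeps reported averages valid even when every grader score is exactly 0.0 or 1.0, and bounds per-step latency while still allowing extra time where a timeout would be most costly.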
inference_common.py ADDED
@@ -0,0 +1,831 @@
1
+ """Shared inference helpers for MolForge judge/local runners."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from typing import Any, Dict, Optional
7
+
8
+ try:
9
+ from molforge.models import AgentMessage, MolForgeAction, MolForgeObservation
10
+ except ImportError:
11
+ from models import AgentMessage, MolForgeAction, MolForgeObservation
12
+
13
+ SYSTEM_PROMPT = """
14
+ You control the MolForge specialist team.
15
+ Return exactly one JSON object matching this schema.
16
+ The top-level "action_type" must be one of exactly:
17
+ ["edit", "run_assay", "submit", "restart", "defer"].
18
+ Never use "proposal", "approval", "objection", "risk_flag", "assay_request",
19
+ "rejection", or "submission_recommendation" as the top-level action_type.
20
+ Those words are only valid inside messages[].message_type.
21
+ {
22
+ "action_type": "edit" | "run_assay" | "submit" | "restart" | "defer",
23
+ "acting_role": "lead_chemist" | "assay_planner",
24
+ "edit_type": "add_fragment" | "substitute" | "remove" | "undo_last_edit" | null,
25
+ "slot": "warhead" | "hinge" | "solvent_tail" | "back_pocket" | null,
26
+ "fragment": string | null,
27
+ "tool_name": "evaluate_properties" | "dock_target" | "assay_toxicity" | "estimate_synthesizability" | "evaluate_novelty" | "search_literature" | "run_md_simulation" | null,
28
+ "rationale": string,
29
+ "evidence": [string],
30
+ "expected_effects": {
31
+ "potency": "up" | "down" | "neutral" | "unknown" | "not_applicable",
32
+ "toxicity": "up" | "down" | "neutral" | "unknown" | "not_applicable",
33
+ "synth": "up" | "down" | "neutral" | "unknown" | "not_applicable",
34
+ "novelty": "up" | "down" | "neutral" | "unknown" | "not_applicable",
35
+ "budget": "up" | "down" | "neutral" | "unknown" | "not_applicable"
36
+ },
37
+ "messages": [
38
+ {
39
+ "sender": "lead_chemist" | "toxicologist" | "assay_planner" | "process_chemist",
40
+ "message_type": "proposal" | "revision_request" | "approval" | "objection" | "risk_flag" | "assay_request" | "rejection" | "submission_recommendation",
41
+ "severity": "low" | "medium" | "high" | "critical",
42
+ "summary": string,
43
+ "payload": object
44
+ }
45
+ ]
46
+ }
47
+ Required top-level keys only:
48
+ action_type, acting_role, edit_type, slot, fragment, tool_name, rationale,
49
+ evidence, expected_effects, messages.
50
+ Do not output wrapper keys such as action, role, message_status,
51
+ message_payload, sender_role, or explanation_reason.
52
+ Use JSON null for unused optional fields.
53
+ Use structured specialist messages. Keep rationale short. Evidence must cite only visible observation facts. Expected effects are directional predictions, not hidden scores. Prefer cheap informative assays early, respect safety evidence, and do not submit without adequate support.
54
+ Critical role rules:
55
+ - lead_chemist may send only proposal, revision_request, or submission_recommendation.
56
+ - assay_planner may send proposal, approval, rejection, assay_request, or submission_recommendation.
57
+ - toxicologist may send approval, objection, risk_flag, assay_request, or rejection.
58
+ - process_chemist may send approval, objection, risk_flag, or assay_request.
59
+ - messages[] must include a proposal message sent by the acting_role.
60
+ - Do not use lead_chemist approval messages.
61
+ - Do not use toxicologist proposal messages.
62
+ - For run_assay, acting_role must be assay_planner. For edit, submit, restart, or defer, acting_role must be lead_chemist.
63
+ """.strip()
64
+
65
+ COMPACT_SYSTEM_PROMPT = """
66
+ Return one concise JSON team action only.
67
+ Do not explain.
68
+ Top-level action_type must be edit, run_assay, submit, restart, or defer.
69
+ Never use proposal as action_type; proposal is only a message_type.
70
+ Use only the required MolForgeAction top-level keys.
71
+ Prioritize finishing the current task with the smallest valid action bundle.
72
+ Respect role/message permissions exactly. Never output string "null"; use JSON null.
73
+ """.strip()
74
+
75
+
76
+ def heuristic_team_action(observation: MolForgeObservation) -> MolForgeAction:
77
+ candidate = select_candidate_action(observation)
78
+ attach_reasoning_fields(observation, candidate)
79
+ return attach_team_messages(observation, candidate)
80
+
81
+
82
+ def attach_reasoning_fields(
83
+ observation: MolForgeObservation,
84
+ action: MolForgeAction,
85
+ ) -> MolForgeAction:
86
+ action.evidence = build_action_evidence(observation, action)
87
+ action.expected_effects = build_expected_effects(observation, action)
88
+ return action
89
+
90
+
91
+ def select_candidate_action(observation: MolForgeObservation) -> MolForgeAction:
92
+ current = current_fragments(observation)
93
+ known_potency = known_estimate(observation, "potency")
94
+ known_toxicity = known_estimate(observation, "toxicity")
95
+ known_synth = known_estimate(observation, "synth")
96
+ potency_threshold = threshold_value(observation, "potency_min")
97
+ toxicity_threshold = threshold_value(observation, "toxicity_max")
98
+ synth_threshold = threshold_value(observation, "synth_min")
99
+
100
+ current_assay_props = current_property_names(observation)
101
+ required_evidence = ["potency", "toxicity"] + (["synth"] if synth_threshold is not None else [])
102
+ has_required_evidence = all(prop in current_assay_props for prop in required_evidence)
103
+ constraints_known_pass = constraints_pass_from_visible_evidence(observation)
104
+ post_shift_potency_ready = hard_post_shift_potency_ready(observation)
105
+ if has_required_evidence and post_shift_potency_ready and (
106
+ constraints_known_pass
107
+ or on_planned_final_candidate(observation, current)
108
+ or observation.step_index >= observation.max_steps - 1
109
+ ):
110
+ return MolForgeAction(
111
+ action_type="submit",
112
+ acting_role="lead_chemist",
113
+ rationale="Current assay evidence covers potency, toxicity, and feasibility constraints, so the team should submit before spending more budget.",
114
+ )
115
+
116
+ if (
117
+ observation.scenario_id == "level_2_hard"
118
+ and current["warhead"] != "nitrile"
119
+ and observation.remaining_budget >= 350
120
+ ):
121
+ return MolForgeAction(
122
+ action_type="restart",
123
+ acting_role="lead_chemist",
124
+ rationale="The starting series is a known trap under the resistance shift; restart before spending assay budget.",
125
+ )
126
+
127
+ target_edit = planned_fragment_edit(observation, current)
128
+ if target_edit is not None:
129
+ slot, fragment, rationale = target_edit
130
+ return MolForgeAction(
131
+ action_type="edit",
132
+ acting_role="lead_chemist",
133
+ edit_type="substitute",
134
+ slot=slot, # type: ignore[arg-type]
135
+ fragment=fragment,
136
+ rationale=rationale,
137
+ )
138
+
139
+ if (
140
+ observation.scenario_id == "level_2_hard"
141
+ and not post_shift_potency_ready
142
+ and observation.step_index < 3
143
+ ):
144
+ if known_toxicity is None and observation.remaining_budget >= 2000:
145
+ return MolForgeAction(
146
+ action_type="run_assay",
147
+ acting_role="assay_planner",
148
+ tool_name="assay_toxicity",
149
+ rationale="Use the pre-shift turns to lock down direct toxicity evidence on the restart scaffold.",
150
+ )
151
+ if known_synth is None and observation.remaining_budget >= 120:
152
+ return MolForgeAction(
153
+ action_type="run_assay",
154
+ acting_role="assay_planner",
155
+ tool_name="estimate_synthesizability",
156
+ rationale="Confirm route feasibility before the target mutation changes the potency readout.",
157
+ )
158
+
159
+ if known_toxicity is None and observation.remaining_budget >= 2000:
160
+ return MolForgeAction(
161
+ action_type="run_assay",
162
+ acting_role="assay_planner",
163
+ tool_name="assay_toxicity",
164
+ rationale="The current candidate needs direct toxicity evidence before it can be submitted.",
165
+ )
166
+
167
+ if (
168
+ synth_threshold is not None
169
+ and known_synth is None
170
+ and observation.remaining_budget >= 120
171
+ ):
172
+ return MolForgeAction(
173
+ action_type="run_assay",
174
+ acting_role="assay_planner",
175
+ tool_name="estimate_synthesizability",
176
+ rationale="The current candidate needs explicit synthesizability evidence before submission.",
177
+ )
178
+
179
+ if (
180
+ known_potency is None
181
+ and observation.remaining_budget >= 300
182
+ and can_collect_potency_now(observation)
183
+ ):
184
+ return MolForgeAction(
185
+ action_type="run_assay",
186
+ acting_role="assay_planner",
187
+ tool_name="dock_target",
188
+ rationale="The final decision needs a direct potency readout on the current molecule.",
189
+ )
190
+
191
+ if is_safety_risky(current, known_toxicity, toxicity_threshold):
192
+ for slot, fragment, rationale in [
193
+ ("solvent_tail", "morpholine", "Morpholine typically lowers safety risk while keeping the molecule tractable."),
194
+ ("back_pocket", "cyano", "Cyano is a safer back-pocket handle than a strongly lipophilic group."),
195
+ ("warhead", "reversible_cyanoacrylamide", "A softer warhead can preserve potency while reducing reactivity risk."),
196
+ ("hinge", "azaindole", "Azaindole can recover potency after safer peripheral edits."),
197
+ ]:
198
+ if current[slot] != fragment:
199
+ return MolForgeAction(
200
+ action_type="edit",
201
+ acting_role="lead_chemist",
202
+ edit_type="substitute",
203
+ slot=slot, # type: ignore[arg-type]
204
+ fragment=fragment,
205
+ rationale=rationale,
206
+ )
207
+
208
+ if potency_threshold is not None and (known_potency is None or known_potency < potency_threshold):
209
+ preferred_warhead = "nitrile" if observation.scenario_id == "level_2_hard" else "acrylamide"
210
+ for slot, fragment, rationale in [
211
+ ("hinge", "azaindole", "Azaindole is the strongest potency-oriented hinge in this library."),
212
+ ("back_pocket", "cyano", "Cyano improves potency more safely than heavy lipophilic groups."),
213
+ ("warhead", preferred_warhead, "The warhead should align with the current target context."),
214
+ ]:
215
+ if current[slot] != fragment:
216
+ return MolForgeAction(
217
+ action_type="edit",
218
+ acting_role="lead_chemist",
219
+ edit_type="substitute",
220
+ slot=slot, # type: ignore[arg-type]
221
+ fragment=fragment,
222
+ rationale=rationale,
223
+ )
224
+
225
+ if (
226
+ known_potency is None
227
+ and observation.remaining_budget >= 50
228
+ and not has_assay_tool(observation, "evaluate_properties")
229
+ ):
230
+ return MolForgeAction(
231
+ action_type="run_assay",
232
+ acting_role="assay_planner",
233
+ tool_name="evaluate_properties",
234
+ rationale="Use the cheap property panel to cover any remaining potency evidence gap.",
235
+ )
236
+
237
+ if known_potency is None and observation.remaining_budget >= 300:
238
+ return MolForgeAction(
239
+ action_type="run_assay",
240
+ acting_role="assay_planner",
241
+ tool_name="dock_target",
242
+ rationale="Potency is still under-characterized, so the team wants a more direct binding readout.",
243
+ )
244
+
245
+ if (
246
+ observation.scenario_id == "level_2_hard"
247
+ and has_required_evidence
248
+ and not post_shift_potency_ready
249
+ and observation.remaining_budget >= 300
250
+ ):
251
+ return MolForgeAction(
252
+ action_type="run_assay",
253
+ acting_role="assay_planner",
254
+ tool_name="dock_target",
255
+ rationale="The hard scenario requires post-mutation potency evidence for the submitted molecule.",
256
+ )
257
+
258
+ if synth_threshold is not None and known_synth is not None and known_synth < synth_threshold:
259
+ for slot, fragment, rationale in [
260
+ ("hinge", "pyridine", "Simplifying the hinge improves synthetic tractability."),
261
+ ("back_pocket", "methoxy", "A smaller back-pocket group reduces route burden."),
262
+ ]:
263
+ if current[slot] != fragment:
264
+ return MolForgeAction(
265
+ action_type="edit",
266
+ acting_role="lead_chemist",
267
+ edit_type="substitute",
268
+ slot=slot, # type: ignore[arg-type]
269
+ fragment=fragment,
270
+ rationale=rationale,
271
+ )
272
+
273
+ if has_required_evidence and (post_shift_potency_ready or observation.step_index >= observation.max_steps - 1):
274
+ return MolForgeAction(
275
+ action_type="submit",
276
+ acting_role="lead_chemist",
277
+ rationale="The episode horizon is nearly exhausted and current evidence is available, so the team should submit.",
278
+ )
279
+
280
+ if observation.remaining_budget >= 100:
281
+ return MolForgeAction(
282
+ action_type="run_assay",
283
+ acting_role="assay_planner",
284
+ tool_name="search_literature",
285
+ rationale="The team needs additional qualitative signal before making the next irreversible move.",
286
+ )
287
+
288
+ return MolForgeAction(
289
+ action_type="defer",
290
+ acting_role="lead_chemist",
291
+ rationale="No high-confidence move remains under the current budget.",
292
+ )
293
+
294
+
295
+ def attach_team_messages(
296
+ observation: MolForgeObservation,
297
+ action: MolForgeAction,
298
+ ) -> MolForgeAction:
299
+ messages = [
300
+ AgentMessage(
301
+ sender=action.acting_role,
302
+ message_type="proposal",
303
+ severity="medium",
304
+ summary=proposal_summary(action),
305
+ payload=proposal_payload(action),
306
+ )
307
+ ]
308
+
309
+ current = current_fragments(observation)
310
+ known_potency = known_estimate(observation, "potency")
311
+ known_toxicity = known_estimate(observation, "toxicity")
312
+ known_synth = known_estimate(observation, "synth")
313
+ toxicity_threshold = threshold_value(observation, "toxicity_max")
314
+ synth_threshold = threshold_value(observation, "synth_min")
315
+
316
+ if action.action_type == "run_assay":
317
+ messages.append(
318
+ AgentMessage(
319
+ sender="toxicologist",
320
+ message_type="approval",
321
+ severity="medium",
322
+ summary="Fresh assay evidence improves safety oversight.",
323
+ )
324
+ )
325
+ if action.acting_role != "assay_planner":
326
+ messages.append(
327
+ AgentMessage(
328
+ sender="assay_planner",
329
+ message_type="approval",
330
+ severity="medium",
331
+ summary="This assay is budget-efficient for the current evidence gap.",
332
+ )
333
+ )
334
+ if "process_chemist" in observation.enabled_roles and len(messages) < 4:
335
+ messages.append(
336
+ AgentMessage(
337
+ sender="process_chemist",
338
+ message_type="approval",
339
+ severity="low",
340
+ summary="Additional evidence now will reduce late-stage feasibility surprises.",
341
+ )
342
+ )
343
+
344
+ elif action.action_type == "restart":
345
+ messages.extend(
346
+ [
347
+ AgentMessage(
348
+ sender="toxicologist",
349
+ message_type="approval",
350
+ severity="high",
351
+ summary="Restarting moves away from the current scaffold safety liabilities.",
352
+ ),
353
+ AgentMessage(
354
+ sender="assay_planner",
355
+ message_type="approval",
356
+ severity="high",
357
+ summary="Restarting now is cheaper than polishing a doomed series.",
358
+ ),
359
+ ]
360
+ )
361
+ if "process_chemist" in observation.enabled_roles and len(messages) < 4:
362
+ messages.append(
363
+ AgentMessage(
364
+ sender="process_chemist",
365
+ message_type="approval",
366
+ severity="medium",
367
+ summary="The alternate scaffold family is more tractable to make.",
368
+ )
369
+ )
370
+
371
+ elif action.action_type == "submit":
372
+ tox_message_type = "approval"
373
+ tox_summary = "Visible evidence supports a safe-enough submission."
374
+ if known_toxicity is None:
375
+ tox_message_type = "assay_request"
376
+ tox_summary = "Submission should wait until toxicity has been assayed."
377
+ elif toxicity_threshold is not None and known_toxicity > toxicity_threshold:
378
+ tox_message_type = "objection"
379
+ tox_summary = "Visible toxicity evidence is still above the submission threshold."
380
+ messages.append(
381
+ AgentMessage(
382
+ sender="toxicologist",
383
+ message_type=tox_message_type,
384
+ severity="high" if tox_message_type != "approval" else "medium",
385
+ summary=tox_summary,
386
+ )
387
+ )
388
+ messages.append(
389
+ AgentMessage(
390
+ sender="assay_planner",
391
+ message_type=(
392
+ "approval"
393
+ if tox_message_type == "approval"
394
+ and known_potency is not None
395
+ and (synth_threshold is None or known_synth is not None)
396
+ else "assay_request"
397
+ ),
398
+ severity="medium",
399
+ summary=(
400
+ "The team has enough evidence to submit."
401
+ if tox_message_type == "approval"
402
+ and known_potency is not None
403
+ and (synth_threshold is None or known_synth is not None)
404
+ else "More evidence is needed before budget should be spent on submission."
405
+ ),
406
+ )
407
+ )
408
+ if "process_chemist" in observation.enabled_roles and len(messages) < 4:
409
+ if known_synth is None and synth_threshold is not None:
410
+ process_message_type = "assay_request"
411
+ process_summary = "Submission should wait for explicit route feasibility evidence."
412
+ elif synth_threshold is not None and known_synth is not None and known_synth < synth_threshold:
413
+ process_message_type = "objection"
414
+ process_summary = "Submission is premature because the route still looks too fragile."
415
+ else:
416
+ process_message_type = "approval"
417
+ process_summary = "Current route risk looks acceptable for submission."
418
+ messages.append(
419
+ AgentMessage(
420
+ sender="process_chemist",
421
+ message_type=process_message_type,
422
+ severity="medium",
423
+ summary=process_summary,
424
+ )
425
+ )
426
+
427
+ elif action.action_type == "edit":
428
+ safer_edit = is_safer_edit(current, action, known_toxicity, toxicity_threshold)
429
+ messages.append(
430
+ AgentMessage(
431
+ sender="toxicologist",
432
+ message_type="approval" if safer_edit else "risk_flag",
433
+ severity="medium",
434
+ summary=(
435
+ "This edit is directionally safer than the current fragment choice."
436
+ if safer_edit
437
+ else "This edit could carry additional safety pressure."
438
+ ),
439
+ )
440
+ )
441
+ messages.append(
442
+ AgentMessage(
443
+ sender="assay_planner",
444
+ message_type="approval",
445
+ severity="low",
446
+ summary="The edit is cheap enough to try before another expensive assay.",
447
+ )
448
+ )
449
+ if "process_chemist" in observation.enabled_roles and len(messages) < 4:
450
+ route_risk = action.slot == "hinge" and action.fragment == "quinazoline"
451
+ messages.append(
452
+ AgentMessage(
453
+ sender="process_chemist",
454
+ message_type="approval" if not route_risk else "objection",
455
+ severity="low" if not route_risk else "medium",
456
+ summary=(
457
+ "The route impact looks manageable."
458
+ if not route_risk
459
+ else "This edit worsens route complexity more than I like."
460
+ ),
461
+ )
462
+ )
463
+
464
+ action.messages = messages[:4]
465
+ return action
466
+
467
+
468
+ def proposal_summary(action: MolForgeAction) -> str:
469
+ if action.action_type == "edit":
470
+ return f"Propose {action.edit_type} on {action.slot} to {action.fragment}."
471
+ if action.action_type == "run_assay":
472
+ return f"Propose running {action.tool_name}."
473
+ if action.action_type == "restart":
474
+ return "Propose abandoning the current scaffold and restarting."
475
+ if action.action_type == "submit":
476
+ return "Propose submitting the current candidate."
477
+ return "Propose holding the current state."
478
+
479
+
480
+ def proposal_payload(action: MolForgeAction) -> Dict[str, Any]:
481
+ payload = {"action_type": action.action_type}
482
+ if action.slot:
483
+ payload["slot"] = action.slot
484
+ if action.fragment:
485
+ payload["fragment"] = action.fragment
486
+ if action.tool_name:
487
+ payload["tool_name"] = action.tool_name
488
+ return payload
489
+
490
+
491
+ def build_action_evidence(
492
+ observation: MolForgeObservation,
493
+ action: MolForgeAction,
494
+ ) -> list[str]:
495
+ evidence = [
496
+ f"scenario={observation.scenario_id}",
497
+ f"budget={observation.remaining_budget}/{observation.max_budget}",
498
+ f"step={observation.step_index}/{observation.max_steps}",
499
+ ]
500
+ current = current_fragments(observation)
501
+ known_props = [
502
+ f"{name}={value:.3f}"
503
+ for name, value in observation.visible_metrics.items()
504
+ if name in {"potency", "toxicity", "synth", "novelty"}
505
+ ]
506
+ if known_props:
507
+ evidence.append("visible_metrics:" + ",".join(known_props[:3]))
508
+ else:
509
+ unknown = [
510
+ constraint.name
511
+ for constraint in observation.constraint_status
512
+ if constraint.evidence_status == "unknown"
513
+ ]
514
+ if unknown:
515
+ evidence.append("unknown_constraints:" + ",".join(unknown[:3]))
516
+
517
+ if action.action_type == "edit" and action.slot and action.fragment:
518
+ evidence.append(f"current_{action.slot}={current[action.slot]}")
519
+ evidence.append(f"candidate_{action.slot}={action.fragment}")
520
+ elif action.action_type == "run_assay" and action.tool_name:
521
+ gaps = [
522
+ constraint.name
523
+ for constraint in observation.constraint_status
524
+ if constraint.evidence_status == "unknown"
525
+ ]
526
+ evidence.append(f"tool={action.tool_name}")
527
+ if gaps:
528
+ evidence.append("evidence_gaps:" + ",".join(gaps[:3]))
529
+ elif action.action_type == "submit":
530
+ known = [
531
+ constraint.name
532
+ for constraint in observation.constraint_status
533
+ if constraint.evidence_status == "known"
534
+ ]
535
+ evidence.append("known_constraints:" + ",".join(known[:3]) if known else "known_constraints=none")
536
+ elif action.action_type == "restart":
537
+ evidence.append("restart_available=true")
538
+ evidence.append(f"current_molecule={observation.current_molecule}")
539
+
540
+ return evidence[:5]
541
+
542
+
543
+ def build_expected_effects(
544
+ observation: MolForgeObservation,
545
+ action: MolForgeAction,
546
+ ) -> Dict[str, str]:
547
+ effects: Dict[str, str] = {
548
+ "potency": "unknown",
549
+ "toxicity": "unknown",
550
+ "synth": "unknown",
551
+ "novelty": "unknown",
552
+ "budget": "neutral",
553
+ }
554
+
555
+ if action.action_type == "run_assay":
556
+ effects.update(
557
+ {
558
+ "potency": "not_applicable",
559
+ "toxicity": "not_applicable",
560
+ "synth": "not_applicable",
561
+ "novelty": "not_applicable",
562
+ "budget": "down",
563
+ }
564
+ )
565
+ return effects
566
+
567
+ if action.action_type == "submit":
568
+ effects.update(
569
+ {
570
+ "potency": "not_applicable",
571
+ "toxicity": "not_applicable",
572
+ "synth": "not_applicable",
573
+ "novelty": "not_applicable",
574
+ "budget": "neutral",
575
+ }
576
+ )
577
+ return effects
578
+
579
+ if action.action_type == "restart":
580
+ effects.update({"toxicity": "down", "synth": "up", "budget": "down"})
581
+        if observation.scenario_id == "level_2_hard":
+            effects["potency"] = "up"
+        return effects
+
+    if action.action_type != "edit":
+        return effects
+
+    fragment = action.fragment or ""
+    slot = action.slot or ""
+    if slot == "hinge" and fragment == "azaindole":
+        effects["potency"] = "up"
+    if slot == "back_pocket" and fragment == "cyano":
+        effects["potency"] = "up"
+        effects["toxicity"] = "down"
+    if slot == "back_pocket" and fragment in {"chloro", "trifluoromethyl"}:
+        effects["potency"] = "up"
+        effects["toxicity"] = "up"
+    if slot == "solvent_tail" and fragment == "morpholine":
+        effects["toxicity"] = "down"
+        effects["synth"] = "up"
+    if slot == "solvent_tail" and fragment == "dimethylamino":
+        effects["toxicity"] = "up"
+    if slot == "warhead" and fragment == "reversible_cyanoacrylamide":
+        effects["toxicity"] = "down"
+        effects["novelty"] = "up"
+    if slot == "warhead" and fragment == "nitrile":
+        effects["toxicity"] = "down"
+    if observation.scenario_id == "level_2_hard":
+        effects["potency"] = "up"
+    return effects
+
+
+def current_fragments(observation: MolForgeObservation) -> Dict[str, str]:
+    return {entry.slot: entry.fragment for entry in observation.molecule_slots}
+
+
+def known_estimate(observation: MolForgeObservation, property_name: str) -> Optional[float]:
+    current_signature = observation.current_molecule
+    for reading in reversed(observation.known_assays):
+        if reading.molecule_signature == current_signature and reading.property_name == property_name:
+            return reading.estimate
+    return None
+
+
+def current_property_names(observation: MolForgeObservation) -> set[str]:
+    current_signature = observation.current_molecule
+    return {
+        reading.property_name
+        for reading in observation.known_assays
+        if reading.molecule_signature == current_signature
+    }
+
+
+def has_assay_tool(observation: MolForgeObservation, tool_name: str) -> bool:
+    current_signature = observation.current_molecule
+    return any(
+        reading.molecule_signature == current_signature and reading.tool_name == tool_name
+        for reading in observation.known_assays
+    )
+
+
+def planned_fragment_edit(
+    observation: MolForgeObservation,
+    current: Dict[str, str],
+) -> Optional[tuple[str, str, str]]:
+    plans = {
+        "level_0_easy": [
+            ("solvent_tail", "morpholine", "Morpholine improves safety and keeps synthesis comfortably feasible."),
+            ("back_pocket", "cyano", "Cyano repairs the chloro safety liability while preserving potency."),
+            ("hinge", "azaindole", "Azaindole is needed to clear the stricter potency floor after safety is stabilized."),
+        ],
+        "level_1_medium": [
+            ("solvent_tail", "morpholine", "First remove the largest safety liability before paying for assays."),
+            ("back_pocket", "cyano", "Cyano keeps potency while avoiding the chloro safety penalty."),
+            ("hinge", "azaindole", "Azaindole recovers enough potency for the tighter medium target."),
+        ],
+    }
+    for slot, fragment, rationale in plans.get(observation.scenario_id, []):
+        if current[slot] != fragment:
+            return slot, fragment, rationale
+    return None
+
+
+def on_planned_final_candidate(
+    observation: MolForgeObservation,
+    current: Dict[str, str],
+) -> bool:
+    finals = {
+        "level_0_easy": {
+            "warhead": "acrylamide",
+            "hinge": "azaindole",
+            "solvent_tail": "morpholine",
+            "back_pocket": "cyano",
+        },
+        "level_1_medium": {
+            "warhead": "acrylamide",
+            "hinge": "azaindole",
+            "solvent_tail": "morpholine",
+            "back_pocket": "cyano",
+        },
+        "level_2_hard": {
+            "warhead": "nitrile",
+            "hinge": "azaindole",
+            "solvent_tail": "morpholine",
+            "back_pocket": "cyano",
+        },
+    }
+    return current == finals.get(observation.scenario_id, {})
+
+
+def can_collect_potency_now(observation: MolForgeObservation) -> bool:
+    return observation.scenario_id != "level_2_hard" or observation.step_index >= 3
+
+
+def hard_post_shift_potency_ready(observation: MolForgeObservation) -> bool:
+    if observation.scenario_id != "level_2_hard":
+        return True
+    current_signature = observation.current_molecule
+    return any(
+        reading.molecule_signature == current_signature
+        and reading.property_name == "potency"
+        and observation.step_index >= 4
+        for reading in observation.known_assays
+    )
+
+
+def constraints_pass_from_visible_evidence(observation: MolForgeObservation) -> bool:
+    if not observation.constraint_status:
+        return False
+    return all(
+        constraint.evidence_status == "known" and constraint.satisfied is True
+        for constraint in observation.constraint_status
+    )
+
+
+def threshold_value(observation: MolForgeObservation, constraint_name: str) -> Optional[float]:
+    for constraint in observation.constraint_status:
+        if constraint.name != constraint_name:
+            continue
+        try:
+            return float(constraint.target.split()[-1])
+        except Exception:
+            return None
+    return None
+
+
+def is_safety_risky(
+    fragments: Dict[str, str],
+    known_toxicity: Optional[float],
+    toxicity_threshold: Optional[float],
+) -> bool:
+    if known_toxicity is not None and toxicity_threshold is not None and known_toxicity > toxicity_threshold:
+        return True
+    risky_patterns = [
+        fragments["solvent_tail"] == "dimethylamino",
+        fragments["back_pocket"] == "trifluoromethyl",
+        fragments["hinge"] == "fluorophenyl" and fragments["back_pocket"] == "chloro",
+    ]
+    return any(risky_patterns)
+
+
+def is_safer_edit(
+    current: Dict[str, str],
+    action: MolForgeAction,
+    known_toxicity: Optional[float],
+    toxicity_threshold: Optional[float],
+) -> bool:
+    if action.slot == "solvent_tail" and action.fragment == "morpholine":
+        return True
+    if action.slot == "back_pocket" and action.fragment == "cyano":
+        return True
+    if action.slot == "warhead" and action.fragment == "reversible_cyanoacrylamide":
+        return True
+    if known_toxicity is not None and toxicity_threshold is not None:
+        return known_toxicity <= toxicity_threshold
+    return current["solvent_tail"] != "dimethylamino"
+
+
+def extract_json(text: str) -> Dict[str, Any]:
+    start = text.find("{")
+    end = text.rfind("}")
+    if start == -1 or end == -1 or start >= end:
+        raise ValueError("No JSON object found in model response")
+    return json.loads(text[start : end + 1])
+
+
+def build_model_payload(
+    observation: MolForgeObservation,
+    *,
+    compact: bool,
+) -> Dict[str, Any]:
+    base_payload = {
+        "valid_top_level_action_types": ["edit", "run_assay", "submit", "restart", "defer"],
+        "invalid_top_level_action_types": [
+            "proposal",
+            "approval",
+            "objection",
+            "risk_flag",
+            "assay_request",
+            "rejection",
+            "submission_recommendation",
+        ],
+        "scenario_id": observation.scenario_id,
+        "difficulty": observation.difficulty,
+        "task_brief": observation.task_brief,
+        "state_label": observation.state_label,
+        "state_path_tail": observation.state_path[-4:],
+        "current_molecule": observation.current_molecule,
+        "current_smiles": observation.metadata.get("current_smiles", ""),
+        "oracle_backend": observation.metadata.get("oracle_backend", {}),
+        "visible_metrics": observation.visible_metrics,
+        "constraint_status": [constraint.model_dump() for constraint in observation.constraint_status],
+        "governance": observation.governance.model_dump(),
+        "last_transition_summary": observation.last_transition_summary,
+        "allowed_actions": observation.allowed_actions,
+        "role_message_rules": {
+            "lead_chemist": ["proposal", "revision_request", "submission_recommendation"],
+            "assay_planner": ["proposal", "approval", "rejection", "assay_request", "submission_recommendation"],
+            "toxicologist": ["approval", "objection", "risk_flag", "assay_request", "rejection"],
+            "process_chemist": ["approval", "objection", "risk_flag", "assay_request"],
+        },
+        "remaining_budget": observation.remaining_budget,
+        "step_index": observation.step_index,
+        "max_steps": observation.max_steps,
+    }
+
+    if compact:
+        base_payload["known_assays"] = [
+            {
+                "tool_name": reading.tool_name,
+                "property_name": reading.property_name,
+                "estimate": reading.estimate,
+                "confidence_low": reading.confidence_low,
+                "confidence_high": reading.confidence_high,
+            }
+            for reading in observation.known_assays[-6:]
+        ]
+        base_payload["role_summaries"] = [
+            {
+                "role": role.role,
+                "local_objective": role.local_objective,
+                "key_fields": list(role.observation.keys())[:5],
+            }
+            for role in observation.role_observations
+        ]
+        return base_payload
+
+    base_payload["known_assays"] = [reading.model_dump() for reading in observation.known_assays]
+    base_payload["role_observations"] = [role.model_dump() for role in observation.role_observations]
+    base_payload["recent_messages"] = [message.model_dump() for message in observation.message_log[-6:]]
+    return base_payload
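Reviewer note: `extract_json` above simply brackets the outermost braces, so it tolerates prose or leaked reasoning text around the action object but rejects truncated JSON. A standalone sketch of that behavior (the helper is re-declared here for illustration; the real one lives in this diff):

```python
import json
from typing import Any, Dict


def extract_json(text: str) -> Dict[str, Any]:
    # Take the span between the first "{" and the last "}" and parse it.
    start = text.find("{")
    end = text.rfind("}")
    if start == -1 or end == -1 or start >= end:
        raise ValueError("No JSON object found in model response")
    return json.loads(text[start : end + 1])


# Surrounding chatter is ignored; only the braced span is parsed.
noisy = 'Sure, here is the action: {"action_type": "defer"} Hope that helps!'
print(extract_json(noisy))  # {'action_type': 'defer'}
```

Note the `rfind` means any stray `}` after the object (for example in trailing prose) would widen the parsed span and fail, which is why the runners retry with a compact prompt.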
local_inference.py ADDED
@@ -0,0 +1,203 @@
+"""Local-only inference runner for Ollama-backed MolForge testing.
+
+This script is intentionally separate from `inference.py`.
+Use `inference.py` for the judge-facing OpenAI-client baseline required by the
+hackathon. Use this file for local development against Ollama's native API,
+where reasoning models often behave better when `think` is explicitly disabled.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+from typing import Any, Dict, Optional, Tuple
+
+import requests
+
+from inference_common import (
+    COMPACT_SYSTEM_PROMPT,
+    SYSTEM_PROMPT,
+    build_model_payload,
+    extract_json,
+)
+
+try:
+    from molforge.models import MolForgeAction, MolForgeObservation
+    from molforge.server.molforge_environment import MolForgeEnvironment
+except ImportError:
+    from models import MolForgeAction, MolForgeObservation
+    from server.molforge_environment import MolForgeEnvironment
+
+OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://localhost:11434")
+LOCAL_MODEL_NAME = os.getenv("LOCAL_MODEL_NAME", "gemma4:e2b")
+LOCAL_NUM_EPISODES = int(os.getenv("LOCAL_NUM_EPISODES", "3"))
+LOCAL_MAX_TURNS = int(os.getenv("LOCAL_MAX_TURNS", "10"))
+OLLAMA_TIMEOUT_S = float(os.getenv("OLLAMA_TIMEOUT_S", "240"))
+OLLAMA_RETRY_TIMEOUT_S = float(os.getenv("OLLAMA_RETRY_TIMEOUT_S", "120"))
+OLLAMA_MAX_TOKENS = int(os.getenv("OLLAMA_MAX_TOKENS", "768"))
+OLLAMA_THINK = os.getenv("OLLAMA_THINK", "false").lower() == "true"
+
+
+def main() -> None:
+    env = MolForgeEnvironment()
+    scores = []
+    submission_scores = []
+    progress_scores = []
+
+    print(f"Using Ollama model: {LOCAL_MODEL_NAME}", flush=True)
+    print(f"Ollama base URL: {OLLAMA_BASE_URL}", flush=True)
+    print(f"Thinking enabled: {OLLAMA_THINK}", flush=True)
+
+    for episode_index in range(LOCAL_NUM_EPISODES):
+        observation = env.reset()
+        print(f"\n=== Episode {episode_index + 1}: {observation.scenario_id} ===", flush=True)
+
+        for _ in range(LOCAL_MAX_TURNS):
+            if observation.done:
+                break
+            action, source = choose_local_action(observation)
+            observation = env.step(action)
+            print(
+                f"step={observation.step_index:02d} action={action.action_type} actor={action.acting_role} "
+                f"source={source} reward={observation.reward:+.3f} budget={observation.remaining_budget} "
+                f"governance={observation.governance.status}",
+                flush=True,
+            )
+            print(f"  {observation.last_transition_summary}", flush=True)
+            if observation.done:
+                break
+
+        grader_scores = observation.metadata.get("terminal_grader_scores", {})
+        final_score = float(grader_scores.get("final_score", grader_scores.get("submission_score", 0.0)))
+        submission_score = float(grader_scores.get("submission_score", 0.0))
+        progress_score = float(grader_scores.get("progress_score", 0.0))
+        scores.append(final_score)
+        submission_scores.append(submission_score)
+        progress_scores.append(progress_score)
+        print(f"final_score={final_score:.3f}", flush=True)
+        print(f"submission_score={submission_score:.3f}", flush=True)
+        print(f"progress_score={progress_score:.3f}", flush=True)
+        if observation.report_card:
+            print(observation.report_card, flush=True)
+
+    average = sum(scores) / len(scores)
+    average_progress = sum(progress_scores) / len(progress_scores)
+    print("\n=== Local Baseline Summary ===", flush=True)
+    print(
+        json.dumps(
+            {
+                "model": LOCAL_MODEL_NAME,
+                "scores": scores,
+                "average_final_score": round(average, 4),
+                "submission_scores": submission_scores,
+                "average_submission_score": round(sum(submission_scores) / len(submission_scores), 4),
+                "progress_scores": progress_scores,
+                "average_progress_score": round(average_progress, 4),
+            },
+            indent=2,
+        ),
+        flush=True,
+    )
+
+
+def choose_local_action(observation: MolForgeObservation) -> Tuple[MolForgeAction, str]:
+    """Use Ollama output and fail loudly if it cannot produce a valid action."""
+
+    action, error = ask_ollama_model(observation)
+    if action is not None:
+        return action, "model"
+    raise RuntimeError(f"Local model action failed: {error}")
+
+
+def ask_ollama_model(observation: MolForgeObservation) -> Tuple[Optional[MolForgeAction], str]:
+    """Call Ollama's native chat API.
+
+    Official Ollama docs note that reasoning traces live in `message.thinking`
+    while the final answer lives in `message.content`, and that `think: false`
+    can disable thinking on the native chat endpoint.
+    """
+
+    errors = []
+    try:
+        payload = build_model_payload(observation, compact=False)
+        response_json = ollama_chat(
+            system_prompt=SYSTEM_PROMPT,
+            user_payload=payload,
+            timeout_s=OLLAMA_TIMEOUT_S,
+        )
+        data = parse_ollama_json_response(response_json)
+        return MolForgeAction(**data), ""
+    except Exception as exc:
+        errors.append(f"full_prompt:{exc.__class__.__name__}:{exc}")
+    try:
+        payload = build_model_payload(observation, compact=True)
+        response_json = ollama_chat(
+            system_prompt=COMPACT_SYSTEM_PROMPT,
+            user_payload=payload,
+            timeout_s=OLLAMA_RETRY_TIMEOUT_S,
+        )
+        data = parse_ollama_json_response(response_json)
+        return MolForgeAction(**data), ""
+    except Exception as retry_exc:
+        errors.append(f"compact_prompt:{retry_exc.__class__.__name__}:{retry_exc}")
+    return None, " | ".join(errors)
+
+
+def ollama_chat(
+    *,
+    system_prompt: str,
+    user_payload: Dict[str, Any],
+    timeout_s: float,
+) -> Dict[str, Any]:
+    """Issue a native Ollama chat request."""
+
+    response = requests.post(
+        f"{OLLAMA_BASE_URL.rstrip('/')}/api/chat",
+        json={
+            "model": LOCAL_MODEL_NAME,
+            "stream": False,
+            "think": OLLAMA_THINK,
+            "format": "json",
+            "messages": [
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": json.dumps(user_payload, indent=2)},
+            ],
+            "options": {
+                "temperature": 0,
+                "num_predict": OLLAMA_MAX_TOKENS,
+            },
+        },
+        timeout=timeout_s,
+    )
+    response.raise_for_status()
+    return response.json()
+
+
+def parse_ollama_json_response(response_json: Dict[str, Any]) -> Dict[str, Any]:
+    """Extract a JSON action from a native Ollama response."""
+
+    message = response_json.get("message", {}) or {}
+    content = message.get("content", "") or ""
+    thinking = message.get("thinking", "") or ""
+
+    if content:
+        try:
+            return extract_json(content)
+        except Exception:
+            pass
+
+    if thinking:
+        try:
+            return extract_json(thinking)
+        except Exception:
+            pass
+
+    combined = f"{content}\n{thinking}".strip()
+    if combined:
+        return extract_json(combined)
+
+    raise ValueError("No parseable JSON action found in Ollama response")
+
+
+if __name__ == "__main__":
+    main()
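Reviewer note: the useful design point in `parse_ollama_json_response` above is the fallback order for reasoning models that leak their answer into `message.thinking`. A simplified standalone sketch of just that ordering (the real helper also retries on the combined text; names here mirror the diff but the stub response is invented):

```python
import json
from typing import Any, Dict


def extract_json(text: str) -> Dict[str, Any]:
    # Same bracketing strategy as inference_common.extract_json.
    start, end = text.find("{"), text.rfind("}")
    if start == -1 or end == -1 or start >= end:
        raise ValueError("no JSON object")
    return json.loads(text[start : end + 1])


def parse_response(response_json: Dict[str, Any]) -> Dict[str, Any]:
    # Prefer message.content; fall back to message.thinking.
    message = response_json.get("message", {}) or {}
    for candidate in (message.get("content", ""), message.get("thinking", "")):
        if candidate:
            try:
                return extract_json(candidate)
            except Exception:
                continue
    raise ValueError("No parseable JSON action found")


# A reasoning model that left its answer inside the thinking trace:
resp = {"message": {"content": "", "thinking": 'ok -> {"action_type": "defer"}'}}
print(parse_response(resp))  # {'action_type': 'defer'}
```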
lora_inference.py ADDED
@@ -0,0 +1,244 @@
+"""Local PEFT/LoRA inference runner for MolForge.
+
+Use this to test an SFT adapter against the environment before RL. It loads the
+base model named in the adapter config, attaches the LoRA weights, and requires
+the model to emit a valid MolForgeAction JSON object. There is no heuristic
+fallback or schema repair.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+from pathlib import Path
+from typing import Any, Dict, Optional, Tuple
+
+import torch
+from peft import PeftConfig, PeftModel
+from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, Qwen3_5ForConditionalGeneration
+
+from inference_common import (
+    COMPACT_SYSTEM_PROMPT,
+    SYSTEM_PROMPT,
+    build_model_payload,
+    extract_json,
+)
+
+try:
+    from molforge.models import MolForgeAction, MolForgeObservation
+    from molforge.server.molforge_environment import MolForgeEnvironment
+except ImportError:
+    from models import MolForgeAction, MolForgeObservation
+    from server.molforge_environment import MolForgeEnvironment
+
+
+ADAPTER_PATH = Path(os.getenv("LORA_ADAPTER_PATH", "qwen3_5_2b_lora_adapters"))
+LOCAL_NUM_EPISODES = int(os.getenv("LOCAL_NUM_EPISODES", "3"))
+LOCAL_MAX_TURNS = int(os.getenv("LOCAL_MAX_TURNS", "10"))
+LORA_MAX_NEW_TOKENS = int(os.getenv("LORA_MAX_NEW_TOKENS", "768"))
+LORA_RETRY_MAX_NEW_TOKENS = int(os.getenv("LORA_RETRY_MAX_NEW_TOKENS", "512"))
+LORA_DEVICE = os.getenv("LORA_DEVICE", "auto")
+
+
+def main() -> None:
+    adapter_path = ADAPTER_PATH.expanduser().resolve()
+    tokenizer, model, base_model_name, device = load_adapter_model(adapter_path)
+    env = MolForgeEnvironment()
+    scores = []
+    submission_scores = []
+    progress_scores = []
+
+    print(f"Using LoRA adapter: {adapter_path}", flush=True)
+    print(f"Base model: {base_model_name}", flush=True)
+    print(f"Device: {device}", flush=True)
+
+    for episode_index in range(LOCAL_NUM_EPISODES):
+        observation = env.reset()
+        print(f"\n=== Episode {episode_index + 1}: {observation.scenario_id} ===", flush=True)
+
+        for _ in range(LOCAL_MAX_TURNS):
+            if observation.done:
+                break
+            action, source = choose_lora_action(tokenizer, model, observation, device)
+            observation = env.step(action)
+            print(
+                f"step={observation.step_index:02d} action={action.action_type} actor={action.acting_role} "
+                f"source={source} reward={observation.reward:+.3f} budget={observation.remaining_budget} "
+                f"governance={observation.governance.status}",
+                flush=True,
+            )
+            print(f"  {observation.last_transition_summary}", flush=True)
+            if observation.done:
+                break
+
+        grader_scores = observation.metadata.get("terminal_grader_scores", {})
+        final_score = float(grader_scores.get("final_score", grader_scores.get("submission_score", 0.0)))
+        submission_score = float(grader_scores.get("submission_score", 0.0))
+        progress_score = float(grader_scores.get("progress_score", 0.0))
+        scores.append(final_score)
+        submission_scores.append(submission_score)
+        progress_scores.append(progress_score)
+        print(f"final_score={final_score:.3f}", flush=True)
+        print(f"submission_score={submission_score:.3f}", flush=True)
+        print(f"progress_score={progress_score:.3f}", flush=True)
+        if observation.report_card:
+            print(observation.report_card, flush=True)
+
+    average = sum(scores) / len(scores)
+    average_progress = sum(progress_scores) / len(progress_scores)
+    print("\n=== LoRA Local Summary ===", flush=True)
+    print(
+        json.dumps(
+            {
+                "adapter": str(adapter_path),
+                "base_model": base_model_name,
+                "scores": scores,
+                "average_final_score": round(average, 4),
+                "submission_scores": submission_scores,
+                "average_submission_score": round(sum(submission_scores) / len(submission_scores), 4),
+                "progress_scores": progress_scores,
+                "average_progress_score": round(average_progress, 4),
+            },
+            indent=2,
+        ),
+        flush=True,
+    )
+
+
+def load_adapter_model(adapter_path: Path):
+    config = PeftConfig.from_pretrained(adapter_path)
+    base_model_name = config.base_model_name_or_path
+    device = resolve_device()
+    dtype = torch.float16 if device in {"cuda", "mps"} else torch.float32
+
+    tokenizer = AutoTokenizer.from_pretrained(
+        adapter_path,
+        trust_remote_code=True,
+        use_fast=True,
+    )
+    if tokenizer.pad_token_id is None:
+        tokenizer.pad_token = tokenizer.eos_token
+
+    base_config = AutoConfig.from_pretrained(base_model_name, trust_remote_code=True)
+    model_class = (
+        Qwen3_5ForConditionalGeneration
+        if "Qwen3_5ForConditionalGeneration" in (base_config.architectures or [])
+        else AutoModelForCausalLM
+    )
+    base_model = model_class.from_pretrained(
+        base_model_name,
+        dtype=dtype,
+        trust_remote_code=True,
+        low_cpu_mem_usage=True,
+    )
+    model = PeftModel.from_pretrained(base_model, adapter_path)
+    model.to(device)
+    model.eval()
+    return tokenizer, model, base_model_name, device
+
+
+def resolve_device() -> str:
+    if LORA_DEVICE != "auto":
+        return LORA_DEVICE
+    if torch.cuda.is_available():
+        return "cuda"
+    if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
+        return "mps"
+    return "cpu"
+
+
+def choose_lora_action(
+    tokenizer,
+    model,
+    observation: MolForgeObservation,
+    device: str,
+) -> Tuple[MolForgeAction, str]:
+    action, error = ask_lora_model(
+        tokenizer,
+        model,
+        observation,
+        device,
+        compact=False,
+        max_new_tokens=LORA_MAX_NEW_TOKENS,
+    )
+    if action is not None:
+        return action, "lora_model"
+
+    retry_action, retry_error = ask_lora_model(
+        tokenizer,
+        model,
+        observation,
+        device,
+        compact=True,
+        max_new_tokens=LORA_RETRY_MAX_NEW_TOKENS,
+    )
+    if retry_action is not None:
+        return retry_action, "lora_model_compact_retry"
+
+    raise RuntimeError(f"LoRA model action failed: full_prompt:{error} | compact_prompt:{retry_error}")
+
+
+def ask_lora_model(
+    tokenizer,
+    model,
+    observation: MolForgeObservation,
+    device: str,
+    *,
+    compact: bool,
+    max_new_tokens: int,
+) -> Tuple[Optional[MolForgeAction], str]:
+    response_text = ""
+    try:
+        payload = build_model_payload(observation, compact=compact)
+        system_prompt = COMPACT_SYSTEM_PROMPT if compact else SYSTEM_PROMPT
+        response_text = generate_response(
+            tokenizer,
+            model,
+            device,
+            system_prompt=system_prompt,
+            user_payload=payload,
+            max_new_tokens=max_new_tokens,
+        )
+        data = extract_json(response_text)
+        return MolForgeAction(**data), ""
+    except Exception as exc:
+        snippet = response_text[:1200].replace("\n", "\\n")
+        return None, f"{exc.__class__.__name__}:{exc}; raw={snippet}"
+
+
+def generate_response(
+    tokenizer,
+    model,
+    device: str,
+    *,
+    system_prompt: str,
+    user_payload: Dict[str, Any],
+    max_new_tokens: int,
+) -> str:
+    messages = [
+        {"role": "system", "content": system_prompt},
+        {"role": "user", "content": json.dumps(user_payload, separators=(",", ":"))},
+    ]
+    prompt = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True,
+        enable_thinking=False,
+    )
+    inputs = tokenizer(prompt, return_tensors="pt").to(device)
+    with torch.inference_mode():
+        generated = model.generate(
+            **inputs,
+            do_sample=False,
+            temperature=None,
+            top_p=None,
+            max_new_tokens=max_new_tokens,
+            pad_token_id=tokenizer.pad_token_id,
+            eos_token_id=tokenizer.eos_token_id,
+        )
+    new_tokens = generated[0, inputs["input_ids"].shape[-1] :]
+    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
+
+
+if __name__ == "__main__":
+    main()
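Reviewer note: all three runners share the same escalation shape seen in `choose_lora_action` above: try the full payload, then retry once with the compact prompt, and raise with both error strings if neither parses. A minimal sketch of that pattern in isolation (the callback and its return shape are illustrative, not the actual runner API):

```python
from typing import Callable, Dict, Optional, Tuple

AskFn = Callable[[bool], Tuple[Optional[Dict[str, str]], str]]


def with_compact_retry(ask: AskFn) -> Tuple[Dict[str, str], str]:
    # First attempt uses the full payload (compact=False).
    action, error = ask(False)
    if action is not None:
        return action, "full"
    # One retry with the smaller compact payload, then fail loudly
    # with both error strings so the bad raw output is visible.
    retry_action, retry_error = ask(True)
    if retry_action is not None:
        return retry_action, "compact_retry"
    raise RuntimeError(f"full_prompt:{error} | compact_prompt:{retry_error}")


# Simulated model that only emits valid JSON on the compact prompt:
def fake_ask(compact: bool):
    return ({"action_type": "defer"}, "") if compact else (None, "parse error")


print(with_compact_retry(fake_ask))  # ({'action_type': 'defer'}, 'compact_retry')
```

Keeping the error strings from every failed attempt, as the runners do, makes malformed model output debuggable without re-running an episode.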
mlx_lora_inference.py ADDED
@@ -0,0 +1,457 @@
1
+ """MLX-backed local LoRA inference runner for MolForge on Apple Silicon."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import os
7
+ import time
8
+ from pathlib import Path
9
+ from typing import Any, Dict, Optional, Tuple
10
+
11
+ from mlx_lm import generate, load
12
+ from mlx_lm.sample_utils import make_sampler
13
+
14
+ from inference_common import (
15
+ COMPACT_SYSTEM_PROMPT,
16
+ SYSTEM_PROMPT,
17
+ attach_team_messages,
18
+ build_model_payload,
19
+ extract_json,
20
+ )
21
+
22
+ try:
23
+ from molforge.models import MolForgeAction, MolForgeObservation
24
+ from molforge.server.molforge_environment import MolForgeEnvironment
25
+ except ImportError:
26
+ from models import MolForgeAction, MolForgeObservation
27
+ from server.molforge_environment import MolForgeEnvironment
28
+
29
+
30
+ ADAPTER_PATH = Path(os.getenv("LORA_ADAPTER_PATH", "qwen3_5_2b_lora_adapters_strict"))
31
+ BASE_MODEL_NAME = os.getenv("BASE_MODEL_NAME", "unsloth/Qwen3.5-2B")
32
+ LOCAL_NUM_EPISODES = int(os.getenv("LOCAL_NUM_EPISODES", "3"))
33
+ LOCAL_MAX_TURNS = int(os.getenv("LOCAL_MAX_TURNS", "10"))
34
+ MLX_MAX_TOKENS = int(os.getenv("MLX_MAX_TOKENS", "768"))
35
+ MLX_RETRY_MAX_TOKENS = int(os.getenv("MLX_RETRY_MAX_TOKENS", "512"))
36
+ MLX_JSON_PREFILL = os.getenv("MLX_JSON_PREFILL", "true").lower() == "true"
37
+ MLX_COMPACT_ACTION = os.getenv("MLX_COMPACT_ACTION", "false").lower() == "true"
38
+ MLX_COMPACT_REPAIR = os.getenv("MLX_COMPACT_REPAIR", "false").lower() == "true"
39
+ MLX_FORCED_ACTION_TYPES = [
40
+ item.strip()
41
+ for item in os.getenv("MLX_FORCED_ACTION_TYPES", "").split(",")
42
+ if item.strip()
43
+ ]
44
+ JSON_PREFILL = '{"action_type":"'
45
+ COMPACT_ACTION_SYSTEM_PROMPT = """
46
+ You control the MolForge action policy.
47
+ Return exactly one JSON object with only these top-level keys:
48
+ action_type, acting_role, edit_type, slot, fragment, tool_name, rationale,
49
+ evidence, expected_effects.
50
+
51
+ Valid action_type values are exactly:
52
+ edit, run_assay, submit, restart, defer.
53
+
54
+ Do not output team messages. Do not output proposal, approval, objection,
55
+ risk_flag, assay_request, rejection, or submission_recommendation as action_type.
56
+ The environment will attach governance messages automatically.
57
+
58
+ Role rules:
59
+ - run_assay uses acting_role "assay_planner" and a valid tool_name.
60
+ - edit, submit, restart, and defer use acting_role "lead_chemist".
61
+ - unused optional fields must be JSON null.
62
+ """.strip()
63
+
64
+
65
+ def main() -> None:
66
+ adapter_path = ADAPTER_PATH.expanduser().resolve()
67
+ print(f"Using MLX base model: {BASE_MODEL_NAME}", flush=True)
68
+ print(f"Using LoRA adapter: {adapter_path}", flush=True)
69
+ model, tokenizer = load(BASE_MODEL_NAME, adapter_path=str(adapter_path))
70
+ sampler = make_sampler(temp=0.0)
71
+
72
+ env = MolForgeEnvironment()
73
+ scores = []
74
+ submission_scores = []
75
+ progress_scores = []
76
+
77
+ for episode_index in range(LOCAL_NUM_EPISODES):
78
+ observation = env.reset()
79
+ print(f"\n=== Episode {episode_index + 1}: {observation.scenario_id} ===", flush=True)
80
+
81
+ for _ in range(LOCAL_MAX_TURNS):
82
+ if observation.done:
83
+ break
84
+ action, source, elapsed = choose_mlx_action(model, tokenizer, sampler, observation)
85
+ if MLX_COMPACT_ACTION:
86
+ action = attach_team_messages(observation, action)
87
+ observation = env.step(action)
88
+ print(
89
+ f"step={observation.step_index:02d} action={action.action_type} actor={action.acting_role} "
90
+ f"source={source} gen_s={elapsed:.2f} reward={observation.reward:+.3f} "
91
+ f"budget={observation.remaining_budget} governance={observation.governance.status}",
92
+ flush=True,
93
+ )
94
+ print(f" {observation.last_transition_summary}", flush=True)
95
+ if observation.done:
96
+ break
97
+
98
+ grader_scores = observation.metadata.get("terminal_grader_scores", {})
99
+ final_score = float(grader_scores.get("final_score", grader_scores.get("submission_score", 0.0)))
100
+ submission_score = float(grader_scores.get("submission_score", 0.0))
101
+ progress_score = float(grader_scores.get("progress_score", 0.0))
102
+ scores.append(final_score)
103
+ submission_scores.append(submission_score)
104
+ progress_scores.append(progress_score)
105
+ print(f"final_score={final_score:.3f}", flush=True)
106
+ print(f"submission_score={submission_score:.3f}", flush=True)
107
+ print(f"progress_score={progress_score:.3f}", flush=True)
108
+ if observation.report_card:
109
+ print(observation.report_card, flush=True)
110
+
111
+ average = sum(scores) / len(scores)
112
+ average_progress = sum(progress_scores) / len(progress_scores)
113
+ print("\n=== MLX LoRA Local Summary ===", flush=True)
114
+ print(
115
+ json.dumps(
116
+ {
117
+ "adapter": str(adapter_path),
118
+ "base_model": BASE_MODEL_NAME,
119
+ "scores": scores,
120
+ "average_final_score": round(average, 4),
121
+ "submission_scores": submission_scores,
122
+                 "average_submission_score": round(sum(submission_scores) / len(submission_scores), 4),
+                 "progress_scores": progress_scores,
+                 "average_progress_score": round(average_progress, 4),
+             },
+             indent=2,
+         ),
+         flush=True,
+     )
+ 
+ 
+ def choose_mlx_action(
+     model,
+     tokenizer,
+     sampler,
+     observation: MolForgeObservation,
+ ) -> Tuple[MolForgeAction, str, float]:
+     started = time.perf_counter()
+     action, error = ask_mlx_model(
+         model,
+         tokenizer,
+         sampler,
+         observation,
+         compact=False,
+         max_tokens=MLX_MAX_TOKENS,
+         forced_action_type=None,
+     )
+     if action is not None:
+         return action, "mlx_lora_model", time.perf_counter() - started
+ 
+     forced_errors = []
+     for forced_action_type in forced_action_types(observation):
+         forced_action, forced_error = ask_mlx_model(
+             model,
+             tokenizer,
+             sampler,
+             observation,
+             compact=True,
+             max_tokens=MLX_RETRY_MAX_TOKENS,
+             forced_action_type=forced_action_type,
+         )
+         if forced_action is not None:
+             return (
+                 forced_action,
+                 f"mlx_lora_forced_{forced_action_type}",
+                 time.perf_counter() - started,
+             )
+         forced_errors.append(f"{forced_action_type}:{forced_error}")
+ 
+     retry_action, retry_error = ask_mlx_model(
+         model,
+         tokenizer,
+         sampler,
+         observation,
+         compact=True,
+         max_tokens=MLX_RETRY_MAX_TOKENS,
+         forced_action_type=None,
+     )
+     if retry_action is not None:
+         return retry_action, "mlx_lora_compact_retry", time.perf_counter() - started
+ 
+     raise RuntimeError(
+         "MLX LoRA action failed: "
+         f"full_prompt:{error} | forced:{' || '.join(forced_errors)} | compact_prompt:{retry_error}"
+     )
+ 
+ 
+ def ask_mlx_model(
+     model,
+     tokenizer,
+     sampler,
+     observation: MolForgeObservation,
+     *,
+     compact: bool,
+     max_tokens: int,
+     forced_action_type: Optional[str],
+ ) -> Tuple[Optional[MolForgeAction], str]:
+     response_text = ""
+     try:
+         payload = (
+             compact_action_payload(observation)
+             if MLX_COMPACT_ACTION
+             else build_model_payload(observation, compact=compact)
+         )
+         system_prompt = (
+             COMPACT_ACTION_SYSTEM_PROMPT
+             if MLX_COMPACT_ACTION
+             else (COMPACT_SYSTEM_PROMPT if compact else SYSTEM_PROMPT)
+         )
+         response_text = generate_response(
+             model,
+             tokenizer,
+             sampler,
+             system_prompt=system_prompt,
+             user_payload=payload,
+             max_tokens=max_tokens,
+             use_json_prefill=MLX_JSON_PREFILL,
+             forced_action_type=forced_action_type,
+         )
+         if MLX_JSON_PREFILL:
+             response_text = json_prefill(forced_action_type) + response_text
+         data = extract_json(response_text)
+         repair_notes: list[str] = []
+         if MLX_COMPACT_ACTION and MLX_COMPACT_REPAIR:
+             data, repair_notes = repair_compact_action(data)
+         if MLX_COMPACT_ACTION and "messages" in data:
+             raise ValueError("compact action output must not include messages")
+         action = MolForgeAction(**data)
+         if repair_notes:
+             action.metadata["compact_repair_notes"] = repair_notes
+         return action, ""
+     except Exception as exc:
+         snippet = response_text[:1200].replace("\n", "\\n")
+         return None, f"{exc.__class__.__name__}:{exc}; raw={snippet}"
+ 
+ 
+ def generate_response(
+     model,
+     tokenizer,
+     sampler,
+     *,
+     system_prompt: str,
+     user_payload: Dict[str, Any],
+     max_tokens: int,
+     use_json_prefill: bool,
+     forced_action_type: Optional[str],
+ ) -> str:
+     messages = [
+         {"role": "system", "content": system_prompt},
+         {"role": "user", "content": json.dumps(user_payload, separators=(",", ":"))},
+     ]
+     prompt = tokenizer.apply_chat_template(
+         messages,
+         tokenize=False,
+         add_generation_prompt=True,
+         enable_thinking=False,
+     )
+     if use_json_prefill:
+         prompt += json_prefill(forced_action_type)
+     return generate(
+         model,
+         tokenizer,
+         prompt,
+         verbose=False,
+         max_tokens=max_tokens,
+         sampler=sampler,
+     ).strip()
+ 
+ 
+ def json_prefill(forced_action_type: Optional[str]) -> str:
+     if forced_action_type:
+         return f'{{"action_type":"{forced_action_type}",'
+     return JSON_PREFILL
+ 
+ 
+ def forced_action_types(observation: MolForgeObservation) -> list[str]:
+     if MLX_FORCED_ACTION_TYPES:
+         return MLX_FORCED_ACTION_TYPES
+     if observation.step_index == 0:
+         if observation.scenario_id == "level_2_hard":
+             return ["restart", "edit", "run_assay", "defer"]
+         return ["edit", "run_assay", "defer"]
+     return ["run_assay", "edit", "submit", "restart", "defer"]
+ 
+ 
+ def compact_action_payload(observation: MolForgeObservation) -> dict[str, Any]:
+     lead_view = next(
+         (role.observation for role in observation.role_observations if role.role == "lead_chemist"),
+         {},
+     )
+     assay_view = next(
+         (role.observation for role in observation.role_observations if role.role == "assay_planner"),
+         {},
+     )
+     return {
+         "valid_action_types": ["edit", "run_assay", "submit", "restart", "defer"],
+         "scenario_id": observation.scenario_id,
+         "difficulty": observation.difficulty,
+         "task_brief": observation.task_brief,
+         "current_molecule": observation.current_molecule,
+         "current_smiles": observation.metadata.get("current_smiles", ""),
+         "visible_metrics": observation.visible_metrics,
+         "constraint_status": [constraint.model_dump() for constraint in observation.constraint_status],
+         "remaining_budget": observation.remaining_budget,
+         "max_budget": observation.max_budget,
+         "step_index": observation.step_index,
+         "max_steps": observation.max_steps,
+         "molecule_slots": lead_view.get("molecule_slots", {}),
+         "candidate_edits": lead_view.get("candidate_edits", [])[:12],
+         "open_questions": lead_view.get("open_questions", []),
+         "known_assays": [
+             {
+                 "tool_name": reading.tool_name,
+                 "property_name": reading.property_name,
+                 "estimate": reading.estimate,
+                 "confidence_low": reading.confidence_low,
+                 "confidence_high": reading.confidence_high,
+                 "molecule_signature": reading.molecule_signature,
+             }
+             for reading in observation.known_assays[-8:]
+         ],
+         "tool_costs": assay_view.get("tool_costs", {}),
+         "evidence_gaps": assay_view.get("evidence_gaps", []),
+         "estimated_information_value": assay_view.get("estimated_information_value", {}),
+     }
+ 
+ 
+ def repair_compact_action(data: Dict[str, Any]) -> tuple[Dict[str, Any], list[str]]:
+     """Bounded normalization for compact-action models.
+ 
+     This repairs only schema near-misses. It does not invent an action from a
+     non-action wrapper, and it still rejects invalid top-level action types.
+     """
+     repaired = dict(data)
+     notes: list[str] = []
+ 
+     if "role" in repaired and "acting_role" not in repaired:
+         repaired["acting_role"] = repaired.pop("role")
+         notes.append("role->acting_role")
+ 
+     action_type = repaired.get("action_type")
+     if action_type not in {"edit", "run_assay", "submit", "restart", "defer"}:
+         return repaired, notes
+ 
+     if repaired.get("edit_type") == "replace":
+         repaired["edit_type"] = "substitute"
+         notes.append("edit_type:replace->substitute")
+ 
+     if isinstance(repaired.get("evidence"), str):
+         repaired["evidence"] = [repaired["evidence"]]
+         notes.append("evidence:string->list")
+ 
+     repaired["expected_effects"] = repair_effects(repaired.get("expected_effects"), notes)
+ 
+     if action_type == "run_assay":
+         repaired["acting_role"] = "assay_planner"
+         repaired["edit_type"] = None
+         repaired["slot"] = None
+         repaired["fragment"] = None
+         if repaired.get("tool_name") not in {
+             "evaluate_properties",
+             "dock_target",
+             "assay_toxicity",
+             "estimate_synthesizability",
+             "evaluate_novelty",
+             "search_literature",
+             "run_md_simulation",
+         }:
+             repaired["tool_name"] = "evaluate_properties"
+             notes.append("tool_name:invalid->evaluate_properties")
+     else:
+         repaired["acting_role"] = "lead_chemist"
+         if action_type == "edit":
+             if repaired.get("edit_type") not in {"add_fragment", "substitute", "remove", "undo_last_edit"}:
+                 repaired["edit_type"] = "substitute"
+                 notes.append("edit_type:invalid->substitute")
+             if repaired.get("tool_name") is not None:
+                 repaired["tool_name"] = None
+                 notes.append("tool_name:edit->null")
+         else:
+             for key in ("edit_type", "slot", "fragment", "tool_name"):
+                 if repaired.get(key) is not None:
+                     repaired[key] = None
+                     notes.append(f"{key}:{action_type}->null")
+ 
+     allowed_keys = {
+         "action_type",
+         "acting_role",
+         "edit_type",
+         "slot",
+         "fragment",
+         "tool_name",
+         "rationale",
+         "evidence",
+         "expected_effects",
+     }
+     for key in list(repaired):
+         if key not in allowed_keys:
+             repaired.pop(key)
+             notes.append(f"drop_extra:{key}")
+ 
+     repaired.setdefault("rationale", "Choose the next compact MolForge action.")
+     repaired.setdefault("evidence", [])
+     for key in ("edit_type", "slot", "fragment", "tool_name"):
+         repaired.setdefault(key, None)
+ 
+     return repaired, notes
+ 
+ 
+ def repair_effects(value: Any, notes: list[str]) -> dict[str, str]:
+     defaults = {
+         "potency": "unknown",
+         "toxicity": "unknown",
+         "synth": "unknown",
+         "novelty": "unknown",
+         "budget": "neutral",
+     }
+     if not isinstance(value, dict):
+         notes.append("expected_effects:non_dict->defaults")
+         return defaults
+ 
+     aliases = {
+         "synthesizability": "synth",
+         "synthesis": "synth",
+     }
+     for raw_key, raw_value in value.items():
+         key = aliases.get(raw_key, raw_key)
+         if key not in defaults:
+             notes.append(f"expected_effects:drop_extra:{raw_key}")
+             continue
+         defaults[key] = normalize_effect_value(raw_value, notes, key)
+     return defaults
+ 
+ 
+ def normalize_effect_value(value: Any, notes: list[str], key: str) -> str:
+     if value in {"up", "down", "neutral", "unknown", "not_applicable"}:
+         return value
+     # Check sign prefixes on the raw text: collapsing "-" into "_" before the
+     # token checks would make a "-" test unreachable.
+     text = str(value).lower().strip().replace(" ", "_")
+     collapsed = text.replace("-", "_")
+     if any(token in collapsed for token in ("increase", "improve", "higher", "upward")) or text.startswith("+"):
+         notes.append(f"expected_effects:{key}:{value}->up")
+         return "up"
+     if any(token in collapsed for token in ("decrease", "lower", "reduce", "downward")) or text.startswith("-"):
+         notes.append(f"expected_effects:{key}:{value}->down")
+         return "down"
+     if any(token in collapsed for token in ("maintain", "stable", "unchanged", "same")):
+         notes.append(f"expected_effects:{key}:{value}->neutral")
+         return "neutral"
+     if "not_applicable" in collapsed or collapsed == "na":
+         notes.append(f"expected_effects:{key}:{value}->not_applicable")
+         return "not_applicable"
+     notes.append(f"expected_effects:{key}:{value}->unknown")
+     return "unknown"
+ 
+ 
+ if __name__ == "__main__":
+     main()
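The fallback cascade and the effect-normalization rules above can be exercised in isolation. Below is a minimal standalone sketch of the free-text-to-canonical-effect mapping (reimplemented outside the MolForge codebase, so the function name and sign handling here are illustrative, not the project's API):

```python
# Standalone sketch of the directional-effect normalization used for
# expected_effects values. Mirrors the mapping described above; sign
# prefixes ("+"/"-") are checked on the raw text before "-" is collapsed.
CANONICAL = {"up", "down", "neutral", "unknown", "not_applicable"}

def normalize_effect(value):
    if value in CANONICAL:
        return value
    text = str(value).lower().strip().replace(" ", "_")
    collapsed = text.replace("-", "_")
    if any(t in collapsed for t in ("increase", "improve", "higher", "upward")) or text.startswith("+"):
        return "up"
    if any(t in collapsed for t in ("decrease", "lower", "reduce", "downward")) or text.startswith("-"):
        return "down"
    if any(t in collapsed for t in ("maintain", "stable", "unchanged", "same")):
        return "neutral"
    if "not_applicable" in collapsed or collapsed == "na":
        return "not_applicable"
    return "unknown"

print(normalize_effect("slightly improved"))  # up
print(normalize_effect("not-applicable"))     # not_applicable
```

Funneling every free-text synonym into a closed vocabulary like this is what lets the Pydantic `Literal` fields accept repaired model output without loosening the schema itself.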
models.py ADDED
@@ -0,0 +1,216 @@
+ """Typed models for the MolForge OpenEnv environment."""
+ 
+ from __future__ import annotations
+ 
+ from typing import Any, Dict, List, Literal, Optional
+ 
+ from openenv.core.env_server.types import Action, Observation, State
+ from pydantic import BaseModel, Field
+ 
+ 
+ EDIT_TYPES = Literal["add_fragment", "substitute", "remove", "undo_last_edit"]
+ ACTION_TYPES = Literal["edit", "run_assay", "submit", "restart", "defer"]
+ TOOL_TYPES = Literal[
+     "evaluate_properties",
+     "dock_target",
+     "assay_toxicity",
+     "estimate_synthesizability",
+     "evaluate_novelty",
+     "search_literature",
+     "run_md_simulation",
+ ]
+ SLOT_TYPES = Literal["warhead", "hinge", "solvent_tail", "back_pocket"]
+ ROLE_TYPES = Literal[
+     "lead_chemist",
+     "toxicologist",
+     "assay_planner",
+     "process_chemist",
+     "team",
+ ]
+ MESSAGE_TYPES = Literal[
+     "proposal",
+     "objection",
+     "risk_flag",
+     "assay_request",
+     "approval",
+     "rejection",
+     "revision_request",
+     "submission_recommendation",
+ ]
+ SEVERITY_TYPES = Literal["low", "medium", "high", "critical"]
+ EFFECT_TYPES = Literal["up", "down", "neutral", "unknown", "not_applicable"]
+ COORDINATION_MODES = Literal["single_agent", "multi_agent"]
+ GOVERNANCE_STATES = Literal["ready", "executed", "needs_revision", "policy_veto"]
+ 
+ 
+ class MoleculeSlot(BaseModel):
+     """Visible fragment assignment for a molecule slot."""
+ 
+     slot: SLOT_TYPES
+     fragment: str = Field(..., description="Selected fragment for the slot")
+     editable: bool = Field(default=True, description="Whether the slot is editable")
+ 
+ 
+ class AssayReading(BaseModel):
+     """Structured oracle result surfaced to the agent."""
+ 
+     tool_name: str
+     property_name: str
+     estimate: float = Field(..., ge=0.0, le=1.0)
+     confidence_low: float = Field(..., ge=0.0, le=1.0)
+     confidence_high: float = Field(..., ge=0.0, le=1.0)
+     runs: int = Field(default=1, ge=1)
+     molecule_signature: str
+     summary: str = ""
+ 
+ 
+ class RewardComponent(BaseModel):
+     """Named reward component used in report cards and debugging."""
+ 
+     name: str
+     value: float
+     explanation: str
+ 
+ 
+ class ConstraintCheck(BaseModel):
+     """Constraint status based only on currently visible evidence."""
+ 
+     name: str
+     target: str
+     satisfied: Optional[bool] = None
+     actual: Optional[float] = None
+     evidence_status: Literal["known", "unknown"] = "unknown"
+ 
+ 
+ class AgentMessage(BaseModel):
+     """Structured inter-agent communication message."""
+ 
+     message_id: str = ""
+     sender: ROLE_TYPES
+     receiver: str = "team"
+     message_type: MESSAGE_TYPES
+     severity: SEVERITY_TYPES = "low"
+     reference_action_type: Optional[ACTION_TYPES] = None
+     summary: str = Field(default="", max_length=240)
+     payload: Dict[str, Any] = Field(default_factory=dict)
+ 
+ 
+ class RoleObservation(BaseModel):
+     """Role-specific structured observation slice."""
+ 
+     role: ROLE_TYPES
+     local_objective: str
+     permissions: List[str] = Field(default_factory=list)
+     observation: Dict[str, Any] = Field(default_factory=dict)
+ 
+ 
+ class GovernanceStatus(BaseModel):
+     """Outcome of the multi-agent review process for the last turn."""
+ 
+     status: GOVERNANCE_STATES = "ready"
+     explanation: str = ""
+     required_roles: List[str] = Field(default_factory=list)
+     approvals: List[str] = Field(default_factory=list)
+     objections: List[str] = Field(default_factory=list)
+     vetoes: List[str] = Field(default_factory=list)
+     executable: bool = True
+ 
+ 
+ class MolForgeAction(Action):
+     """Single team turn action spanning edits, assays, messages, and submission."""
+ 
+     action_type: ACTION_TYPES = Field(
+         ..., description="High-level action type to execute this turn"
+     )
+     acting_role: ROLE_TYPES = Field(
+         default="lead_chemist",
+         description="Role claiming ownership of the executable team decision",
+     )
+     edit_type: Optional[EDIT_TYPES] = Field(
+         default=None, description="Edit subtype when action_type is edit"
+     )
+     slot: Optional[SLOT_TYPES] = Field(
+         default=None, description="Editable molecular slot when performing edits"
+     )
+     fragment: Optional[str] = Field(
+         default=None, description="Fragment identifier for edit actions"
+     )
+     tool_name: Optional[TOOL_TYPES] = Field(
+         default=None, description="Oracle or tool name for run_assay actions"
+     )
+     messages: List[AgentMessage] = Field(
+         default_factory=list,
+         description="Structured multi-agent communication bundle for this decision turn",
+     )
+     rationale: str = Field(
+         default="",
+         description="Short explanation of why the final decision should help",
+         max_length=400,
+     )
+     evidence: List[str] = Field(
+         default_factory=list,
+         description="Visible observation facts supporting the action; do not include hidden state.",
+         max_length=5,
+     )
+     expected_effects: Dict[str, EFFECT_TYPES] = Field(
+         default_factory=dict,
+         description="Directional public prediction for potency, toxicity, synth, novelty, or budget.",
+     )
+ 
+ 
+ class MolForgeObservation(Observation):
+     """Observation emitted after reset and each step."""
+ 
+     scenario_id: str
+     difficulty: str
+     state_label: str = "[start]"
+     state_path: List[str] = Field(default_factory=list)
+     coordination_mode: COORDINATION_MODES = "multi_agent"
+     enabled_roles: List[str] = Field(default_factory=list)
+     task_brief: str
+     target_name: str
+     current_molecule: str
+     molecule_slots: List[MoleculeSlot] = Field(default_factory=list)
+     editable_slots: List[str] = Field(default_factory=list)
+     step_index: int = Field(default=0, ge=0)
+     max_steps: int = Field(default=0, ge=1)
+     remaining_budget: int = Field(default=0, ge=0)
+     budget_used: int = Field(default=0, ge=0)
+     max_budget: int = Field(default=0, ge=1)
+     known_assays: List[AssayReading] = Field(default_factory=list)
+     role_observations: List[RoleObservation] = Field(default_factory=list)
+     message_log: List[AgentMessage] = Field(default_factory=list)
+     governance: GovernanceStatus = Field(default_factory=GovernanceStatus)
+     last_transition_summary: str = ""
+     visible_metrics: Dict[str, float] = Field(default_factory=dict)
+     constraint_status: List[ConstraintCheck] = Field(default_factory=list)
+     reward_breakdown: List[RewardComponent] = Field(default_factory=list)
+     allowed_actions: List[str] = Field(default_factory=list)
+     report_card: str = ""
+ 
+ 
+ class MolForgeState(State):
+     """Internal environment state surfaced through the state() API."""
+ 
+     scenario_id: str = ""
+     difficulty: str = ""
+     state_label: str = "[start]"
+     state_path: List[str] = Field(default_factory=list)
+     coordination_mode: COORDINATION_MODES = "multi_agent"
+     enabled_roles: List[str] = Field(default_factory=list)
+     target_name: str = ""
+     current_molecule: str = ""
+     remaining_budget: int = 0
+     budget_used: int = 0
+     max_budget: int = 0
+     visited_states: int = 0
+     known_assay_count: int = 0
+     invalid_action_count: int = 0
+     objection_count: int = 0
+     oracle_call_count: int = 0
+     message_count: int = 0
+     decision_count: int = 0
+     submitted: bool = False
+     last_error_code: str = ""
+     reward_total: float = 0.0
+     metadata: Dict[str, Any] = Field(default_factory=dict)
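The `Literal` unions in models.py pin each field to a closed vocabulary, which is what the repair helpers lean on. A dependency-free sketch of the same membership check (the allowed values are copied from the Literals above; the helper name and error format are illustrative, not part of the project):

```python
# Closed vocabularies copied from the Literal types in models.py.
ACTION_TYPES = {"edit", "run_assay", "submit", "restart", "defer"}
EDIT_TYPES = {"add_fragment", "substitute", "remove", "undo_last_edit"}
SLOT_TYPES = {"warhead", "hinge", "solvent_tail", "back_pocket"}

def validate_action(data: dict) -> list:
    """Return a list of schema violations; an empty list means valid."""
    errors = []
    if data.get("action_type") not in ACTION_TYPES:
        errors.append(f"bad action_type: {data.get('action_type')!r}")
    # Edit-specific fields are only constrained when the action is an edit.
    if data.get("action_type") == "edit":
        if data.get("edit_type") not in EDIT_TYPES:
            errors.append(f"bad edit_type: {data.get('edit_type')!r}")
        if data.get("slot") not in SLOT_TYPES:
            errors.append(f"bad slot: {data.get('slot')!r}")
    return errors

print(validate_action({"action_type": "edit", "edit_type": "substitute", "slot": "warhead"}))  # []
print(validate_action({"action_type": "replace"}))  # one violation
```

In the real environment Pydantic performs this validation automatically when `MolForgeAction(**data)` is constructed; the sketch only makes the closed-vocabulary idea explicit.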
molforge_grpo_official_submission.ipynb ADDED
@@ -0,0 +1,277 @@
+ {
+  "cells": [
+   {
+    "cell_type": "markdown",
+    "metadata": {},
+    "source": [
+     "# MolForge GRPO Training Pipeline\n",
+     "This notebook implements the Reinforcement Learning (GRPO) training pipeline for the MolForge environment.\n",
+     "We train the model using a **Proposer-Critic-Selector** architecture and targeted **reward shaping** to overcome local minima."
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "metadata": {},
+    "outputs": [],
+    "source": [
+     "!pip install -U \"unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git\"\n",
+     "!pip install -U \"trl>=0.21.0\" peft accelerate bitsandbytes datasets matplotlib pandas huggingface_hub \"openenv-core[core]>=0.2.3\" rdkit jmespath xformers"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "metadata": {},
+    "outputs": [],
+    "source": [
+     "import os\n",
+     "import sys\n",
+     "from pathlib import Path\n",
+     "\n",
+     "# Clone the repository\n",
+     "if not Path(\"/content/molt_lab\").exists():\n",
+     "    !git clone https://github.com/Adhitya-Vardhan/molt_lab.git /content/molt_lab\n",
+     "\n",
+     "# Add project root to path\n",
+     "if \"/content/molt_lab\" not in sys.path:\n",
+     "    sys.path.insert(0, \"/content/molt_lab\")\n",
+     "\n",
+     "# Change working directory\n",
+     "os.chdir(\"/content/molt_lab\")"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "metadata": {},
+    "outputs": [],
+    "source": [
+     "import time\n",
+     "import os\n",
+     "\n",
+     "# Training Configuration\n",
+     "os.environ[\"MOLFORGE_REWARD_MODE\"] = \"curriculum\"\n",
+     "os.environ[\"MOLFORGE_TRAINING_RANDOMIZATION\"] = \"1\"\n",
+     "\n",
+     "RL_MAX_STEPS = 80\n",
+     "NUM_GENERATIONS = 2\n",
+     "PER_DEVICE_BATCH = 2\n",
+     "GRAD_ACCUM = 4\n",
+     "LEARNING_RATE = 2e-6\n",
+     "MAX_SEQ_LENGTH = 2048\n",
+     "MAX_PROMPT_LENGTH = 1536\n",
+     "MAX_COMPLETION_LENGTH = 384\n",
+     "\n",
+     "RUN_NAME = time.strftime(\"molforge_grpo_%Y%m%d_%H%M%S\")\n",
+     "OUTPUT_DIR = Path(f\"/content/molforge_rl_runs/{RUN_NAME}\")\n",
+     "ADAPTER_SAVE_DIR = OUTPUT_DIR / \"adapters\"\n",
+     "PLOT_DIR = OUTPUT_DIR / \"plots\"\n",
+     "\n",
+     "OUTPUT_DIR.mkdir(parents=True, exist_ok=True)\n",
+     "PLOT_DIR.mkdir(parents=True, exist_ok=True)"
+    ]
+   },
+   {
+    "cell_type": "markdown",
+    "metadata": {},
+    "source": [
+     "### Reward Function & OpenEnv Integration\n",
+     "We implement a custom reward function that wraps the native `MolForgeEnvironment`.\n",
+     "To prevent \"reward hacking\" (where the model endlessly farms `run_assay` for safe points), we apply targeted reward shaping."
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "metadata": {},
+    "outputs": [],
+    "source": [
+     "import json\n",
+     "from typing import Any, Dict, Tuple\n",
+     "from inference_common import (\n",
+     "    MolForgeAction,\n",
+     "    attach_reasoning_fields,\n",
+     "    attach_team_messages,\n",
+     "    extract_json,\n",
+     ")\n",
+     "from server.molforge_environment import MolForgeEnvironment\n",
+     "from models import MolForgeState\n",
+     "\n",
+     "def replay_to_state(record: dict[str, Any]) -> MolForgeEnvironment:\n",
+     "    env = MolForgeEnvironment()\n",
+     "    env._state = MolForgeState(**record[\"state\"])\n",
+     "    env._molecule = dict(record[\"molecule\"])\n",
+     "    env._scenario = [s for s in env.SCENARIOS if s.scenario_id == env._state.scenario_id][0]\n",
+     "    return env\n",
+     "\n",
+     "def evaluate_completion(prompt_str: str, completion_str: str, record: dict[str, Any]) -> Tuple[float, dict]:\n",
+     "    diagnostics = {\"valid_json\": False}\n",
+     "    try:\n",
+     "        action_dict = extract_json(completion_str)\n",
+     "        action = MolForgeAction(**action_dict)\n",
+     "    except Exception:\n",
+     "        return -1.2, diagnostics\n",
+     "\n",
+     "    diagnostics[\"valid_json\"] = True\n",
+     "    env = replay_to_state(record)\n",
+     "\n",
+     "    # Create empty observation and attach reasoning\n",
+     "    observation = env._build_observation(reward=0.0, done=False, reward_components=[])\n",
+     "    action = attach_team_messages(observation, attach_reasoning_fields(observation, action))\n",
+     "\n",
+     "    # Step the OpenEnv environment\n",
+     "    next_observation = env.step(action)\n",
+     "    reward = float(next_observation.reward)\n",
+     "    grader_scores = next_observation.metadata.get(\"terminal_grader_scores\", {})\n",
+     "\n",
+     "    # --- ANTI-REWARD-HACKING SHAPING ---\n",
+     "    if action.action_type == \"run_assay\" and reward > 0:\n",
+     "        reward *= 0.25  # Nerf assay farming\n",
+     "    elif action.action_type == \"submit\":\n",
+     "        sub_score = float(grader_scores.get(\"submission_score\", 0.0))\n",
+     "        if sub_score > 0.0:\n",
+     "            reward += sub_score * 3.0  # Massive multiplier for submissions\n",
+     "    elif action.action_type == \"edit\" and reward > 0:\n",
+     "        reward *= 1.5  # Boost edits\n",
+     "\n",
+     "    diagnostics.update({\n",
+     "        \"action_type\": action.action_type,\n",
+     "        \"reward\": reward,\n",
+     "        \"done\": next_observation.done,\n",
+     "    })\n",
+     "    return reward, diagnostics\n",
+     "\n",
+     "def molforge_reward_func(prompts, completions, **kwargs) -> list[float]:\n",
+     "    rewards = []\n",
+     "    dataset_records = kwargs.get(\"record\", [])\n",
+     "\n",
+     "    for prompt_list, completion, record in zip(prompts, completions, dataset_records):\n",
+     "        prompt_str = prompt_list[-1][\"content\"] if isinstance(prompt_list, list) else str(prompt_list)\n",
+     "        completion_str = completion[0][\"content\"] if isinstance(completion, list) else str(completion)\n",
+     "        reward, _ = evaluate_completion(prompt_str, completion_str, record)\n",
+     "        rewards.append(reward)\n",
+     "    return rewards"
+    ]
+   },
+   {
+    "cell_type": "markdown",
+    "metadata": {},
+    "source": [
+     "### Model & Tokenizer Loading"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "metadata": {},
+    "outputs": [],
+    "source": [
+     "from unsloth import FastLanguageModel\n",
+     "\n",
+     "# Set this to your SFT checkpoint\n",
+     "# You can set this to a local path or a Hugging Face repo\n",
+     "SFT_ADAPTER_PATH = \"/content/drive/MyDrive/Qwen_3.5_finetune/qwen3_5_2b_lora_adapters_compact_v4\"  # <-- Change to your path\n",
+     "\n",
+     "print(\"Loading model and applying Unsloth optimizations...\")\n",
+     "model, tokenizer = FastLanguageModel.from_pretrained(\n",
+     "    model_name=SFT_ADAPTER_PATH,\n",
+     "    max_seq_length=MAX_SEQ_LENGTH,\n",
+     "    dtype=None,\n",
+     "    load_in_4bit=True,\n",
+     ")\n",
+     "\n",
+     "# Enable fast training paths\n",
+     "FastLanguageModel.for_training(model)\n",
+     "\n",
+     "# Extract underlying tokenizer if it is wrapped in a vision processor\n",
+     "if hasattr(tokenizer, \"tokenizer\"):\n",
+     "    tokenizer = tokenizer.tokenizer"
+    ]
+   },
+   {
+    "cell_type": "markdown",
+    "metadata": {},
+    "source": [
+     "### GRPO Training Loop"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "metadata": {},
+    "outputs": [],
+    "source": [
+     "from trl import GRPOConfig, GRPOTrainer\n",
+     "from datasets import Dataset\n",
+     "from scripts.generate_sft_compact_policy_v4_dataset import compact_action_payload, COMPACT_ACTION_SYSTEM_PROMPT\n",
+     "\n",
+     "# Load dataset\n",
+     "def load_prompt_dataset() -> Dataset:\n",
+     "    import json\n",
+     "    data = []\n",
+     "    with open(\"data/molforge_sft_compact_policy_v4.jsonl\", \"r\") as f:\n",
+     "        for line in f:\n",
+     "            record = json.loads(line)\n",
+     "            prompt_text = compact_action_payload(record)\n",
+     "            data.append({\n",
+     "                \"prompt\": [\n",
+     "                    {\"role\": \"system\", \"content\": COMPACT_ACTION_SYSTEM_PROMPT},\n",
+     "                    {\"role\": \"user\", \"content\": prompt_text}\n",
+     "                ],\n",
+     "                \"record\": record\n",
+     "            })\n",
+     "    return Dataset.from_list(data)\n",
+     "\n",
+     "dataset = load_prompt_dataset()\n",
+     "\n",
+     "# Configure GRPO\n",
+     "training_args = GRPOConfig(\n",
+     "    output_dir=str(OUTPUT_DIR),\n",
+     "    learning_rate=LEARNING_RATE,\n",
+     "    per_device_train_batch_size=PER_DEVICE_BATCH,\n",
+     "    gradient_accumulation_steps=GRAD_ACCUM,\n",
+     "    max_prompt_length=MAX_PROMPT_LENGTH,\n",
+     "    max_completion_length=MAX_COMPLETION_LENGTH,\n",
+     "    num_generations=NUM_GENERATIONS,\n",
+     "    max_steps=RL_MAX_STEPS,\n",
+     "    logging_steps=1,\n",
+     "    save_steps=25,\n",
+     "    bf16=True,\n",
+     "    report_to=\"none\",\n",
+     "    log_completions=True,\n",
+     ")\n",
+     "\n",
+     "# Initialize Trainer\n",
+     "trainer = GRPOTrainer(\n",
+     "    model=model,\n",
+     "    reward_funcs=molforge_reward_func,\n",
+     "    args=training_args,\n",
+     "    train_dataset=dataset,\n",
+     "    processing_class=tokenizer,\n",
+     ")\n",
+     "\n",
+     "print(\"Starting GRPO Training...\")\n",
+     "trainer.train()\n",
+     "\n",
+     "print(f\"Training complete. Saving adapters to {ADAPTER_SAVE_DIR}\")\n",
+     "trainer.save_model(str(ADAPTER_SAVE_DIR))\n",
+     "tokenizer.save_pretrained(str(ADAPTER_SAVE_DIR))"
+    ]
+   }
+  ],
+  "metadata": {
+   "colab": {
+    "provenance": []
+   },
+   "kernelspec": {
+    "display_name": "Python 3",
+    "name": "python3"
+   },
+   "language_info": {
+    "name": "python"
+   }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+ }
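The anti-reward-hacking shaping in the notebook's reward cell is a small set of multipliers. Pulled out as a pure function for clarity (the function name is illustrative; the 0.25, 3.0, and 1.5 factors are copied from the cell above):

```python
def shape_reward(action_type, reward, submission_score=0.0):
    """Sketch of the anti-reward-hacking shaping used in the GRPO reward cell."""
    if action_type == "run_assay" and reward > 0:
        return reward * 0.25  # damp assay farming
    if action_type == "submit" and submission_score > 0.0:
        return reward + submission_score * 3.0  # strongly favour real submissions
    if action_type == "edit" and reward > 0:
        return reward * 1.5  # encourage molecule edits
    return reward  # negative rewards and other actions pass through unchanged

print(shape_reward("run_assay", 0.4))    # assay reward is quartered
print(shape_reward("submit", 0.2, 0.5))  # submission bonus added on top
```

The key design point is that only positive assay rewards are damped: penalties still flow through at full strength, so the policy cannot hide mistakes behind the assay discount.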
molforge_oracles.py ADDED
@@ -0,0 +1,274 @@
+ """RDKit/TDC-backed molecular oracle helpers for MolForge."""
+ 
+ from __future__ import annotations
+ 
+ import math
+ from functools import lru_cache
+ from typing import Any, Dict, Mapping, Optional
+ 
+ 
+ WARHEAD_SMILES = {
+     "acrylamide": "C(=O)NC=C",
+     "reversible_cyanoacrylamide": "C(=O)NC(=C)C#N",
+     "nitrile": "C#N",
+     "vinyl_sulfonamide": "S(=O)(=O)NC=C",
+ }
+ 
+ HINGE_SMILES = {
+     "azaindole": "c1[nH]c2ccccc2n1",
+     "pyridine": "c1ccncc1",
+     "fluorophenyl": "c1ccc(F)cc1",
+     "quinazoline": "c1ncnc2ccccc12",
+ }
+ 
+ TAIL_SMILES = {
+     "morpholine": "N1CCOCC1",
+     "piperazine": "N1CCNCC1",
+     "cyclopropyl": "C1CC1",
+     "dimethylamino": "N(C)C",
+ }
+ 
+ BACK_POCKET_SMILES = {
+     "methoxy": "OC",
+     "chloro": "Cl",
+     "trifluoromethyl": "C(F)(F)F",
+     "cyano": "C#N",
+ }
+ 
+ 
+ def assemble_surrogate_smiles(molecule: Mapping[str, str]) -> str:
+     """Build a valid substituted-aryl SMILES for RDKit/TDC scoring."""
+ 
+     return (
+         f"c%10({WARHEAD_SMILES[molecule['warhead']]})"
+         f"c({HINGE_SMILES[molecule['hinge']]})"
+         f"c({TAIL_SMILES[molecule['solvent_tail']]})"
+         f"c({BACK_POCKET_SMILES[molecule['back_pocket']]})cc%10"
+     )
+ 
+ 
+ def oracle_backend_status() -> Dict[str, bool]:
+     """Report which external chemistry engines are importable."""
+ 
+     return {"rdkit": _rdkit_modules() is not None, "tdc": _tdc_oracle_class() is not None}
+ 
+ 
+ def evaluate_with_rdkit_tdc(
+     molecule: Mapping[str, str],
+     fallback_properties: Mapping[str, float],
+ ) -> Dict[str, float]:
+     """Blend RDKit/TDC medicinal-chemistry signals into MolForge properties."""
+ 
+     modules = _rdkit_modules()
+     if modules is None:
+         return dict(fallback_properties)
+ 
+     Chem = modules["Chem"]
+     Descriptors = modules["Descriptors"]
+     Crippen = modules["Crippen"]
+     Lipinski = modules["Lipinski"]
+     QED = modules["QED"]
+     rdFingerprintGenerator = modules["rdFingerprintGenerator"]
+     rdMolDescriptors = modules["rdMolDescriptors"]
+     DataStructs = modules["DataStructs"]
+ 
+     smiles = assemble_surrogate_smiles(molecule)
+     mol = Chem.MolFromSmiles(smiles)
+     if mol is None:
+         return dict(fallback_properties)
+     canonical = Chem.MolToSmiles(mol)
+ 
+     qed_value = _tdc_oracle_score("QED", canonical)
+     if qed_value is None:
+         qed_value = float(QED.qed(mol))
+     qed_score = _clamp01(qed_value)
+ 
+     sa_value = _tdc_oracle_score("SA", canonical)
+     synth_score = _normalize_sa(sa_value)
+     if synth_score is None:
+         synth_score = _rdkit_synth_proxy(mol, Descriptors, Lipinski, rdMolDescriptors)
+ 
+     logp = float(Crippen.MolLogP(mol))
+     tpsa = float(Descriptors.TPSA(mol))
+     mol_wt = float(Descriptors.MolWt(mol))
+     rotatable = float(Lipinski.NumRotatableBonds(mol))
+     aromatic_rings = float(rdMolDescriptors.CalcNumAromaticRings(mol))
+ 
+     property_risk = _property_risk(logp=logp, tpsa=tpsa, mol_wt=mol_wt, rotatable=rotatable)
+     structural_risk = _structural_alert_risk(molecule)
+     rdkit_toxicity = _clamp01(0.55 * property_risk + 0.45 * structural_risk)
+ 
+     target_fit = _target_fit_proxy(
+         molecule,
+         qed_score=qed_score,
+         logp=logp,
+         tpsa=tpsa,
+         aromatic_rings=aromatic_rings,
+     )
+     novelty = _novelty_proxy(mol, Chem, rdFingerprintGenerator, DataStructs)
+ 
+     return {
+         "potency": round(_blend(fallback_properties["potency"], target_fit, 0.35), 4),
+         "safety": round(_clamp01(1.0 - _blend(fallback_properties["toxicity"], rdkit_toxicity, 0.25)), 4),
+         "toxicity": round(_blend(fallback_properties["toxicity"], rdkit_toxicity, 0.25), 4),
+         "synth": round(_blend(fallback_properties["synth"], synth_score, 0.55), 4),
+         "novelty": round(_blend(fallback_properties["novelty"], novelty, 0.50), 4),
+     }
+ 
+ 
+ @lru_cache(maxsize=1)
+ def _rdkit_modules() -> Optional[Dict[str, Any]]:
+     try:
+         from rdkit import Chem, DataStructs
+         from rdkit.Chem import Crippen, Descriptors, Lipinski, QED, rdFingerprintGenerator, rdMolDescriptors
+     except Exception:
+         return None
+     return {
+         "Chem": Chem,
+         "Crippen": Crippen,
+         "DataStructs": DataStructs,
+         "Descriptors": Descriptors,
+         "Lipinski": Lipinski,
+         "QED": QED,
+         "rdFingerprintGenerator": rdFingerprintGenerator,
+         "rdMolDescriptors": rdMolDescriptors,
+     }
+ 
+ 
+ @lru_cache(maxsize=1)
+ def _tdc_oracle_class() -> Optional[Any]:
+     try:
+         from tdc import Oracle
+     except Exception:
+         return None
+     return Oracle
+ 
+ 
+ @lru_cache(maxsize=8)
+ def _tdc_oracle(name: str) -> Optional[Any]:
+     oracle_class = _tdc_oracle_class()
+     if oracle_class is None:
+         return None
+     try:
+         return oracle_class(name=name)
+     except Exception:
+         return None
+ 
+ 
+ def _tdc_oracle_score(name: str, smiles: str) -> Optional[float]:
+     oracle = _tdc_oracle(name)
+     if oracle is None:
+         return None
+     try:
+         value = oracle(smiles)
+     except Exception:
+         return None
+     try:
+         return float(value)
+     except (TypeError, ValueError):
+         return None
+ 
+ 
+ def _normalize_sa(value: Optional[float]) -> Optional[float]:
+     if value is None:
+         return None
+     if 0.0 <= value <= 1.0:
176
+ return _clamp01(value)
177
+ return _clamp01((10.0 - value) / 9.0)
178
+
179
+
180
+ def _rdkit_synth_proxy(mol: Any, Descriptors: Any, Lipinski: Any, rdMolDescriptors: Any) -> float:
181
+ mol_wt = float(Descriptors.MolWt(mol))
182
+ rotatable = float(Lipinski.NumRotatableBonds(mol))
183
+ stereocenters = float(rdMolDescriptors.CalcNumAtomStereoCenters(mol))
184
+ ring_count = float(rdMolDescriptors.CalcNumRings(mol))
185
+ aromatic_rings = float(rdMolDescriptors.CalcNumAromaticRings(mol))
186
+ complexity = (
187
+ max(0.0, mol_wt - 350.0) / 260.0
188
+ + rotatable / 12.0
189
+ + stereocenters / 4.0
190
+ + max(0.0, ring_count - 3.0) / 4.0
191
+ + aromatic_rings / 8.0
192
+ )
193
+ return _clamp01(1.0 - 0.35 * complexity)
194
+
195
+
196
+ def _property_risk(*, logp: float, tpsa: float, mol_wt: float, rotatable: float) -> float:
197
+ logp_risk = _sigmoid((logp - 3.5) / 1.15)
198
+ size_risk = _sigmoid((mol_wt - 500.0) / 90.0)
199
+ flexibility_risk = _sigmoid((rotatable - 8.0) / 2.5)
200
+ polarity_risk = _sigmoid((tpsa - 130.0) / 32.0)
201
+ return _clamp01(0.42 * logp_risk + 0.24 * size_risk + 0.20 * flexibility_risk + 0.14 * polarity_risk)
202
+
203
+
204
+ def _structural_alert_risk(molecule: Mapping[str, str]) -> float:
205
+ risk = 0.18
206
+ if molecule["warhead"] == "acrylamide":
207
+ risk += 0.12
208
+ if molecule["warhead"] == "vinyl_sulfonamide":
209
+ risk += 0.22
210
+ if molecule["solvent_tail"] == "dimethylamino":
211
+ risk += 0.24
212
+ if molecule["back_pocket"] == "trifluoromethyl":
213
+ risk += 0.20
214
+ if molecule["hinge"] == "fluorophenyl" and molecule["back_pocket"] in {"chloro", "trifluoromethyl"}:
215
+ risk += 0.12
216
+ if molecule["solvent_tail"] in {"morpholine", "piperazine"}:
217
+ risk -= 0.08
218
+ if molecule["warhead"] == "nitrile":
219
+ risk -= 0.08
220
+ return _clamp01(risk)
221
+
222
+
223
+ def _target_fit_proxy(
224
+ molecule: Mapping[str, str],
225
+ *,
226
+ qed_score: float,
227
+ logp: float,
228
+ tpsa: float,
229
+ aromatic_rings: float,
230
+ ) -> float:
231
+ lipophilic_match = 1.0 - min(abs(logp - 3.0) / 4.0, 1.0)
232
+ polarity_match = 1.0 - min(abs(tpsa - 85.0) / 110.0, 1.0)
233
+ pocket_match = 0.0
234
+ if molecule["hinge"] in {"azaindole", "quinazoline"}:
235
+ pocket_match += 0.18
236
+ if molecule["back_pocket"] in {"cyano", "chloro", "trifluoromethyl"}:
237
+ pocket_match += 0.14
238
+ if molecule["warhead"] in {"acrylamide", "reversible_cyanoacrylamide", "nitrile"}:
239
+ pocket_match += 0.12
240
+ if aromatic_rings >= 2:
241
+ pocket_match += 0.08
242
+ return _clamp01(0.20 + 0.30 * lipophilic_match + 0.22 * polarity_match + 0.18 * qed_score + pocket_match)
243
+
244
+
245
+ def _novelty_proxy(mol: Any, Chem: Any, rdFingerprintGenerator: Any, DataStructs: Any) -> float:
246
+ refs = [
247
+ "c%10(C(=O)NC=C)c(c1ccncc1)c(C1CC1)c(OC)cc%10",
248
+ "c%10(C#N)c(c1ccncc1)c(N1CCOCC1)c(C#N)cc%10",
249
+ "c%10(C(=O)NC=C)c(c1ccc(F)cc1)c(N(C)C)c(Cl)cc%10",
250
+ ]
251
+ generator = rdFingerprintGenerator.GetMorganGenerator(radius=2, fpSize=1024)
252
+ fp = generator.GetFingerprint(mol)
253
+ similarities = []
254
+ for ref in refs:
255
+ ref_mol = Chem.MolFromSmiles(ref)
256
+ if ref_mol is None:
257
+ continue
258
+ ref_fp = generator.GetFingerprint(ref_mol)
259
+ similarities.append(float(DataStructs.TanimotoSimilarity(fp, ref_fp)))
260
+ if not similarities:
261
+ return 0.5
262
+ return _clamp01(1.0 - max(similarities))
263
+
264
+
265
+ def _blend(fallback_value: float, oracle_value: float, oracle_weight: float) -> float:
266
+ return _clamp01((1.0 - oracle_weight) * fallback_value + oracle_weight * oracle_value)
267
+
268
+
269
+ def _sigmoid(value: float) -> float:
270
+ return 1.0 / (1.0 + math.exp(-value))
271
+
272
+
273
+ def _clamp01(value: float) -> float:
274
+ return min(max(float(value), 0.0), 1.0)
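Note: the helper math above is small enough to sanity-check standalone. A minimal sketch reproducing `_blend`, `_normalize_sa`, and `_clamp01` outside the module (the input values here are illustrative, not taken from the environment):

```python
def clamp01(value: float) -> float:
    # Clip any score into the [0, 1] property range.
    return min(max(float(value), 0.0), 1.0)


def blend(fallback_value: float, oracle_value: float, oracle_weight: float) -> float:
    # Convex combination: weight 0 trusts the fallback, weight 1 trusts the oracle.
    return clamp01((1.0 - oracle_weight) * fallback_value + oracle_weight * oracle_value)


def normalize_sa(value):
    # TDC's SA oracle returns roughly 1 (easy) .. 10 (hard); map to a 0-1 "ease" score.
    if value is None:
        return None
    if 0.0 <= value <= 1.0:
        return clamp01(value)
    return clamp01((10.0 - value) / 9.0)


print(round(blend(0.60, 0.90, 0.35), 4))   # -> 0.705, fallback pulled toward the oracle
print(round(normalize_sa(2.5), 4))         # -> 0.8333, easy-to-make molecule scores high
```

The clamp after blending matters: even if an oracle misbehaves and returns a value outside [0, 1], the blended property stays in range.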
openenv.yaml ADDED
@@ -0,0 +1,6 @@
+ spec_version: 1
+ name: molforge
+ type: space
+ runtime: fastapi
+ app: server.app:app
+ port: 8000
openenv_shim.py ADDED
@@ -0,0 +1,114 @@
+ # -*- coding: utf-8 -*-
+ """Lightweight openenv-core shim for environments that only need the base types.
+
+ Import this module **before** any ``from openenv.core...`` imports when the
+ full ``openenv-core`` package is not installed (e.g. Colab RL training). It
+ registers minimal stubs into ``sys.modules`` so that the following imports
+ work identically to the real package:
+
+     from openenv.core.env_server.types import Action, Observation, State
+     from openenv.core.env_server.interfaces import Environment
+
+ Usage::
+
+     try:
+         import openenv  # real package available
+     except ImportError:
+         import openenv_shim  # registers lightweight stubs
+ """
+
+ from __future__ import annotations
+
+ import sys
+ from abc import ABC, abstractmethod
+ from types import ModuleType
+ from typing import Any, Dict
+
+ from pydantic import BaseModel, Field
+
+
+ # ── Base types (mirror openenv.core.env_server.types) ────────────────────
+
+ class Action(BaseModel):
+     """Minimal action base matching openenv-core's Action."""
+
+     metadata: Dict[str, Any] = Field(default_factory=dict)
+
+
+ class Observation(BaseModel):
+     """Minimal observation base matching openenv-core's Observation."""
+
+     done: bool = False
+     reward: float = 0.0
+     metadata: Dict[str, Any] = Field(default_factory=dict)
+
+
+ class State(BaseModel):
+     """Minimal state base matching openenv-core's State."""
+
+     episode_id: str = ""
+     step_count: int = 0
+
+
+ # ── Environment ABC (mirror openenv.core.env_server.interfaces) ──────────
+
+ class Environment(ABC):
+     """Minimal environment ABC matching openenv-core's Environment."""
+
+     SUPPORTS_CONCURRENT_SESSIONS: bool = False
+
+     def __init__(self, **_kwargs: Any):
+         pass
+
+     @abstractmethod
+     def reset(self, **kwargs: Any) -> Any:
+         ...
+
+     @abstractmethod
+     def step(self, action: Any, **kwargs: Any) -> Any:
+         ...
+
+     @property
+     @abstractmethod
+     def state(self) -> Any:
+         ...
+
+
+ # ── Register shim modules into sys.modules ───────────────────────────────
+
+ def _register() -> None:
+     """Inject stub modules so ``from openenv.core...`` imports resolve."""
+
+     # Build the types module
+     types_mod = ModuleType("openenv.core.env_server.types")
+     types_mod.Action = Action  # type: ignore[attr-defined]
+     types_mod.Observation = Observation  # type: ignore[attr-defined]
+     types_mod.State = State  # type: ignore[attr-defined]
+
+     # Build the interfaces module
+     interfaces_mod = ModuleType("openenv.core.env_server.interfaces")
+     interfaces_mod.Environment = Environment  # type: ignore[attr-defined]
+
+     # Build the package hierarchy
+     openenv_mod = ModuleType("openenv")
+     core_mod = ModuleType("openenv.core")
+     env_server_mod = ModuleType("openenv.core.env_server")
+
+     # Wire up sub-modules
+     env_server_mod.types = types_mod  # type: ignore[attr-defined]
+     env_server_mod.interfaces = interfaces_mod  # type: ignore[attr-defined]
+     core_mod.env_server = env_server_mod  # type: ignore[attr-defined]
+     openenv_mod.core = core_mod  # type: ignore[attr-defined]
+
+     # Register everything
+     for name, mod in [
+         ("openenv", openenv_mod),
+         ("openenv.core", core_mod),
+         ("openenv.core.env_server", env_server_mod),
+         ("openenv.core.env_server.types", types_mod),
+         ("openenv.core.env_server.interfaces", interfaces_mod),
+     ]:
+         sys.modules.setdefault(name, mod)
+
+
+ _register()
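Note: the shim's core trick is that `from X.Y import Z` consults `sys.modules` before touching the filesystem, so pre-registering `ModuleType` stubs makes the imports resolve. A minimal standalone sketch of the same technique under a hypothetical package name (`demo_pkg` exists only in this demo):

```python
import sys
from types import ModuleType

# Build a fake two-level package entirely in memory.
pkg = ModuleType("demo_pkg")
sub = ModuleType("demo_pkg.types")
sub.GREETING = "hello"
pkg.types = sub  # wire the submodule onto its parent

# setdefault means a genuinely installed package would win over the stub.
for name, mod in [("demo_pkg", pkg), ("demo_pkg.types", sub)]:
    sys.modules.setdefault(name, mod)

# This import never touches disk; it is served from sys.modules.
from demo_pkg.types import GREETING
print(GREETING)  # -> hello
```

Using `sys.modules.setdefault` rather than assignment is the same safety choice `_register` makes: if `openenv-core` is importable, the shim becomes a no-op.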
pyproject.toml ADDED
@@ -0,0 +1,32 @@
+ [build-system]
+ requires = ["setuptools>=69", "wheel"]
+ build-backend = "setuptools.build_meta"
+
+ [project]
+ name = "openenv-molforge"
+ version = "0.1.0"
+ description = "MolForge: a deterministic medicinal-chemistry OpenEnv environment."
+ readme = "README.md"
+ requires-python = ">=3.10"
+ dependencies = [
+     "openenv-core[core]>=0.2.3",
+     "pydantic>=2.8.0",
+     "rdkit>=2023.9.5,<2024.3.1; python_version < '3.13'",
+     "rdkit>=2026.3.1; python_version >= '3.13'",
+ ]
+
+ [project.optional-dependencies]
+ dev = [
+     "pytest>=8.0.0",
+ ]
+ tdc = [
+     "pytdc>=1.1.0; python_version < '3.13'",
+ ]
+
+ [project.scripts]
+ server = "molforge.server.app:main"
+
+ [tool.setuptools]
+ include-package-data = true
+ packages = ["molforge", "molforge.server"]
+ package-dir = { "molforge" = ".", "molforge.server" = "server" }
scenarios.py ADDED
@@ -0,0 +1,504 @@
+ """Scenario configs and RDKit/TDC-backed surrogate chemistry for MolForge."""
+
+ from __future__ import annotations
+
+ from dataclasses import dataclass, field
+ from typing import Dict, Iterable, List, Mapping
+
+
+ SLOT_ORDER = ["warhead", "hinge", "solvent_tail", "back_pocket"]
+ EDITABLE_SLOTS = ["warhead", "hinge", "solvent_tail", "back_pocket"]
+
+
+ @dataclass(frozen=True)
+ class FragmentSpec:
+     """Per-fragment surrogate property contributions."""
+
+     name: str
+     potency: float
+     safety: float
+     synth: float
+     novelty: float
+     literature_hint: str
+
+
+ @dataclass(frozen=True)
+ class ScenarioConfig:
+     """Single evaluation scenario."""
+
+     scenario_id: str
+     difficulty: str
+     target_name: str
+     task_brief: str
+     oracle_budget: int
+     max_steps: int
+     starting_scaffold: Mapping[str, str]
+     restart_scaffold: Mapping[str, str]
+     objective_weights: Mapping[str, float]
+     hard_constraints: Mapping[str, float]
+     target_shift_step: int | None = None
+     trap_penalty: bool = False
+     enabled_tools: List[str] = field(default_factory=list)
+     enabled_actions: List[str] = field(default_factory=list)
+     coordination_mode: str = "multi_agent"
+     enabled_roles: List[str] = field(default_factory=list)
+     required_review_roles: List[str] = field(default_factory=list)
+     max_messages_per_turn: int = 4
+     baseline_to_beat: float = 0.5
+
+
+ FRAGMENT_LIBRARY: Dict[str, Dict[str, FragmentSpec]] = {
+     "warhead": {
+         "acrylamide": FragmentSpec(
+             "acrylamide",
+             potency=0.18,
+             safety=-0.03,
+             synth=0.02,
+             novelty=0.03,
+             literature_hint="Covalent warheads often boost KRAS potency but can increase reactivity risk.",
+         ),
+         "reversible_cyanoacrylamide": FragmentSpec(
+             "reversible_cyanoacrylamide",
+             potency=0.16,
+             safety=0.06,
+             synth=-0.04,
+             novelty=0.08,
+             literature_hint="Reversible covalent warheads can preserve potency while softening safety liabilities.",
+         ),
+         "nitrile": FragmentSpec(
+             "nitrile",
+             potency=0.11,
+             safety=0.09,
+             synth=0.05,
+             novelty=0.04,
+             literature_hint="Nitrile warheads are safer but may need stronger pocket complementarity to keep potency.",
+         ),
+         "vinyl_sulfonamide": FragmentSpec(
+             "vinyl_sulfonamide",
+             potency=0.13,
+             safety=-0.07,
+             synth=-0.05,
+             novelty=0.10,
+             literature_hint="Sulfonamide warheads can be potent but often pressure synthesis and safety.",
+         ),
+     },
+     "hinge": {
+         "azaindole": FragmentSpec(
+             "azaindole",
+             potency=0.17,
+             safety=0.01,
+             synth=-0.03,
+             novelty=0.06,
+             literature_hint="Azaindoles are strong binders in KRAS-like pockets when the warhead is well aligned.",
+         ),
+         "pyridine": FragmentSpec(
+             "pyridine",
+             potency=0.10,
+             safety=0.04,
+             synth=0.05,
+             novelty=0.02,
+             literature_hint="Simple heteroaryl hinges improve tractability and keep synthesis accessible.",
+         ),
+         "fluorophenyl": FragmentSpec(
+             "fluorophenyl",
+             potency=0.12,
+             safety=-0.08,
+             synth=0.04,
+             novelty=0.03,
+             literature_hint="Hydrophobic hinge binders can lift affinity while increasing lipophilic liability.",
+         ),
+         "quinazoline": FragmentSpec(
+             "quinazoline",
+             potency=0.15,
+             safety=-0.04,
+             synth=-0.06,
+             novelty=0.05,
+             literature_hint="Quinazolines are potent but can create a heavy, synthesis-taxing scaffold.",
+         ),
+     },
+     "solvent_tail": {
+         "morpholine": FragmentSpec(
+             "morpholine",
+             potency=0.06,
+             safety=0.16,
+             synth=0.07,
+             novelty=0.02,
+             literature_hint="Morpholine tails frequently de-risk hERG and improve solubility.",
+         ),
+         "piperazine": FragmentSpec(
+             "piperazine",
+             potency=0.05,
+             safety=0.10,
+             synth=0.03,
+             novelty=0.03,
+             literature_hint="Basic cyclic tails improve polarity but can trigger clearance concerns if overused.",
+         ),
+         "cyclopropyl": FragmentSpec(
+             "cyclopropyl",
+             potency=0.08,
+             safety=-0.03,
+             synth=0.04,
+             novelty=0.04,
+             literature_hint="Compact hydrophobes sometimes improve fit but rarely help safety.",
+         ),
+         "dimethylamino": FragmentSpec(
+             "dimethylamino",
+             potency=0.04,
+             safety=-0.13,
+             synth=0.02,
+             novelty=0.04,
+             literature_hint="Strongly basic tails can quickly create cardiac and CNS liabilities.",
+         ),
+     },
+     "back_pocket": {
+         "methoxy": FragmentSpec(
+             "methoxy",
+             potency=0.07,
+             safety=0.08,
+             synth=0.06,
+             novelty=0.02,
+             literature_hint="Small polar back-pocket groups often stabilize potency without blowing up toxicity.",
+         ),
+         "chloro": FragmentSpec(
+             "chloro",
+             potency=0.12,
+             safety=-0.12,
+             synth=0.04,
+             novelty=0.02,
+             literature_hint="Halogens often buy potency at the cost of lipophilic risk.",
+         ),
+         "trifluoromethyl": FragmentSpec(
+             "trifluoromethyl",
+             potency=0.14,
+             safety=-0.15,
+             synth=-0.02,
+             novelty=0.06,
+             literature_hint="CF3 groups can strongly improve affinity but frequently over-shoot safety windows.",
+         ),
+         "cyano": FragmentSpec(
+             "cyano",
+             potency=0.10,
+             safety=0.03,
+             synth=0.01,
+             novelty=0.05,
+             literature_hint="Cyano groups are efficient potency handles when hydrophobic groups are too risky.",
+         ),
+     },
+ }
+
+ DEFAULT_TOOL_COSTS: Dict[str, int] = {
+     "evaluate_properties": 50,
+     "search_literature": 100,
+     "dock_target": 300,
+     "estimate_synthesizability": 120,
+     "evaluate_novelty": 80,
+     "assay_toxicity": 2000,
+     "run_md_simulation": 2500,
+ }
+
+
+ SCENARIOS: List[ScenarioConfig] = [
+     ScenarioConfig(
+         scenario_id="level_0_easy",
+         difficulty="easy",
+         target_name="KRAS G12C",
+         task_brief=(
+             "Improve target potency while repairing a mild safety liability and keeping synthesis "
+             "evidence current. The starting scaffold is close, but a strong submission still needs "
+             "the right edit sequence plus assay support."
+         ),
+         oracle_budget=3600,
+         max_steps=7,
+         starting_scaffold={
+             "warhead": "acrylamide",
+             "hinge": "pyridine",
+             "solvent_tail": "cyclopropyl",
+             "back_pocket": "chloro",
+         },
+         restart_scaffold={
+             "warhead": "reversible_cyanoacrylamide",
+             "hinge": "pyridine",
+             "solvent_tail": "morpholine",
+             "back_pocket": "methoxy",
+         },
+         objective_weights={
+             "potency": 0.55,
+             "safety": 0.15,
+             "synth": 0.15,
+             "novelty": 0.15,
+         },
+         hard_constraints={"potency_min": 0.84, "toxicity_max": 0.28, "synth_min": 0.62},
+         enabled_tools=list(DEFAULT_TOOL_COSTS.keys()),
+         enabled_actions=["edit", "run_assay", "submit", "defer", "restart"],
+         enabled_roles=[
+             "lead_chemist",
+             "toxicologist",
+             "assay_planner",
+             "process_chemist",
+         ],
+         required_review_roles=["toxicologist", "assay_planner", "process_chemist"],
+         baseline_to_beat=0.70,
+     ),
+     ScenarioConfig(
+         scenario_id="level_1_medium",
+         difficulty="medium",
+         target_name="KRAS G12C",
+         task_brief=(
+             "Balance potency, toxicity, and synthesizability under budget pressure. The best "
+             "molecules require coordinated safety edits plus current assay evidence."
+         ),
+         oracle_budget=4300,
+         max_steps=8,
+         starting_scaffold={
+             "warhead": "acrylamide",
+             "hinge": "fluorophenyl",
+             "solvent_tail": "dimethylamino",
+             "back_pocket": "chloro",
+         },
+         restart_scaffold={
+             "warhead": "reversible_cyanoacrylamide",
+             "hinge": "azaindole",
+             "solvent_tail": "morpholine",
+             "back_pocket": "cyano",
+         },
+         objective_weights={
+             "potency": 0.42,
+             "safety": 0.33,
+             "synth": 0.13,
+             "novelty": 0.12,
+         },
+         hard_constraints={"potency_min": 0.76, "toxicity_max": 0.34, "synth_min": 0.62},
+         enabled_tools=list(DEFAULT_TOOL_COSTS.keys()),
+         enabled_actions=["edit", "run_assay", "submit", "defer", "restart"],
+         enabled_roles=[
+             "lead_chemist",
+             "toxicologist",
+             "assay_planner",
+             "process_chemist",
+         ],
+         required_review_roles=["toxicologist", "assay_planner", "process_chemist"],
+         baseline_to_beat=0.64,
+     ),
+     ScenarioConfig(
+         scenario_id="level_2_hard",
+         difficulty="hard",
+         target_name="KRAS G12C resistance panel",
+         task_brief=(
+             "Solve a non-stationary design problem with a fixed, problematic core. The starting "
+             "series is a sunk-cost trap, and the target pocket shifts late in the episode."
+         ),
+         oracle_budget=5000,
+         max_steps=9,
+         starting_scaffold={
+             "warhead": "acrylamide",
+             "hinge": "quinazoline",
+             "solvent_tail": "dimethylamino",
+             "back_pocket": "trifluoromethyl",
+         },
+         restart_scaffold={
+             "warhead": "nitrile",
+             "hinge": "azaindole",
+             "solvent_tail": "morpholine",
+             "back_pocket": "cyano",
+         },
+         objective_weights={
+             "potency": 0.38,
+             "safety": 0.32,
+             "synth": 0.16,
+             "novelty": 0.14,
+         },
+         hard_constraints={"potency_min": 0.78, "toxicity_max": 0.46, "synth_min": 0.62},
+         target_shift_step=4,
+         trap_penalty=True,
+         enabled_tools=list(DEFAULT_TOOL_COSTS.keys()),
+         enabled_actions=["edit", "run_assay", "submit", "defer", "restart"],
+         enabled_roles=[
+             "lead_chemist",
+             "toxicologist",
+             "assay_planner",
+             "process_chemist",
+         ],
+         required_review_roles=["toxicologist", "assay_planner", "process_chemist"],
+         baseline_to_beat=0.66,
+     ),
+ ]
+
+ SCENARIO_BY_ID = {scenario.scenario_id: scenario for scenario in SCENARIOS}
+
+
+ def get_scenario(index: int) -> ScenarioConfig:
+     """Return scenarios in a stable cycle so repeated resets cover all tasks."""
+
+     return SCENARIOS[index % len(SCENARIOS)]
+
+
+ def format_molecule(molecule: Mapping[str, str]) -> str:
+     """Human-readable canonical representation."""
+
+     ordered = [f"{slot}={molecule[slot]}" for slot in SLOT_ORDER]
+     return " | ".join(ordered)
+
+
+ def fragment_choices(slot: str) -> List[str]:
+     """Return the editable fragments for a slot."""
+
+     return sorted(FRAGMENT_LIBRARY[slot].keys())
+
+
+ def evaluate_molecule(
+     molecule: Mapping[str, str],
+     scenario: ScenarioConfig,
+     *,
+     target_shift_active: bool = False,
+ ) -> Dict[str, float]:
+     """Evaluate a molecule with target logic plus RDKit/TDC medicinal chemistry signals."""
+
+     potency = 0.23
+     safety = 0.56
+     synth = 0.58
+     novelty = 0.18
+
+     for slot, fragment_name in molecule.items():
+         fragment = FRAGMENT_LIBRARY[slot][fragment_name]
+         potency += fragment.potency
+         safety += fragment.safety
+         synth += fragment.synth
+         novelty += fragment.novelty
+
+     if molecule["warhead"] == "acrylamide" and molecule["hinge"] == "azaindole":
+         potency += 0.10
+     if molecule["solvent_tail"] == "morpholine" and molecule["back_pocket"] == "methoxy":
+         safety += 0.08
+     if molecule["hinge"] == "fluorophenyl" and molecule["back_pocket"] == "chloro":
+         potency += 0.06
+         safety -= 0.16
+     if molecule["solvent_tail"] == "dimethylamino" and molecule["back_pocket"] == "trifluoromethyl":
+         safety -= 0.15
+     if molecule["warhead"] == "nitrile" and molecule["back_pocket"] == "cyano":
+         potency += 0.04
+         novelty += 0.03
+     if molecule["warhead"] == "reversible_cyanoacrylamide" and molecule["solvent_tail"] == "morpholine":
+         safety += 0.05
+
+     if target_shift_active:
+         if molecule["warhead"] == "acrylamide":
+             potency -= 0.16
+         if molecule["warhead"] == "nitrile":
+             potency += 0.10
+         if molecule["back_pocket"] == "cyano":
+             potency += 0.03
+
+     if scenario.trap_penalty:
+         potency = min(potency, 0.71)
+         safety = min(safety, 0.44)
+
+     potency = min(max(potency, 0.0), 1.0)
+     safety = min(max(safety, 0.0), 1.0)
+     synth = min(max(synth, 0.0), 1.0)
+     novelty = min(max(novelty, 0.0), 1.0)
+     toxicity = min(max(1.0 - safety, 0.0), 1.0)
+
+     fallback_properties = {
+         "potency": round(potency, 4),
+         "safety": round(safety, 4),
+         "toxicity": round(toxicity, 4),
+         "synth": round(synth, 4),
+         "novelty": round(novelty, 4),
+     }
+     try:
+         from molforge_oracles import evaluate_with_rdkit_tdc
+     except Exception:
+         return fallback_properties
+     return evaluate_with_rdkit_tdc(molecule, fallback_properties)
+
+
+ def molecule_to_smiles(molecule: Mapping[str, str]) -> str:
+     """Return the RDKit/TDC surrogate SMILES used by the chemistry oracle."""
+
+     try:
+         from molforge_oracles import assemble_surrogate_smiles
+     except Exception:
+         return ""
+     return assemble_surrogate_smiles(molecule)
+
+
+ def oracle_backend_status() -> Dict[str, bool]:
+     """Return whether RDKit and TDC are active for scoring."""
+
+     try:
+         from molforge_oracles import oracle_backend_status as backend_status
+     except Exception:
+         return {"rdkit": False, "tdc": False}
+     return backend_status()
+
+
+ def compute_objective_score(properties: Mapping[str, float], scenario: ScenarioConfig) -> float:
+     """Aggregate visible scientific goals into a single 0-1 quality score."""
+
+     safety_score = 1.0 - properties["toxicity"]
+     score = (
+         scenario.objective_weights["potency"] * properties["potency"]
+         + scenario.objective_weights["safety"] * safety_score
+         + scenario.objective_weights["synth"] * properties["synth"]
+         + scenario.objective_weights["novelty"] * properties["novelty"]
+     )
+     return round(min(max(score, 0.0), 1.0), 4)
+
+
+ def evaluate_constraints(
+     properties: Mapping[str, float], scenario: ScenarioConfig
+ ) -> Dict[str, tuple[bool, float]]:
+     """Return hard-constraint satisfaction results."""
+
+     results: Dict[str, tuple[bool, float]] = {}
+     if "potency_min" in scenario.hard_constraints:
+         threshold = scenario.hard_constraints["potency_min"]
+         results["potency_min"] = (properties["potency"] >= threshold, threshold)
+     if "toxicity_max" in scenario.hard_constraints:
+         threshold = scenario.hard_constraints["toxicity_max"]
+         results["toxicity_max"] = (properties["toxicity"] <= threshold, threshold)
+     if "synth_min" in scenario.hard_constraints:
+         threshold = scenario.hard_constraints["synth_min"]
+         results["synth_min"] = (properties["synth"] >= threshold, threshold)
+     return results
+
+
+ def evaluate_constraint_margins(
+     properties: Mapping[str, float], scenario: ScenarioConfig
+ ) -> Dict[str, float]:
+     """Return proportional 0-1 constraint scores where larger violations score lower."""
+
+     margins: Dict[str, float] = {}
+     if "potency_min" in scenario.hard_constraints:
+         threshold = scenario.hard_constraints["potency_min"]
+         margins["potency_min"] = min(1.0, max(0.0, properties["potency"] / max(threshold, 1e-6)))
+     if "toxicity_max" in scenario.hard_constraints:
+         threshold = scenario.hard_constraints["toxicity_max"]
+         if properties["toxicity"] <= threshold:
+             margins["toxicity_max"] = 1.0
+         else:
+             excess = properties["toxicity"] - threshold
+             margins["toxicity_max"] = max(0.0, 1.0 - excess / max(1.0 - threshold, 1e-6))
+     if "synth_min" in scenario.hard_constraints:
+         threshold = scenario.hard_constraints["synth_min"]
+         margins["synth_min"] = min(1.0, max(0.0, properties["synth"] / max(threshold, 1e-6)))
+     return margins
+
+
+ def literature_hints(molecule: Mapping[str, str]) -> List[str]:
+     """Collect deterministic medicinal chemistry hints for the current molecule."""
+
+     hints = []
+     for slot in SLOT_ORDER:
+         fragment_name = molecule[slot]
+         hints.append(FRAGMENT_LIBRARY[slot][fragment_name].literature_hint)
+     return hints
+
+
+ def enumerate_candidate_edits(molecule: Mapping[str, str]) -> Iterable[tuple[str, str]]:
+     """Generate all single-edit candidates from the current molecule."""
+
+     for slot in SLOT_ORDER:
+         for fragment in fragment_choices(slot):
+             if molecule[slot] != fragment:
+                 yield slot, fragment
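Note: `compute_objective_score` is a plain weighted sum with safety entering as `1 - toxicity`. A standalone sketch using the `level_0_easy` weights from above and made-up property values (the numbers are illustrative, not environment output):

```python
# Weights taken from the level_0_easy scenario above.
weights = {"potency": 0.55, "safety": 0.15, "synth": 0.15, "novelty": 0.15}


def objective_score(props, weights):
    # Safety is scored as the complement of toxicity, matching compute_objective_score.
    safety = 1.0 - props["toxicity"]
    score = (
        weights["potency"] * props["potency"]
        + weights["safety"] * safety
        + weights["synth"] * props["synth"]
        + weights["novelty"] * props["novelty"]
    )
    # Clamp into [0, 1] and round to four decimals, as the environment does.
    return round(min(max(score, 0.0), 1.0), 4)


# Hypothetical molecule: strong potency, moderate toxicity, decent synth.
props = {"potency": 0.85, "toxicity": 0.25, "synth": 0.70, "novelty": 0.40}
print(objective_score(props, weights))  # -> 0.745
```

Because the weights sum to 1.0 and every property is already in [0, 1], the clamp only fires when inputs are malformed; it is a guard, not part of the scoring shape.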
scripts/convert_peft_lora_to_mlx.py ADDED
@@ -0,0 +1,98 @@
+ """Convert a PEFT LoRA adapter into the adapter format expected by mlx-lm."""
+
+ from __future__ import annotations
+
+ import argparse
+ import json
+ import re
+ import shutil
+ from pathlib import Path
+
+ import mlx.core as mx
+ from safetensors import safe_open
+
+
+ KEY_RE = re.compile(
+     r"^base_model\.model\.model\.(?P<prefix>.+?)\.layers\."
+     r"(?P<layer>\d+)\.(?P<module>.+?)\.lora_(?P<ab>[AB])\.weight$"
+ )
+
+
+ def main() -> None:
+     parser = argparse.ArgumentParser(description="Convert PEFT LoRA adapter to MLX LoRA adapter.")
+     parser.add_argument("peft_adapter", help="Path containing PEFT adapter_model.safetensors")
+     parser.add_argument("mlx_adapter", help="Output path for MLX adapters.safetensors")
+     args = parser.parse_args()
+
+     peft_path = Path(args.peft_adapter)
+     mlx_path = Path(args.mlx_adapter)
+     mlx_path.mkdir(parents=True, exist_ok=True)
+
+     peft_config = json.loads((peft_path / "adapter_config.json").read_text())
+     rank = int(peft_config["r"])
+     alpha = float(peft_config["lora_alpha"])
+     scale = alpha / rank
+     target_modules = list(peft_config["target_modules"])
+
+     weights = {}
+     layer_ids = set()
+     module_keys = set()
+     with safe_open(peft_path / "adapter_model.safetensors", framework="numpy") as handle:
+         for key in handle.keys():
+             match = KEY_RE.match(key)
+             if not match:
+                 continue
+             layer = int(match.group("layer"))
+             module = match.group("module")
+             ab = match.group("ab")
+             layer_ids.add(layer)
+             module_keys.add(module)
+             tensor = handle.get_tensor(key)
+             mlx_key = f"language_model.model.layers.{layer}.{module}.lora_{ab.lower()}"
+             weights[mlx_key] = mx.array(tensor.T)
+
+     if not weights:
+         raise SystemExit(f"No PEFT LoRA weights found in {peft_path}")
+
+     mx.save_safetensors(str(mlx_path / "adapters.safetensors"), weights)
+     config = {
+         "fine_tune_type": "lora",
+         "num_layers": max(layer_ids) + 1,
+         "lora_parameters": {
+             "rank": rank,
+             "scale": scale,
+             "dropout": float(peft_config.get("lora_dropout", 0.0)),
+             "keys": sorted(module_keys),
+         },
+     }
+     (mlx_path / "adapter_config.json").write_text(json.dumps(config, indent=2) + "\n")
+
+     for filename in [
+         "tokenizer.json",
+         "tokenizer_config.json",
+         "chat_template.jinja",
+         "processor_config.json",
+         "README.md",
+     ]:
+         source = peft_path / filename
+         if source.exists():
+             shutil.copy2(source, mlx_path / filename)
+
+     print(
+         json.dumps(
+             {
+                 "output": str(mlx_path),
+                 "weights": len(weights),
+                 "num_layers": config["num_layers"],
+                 "rank": rank,
+                 "scale": scale,
+                 "keys": sorted(module_keys),
+                 "target_modules": target_modules,
+             },
+             indent=2,
+         )
+     )
+
+
+ if __name__ == "__main__":
+     main()
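Note: the key translation inside the conversion loop can be exercised without any model weights. A sketch applying the same `KEY_RE` to a made-up PEFT key of the expected shape (the key string itself is hypothetical):

```python
import re

# Same pattern as KEY_RE in the converter above.
KEY_RE = re.compile(
    r"^base_model\.model\.model\.(?P<prefix>.+?)\.layers\."
    r"(?P<layer>\d+)\.(?P<module>.+?)\.lora_(?P<ab>[AB])\.weight$"
)

# Hypothetical PEFT key: prefix "language_model", layer 3, q_proj A matrix.
key = "base_model.model.model.language_model.layers.3.self_attn.q_proj.lora_A.weight"
m = KEY_RE.match(key)

# Rebuild the MLX-side key the way the converter does (lora_A -> lora_a).
mlx_key = (
    f"language_model.model.layers.{m.group('layer')}"
    f".{m.group('module')}.lora_{m.group('ab').lower()}"
)
print(mlx_key)  # -> language_model.model.layers.3.self_attn.q_proj.lora_a
```

The lazy `(?P<module>.+?)` still captures the full `self_attn.q_proj` path because the regex engine must reach the literal `.lora_` suffix before it can stop expanding.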
scripts/generate_sft_all_actions_dataset.py ADDED
@@ -0,0 +1,180 @@
+ """Generate a MolForge SFT JSONL dataset with rare-action coverage.
+
+ Most records come from the deterministic team policy so the examples are
+ grounded in real environment trajectories. A smaller coverage slice is added
+ for rare but valid schema variants such as defer, each assay tool, and edit
+ subtypes so SFT teaches the model the whole action surface.
+ """
+
+ from __future__ import annotations
+
+ import argparse
+ import json
+ import os
+ import sys
+ from pathlib import Path
+ from typing import Iterable
+
+ PROJECT_ROOT = Path(__file__).resolve().parents[1]
+ if str(PROJECT_ROOT) not in sys.path:
+     sys.path.insert(0, str(PROJECT_ROOT))
+
+ from inference_common import (  # noqa: E402
+     SYSTEM_PROMPT,
+     MolForgeAction,
+     MolForgeObservation,
+     attach_reasoning_fields,
+     attach_team_messages,
+     build_model_payload,
+     heuristic_team_action,
+ )
+ from scenarios import DEFAULT_TOOL_COSTS  # noqa: E402
+ from server.molforge_environment import MolForgeEnvironment  # noqa: E402
+
+
+ def main() -> None:
+     parser = argparse.ArgumentParser(description="Generate MolForge all-action SFT JSONL.")
+     parser.add_argument("--episodes", type=int, default=90)
+     parser.add_argument("--max-turns", type=int, default=10)
+     parser.add_argument("--output", default="data/molforge_sft_all_actions.jsonl")
+     parser.add_argument(
+         "--randomized",
+         action="store_true",
+         help="Enable MolForge training randomization while collecting policy traces.",
+     )
+     args = parser.parse_args()
+
+     if args.randomized:
+         os.environ["MOLFORGE_TRAINING_RANDOMIZATION"] = "1"
+
+     output_path = Path(args.output)
+     output_path.parent.mkdir(parents=True, exist_ok=True)
+
+     env = MolForgeEnvironment()
+     records = []
+
+     for _ in range(args.episodes):
+         observation = env.reset()
+         for _ in range(args.max_turns):
+             if observation.done:
+                 break
+             action = heuristic_team_action(observation)
+             records.append(make_record(observation, action, source="policy_trace"))
+             observation = env.step(action)
+
+     for observation, action in curated_coverage_examples():
+         action = attach_reasoning_fields(observation, action)
+         action = attach_team_messages(observation, action)
+         records.append(make_record(observation, action, source="coverage_example"))
+
+     with output_path.open("w", encoding="utf-8") as handle:
+         for record in records:
+             handle.write(json.dumps(record, ensure_ascii=True) + "\n")
+
+     print(
+         json.dumps(
+             {
+                 "output": str(output_path),
+                 "records": len(records),
+                 "coverage_records": sum(
+                     1 for record in records if record["metadata"]["source"] == "coverage_example"
+                 ),
+             },
+             indent=2,
+         )
+     )
+
+
+ def curated_coverage_examples() -> Iterable[tuple[MolForgeObservation, MolForgeAction]]:
+     env = MolForgeEnvironment()
+     observations = [env.reset(), env.reset(), env.reset()]
+
+     for observation in observations:
+         yield observation, MolForgeAction(
+             action_type="defer",
+             acting_role="lead_chemist",
+             rationale="Hold this turn because the team needs a cleaner evidence-backed move.",
+         )
+
+     easy, medium, hard = observations
+
+     yield easy, MolForgeAction(
+         action_type="edit",
+         acting_role="lead_chemist",
+         edit_type="add_fragment",
+         slot="back_pocket",
+         fragment="cyano",
+         rationale="Add a compact cyano handle to improve potency without large lipophilic risk.",
+     )
+     yield medium, MolForgeAction(
+         action_type="edit",
+         acting_role="lead_chemist",
+         edit_type="remove",
+         slot="back_pocket",
+         rationale="Remove the risky back-pocket group and return to a simpler default handle.",
+     )
+     yield hard, MolForgeAction(
+         action_type="edit",
+         acting_role="lead_chemist",
+         edit_type="undo_last_edit",
+         slot="solvent_tail",
+         rationale="Undo the last tail change when the visible evidence suggests it raised risk.",
+     )
+
+     for observation in observations:
+         for tool_name in DEFAULT_TOOL_COSTS:
+             yield observation, MolForgeAction(
+                 action_type="run_assay",
+                 acting_role="assay_planner",
+                 tool_name=tool_name,
+                 rationale=f"Run {tool_name} to close a visible evidence gap before committing.",
+             )
+
+     yield hard, MolForgeAction(
+         action_type="restart",
+         acting_role="lead_chemist",
+         rationale="Restart early because the hard scenario starts in a trap series.",
+     )
+     yield easy, MolForgeAction(
+         action_type="submit",
+         acting_role="lead_chemist",
+         rationale="Submit only when visible evidence is sufficient and budget should be preserved.",
+     )
+
+
+ def make_record(
+     observation: MolForgeObservation,
+     action: MolForgeAction,
+     *,
+     source: str,
+ ) -> dict[str, object]:
+     return {
+         "messages": [
+             {"role": "system", "content": SYSTEM_PROMPT},
+             {
+                 "role": "user",
+                 "content": json.dumps(
+                     build_model_payload(observation, compact=False),
+                     separators=(",", ":"),
+                 ),
+             },
+             {
+                 "role": "assistant",
+                 "content": json.dumps(
+                     action.model_dump(exclude_none=True),
+                     separators=(",", ":"),
+                 ),
+             },
+         ],
+         "metadata": {
+             "source": source,
+             "scenario_id": observation.scenario_id,
+             "difficulty": observation.difficulty,
+             "step_index": observation.step_index,
+             "action_type": action.action_type,
+         },
+     }
+
+
+ if __name__ == "__main__":
+     main()
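Each JSONL record written by `make_record` carries a three-message chat (`system`, `user`, `assistant`) plus a `metadata` block. A minimal sketch of reading such a file back and tallying `action_type` counts, assuming only the record layout shown above:

```python
import json
from collections import Counter
from typing import Iterable

def tally_action_types(lines: Iterable[str]) -> Counter:
    """Count metadata.action_type values in SFT JSONL lines shaped like make_record output."""
    counts: Counter = Counter()
    for line in lines:
        line = line.strip()
        if not line:
            continue  # skip blank lines, mirroring typical JSONL readers
        record = json.loads(line)
        counts[record["metadata"]["action_type"]] += 1
    return counts
```

This is the kind of check worth running after generation to confirm the coverage slice actually contributed `defer`, `restart`, and every assay tool.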
scripts/generate_sft_compact_policy_v4_dataset.py ADDED
@@ -0,0 +1,446 @@
+ """Generate MolForge compact-policy SFT data aligned to MLX inference.
+
+ V4 is designed around the failures seen in the v3 adapter:
+ - train on the exact compact prompt/payload shape used at inference time
+ - emphasize successful end-to-end expert trajectories
+ - include recovery examples after governance vetoes
+ - include enough schema coverage for all core action types without making
+   unsafe edits or wasteful assays dominate the positive training signal
+ """
+
+ from __future__ import annotations
+
+ import argparse
+ import json
+ import os
+ import sys
+ from pathlib import Path
+ from typing import Any, Iterable
+
+ PROJECT_ROOT = Path(__file__).resolve().parents[1]
+ if str(PROJECT_ROOT) not in sys.path:
+     sys.path.insert(0, str(PROJECT_ROOT))
+
+ from inference_common import (  # noqa: E402
+     MolForgeAction,
+     MolForgeObservation,
+     attach_reasoning_fields,
+     attach_team_messages,
+     heuristic_team_action,
+ )
+ from scenarios import DEFAULT_TOOL_COSTS  # noqa: E402
+ from server.molforge_environment import MolForgeEnvironment  # noqa: E402
+
+
+ COMPACT_ACTION_SYSTEM_PROMPT = """
+ You control the MolForge action policy.
+ Return exactly one JSON object with only these top-level keys:
+ action_type, acting_role, edit_type, slot, fragment, tool_name, rationale,
+ evidence, expected_effects.
+
+ Valid action_type values are exactly:
+ edit, run_assay, submit, restart, defer.
+
+ Do not output team messages. Do not output proposal, approval, objection,
+ risk_flag, assay_request, rejection, or submission_recommendation as action_type.
+ The environment will attach governance messages automatically.
+
+ Role rules:
+ - run_assay uses acting_role "assay_planner" and a valid tool_name.
+ - edit, submit, restart, and defer use acting_role "lead_chemist".
+ - unused optional fields must be JSON null.
+ """.strip()
+
+
+ def main() -> None:
+     parser = argparse.ArgumentParser(description="Generate compact MolForge v4 policy SFT JSONL.")
+     parser.add_argument("--episodes", type=int, default=520)
+     parser.add_argument("--max-turns", type=int, default=10)
+     parser.add_argument("--seed", default="policy-v4")
+     parser.add_argument("--output", default="issue/molforge_sft_compact_policy_v4.jsonl")
+     args = parser.parse_args()
+
+     records: list[dict[str, Any]] = []
+     seen: set[str] = set()
+
+     add_expert_traces(records, seen, episodes=18, max_turns=args.max_turns, randomized=False, seed=args.seed)
+     add_expert_traces(records, seen, episodes=args.episodes, max_turns=args.max_turns, randomized=True, seed=args.seed)
+     add_recovery_traces(records, seen, episodes=max(90, args.episodes // 3), seed=args.seed)
+     add_schema_coverage(records, seen, episodes=36, seed=args.seed)
+
+     output = Path(args.output)
+     output.parent.mkdir(parents=True, exist_ok=True)
+     with output.open("w", encoding="utf-8") as handle:
+         for record in records:
+             handle.write(json.dumps(record, ensure_ascii=True) + "\n")
+
+     print(json.dumps(summarize(records, str(output)), indent=2))
+
+
+ def add_expert_traces(
+     records: list[dict[str, Any]],
+     seen: set[str],
+     *,
+     episodes: int,
+     max_turns: int,
+     randomized: bool,
+     seed: str,
+ ) -> None:
+     with_training_randomization(randomized, seed)
+     env = MolForgeEnvironment()
+     source = "expert_randomized" if randomized else "expert_canonical"
+
+     for _ in range(episodes):
+         observation = env.reset()
+         for _ in range(max_turns):
+             if observation.done:
+                 break
+             action = heuristic_team_action(observation)
+             add_record(records, seen, observation, action, source=source)
+             observation = env.step(action)
+
+
+ def add_recovery_traces(records: list[dict[str, Any]], seen: set[str], *, episodes: int, seed: str) -> None:
+     with_training_randomization(True, f"{seed}-recovery")
+     env = MolForgeEnvironment()
+
+     for episode_index in range(episodes):
+         observation = env.reset()
+
+         # Move some episodes to a useful intermediate state before injecting a bad decision.
+         for _ in range(episode_index % 3):
+             if observation.done:
+                 break
+             observation = env.step(heuristic_team_action(observation))
+         if observation.done:
+             continue
+
+         for bad_action in bad_actions_for(observation):
+             trial = clone_env_at_observation(env, episode_index)
+             trial_obs = advance_like_source(trial, episode_index % 3)
+             if trial_obs.done:
+                 continue
+             veto_obs = trial.step(attach_team_messages(trial_obs, attach_reasoning_fields(trial_obs, bad_action)))
+             if veto_obs.done:
+                 continue
+             if veto_obs.governance.status != "policy_veto":
+                 continue
+             recovery = heuristic_team_action(veto_obs)
+             add_record(records, seen, veto_obs, recovery, source="recovery_after_veto")
+
+
+ def add_schema_coverage(records: list[dict[str, Any]], seen: set[str], *, episodes: int, seed: str) -> None:
+     with_training_randomization(True, f"{seed}-coverage")
+     env = MolForgeEnvironment()
+     observations: list[MolForgeObservation] = []
+     for _ in range(episodes):
+         observation = env.reset()
+         observations.append(observation)
+         for _ in range(2):
+             if observation.done:
+                 break
+             observation = env.step(heuristic_team_action(observation))
+         observations.append(observation)
+
+     defer_examples = 0
+     for observation in observations:
+         current = {slot.slot: slot.fragment for slot in observation.molecule_slots}
+         safe_edits = [
+             ("solvent_tail", "morpholine", "Use morpholine to reduce safety risk."),
+             ("back_pocket", "cyano", "Use cyano to preserve potency with lower lipophilic risk."),
+             ("warhead", "reversible_cyanoacrylamide", "Use a softer warhead to reduce reactivity."),
+             ("hinge", "azaindole", "Use azaindole when potency needs recovery."),
+         ]
+         for slot, fragment, rationale in safe_edits:
+             if current.get(slot) == fragment:
+                 continue
+             add_record(
+                 records,
+                 seen,
+                 observation,
+                 MolForgeAction(
+                     action_type="edit",
+                     acting_role="lead_chemist",
+                     edit_type="substitute",
+                     slot=slot,  # type: ignore[arg-type]
+                     fragment=fragment,
+                     rationale=rationale,
+                 ),
+                 source="schema_safe_edit",
+             )
+
+         if observation.step_index > 0:
+             add_record(
+                 records,
+                 seen,
+                 observation,
+                 MolForgeAction(
+                     action_type="edit",
+                     acting_role="lead_chemist",
+                     edit_type="remove",
+                     slot="back_pocket",
+                     rationale="Remove the back-pocket group to simplify risk before reassay.",
+                 ),
+                 source="schema_remove",
+             )
+
+         for tool_name in useful_tool_subset(observation):
+             add_record(
+                 records,
+                 seen,
+                 observation,
+                 MolForgeAction(
+                     action_type="run_assay",
+                     acting_role="assay_planner",
+                     tool_name=tool_name,  # type: ignore[arg-type]
+                     rationale=f"Run {tool_name} to close a visible evidence gap.",
+                 ),
+                 source="schema_tool_coverage",
+             )
+
+         if (
+             defer_examples < 36
+             and observation.step_index >= 1
+             and observation.scenario_id != "level_2_hard"
+         ):
+             add_record(
+                 records,
+                 seen,
+                 observation,
+                 MolForgeAction(
+                     action_type="defer",
+                     acting_role="lead_chemist",
+                     rationale="Defer because no safe evidence-backed action remains in the current budget window.",
+                 ),
+                 source="schema_defer",
+             )
+             defer_examples += 1
+
+
+ def useful_tool_subset(observation: MolForgeObservation) -> list[str]:
+     gaps = set()
+     for constraint in observation.constraint_status:
+         if constraint.evidence_status == "unknown":
+             if constraint.name == "toxicity_max":
+                 gaps.add("toxicity")
+             else:
+                 gaps.add(constraint.name.split("_")[0])
+     tools: list[str] = []
+     if "potency" in gaps and observation.remaining_budget >= DEFAULT_TOOL_COSTS["dock_target"]:
+         tools.extend(["evaluate_properties", "dock_target"])
+     if "toxicity" in gaps and observation.remaining_budget >= DEFAULT_TOOL_COSTS["assay_toxicity"]:
+         tools.append("assay_toxicity")
+     if "synth" in gaps and observation.remaining_budget >= DEFAULT_TOOL_COSTS["estimate_synthesizability"]:
+         tools.append("estimate_synthesizability")
+     if observation.remaining_budget >= DEFAULT_TOOL_COSTS["evaluate_novelty"]:
+         tools.append("evaluate_novelty")
+     if observation.remaining_budget >= DEFAULT_TOOL_COSTS["search_literature"]:
+         tools.append("search_literature")
+     if observation.scenario_id == "level_2_hard" and observation.remaining_budget >= DEFAULT_TOOL_COSTS["run_md_simulation"]:
+         tools.append("run_md_simulation")
+     return tools
+
+
+ def bad_actions_for(observation: MolForgeObservation) -> Iterable[MolForgeAction]:
+     current = {slot.slot: slot.fragment for slot in observation.molecule_slots}
+     candidates = [
+         ("solvent_tail", "dimethylamino", "This would add a safety liability and should be recovered from."),
+         ("back_pocket", "trifluoromethyl", "This would over-shoot lipophilic risk and should be recovered from."),
+         ("hinge", "quinazoline", "This can create route pressure and should be recovered from."),
+     ]
+     for slot, fragment, rationale in candidates:
+         if current.get(slot) == fragment:
+             continue
+         yield MolForgeAction(
+             action_type="edit",
+             acting_role="lead_chemist",
+             edit_type="substitute",
+             slot=slot,  # type: ignore[arg-type]
+             fragment=fragment,
+             rationale=rationale,
+         )
+
+
+ def clone_env_at_observation(source_env: MolForgeEnvironment, episode_index: int) -> MolForgeEnvironment:
+     del source_env
+     env = MolForgeEnvironment()
+     for _ in range(episode_index + 1):
+         observation = env.reset()
+     return env
+
+
+ def advance_like_source(env: MolForgeEnvironment, steps: int) -> MolForgeObservation:
+     observation = env._build_observation(reward=0.0, done=False, reward_components=[])  # noqa: SLF001
+     for _ in range(steps):
+         if observation.done:
+             return observation
+         observation = env.step(heuristic_team_action(observation))
+     return observation
+
+
+ def with_training_randomization(enabled: bool, seed: str) -> None:
+     if enabled:
+         os.environ["MOLFORGE_TRAINING_RANDOMIZATION"] = "1"
+     else:
+         os.environ.pop("MOLFORGE_TRAINING_RANDOMIZATION", None)
+     os.environ["MOLFORGE_RANDOM_SEED"] = seed
+
+
+ def add_record(
+     records: list[dict[str, Any]],
+     seen: set[str],
+     observation: MolForgeObservation,
+     action: MolForgeAction,
+     *,
+     source: str,
+ ) -> None:
+     action = attach_reasoning_fields(observation, action)
+     record = make_record(observation, action, source=source)
+     key = json.dumps(
+         {"user": record["messages"][1]["content"], "assistant": record["messages"][2]["content"]},
+         sort_keys=True,
+     )
+     if key in seen:
+         return
+     validate_target(record["messages"][2]["content"])
+     records.append(record)
+     seen.add(key)
+
+
+ def make_record(observation: MolForgeObservation, action: MolForgeAction, *, source: str) -> dict[str, Any]:
+     return {
+         "messages": [
+             {"role": "system", "content": COMPACT_ACTION_SYSTEM_PROMPT},
+             {"role": "user", "content": json.dumps(compact_action_payload(observation), separators=(",", ":"))},
+             {"role": "assistant", "content": json.dumps(target_action(action), separators=(",", ":"))},
+         ],
+         "metadata": {
+             "source": source,
+             "scenario_id": observation.scenario_id,
+             "difficulty": observation.difficulty,
+             "step_index": observation.step_index,
+             "action_type": action.action_type,
+         },
+     }
+
+
+ def compact_action_payload(observation: MolForgeObservation) -> dict[str, Any]:
+     lead_view = next(
+         (role.observation for role in observation.role_observations if role.role == "lead_chemist"),
+         {},
+     )
+     assay_view = next(
+         (role.observation for role in observation.role_observations if role.role == "assay_planner"),
+         {},
+     )
+     return {
+         "valid_action_types": ["edit", "run_assay", "submit", "restart", "defer"],
+         "scenario_id": observation.scenario_id,
+         "difficulty": observation.difficulty,
+         "task_brief": observation.task_brief,
+         "current_molecule": observation.current_molecule,
+         "current_smiles": observation.metadata.get("current_smiles", ""),
+         "visible_metrics": observation.visible_metrics,
+         "constraint_status": [constraint.model_dump() for constraint in observation.constraint_status],
+         "remaining_budget": observation.remaining_budget,
+         "max_budget": observation.max_budget,
+         "step_index": observation.step_index,
+         "max_steps": observation.max_steps,
+         "molecule_slots": lead_view.get("molecule_slots", {}),
+         "candidate_edits": lead_view.get("candidate_edits", [])[:12],
+         "open_questions": lead_view.get("open_questions", []),
+         "known_assays": [
+             {
+                 "tool_name": reading.tool_name,
+                 "property_name": reading.property_name,
+                 "estimate": reading.estimate,
+                 "confidence_low": reading.confidence_low,
+                 "confidence_high": reading.confidence_high,
+                 "molecule_signature": reading.molecule_signature,
+             }
+             for reading in observation.known_assays[-8:]
+         ],
+         "tool_costs": assay_view.get("tool_costs", {}),
+         "evidence_gaps": assay_view.get("evidence_gaps", []),
+         "estimated_information_value": assay_view.get("estimated_information_value", {}),
+     }
+
+
+ def target_action(action: MolForgeAction) -> dict[str, Any]:
+     effects = {
+         "potency": "unknown",
+         "toxicity": "unknown",
+         "synth": "unknown",
+         "novelty": "unknown",
+         "budget": "neutral",
+     }
+     effects.update({key: value for key, value in action.expected_effects.items() if key in effects})
+     return {
+         "action_type": action.action_type,
+         "acting_role": action.acting_role,
+         "edit_type": action.edit_type,
+         "slot": action.slot,
+         "fragment": action.fragment,
+         "tool_name": action.tool_name,
+         "rationale": action.rationale[:220],
+         "evidence": list(action.evidence[:5]),
+         "expected_effects": effects,
+     }
+
+
+ def validate_target(text: str) -> None:
+     data = json.loads(text)
+     allowed = {
+         "action_type",
+         "acting_role",
+         "edit_type",
+         "slot",
+         "fragment",
+         "tool_name",
+         "rationale",
+         "evidence",
+         "expected_effects",
+     }
+     if set(data) != allowed:
+         raise ValueError(f"target keys mismatch: {sorted(data)}")
+     if data["action_type"] not in {"edit", "run_assay", "submit", "restart", "defer"}:
+         raise ValueError(f"invalid action_type: {data['action_type']}")
+     if data["action_type"] == "proposal":
+         raise ValueError("proposal is not a compact action type")
+     if data["edit_type"] == "replace":
+         raise ValueError("replace must never be used; use substitute")
+     if "messages" in data:
+         raise ValueError("compact target must not contain messages")
+     if not isinstance(data["evidence"], list):
+         raise ValueError("evidence must be a list")
+     if set(data["expected_effects"]) != {"potency", "toxicity", "synth", "novelty", "budget"}:
+         raise ValueError("expected_effects must have exactly five keys")
+     MolForgeAction(**data)
+
+
+ def summarize(records: list[dict[str, Any]], output: str) -> dict[str, Any]:
+     actions: dict[str, int] = {}
+     sources: dict[str, int] = {}
+     scenarios: dict[str, int] = {}
+     users = set()
+     assistants = set()
+     for record in records:
+         metadata = record["metadata"]
+         actions[metadata["action_type"]] = actions.get(metadata["action_type"], 0) + 1
+         sources[metadata["source"]] = sources.get(metadata["source"], 0) + 1
+         scenarios[metadata["scenario_id"]] = scenarios.get(metadata["scenario_id"], 0) + 1
+         users.add(record["messages"][1]["content"])
+         assistants.add(record["messages"][2]["content"])
+     return {
+         "output": output,
+         "records": len(records),
+         "unique_user_prompts": len(users),
+         "unique_assistant_targets": len(assistants),
+         "action_types": actions,
+         "sources": sources,
+         "scenario_ids": scenarios,
+     }
+
+
+ if __name__ == "__main__":
+     main()
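`validate_target` enforces a strict compact-action contract: exactly nine top-level keys, an `action_type` from a fixed set, a list-valued `evidence` field, and exactly five `expected_effects` keys. A minimal standalone sketch of that key contract, detached from the Pydantic models (an error list instead of raised exceptions, so several violations surface at once):

```python
# Key sets copied from the contract enforced by validate_target above.
ALLOWED_KEYS = {
    "action_type", "acting_role", "edit_type", "slot", "fragment",
    "tool_name", "rationale", "evidence", "expected_effects",
}
EFFECT_KEYS = {"potency", "toxicity", "synth", "novelty", "budget"}
ACTION_TYPES = {"edit", "run_assay", "submit", "restart", "defer"}

def compact_target_errors(data: dict) -> list:
    """Return contract violations for a compact action dict; empty list means valid."""
    errors = []
    if set(data) != ALLOWED_KEYS:
        errors.append("top-level keys mismatch")
    if data.get("action_type") not in ACTION_TYPES:
        errors.append("invalid action_type")
    if not isinstance(data.get("evidence"), list):
        errors.append("evidence must be a list")
    if set(data.get("expected_effects", {})) != EFFECT_KEYS:
        errors.append("expected_effects must have exactly five keys")
    return errors
```

Note this sketch checks only the structural rules; the script's real validator additionally round-trips the dict through `MolForgeAction(**data)` so Pydantic field types are enforced too.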
scripts/validate_sft_traces.py ADDED
@@ -0,0 +1,97 @@
+ """Validate MolForge SFT JSONL before training."""
+
+ from __future__ import annotations
+
+ import argparse
+ import json
+ import sys
+ from pathlib import Path
+ from typing import Any
+
+ PROJECT_ROOT = Path(__file__).resolve().parents[1]
+ if str(PROJECT_ROOT) not in sys.path:
+     sys.path.insert(0, str(PROJECT_ROOT))
+
+ from models import MolForgeAction
+
+
+ def main() -> None:
+     parser = argparse.ArgumentParser(description="Validate MolForge SFT trace JSONL.")
+     parser.add_argument("path", help="Path to JSONL generated by scripts/generate_sft_traces.py")
+     parser.add_argument("--max-errors", type=int, default=20)
+     args = parser.parse_args()
+
+     path = Path(args.path)
+     errors: list[str] = []
+     records = 0
+     action_types: dict[str, int] = {}
+     scenario_ids: dict[str, int] = {}
+
+     for line_number, line in enumerate(path.open(encoding="utf-8"), start=1):
+         if not line.strip():
+             continue
+         records += 1
+         try:
+             record = json.loads(line)
+             messages = record["messages"]
+             assistant_content = messages[-1]["content"]
+             action_dict = json.loads(assistant_content)
+             action = MolForgeAction(**action_dict)
+             validation_error = validate_action_contract(action)
+             if validation_error:
+                 raise ValueError(validation_error)
+             metadata = record.get("metadata", {})
+             scenario_id = metadata.get("scenario_id", "unknown")
+             scenario_ids[scenario_id] = scenario_ids.get(scenario_id, 0) + 1
+             action_types[action.action_type] = action_types.get(action.action_type, 0) + 1
+         except Exception as exc:
+             errors.append(f"line {line_number}: {exc}")
+             if len(errors) >= args.max_errors:
+                 break
+
+     summary: dict[str, Any] = {
+         "path": str(path),
+         "records_checked": records,
+         "valid": not errors,
+         "action_types": action_types,
+         "scenario_ids": scenario_ids,
+         "errors": errors,
+     }
+     print(json.dumps(summary, indent=2))
+     if errors:
+         raise SystemExit(1)
+
+
+ def validate_action_contract(action: MolForgeAction) -> str:
+     if action.action_type == "run_assay" and action.acting_role != "assay_planner":
+         return "run_assay must use acting_role=assay_planner"
+     if action.action_type in {"edit", "submit", "restart", "defer"} and action.acting_role != "lead_chemist":
+         return f"{action.action_type} must use acting_role=lead_chemist"
+     if not action.rationale.strip():
+         return "missing rationale"
+     if not action.evidence:
+         return "missing evidence"
+     if not action.expected_effects:
+         return "missing expected_effects"
+
+     allowed_message_types = {
+         "lead_chemist": {"proposal", "revision_request", "submission_recommendation"},
+         "assay_planner": {"proposal", "approval", "rejection", "assay_request", "submission_recommendation"},
+         "toxicologist": {"approval", "objection", "risk_flag", "assay_request", "rejection"},
+         "process_chemist": {"approval", "objection", "risk_flag", "assay_request"},
+     }
+     seen_senders = set()
+     for message in action.messages:
+         if message.sender in seen_senders:
+             return f"duplicate message sender {message.sender}"
+         seen_senders.add(message.sender)
+         if message.message_type not in allowed_message_types.get(message.sender, set()):
+             return f"{message.sender} cannot emit {message.message_type}"
+     actor_message = next((message for message in action.messages if message.sender == action.acting_role), None)
+     if action.action_type != "defer" and (actor_message is None or actor_message.message_type != "proposal"):
+         return "acting_role must include a proposal message"
+     return ""
+
+
+ if __name__ == "__main__":
+     main()
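The per-role message allowlist inside `validate_action_contract` is the subtlest part of the contract: each sender may appear at most once, and each role may only emit message types from its own set. A minimal sketch of just that check on plain `(sender, message_type)` tuples instead of the repo's Pydantic message models:

```python
# Allowlist copied from validate_action_contract above.
ALLOWED_MESSAGE_TYPES = {
    "lead_chemist": {"proposal", "revision_request", "submission_recommendation"},
    "assay_planner": {"proposal", "approval", "rejection", "assay_request", "submission_recommendation"},
    "toxicologist": {"approval", "objection", "risk_flag", "assay_request", "rejection"},
    "process_chemist": {"approval", "objection", "risk_flag", "assay_request"},
}

def message_contract_error(messages: list) -> str:
    """Check (sender, message_type) pairs against the per-role allowlist.

    Mirrors the duplicate-sender and allowlist logic of validate_action_contract;
    returns "" when the message set is valid.
    """
    seen = set()
    for sender, message_type in messages:
        if sender in seen:
            return f"duplicate message sender {sender}"
        seen.add(sender)
        if message_type not in ALLOWED_MESSAGE_TYPES.get(sender, set()):
            return f"{sender} cannot emit {message_type}"
    return ""
```

An unknown sender falls through to an empty allowlist and is rejected on its first message, which matches the `.get(sender, set())` behavior in the script.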
server/Dockerfile ADDED
@@ -0,0 +1,45 @@
+ ARG BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest
+ FROM ${BASE_IMAGE} AS builder
+ ARG INSTALL_TDC=0
+
+ WORKDIR /app
+
+ RUN apt-get update && \
+     apt-get install -y --no-install-recommends git && \
+     rm -rf /var/lib/apt/lists/*
+
+ COPY . /app/env
+ WORKDIR /app/env
+
+ RUN if ! command -v uv >/dev/null 2>&1; then \
+         curl -LsSf https://astral.sh/uv/install.sh | sh && \
+         mv /root/.local/bin/uv /usr/local/bin/uv && \
+         mv /root/.local/bin/uvx /usr/local/bin/uvx; \
+     fi
+
+ ENV UV_LINK_MODE=copy
+ RUN --mount=type=cache,target=/root/.cache/uv \
+     if [ "$INSTALL_TDC" = "1" ]; then \
+         uv sync --no-editable --extra tdc; \
+     else \
+         uv sync --no-editable; \
+     fi
+
+ FROM ${BASE_IMAGE}
+
+ WORKDIR /app
+
+ RUN apt-get update && \
+     apt-get install -y --no-install-recommends curl && \
+     rm -rf /var/lib/apt/lists/*
+
+ COPY --from=builder /app/env/.venv /app/.venv
+ COPY --from=builder /app/env /app/env
+
+ ENV PATH="/app/.venv/bin:$PATH"
+ ENV PYTHONPATH="/app/env:$PYTHONPATH"
+
+ HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
+     CMD curl -f http://localhost:8000/health || exit 1
+
+ CMD ["sh", "-c", "cd /app/env && uvicorn server.app:app --host 0.0.0.0 --port 8000"]
server/__init__.py ADDED
@@ -0,0 +1 @@
+ """Server package for MolForge."""
server/actions.py ADDED
@@ -0,0 +1,414 @@
+ """Action execution mixin for MolForge."""
+
+ from __future__ import annotations
+
+ from typing import Dict, List, Mapping
+
+ from .shared import (
+     DEFAULT_TOOL_COSTS,
+     compute_objective_score,
+     evaluate_constraint_margins,
+     evaluate_constraints,
+     literature_hints,
+ )
+
+ try:
+     from ..models import AssayReading, MolForgeAction, RewardComponent
+ except ImportError:
+     from models import AssayReading, MolForgeAction, RewardComponent
+
+
+ class MolForgeActionMixin:
+     """Methods that mutate environment state through actions."""
+
+     def _execute_action(
+         self,
+         action: MolForgeAction,
+         reward_components: List[RewardComponent],
+         previous_properties: Mapping[str, float],
+         previous_score: float,
+     ) -> tuple[float, bool]:
+         reward = 0.0
+         done = False
+
+         if action.action_type == "edit":
+             reward += self._apply_edit(action, reward_components, previous_score)
+         elif action.action_type == "run_assay":
+             reward += self._run_assay(action, reward_components)
+         elif action.action_type == "submit":
+             reward, done = self._submit(reward_components)
+         elif action.action_type == "restart":
+             reward += self._restart(reward_components)
+         elif action.action_type == "defer":
+             reward -= 0.05
+             reward_components.append(
+                 RewardComponent(
+                     name="defer",
+                     value=-0.05,
+                     explanation="Deferring preserves state but lightly penalizes lost project time.",
+                 )
+             )
+             self._last_summary = "The team deferred action to gather its thoughts."
+
+         return reward, done
+
+     def _apply_edit(
+         self,
+         action: MolForgeAction,
+         reward_components: List[RewardComponent],
+         previous_score: float,
+     ) -> float:
+         previous_signature = self._molecule_signature()
+         previous_fragment = self._molecule[action.slot]  # type: ignore[index]
+         safe_defaults = {
+             "warhead": "nitrile",
+             "hinge": "pyridine",
+             "solvent_tail": "morpholine",
+             "back_pocket": "methoxy",
+         }
+
+         if action.edit_type == "remove":
+             self._molecule[action.slot] = safe_defaults[action.slot]  # type: ignore[index]
+         else:
+             self._molecule[action.slot] = action.fragment  # type: ignore[index]
+
+         new_signature = self._molecule_signature()
+         new_properties = self._true_properties()
+         new_score = compute_objective_score(new_properties, self._scenario)
+         delta = round(new_score - previous_score, 4)
+         if self._reward_mode == "dense":
+             reward = delta * 2.0
+             explanation = (
+                 f"Updated {action.slot} from {previous_fragment} to {self._molecule[action.slot]}, "
+                 f"changing the internal objective score by {delta:+.3f}."
+             )
+         else:
+             reward = 0.04 if delta > 0 else (-0.04 if delta < 0 else 0.0)
+             explanation = (
+                 f"Updated {action.slot} from {previous_fragment} to {self._molecule[action.slot]}. "
+                 "Edit feedback is intentionally coarse; assays and terminal graders provide the main signal."
+             )
+
+         reward_components.append(
+             RewardComponent(
+                 name="edit_delta",
+                 value=round(reward, 4),
+                 explanation=explanation,
+             )
+         )
+
+         if new_signature in self._visited_states:
+             reward -= 0.35
+             reward_components.append(
+                 RewardComponent(
+                     name="loop_penalty",
+                     value=-0.35,
+                     explanation="This edit revisited a previously explored molecular state.",
+                 )
+             )
+         else:
+             reward += 0.06
+             self._visited_states.add(new_signature)
+
+         reward -= 0.12
+         reward_components.append(
+             RewardComponent(
+                 name="turn_cost",
+                 value=-0.12,
+                 explanation="Every chemistry edit consumes simulated project time.",
+             )
+         )
+         self._last_summary = (
122
+ f"Lead Chemist edited {action.slot}; molecule changed from "
123
+ f"{previous_signature} to {new_signature}."
124
+ )
125
+ return reward
126
+
127
+ def _run_assay(
128
+ self,
129
+ action: MolForgeAction,
130
+ reward_components: List[RewardComponent],
131
+ ) -> float:
132
+ tool_name = action.tool_name or ""
133
+ cost = DEFAULT_TOOL_COSTS[tool_name]
134
+ self._state.remaining_budget -= cost
135
+ self._state.budget_used += cost
136
+ self._state.oracle_call_count += 1
137
+
138
+ key = f"{self._molecule_signature()}::{tool_name}"
139
+ runs = self._assay_runs.get(key, 0) + 1
140
+ self._assay_runs[key] = runs
141
+
142
+ reward = 0.02
143
+ if runs == 1:
144
+ reward += 0.10
145
+ explanation = "First assay on this molecule/tool pair increased observability."
146
+ else:
147
+ reward -= 0.08
148
+ explanation = "Repeated assay spent budget on the same molecule/tool pair."
149
+
150
+ readings = self._build_assay_readings(tool_name, runs)
151
+ self._merge_assays(readings)
152
+ if tool_name == "search_literature":
153
+ reward += 0.04
154
+ if self._reward_mode == "curriculum" and runs == 1:
155
+ required_props = {"potency", "toxicity"}
156
+ if "synth_min" in self._scenario.hard_constraints:
157
+ required_props.add("synth")
158
+ covered_props = {
159
+ reading.property_name
160
+ for reading in readings
161
+ if reading.property_name in required_props
162
+ }
163
+ if covered_props:
164
+ bonus = 0.08 * len(covered_props)
165
+ reward += bonus
166
+ reward_components.append(
167
+ RewardComponent(
168
+ name="curriculum_evidence_gate",
169
+ value=round(bonus, 4),
170
+ explanation=(
171
+ "Curriculum reward for collecting first-pass evidence "
172
+ f"for: {', '.join(sorted(covered_props))}."
173
+ ),
174
+ )
175
+ )
176
+
177
+ reward_components.append(
178
+ RewardComponent(
179
+ name="assay_information_gain",
180
+ value=round(reward, 4),
181
+ explanation=explanation,
182
+ )
183
+ )
184
+ reward_components.append(
185
+ RewardComponent(
186
+ name="budget_spend",
187
+ value=round(-cost / max(self._scenario.oracle_budget, 1), 4),
188
+ explanation=f"Spent {cost} assay budget on {tool_name}.",
189
+ )
190
+ )
191
+ reward -= cost / max(self._scenario.oracle_budget, 1)
192
+
193
+ self._oracle_log.append(
194
+ {
195
+ "step": self._state.step_count,
196
+ "tool_name": tool_name,
197
+ "runs": runs,
198
+ "molecule": self._molecule_signature(),
199
+ "cost": cost,
200
+ "results": [reading.model_dump() for reading in readings],
201
+ }
202
+ )
203
+ self._last_summary = (
204
+ f"Assay Planner executed {tool_name}; {len(readings)} structured assay result(s) are now visible."
205
+ )
206
+ return reward
207
+
208
+ def _submit(self, reward_components: List[RewardComponent]) -> tuple[float, bool]:
209
+ properties = self._true_properties()
210
+ final_score = compute_objective_score(properties, self._scenario)
211
+ constraint_results = evaluate_constraints(properties, self._scenario)
212
+ constraint_margins = evaluate_constraint_margins(properties, self._scenario)
213
+ margin_score = sum(constraint_margins.values()) / max(len(constraint_margins), 1)
214
+ violation_penalty = round((1.0 - margin_score) * 2.0, 4)
215
+ hard_constraints_met = all(result[0] for result in constraint_results.values())
216
+ budget_efficiency = self._state.remaining_budget / max(self._scenario.oracle_budget, 1)
217
+ beats_baseline = final_score >= self._scenario.baseline_to_beat
218
+ current_signature = self._molecule_signature()
219
+ evidence_requirements = ["potency", "toxicity"]
220
+ if "synth_min" in self._scenario.hard_constraints:
221
+ evidence_requirements.append("synth")
222
+ missing_evidence = [
223
+ prop for prop in evidence_requirements if self._current_property_estimate(prop, current_signature) is None
224
+ ]
225
+ evidence_met = not missing_evidence
226
+ post_shift_evidence_met = True
227
+ if self._scenario.target_shift_step and self._target_shift_active():
228
+ post_shift_evidence_met = any(
229
+ entry["step"] >= self._scenario.target_shift_step
230
+ and entry["molecule"] == current_signature
231
+ and any(result["property_name"] == "potency" for result in entry["results"])
232
+ for entry in self._oracle_log
233
+ )
234
+ valid_submission = hard_constraints_met and beats_baseline and evidence_met and post_shift_evidence_met
235
+
236
+ reward = final_score * 2.0 if valid_submission else final_score * 0.25
237
+ if valid_submission:
238
+ reward += 3.5
239
+ elif not hard_constraints_met:
240
+ reward -= violation_penalty
241
+ if not beats_baseline:
242
+ reward -= 0.6
243
+ if not evidence_met:
244
+ reward -= 1.2
245
+ if not post_shift_evidence_met:
246
+ reward -= 0.8
247
+
248
+ if valid_submission:
249
+ reward += max(0.0, budget_efficiency) * 0.7
250
+ if self._reward_mode == "curriculum" and evidence_met and post_shift_evidence_met:
251
+ submit_bonus = 0.35
252
+ if hard_constraints_met:
253
+ submit_bonus += 0.15
254
+ reward += submit_bonus
255
+
256
+ self._state.submitted = True
257
+ self._report_card = self._build_report_card(submitted=True)
258
+ self._last_summary = (
259
+ f"The team submitted a candidate that "
260
+ f"{'passed' if hard_constraints_met else 'failed'} hard constraints."
261
+ )
262
+
263
+ reward_components.extend(
264
+ [
265
+ RewardComponent(
266
+ name="submission_quality",
267
+ value=round((final_score * 2.0 if valid_submission else final_score * 0.25), 4),
268
+ explanation=(
269
+ "Full scientific quality reward because the submission met constraints, baseline, and evidence gates."
270
+ if valid_submission
271
+ else "Only a small quality trace is awarded because the submit action missed a gate."
272
+ ),
273
+ ),
274
+ RewardComponent(
275
+ name="hard_constraints",
276
+ value=(
277
+ 3.5
278
+ if valid_submission
279
+ else (-violation_penalty if not hard_constraints_met else 0.0)
280
+ ),
281
+ explanation=(
282
+ "Large sparse bonus for beating baseline with required current evidence."
283
+ if valid_submission
284
+ else "Submission missed constraints, baseline, or evidence requirements; constraint penalty scales with violation severity."
285
+ ),
286
+ ),
287
+ RewardComponent(
288
+ name="constraint_margin",
289
+ value=round(margin_score, 4),
290
+ explanation=(
291
+ "Proportional hard-constraint score: worse potency, toxicity, or synthesis violations produce lower values."
292
+ ),
293
+ ),
294
+ RewardComponent(
295
+ name="baseline_gate",
296
+ value=0.0 if beats_baseline else -0.6,
297
+ explanation=(
298
+ "Submitted molecule beat the scenario baseline."
299
+ if beats_baseline
300
+ else "Submitted molecule did not beat the scenario baseline."
301
+ ),
302
+ ),
303
+ RewardComponent(
304
+ name="submission_evidence",
305
+ value=0.0 if evidence_met else -1.2,
306
+ explanation=(
307
+ "Current-molecule potency/toxicity/synthesis evidence was available."
308
+ if evidence_met
309
+ else f"Submission lacked current evidence for: {', '.join(missing_evidence)}."
310
+ ),
311
+ ),
312
+ RewardComponent(
313
+ name="post_shift_evidence",
314
+ value=0.0 if post_shift_evidence_met else -0.8,
315
+ explanation=(
316
+ "Post-shift potency evidence was available for the submitted molecule."
317
+ if post_shift_evidence_met
318
+ else "Hard scenario submission lacked post-shift potency evidence for the current molecule."
319
+ ),
320
+ ),
321
+ RewardComponent(
322
+ name="budget_efficiency",
323
+ value=round(max(0.0, budget_efficiency) * 0.7, 4) if valid_submission else 0.0,
324
+ explanation=(
325
+ "Unused budget is rewarded to discourage wasteful oracle usage."
326
+ if valid_submission
327
+ else "Budget efficiency is not awarded to a gated or premature submission."
328
+ ),
329
+ ),
330
+ ]
331
+ )
332
+ if self._reward_mode == "curriculum" and evidence_met and post_shift_evidence_met:
333
+ reward_components.append(
334
+ RewardComponent(
335
+ name="curriculum_evidence_supported_submit",
336
+ value=round(submit_bonus, 4),
337
+ explanation=(
338
+ "Curriculum reward for making a formal submit decision after the required "
339
+ "current evidence package was available."
340
+ ),
341
+ )
342
+ )
343
+ return reward, True
344
+
345
+ def _restart(self, reward_components: List[RewardComponent]) -> float:
346
+ self._molecule = dict(self._scenario.restart_scaffold)
347
+ self._trap_penalty_active = False
348
+ self._known_assays = []
349
+ self._assay_runs = {}
350
+ self._restart_used = True
351
+ self._visited_states.add(self._molecule_signature())
352
+ self._state.remaining_budget -= 350
353
+ self._state.budget_used += 350
354
+ reward_components.append(
355
+ RewardComponent(
356
+ name="restart_penalty",
357
+ value=-0.4,
358
+ explanation="Restarting discards sunk work but switches to a clean scaffold family.",
359
+ )
360
+ )
361
+ self._last_summary = (
362
+ "The team abandoned the original scaffold series and restarted from a cleaner alternative."
363
+ )
364
+ return -0.4
365
+
366
+ def _build_assay_readings(self, tool_name: str, runs: int) -> List[AssayReading]:
367
+ properties = self._true_properties()
368
+ signature = self._molecule_signature()
369
+
370
+ if tool_name == "evaluate_properties":
371
+ property_names = ["potency", "novelty"]
372
+ elif tool_name == "dock_target":
373
+ property_names = ["potency"]
374
+ elif tool_name == "assay_toxicity":
375
+ property_names = ["toxicity"]
376
+ elif tool_name == "estimate_synthesizability":
377
+ property_names = ["synth"]
378
+ elif tool_name == "evaluate_novelty":
379
+ property_names = ["novelty"]
380
+ elif tool_name == "search_literature":
381
+ hint_score = min(0.95, 0.45 + 0.08 * runs)
382
+ return [
383
+ AssayReading(
384
+ tool_name=tool_name,
385
+ property_name="literature_signal",
386
+ estimate=round(hint_score, 4),
387
+ confidence_low=max(0.0, round(hint_score - 0.08, 4)),
388
+ confidence_high=min(1.0, round(hint_score + 0.08, 4)),
389
+ runs=runs,
390
+ molecule_signature=signature,
391
+ summary=literature_hints(self._molecule)[0],
392
+ )
393
+ ]
394
+ else:
395
+ property_names = ["potency", "toxicity", "synth"]
396
+
397
+ readings = []
398
+ for property_name in property_names:
399
+ true_value = properties[property_name]
400
+ estimate = self._assay_estimate(signature, tool_name, property_name, runs, true_value)
401
+ width = max(0.03, 0.18 / runs)
402
+ readings.append(
403
+ AssayReading(
404
+ tool_name=tool_name,
405
+ property_name=property_name,
406
+ estimate=estimate,
407
+ confidence_low=max(0.0, round(estimate - width, 4)),
408
+ confidence_high=min(1.0, round(estimate + width, 4)),
409
+ runs=runs,
410
+ molecule_signature=signature,
411
+ summary=f"{tool_name} estimated {property_name} with run count {runs}.",
412
+ )
413
+ )
414
+ return readings
server/app.py ADDED
@@ -0,0 +1,36 @@
1
+ """FastAPI app for MolForge."""
2
+
3
+ try:
4
+ from openenv.core.env_server.http_server import create_app
5
+ except Exception as exc: # pragma: no cover
6
+ raise ImportError(
7
+ "openenv-core is required to run MolForge. Install dependencies from pyproject.toml."
8
+ ) from exc
9
+
10
+ try:
11
+ from ..models import MolForgeAction, MolForgeObservation
12
+ from .molforge_environment import MolForgeEnvironment
13
+ except ImportError:
14
+ from models import MolForgeAction, MolForgeObservation
15
+ from server.molforge_environment import MolForgeEnvironment
16
+
17
+
18
+ app = create_app(
19
+ MolForgeEnvironment,
20
+ MolForgeAction,
21
+ MolForgeObservation,
22
+ env_name="molforge",
23
+ max_concurrent_envs=2,
24
+ )
25
+
26
+
27
+ def main(host: str = "0.0.0.0", port: int = 8000):
28
+ """Run the environment locally without Docker."""
29
+
30
+ import uvicorn
31
+
32
+ uvicorn.run(app, host=host, port=port)
33
+
34
+
35
+ if __name__ == "__main__":
36
+ main()
server/governance.py ADDED
@@ -0,0 +1,576 @@
1
+ """Governance, validation, and coordination logic for MolForge."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any, Dict, List, Mapping, Optional
6
+
7
+ from .shared import (
8
+ DEFAULT_TOOL_COSTS,
9
+ EDITABLE_SLOTS,
10
+ FRAGMENT_LIBRARY,
11
+ ROLE_MESSAGE_TYPES,
12
+ ROLE_PERMISSIONS,
13
+ )
14
+
15
+ try:
16
+ from ..models import GovernanceStatus, MolForgeAction, RewardComponent
17
+ except ImportError:
18
+ from models import GovernanceStatus, MolForgeAction, RewardComponent
19
+
20
+
21
+ class MolForgeGovernanceMixin:
22
+ """Validation and multi-agent review methods."""
23
+
24
+ def _validate_action(self, action: MolForgeAction) -> Optional[tuple[str, str]]:
25
+ if action.action_type not in self._scenario.enabled_actions:
26
+ return "ACTION_DISABLED", f"{action.action_type} is disabled for this scenario."
27
+
28
+ if action.acting_role not in self._scenario.enabled_roles:
29
+ return "ROLE_DISABLED", f"{action.acting_role} is not enabled for this scenario."
30
+
31
+ allowed_actions = ROLE_PERMISSIONS.get(action.acting_role, [])
32
+ if action.action_type not in allowed_actions:
33
+ return (
34
+ "ROLE_PERMISSION_DENIED",
35
+ f"{action.acting_role} is not permitted to execute {action.action_type}.",
36
+ )
37
+
38
+ if len(action.messages) > self._scenario.max_messages_per_turn:
39
+ return (
40
+ "MESSAGE_LIMIT_EXCEEDED",
41
+ f"At most {self._scenario.max_messages_per_turn} messages may be sent per turn.",
42
+ )
43
+
44
+ seen_senders = set()
45
+ for message in action.messages:
46
+ if message.sender not in self._scenario.enabled_roles:
47
+ return "MESSAGE_ROLE_INVALID", f"{message.sender} is not enabled in this scenario."
48
+ if message.sender in seen_senders:
49
+ return (
50
+ "DUPLICATE_ROLE_MESSAGE",
51
+ f"Each specialist may emit at most one message per turn; duplicate from {message.sender}.",
52
+ )
53
+ seen_senders.add(message.sender)
54
+ if message.message_type not in ROLE_MESSAGE_TYPES.get(message.sender, []):
55
+ return (
56
+ "MESSAGE_PERMISSION_DENIED",
57
+ f"{message.sender} cannot emit message type {message.message_type}.",
58
+ )
59
+
60
+ if action.action_type == "edit":
61
+ if action.slot is None or action.edit_type is None:
62
+ return "MISSING_EDIT_FIELDS", "Edit actions require both slot and edit_type."
63
+ if action.slot not in EDITABLE_SLOTS:
64
+ return "INVALID_SLOT", f"{action.slot} is not editable in MolForge."
65
+ if action.edit_type in {"add_fragment", "substitute"} and not action.fragment:
66
+ return "MISSING_FRAGMENT", "Edit actions require a fragment for add/substitute."
67
+ if action.fragment:
68
+ if action.fragment not in FRAGMENT_LIBRARY[action.slot]:
69
+ return "UNKNOWN_FRAGMENT", f"{action.fragment} is not valid for slot {action.slot}."
70
+ if self._molecule[action.slot] == action.fragment:
71
+ return "NO_STATE_CHANGE", "Edit selected the fragment already present in that slot."
72
+
73
+ if action.action_type == "run_assay":
74
+ if action.tool_name is None:
75
+ return "MISSING_TOOL_NAME", "run_assay actions require a tool_name."
76
+ if action.tool_name not in self._scenario.enabled_tools:
77
+ return "TOOL_DISABLED", f"{action.tool_name} is not enabled for this scenario."
78
+ cost = DEFAULT_TOOL_COSTS[action.tool_name]
79
+ if self._state.remaining_budget < cost:
80
+ return "BUDGET_EXCEEDED", f"{action.tool_name} costs {cost}, exceeding remaining budget."
81
+
82
+ if action.action_type == "restart":
83
+ if self._restart_used:
84
+ return "RESTART_ALREADY_USED", "restart_from_new_scaffold may be used at most once per episode."
85
+ if self._state.remaining_budget < 350:
86
+ return "BUDGET_EXCEEDED", "Not enough budget remains to restart from a new scaffold."
87
+
88
+ return None
89
+
90
+ def _assess_governance(
91
+ self,
92
+ action: MolForgeAction,
93
+ previous_properties: Mapping[str, float],
94
+ ) -> tuple[GovernanceStatus, List[RewardComponent], bool]:
95
+ reward_components: List[RewardComponent] = []
96
+ approvals: List[str] = []
97
+ objections: List[str] = []
98
+ vetoes: List[str] = []
99
+ required_roles = (
100
+ []
101
+ if action.action_type == "defer"
102
+ else [role for role in self._scenario.required_review_roles if role != action.acting_role]
103
+ )
104
+ policy_veto = False
105
+
106
+ current_signature = self._molecule_signature()
107
+ simulated_properties = self._simulate_action_properties(action)
108
+ sender_map = {message.sender: message for message in action.messages}
109
+
110
+ actor_message = sender_map.get(action.acting_role)
111
+ if action.action_type != "defer":
112
+ if actor_message and actor_message.message_type == "proposal":
113
+ self._record_message(actor_message)
114
+ reward_components.append(
115
+ RewardComponent(
116
+ name="proposal_logged",
117
+ value=0.05,
118
+ explanation=f"{action.acting_role} logged a structured proposal before execution.",
119
+ )
120
+ )
121
+ self._role_metrics[action.acting_role]["correct_messages"] += 1
122
+ else:
123
+ reward_components.append(
124
+ RewardComponent(
125
+ name="missing_proposal",
126
+ value=-0.06,
127
+ explanation="The acting specialist did not provide an explicit proposal message.",
128
+ )
129
+ )
130
+
131
+ for role in required_roles:
132
+ expected = self._expected_feedback(role, action, previous_properties, simulated_properties)
133
+ actual = sender_map.get(role)
134
+ if actual is None:
135
+ reward_components.append(
136
+ RewardComponent(
137
+ name=f"missing_review_{role}",
138
+ value=-0.08,
139
+ explanation=f"{role} did not provide the required review for this turn.",
140
+ )
141
+ )
142
+ if expected["hard_veto"]:
143
+ policy_veto = True
144
+ vetoes.append(role)
145
+ continue
146
+
147
+ if role != action.acting_role:
148
+ self._record_message(actual)
149
+ if self._matches_feedback(actual.message_type, expected["type"]):
150
+ reward_components.append(
151
+ RewardComponent(
152
+ name=f"coordination_{role}",
153
+ value=0.12,
154
+ explanation=expected["reason"],
155
+ )
156
+ )
157
+ self._role_metrics[role]["correct_messages"] += 1
158
+ if expected["type"] in {"approval", "submission_recommendation"}:
159
+ approvals.append(role)
160
+ else:
161
+ objections.append(role)
162
+ elif expected["type"] == "neutral":
163
+ reward_components.append(
164
+ RewardComponent(
165
+ name=f"unnecessary_message_{role}",
166
+ value=-0.02,
167
+ explanation=f"{role} contributed a message even though no strong intervention was needed.",
168
+ )
169
+ )
170
+ self._role_metrics[role]["incorrect_messages"] += 1
171
+ else:
172
+ reward_components.append(
173
+ RewardComponent(
174
+ name=f"misaligned_review_{role}",
175
+ value=-0.1,
176
+ explanation=(
177
+ f"{role} sent {actual.message_type}, but the hidden environment evaluation "
178
+ f"expected {expected['type']}."
179
+ ),
180
+ )
181
+ )
182
+ self._role_metrics[role]["incorrect_messages"] += 1
183
+ if expected["hard_veto"]:
184
+ policy_veto = True
185
+ vetoes.append(role)
186
+
187
+ if expected["hard_veto"] and actual and self._matches_feedback(actual.message_type, expected["type"]):
188
+ policy_veto = True
189
+ vetoes.append(role)
190
+
191
+ extra_roles = {
192
+ sender
193
+ for sender in sender_map
194
+ if sender not in required_roles and sender != action.acting_role
195
+ }
196
+ for role in sorted(extra_roles):
197
+ self._record_message(sender_map[role])
198
+ reward_components.append(
199
+ RewardComponent(
200
+ name=f"optional_review_{role}",
201
+ value=0.02,
202
+ explanation=f"{role} added optional context for the current decision.",
203
+ )
204
+ )
205
+ self._role_metrics[role]["correct_messages"] += 1
206
+
207
+ if policy_veto:
208
+ reward_components.append(
209
+ RewardComponent(
210
+ name="policy_veto",
211
+ value=-0.35,
212
+ explanation="A specialist raised a valid hard veto, so the action was blocked.",
213
+ )
214
+ )
215
+
216
+ explanation = (
217
+ "Team review approved the decision."
218
+ if not policy_veto
219
+ else f"Action blocked after hard veto from {', '.join(vetoes)}."
220
+ )
221
+ status = "policy_veto" if policy_veto else "ready"
222
+ return (
223
+ GovernanceStatus(
224
+ status=status,
225
+ explanation=explanation,
226
+ required_roles=required_roles,
227
+ approvals=approvals,
228
+ objections=objections,
229
+ vetoes=vetoes,
230
+ executable=not policy_veto,
231
+ ),
232
+ reward_components,
233
+ policy_veto,
234
+ )
235
+
236
+ def _expected_feedback(
237
+ self,
238
+ role: str,
239
+ action: MolForgeAction,
240
+ previous_properties: Mapping[str, float],
241
+ simulated_properties: Mapping[str, float],
242
+ ) -> Dict[str, Any]:
243
+ tox_threshold = self._scenario.hard_constraints.get("toxicity_max", 0.45)
244
+ synth_threshold = self._scenario.hard_constraints.get("synth_min", 0.56)
245
+ current_signature = self._molecule_signature()
246
+ known_toxicity = self._current_property_estimate("toxicity", current_signature)
247
+ known_synth = self._current_property_estimate("synth", current_signature)
248
+
249
+ neutral = {"type": "neutral", "hard_veto": False, "reason": f"{role} had no strong intervention."}
250
+
251
+ if role == "toxicologist":
252
+ if action.action_type == "run_assay":
253
+ if action.tool_name == "assay_toxicity":
254
+ return {
255
+ "type": "approval",
256
+ "hard_veto": False,
257
+ "reason": "Toxicologist correctly encouraged a direct safety assay.",
258
+ }
259
+ return {
260
+ "type": "approval",
261
+ "hard_veto": False,
262
+ "reason": "Toxicologist correctly allowed an assay that improves decision evidence without increasing chemical risk.",
263
+ }
264
+ if action.action_type == "submit":
265
+ if known_toxicity is not None and known_toxicity > tox_threshold:
266
+ return {
267
+ "type": "objection",
268
+ "hard_veto": simulated_properties["toxicity"] > tox_threshold,
269
+ "reason": "Toxicologist correctly objected to a submission with visible toxicity evidence above threshold.",
270
+ }
271
+ if simulated_properties["toxicity"] > tox_threshold:
272
+ return {
273
+ "type": "objection",
274
+ "hard_veto": True,
275
+ "reason": "Toxicologist correctly blocked an unsafe submission.",
276
+ }
277
+ if known_toxicity is None:
278
+ return {
279
+ "type": "assay_request",
280
+ "hard_veto": True,
281
+ "reason": "Toxicologist correctly demanded explicit toxicity evidence before submission.",
282
+ }
283
+ return {
284
+ "type": "approval",
285
+ "hard_veto": False,
286
+ "reason": "Toxicologist correctly approved a submission with acceptable visible safety evidence.",
287
+ }
288
+ if action.action_type in {"edit", "restart"}:
289
+ toxicity_delta = simulated_properties["toxicity"] - previous_properties["toxicity"]
290
+ if toxicity_delta > 0.08:
291
+ return {
292
+ "type": "objection",
293
+ "hard_veto": True,
294
+ "reason": "Toxicologist correctly raised a hard objection to a major safety regression.",
295
+ }
296
+ if (
297
+ simulated_properties["toxicity"] > tox_threshold + 0.02
298
+ and toxicity_delta >= -0.02
299
+ ):
300
+ return {
301
+ "type": "objection",
302
+ "hard_veto": True,
303
+ "reason": "Toxicologist correctly blocked a move that left an unsafe scaffold unimproved.",
304
+ }
305
+ if simulated_properties["toxicity"] > tox_threshold + 0.02:
306
+ return {
307
+ "type": "approval",
308
+ "hard_veto": False,
309
+ "reason": "Toxicologist correctly allowed a risk-reducing move while residual safety work remains.",
310
+ }
311
+ return {
312
+ "type": "approval",
313
+ "hard_veto": False,
314
+ "reason": "Toxicologist correctly approved a safety-compatible move.",
315
+ }
316
+ return neutral
317
+
318
+ if role == "assay_planner":
319
+ if action.action_type == "run_assay":
320
+ info_gain = self._estimate_information_gain(action.tool_name or "")
321
+ prior_runs = self._assay_runs.get(f"{current_signature}::{action.tool_name}", 0)
322
+ if (action.tool_name == "run_md_simulation" and self._state.remaining_budget < 4500) or (
323
+ prior_runs > 0 and info_gain < 0.05
324
+ ):
325
+ return {
326
+ "type": "rejection",
327
+ "hard_veto": True,
328
+ "reason": "Assay Planner correctly blocked a wasteful or over-expensive assay.",
329
+ }
330
+ if info_gain < 0.04 and action.tool_name != "search_literature":
331
+ return {
332
+ "type": "rejection",
333
+ "hard_veto": False,
334
+ "reason": "Assay Planner correctly questioned a low-value assay.",
335
+ }
336
+ return {
337
+ "type": "approval",
338
+ "hard_veto": False,
339
+ "reason": "Assay Planner correctly approved an information-efficient assay.",
340
+ }
341
+ if action.action_type == "submit":
342
+ required_props = ["potency", "toxicity"]
343
+ if "synth_min" in self._scenario.hard_constraints:
344
+ required_props.append("synth")
345
+ missing = [
346
+ prop for prop in required_props if self._current_property_estimate(prop, current_signature) is None
347
+ ]
348
+ if missing:
349
+ return {
350
+ "type": "assay_request",
351
+ "hard_veto": True,
352
+ "reason": "Assay Planner correctly asked for missing evidence before submission.",
353
+ }
354
+ return {
355
+ "type": "approval",
356
+ "hard_veto": False,
357
+ "reason": "Assay Planner correctly approved a well-supported submission.",
358
+ }
359
+ if action.action_type == "restart":
360
+ potency_threshold = self._scenario.hard_constraints.get("potency_min", 0.72)
361
+ if self._scenario.trap_penalty and previous_properties["potency"] < potency_threshold:
362
+ return {
363
+ "type": "approval",
364
+ "hard_veto": False,
365
+ "reason": "Assay Planner correctly endorsed escaping a low-value scaffold family.",
366
+ }
367
+ return {
368
+ "type": "rejection",
369
+ "hard_veto": False,
370
+ "reason": "Assay Planner correctly questioned an unnecessary restart.",
371
+ }
372
+ if action.action_type == "edit":
373
+ return {
374
+ "type": "approval",
375
+ "hard_veto": False,
376
+ "reason": "Assay Planner correctly approved a low-cost edit before spending assay budget.",
377
+ }
378
+ return neutral
379
+
380
+ if role == "process_chemist":
381
+ if action.action_type == "run_assay":
382
+ if action.tool_name == "estimate_synthesizability":
383
+ return {
384
+ "type": "approval",
385
+ "hard_veto": False,
386
+ "reason": "Process Chemist correctly requested explicit synthesizability evidence.",
387
+ }
388
+ return {
389
+ "type": "approval",
390
+ "hard_veto": False,
391
+ "reason": "Process Chemist correctly allowed an assay that does not worsen route feasibility.",
392
+ }
393
+ if action.action_type == "submit":
394
+ if known_synth is not None and known_synth < synth_threshold:
395
+ return {
396
+ "type": "objection",
397
+ "hard_veto": simulated_properties["synth"] < synth_threshold,
398
+ "reason": "Process Chemist correctly objected to a submission with visible route evidence below threshold.",
399
+ }
400
+ if simulated_properties["synth"] < synth_threshold:
401
+ return {
402
+ "type": "objection",
403
+ "hard_veto": "synth_min" in self._scenario.hard_constraints,
404
+ "reason": "Process Chemist correctly blocked a submission that looks infeasible to make.",
405
+ }
406
+ if known_synth is None:
407
+ return {
408
+ "type": "assay_request",
409
+ "hard_veto": False,
410
+ "reason": "Process Chemist correctly asked for synthesizability evidence before submission.",
411
+ }
412
+ return {
413
+ "type": "approval",
414
+ "hard_veto": False,
415
+ "reason": "Process Chemist correctly approved a feasible-looking submission.",
416
+ }
417
+ if action.action_type in {"edit", "restart"}:
418
+ if simulated_properties["synth"] < synth_threshold - 0.03:
419
+ return {
420
+ "type": "objection",
421
+ "hard_veto": False,
422
+ "reason": "Process Chemist correctly flagged a severe feasibility regression.",
423
+ }
424
+ if previous_properties["synth"] - simulated_properties["synth"] > 0.08:
425
+ return {
426
+ "type": "objection",
427
+ "hard_veto": False,
428
+ "reason": "Process Chemist correctly objected to a less tractable route.",
429
+ }
430
+ return {
431
+ "type": "approval",
432
+ "hard_veto": False,
433
+ "reason": "Process Chemist correctly approved a tractable chemistry move.",
434
+ }
435
+ return neutral
436
+
437
+ return neutral
438
+
439
+ @staticmethod
440
+ def _matches_feedback(actual_type: str, expected_type: str) -> bool:
441
+ if expected_type == "neutral":
442
+ return False
443
+ if expected_type == "approval":
444
+ return actual_type in {"approval", "submission_recommendation"}
445
+ if expected_type == "objection":
446
+ return actual_type in {"objection", "risk_flag", "rejection"}
447
+ if expected_type == "rejection":
448
+ return actual_type in {"rejection", "objection"}
449
+ if expected_type == "assay_request":
450
+ return actual_type == "assay_request"
451
+ return actual_type == expected_type
452
+
453
+ def _evaluate_reasoning_consistency(
454
+ self,
455
+ action: MolForgeAction,
456
+ previous_properties: Mapping[str, float],
457
+ current_properties: Mapping[str, float],
458
+ reward_components: List[RewardComponent],
459
+ ) -> float:
460
+ del previous_properties, current_properties
461
+
462
+ rationale = action.rationale.lower().strip()
463
+ evidence = [item.lower().strip() for item in action.evidence if item.strip()]
464
+ expected_effects = {key: value for key, value in action.expected_effects.items() if value}
465
+ score = 0.0
466
+ explanations = []
467
+
468
+ if rationale:
469
+ score += 0.02
470
+ explanations.append("short rationale present")
471
+ else:
472
+ score -= 0.03
473
+ explanations.append("missing rationale")
474
+
475
+ if evidence:
476
+ grounded = sum(1 for item in evidence if self._evidence_item_is_visible(item))
477
+ score += min(grounded, 3) * 0.015
478
+ if grounded < len(evidence):
479
+ score -= min(len(evidence) - grounded, 2) * 0.02
480
+ explanations.append(f"{grounded}/{len(evidence)} evidence item(s) matched visible state")
481
+ else:
482
+ score -= 0.03
483
+ explanations.append("missing visible evidence")
484
+
485
+ if expected_effects:
486
+ plausible = sum(
487
+ 1
488
+ for metric, direction in expected_effects.items()
489
+ if self._expected_effect_is_plausible(action, metric, direction)
490
+ )
491
+ checked = len(expected_effects)
492
+ score += min(plausible, 3) * 0.01
493
+ if plausible < checked:
494
+ score -= min(checked - plausible, 2) * 0.015
495
+ explanations.append(f"{plausible}/{checked} expected effect(s) were directionally plausible")
496
+ else:
497
+ score -= 0.02
498
+ explanations.append("missing expected effects")
499
+
500
+ score = max(-0.04, min(0.04, score))
501
+
502
+ if score != 0.0:
503
+ reward_components.append(
504
+ RewardComponent(
505
+ name="reasoning_grounding",
506
+ value=round(score, 4),
507
+ explanation="; ".join(explanations),
508
+ )
509
+ )
510
+ return score
511
+
512
+ def _evidence_item_is_visible(self, item: str) -> bool:
513
+ if not item:
514
+ return False
515
+ visible_terms = {
516
+ self._scenario.scenario_id.lower(),
517
+ self._scenario.difficulty.lower(),
518
+ self._molecule_signature().lower(),
519
+ str(self._state.remaining_budget),
520
+ str(self._state.max_budget),
521
+ str(self._state.step_count),
522
+ str(self._scenario.max_steps),
523
+ }
524
+ visible_terms.update(fragment.lower() for fragment in self._molecule.values())
525
+ visible_terms.update(tool.lower() for tool in self._scenario.enabled_tools)
526
+ visible_terms.update(constraint.lower() for constraint in self._scenario.hard_constraints)
527
+ visible_terms.update(reading.property_name.lower() for reading in self._known_assays)
528
+ visible_terms.update(reading.tool_name.lower() for reading in self._known_assays)
529
+ return any(term and term in item for term in visible_terms)
530
+
531
+ def _expected_effect_is_plausible(
532
+ self,
533
+ action: MolForgeAction,
534
+ metric: str,
535
+ direction: str,
536
+ ) -> bool:
537
+ if metric not in {"potency", "toxicity", "synth", "novelty", "budget"}:
538
+ return False
539
+ if direction not in {"up", "down", "neutral", "unknown", "not_applicable"}:
540
+ return False
541
+ if direction in {"unknown", "not_applicable"}:
542
+ return True
543
+ if metric == "budget":
544
+ if action.action_type in {"run_assay", "restart"}:
545
+ return direction == "down"
546
+ return direction == "neutral"
547
+ if action.action_type in {"run_assay", "submit", "defer"}:
548
+ return direction in {"neutral", "unknown", "not_applicable"}
549
+ if action.action_type == "restart":
550
+ if metric in {"toxicity", "budget"}:
551
+ return direction == "down"
552
+ if metric == "synth":
553
+ return direction in {"up", "unknown"}
554
+ return direction in {"up", "unknown", "neutral"}
555
+ if action.action_type != "edit" or not action.slot or not action.fragment:
556
+ return direction in {"neutral", "unknown"}
557
+
558
+ fragment = action.fragment
559
+ plausibility = {
560
+ ("hinge", "azaindole", "potency", "up"),
561
+ ("back_pocket", "cyano", "potency", "up"),
562
+ ("back_pocket", "cyano", "toxicity", "down"),
563
+ ("back_pocket", "chloro", "potency", "up"),
564
+ ("back_pocket", "chloro", "toxicity", "up"),
565
+ ("back_pocket", "trifluoromethyl", "potency", "up"),
566
+ ("back_pocket", "trifluoromethyl", "toxicity", "up"),
567
+ ("solvent_tail", "morpholine", "toxicity", "down"),
568
+ ("solvent_tail", "morpholine", "synth", "up"),
569
+ ("solvent_tail", "dimethylamino", "toxicity", "up"),
570
+ ("warhead", "reversible_cyanoacrylamide", "toxicity", "down"),
571
+ ("warhead", "reversible_cyanoacrylamide", "novelty", "up"),
572
+ ("warhead", "nitrile", "toxicity", "down"),
573
+ }
574
+ if (action.slot, fragment, metric, direction) in plausibility:
575
+ return True
576
+ return direction in {"neutral", "unknown"}
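The `_matches_feedback` ladder above collapses several message types into equivalence classes. A behavior-equivalent condensation of the same table (reviewer's sketch, not part of this diff) makes the mapping easier to audit:

```python
def matches_feedback(actual_type: str, expected_type: str) -> bool:
    """Condensed restatement of _matches_feedback: an actual message type
    counts as a match when it falls in the expected type's equivalence class."""
    if expected_type == "neutral":
        return False  # neutral expectations never match anything
    equivalents = {
        "approval": {"approval", "submission_recommendation"},
        "objection": {"objection", "risk_flag", "rejection"},
        "rejection": {"rejection", "objection"},
        "assay_request": {"assay_request"},
    }
    # Any other expected type falls through to exact equality, as in the diff.
    return actual_type in equivalents.get(expected_type, {expected_type})
```

This dictionary form and the if/elif ladder in the diff are interchangeable; the ladder trades a lookup table for explicit, greppable branches.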
server/molforge_environment.py ADDED
@@ -0,0 +1,342 @@
+ """MolForge environment implementation."""
+
+ from __future__ import annotations
+
+ import os
+ import random
+ from dataclasses import replace
+ from typing import Any, Dict, List
+ from uuid import uuid4
+
+ from openenv.core.env_server.interfaces import Environment
+
+ from .actions import MolForgeActionMixin
+ from .governance import MolForgeGovernanceMixin
+ from .shared import (
+     FRAGMENT_LIBRARY,
+     SCENARIOS,
+     SLOT_ORDER,
+     MolForgeSharedMixin,
+     compute_objective_score,
+     get_scenario,
+ )
+ from .views import MolForgeViewMixin
+
+ try:
+     from ..models import GovernanceStatus, MolForgeAction, MolForgeObservation, MolForgeState, RewardComponent
+ except ImportError:
+     from models import GovernanceStatus, MolForgeAction, MolForgeObservation, MolForgeState, RewardComponent
+
+
+ class MolForgeEnvironment(
+     MolForgeActionMixin,
+     MolForgeGovernanceMixin,
+     MolForgeViewMixin,
+     MolForgeSharedMixin,
+     Environment,
+ ):
+     """Deterministic medicinal-chemistry design environment for OpenEnv."""
+
+     SUPPORTS_CONCURRENT_SESSIONS: bool = True
+
+     def __init__(self):
+         self._debug_state_enabled = os.getenv("MOLFORGE_DEBUG_STATE", "").lower() in {"1", "true", "yes"}
+         self._training_randomization_enabled = os.getenv("MOLFORGE_TRAINING_RANDOMIZATION", "").lower() in {
+             "1",
+             "true",
+             "yes",
+         }
+         self._reward_mode = os.getenv("MOLFORGE_REWARD_MODE", "assay_gated").lower()
+         self._rng = random.Random(os.getenv("MOLFORGE_RANDOM_SEED", "molforge"))
+         self._reset_index = -1
+         self._state = MolForgeState(episode_id=str(uuid4()), step_count=0)
+         self._scenario = SCENARIOS[0]
+         self._molecule: Dict[str, str] = {}
+         self._assay_runs: Dict[str, int] = {}
+         self._known_assays: List = []
+         self._message_log: List = []
+         self._history: List[Dict[str, Any]] = []
+         self._oracle_log: List[Dict[str, Any]] = []
+         self._visited_states: set[str] = set()
+         self._last_summary = ""
+         self._report_card = ""
+         self._reward_total = 0.0
+         self._restart_used = False
+         self._trap_penalty_active = False
+         self._role_metrics = self._empty_role_metrics()
+         self._state_path: List[str] = ["[start]"]
+         self._last_governance = GovernanceStatus(
+             status="ready",
+             explanation="Awaiting the first coordinated decision.",
+             required_roles=[],
+             approvals=[],
+             objections=[],
+             vetoes=[],
+             executable=True,
+         )
+
+     def reset(self) -> MolForgeObservation:
+         """Start a new scenario in a deterministic rotation."""
+
+         self._reset_index += 1
+         self._scenario = self._select_reset_scenario()
+         self._molecule = dict(self._scenario.starting_scaffold)
+         self._assay_runs = {}
+         self._known_assays = []
+         self._message_log = []
+         self._history = []
+         self._oracle_log = []
+         self._visited_states = {self._molecule_signature()}
+         self._last_summary = "Episode initialized with a fresh multi-agent review board."
+         self._report_card = ""
+         self._reward_total = 0.0
+         self._restart_used = False
+         self._trap_penalty_active = self._scenario.trap_penalty
+         self._role_metrics = self._empty_role_metrics()
+         self._state_path = ["[start]"]
+         self._last_governance = GovernanceStatus(
+             status="ready",
+             explanation="Lead Chemist should propose the first coordinated action.",
+             required_roles=list(self._scenario.required_review_roles),
+             approvals=[],
+             objections=[],
+             vetoes=[],
+             executable=True,
+         )
+
+         self._state = MolForgeState(
+             episode_id=str(uuid4()),
+             step_count=0,
+             scenario_id=self._scenario.scenario_id,
+             difficulty=self._scenario.difficulty,
+             state_label="[start]",
+             state_path=list(self._state_path),
+             coordination_mode=self._scenario.coordination_mode,  # type: ignore[arg-type]
+             enabled_roles=list(self._scenario.enabled_roles),
+             target_name=self._scenario.target_name,
+             current_molecule=self._molecule_signature(),
+             remaining_budget=self._scenario.oracle_budget,
+             budget_used=0,
+             max_budget=self._scenario.oracle_budget,
+             visited_states=1,
+             known_assay_count=0,
+             invalid_action_count=0,
+             objection_count=0,
+             oracle_call_count=0,
+             message_count=0,
+             decision_count=0,
+             submitted=False,
+             reward_total=0.0,
+             metadata={},
+         )
+         self._sync_state_metadata()
+         return self._build_observation(reward=0.0, done=False, reward_components=[])
+
+     def _select_reset_scenario(self):
+         """Select a deterministic judge scenario or a randomized training variant."""
+
+         scenario = get_scenario(self._reset_index)
+         if not self._training_randomization_enabled:
+             return scenario
+
+         scenario = self._rng.choice(SCENARIOS)
+         budget_scale = self._rng.uniform(0.85, 1.15)
+         max_steps_delta = self._rng.choice([-1, 0, 0, 1])
+         starting_scaffold = dict(scenario.starting_scaffold)
+         if self._rng.random() < 0.35:
+             slot = self._rng.choice(SLOT_ORDER)
+             choices = [
+                 fragment
+                 for fragment in FRAGMENT_LIBRARY[slot]
+                 if fragment != starting_scaffold[slot]
+             ]
+             starting_scaffold[slot] = self._rng.choice(choices)
+         return replace(
+             scenario,
+             oracle_budget=max(1, int(round(scenario.oracle_budget * budget_scale))),
+             max_steps=max(4, scenario.max_steps + max_steps_delta),
+             starting_scaffold=starting_scaffold,
+         )
+
+     def step(self, action: MolForgeAction) -> MolForgeObservation:  # type: ignore[override]
+         """Execute one coordinated environment action."""
+
+         reward_components: List[RewardComponent] = []
+         done = False
+         error_code = ""
+         self._state.step_count += 1
+         self._state.decision_count += 1
+
+         previous_properties = self._true_properties()
+         previous_score = compute_objective_score(previous_properties, self._scenario)
+
+         validation_error = self._validate_action(action)
+         if validation_error:
+             error_code, message = validation_error
+             self._state.invalid_action_count += 1
+             self._last_governance = GovernanceStatus(
+                 status="needs_revision",
+                 explanation=message,
+                 required_roles=list(self._scenario.required_review_roles),
+                 approvals=[],
+                 objections=[],
+                 vetoes=[],
+                 executable=False,
+             )
+             reward_components.append(
+                 RewardComponent(
+                     name="invalid_action",
+                     value=-1.0,
+                     explanation=message,
+                 )
+             )
+             reward = -1.0
+             self._last_summary = message
+             self._append_state_label("[invalid]")
+         else:
+             governance, governance_components, policy_veto = self._assess_governance(
+                 action, previous_properties
+             )
+             self._last_governance = governance
+             reward_components.extend(governance_components)
+             reward = sum(component.value for component in governance_components)
+
+             if policy_veto:
+                 self._last_summary = governance.explanation
+                 self._append_state_label("[policy_veto]")
+             else:
+                 self._last_governance.status = "executed"
+                 action_reward, done = self._execute_action(
+                     action, reward_components, previous_properties, previous_score
+                 )
+                 reward += action_reward
+                 if not done:
+                     reward += self._evaluate_reasoning_consistency(
+                         action,
+                         previous_properties,
+                         self._true_properties(),
+                         reward_components,
+                     )
+                 if done and self._state.submitted:
+                     self._append_state_label("[submitted]")
+                 elif not done:
+                     self._append_state_label(f"[decision_{self._state.step_count:02d}]")
+
+         if not done and self._state.step_count >= self._scenario.max_steps:
+             done = True
+             reward_components.append(
+                 RewardComponent(
+                     name="step_limit",
+                     value=-0.3,
+                     explanation="Episode ended because the maximum decision horizon was reached.",
+                 )
+             )
+             reward -= 0.3
+             self._report_card = self._build_report_card(submitted=False)
+             self._last_summary = "Max-step termination triggered."
+             self._append_state_label("[terminated:max_steps]")
+
+         if not done and self._state.remaining_budget <= 0:
+             done = True
+             reward_components.append(
+                 RewardComponent(
+                     name="budget_exhausted",
+                     value=-0.5,
+                     explanation="Episode terminated because the oracle budget reached zero.",
+                 )
+             )
+             reward -= 0.5
+             self._report_card = self._build_report_card(submitted=False)
+             self._last_summary = "Budget exhausted before a valid submission."
+             self._append_state_label("[terminated:budget]")
+
+         if done and not self._report_card:
+             self._report_card = self._build_report_card(submitted=self._state.submitted)
+
+         if done and not self._state.submitted and self._reward_mode == "curriculum":
+             reward += self._curriculum_terminal_progress_reward(reward_components)
+
+         reward = round(reward, 4)
+         self._reward_total = round(self._reward_total + reward, 4)
+         self._state.reward_total = self._reward_total
+         self._state.current_molecule = self._molecule_signature()
+         self._state.state_label = self._state_path[-1]
+         self._state.state_path = list(self._state_path)
+         self._state.visited_states = len(self._visited_states)
+         self._state.known_assay_count = len(self._known_assays)
+         self._state.last_error_code = error_code
+
+         self._history.append(
+             {
+                 "step": self._state.step_count,
+                 "action": action.model_dump(exclude_none=True),
+                 "reward": reward,
+                 "done": done,
+                 "molecule": self._molecule_signature(),
+                 "state_label": self._state.state_label,
+                 "summary": self._last_summary,
+                 "governance": self._last_governance.model_dump(),
+             }
+         )
+         if done:
+             self._report_card = self._build_report_card(submitted=self._state.submitted)
+         self._sync_state_metadata()
+
+         return self._build_observation(
+             reward=reward,
+             done=done,
+             reward_components=reward_components,
+         )
+
+     def _curriculum_terminal_progress_reward(self, reward_components: List[RewardComponent]) -> float:
+         """Give bounded partial credit for near-miss episodes during RL warmup.
+
+         This intentionally does not change the public submission grader. It only
+         makes the training reward less sparse when a model builds evidence or a
+         chemically plausible candidate but fails to formally submit.
+         """
+
+         grader_scores = self._grade_all()
+         progress = (
+             0.25 * grader_scores["candidate_score"]
+             + 0.25 * grader_scores["constraint_margin_score"]
+             + 0.25 * grader_scores["evidence_score"]
+             + 0.15 * grader_scores["coordination_score"]
+             + 0.10 * grader_scores["budget_score"]
+         )
+         progress = min(0.75, max(0.0, progress))
+         reward_components.append(
+             RewardComponent(
+                 name="curriculum_terminal_progress",
+                 value=round(progress, 4),
+                 explanation=(
+                     "Bounded warmup reward for non-submitted episodes based on candidate quality, "
+                     "constraint margin, evidence coverage, coordination, and budget discipline. "
+                     "Official submission_score remains 0.0 without a submit action."
+                 ),
+             )
+         )
+         missed_nomination_penalty = 0.0
+         if (
+             grader_scores["evidence_score"] >= 0.99
+             and grader_scores["constraint_margin_score"] >= 0.9
+             and grader_scores["candidate_score"] >= self._scenario.baseline_to_beat
+         ):
+             missed_nomination_penalty = -0.25
+             reward_components.append(
+                 RewardComponent(
+                     name="curriculum_missed_nomination",
+                     value=missed_nomination_penalty,
+                     explanation=(
+                         "The candidate had a strong evidence package near the decision deadline, "
+                         "but the team failed to make a formal submit decision."
+                     ),
+                 )
+             )
+         return progress + missed_nomination_penalty
+
+     @property
+     def state(self) -> MolForgeState:
+         """Return the current environment state."""
+
+         return self._state
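The curriculum shaping in `_curriculum_terminal_progress_reward` is a clipped weighted sum whose weights (0.25 + 0.25 + 0.25 + 0.15 + 0.10) total 1.0, so the cap at 0.75 only binds for very strong non-submitted episodes. A standalone sketch of just that arithmetic, using the grader-score keys from the diff:

```python
def curriculum_progress(scores: dict) -> float:
    """Bounded warmup credit mirroring _curriculum_terminal_progress_reward:
    a convex combination of grader scores, clipped to [0.0, 0.75]."""
    progress = (
        0.25 * scores["candidate_score"]
        + 0.25 * scores["constraint_margin_score"]
        + 0.25 * scores["evidence_score"]
        + 0.15 * scores["coordination_score"]
        + 0.10 * scores["budget_score"]
    )
    return min(0.75, max(0.0, progress))
```

Because the weights sum to 1.0, uniform scores of `s` map to `min(0.75, s)`; a perfect episode that never submits still tops out at 0.75, below a submitted one.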
server/requirements.txt ADDED
@@ -0,0 +1,8 @@
+ openenv-core[core]>=0.2.3
+ fastapi>=0.115.0
+ uvicorn>=0.30.0
+ pydantic>=2.8.0
+ rdkit>=2023.9.5,<2024.3.1; python_version < "3.13"
+ rdkit>=2026.3.1; python_version >= "3.13"
+ # Optional TDC oracle support:
+ # pytdc>=1.1.0; python_version < "3.13"
server/shared.py ADDED
@@ -0,0 +1,227 @@
+ """Shared imports, constants, and utility mixins for MolForge."""
+
+ from __future__ import annotations
+
+ import hashlib
+ from copy import deepcopy
+ from typing import Any, Dict, List, Mapping, Optional
+
+ try:
+     from ..models import (
+         AgentMessage,
+         AssayReading,
+         MolForgeAction,
+     )
+     from ..scenarios import (
+         DEFAULT_TOOL_COSTS,
+         EDITABLE_SLOTS,
+         FRAGMENT_LIBRARY,
+         SLOT_ORDER,
+         SCENARIOS,
+         ScenarioConfig,
+         compute_objective_score,
+         enumerate_candidate_edits,
+         evaluate_constraint_margins,
+         evaluate_constraints,
+         evaluate_molecule,
+         format_molecule,
+         get_scenario,
+         literature_hints,
+         molecule_to_smiles,
+         oracle_backend_status,
+     )
+ except ImportError:
+     from models import (
+         AgentMessage,
+         AssayReading,
+         MolForgeAction,
+     )
+     from scenarios import (
+         DEFAULT_TOOL_COSTS,
+         EDITABLE_SLOTS,
+         FRAGMENT_LIBRARY,
+         SLOT_ORDER,
+         SCENARIOS,
+         ScenarioConfig,
+         compute_objective_score,
+         enumerate_candidate_edits,
+         evaluate_constraint_margins,
+         evaluate_constraints,
+         evaluate_molecule,
+         format_molecule,
+         get_scenario,
+         literature_hints,
+         molecule_to_smiles,
+         oracle_backend_status,
+     )
+
+
+ ROLE_PERMISSIONS: Dict[str, List[str]] = {
+     "lead_chemist": ["edit", "submit", "restart", "defer"],
+     "toxicologist": [],
+     "assay_planner": ["run_assay"],
+     "process_chemist": [],
+ }
+
+ ROLE_MESSAGE_TYPES: Dict[str, List[str]] = {
+     "lead_chemist": ["proposal", "revision_request", "submission_recommendation"],
+     "toxicologist": ["approval", "objection", "risk_flag", "assay_request", "rejection"],
+     "assay_planner": ["proposal", "approval", "rejection", "assay_request", "submission_recommendation"],
+     "process_chemist": ["approval", "objection", "risk_flag", "assay_request"],
+ }
+
+
+ class MolForgeSharedMixin:
+     """Utility methods shared across the environment mixins."""
+
+     def _merge_assays(self, readings: List[AssayReading]) -> None:
+         keyed = {
+             (reading.tool_name, reading.property_name, reading.molecule_signature): reading
+             for reading in self._known_assays
+         }
+         for reading in readings:
+             keyed[(reading.tool_name, reading.property_name, reading.molecule_signature)] = reading
+         self._known_assays = list(keyed.values())
+
+     def _current_property_estimate(
+         self,
+         property_name: str,
+         molecule_signature: Optional[str] = None,
+     ) -> Optional[float]:
+         signature = molecule_signature or self._molecule_signature()
+         for reading in reversed(self._known_assays):
+             if reading.molecule_signature == signature and reading.property_name == property_name:
+                 return reading.estimate
+         return None
+
+     def _estimate_information_gain(self, tool_name: str) -> float:
+         current_signature = self._molecule_signature()
+         prior_runs = self._assay_runs.get(f"{current_signature}::{tool_name}", 0)
+         base = {
+             "evaluate_properties": 0.7,
+             "dock_target": 0.62,
+             "assay_toxicity": 0.78 if self._scenario.difficulty != "easy" else 0.52,
+             "estimate_synthesizability": 0.66 if "synth_min" in self._scenario.hard_constraints else 0.42,
+             "evaluate_novelty": 0.38,
+             "search_literature": 0.32,
+             "run_md_simulation": 0.84,
+         }.get(tool_name, 0.25)
+         decay = 0.4**prior_runs
+         return round(base * decay, 4)
+
+     def _simulate_action_properties(self, action: MolForgeAction) -> Dict[str, float]:
+         if action.action_type == "edit" and action.slot:
+             molecule = dict(self._molecule)
+             if action.edit_type == "remove":
+                 defaults = {
+                     "warhead": "nitrile",
+                     "hinge": "pyridine",
+                     "solvent_tail": "morpholine",
+                     "back_pocket": "methoxy",
+                 }
+                 molecule[action.slot] = defaults[action.slot]
+             elif action.fragment:
+                 molecule[action.slot] = action.fragment
+             return self._evaluate_for_molecule(molecule, self._trap_penalty_active)
+
+         if action.action_type == "restart":
+             return self._evaluate_for_molecule(dict(self._scenario.restart_scaffold), False)
+
+         return self._true_properties()
+
+     def _record_message(self, message: AgentMessage) -> None:
+         if not message.message_id:
+             message.message_id = f"msg_{self._state.step_count:03d}_{len(self._message_log):03d}"
+         self._message_log.append(deepcopy(message))
+         self._state.message_count += 1
+         self._role_metrics[message.sender]["messages_sent"] += 1
+         if message.message_type in {"objection", "risk_flag", "rejection"}:
+             self._state.objection_count += 1
+
+     def _sync_state_metadata(self) -> None:
+         self._state.metadata = {
+             "state_label": self._state.state_label,
+             "state_path": list(self._state_path),
+             "trace": deepcopy(self._history),
+             "message_log": [message.model_dump() for message in self._message_log],
+             "oracle_log": deepcopy(self._oracle_log),
+             "role_metrics": deepcopy(self._role_metrics),
+             "terminal_grader_scores": self._grade_all() if self._state.submitted else {},
+         }
+         if self._debug_state_enabled:
+             self._state.metadata["debug_hidden_properties"] = self._true_properties()
+
+     def _true_properties(self) -> Dict[str, float]:
+         return self._evaluate_for_molecule(self._molecule, self._trap_penalty_active)
+
+     def _evaluate_for_molecule(
+         self,
+         molecule: Mapping[str, str],
+         trap_penalty_active: bool,
+     ) -> Dict[str, float]:
+         return evaluate_molecule(
+             molecule,
+             self._scenario.__class__(
+                 **{**self._scenario.__dict__, "trap_penalty": trap_penalty_active}
+             ),
+             target_shift_active=self._target_shift_active(),
+         )
+
+     def _target_shift_active(self) -> bool:
+         return bool(
+             self._scenario.target_shift_step
+             and self._state.step_count >= self._scenario.target_shift_step
+         )
+
+     def _molecule_signature(self) -> str:
+         return format_molecule(self._molecule)
+
+     def _append_state_label(self, label: str) -> None:
+         if not self._state_path or self._state_path[-1] != label:
+             self._state_path.append(label)
+
+     def _safety_alerts(self) -> List[str]:
+         alerts = []
+         if self._molecule["solvent_tail"] == "dimethylamino":
+             alerts.append("Dimethylamino tail is a recurring liability for cardiac safety.")
+         if self._molecule["back_pocket"] == "trifluoromethyl":
+             alerts.append("Trifluoromethyl group may overshoot lipophilic safety windows.")
+         if self._molecule["hinge"] == "fluorophenyl" and self._molecule["back_pocket"] == "chloro":
+             alerts.append("Hydrophobic hinge/back-pocket combination looks safety-negative.")
+         return alerts
+
+     def _route_warnings(self) -> List[str]:
+         warnings = []
+         if self._molecule["hinge"] == "quinazoline":
+             warnings.append("Quinazoline hinge increases route complexity.")
+         if self._molecule["warhead"] == "vinyl_sulfonamide":
+             warnings.append("Vinyl sulfonamide warhead is reactive and harder to handle.")
+         if self._molecule["back_pocket"] == "trifluoromethyl":
+             warnings.append("CF3 substitution raises cost and scale-up complexity.")
+         return warnings
+
+     @staticmethod
+     def _empty_role_metrics() -> Dict[str, Dict[str, int]]:
+         return {
+             role: {"messages_sent": 0, "correct_messages": 0, "incorrect_messages": 0}
+             for role in ["lead_chemist", "toxicologist", "assay_planner", "process_chemist"]
+         }
+
+     @staticmethod
+     def _open_unit_interval(value: float, epsilon: float = 1e-4) -> float:
+         return round(min(max(value, epsilon), 1.0 - epsilon), 4)
+
+     @staticmethod
+     def _assay_estimate(
+         signature: str,
+         tool_name: str,
+         property_name: str,
+         runs: int,
+         true_value: float,
+     ) -> float:
+         digest = hashlib.sha256(
+             f"{signature}|{tool_name}|{property_name}|{runs}".encode("utf-8")
+         ).hexdigest()
+         centered = (int(digest[:8], 16) / 0xFFFFFFFF) - 0.5
+         noise = centered * (0.16 / runs)
+         return round(min(max(true_value + noise, 0.0), 1.0), 4)
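`_assay_estimate` derives reproducible pseudo-noise from a SHA-256 digest: the same `(signature, tool, property, runs)` tuple always yields the same reading, and the noise band shrinks as `runs` grows (|noise| <= 0.08 / runs before rounding). The same arithmetic as a standalone sketch, with hypothetical argument values in the usage note:

```python
import hashlib

def assay_estimate(signature: str, tool_name: str, property_name: str,
                   runs: int, true_value: float) -> float:
    """Deterministic noisy assay reading clipped to [0, 1]."""
    digest = hashlib.sha256(
        f"{signature}|{tool_name}|{property_name}|{runs}".encode("utf-8")
    ).hexdigest()
    # First 8 hex chars -> uniform-ish value in [0, 1], recentered to [-0.5, 0.5].
    centered = (int(digest[:8], 16) / 0xFFFFFFFF) - 0.5
    # Noise magnitude decays as 1/runs, so repeat assays converge on the truth.
    noise = centered * (0.16 / runs)
    return round(min(max(true_value + noise, 0.0), 1.0), 4)
```

For example, `assay_estimate("hinge=pyridine", "assay_toxicity", "toxicity", 1, 0.5)` always returns the same value within 0.08 of 0.5, and passing `runs=4` tightens that band to 0.02. Hashing instead of `random` keeps episodes replayable without any RNG state.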
server/views.py ADDED
@@ -0,0 +1,436 @@
1
+ """Observation building and scoring mixin for MolForge."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from copy import deepcopy
6
+ from typing import Any, Dict, List, Mapping
7
+
8
+ from .shared import (
9
+ DEFAULT_TOOL_COSTS,
10
+ EDITABLE_SLOTS,
11
+ ROLE_MESSAGE_TYPES,
12
+ ROLE_PERMISSIONS,
13
+ SCENARIOS,
14
+ SLOT_ORDER,
15
+ compute_objective_score,
16
+ enumerate_candidate_edits,
17
+ evaluate_constraint_margins,
18
+ evaluate_constraints,
19
+ literature_hints,
20
+ molecule_to_smiles,
21
+ oracle_backend_status,
22
+ )
23
+
24
+ try:
25
+ from ..models import ConstraintCheck, MolForgeObservation, MoleculeSlot, RoleObservation
26
+ except ImportError:
27
+ from models import ConstraintCheck, MolForgeObservation, MoleculeSlot, RoleObservation
28
+
29
+
30
+ class MolForgeViewMixin:
31
+ """Observation, report-card, and grader methods."""
32
+
33
+ def _build_observation(
34
+ self,
35
+ *,
36
+ reward: float,
37
+ done: bool,
38
+ reward_components: List,
39
+ ) -> MolForgeObservation:
40
+ current_signature = self._molecule_signature()
41
+ current_assays = [
42
+ reading for reading in self._known_assays if reading.molecule_signature == current_signature
43
+ ]
44
+ visible_metrics = {
45
+ "budget_fraction_remaining": round(
46
+ self._state.remaining_budget / max(self._scenario.oracle_budget, 1), 4
47
+ ),
48
+ "current_molecule_assay_count": float(len(current_assays)),
49
+ }
50
+ for property_name in ["potency", "toxicity", "synth", "novelty"]:
51
+ estimate = self._current_property_estimate(property_name, current_signature)
52
+ if estimate is not None:
53
+ visible_metrics[property_name] = estimate
54
+
55
+ constraint_status = self._build_visible_constraints(current_signature)
56
+ metadata: Dict[str, Any] = {
57
+ "task_index": self._reset_index % len(SCENARIOS),
58
+ "oracle_budget_costs": deepcopy(DEFAULT_TOOL_COSTS),
59
+ "history_length": len(self._history),
60
+ "trace_tail": [entry["summary"] for entry in self._history[-3:]],
61
+ "current_smiles": molecule_to_smiles(self._molecule),
62
+ "oracle_backend": oracle_backend_status(),
63
+ "candidate_edits": [
64
+ {"slot": slot, "fragment": fragment}
65
+ for slot, fragment in list(enumerate_candidate_edits(self._molecule))[:8]
66
+ ],
67
+ "literature_hints": literature_hints(self._molecule),
68
+ "target_shift_active": self._target_shift_active(),
69
+ "public_role_metrics": {
70
+ role: {
71
+ "messages_sent": metrics["messages_sent"],
72
+ "correct_messages": metrics["correct_messages"],
73
+ }
74
+ for role, metrics in self._role_metrics.items()
75
+ },
76
+ }
77
+ if done:
78
+ metadata["terminal_grader_scores"] = self._grade_all()
79
+
80
+ return MolForgeObservation(
81
+ scenario_id=self._scenario.scenario_id,
82
+ difficulty=self._scenario.difficulty,
83
+ state_label=self._state.state_label,
84
+ state_path=list(self._state_path),
85
+ coordination_mode=self._scenario.coordination_mode, # type: ignore[arg-type]
86
+ enabled_roles=list(self._scenario.enabled_roles),
87
+ task_brief=self._scenario.task_brief,
88
+ target_name=self._scenario.target_name,
89
+ current_molecule=current_signature,
90
+ molecule_slots=[
91
+ MoleculeSlot(slot=slot, fragment=self._molecule[slot], editable=True)
92
+ for slot in SLOT_ORDER
93
+ ],
94
+ editable_slots=list(EDITABLE_SLOTS),
95
+ step_index=self._state.step_count,
96
+ max_steps=self._scenario.max_steps,
97
+ remaining_budget=self._state.remaining_budget,
98
+ budget_used=self._state.budget_used,
99
+ max_budget=self._scenario.oracle_budget,
100
+ known_assays=deepcopy(self._known_assays),
101
+ role_observations=self._build_role_observations(current_signature),
102
+ message_log=[message.model_dump() for message in self._message_log[-8:]],
103
+ governance=deepcopy(self._last_governance),
104
+ last_transition_summary=self._last_summary,
105
+ visible_metrics=visible_metrics,
106
+ constraint_status=constraint_status,
107
+ reward_breakdown=reward_components,
108
+ allowed_actions=[
109
+ "Lead Chemist: edit, submit, restart, defer",
110
+ "Assay Planner: run_assay",
111
+ "Messages: proposal, approval, objection, risk_flag, assay_request, rejection",
112
+ ],
113
+ report_card=self._report_card,
114
+ metadata=metadata,
115
+ done=done,
116
+ reward=reward,
117
+ )
118
+
119
+     def _build_visible_constraints(self, molecule_signature: str) -> List[ConstraintCheck]:
+         checks: List[ConstraintCheck] = []
+         for name, threshold in self._scenario.hard_constraints.items():
+             property_name = "toxicity" if name == "toxicity_max" else name.split("_")[0]
+             estimate = self._current_property_estimate(property_name, molecule_signature)
+             relation = "<=" if name.endswith("_max") else ">="
+             if estimate is None:
+                 checks.append(
+                     ConstraintCheck(
+                         name=name,
+                         target=f"{relation} {threshold:.2f}",
+                         satisfied=None,
+                         actual=None,
+                         evidence_status="unknown",
+                     )
+                 )
+                 continue
+             satisfied = estimate <= threshold if name.endswith("_max") else estimate >= threshold
+             checks.append(
+                 ConstraintCheck(
+                     name=name,
+                     target=f"{relation} {threshold:.2f}",
+                     satisfied=satisfied,
+                     actual=round(estimate, 4),
+                     evidence_status="known",
+                 )
+             )
+         return checks
+
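A side note on the constraint convention above: names ending in `_max` are upper bounds and everything else is treated as a lower bound, with `None` estimates reported as unknown rather than failed. A minimal standalone sketch of just that rule (the helper name is hypothetical, not part of the environment API):

```python
def check_constraint(name, threshold, estimate):
    """Mirror the suffix convention: *_max means <=, anything else means >=."""
    relation = "<=" if name.endswith("_max") else ">="
    if estimate is None:
        # No assay evidence yet: satisfaction is unknown, not failed.
        return relation, None
    satisfied = estimate <= threshold if name.endswith("_max") else estimate >= threshold
    return relation, satisfied
```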
+     def _build_role_observations(self, molecule_signature: str) -> List[RoleObservation]:
+         current_assays = [
+             reading.model_dump()
+             for reading in self._known_assays
+             if reading.molecule_signature == molecule_signature
+         ]
+         evidence_gaps = [
+             prop
+             for prop in ["potency", "toxicity", "synth"]
+             if self._current_property_estimate(prop, molecule_signature) is None
+         ]
+         edit_history = [
+             entry["action"]
+             for entry in self._history
+             if entry["action"].get("action_type") == "edit"
+         ][-4:]
+
+         return [
+             RoleObservation(
+                 role="lead_chemist",
+                 local_objective="Propose high-value scaffold edits and decide when the team should submit.",
+                 permissions=ROLE_PERMISSIONS["lead_chemist"],
+                 observation={
+                     "molecule_slots": deepcopy(self._molecule),
+                     "edit_history": edit_history,
+                     "visible_assays": current_assays,
+                     "candidate_edits": [
+                         {"slot": slot, "fragment": fragment}
+                         for slot, fragment in list(enumerate_candidate_edits(self._molecule))[:8]
+                     ],
+                     "open_questions": evidence_gaps,
+                 },
+             ),
+             RoleObservation(
+                 role="toxicologist",
+                 local_objective="Protect against safety regressions and unsafe submissions.",
+                 permissions=ROLE_MESSAGE_TYPES["toxicologist"],
+                 observation={
+                     "toxicity_readouts": [
+                         reading
+                         for reading in current_assays
+                         if reading["property_name"] == "toxicity"
+                     ],
+                     "hard_threshold": self._scenario.hard_constraints.get("toxicity_max"),
+                     "safety_alerts": self._safety_alerts(),
+                     "risk_history": [
+                         message.model_dump()
+                         for message in self._message_log
+                         if message.sender == "toxicologist"
+                     ][-4:],
+                 },
+             ),
+             RoleObservation(
+                 role="assay_planner",
+                 local_objective="Allocate assay budget where the expected information gain is highest.",
+                 permissions=ROLE_PERMISSIONS["assay_planner"] + ROLE_MESSAGE_TYPES["assay_planner"],
+                 observation={
+                     "budget_ledger": {
+                         "remaining_budget": self._state.remaining_budget,
+                         "budget_used": self._state.budget_used,
+                         "max_budget": self._state.max_budget,
+                     },
+                     "tool_costs": deepcopy(DEFAULT_TOOL_COSTS),
+                     "tool_usage_history": deepcopy(self._assay_runs),
+                     "evidence_gaps": evidence_gaps,
+                     "estimated_information_value": {
+                         tool_name: round(self._estimate_information_gain(tool_name), 4)
+                         for tool_name in self._scenario.enabled_tools
+                     },
+                 },
+             ),
+             RoleObservation(
+                 role="process_chemist",
+                 local_objective="Guard tractability and synthetic feasibility before the team commits.",
+                 permissions=ROLE_MESSAGE_TYPES["process_chemist"],
+                 observation={
+                     "synth_readouts": [
+                         reading for reading in current_assays if reading["property_name"] == "synth"
+                     ],
+                     "route_warnings": self._route_warnings(),
+                     "feasibility_flags": {
+                         "heavy_hinge": self._molecule["hinge"] == "quinazoline",
+                         "reactive_warhead": self._molecule["warhead"] == "vinyl_sulfonamide",
+                         "lipophilic_tail": self._molecule["back_pocket"] == "trifluoromethyl",
+                     },
+                 },
+             ),
+         ]
+
+     def _grade_all(self) -> Dict[str, float]:
+         properties = self._true_properties()
+         constraints = evaluate_constraints(properties, self._scenario)
+         constraint_margins = evaluate_constraint_margins(properties, self._scenario)
+         constraint_margin_score = sum(constraint_margins.values()) / max(len(constraint_margins), 1)
+         constraint_fraction = sum(1.0 for passed, _ in constraints.values() if passed) / max(len(constraints), 1)
+         submitted = self._state.submitted
+         coordination_score = self._coordination_score()
+         evidence_score = self._evidence_score()
+         budget_score = self._open_unit_interval(
+             self._state.remaining_budget / max(self._scenario.oracle_budget, 1),
+         )
+         progress_score = self._grade_progress(
+             candidate_score=compute_objective_score(properties, self._scenario),
+             constraint_margin_score=constraint_margin_score,
+             constraint_fraction=constraint_fraction,
+             evidence_score=evidence_score,
+             coordination_score=coordination_score,
+             budget_score=budget_score,
+         )
+         submission_score = self._grade_submission(properties) if submitted else 0.0
+         final_score = self._grade_final(
+             submission_score=submission_score,
+             progress_score=progress_score,
+             submitted=submitted,
+             constraint_fraction=constraint_fraction,
+             evidence_score=evidence_score,
+         )
+         return {
+             "final_score": final_score,
+             "potency_score": self._open_unit_interval(properties["potency"]),
+             "safety_score": self._open_unit_interval(1.0 - properties["toxicity"]),
+             "synth_score": self._open_unit_interval(properties["synth"]),
+             "novelty_score": self._open_unit_interval(properties["novelty"]),
+             "candidate_score": self._open_unit_interval(compute_objective_score(properties, self._scenario)),
+             "constraint_score": self._open_unit_interval(
+                 sum(1.0 for passed, _ in constraints.values() if passed) / max(len(constraints), 1),
+             ),
+             "constraint_margin_score": self._open_unit_interval(constraint_margin_score),
+             "budget_score": budget_score,
+             "submitted_score": 1.0 if submitted else 0.0,
+             "submission_score": submission_score,
+             "progress_score": progress_score,
+             "coordination_score": self._open_unit_interval(coordination_score),
+             "evidence_score": self._open_unit_interval(evidence_score),
+         }
+
+     def _grade_progress(
+         self,
+         *,
+         candidate_score: float,
+         constraint_margin_score: float,
+         constraint_fraction: float,
+         evidence_score: float,
+         coordination_score: float,
+         budget_score: float,
+     ) -> float:
+         """Score scientific progress even when no formal submission happened."""
+
+         progress = (
+             0.45 * candidate_score
+             + 0.35 * constraint_margin_score
+             + 0.10 * evidence_score
+             + 0.05 * coordination_score
+             + 0.05 * budget_score
+         )
+         repeated_assays = sum(max(0, runs - 1) for runs in self._assay_runs.values())
+         policy_vetoes = sum(
+             1
+             for entry in self._history
+             if entry.get("governance", {}).get("status") == "policy_veto"
+         )
+         progress -= min(0.20, 0.04 * repeated_assays)
+         progress -= min(0.20, 0.05 * policy_vetoes)
+
+         if constraint_fraction < 1.0:
+             progress = min(progress, 0.25 + 0.25 * constraint_fraction)
+         if not self._state.submitted and evidence_score < 0.99:
+             progress = min(progress, 0.45)
+         if self._scenario.trap_penalty and not self._restart_used:
+             progress = min(progress, 0.30)
+         if self._state.submitted:
+             progress += 0.05
+         return self._open_unit_interval(progress)
+
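The progress grader above blends five subscores with weights summing to 1.0 (0.45/0.35/0.10/0.05/0.05), then subtracts capped penalties for repeated assay runs and policy vetoes. A self-contained sketch of just that arithmetic (the function name and the plain [0, 1] clamp are illustrative; the real method also applies the situational caps and the `_open_unit_interval` squash shown above):

```python
def progress_blend(candidate, margin, evidence, coordination, budget,
                   repeated_assays=0, policy_vetoes=0):
    """Weighted blend of subscores minus capped waste penalties."""
    p = (0.45 * candidate + 0.35 * margin + 0.10 * evidence
         + 0.05 * coordination + 0.05 * budget)
    p -= min(0.20, 0.04 * repeated_assays)  # duplicate assay runs, capped at -0.20
    p -= min(0.20, 0.05 * policy_vetoes)    # governance vetoes, capped at -0.20
    return max(0.0, min(1.0, p))
```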
+     def _grade_final(
+         self,
+         *,
+         submission_score: float,
+         progress_score: float,
+         submitted: bool,
+         constraint_fraction: float,
+         evidence_score: float,
+     ) -> float:
+         """Single conservative scalar for RL/evaluation headline reporting."""
+
+         if submitted:
+             return self._open_unit_interval(submission_score)
+
+         score = 0.35 * progress_score
+         if constraint_fraction < 1.0:
+             score = min(score, 0.05 + 0.10 * constraint_fraction)
+         if evidence_score < 0.99:
+             score = min(score, 0.15)
+         if self._scenario.trap_penalty and not self._restart_used:
+             score = min(score, 0.08)
+         return self._open_unit_interval(score)
+
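The unsubmitted branch of `_grade_final` above is a 0.35-scaled progress score with three conservative caps for unmet constraints, missing evidence, and an unresolved trap scaffold. A hypothetical standalone restatement of that branch, clamping to [0, 1] in place of `_open_unit_interval`:

```python
def final_score_unsubmitted(progress, constraint_fraction, evidence,
                            trap_unresolved=False):
    """Conservative headline score when no submission happened."""
    score = 0.35 * progress
    if constraint_fraction < 1.0:
        score = min(score, 0.05 + 0.10 * constraint_fraction)
    if evidence < 0.99:
        score = min(score, 0.15)
    if trap_unresolved:
        score = min(score, 0.08)
    return max(0.0, min(1.0, score))
```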
+     def _coordination_score(self) -> float:
+         expected_messages = 0
+         for entry in self._history:
+             action = entry.get("action", {})
+             if action.get("action_type") == "defer":
+                 continue
+             expected_messages += 1 + len(entry.get("governance", {}).get("required_roles", []))
+         if expected_messages == 0:
+             return self._open_unit_interval(0.0)
+         total_correct = sum(metrics["correct_messages"] for metrics in self._role_metrics.values())
+         return self._open_unit_interval(min(total_correct, expected_messages) / expected_messages)
+
+     def _grade_submission(self, properties: Mapping[str, float]) -> float:
+         base = compute_objective_score(properties, self._scenario)
+         constraint_margins = evaluate_constraint_margins(properties, self._scenario)
+         constraint_margin_score = sum(constraint_margins.values()) / max(len(constraint_margins), 1)
+         constraints = evaluate_constraints(properties, self._scenario)
+         constraint_fraction = sum(1.0 for passed, _ in constraints.values() if passed) / max(len(constraints), 1)
+         evidence_score = self._evidence_score()
+         submission_score = (
+             0.60 * base
+             + 0.20 * constraint_margin_score
+             + 0.10 * self._coordination_score()
+             + 0.10 * evidence_score
+         )
+         if evidence_score >= 0.99 and constraint_fraction >= 1.0 and base >= self._scenario.baseline_to_beat:
+             budget_efficiency = self._state.remaining_budget / max(self._scenario.oracle_budget, 1)
+             submission_score += 0.05 * max(0.0, budget_efficiency)
+         if evidence_score < 1.0:
+             submission_score = min(submission_score, 0.25 + 0.25 * evidence_score)
+         if constraint_fraction < 1.0:
+             submission_score = min(submission_score, 0.20 + 0.50 * constraint_margin_score)
+         if base < self._scenario.baseline_to_beat:
+             submission_score = min(submission_score, 0.45)
+         return self._open_unit_interval(submission_score)
+
+     def _evidence_score(self) -> float:
+         current_signature = self._molecule_signature()
+         required = ["potency", "toxicity"]
+         if "synth_min" in self._scenario.hard_constraints:
+             required.append("synth")
+         available = sum(
+             1
+             for prop in required
+             if self._current_property_estimate(prop, current_signature) is not None
+         )
+         score = available / max(len(required), 1)
+         if self._scenario.target_shift_step and self._target_shift_active():
+             has_post_shift_potency = any(
+                 entry["step"] >= self._scenario.target_shift_step
+                 and entry["molecule"] == current_signature
+                 and any(result["property_name"] == "potency" for result in entry["results"])
+                 for entry in self._oracle_log
+             )
+             score = min(score, 1.0 if has_post_shift_potency else 0.5)
+         return score
+
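Before the target-shift adjustment, the core of `_evidence_score` above is just the fraction of required properties that have an estimate for the current molecule. A hypothetical standalone version of that fraction:

```python
def evidence_fraction(required, known_estimates):
    """Fraction of required properties with a non-None current-molecule estimate."""
    available = sum(1 for prop in required if known_estimates.get(prop) is not None)
    return available / max(len(required), 1)
```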
+     def _build_report_card(self, *, submitted: bool) -> str:
+         properties = self._true_properties()
+         grader_scores = self._grade_all()
+         constraints = evaluate_constraints(properties, self._scenario)
+         lines = [
+             f"Scenario: {self._scenario.scenario_id} ({self._scenario.difficulty})",
+             f"Final molecule: {self._molecule_signature()}",
+             f"Potency: {properties['potency']:.3f}",
+             f"Toxicity: {properties['toxicity']:.3f}",
+             f"Synthesizability: {properties['synth']:.3f}",
+             f"Novelty: {properties['novelty']:.3f}",
+             f"Final score: {grader_scores['final_score']:.3f}",
+             f"Candidate scientific score: {grader_scores['candidate_score']:.3f}",
+             f"Constraint margin score: {grader_scores['constraint_margin_score']:.3f}",
+             f"Submission grader: {grader_scores['submission_score']:.3f}",
+             f"Progress score: {grader_scores['progress_score']:.3f}",
+             f"Coordination score: {grader_scores['coordination_score']:.3f}",
+             f"Evidence score: {grader_scores['evidence_score']:.3f}",
+             "Constraints:",
+         ]
+         for name, (passed, threshold) in constraints.items():
+             metric_name = "toxicity" if name == "toxicity_max" else name.split("_")[0]
+             lines.append(
+                 f"- {name}: {'pass' if passed else 'fail'} (actual={properties[metric_name]:.3f}, threshold={threshold:.3f})"
+             )
+         lines.append(
+             f"Messages sent: {self._state.message_count}, objections raised: {self._state.objection_count}, oracle calls: {self._state.oracle_call_count}"
+         )
+         if self._scenario.target_shift_step and self._target_shift_active():
+             lines.append("Target mutation triggered during this episode.")
+         if self._restart_used:
+             lines.append("Agent used restart_from_new_scaffold to escape the original trap series.")
+         if not submitted:
+             lines.append("Episode terminated without a formal submit action.")
+         return "\n".join(lines)
uv.lock ADDED
The diff for this file is too large to render. See raw diff