Spaces:
Runtime error
Runtime error
initial: drugenv trainer control panel
Browse files- README.md +230 -5
- client.py +54 -0
- dashboard.html +543 -0
- dashboard.py +129 -0
- demo.html +1639 -0
- models.py +927 -0
- openenv.yaml +12 -0
- pyproject.toml +55 -0
- server/Dockerfile +80 -0
- server/__init__.py +3 -0
- server/app.py +81 -0
- server/biology/__init__.py +11 -0
- server/biology/target_index.py +96 -0
- server/hackathon_environment.py +325 -0
- server/requirements.txt +1 -0
- server/rewards/__init__.py +3 -0
- server/rewards/reward.py +265 -0
- server/rules/__init__.py +3 -0
- server/rules/engine.py +210 -0
- server/simulator/__init__.py +21 -0
- server/simulator/latent_state.py +175 -0
- server/simulator/noise.py +128 -0
- server/simulator/output_generator.py +695 -0
- server/simulator/transition.py +201 -0
- server/tasks/__init__.py +4 -0
- server/tasks/generator.py +132 -0
- server/tasks/procedural_generator.py +232 -0
- server/tasks/scenarios.py +370 -0
- space/__init__.py +0 -0
- space/training/Dockerfile +36 -0
- space/training/README.md +116 -0
- space/training/__init__.py +0 -0
- space/training/app.py +943 -0
- space/training/requirements.txt +1 -0
README.md
CHANGED
|
@@ -1,10 +1,235 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji: 👀
|
| 4 |
-
colorFrom: blue
|
| 5 |
-
colorTo: gray
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
---
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Drug Target Validation Environment
|
|
|
|
|
|
|
|
|
|
| 3 |
sdk: docker
|
| 4 |
pinned: false
|
| 5 |
+
app_port: 8000
|
| 6 |
+
tags:
|
| 7 |
+
- openenv
|
| 8 |
+
- reinforcement-learning
|
| 9 |
+
- drug-discovery
|
| 10 |
+
- pharma
|
| 11 |
---
|
| 12 |
|
| 13 |
+
# 🧬 DrugEnv — Drug Target Validation Environment
|
| 14 |
+
|
| 15 |
+
> **DrugEnv** — an OpenEnv RL environment that teaches LLMs to do computational drug-target validation.
|
| 16 |
+
|
| 17 |
+
This repository implements an OpenEnv-compatible reinforcement learning environment in which an agent acts as a **computational drug discovery scientist**. Given a proposed drug target (gene / protein) and a disease context, the agent must investigate target viability by issuing simulated bioinformatics, clinical, and experimental queries, and finally submit a calibrated **go / no-go** validation report with a confidence score.
|
| 18 |
+
|
| 19 |
+
The environment is designed as a partially observable Markov decision process (POMDP) with:
|
| 20 |
+
|
| 21 |
+
- a hidden ground-truth `TargetProfile` (expression, druggability, selectivity, toxicity, clinical precedent)
|
| 22 |
+
- noisy database / assay outputs governed by `DataQualityState`
|
| 23 |
+
- a single unified **experimental credit** budget per episode
|
| 24 |
+
- visible task metadata, dossier of accumulated findings, and step history
|
| 25 |
+
- dense step-wise reward plus terminal reward for decision quality and evidence coverage
|
| 26 |
+
|
| 27 |
+
## Why drug target validation?
|
| 28 |
+
|
| 29 |
+
Roughly **90% of drug development programs fail** in clinical trials, and a large fraction of failures trace back to mistakes during target validation: targets that are not actually disease-driving, are undruggable, lack selectivity, or have hidden toxicity. The cost of progressing a single bad target through Phase III can run into the **billions of dollars**. Even modest improvements in early-stage decision quality therefore translate into enormous savings and faster cures.
|
| 30 |
+
|
| 31 |
+
This environment lets you train and benchmark agents on exactly that bottleneck: **acquiring the right evidence cheaply and submitting a well-calibrated go / no-go**.
|
| 32 |
+
|
| 33 |
+
## How it works
|
| 34 |
+
|
| 35 |
+
At a high level, each episode looks like this:
|
| 36 |
+
|
| 37 |
+
1. `reset()` selects a drug-target-validation scenario and seeds the simulator.
|
| 38 |
+
2. The agent receives a `ValidationObservation` describing the target, indication, remaining credits, accumulated dossier, and step history.
|
| 39 |
+
3. The agent submits a `DrugTargetAction` such as `query_expression`, `druggability_screen`, `off_target_screen`, or `submit_validation_report`.
|
| 40 |
+
4. The rule engine checks credit budget, redundancy, and ordering prerequisites.
|
| 41 |
+
5. The transition engine deducts credits and asks the output generator to simulate evidence from the hidden `TargetProfile`.
|
| 42 |
+
6. The reward computer scores the step for novelty, reasoning coherence, credit efficiency, and rule compliance.
|
| 43 |
+
7. The environment returns a new observation with an updated `EvidenceDossier`, latest output, violations, and reward.
|
| 44 |
+
8. The episode ends when the agent submits a validation report, exhausts credits, or hits the step limit.
|
| 45 |
+
|
| 46 |
+
## The core mental model
|
| 47 |
+
|
| 48 |
+
### Hidden state
|
| 49 |
+
|
| 50 |
+
The simulator maintains a `FullLatentState` that the agent never sees directly:
|
| 51 |
+
|
| 52 |
+
- `TargetProfile` — true expression level / tissue specificity / disease over-expression, druggability score, binding-pocket quality, selectivity ratio, off-target genes, toxicity profile, clinical precedent, expected in-vitro and in-vivo behaviour, plus the hidden `correct_decision`, `true_viability_score`, `key_evidence_dimensions`, and any `misleading_signals`.
|
| 53 |
+
- `DataQualityState` — noise level, false-positive rate, false-negative rate, database coverage.
|
| 54 |
+
- `CreditState` — total / used / remaining experimental credits.
|
| 55 |
+
- `ValidationProgress` — boolean flags for which evidence dimensions have been investigated and whether a report has been submitted.
|
| 56 |
+
|
| 57 |
+
### Visible state
|
| 58 |
+
|
| 59 |
+
The agent only sees `ValidationObservation`, which includes:
|
| 60 |
+
|
| 61 |
+
- `target_gene`, `disease_context`, `indication`
|
| 62 |
+
- `credits_remaining` / `credits_total`
|
| 63 |
+
- `dossier` — running `EvidenceDossier` of expression / protein / clinical / safety / literature / experimental findings, plus any `flagged_red_flags`
|
| 64 |
+
- `pipeline_history` — list of past actions and their summary outputs
|
| 65 |
+
- `latest_output` — typed `IntermediateOutput` from the most recent step
|
| 66 |
+
- `rule_violations` and `step_reward_breakdown` for the last step
|
| 67 |
+
|
| 68 |
+
## Action space
|
| 69 |
+
|
| 70 |
+
| Category | Action | Cost (credits) |
|
| 71 |
+
|---|---|---|
|
| 72 |
+
| Expression & omics | `query_expression`, `differential_expression`, `pathway_enrichment`, `coexpression_network` | 2 |
|
| 73 |
+
| Protein & structure | `protein_structure_lookup`, `binding_site_analysis`, `druggability_screen` | 3 |
|
| 74 |
+
| Protein & structure | `protein_interaction_network` | 2 |
|
| 75 |
+
| Clinical & safety | `clinical_trial_lookup`, `toxicity_panel`, `off_target_screen`, `patient_stratification` | 3 |
|
| 76 |
+
| Literature | `literature_search`, `evidence_synthesis`, `competitor_landscape` | 1 |
|
| 77 |
+
| Experimental | `crispr_knockout`, `biomarker_correlation` | 4 / 3 (respectively) |
|
| 78 |
+
| Experimental | `in_vitro_assay` | 5 |
|
| 79 |
+
| Experimental | `in_vivo_model` | 8 |
|
| 80 |
+
| Meta | `flag_red_flag`, `request_expert_review` | 0 / 1 (respectively) |
|
| 81 |
+
| Terminal | `submit_validation_report` | 0 |
|
| 82 |
+
|
| 83 |
+
`submit_validation_report` carries two extra fields: `final_decision` (`"go"` or `"no_go"`) and `confidence` in `[0, 1]`. The episode ends as soon as the report is submitted.
|
| 84 |
+
|
| 85 |
+
## Reward function
|
| 86 |
+
|
| 87 |
+
Every step receives a decomposed reward:
|
| 88 |
+
|
| 89 |
+
```
|
| 90 |
+
R_t = evidence_novelty_bonus
|
| 91 |
+
+ reasoning_coherence_bonus
|
| 92 |
+
+ credit_efficiency_penalty
|
| 93 |
+
+ rule_violation_penalty
|
| 94 |
+
+ [φ(s_{t+1}) − φ(s_t)]
|
| 95 |
+
```
|
| 96 |
+
|
| 97 |
+
When the episode ends, a terminal reward is added:
|
| 98 |
+
|
| 99 |
+
```
|
| 100 |
+
R_T = 0.40 * decision_accuracy
|
| 101 |
+
+ 0.35 * evidence_coverage
|
| 102 |
+
+ 0.15 * credit_efficiency
|
| 103 |
+
+ 0.10 * reasoning_coherence
|
| 104 |
+
```
|
| 105 |
+
|
| 106 |
+
Where:
|
| 107 |
+
|
| 108 |
+
- `decision_accuracy` — `1.0` if the final go / no-go matches the hidden `correct_decision`, with the result scaled by `2 * |confidence - 0.5|`: a confidently correct answer is fully rewarded, a confidently wrong answer is fully penalised, and a report submitted at `confidence = 0.5` scales to zero either way.
|
| 109 |
+
- `evidence_coverage` — fraction of the scenario's `key_evidence_dimensions` (e.g. `expression`, `druggability`, `off_target`, `clinical`, `in_vitro`) that the agent actually investigated.
|
| 110 |
+
- `credit_efficiency` — `1 − redundant_calls / total_calls`.
|
| 111 |
+
- `reasoning_coherence` — fraction of actions whose soft prerequisites (e.g. `expression` before `toxicity`, `in_vitro` before `in_vivo`) were satisfied.
|
| 112 |
+
|
| 113 |
+
Hard penalties are applied for: submitting without any evidence, submitting without a decision or confidence, and exhausting credits without ever submitting a report.
|
| 114 |
+
|
| 115 |
+
## Curated scenarios
|
| 116 |
+
|
| 117 |
+
| Name | Difficulty | Correct decision | Why it's interesting |
|
| 118 |
+
|---|---|---|---|
|
| 119 |
+
| `egfr_nsclc_viable` | easy | `go` | Clear viable target — expression + druggability alone are sufficient. |
|
| 120 |
+
| `kras_pdac_borderline` | medium | `go` | Historically undruggable; recent inhibitor literature is decisive. |
|
| 121 |
+
| `cd33_aml_misleading` | hard | `no_go` | Naive expression query says "go", but off-target + toxicity + clinical reveal the right answer. |
|
| 122 |
+
| `tp53_solid_tumors_clear_fail` | easy-medium | `no_go` | Druggability check alone is sufficient. |
|
| 123 |
+
| `ptpn11_juvenile_mml_complex` | very hard | `go` | Requires `binding_site_analysis(include_allosteric=True)`, off-target work, patient stratification, and an in-vitro assay. |
|
| 124 |
+
|
| 125 |
+
The procedural generator (`server/tasks/procedural_generator.py`) layers on additional easy / medium / hard scenarios sampled from a pool of 20 real cancer targets and 8 cancer indications.
|
| 126 |
+
|
| 127 |
+
## Setup
|
| 128 |
+
|
| 129 |
+
```bash
|
| 130 |
+
# 1. Install dependencies (env runtime only)
|
| 131 |
+
pip install -e .
|
| 132 |
+
|
| 133 |
+
# 2. Or install with training extras (torch + transformers + trl + peft pinned to working set)
|
| 134 |
+
pip install -e .[train]
|
| 135 |
+
|
| 136 |
+
# 3. Run the environment server
|
| 137 |
+
PYTHONPATH=. python -m server.app
|
| 138 |
+
# server is now available at http://localhost:8000
|
| 139 |
+
```
|
| 140 |
+
|
| 141 |
+
The legacy `uv sync` workflow still works if you have `uv.lock` checked
|
| 142 |
+
in locally; the editable `pip install` path above is the primary
|
| 143 |
+
supported route.
|
| 144 |
+
|
| 145 |
+
## Talking to the environment
|
| 146 |
+
|
| 147 |
+
```python
|
| 148 |
+
from client import DrugTargetEnv
|
| 149 |
+
from models import DrugTargetAction
|
| 150 |
+
|
| 151 |
+
with DrugTargetEnv(base_url="http://localhost:8000") as env:
|
| 152 |
+
result = env.reset()
|
| 153 |
+
print(result.observation.target_gene, "/", result.observation.indication)
|
| 154 |
+
|
| 155 |
+
result = env.step(DrugTargetAction(
|
| 156 |
+
action_type="query_expression",
|
| 157 |
+
parameters={"database": "GTEx"},
|
| 158 |
+
reasoning="Establish tissue baseline",
|
| 159 |
+
))
|
| 160 |
+
print(result.observation.latest_output.summary)
|
| 161 |
+
|
| 162 |
+
result = env.step(DrugTargetAction(
|
| 163 |
+
action_type="submit_validation_report",
|
| 164 |
+
reasoning="Sufficient evidence for go",
|
| 165 |
+
final_decision="go",
|
| 166 |
+
confidence=0.85,
|
| 167 |
+
))
|
| 168 |
+
print("done:", result.done, "reward:", result.reward)
|
| 169 |
+
```
|
| 170 |
+
|
| 171 |
+
## Running the baseline agent
|
| 172 |
+
|
| 173 |
+
```bash
|
| 174 |
+
PYTHONPATH=. python run_agent.py
|
| 175 |
+
```
|
| 176 |
+
|
| 177 |
+
The script writes a live JSON snapshot to `_dashboard_state.json` after every step so you can watch the agent's progress. The default model is `Qwen/Qwen2.5-3B-Instruct`.
|
| 178 |
+
|
| 179 |
+
## Reproduce
|
| 180 |
+
|
| 181 |
+
The three recipes below cover running the environment locally, training locally, and training on a Hugging Face Space:
|
| 182 |
+
|
| 183 |
+
```bash
|
| 184 |
+
# 1. Env locally (CPU is fine — the env itself is dependency-light)
|
| 185 |
+
pip install -e . && PYTHONPATH=. python -m server.app
|
| 186 |
+
# → http://localhost:8000 (also at https://huggingface.co/spaces/anugrahteesdollar/drugenv when deployed)
|
| 187 |
+
|
| 188 |
+
# 2. Training locally (single GPU, vanilla GRPO)
|
| 189 |
+
pip install -e .[train]
|
| 190 |
+
PYTHONPATH=. python -m training.training_script \
|
| 191 |
+
--model-id Qwen/Qwen2.5-3B-Instruct \
|
| 192 |
+
--evidence-dir evidence \
|
| 193 |
+
--output-dir runs/grpo-output
|
| 194 |
+
|
| 195 |
+
# 3. Training on a Hugging Face Space (H200 single-GPU)
|
| 196 |
+
# Push space/training/ to anugrahteesdollar/drugenv-trainer, set PUSH_REPO + HF_TOKEN
|
| 197 |
+
# in the Space variables, then POST /train.
|
| 198 |
+
# → https://huggingface.co/spaces/anugrahteesdollar/drugenv-trainer
|
| 199 |
+
```
|
| 200 |
+
|
| 201 |
+
The trainer Space's FastAPI control panel (`space/training/app.py`)
|
| 202 |
+
streams a live evidence dashboard while training runs — per-step
|
| 203 |
+
training curve, mid-training checkpoint progression, and a before /
|
| 204 |
+
after summary card. Default expected hardware: **H200 single-GPU**
|
| 205 |
+
(`h200x1`); H200 is ≈ 4× A100 throughput, ~$0.05–0.10 per step on
|
| 206 |
+
Qwen2.5-3B-class GRPO.
|
| 207 |
+
|
| 208 |
+
An optional **SFT warm-start** (`training/sft_warmstart.py`) is
|
| 209 |
+
controlled via the `SFT_WARMSTART` env var on the Space (default on).
|
| 210 |
+
It collects oracle trajectories on the curated scenario library, SFTs
|
| 211 |
+
the base model with a small LoRA, and hands the merged checkpoint to
|
| 212 |
+
GRPO so the policy starts with a non-zero prior over correct
|
| 213 |
+
trajectories.
|
| 214 |
+
|
| 215 |
+
## Baseline scores
|
| 216 |
+
|
| 217 |
+
| Difficulty bucket | Random policy | Heuristic policy | Trained Qwen2.5-3B |
|
| 218 |
+
|---|---|---|---|
|
| 219 |
+
| Easy (`egfr_nsclc_viable`) | _filled in after first training run_ | _filled in after first training run_ | _filled in after first training run_ |
|
| 220 |
+
| Medium (`kras_pdac_borderline`) | _filled in after first training run_ | _filled in after first training run_ | _filled in after first training run_ |
|
| 221 |
+
| Hard (`cd33_aml_misleading`) | _filled in after first training run_ | _filled in after first training run_ | _filled in after first training run_ |
|
| 222 |
+
|
| 223 |
+
The trainer Space writes the populated table to
|
| 224 |
+
`evidence/before_after_metrics.json` automatically on every run.
|
| 225 |
+
|
| 226 |
+
## Evolution note
|
| 227 |
+
|
| 228 |
+
The deployment scaffolding in this repository — the trainer Space
|
| 229 |
+
control panel, the live training-evidence callback, the SFT warm-start
|
| 230 |
+
script, and the working dependency pin set — was originally validated
|
| 231 |
+
against a particle-physics-themed prototype and then carried forward
|
| 232 |
+
when we pivoted to drug discovery. The simulator, scenarios, action
|
| 233 |
+
space, reward function, and rules engine are all drug-domain native;
|
| 234 |
+
the inheritance is exclusively in the training and evaluation
|
| 235 |
+
scaffolding.
|
client.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Drug Target Validation Environment Client.
|
| 2 |
+
|
| 3 |
+
Provides the ``DrugTargetEnv`` class that communicates with the
|
| 4 |
+
environment server over WebSocket / HTTP using the OpenEnv protocol.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from typing import Dict
|
| 8 |
+
|
| 9 |
+
from openenv.core.client_types import StepResult
|
| 10 |
+
from openenv.core.env_server.types import State
|
| 11 |
+
from openenv.core import EnvClient
|
| 12 |
+
|
| 13 |
+
try: # pragma: no cover - package import path
|
| 14 |
+
from .models import DrugTargetAction, ValidationObservation
|
| 15 |
+
except ImportError: # pragma: no cover - direct module import path
|
| 16 |
+
from models import DrugTargetAction, ValidationObservation
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class DrugTargetEnv(EnvClient[DrugTargetAction, ValidationObservation, State]):
    """OpenEnv client bound to the Drug Target Validation Environment.

    Sends ``DrugTargetAction`` objects to the environment server and turns
    the server's replies into ``ValidationObservation`` / ``State`` objects.

    Example:
        >>> with DrugTargetEnv(base_url="http://localhost:8000") as env:
        ...     result = env.reset()
        ...     print(result.observation.target_gene)
        ...     result = env.step(DrugTargetAction(
        ...         action_type="query_expression",
        ...         parameters={"database": "GTEx"},
        ...         reasoning="baseline expression survey",
        ...     ))
        ...     print(result.observation.latest_output.summary)
    """

    def _step_payload(self, action: DrugTargetAction) -> Dict:
        """Return the dict produced by ``action.model_dump()`` as the step payload."""
        serialised = action.model_dump()
        return serialised

    def _parse_result(self, payload: Dict) -> StepResult[ValidationObservation]:
        """Build a ``StepResult`` from a server reply.

        The ``"observation"`` entry (an empty dict when absent) is expanded
        into ``ValidationObservation`` keyword arguments. ``"reward"``
        defaults to ``None`` and ``"done"`` to ``False`` when missing.
        """
        observation_fields = payload.get("observation", {})
        parsed_observation = ValidationObservation(**observation_fields)
        step_reward = payload.get("reward")
        episode_done = payload.get("done", False)
        return StepResult(
            observation=parsed_observation,
            reward=step_reward,
            done=episode_done,
        )

    def _parse_state(self, payload: Dict) -> State:
        """Build a ``State`` from a server reply.

        Only ``"episode_id"`` (``None`` when absent) and ``"step_count"``
        (``0`` when absent) are read from ``payload``.
        """
        state_fields = {
            "episode_id": payload.get("episode_id"),
            "step_count": payload.get("step_count", 0),
        }
        return State(**state_fields)
|
dashboard.html
ADDED
|
@@ -0,0 +1,543 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="utf-8" />
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
| 6 |
+
<title>Bio-Experiment Agent Dashboard</title>
|
| 7 |
+
<link rel="preconnect" href="https://fonts.googleapis.com" />
|
| 8 |
+
<link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;600&family=DM+Sans:wght@400;500;700&display=swap" rel="stylesheet" />
|
| 9 |
+
<style>
|
| 10 |
+
:root {
|
| 11 |
+
--bg: #0c0e14;
|
| 12 |
+
--surface: #151822;
|
| 13 |
+
--surface2: #1c2030;
|
| 14 |
+
--border: #2a2f42;
|
| 15 |
+
--text: #e2e4ea;
|
| 16 |
+
--text-dim: #8b90a5;
|
| 17 |
+
--accent: #5ce0d8;
|
| 18 |
+
--accent2: #7c6cf0;
|
| 19 |
+
--green: #4ade80;
|
| 20 |
+
--red: #f87171;
|
| 21 |
+
--amber: #fbbf24;
|
| 22 |
+
--blue: #60a5fa;
|
| 23 |
+
--pink: #f472b6;
|
| 24 |
+
}
|
| 25 |
+
*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
|
| 26 |
+
body { background: var(--bg); color: var(--text); font-family: 'DM Sans', system-ui, sans-serif; line-height: 1.5; min-height: 100vh; }
|
| 27 |
+
.mono { font-family: 'JetBrains Mono', monospace; }
|
| 28 |
+
|
| 29 |
+
.header { display: flex; align-items: center; justify-content: space-between; padding: 14px 28px; border-bottom: 1px solid var(--border); background: var(--surface); }
|
| 30 |
+
.header h1 { font-size: 18px; font-weight: 700; letter-spacing: -.3px; }
|
| 31 |
+
.header h1 span { color: var(--accent); }
|
| 32 |
+
.header-right { display: flex; align-items: center; gap: 10px; }
|
| 33 |
+
.status-pill { font-size: 12px; padding: 4px 14px; border-radius: 20px; font-weight: 600; text-transform: uppercase; letter-spacing: .5px; }
|
| 34 |
+
.status-pill.live { background: rgba(76,222,128,.15); color: var(--green); }
|
| 35 |
+
.status-pill.done { background: rgba(248,113,113,.15); color: var(--red); }
|
| 36 |
+
.status-pill.waiting { background: rgba(139,144,165,.15); color: var(--text-dim); }
|
| 37 |
+
|
| 38 |
+
.btn { padding: 6px 16px; border-radius: 8px; border: 1px solid var(--border); background: var(--surface2); color: var(--text); font-size: 12px; font-weight: 600; cursor: pointer; transition: all .15s; }
|
| 39 |
+
.btn:hover { border-color: var(--accent); color: var(--accent); }
|
| 40 |
+
.btn.primary { background: rgba(92,224,216,.12); border-color: var(--accent); color: var(--accent); }
|
| 41 |
+
.btn.primary:hover { background: rgba(92,224,216,.25); }
|
| 42 |
+
.btn.danger { border-color: var(--red); color: var(--red); }
|
| 43 |
+
.btn.danger:hover { background: rgba(248,113,113,.12); }
|
| 44 |
+
|
| 45 |
+
.grid { display: grid; grid-template-columns: 1fr 1fr 1fr; gap: 16px; padding: 20px 28px; max-width: 1600px; }
|
| 46 |
+
@media (max-width: 1100px) { .grid { grid-template-columns: 1fr 1fr; } }
|
| 47 |
+
@media (max-width: 700px) { .grid { grid-template-columns: 1fr; } }
|
| 48 |
+
|
| 49 |
+
.card { background: var(--surface); border: 1px solid var(--border); border-radius: 12px; padding: 18px 20px; overflow: hidden; }
|
| 50 |
+
.card h2 { font-size: 11px; font-weight: 600; text-transform: uppercase; letter-spacing: 1px; color: var(--text-dim); margin-bottom: 12px; }
|
| 51 |
+
.card.span2 { grid-column: span 2; }
|
| 52 |
+
.card.span3 { grid-column: span 3; }
|
| 53 |
+
@media (max-width: 700px) { .card.span2, .card.span3 { grid-column: span 1; } }
|
| 54 |
+
|
| 55 |
+
.gauge-row { display: flex; gap: 14px; flex-wrap: wrap; }
|
| 56 |
+
.gauge { flex: 1; min-width: 130px; background: var(--surface2); border-radius: 10px; padding: 14px; }
|
| 57 |
+
.gauge-label { font-size: 11px; color: var(--text-dim); margin-bottom: 6px; text-transform: uppercase; letter-spacing: .5px; }
|
| 58 |
+
.gauge-value { font-size: 22px; font-weight: 700; }
|
| 59 |
+
.gauge-bar { height: 5px; border-radius: 3px; background: var(--border); margin-top: 8px; overflow: hidden; }
|
| 60 |
+
.gauge-bar-fill { height: 100%; border-radius: 3px; transition: width .6s ease; }
|
| 61 |
+
|
| 62 |
+
.timeline { position: relative; padding-left: 20px; }
|
| 63 |
+
.timeline::before { content: ''; position: absolute; left: 6px; top: 0; bottom: 0; width: 2px; background: var(--border); }
|
| 64 |
+
.timeline-item { position: relative; margin-bottom: 14px; padding-left: 18px; }
|
| 65 |
+
.timeline-item::before { content: ''; position: absolute; left: -18px; top: 6px; width: 10px; height: 10px; border-radius: 50%; border: 2px solid var(--accent); background: var(--bg); }
|
| 66 |
+
.timeline-item.fail::before { border-color: var(--red); }
|
| 67 |
+
.tl-action { font-weight: 600; font-size: 14px; }
|
| 68 |
+
.tl-meta { font-size: 12px; color: var(--text-dim); margin-top: 2px; }
|
| 69 |
+
|
| 70 |
+
.mini-table { width: 100%; font-size: 13px; border-collapse: collapse; }
|
| 71 |
+
.mini-table td { padding: 5px 8px; border-bottom: 1px solid var(--border); vertical-align: top; }
|
| 72 |
+
.mini-table td:first-child { color: var(--text-dim); white-space: nowrap; width: 40%; }
|
| 73 |
+
|
| 74 |
+
.tag-list { display: flex; flex-wrap: wrap; gap: 6px; }
|
| 75 |
+
.tag { font-size: 12px; padding: 3px 10px; border-radius: 6px; background: var(--surface2); border: 1px solid var(--border); font-family: 'JetBrains Mono', monospace; }
|
| 76 |
+
.tag.green { border-color: rgba(76,222,128,.3); color: var(--green); }
|
| 77 |
+
.tag.pink { border-color: rgba(244,114,182,.3); color: var(--pink); }
|
| 78 |
+
.tag.amber { border-color: rgba(251,191,36,.3); color: var(--amber); }
|
| 79 |
+
.tag.red { border-color: rgba(248,113,113,.3); color: var(--red); }
|
| 80 |
+
.tag.match { background: rgba(76,222,128,.15); }
|
| 81 |
+
.tag.miss { background: rgba(248,113,113,.08); }
|
| 82 |
+
|
| 83 |
+
.code-block { background: var(--surface2); border: 1px solid var(--border); border-radius: 8px; padding: 12px 14px; font-family: 'JetBrains Mono', monospace; font-size: 12px; white-space: pre-wrap; word-break: break-all; max-height: 220px; overflow-y: auto; color: var(--text-dim); line-height: 1.6; }
|
| 84 |
+
|
| 85 |
+
.progress-grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(150px, 1fr)); gap: 6px; }
|
| 86 |
+
.progress-item { display: flex; align-items: center; gap: 6px; font-size: 12px; }
|
| 87 |
+
.dot { width: 8px; height: 8px; border-radius: 50%; flex-shrink: 0; background: var(--border); }
|
| 88 |
+
.dot.done { background: var(--green); }
|
| 89 |
+
|
| 90 |
+
.pop-bar-container { margin-bottom: 10px; }
|
| 91 |
+
.pop-bar-label { font-size: 12px; margin-bottom: 3px; display: flex; justify-content: space-between; }
|
| 92 |
+
.pop-bar { height: 14px; border-radius: 4px; background: var(--surface2); overflow: hidden; }
|
| 93 |
+
.pop-bar-fill { height: 100%; border-radius: 4px; }
|
| 94 |
+
|
| 95 |
+
#reward-chart { width: 100%; height: 120px; }
|
| 96 |
+
::-webkit-scrollbar { width: 6px; }
|
| 97 |
+
::-webkit-scrollbar-track { background: transparent; }
|
| 98 |
+
::-webkit-scrollbar-thumb { background: var(--border); border-radius: 3px; }
|
| 99 |
+
|
| 100 |
+
.conclusion-card { background: var(--surface2); border: 1px solid var(--border); border-radius: 10px; padding: 14px 16px; margin-bottom: 12px; }
|
| 101 |
+
.conclusion-card .cc-header { display: flex; justify-content: space-between; align-items: center; margin-bottom: 8px; }
|
| 102 |
+
.cc-type { font-size: 11px; padding: 2px 10px; border-radius: 4px; font-weight: 600; text-transform: uppercase; letter-spacing: .5px; }
|
| 103 |
+
.cc-type.causal { background: rgba(244,114,182,.15); color: var(--pink); }
|
| 104 |
+
.cc-type.correlative { background: rgba(96,165,250,.15); color: var(--blue); }
|
| 105 |
+
.cc-type.descriptive { background: rgba(139,144,165,.15); color: var(--text-dim); }
|
| 106 |
+
.cc-conf { font-family: 'JetBrains Mono', monospace; font-size: 13px; font-weight: 600; }
|
| 107 |
+
.cc-claim { font-size: 14px; margin-bottom: 8px; line-height: 1.5; }
|
| 108 |
+
.cc-section-label { font-size: 10px; color: var(--text-dim); text-transform: uppercase; letter-spacing: .5px; margin-bottom: 3px; margin-top: 8px; }
|
| 109 |
+
|
| 110 |
+
/* ── control panel ────────────────────────────── */
|
| 111 |
+
.control-panel { background: var(--surface); border: 1px solid var(--border); border-radius: 12px; margin: 20px 28px 0; padding: 18px 20px; }
|
| 112 |
+
.control-panel summary { cursor: pointer; font-size: 13px; font-weight: 600; color: var(--accent); }
|
| 113 |
+
.control-panel[open] summary { margin-bottom: 14px; }
|
| 114 |
+
.form-row { display: flex; gap: 12px; margin-bottom: 10px; flex-wrap: wrap; align-items: end; }
|
| 115 |
+
.form-field { display: flex; flex-direction: column; gap: 4px; }
|
| 116 |
+
.form-field label { font-size: 11px; color: var(--text-dim); text-transform: uppercase; letter-spacing: .5px; }
|
| 117 |
+
.form-field input, .form-field textarea, .form-field select {
|
| 118 |
+
background: var(--surface2); border: 1px solid var(--border); border-radius: 6px;
|
| 119 |
+
color: var(--text); padding: 7px 10px; font-size: 13px; font-family: inherit; outline: none;
|
| 120 |
+
}
|
| 121 |
+
.form-field input:focus, .form-field textarea:focus, .form-field select:focus { border-color: var(--accent); }
|
| 122 |
+
.form-field textarea { min-height: 60px; resize: vertical; }
|
| 123 |
+
|
| 124 |
+
/* ── final report ─────────────────────────────── */
|
| 125 |
+
.report-overlay { display: none; position: fixed; inset: 0; z-index: 100; background: rgba(12,14,20,.85); backdrop-filter: blur(6px); overflow-y: auto; padding: 40px 20px; }
|
| 126 |
+
.report-overlay.visible { display: flex; justify-content: center; align-items: flex-start; }
|
| 127 |
+
.report-card { background: var(--surface); border: 1px solid var(--border); border-radius: 16px; padding: 32px 36px; max-width: 900px; width: 100%; }
|
| 128 |
+
.report-card h2 { font-size: 22px; font-weight: 700; margin-bottom: 4px; color: var(--text); text-transform: none; letter-spacing: normal; }
|
| 129 |
+
.report-card .subtitle { font-size: 13px; color: var(--text-dim); margin-bottom: 20px; }
|
| 130 |
+
.report-section { margin-bottom: 20px; }
|
| 131 |
+
.report-section h3 { font-size: 12px; color: var(--accent); text-transform: uppercase; letter-spacing: 1px; margin-bottom: 8px; }
|
| 132 |
+
.comparison-row { display: flex; gap: 20px; margin-bottom: 16px; }
|
| 133 |
+
.comparison-col { flex: 1; }
|
| 134 |
+
.comparison-col h4 { font-size: 11px; color: var(--text-dim); text-transform: uppercase; margin-bottom: 6px; }
|
| 135 |
+
|
| 136 |
+
.pulse { animation: pulse 1.5s ease-in-out infinite; }
|
| 137 |
+
@keyframes pulse { 0%,100% { opacity: 1; } 50% { opacity: .5; } }
|
| 138 |
+
</style>
|
| 139 |
+
</head>
|
| 140 |
+
<body>
|
| 141 |
+
|
| 142 |
+
<div class="header">
|
| 143 |
+
<h1><span>BioExp</span> Agent Dashboard</h1>
|
| 144 |
+
<div class="header-right">
|
| 145 |
+
<span id="thinking-badge" class="mono" style="font-size:11px;color:var(--accent2);display:none">REASONING ON</span>
|
| 146 |
+
<span id="step-label" class="mono" style="font-size:13px;color:var(--text-dim)">Step 0</span>
|
| 147 |
+
<span id="status-pill" class="status-pill waiting">Waiting</span>
|
| 148 |
+
<button class="btn primary" onclick="doRestart()">Restart</button>
|
| 149 |
+
<button class="btn" onclick="showReport()">Report</button>
|
| 150 |
+
</div>
|
| 151 |
+
</div>
|
| 152 |
+
|
| 153 |
+
<!-- Control Panel (collapsible) -->
|
| 154 |
+
<details class="control-panel" id="control-panel">
|
| 155 |
+
<summary>New Task / Custom Ground Truth</summary>
|
| 156 |
+
<div class="form-row">
|
| 157 |
+
<div class="form-field" style="flex:2">
|
| 158 |
+
<label>Scenario (leave blank for random)</label>
|
| 159 |
+
<select id="f-scenario"><option value="">— random —</option></select>
|
| 160 |
+
</div>
|
| 161 |
+
<div class="form-field" style="flex:1">
|
| 162 |
+
<label>True Markers (comma-separated)</label>
|
| 163 |
+
<input id="f-markers" placeholder="e.g. MYH7, TNNT2, ACTA1" />
|
| 164 |
+
</div>
|
| 165 |
+
<div class="form-field" style="flex:1">
|
| 166 |
+
<label>Causal Mechanisms (comma-separated)</label>
|
| 167 |
+
<input id="f-mechanisms" placeholder="e.g. sarcomere dysfunction" />
|
| 168 |
+
</div>
|
| 169 |
+
</div>
|
| 170 |
+
<div class="form-row">
|
| 171 |
+
<div class="form-field" style="flex:2">
|
| 172 |
+
<label>True Pathways (name:score, comma-sep)</label>
|
| 173 |
+
<input id="f-pathways" placeholder="e.g. Wnt_signaling:0.8, MAPK:0.6" />
|
| 174 |
+
</div>
|
| 175 |
+
<div class="form-field">
|
| 176 |
+
<button class="btn primary" onclick="doCustomRun()">Run with Ground Truth</button>
|
| 177 |
+
</div>
|
| 178 |
+
</div>
|
| 179 |
+
</details>
|
| 180 |
+
|
| 181 |
+
<div class="grid">
|
| 182 |
+
<div class="card span2" id="card-task">
|
| 183 |
+
<h2>Task</h2>
|
| 184 |
+
<div id="task-statement" style="font-size:15px;font-weight:500;margin-bottom:8px;">—</div>
|
| 185 |
+
<div id="task-meta" style="font-size:13px;color:var(--text-dim)"></div>
|
| 186 |
+
</div>
|
| 187 |
+
|
| 188 |
+
<div class="card">
|
| 189 |
+
<h2>Reward</h2>
|
| 190 |
+
<div id="reward-value" class="mono" style="font-size:32px;font-weight:700;margin-bottom:6px;">0.000</div>
|
| 191 |
+
<canvas id="reward-chart"></canvas>
|
| 192 |
+
</div>
|
| 193 |
+
|
| 194 |
+
<div class="card span3"><h2>Resources</h2><div class="gauge-row" id="gauges"></div></div>
|
| 195 |
+
|
| 196 |
+
<div class="card span2" style="max-height:460px;overflow-y:auto">
|
| 197 |
+
<h2>Pipeline History <span style="color:var(--accent);font-size:10px">OBSERVABLE</span></h2>
|
| 198 |
+
<div class="timeline" id="timeline"></div>
|
| 199 |
+
</div>
|
| 200 |
+
|
| 201 |
+
<div class="card">
|
| 202 |
+
<h2>Current Action</h2>
|
| 203 |
+
<table class="mini-table" id="action-table"><tbody></tbody></table>
|
| 204 |
+
<h2 style="margin-top:14px" id="thinking-header" style="display:none">Model Reasoning</h2>
|
| 205 |
+
<div class="code-block" id="model-thinking" style="display:none;border-color:rgba(124,108,240,.2);max-height:140px;margin-bottom:10px">—</div>
|
| 206 |
+
<h2 style="margin-top:10px">Model Raw Output</h2>
|
| 207 |
+
<div class="code-block" id="model-response">—</div>
|
| 208 |
+
</div>
|
| 209 |
+
|
| 210 |
+
<div class="card">
|
| 211 |
+
<h2>Discovered Markers <span style="color:var(--accent);font-size:10px">OBSERVABLE</span></h2>
|
| 212 |
+
<div class="tag-list" id="markers-list"><span class="tag" style="color:var(--text-dim)">none yet</span></div>
|
| 213 |
+
<h2 style="margin-top:14px">Candidate Mechanisms</h2>
|
| 214 |
+
<div class="tag-list" id="mechanisms-list"><span class="tag" style="color:var(--text-dim)">none yet</span></div>
|
| 215 |
+
</div>
|
| 216 |
+
|
| 217 |
+
<div class="card">
|
| 218 |
+
<h2>Rule Violations</h2>
|
| 219 |
+
<div id="violations" style="font-size:13px;color:var(--text-dim)">None</div>
|
| 220 |
+
<h2 style="margin-top:14px">Uncertainty Summary</h2>
|
| 221 |
+
<table class="mini-table" id="uncertainty-table"><tbody></tbody></table>
|
| 222 |
+
<h2 style="margin-top:14px">Reward Breakdown</h2>
|
| 223 |
+
<table class="mini-table" id="reward-breakdown-table"><tbody></tbody></table>
|
| 224 |
+
</div>
|
| 225 |
+
|
| 226 |
+
<div class="card">
|
| 227 |
+
<h2>Latest Output</h2>
|
| 228 |
+
<table class="mini-table" id="output-table"><tbody></tbody></table>
|
| 229 |
+
<div class="code-block" id="output-data" style="margin-top:10px;max-height:140px">—</div>
|
| 230 |
+
</div>
|
| 231 |
+
|
| 232 |
+
<div class="card span3" id="card-conclusions" style="display:none;border-color:rgba(76,222,128,.25)">
|
| 233 |
+
<h2 style="color:var(--green)">Synthesized Conclusions</h2>
|
| 234 |
+
<div id="conclusions-list"></div>
|
| 235 |
+
</div>
|
| 236 |
+
|
| 237 |
+
<!-- Ground Truth Comparison (shown when episode done + has conclusions) -->
|
| 238 |
+
<div class="card span3" id="card-gt-comparison" style="display:none;border-color:rgba(251,191,36,.25)">
|
| 239 |
+
<h2 style="color:var(--amber)">Ground Truth Comparison</h2>
|
| 240 |
+
<div class="comparison-row">
|
| 241 |
+
<div class="comparison-col">
|
| 242 |
+
<h4>Agent's Markers</h4>
|
| 243 |
+
<div class="tag-list" id="gt-agent-markers"></div>
|
| 244 |
+
</div>
|
| 245 |
+
<div class="comparison-col">
|
| 246 |
+
<h4>True Markers</h4>
|
| 247 |
+
<div class="tag-list" id="gt-true-markers"></div>
|
| 248 |
+
</div>
|
| 249 |
+
</div>
|
| 250 |
+
<div class="comparison-row">
|
| 251 |
+
<div class="comparison-col">
|
| 252 |
+
<h4>Agent's Mechanisms</h4>
|
| 253 |
+
<div class="tag-list" id="gt-agent-mechs"></div>
|
| 254 |
+
</div>
|
| 255 |
+
<div class="comparison-col">
|
| 256 |
+
<h4>True Mechanisms</h4>
|
| 257 |
+
<div class="tag-list" id="gt-true-mechs"></div>
|
| 258 |
+
</div>
|
| 259 |
+
</div>
|
| 260 |
+
<div id="gt-score" style="margin-top:8px;font-size:14px;font-weight:600"></div>
|
| 261 |
+
</div>
|
| 262 |
+
|
| 263 |
+
<div class="card" style="border-color:rgba(124,108,240,.25)">
|
| 264 |
+
<h2 style="color:var(--accent2)">Cell Populations <span style="font-size:10px">HIDDEN</span></h2>
|
| 265 |
+
<div id="populations"></div>
|
| 266 |
+
</div>
|
| 267 |
+
<div class="card" style="border-color:rgba(124,108,240,.25)">
|
| 268 |
+
<h2 style="color:var(--accent2)">Ground Truth <span style="font-size:10px">HIDDEN</span></h2>
|
| 269 |
+
<div style="margin-bottom:8px"><span style="font-size:11px;color:var(--text-dim);text-transform:uppercase">True Markers</span><div class="tag-list" id="true-markers" style="margin-top:4px"></div></div>
|
| 270 |
+
<div style="margin-bottom:8px"><span style="font-size:11px;color:var(--text-dim);text-transform:uppercase">Causal Mechanisms</span><div class="tag-list" id="true-mechanisms" style="margin-top:4px"></div></div>
|
| 271 |
+
<div><span style="font-size:11px;color:var(--text-dim);text-transform:uppercase">Top Pathways</span><table class="mini-table" id="pathways-table" style="margin-top:4px"><tbody></tbody></table></div>
|
| 272 |
+
</div>
|
| 273 |
+
<div class="card" style="border-color:rgba(124,108,240,.25)">
|
| 274 |
+
<h2 style="color:var(--accent2)">Technical State <span style="font-size:10px">HIDDEN</span></h2>
|
| 275 |
+
<table class="mini-table" id="technical-table"><tbody></tbody></table>
|
| 276 |
+
<h2 style="margin-top:14px;color:var(--accent2)">Failure Conditions <span style="font-size:10px">HIDDEN</span></h2>
|
| 277 |
+
<div class="tag-list" id="failure-conditions"></div>
|
| 278 |
+
</div>
|
| 279 |
+
<div class="card span3" style="border-color:rgba(124,108,240,.25)">
|
| 280 |
+
<h2 style="color:var(--accent2)">Experiment Progress <span style="font-size:10px">HIDDEN</span></h2>
|
| 281 |
+
<div class="progress-grid" id="progress-grid"></div>
|
| 282 |
+
</div>
|
| 283 |
+
</div>
|
| 284 |
+
|
| 285 |
+
<!-- Final Report Overlay -->
|
| 286 |
+
<div class="report-overlay" id="report-overlay" onclick="if(event.target===this)hideReport()">
|
| 287 |
+
<div class="report-card" id="report-content"></div>
|
| 288 |
+
</div>
|
| 289 |
+
|
| 290 |
+
<script>
|
| 291 |
+
// Delay between two consecutive /api/state polls, in milliseconds.
const POLL_MS = 1200;

// Palette cycled through when drawing the cell-population bars.
const POP_COLORS = [
  '#5ce0d8', '#7c6cf0', '#f472b6', '#60a5fa', '#fbbf24',
  '#4ade80', '#f87171', '#c084fc', '#fb923c', '#38bdf8',
];

// Mutable page state shared by the render and action functions.
let rewardHistory = [];   // {step, v} points plotted by the reward chart
let lastTimestamp = 0;    // timestamp of the last state added to rewardHistory
let latestState = null;   // most recent payload handed to renderState()
|
| 296 |
+
|
| 297 |
+
/** Shorthand for document.getElementById. */
function $(id) {
  const node = document.getElementById(id);
  return node;
}
|
| 298 |
+
/** Replace the inner HTML of the element with the given id. */
function setHTML(id, html) {
  const target = $(id);
  target.innerHTML = html;
}
|
| 299 |
+
/**
 * Render a list of values as `.tag` spans.
 * An empty or missing list yields a single dimmed placeholder tag.
 * `cls` is an optional extra CSS class applied to every tag.
 */
function tagsHTML(arr, cls) {
  const hasItems = Boolean(arr && arr.length);
  if (!hasItems) {
    return '<span class="tag" style="color:var(--text-dim)">—</span>';
  }
  const extraClass = cls || '';
  const spans = arr.map(item => `<span class="tag ${extraClass}">${esc(item)}</span>`);
  return spans.join('');
}
|
| 303 |
+
/**
 * HTML-escape a value for use as element text.
 * null/undefined become an em dash. Escaping is delegated to the DOM by
 * round-tripping the text through a detached <div>.
 */
function esc(s) {
  if (s == null) {
    return '—';
  }
  const holder = document.createElement('div');
  holder.textContent = String(s);
  return holder.innerHTML;
}
|
| 304 |
+
/** Percentage of `total` taken by `used`, clamped to 0..100 (0 when total is falsy). */
function pct(used, total) {
  if (!total) {
    return 0;
  }
  const ratio = (used / total) * 100;
  return Math.min(100, Math.max(0, ratio));
}
|
| 305 |
+
/** Map a percentage to a CSS colour: below 50 green, below 80 amber, otherwise red. */
function gaugeColor(p) {
  if (p < 50) return 'var(--green)';
  if (p < 80) return 'var(--amber)';
  return 'var(--red)';
}
|
| 306 |
+
/** Format a number with en-US thousands separators and no decimals; null/undefined give '0'. */
function fmt(n) {
  if (n == null) {
    return '0';
  }
  const wholeNumber = { maximumFractionDigits: 0 };
  return Number(n).toLocaleString('en-US', wholeNumber);
}
|
| 307 |
+
/**
 * De-duplicate a list case-insensitively while keeping first-seen order.
 * Entries are stringified and trimmed; null/undefined and blank entries are dropped.
 */
function uniqueItems(arr) {
  const seenKeys = new Set();
  return (arr || []).reduce((kept, entry) => {
    const label = entry == null ? '' : String(entry).trim();
    if (!label) return kept;
    const folded = label.toUpperCase();
    if (!seenKeys.has(folded)) {
      seenKeys.add(folded);
      kept.push(label);
    }
    return kept;
  }, []);
}
|
| 321 |
+
/**
 * Build the HTML for one resource gauge.
 * When `pctVal` is given a fill bar is drawn; `inv` colours the bar by the
 * complementary percentage (used for the "remaining" gauges).
 */
function gauge(label, value, pctVal, inv) {
  const pieces = [
    `<div class="gauge"><div class="gauge-label">${label}</div><div class="gauge-value mono">${value}</div>`,
  ];
  if (pctVal != null) {
    const fill = gaugeColor(inv ? 100 - pctVal : pctVal);
    pieces.push(`<div class="gauge-bar"><div class="gauge-bar-fill" style="width:${pctVal.toFixed(1)}%;background:${fill}"></div></div>`);
  }
  pieces.push('</div>');
  return pieces.join('');
}
|
| 326 |
+
/** Render an object's entries as escaped two-cell table rows (key, value). */
function miniRows(obj) {
  const rows = [];
  for (const [key, val] of Object.entries(obj)) {
    rows.push(`<tr><td>${esc(key)}</td><td>${esc(val)}</td></tr>`);
  }
  return rows.join('');
}
|
| 327 |
+
|
| 328 |
+
/**
 * Draw the cumulative-reward sparkline on `canvas`.
 * `data` is a list of {v} points. Fewer than two points only clears the
 * canvas. The backing store is sized at twice the layout size.
 */
function drawRewardChart(canvas, data) {
  const ctx = canvas.getContext('2d');
  const W = canvas.width = canvas.offsetWidth * 2;
  const H = canvas.height = canvas.offsetHeight * 2;
  ctx.clearRect(0, 0, W, H);
  if (data.length < 2) return;

  const pad = 8;
  const vals = data.map(d => d.v);
  // The value range always includes 0 so the baseline is visible.
  const minV = Math.min(0, ...vals);
  const maxV = Math.max(0.1, ...vals);
  const range = maxV - minV || 1;
  const xAt = i => pad + (i / (data.length - 1)) * (W - 2 * pad);
  const yAt = v => H - pad - ((v - minV) / range) * (H - 2 * pad);

  // Zero baseline.
  ctx.strokeStyle = 'rgba(92,224,216,.4)';
  ctx.lineWidth = 2;
  ctx.beginPath();
  const yZ = yAt(0);
  ctx.moveTo(pad, yZ);
  ctx.lineTo(W - pad, yZ);
  ctx.stroke();

  // Reward polyline.
  ctx.strokeStyle = '#5ce0d8';
  ctx.lineWidth = 3;
  ctx.beginPath();
  data.forEach((d, i) => {
    if (i === 0) ctx.moveTo(xAt(i), yAt(d.v));
    else ctx.lineTo(xAt(i), yAt(d.v));
  });
  ctx.stroke();

  // One dot per point: green for non-negative, red for negative.
  data.forEach((d, i) => {
    ctx.fillStyle = d.v >= 0 ? '#4ade80' : '#f87171';
    ctx.beginPath();
    ctx.arc(xAt(i), yAt(d.v), 5, 0, Math.PI * 2);
    ctx.fill();
  });
}
|
| 338 |
+
|
| 339 |
+
/**
 * Render the agent's items as tags marked ✓/✗ according to whether each one
 * appears (case-insensitively) in the ground-truth list.
 *
 * Entries are coerced with String() before comparison so that numeric or
 * otherwise non-string values coming from the state JSON cannot throw.
 *
 * @param {Array} agentArr  items proposed by the agent
 * @param {Array} trueArr   ground-truth items (may be null/undefined)
 * @param {string} cls      extra CSS class for every tag
 * @returns {string} HTML string; a dimmed placeholder tag when agentArr is empty
 */
function comparedTags(agentArr, trueArr, cls) {
  if (!agentArr || !agentArr.length) return '<span class="tag" style="color:var(--text-dim)">—</span>';
  const fold = value => String(value).toUpperCase();
  const trueSet = new Set((trueArr || []).map(fold));
  return agentArr.map(item => {
    const hit = trueSet.has(fold(item));
    return `<span class="tag ${cls} ${hit ? 'match' : 'miss'}">${esc(item)} ${hit ? '✓' : '✗'}</span>`;
  }).join('');
}
|
| 347 |
+
|
| 348 |
+
// ── API actions ──
|
| 349 |
+
/** Clear the local reward history and ask the server to restart the episode. */
async function doRestart() {
  lastTimestamp = 0;
  rewardHistory = [];
  const request = { method: 'POST' };
  await fetch('/api/restart', request);
}
|
| 353 |
+
|
| 354 |
+
/**
 * Read the control-panel form and POST a new run to /api/run.
 *
 * Only the ground-truth fields the user filled in are sent. Pathways are
 * entered as "name:score" pairs. The split happens at the LAST colon so a
 * pathway name may itself contain ':'. Entries whose score is not a finite
 * number are dropped, because JSON.stringify would otherwise send NaN as null.
 */
async function doCustomRun() {
  // Comma-separated input -> trimmed, non-empty entries.
  const splitList = id => $(id).value.split(',').map(s => s.trim()).filter(Boolean);

  const scenario = $('f-scenario').value || undefined;
  const markers = splitList('f-markers');
  const mechs = splitList('f-mechanisms');

  const pathways = {};
  splitList('f-pathways').forEach(entry => {
    const cut = entry.lastIndexOf(':');
    if (cut <= 0) return;                      // no name or no separator
    const name = entry.slice(0, cut).trim();
    const score = parseFloat(entry.slice(cut + 1));
    if (name && Number.isFinite(score)) pathways[name] = score;
  });

  const gt = {};
  if (markers.length) gt.true_markers = markers;
  if (mechs.length) gt.causal_mechanisms = mechs;
  if (Object.keys(pathways).length) gt.true_pathways = pathways;

  rewardHistory = [];
  lastTimestamp = 0;
  await fetch('/api/run', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      scenario_name: scenario,
      ground_truth: Object.keys(gt).length ? gt : undefined,
    }),
  });
}
|
| 368 |
+
|
| 369 |
+
/**
 * Build the final-report overlay from the most recent state and show it.
 *
 * Does nothing until a state has been received. Every value taken from the
 * state is escaped before it is placed in the markup. `claim_type` is also
 * used as a CSS class, so it is reduced to [a-z0-9_-] to stop model output
 * breaking out of the class attribute.
 */
function showReport() {
  const s = latestState;
  if (!s) return;

  const fold = value => String(value).toUpperCase();
  const safeClass = value => String(value || '').toLowerCase().replace(/[^a-z0-9_-]/g, '');
  const confColor = c => (c >= .7 ? 'var(--green)' : c >= .4 ? 'var(--amber)' : 'var(--red)');

  const rc = $('report-content');
  const task = s.task || {};
  const lat = s.latent || {};
  const conc = s.conclusions || [];
  const trueM = lat.true_markers || [];
  const trueMech = lat.causal_mechanisms || [];

  // Prefer what the agent discovered during the run; fall back to the
  // markers/mechanisms named in its conclusions.
  const conclusionMarkers = uniqueItems(conc.flatMap(c => c.top_markers || []));
  const conclusionMechanisms = uniqueItems(conc.flatMap(c => c.causal_mechanisms || []));
  const agentM = uniqueItems((s.discovered_markers && s.discovered_markers.length) ? s.discovered_markers : conclusionMarkers);
  const agentMechanisms = uniqueItems((s.candidate_mechanisms && s.candidate_mechanisms.length) ? s.candidate_mechanisms : conclusionMechanisms);
  const trueKeys = new Set(trueM.map(fold));
  const markerHits = agentM.filter(m => trueKeys.has(fold(m)));

  const r = s.resources || {};
  const cum = Number(s.cumulative_reward) || 0;

  let html = `<h2>Experiment Report</h2>
<div class="subtitle">${esc(task.problem_statement)}</div>
<div class="report-section"><h3>Summary</h3>
<table class="mini-table"><tbody>
<tr><td>Status</td><td>${s.episode_done ? 'Completed' : 'In Progress'}</td></tr>
<tr><td>Steps</td><td>${esc(s.step)}</td></tr>
<tr><td>Cumulative Reward</td><td style="color:${cum >= 0 ? 'var(--green)' : 'var(--red)'}">${cum >= 0 ? '+' : ''}${cum.toFixed(3)}</td></tr>
<tr><td>Budget Used</td><td>$${fmt(r.budget_used)} / $${fmt((r.budget_used||0)+(r.budget_remaining||0))}</td></tr>
<tr><td>Time Used</td><td>${(r.time_used_days||0).toFixed(0)}d / ${((r.time_used_days||0)+(r.time_remaining_days||0)).toFixed(0)}d</td></tr>
<tr><td>Markers Found</td><td>${agentM.length} (${markerHits.length} match ground truth)</td></tr>
</tbody></table>
</div>`;

  if (conc.length) {
    html += `<div class="report-section"><h3>Conclusions</h3>`;
    conc.forEach(c => {
      const confidence = Number(c.confidence) || 0;
      html += `<div class="conclusion-card"><div class="cc-header"><span class="cc-type ${safeClass(c.claim_type)}">${esc(c.claim_type)}</span><span class="cc-conf" style="color:${confColor(confidence)}">${(confidence * 100).toFixed(0)}%</span></div>`;
      if (c.claim) html += `<div class="cc-claim">${esc(c.claim)}</div>`;
      if (c.top_markers?.length) html += `<div class="cc-section-label">Top Markers</div><div class="tag-list">${c.top_markers.map(m => `<span class="tag green">${esc(m)}</span>`).join('')}</div>`;
      if (c.causal_mechanisms?.length) html += `<div class="cc-section-label">Causal Mechanisms</div><div class="tag-list">${c.causal_mechanisms.map(m => `<span class="tag pink">${esc(m)}</span>`).join('')}</div>`;
      if (c.predicted_pathways && Object.keys(c.predicted_pathways).length) html += `<div class="cc-section-label">Predicted Pathways</div><table class="mini-table"><tbody>${Object.entries(c.predicted_pathways).map(([k, v]) => `<tr><td>${esc(k)}</td><td>${Number(v).toFixed(3)}</td></tr>`).join('')}</tbody></table>`;
      html += `</div>`;
    });
    html += `</div>`;
  }

  html += `<div class="report-section"><h3>Ground Truth Comparison</h3>
<div class="comparison-row"><div class="comparison-col"><h4>Agent's Markers</h4><div class="tag-list">${comparedTags(agentM, trueM, 'green')}</div></div>
<div class="comparison-col"><h4>True Markers</h4><div class="tag-list">${tagsHTML(trueM, 'green')}</div></div></div>
<div class="comparison-row"><div class="comparison-col"><h4>Agent's Mechanisms</h4><div class="tag-list">${comparedTags(agentMechanisms, trueMech, 'pink')}</div></div>
<div class="comparison-col"><h4>True Mechanisms</h4><div class="tag-list">${tagsHTML(trueMech, 'pink')}</div></div></div>
</div>`;

  const hist = s.pipeline_history || [];
  if (hist.length) {
    html += `<div class="report-section"><h3>Pipeline Steps</h3><table class="mini-table"><tbody>`;
    hist.forEach(h => {
      html += `<tr><td>${h.success ? '✓' : '✗'} ${esc(h.action_type)}</td><td>${esc(h.output_summary)} · q=${esc(h.quality_score)}</td></tr>`;
    });
    html += `</tbody></table></div>`;
  }

  html += `<div style="margin-top:20px;text-align:right"><button class="btn" onclick="hideReport()">Close</button> <button class="btn primary" onclick="doRestart();hideReport()">New Run</button></div>`;
  rc.innerHTML = html;
  $('report-overlay').classList.add('visible');
}
|
| 428 |
+
/** Hide the final-report overlay. */
function hideReport() {
  const overlay = $('report-overlay');
  overlay.classList.remove('visible');
}
|
| 429 |
+
|
| 430 |
+
/**
 * Render the "hidden" panels that come from the latent state: cell
 * populations, ground truth, technical state, failure conditions, progress.
 * Private helper of renderState().
 */
function renderLatentPanels(lat) {
  const pops = lat.cell_populations || [];
  $('populations').innerHTML = pops.map((p, i) => {
    const colour = POP_COLORS[i % POP_COLORS.length];
    const width = ((Number(p.proportion) || 0) * 100).toFixed(1);
    const genes = (p.marker_genes || []).map(g => `<span class="tag" style="font-size:11px">${esc(g)}</span>`).join('');
    return `<div class="pop-bar-container"><div class="pop-bar-label"><span>${esc(p.name)} <span style="color:var(--text-dim);font-size:11px">${esc(p.state)}</span></span><span class="mono" style="font-size:12px">${width}%</span></div><div class="pop-bar"><div class="pop-bar-fill" style="width:${width}%;background:${colour}"></div></div><div class="tag-list" style="margin-top:3px">${genes}</div></div>`;
  }).join('') || '<span style="color:var(--text-dim)">—</span>';

  setHTML('true-markers', tagsHTML(lat.true_markers, 'green'));
  setHTML('true-mechanisms', tagsHTML(lat.causal_mechanisms, 'pink'));

  const pw = lat.true_pathways || {};
  $('pathways-table').querySelector('tbody').innerHTML = miniRows(Object.fromEntries(
    Object.entries(pw).slice(0, 10).map(([k, v]) => [k, typeof v === 'number' ? v.toFixed(3) : v])
  ));
  $('technical-table').querySelector('tbody').innerHTML = miniRows(lat.technical || {});
  setHTML('failure-conditions', tagsHTML(lat.hidden_failure_conditions, 'red'));

  // Boolean progress flags are drawn as dots; everything else as a value.
  const prog = Object.entries(lat.progress || {});
  const flags = prog.filter(([, v]) => typeof v === 'boolean');
  const values = prog.filter(([, v]) => typeof v !== 'boolean');
  const label = k => esc(k.replace(/_/g, ' '));
  $('progress-grid').innerHTML =
    flags.map(([k, v]) => `<div class="progress-item"><div class="dot ${v ? 'done' : ''}"></div>${label(k)}</div>`).join('') +
    values.map(([k, v]) => `<div class="progress-item" style="color:var(--accent)"><span class="mono" style="font-size:11px;margin-right:4px">${esc(v)}</span>${label(k)}</div>`).join('');
}

/**
 * Render one state payload from /api/state into the dashboard.
 *
 * A payload with an `error` field only updates the status pill and the task
 * line. Otherwise every card is refreshed, and the first time a finished
 * episode is seen the report overlay opens after a short delay.
 *
 * Differences from the straightforward version:
 *  - the auto-report marker is cleared whenever a live (not done) state is
 *    seen, so each episode gets its own report rather than only the first;
 *  - the auto-report no longer depends on `latent` being present;
 *  - the "Model Reasoning" header is toggled together with its content;
 *  - values from the state are escaped and non-numeric numbers are tolerated.
 */
function renderState(s) {
  latestState = s;
  if (s.error) {
    $('status-pill').className = 'status-pill waiting';
    $('status-pill').textContent = 'Waiting';
    $('task-statement').textContent = s.error;
    return;
  }

  // claim_type doubles as a CSS class; keep only characters safe in a class attribute.
  const safeClass = value => String(value || '').toLowerCase().replace(/[^a-z0-9_-]/g, '');
  const fold = value => String(value).toUpperCase();

  const pill = $('status-pill');
  if (s.episode_done) {
    pill.className = 'status-pill done';
    pill.textContent = 'Done';
  } else {
    pill.className = 'status-pill live';
    pill.textContent = 'Live';
    reportShownForTimestamp = null;   // a new episode is running: re-arm the auto-report
  }
  $('step-label').textContent = `Step ${s.step}`;
  $('thinking-badge').style.display = s.thinking_enabled ? '' : 'none';

  const t = s.task || {};
  $('task-statement').textContent = t.problem_statement || '—';
  $('task-meta').innerHTML = [t.organism, t.tissue, t.modality, t.conditions ? t.conditions.join(' vs ') : null]
    .filter(Boolean).map(v => `<span class="tag">${esc(v)}</span>`).join(' ');

  const cum = s.cumulative_reward || 0;
  $('reward-value').textContent = (cum >= 0 ? '+' : '') + cum.toFixed(3);
  $('reward-value').style.color = cum >= 0 ? 'var(--green)' : 'var(--red)';
  if (s.timestamp !== lastTimestamp && s.step > 0) {
    rewardHistory.push({ step: s.step, v: cum });
    lastTimestamp = s.timestamp;
  }
  drawRewardChart($('reward-chart'), rewardHistory);

  const r = s.resources || {};
  const bT = (r.budget_used||0)+(r.budget_remaining||0), tT = (r.time_used_days||0)+(r.time_remaining_days||0);
  const bP = pct(r.budget_used, bT), tP = pct(r.time_used_days, tT);
  $('gauges').innerHTML = [
    gauge('Budget Used', `$${fmt(r.budget_used)}`, bP),
    gauge('Budget Left', `$${fmt(r.budget_remaining)}`, 100-bP, true),
    gauge('Time Used', `${(r.time_used_days||0).toFixed(0)}d`, tP),
    gauge('Time Left', `${(r.time_remaining_days||0).toFixed(0)}d`, 100-tP, true),
    gauge('Samples', String(r.samples_consumed||0), null),
    gauge('Compute', `${(r.compute_hours_used||0).toFixed(1)}h`, null),
  ].join('');

  const hist = s.pipeline_history || [];
  $('timeline').innerHTML = hist.length
    ? hist.map(h => `<div class="timeline-item ${!h.success?'fail':''}"><div class="tl-action">${esc(h.action_type)}${h.method?` <span style="color:var(--text-dim);font-weight:400;font-size:12px">${esc(h.method)}</span>`:''}</div><div class="tl-meta">${h.success?'✓':'✗'} ${esc(h.output_summary)} · q=${esc(h.quality_score)} · $${fmt(h.resource_cost)} · ${esc(h.time_cost_days)}d</div></div>`).join('')
    : '<div style="color:var(--text-dim);font-size:13px">No steps yet</div>';

  const a = s.current_action;
  if (a) {
    $('action-table').querySelector('tbody').innerHTML = miniRows({
      'Type': a.action_type,
      'Method': a.method || '—',
      'Confidence': typeof a.confidence === 'number' ? a.confidence.toFixed(2) : a.confidence,
      'Justification': a.justification || '—',
      'Fallback?': s.used_fallback ? 'YES' : 'no',
    });
  }

  // The header's own display:none sits in a duplicate style attribute that
  // the browser ignores, so hide/show it here together with the content.
  const thinkingDisplay = s.model_thinking ? '' : 'none';
  $('thinking-header').style.display = thinkingDisplay;
  $('model-thinking').style.display = thinkingDisplay;
  if (s.model_thinking) $('model-thinking').textContent = s.model_thinking;
  $('model-response').textContent = s.model_response_raw || '—';

  setHTML('markers-list', tagsHTML(s.discovered_markers, 'green'));
  setHTML('mechanisms-list', tagsHTML(s.candidate_mechanisms, 'pink'));

  const violations = s.rule_violations || [];
  $('violations').innerHTML = violations.length
    ? violations.map(x => `<div class="tag red" style="margin-bottom:4px">${esc(x)}</div>`).join('')
    : '<span style="color:var(--text-dim)">None</span>';
  $('uncertainty-table').querySelector('tbody').innerHTML = miniRows(s.uncertainty_summary || {});
  const rb = s.reward_breakdown || {};
  $('reward-breakdown-table').querySelector('tbody').innerHTML = miniRows(Object.fromEntries(
    Object.entries(rb).map(([k, v]) => [k, typeof v === 'number' ? (v >= 0 ? '+' : '') + v.toFixed(4) : v])
  ));

  const lo = s.latest_output;
  if (lo) {
    $('output-table').querySelector('tbody').innerHTML = miniRows({
      'Summary': lo.summary,
      'Success': lo.success ? '✓' : '✗',
      'Quality': lo.quality_score,
      'Uncertainty': lo.uncertainty,
      'Warnings': (lo.warnings || []).join('; ') || '—',
    });
    $('output-data').textContent = lo.data_preview || '—';
  }

  const conc = s.conclusions || [];
  if (conc.length) {
    $('card-conclusions').style.display = '';
    $('conclusions-list').innerHTML = conc.map(c => {
      const confColor = c.confidence>=.7?'var(--green)':c.confidence>=.4?'var(--amber)':'var(--red)';
      let h = `<div class="conclusion-card"><div class="cc-header"><span class="cc-type ${safeClass(c.claim_type)}">${esc(c.claim_type||'unknown')}</span><span class="cc-conf" style="color:${confColor}">${((Number(c.confidence)||0)*100).toFixed(0)}%</span></div>`;
      if (c.claim) h += `<div class="cc-claim">${esc(c.claim)}</div>`;
      if (c.top_markers?.length) h += `<div class="cc-section-label">Top Markers</div><div class="tag-list">${c.top_markers.map(m=>`<span class="tag green">${esc(m)}</span>`).join('')}</div>`;
      if (c.causal_mechanisms?.length) h += `<div class="cc-section-label">Causal Mechanisms</div><div class="tag-list">${c.causal_mechanisms.map(m=>`<span class="tag pink">${esc(m)}</span>`).join('')}</div>`;
      if (c.predicted_pathways && Object.keys(c.predicted_pathways).length) h += `<div class="cc-section-label">Predicted Pathways</div><table class="mini-table"><tbody>${Object.entries(c.predicted_pathways).map(([k,v])=>`<tr><td>${esc(k)}</td><td>${Number(v).toFixed(3)}</td></tr>`).join('')}</tbody></table>`;
      return h + '</div>';
    }).join('');
  } else {
    $('card-conclusions').style.display = 'none';
  }

  // Ground truth comparison (visible when done or has conclusions)
  const lat = s.latent;
  if ((s.episode_done || conc.length) && lat) {
    const conclusionMarkers = uniqueItems(conc.flatMap(c => c.top_markers || []));
    const conclusionMechanisms = uniqueItems(conc.flatMap(c => c.causal_mechanisms || []));
    const comparisonMarkers = uniqueItems((s.discovered_markers && s.discovered_markers.length) ? s.discovered_markers : conclusionMarkers);
    const comparisonMechanisms = uniqueItems((s.candidate_mechanisms && s.candidate_mechanisms.length) ? s.candidate_mechanisms : conclusionMechanisms);
    const trueMarkers = lat.true_markers || [];
    $('card-gt-comparison').style.display = '';
    setHTML('gt-agent-markers', comparedTags(comparisonMarkers, lat.true_markers, 'green'));
    setHTML('gt-true-markers', tagsHTML(lat.true_markers, 'green'));
    setHTML('gt-agent-mechs', comparedTags(comparisonMechanisms, lat.causal_mechanisms, 'pink'));
    setHTML('gt-true-mechs', tagsHTML(lat.causal_mechanisms, 'pink'));
    const trueKeys = new Set(trueMarkers.map(fold));
    const hits = comparisonMarkers.filter(m => trueKeys.has(fold(m)));
    $('gt-score').innerHTML = `Marker accuracy: <span style="color:var(--accent)">${hits.length}</span> / ${trueMarkers.length} true markers recovered`;
  } else {
    $('card-gt-comparison').style.display = 'none';
  }

  if (lat) renderLatentPanels(lat);

  // Open the report once per finished episode, whether or not latent data exists.
  if (s.episode_done && !reportShownForTimestamp && s.timestamp) {
    reportShownForTimestamp = s.timestamp;
    setTimeout(showReport, 800);
  }
}
|
| 522 |
+
|
| 523 |
+
// Timestamp of the finished state that triggered the automatic report popup;
// null while no report has been auto-shown. Read and written by renderState().
let reportShownForTimestamp = null;
|
| 524 |
+
|
| 525 |
+
/**
 * Fill the scenario <select> with the names returned by /api/scenarios.
 * Best effort: any failure leaves the dropdown with only its "random" entry.
 */
async function loadScenarios() {
  try {
    const response = await fetch('/api/scenarios');
    const payload = await response.json();
    const dropdown = $('f-scenario');
    const names = payload.scenarios || [];
    names.forEach(name => {
      const option = Object.assign(document.createElement('option'), { value: name, textContent: name });
      dropdown.appendChild(option);
    });
  } catch (e) {}
}
|
| 533 |
+
|
| 534 |
+
/**
 * Fetch the current state, render it, then schedule the next poll.
 * Fetch, parse and render errors are ignored so that polling never stops.
 */
async function poll() {
  try {
    const response = await fetch('/api/state', { cache: 'no-store' });
    const state = await response.json();
    renderState(state);
  } catch (e) {}
  setTimeout(poll, POLL_MS);
}
|
| 538 |
+
|
| 539 |
+
loadScenarios();
|
| 540 |
+
poll();
|
| 541 |
+
</script>
|
| 542 |
+
</body>
|
| 543 |
+
</html>
|
dashboard.py
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Lightweight dashboard server for the drug-target-validation agent.
|
| 2 |
+
|
| 3 |
+
No external dependencies — uses only the Python standard library.
|
| 4 |
+
|
| 5 |
+
Usage:
|
| 6 |
+
python dashboard.py # serves on http://localhost:8050
|
| 7 |
+
python dashboard.py --port 9000
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
from __future__ import annotations
|
| 11 |
+
|
| 12 |
+
import argparse
|
| 13 |
+
import json
|
| 14 |
+
from http.server import HTTPServer, SimpleHTTPRequestHandler
|
| 15 |
+
from pathlib import Path
|
| 16 |
+
|
| 17 |
+
ROOT = Path(__file__).parent
|
| 18 |
+
STATE_FILE = ROOT / "_dashboard_state.json"
|
| 19 |
+
CMD_FILE = ROOT / "_dashboard_cmd.json"
|
| 20 |
+
DASHBOARD_HTML = ROOT / "dashboard.html"
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
class DashboardHandler(SimpleHTTPRequestHandler):
    """HTTP handler for the dashboard page and its small JSON API.

    Routes (query strings and fragments are ignored when matching):
        GET  /, /index.html  -> contents of ``DASHBOARD_HTML``
        GET  /api/state      -> raw contents of ``STATE_FILE``
        GET  /api/scenarios  -> ``{"scenarios": [...]}`` (best effort)
        POST /api/restart    -> writes ``{"action": "restart"}`` to ``CMD_FILE``
        POST /api/run        -> writes the posted JSON object, with
                                ``action`` forced to ``"restart"``
    Anything else is answered with 404.
    """

    def _route(self) -> str:
        """Return the request path without its query string or fragment."""
        return self.path.split("?", 1)[0].split("#", 1)[0]

    def do_GET(self):
        """Dispatch GET requests to the page or one of the JSON endpoints."""
        route = self._route()
        if route in ("/", "/index.html"):
            self._serve_file(DASHBOARD_HTML, "text/html")
        elif route == "/api/state":
            self._serve_state()
        elif route == "/api/scenarios":
            self._serve_scenarios()
        else:
            self.send_error(404)

    def do_POST(self):
        """Dispatch POST requests; both endpoints end in a restart command."""
        route = self._route()
        if route == "/api/restart":
            self._handle_command({"action": "restart"})
        elif route == "/api/run":
            body = self._read_body()
            if body is None:
                # _read_body has already sent the 400 response.
                return
            body["action"] = "restart"
            self._handle_command(body)
        else:
            self.send_error(404)

    def do_OPTIONS(self):
        """Answer CORS preflight requests."""
        self.send_response(204)
        self.send_header("Access-Control-Allow-Origin", "*")
        self.send_header("Access-Control-Allow-Methods", "GET, POST, OPTIONS")
        self.send_header("Access-Control-Allow-Headers", "Content-Type")
        self.end_headers()

    def _read_body(self):
        """Read and decode the request body as a JSON object.

        Returns:
            The decoded ``dict``; ``{}`` when the request has no body; or
            ``None`` after a 400 response has been sent because the
            ``Content-Length`` header, the JSON, or its top-level type was
            invalid.
        """
        try:
            length = int(self.headers.get("Content-Length", 0))
        except (TypeError, ValueError):
            self._json_response(400, {"error": "Invalid Content-Length"})
            return None
        # A negative length would make rfile.read() block until EOF.
        if length <= 0:
            return {}
        raw = self.rfile.read(length)
        try:
            body = json.loads(raw)
        except ValueError:
            # Covers json.JSONDecodeError and UnicodeDecodeError (bad bytes).
            self._json_response(400, {"error": "Invalid JSON"})
            return None
        if not isinstance(body, dict):
            # do_POST assigns body["action"], so only objects are usable.
            self._json_response(400, {"error": "JSON body must be an object"})
            return None
        return body

    def _handle_command(self, cmd: dict):
        """Persist *cmd* to ``CMD_FILE`` and acknowledge it.

        The command is written to a sibling temp file and moved into place,
        so a process polling ``CMD_FILE`` never reads a partial document.
        """
        tmp_path = CMD_FILE.with_name(CMD_FILE.name + ".tmp")
        try:
            tmp_path.write_text(json.dumps(cmd), encoding="utf-8")
            tmp_path.replace(CMD_FILE)
        except OSError as exc:
            self._json_response(500, {"error": f"Could not write command: {exc}"})
            return
        self._json_response(200, {"ok": True, "command": cmd.get("action")})

    def _serve_state(self):
        """Send the state file verbatim, or a JSON error if it is absent."""
        # Read before sending any header so a read failure cannot leave a
        # half-written response behind.
        try:
            data = STATE_FILE.read_bytes()
        except FileNotFoundError:
            data = b'{"error": "No state file yet. Run run_agent.py to start an episode."}'
        self.send_response(200)
        self.send_header("Content-Type", "application/json")
        self.send_header("Access-Control-Allow-Origin", "*")
        self.send_header("Cache-Control", "no-cache")
        self.send_header("Content-Length", str(len(data)))
        self.end_headers()
        self.wfile.write(data)

    def _serve_scenarios(self):
        """Send the scenario names, or an empty list if they cannot be loaded."""
        try:
            from server.tasks.scenarios import SCENARIO_LIBRARY
            names = [s.name for s in SCENARIO_LIBRARY]
        except Exception:
            # Deliberately best effort: the scenario package is optional for
            # the dashboard and its import can fail in many ways.
            names = []
        self._json_response(200, {"scenarios": names})

    def _serve_file(self, path: Path, content_type: str):
        """Send the file at *path* with the given content type (404 if missing)."""
        try:
            body = path.read_bytes()
        except FileNotFoundError:
            self.send_error(404, f"{path.name} not found")
            return
        self.send_response(200)
        self.send_header("Content-Type", content_type)
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def _json_response(self, code: int, obj: dict):
        """Send *obj* as a JSON response with status *code* and CORS header."""
        body = json.dumps(obj).encode()
        self.send_response(code)
        self.send_header("Content-Type", "application/json")
        self.send_header("Access-Control-Allow-Origin", "*")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def log_message(self, format, *args):
        """Silence the default per-request logging to stderr."""
        pass
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
def main():
    """Parse the command line and serve the dashboard until interrupted.

    Options:
        --port  TCP port to listen on (default 8050).
        --host  Address to bind (default ``0.0.0.0``, i.e. every interface,
                which is what was previously hard-coded).

    The listening socket is closed on every exit path, not only after
    Ctrl-C.
    """
    parser = argparse.ArgumentParser(description="Drug-target-validation dashboard server")
    parser.add_argument("--port", type=int, default=8050)
    parser.add_argument(
        "--host",
        default="0.0.0.0",
        help="address to bind (default: all interfaces)",
    )
    args = parser.parse_args()

    server = HTTPServer((args.host, args.port), DashboardHandler)
    # A wildcard bind is reachable through localhost; otherwise show the
    # address that was actually requested.
    shown_host = "localhost" if args.host in ("", "0.0.0.0") else args.host
    print(f"Dashboard running at http://{shown_host}:{args.port}")
    print("Waiting for agent state from run_agent.py ...")
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        print("\nShutting down.")
    finally:
        server.server_close()


if __name__ == "__main__":
    main()
|
demo.html
ADDED
|
@@ -0,0 +1,1639 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>BioEnv</title>
|
| 7 |
+
<style>
|
| 8 |
+
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800&family=JetBrains+Mono:wght@400;500;600&display=swap');
|
| 9 |
+
|
| 10 |
+
:root {
|
| 11 |
+
--bg: #07090d;
|
| 12 |
+
--bg-surface: #0c0f16;
|
| 13 |
+
--bg-raised: #111827;
|
| 14 |
+
--bg-hover: #1a2235;
|
| 15 |
+
--border: #1e293b;
|
| 16 |
+
--border-active: #334155;
|
| 17 |
+
--text: #e2e8f0;
|
| 18 |
+
--text-dim: #94a3b8;
|
| 19 |
+
--text-muted: #475569;
|
| 20 |
+
--accent: #38bdf8;
|
| 21 |
+
--accent-dim: rgba(56,189,248,0.12);
|
| 22 |
+
--green: #34d399;
|
| 23 |
+
--green-dim: rgba(52,211,153,0.10);
|
| 24 |
+
--amber: #fbbf24;
|
| 25 |
+
--amber-dim: rgba(251,191,36,0.10);
|
| 26 |
+
--red: #f87171;
|
| 27 |
+
--red-dim: rgba(248,113,113,0.10);
|
| 28 |
+
--cyan: #22d3ee;
|
| 29 |
+
--cyan-dim: rgba(34,211,238,0.10);
|
| 30 |
+
--pink: #f472b6;
|
| 31 |
+
--purple: #a78bfa;
|
| 32 |
+
}
|
| 33 |
+
|
| 34 |
+
* { margin: 0; padding: 0; box-sizing: border-box; }
|
| 35 |
+
html, body { height: 100%; overflow: hidden; }
|
| 36 |
+
|
| 37 |
+
body {
|
| 38 |
+
font-family: 'Inter', -apple-system, sans-serif;
|
| 39 |
+
background: var(--bg);
|
| 40 |
+
color: var(--text);
|
| 41 |
+
display: flex;
|
| 42 |
+
flex-direction: column;
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
+
/* ---- Top Bar ---- */
|
| 46 |
+
.topbar {
|
| 47 |
+
height: 48px;
|
| 48 |
+
min-height: 48px;
|
| 49 |
+
background: var(--bg-surface);
|
| 50 |
+
border-bottom: 1px solid var(--border);
|
| 51 |
+
display: flex;
|
| 52 |
+
align-items: center;
|
| 53 |
+
padding: 0 20px;
|
| 54 |
+
gap: 16px;
|
| 55 |
+
z-index: 10;
|
| 56 |
+
}
|
| 57 |
+
/* Logo text filled with a gradient: the gradient is painted as the
   background, clipped to the glyph shapes, and the glyph fill is made
   transparent so the background shows through. */
.topbar-logo {
  font-size: 15px;
  font-weight: 800;
  letter-spacing: -0.5px;
  background: linear-gradient(135deg, #38bdf8, #22d3ee);
  -webkit-background-clip: text;
  /* Standard property alongside the prefixed one. */
  background-clip: text;
  -webkit-text-fill-color: transparent;
}
|
| 65 |
+
.topbar-sep { width: 1px; height: 20px; background: var(--border); }
|
| 66 |
+
.topbar-env {
|
| 67 |
+
font-size: 12px;
|
| 68 |
+
color: var(--text-dim);
|
| 69 |
+
font-family: 'JetBrains Mono', monospace;
|
| 70 |
+
}
|
| 71 |
+
.topbar-status {
|
| 72 |
+
display: flex;
|
| 73 |
+
align-items: center;
|
| 74 |
+
gap: 6px;
|
| 75 |
+
margin-left: auto;
|
| 76 |
+
font-size: 12px;
|
| 77 |
+
color: var(--text-dim);
|
| 78 |
+
}
|
| 79 |
+
.status-dot {
|
| 80 |
+
width: 7px; height: 7px;
|
| 81 |
+
border-radius: 50%;
|
| 82 |
+
background: var(--text-muted);
|
| 83 |
+
}
|
| 84 |
+
.status-dot.live {
|
| 85 |
+
background: var(--green);
|
| 86 |
+
box-shadow: 0 0 8px var(--green);
|
| 87 |
+
animation: pulse 2s infinite;
|
| 88 |
+
}
|
| 89 |
+
@keyframes pulse {
|
| 90 |
+
0%, 100% { opacity: 1; }
|
| 91 |
+
50% { opacity: 0.5; }
|
| 92 |
+
}
|
| 93 |
+
.topbar-btn {
|
| 94 |
+
font-size: 12px;
|
| 95 |
+
font-weight: 600;
|
| 96 |
+
padding: 6px 14px;
|
| 97 |
+
border-radius: 6px;
|
| 98 |
+
border: none;
|
| 99 |
+
cursor: pointer;
|
| 100 |
+
transition: all 0.15s;
|
| 101 |
+
font-family: inherit;
|
| 102 |
+
}
|
| 103 |
+
.btn-primary { background: var(--accent); color: #07090d; font-weight: 700; }
|
| 104 |
+
.btn-primary:hover { background: #7dd3fc; }
|
| 105 |
+
.btn-primary:disabled { opacity: 0.4; cursor: not-allowed; }
|
| 106 |
+
.btn-ghost {
|
| 107 |
+
background: transparent;
|
| 108 |
+
color: var(--text-dim);
|
| 109 |
+
border: 1px solid var(--border);
|
| 110 |
+
}
|
| 111 |
+
.btn-ghost:hover { background: var(--bg-hover); color: var(--text); }
|
| 112 |
+
|
| 113 |
+
/* ---- Main Layout ---- */
|
| 114 |
+
.main {
|
| 115 |
+
flex: 1;
|
| 116 |
+
display: grid;
|
| 117 |
+
grid-template-columns: 260px 1fr 340px;
|
| 118 |
+
overflow: hidden;
|
| 119 |
+
}
|
| 120 |
+
|
| 121 |
+
/* ---- Left Sidebar ---- */
|
| 122 |
+
.sidebar {
|
| 123 |
+
background: var(--bg-surface);
|
| 124 |
+
border-right: 1px solid var(--border);
|
| 125 |
+
display: flex;
|
| 126 |
+
flex-direction: column;
|
| 127 |
+
overflow-y: auto;
|
| 128 |
+
}
|
| 129 |
+
.sidebar-section {
|
| 130 |
+
padding: 16px;
|
| 131 |
+
border-bottom: 1px solid var(--border);
|
| 132 |
+
}
|
| 133 |
+
.sidebar-heading {
|
| 134 |
+
font-size: 10px;
|
| 135 |
+
font-weight: 600;
|
| 136 |
+
text-transform: uppercase;
|
| 137 |
+
letter-spacing: 1.5px;
|
| 138 |
+
color: var(--text-muted);
|
| 139 |
+
margin-bottom: 10px;
|
| 140 |
+
}
|
| 141 |
+
.scenario-list { display: flex; flex-direction: column; gap: 4px; }
|
| 142 |
+
.scenario-opt {
|
| 143 |
+
display: flex;
|
| 144 |
+
align-items: center;
|
| 145 |
+
gap: 10px;
|
| 146 |
+
padding: 8px 10px;
|
| 147 |
+
border-radius: 6px;
|
| 148 |
+
cursor: pointer;
|
| 149 |
+
transition: all 0.15s;
|
| 150 |
+
border: 1px solid transparent;
|
| 151 |
+
}
|
| 152 |
+
.scenario-opt:hover { background: var(--bg-hover); }
|
| 153 |
+
.scenario-opt.active {
|
| 154 |
+
background: var(--accent-dim);
|
| 155 |
+
border-color: rgba(56,189,248,0.2);
|
| 156 |
+
}
|
| 157 |
+
.scenario-opt .sc-dot { width: 8px; height: 8px; border-radius: 50%; flex-shrink: 0; }
|
| 158 |
+
.scenario-opt .sc-name {
|
| 159 |
+
font-size: 12px; font-weight: 500; flex: 1;
|
| 160 |
+
white-space: nowrap; overflow: hidden; text-overflow: ellipsis;
|
| 161 |
+
}
|
| 162 |
+
.scenario-opt .sc-diff {
|
| 163 |
+
font-size: 10px; font-weight: 600;
|
| 164 |
+
text-transform: uppercase; letter-spacing: 0.5px;
|
| 165 |
+
}
|
| 166 |
+
.gauge { margin-bottom: 14px; }
|
| 167 |
+
.gauge:last-child { margin-bottom: 0; }
|
| 168 |
+
.gauge-header {
|
| 169 |
+
display: flex; justify-content: space-between;
|
| 170 |
+
align-items: baseline; margin-bottom: 6px;
|
| 171 |
+
}
|
| 172 |
+
.gauge-label { font-size: 12px; color: var(--text-dim); font-weight: 500; }
|
| 173 |
+
.gauge-value {
|
| 174 |
+
font-size: 12px; font-weight: 600;
|
| 175 |
+
font-family: 'JetBrains Mono', monospace;
|
| 176 |
+
}
|
| 177 |
+
.gauge-track {
|
| 178 |
+
height: 4px; background: var(--bg-hover);
|
| 179 |
+
border-radius: 4px; overflow: hidden;
|
| 180 |
+
}
|
| 181 |
+
.gauge-fill {
|
| 182 |
+
height: 100%; border-radius: 4px;
|
| 183 |
+
transition: width 0.8s cubic-bezier(0.4,0,0.2,1);
|
| 184 |
+
}
|
| 185 |
+
.pipeline-steps { display: flex; flex-direction: column; gap: 2px; }
|
| 186 |
+
.pipe-step {
|
| 187 |
+
display: flex; align-items: center; gap: 8px;
|
| 188 |
+
padding: 5px 8px; border-radius: 4px;
|
| 189 |
+
font-size: 11px; font-family: 'JetBrains Mono', monospace;
|
| 190 |
+
color: var(--text-muted);
|
| 191 |
+
opacity: 0; transform: translateX(-8px);
|
| 192 |
+
transition: all 0.3s ease;
|
| 193 |
+
}
|
| 194 |
+
.pipe-step.visible { opacity: 1; transform: translateX(0); }
|
| 195 |
+
.pipe-step.active { color: var(--text); background: var(--accent-dim); }
|
| 196 |
+
.pipe-step.done { color: var(--text-dim); }
|
| 197 |
+
.pipe-step .step-icon {
|
| 198 |
+
width: 16px; height: 16px; border-radius: 50%;
|
| 199 |
+
border: 1.5px solid var(--text-muted);
|
| 200 |
+
display: flex; align-items: center; justify-content: center;
|
| 201 |
+
font-size: 8px; flex-shrink: 0; transition: all 0.3s;
|
| 202 |
+
}
|
| 203 |
+
.pipe-step.done .step-icon {
|
| 204 |
+
background: var(--green-dim); border-color: var(--green); color: var(--green);
|
| 205 |
+
}
|
| 206 |
+
.pipe-step.active .step-icon {
|
| 207 |
+
border-color: var(--accent); background: var(--accent-dim);
|
| 208 |
+
color: var(--accent); animation: pulse 1.5s infinite;
|
| 209 |
+
}
|
| 210 |
+
|
| 211 |
+
/* ---- Center: Lab + Terminal ---- */
|
| 212 |
+
.center {
|
| 213 |
+
display: flex;
|
| 214 |
+
flex-direction: column;
|
| 215 |
+
overflow: hidden;
|
| 216 |
+
background: var(--bg);
|
| 217 |
+
}
|
| 218 |
+
|
| 219 |
+
/* Lab canvas */
|
| 220 |
+
.lab-panel {
|
| 221 |
+
height: 300px;
|
| 222 |
+
min-height: 300px;
|
| 223 |
+
background: var(--bg-surface);
|
| 224 |
+
border-bottom: 1px solid var(--border);
|
| 225 |
+
position: relative;
|
| 226 |
+
overflow: hidden;
|
| 227 |
+
}
|
| 228 |
+
.lab-panel canvas {
|
| 229 |
+
display: block;
|
| 230 |
+
width: 100%;
|
| 231 |
+
height: 100%;
|
| 232 |
+
}
|
| 233 |
+
.lab-label {
|
| 234 |
+
position: absolute;
|
| 235 |
+
top: 8px;
|
| 236 |
+
left: 12px;
|
| 237 |
+
font-size: 10px;
|
| 238 |
+
font-weight: 600;
|
| 239 |
+
text-transform: uppercase;
|
| 240 |
+
letter-spacing: 1.5px;
|
| 241 |
+
color: var(--text-muted);
|
| 242 |
+
z-index: 2;
|
| 243 |
+
pointer-events: none;
|
| 244 |
+
}
|
| 245 |
+
.lab-action-label {
|
| 246 |
+
position: absolute;
|
| 247 |
+
bottom: 10px;
|
| 248 |
+
left: 50%;
|
| 249 |
+
transform: translateX(-50%);
|
| 250 |
+
font-size: 11px;
|
| 251 |
+
font-family: 'JetBrains Mono', monospace;
|
| 252 |
+
color: var(--text-dim);
|
| 253 |
+
background: rgba(12,15,22,0.85);
|
| 254 |
+
padding: 4px 14px;
|
| 255 |
+
border-radius: 100px;
|
| 256 |
+
border: 1px solid var(--border);
|
| 257 |
+
z-index: 2;
|
| 258 |
+
pointer-events: none;
|
| 259 |
+
opacity: 0;
|
| 260 |
+
transition: opacity 0.3s;
|
| 261 |
+
}
|
| 262 |
+
.lab-action-label.visible { opacity: 1; }
|
| 263 |
+
|
| 264 |
+
.center-header {
|
| 265 |
+
height: 36px;
|
| 266 |
+
min-height: 36px;
|
| 267 |
+
display: flex;
|
| 268 |
+
align-items: center;
|
| 269 |
+
padding: 0 16px;
|
| 270 |
+
background: var(--bg-surface);
|
| 271 |
+
border-bottom: 1px solid var(--border);
|
| 272 |
+
gap: 8px;
|
| 273 |
+
}
|
| 274 |
+
.tab {
|
| 275 |
+
font-size: 11px; font-weight: 500;
|
| 276 |
+
padding: 4px 12px; border-radius: 4px;
|
| 277 |
+
color: var(--text-dim); cursor: pointer;
|
| 278 |
+
transition: all 0.15s;
|
| 279 |
+
}
|
| 280 |
+
.tab.active { color: var(--text); background: var(--bg-hover); }
|
| 281 |
+
.tab:hover { color: var(--text); }
|
| 282 |
+
|
| 283 |
+
.terminal {
|
| 284 |
+
flex: 1;
|
| 285 |
+
overflow-y: auto;
|
| 286 |
+
padding: 16px 20px;
|
| 287 |
+
font-family: 'JetBrains Mono', monospace;
|
| 288 |
+
font-size: 12.5px;
|
| 289 |
+
line-height: 1.9;
|
| 290 |
+
scrollbar-width: thin;
|
| 291 |
+
scrollbar-color: var(--border) transparent;
|
| 292 |
+
}
|
| 293 |
+
.terminal::-webkit-scrollbar { width: 6px; }
|
| 294 |
+
.terminal::-webkit-scrollbar-track { background: transparent; }
|
| 295 |
+
.terminal::-webkit-scrollbar-thumb { background: var(--border); border-radius: 3px; }
|
| 296 |
+
|
| 297 |
+
.t-line {
|
| 298 |
+
white-space: pre-wrap;
|
| 299 |
+
opacity: 0;
|
| 300 |
+
animation: lineIn 0.25s ease forwards;
|
| 301 |
+
}
|
| 302 |
+
@keyframes lineIn {
|
| 303 |
+
from { opacity: 0; transform: translateY(4px); }
|
| 304 |
+
to { opacity: 1; transform: translateY(0); }
|
| 305 |
+
}
|
| 306 |
+
.t-prompt { color: var(--green); }
|
| 307 |
+
.t-cmd { color: var(--text); }
|
| 308 |
+
.t-dim { color: var(--text-muted); }
|
| 309 |
+
.t-label { color: var(--accent); }
|
| 310 |
+
.t-str { color: var(--amber); }
|
| 311 |
+
.t-kw { color: var(--pink); }
|
| 312 |
+
.t-fn { color: var(--cyan); }
|
| 313 |
+
.t-num { color: var(--purple); }
|
| 314 |
+
.t-ok { color: var(--green); }
|
| 315 |
+
.t-warn { color: var(--amber); }
|
| 316 |
+
.t-err { color: var(--red); }
|
| 317 |
+
.t-sub { color: var(--text-dim); }
|
| 318 |
+
|
| 319 |
+
/* ---- Right Panel ---- */
|
| 320 |
+
.right {
|
| 321 |
+
background: var(--bg-surface);
|
| 322 |
+
border-left: 1px solid var(--border);
|
| 323 |
+
display: flex;
|
| 324 |
+
flex-direction: column;
|
| 325 |
+
overflow-y: auto;
|
| 326 |
+
scrollbar-width: thin;
|
| 327 |
+
scrollbar-color: var(--border) transparent;
|
| 328 |
+
}
|
| 329 |
+
.panel-section {
|
| 330 |
+
padding: 16px;
|
| 331 |
+
border-bottom: 1px solid var(--border);
|
| 332 |
+
}
|
| 333 |
+
.panel-heading {
|
| 334 |
+
font-size: 10px; font-weight: 600;
|
| 335 |
+
text-transform: uppercase; letter-spacing: 1.5px;
|
| 336 |
+
color: var(--text-muted); margin-bottom: 12px;
|
| 337 |
+
display: flex; align-items: center; justify-content: space-between;
|
| 338 |
+
}
|
| 339 |
+
.reward-row {
|
| 340 |
+
display: flex; align-items: center; gap: 10px; margin-bottom: 8px;
|
| 341 |
+
}
|
| 342 |
+
.reward-row:last-child { margin-bottom: 0; }
|
| 343 |
+
.rw-label {
|
| 344 |
+
font-size: 11px; font-weight: 500; width: 80px;
|
| 345 |
+
color: var(--text-dim); text-align: right;
|
| 346 |
+
}
|
| 347 |
+
.rw-track {
|
| 348 |
+
flex: 1; height: 18px;
|
| 349 |
+
background: rgba(255,255,255,0.03);
|
| 350 |
+
border-radius: 4px; overflow: hidden; position: relative;
|
| 351 |
+
}
|
| 352 |
+
.rw-fill {
|
| 353 |
+
height: 100%; border-radius: 4px; width: 0%;
|
| 354 |
+
transition: width 0.6s cubic-bezier(0.4,0,0.2,1);
|
| 355 |
+
display: flex; align-items: center; justify-content: flex-end;
|
| 356 |
+
padding-right: 6px; font-size: 10px; font-weight: 600;
|
| 357 |
+
font-family: 'JetBrains Mono', monospace;
|
| 358 |
+
color: rgba(255,255,255,0.85); min-width: fit-content;
|
| 359 |
+
}
|
| 360 |
+
.rw-fill.validity { background: linear-gradient(90deg, rgba(52,211,153,0.5), rgba(52,211,153,0.85)); }
|
| 361 |
+
.rw-fill.ordering { background: linear-gradient(90deg, rgba(34,211,238,0.5), rgba(34,211,238,0.85)); }
|
| 362 |
+
.rw-fill.info_gain { background: linear-gradient(90deg, rgba(56,189,248,0.5), rgba(56,189,248,0.85)); }
|
| 363 |
+
.rw-fill.efficiency { background: linear-gradient(90deg, rgba(251,191,36,0.5), rgba(251,191,36,0.85)); }
|
| 364 |
+
.rw-fill.novelty { background: linear-gradient(90deg, rgba(167,139,250,0.5), rgba(167,139,250,0.85)); }
|
| 365 |
+
.rw-fill.penalty { background: linear-gradient(90deg, rgba(248,113,113,0.5), rgba(248,113,113,0.85)); }
|
| 366 |
+
.cumulative-row {
|
| 367 |
+
display: flex; align-items: baseline; justify-content: space-between;
|
| 368 |
+
margin-top: 12px; padding-top: 12px; border-top: 1px solid var(--border);
|
| 369 |
+
}
|
| 370 |
+
.cum-label { font-size: 11px; color: var(--text-dim); }
|
| 371 |
+
.cum-value {
|
| 372 |
+
font-size: 20px; font-weight: 700;
|
| 373 |
+
font-family: 'JetBrains Mono', monospace; color: var(--green);
|
| 374 |
+
}
|
| 375 |
+
.discovery-list { display: flex; flex-direction: column; gap: 6px; }
|
| 376 |
+
.discovery {
|
| 377 |
+
display: flex; align-items: flex-start; gap: 8px;
|
| 378 |
+
padding: 8px 10px; background: var(--bg-raised);
|
| 379 |
+
border-radius: 6px; border: 1px solid var(--border);
|
| 380 |
+
opacity: 0; transform: scale(0.95); transition: all 0.3s ease;
|
| 381 |
+
}
|
| 382 |
+
.discovery.visible { opacity: 1; transform: scale(1); }
|
| 383 |
+
.disc-icon {
|
| 384 |
+
width: 20px; height: 20px; border-radius: 4px;
|
| 385 |
+
display: flex; align-items: center; justify-content: center;
|
| 386 |
+
font-size: 10px; flex-shrink: 0; margin-top: 1px;
|
| 387 |
+
}
|
| 388 |
+
.disc-body { flex: 1; }
|
| 389 |
+
.disc-title { font-size: 11px; font-weight: 600; }
|
| 390 |
+
.disc-detail {
|
| 391 |
+
font-size: 10px; color: var(--text-dim); margin-top: 2px;
|
| 392 |
+
font-family: 'JetBrains Mono', monospace;
|
| 393 |
+
}
|
| 394 |
+
.empty-state {
|
| 395 |
+
font-size: 11px; color: var(--text-muted);
|
| 396 |
+
font-style: italic; padding: 8px 0;
|
| 397 |
+
}
|
| 398 |
+
.step-reward-mini {
|
| 399 |
+
display: flex; align-items: center; justify-content: space-between;
|
| 400 |
+
padding: 6px 10px; background: var(--bg-raised);
|
| 401 |
+
border-radius: 6px; margin-bottom: 4px;
|
| 402 |
+
font-size: 11px; font-family: 'JetBrains Mono', monospace;
|
| 403 |
+
opacity: 0; transition: all 0.3s;
|
| 404 |
+
}
|
| 405 |
+
.step-reward-mini.visible { opacity: 1; }
|
| 406 |
+
.step-reward-mini .srm-name { color: var(--text-dim); }
|
| 407 |
+
.step-reward-mini .srm-val { font-weight: 600; }
|
| 408 |
+
.step-reward-mini .srm-val.pos { color: var(--green); }
|
| 409 |
+
.step-reward-mini .srm-val.neg { color: var(--red); }
|
| 410 |
+
</style>
|
| 411 |
+
</head>
|
| 412 |
+
<body>
|
| 413 |
+
|
| 414 |
+
<!-- Top Bar -->
|
| 415 |
+
<div class="topbar">
|
| 416 |
+
<div class="topbar-logo">BioEnv</div>
|
| 417 |
+
<div class="topbar-sep"></div>
|
| 418 |
+
<div class="topbar-env">biomarker_validation_lung</div>
|
| 419 |
+
<div class="topbar-status">
|
| 420 |
+
<div class="status-dot" id="statusDot"></div>
|
| 421 |
+
<span id="statusText">Ready</span>
|
| 422 |
+
</div>
|
| 423 |
+
<button class="topbar-btn btn-ghost" id="resetBtn" onclick="resetDemo()">Reset</button>
|
| 424 |
+
<button class="topbar-btn btn-primary" id="runBtn" onclick="startDemo()">Run Episode</button>
|
| 425 |
+
</div>
|
| 426 |
+
|
| 427 |
+
<div class="main">
|
| 428 |
+
<!-- Left Sidebar -->
|
| 429 |
+
<div class="sidebar">
|
| 430 |
+
<div class="sidebar-section">
|
| 431 |
+
<div class="sidebar-heading">Scenario</div>
|
| 432 |
+
<div class="scenario-list">
|
| 433 |
+
<div class="scenario-opt" onclick="selectScenario(this)">
|
| 434 |
+
<div class="sc-dot" style="background: var(--green);"></div>
|
| 435 |
+
<span class="sc-name">Cardiac Disease DE</span>
|
| 436 |
+
<span class="sc-diff" style="color: var(--green);">Easy</span>
|
| 437 |
+
</div>
|
| 438 |
+
<div class="scenario-opt" onclick="selectScenario(this)">
|
| 439 |
+
<div class="sc-dot" style="background: var(--amber);"></div>
|
| 440 |
+
<span class="sc-name">Hematopoiesis Trajectory</span>
|
| 441 |
+
<span class="sc-diff" style="color: var(--amber);">Med</span>
|
| 442 |
+
</div>
|
| 443 |
+
<div class="scenario-opt" onclick="selectScenario(this)">
|
| 444 |
+
<div class="sc-dot" style="background: var(--amber);"></div>
|
| 445 |
+
<span class="sc-name">Perturbation Immune</span>
|
| 446 |
+
<span class="sc-diff" style="color: var(--amber);">Med</span>
|
| 447 |
+
</div>
|
| 448 |
+
<div class="scenario-opt active" onclick="selectScenario(this)">
|
| 449 |
+
<div class="sc-dot" style="background: var(--red);"></div>
|
| 450 |
+
<span class="sc-name">Biomarker Validation (Lung)</span>
|
| 451 |
+
<span class="sc-diff" style="color: var(--red);">Hard</span>
|
| 452 |
+
</div>
|
| 453 |
+
</div>
|
| 454 |
+
</div>
|
| 455 |
+
<div class="sidebar-section">
|
| 456 |
+
<div class="sidebar-heading">Environment State</div>
|
| 457 |
+
<div class="gauge">
|
| 458 |
+
<div class="gauge-header">
|
| 459 |
+
<span class="gauge-label">Budget</span>
|
| 460 |
+
<span class="gauge-value" id="budgetVal">$100,000</span>
|
| 461 |
+
</div>
|
| 462 |
+
<div class="gauge-track"><div class="gauge-fill" id="budgetFill" style="width:100%;background:var(--green);"></div></div>
|
| 463 |
+
</div>
|
| 464 |
+
<div class="gauge">
|
| 465 |
+
<div class="gauge-header">
|
| 466 |
+
<span class="gauge-label">Time</span>
|
| 467 |
+
<span class="gauge-value" id="timeVal">180 / 180 days</span>
|
| 468 |
+
</div>
|
| 469 |
+
<div class="gauge-track"><div class="gauge-fill" id="timeFill" style="width:100%;background:var(--cyan);"></div></div>
|
| 470 |
+
</div>
|
| 471 |
+
<div class="gauge">
|
| 472 |
+
<div class="gauge-header">
|
| 473 |
+
<span class="gauge-label">Steps</span>
|
| 474 |
+
<span class="gauge-value" id="stepVal">0 / 30</span>
|
| 475 |
+
</div>
|
| 476 |
+
<div class="gauge-track"><div class="gauge-fill" id="stepFill" style="width:0%;background:var(--accent);"></div></div>
|
| 477 |
+
</div>
|
| 478 |
+
</div>
|
| 479 |
+
<div class="sidebar-section" style="flex:1;overflow-y:auto;">
|
| 480 |
+
<div class="sidebar-heading">Pipeline</div>
|
| 481 |
+
<div class="pipeline-steps" id="pipelineSteps"></div>
|
| 482 |
+
</div>
|
| 483 |
+
</div>
|
| 484 |
+
|
| 485 |
+
<!-- Center: Lab + Terminal -->
|
| 486 |
+
<div class="center">
|
| 487 |
+
<div class="lab-panel">
|
| 488 |
+
<div class="lab-label">Virtual Lab</div>
|
| 489 |
+
<div class="lab-action-label" id="labActionLabel"></div>
|
| 490 |
+
<canvas id="labCanvas"></canvas>
|
| 491 |
+
</div>
|
| 492 |
+
<div class="center-header">
|
| 493 |
+
<div class="tab active">Agent Log</div>
|
| 494 |
+
<div class="tab">Raw JSON</div>
|
| 495 |
+
</div>
|
| 496 |
+
<div class="terminal" id="terminal"></div>
|
| 497 |
+
</div>
|
| 498 |
+
|
| 499 |
+
<!-- Right Panel -->
|
| 500 |
+
<div class="right">
|
| 501 |
+
<div class="panel-section">
|
| 502 |
+
<div class="panel-heading">
|
| 503 |
+
Step Reward
|
| 504 |
+
<span id="stepRewardLabel" style="font-family:'JetBrains Mono',monospace;font-size:11px;color:var(--text-dim);">--</span>
|
| 505 |
+
</div>
|
| 506 |
+
<div id="rewardBars">
|
| 507 |
+
<div class="reward-row"><span class="rw-label">Validity</span><div class="rw-track"><div class="rw-fill validity" id="rw-validity"></div></div></div>
|
| 508 |
+
<div class="reward-row"><span class="rw-label">Ordering</span><div class="rw-track"><div class="rw-fill ordering" id="rw-ordering"></div></div></div>
|
| 509 |
+
<div class="reward-row"><span class="rw-label">Info Gain</span><div class="rw-track"><div class="rw-fill info_gain" id="rw-info_gain"></div></div></div>
|
| 510 |
+
<div class="reward-row"><span class="rw-label">Efficiency</span><div class="rw-track"><div class="rw-fill efficiency" id="rw-efficiency"></div></div></div>
|
| 511 |
+
<div class="reward-row"><span class="rw-label">Novelty</span><div class="rw-track"><div class="rw-fill novelty" id="rw-novelty"></div></div></div>
|
| 512 |
+
<div class="reward-row"><span class="rw-label">Penalty</span><div class="rw-track"><div class="rw-fill penalty" id="rw-penalty"></div></div></div>
|
| 513 |
+
</div>
|
| 514 |
+
<div class="cumulative-row">
|
| 515 |
+
<span class="cum-label">Cumulative Reward</span>
|
| 516 |
+
<span class="cum-value" id="cumReward">0.00</span>
|
| 517 |
+
</div>
|
| 518 |
+
</div>
|
| 519 |
+
<div class="panel-section">
|
| 520 |
+
<div class="panel-heading">Reward History</div>
|
| 521 |
+
<div id="rewardHistory"><div class="empty-state">No steps yet</div></div>
|
| 522 |
+
</div>
|
| 523 |
+
<div class="panel-section">
|
| 524 |
+
<div class="panel-heading">Discoveries</div>
|
| 525 |
+
<div class="discovery-list" id="discoveries"><div class="empty-state">No discoveries yet</div></div>
|
| 526 |
+
</div>
|
| 527 |
+
<div class="panel-section">
|
| 528 |
+
<div class="panel-heading">Violations</div>
|
| 529 |
+
<div id="violations"><div class="empty-state">No violations</div></div>
|
| 530 |
+
</div>
|
| 531 |
+
</div>
|
| 532 |
+
</div>
|
| 533 |
+
|
| 534 |
+
<script>
|
| 535 |
+
// =====================================================
|
| 536 |
+
// VIRTUAL LAB - Canvas rendering
|
| 537 |
+
// =====================================================
|
| 538 |
+
// Canvas element that hosts the virtual lab, and its 2D drawing context.
const labCanvas = document.getElementById('labCanvas');
const ctx = labCanvas.getContext('2d');
// Canvas size in CSS pixels plus the device pixel ratio; assigned by resizeLab().
let labW, labH, dpr;

/**
 * Size the canvas backing store to its parent element.
 *
 * The backing store is scaled by the device pixel ratio and the context
 * transform is set to the same ratio, so drawing code works in CSS pixels
 * (labW x labH).
 */
function resizeLab() {
  const rect = labCanvas.parentElement.getBoundingClientRect();
  dpr = window.devicePixelRatio || 1;
  labW = rect.width;
  labH = rect.height;
  labCanvas.width = labW * dpr;
  labCanvas.height = labH * dpr;
  ctx.setTransform(dpr, 0, 0, dpr, 0, 0);
}
resizeLab();
window.addEventListener('resize', () => {
  resizeLab();
  // Stations are placed at fractions of the canvas size, so they move when
  // the canvas is resized. Re-aim the agent at its current station; otherwise
  // it stays parked at the old pixel coordinates, away from the equipment.
  const p = stationPos(agent.station);
  agent.targetX = p.x;
  agent.targetY = p.y;
});
|
| 553 |
+
|
| 554 |
+
// Lab stations (positions as fractions of canvas, converted in draw)
|
| 555 |
+
// Lab stations keyed by id. fx/fy are fractions of the canvas width/height
// (turned into pixels by stationPos); label and color are used when drawing.
const STATIONS = {
  idle: {
    fx: 0.06, fy: 0.55,
    label: 'ENTRANCE', icon: 'door', color: '#475569',
  },
  sample: {
    fx: 0.20, fy: 0.35,
    label: 'SAMPLE BENCH', icon: 'bench', color: '#34d399',
  },
  cohort: {
    fx: 0.20, fy: 0.75,
    label: 'COHORT SELECT', icon: 'people', color: '#34d399',
  },
  prep: {
    fx: 0.38, fy: 0.35,
    label: 'LIBRARY PREP', icon: 'flask', color: '#2dd4bf',
  },
  sequencer: {
    fx: 0.38, fy: 0.75,
    label: 'SEQUENCER', icon: 'machine', color: '#22d3ee',
  },
  computer: {
    fx: 0.62, fy: 0.50,
    label: 'COMPUTE', icon: 'screen', color: '#38bdf8',
  },
  whiteboard: {
    fx: 0.84, fy: 0.45,
    label: 'SYNTHESIS', icon: 'board', color: '#a78bfa',
  },
};
|
| 564 |
+
|
| 565 |
+
// Map actions to stations
|
| 566 |
+
// Which station (a key of STATIONS) each pipeline action is performed at.
const ACTION_STATION = {
  // Wet-lab steps each have their own station.
  collect_sample: 'sample',
  select_cohort: 'cohort',
  prepare_library: 'prep',
  sequence_cells: 'sequencer',

  // Every analysis step happens at the computer desk.
  run_qc: 'computer',
  normalize_data: 'computer',
  cluster_cells: 'computer',
  differential_expression: 'computer',
  pathway_enrichment: 'computer',
  marker_selection: 'computer',
  validate_marker: 'computer',

  // The final write-up happens at the whiteboard.
  synthesize_conclusion: 'whiteboard',
};
|
| 580 |
+
|
| 581 |
+
// Agent state
|
| 582 |
+
// Researcher sprite: current position, walk target, station id and whether
// it is currently "working" at that station.
let agent = {
  x: 0,
  y: 0,
  targetX: 0,
  targetY: 0,
  station: 'idle',
  working: false,
};
let agentTrail = [];
let workingTick = 0;
// Lines shown in the fake terminal popup next to the computer desk.
let terminalLines = [];
// Station currently highlighted in the scene, or null for none.
let activeStationKey = null;
// Live particles; advanced and drawn by drawLab, created by spawnParticles.
let particlesLab = [];
|
| 588 |
+
|
| 589 |
+
/**
 * Convert a station's fractional coordinates into canvas pixels.
 *
 * @param {string} key - a key of STATIONS
 * @returns {{x: number, y: number}} position scaled by the current labW/labH
 */
function stationPos(key) {
  const { fx, fy } = STATIONS[key];
  return { x: fx * labW, y: fy * labH };
}
|
| 593 |
+
|
| 594 |
+
/**
 * Reset the researcher to the entrance ('idle' station), facing right and
 * not working, and clear the trail, terminal text, active-station highlight
 * and particles.
 */
function initAgent() {
  const { x, y } = stationPos('idle');
  Object.assign(agent, {
    x,
    y,
    targetX: x,
    targetY: y,
    station: 'idle',
    working: false,
    facing: 1,
  });
  agentTrail = [];
  terminalLines = [];
  activeStationKey = null;
  particlesLab = [];
}
|
| 606 |
+
initAgent();
|
| 607 |
+
|
| 608 |
+
/**
 * Send the researcher walking towards a station and highlight that station.
 * Only the walk target changes here; drawLab eases the position each frame.
 *
 * @param {string} stationKey - a key of STATIONS
 */
function moveAgentTo(stationKey) {
  const { x, y } = stationPos(stationKey);
  Object.assign(agent, {
    targetX: x,
    targetY: y,
    station: stationKey,
    working: false,
  });
  activeStationKey = stationKey;
}
|
| 616 |
+
|
| 617 |
+
/**
 * Switch the researcher into its working pose and reset the working tick.
 * At the computer station this also clears the terminal and starts typing
 * the output for the given action.
 *
 * @param {string} actionName - action whose terminal output should be shown
 */
function setAgentWorking(actionName) {
  agent.working = true;
  workingTick = 0;
  if (agent.station !== 'computer') {
    return;
  }
  terminalLines = [];
  typeComputerLines(actionName);
}
|
| 626 |
+
|
| 627 |
+
// Canned terminal output per analysis action; typeComputerLines pushes these
// one line at a time into the terminal popup.
const COMP_COMMANDS = {
  run_qc: [
    '$ scanpy.pp.filter_cells()',
    ' filtering 11847 cells...',
    ' 10234 passed QC',
    ' doublet rate: 3.2%',
  ],
  normalize_data: [
    '$ scran.normalize(adata)',
    ' computing size factors...',
    ' log1p transform',
    ' HVGs: 3000 selected',
  ],
  cluster_cells: [
    '$ sc.tl.leiden(adata, 0.8)',
    ' building kNN graph...',
    ' optimizing modularity',
    ' 14 clusters found',
  ],
  differential_expression: [
    '$ DESeq2.run(IPF, Ctrl)',
    ' fitting GLM...',
    ' 1847 DE genes',
    ' SPP1 log2FC=3.42 ***',
  ],
  pathway_enrichment: [
    '$ gseapy.enrich(de_genes)',
    ' KEGG + Reactome...',
    ' ECM-receptor p=4.2e-12',
    ' TGF-beta p=1.8e-09',
  ],
  marker_selection: [
    '$ rank_markers(candidates)',
    ' SPP1 AUROC: 0.94',
    ' MMP7 AUROC: 0.87',
    ' COL1A1 AUROC: 0.81',
  ],
  validate_marker: [
    '$ cross_validate("SPP1")',
    ' fold 1: 0.93',
    ' fold 2: 0.89',
    ' mean AUROC: 0.91 OK',
  ],
};
|
| 636 |
+
|
| 637 |
+
/**
 * Type an action's canned output into the terminal popup, one line every
 * 250 ms, keeping at most the five most recent lines.
 *
 * A run only writes to the `terminalLines` array that was current when it
 * started. If that array is replaced meanwhile (setAgentWorking and
 * initAgent both assign a fresh one), the run stops, so stale output from a
 * previous action cannot interleave with the new one.
 *
 * @param {string} actionName - key of COMP_COMMANDS; unknown names fall back
 *   to a generic three-line transcript
 * @returns {Promise<void>} resolves when typing finishes or is superseded
 */
async function typeComputerLines(actionName) {
  // Own-property check so names such as "constructor" don't resolve to
  // members inherited from Object.prototype.
  const known = Object.prototype.hasOwnProperty.call(COMP_COMMANDS, actionName);
  const lines = known
    ? COMP_COMMANDS[actionName]
    : ['$ processing...', ' computing...', ' done'];
  const buffer = terminalLines;
  for (const line of lines) {
    await wait(250);
    if (terminalLines !== buffer) {
      return; // superseded by a newer action or a reset
    }
    buffer.push(line);
    if (buffer.length > 5) buffer.shift();
  }
}
|
| 645 |
+
|
| 646 |
+
// Particles burst
|
| 647 |
+
/**
 * Append a radial burst of particles to particlesLab.
 *
 * Directions are spread evenly around the circle with a small random
 * offset; each particle gets its own random speed per axis and random size.
 *
 * @param {number} x - burst origin x
 * @param {number} y - burst origin y
 * @param {string} color - fill colour for every particle in the burst
 * @param {number} [count=8] - number of particles to create
 */
function spawnParticles(x, y, color, count = 8) {
  const slice = Math.PI * 2 / count;
  let made = 0;
  while (made < count) {
    const heading = slice * made + Math.random() * 0.5;
    const vx = Math.cos(heading) * (1.5 + Math.random() * 2);
    const vy = Math.sin(heading) * (1.5 + Math.random() * 2);
    const size = 2 + Math.random() * 2;
    particlesLab.push({ x, y, vx, vy, life: 1, color, size });
    made += 1;
  }
}
|
| 660 |
+
|
| 661 |
+
// ---- Draw loop ----
// Frames rendered so far; drives every time-based animation in the scene.
let frameCount = 0;
const FLOOR_COLOR = '#0f1520';
const WALL_COLOR = '#1a2332';
const FLOOR_TILE_A = '#0d1219';
const FLOOR_TILE_B = '#10161f';

/**
 * Render one frame of the virtual lab and schedule the next one.
 *
 * Paint order, back to front: floor tiles, walls, station equipment, floor
 * path markings, terminal / whiteboard popups, activity caption, researcher,
 * particles, station labels. The researcher's position is eased towards its
 * target here as well.
 */
function drawLab() {
  frameCount += 1;
  ctx.clearRect(0, 0, labW, labH);

  // Floor: two-tone checkerboard.
  const TILE = 24;
  for (let row = 0; row * TILE < labH; row++) {
    for (let col = 0; col * TILE < labW; col++) {
      ctx.fillStyle = (row + col) % 2 === 0 ? FLOOR_TILE_A : FLOOR_TILE_B;
      ctx.fillRect(col * TILE, row * TILE, TILE, TILE);
    }
  }

  // Walls: bands along the top and bottom, with a seam under the top band.
  ctx.fillStyle = WALL_COLOR;
  ctx.fillRect(0, 0, labW, 18);
  ctx.fillRect(0, labH - 8, labW, 8);
  ctx.strokeStyle = '#253040';
  ctx.lineWidth = 1;
  ctx.beginPath();
  ctx.moveTo(0, 18);
  ctx.lineTo(labW, 18);
  ctx.stroke();

  // Equipment goes in before the researcher so the sprite is drawn on top.
  Object.keys(STATIONS).forEach((key) => {
    const at = stationPos(key);
    drawEquipment(key, at.x, at.y, STATIONS[key].color, key === activeStationKey);
  });

  // Faint walkways on the floor: an upper and a lower route.
  ctx.strokeStyle = 'rgba(56,189,248,0.06)';
  ctx.lineWidth = 16;
  ctx.lineCap = 'round';
  ctx.lineJoin = 'round';
  const routes = [
    ['idle', 'sample', 'prep', 'computer', 'whiteboard'],
    ['idle', 'cohort', 'sequencer', 'computer'],
  ];
  for (const route of routes) {
    ctx.beginPath();
    route.forEach((key, hop) => {
      const pt = stationPos(key);
      if (hop === 0) {
        ctx.moveTo(pt.x, pt.y + 10);
      } else {
        ctx.lineTo(pt.x, pt.y + 10);
      }
    });
    ctx.stroke();
  }
  ctx.lineCap = 'butt';

  // Terminal window floating beside the computer while the agent works there.
  const showTerminal =
    agent.station === 'computer' && agent.working && terminalLines.length > 0;
  if (showTerminal) {
    const desk = stationPos('computer');
    const left = desk.x + 55;
    const top = desk.y - 65;
    const width = 170;
    const height = 95;

    // Drop shadow, then the window body.
    ctx.fillStyle = 'rgba(0,0,0,0.4)';
    roundRect(ctx, left + 3, top + 3, width, height, 6);
    ctx.fill();

    ctx.fillStyle = 'rgba(7,9,13,0.97)';
    ctx.strokeStyle = 'rgba(56,189,248,0.3)';
    ctx.lineWidth = 1;
    roundRect(ctx, left, top, width, height, 6);
    ctx.fill();
    ctx.stroke();

    // Title bar with caption and the three window dots.
    ctx.fillStyle = 'rgba(30,41,59,0.5)';
    ctx.fillRect(left + 1, top + 1, width - 2, 14);
    ctx.fillStyle = '#475569';
    ctx.font = '500 7px Inter, sans-serif';
    ctx.textAlign = 'left';
    ctx.fillText('terminal', left + 6, top + 10);
    [['#f87171', 28], ['#fbbf24', 18], ['#34d399', 8]].forEach(([tint, inset]) => {
      ctx.fillStyle = tint;
      ctx.beginPath();
      ctx.arc(left + width - inset, top + 7, 3, 0, Math.PI * 2);
      ctx.fill();
    });

    // Output lines: prompts and "***" / "OK" results are green, the rest grey.
    ctx.font = '500 9px JetBrains Mono, monospace';
    const firstBaseline = top + 28;
    terminalLines.forEach((text, row) => {
      const accent =
        text.startsWith('$') || text.includes('***') || text.includes('OK');
      ctx.fillStyle = accent ? '#34d399' : '#94a3b8';
      ctx.fillText(text.substring(0, 24), left + 8, firstBaseline + row * 14);
    });
    // Block cursor: visible for 30 frames out of every 60.
    if (frameCount % 60 < 30) {
      ctx.fillStyle = '#34d399';
      ctx.fillRect(left + 8, firstBaseline + terminalLines.length * 14 - 8, 6, 11);
    }
  }

  // Conclusion card above the whiteboard while the agent works there.
  if (agent.station === 'whiteboard' && agent.working) {
    const board = stationPos('whiteboard');
    const left = board.x - 60;
    const top = board.y - 75;
    const width = 120;
    const height = 72;

    ctx.fillStyle = 'rgba(0,0,0,0.3)';
    roundRect(ctx, left + 3, top + 3, width, height, 6);
    ctx.fill();

    ctx.fillStyle = 'rgba(17,24,39,0.95)';
    ctx.strokeStyle = 'rgba(167,139,250,0.3)';
    ctx.lineWidth = 1;
    roundRect(ctx, left, top, width, height, 6);
    ctx.fill();
    ctx.stroke();

    ctx.font = '600 8px JetBrains Mono, monospace';
    ctx.textAlign = 'left';
    ctx.fillStyle = '#a78bfa';
    ctx.fillText('CONCLUSION', left + 8, top + 14);

    ctx.font = '400 7.5px JetBrains Mono, monospace';
    ['SPP1 validated', 'AUROC = 0.91', 'Confidence: 0.85', 'Match: 4/5'].forEach((text, row) => {
      ctx.fillStyle = row === 0 ? '#34d399' : '#94a3b8';
      ctx.fillText(text, left + 8, top + 28 + row * 12);
    });
  }

  // Pulsing caption above whichever station the agent is working at.
  if (agent.working && activeStationKey && activeStationKey !== 'idle') {
    const anchor = stationPos(activeStationKey);
    const captions = {
      sample: 'collecting tissue...',
      cohort: 'selecting cohort...',
      prep: 'preparing library...',
      sequencer: 'sequencing...',
      computer: 'computing...',
      whiteboard: 'synthesizing...',
    };
    ctx.fillStyle = STATIONS[activeStationKey].color;
    ctx.font = '500 9px JetBrains Mono, monospace';
    ctx.textAlign = 'center';
    ctx.globalAlpha = 0.5 + 0.3 * Math.sin(frameCount * 0.06);
    const onUpperBench = activeStationKey === 'sample' || activeStationKey === 'prep';
    const lift = onUpperBench ? -55 : -50;
    ctx.fillText(captions[activeStationKey] || 'working...', anchor.x, anchor.y + lift);
    ctx.globalAlpha = 1;
  }

  // Ease the researcher towards the target: 5% of the remaining gap per frame.
  const gapX = agent.targetX - agent.x;
  const gapY = agent.targetY - agent.y;
  const isWalking = Math.sqrt(gapX * gapX + gapY * gapY) > 2;
  if (isWalking) {
    const EASE = 0.05;
    agent.x += gapX * EASE;
    agent.y += gapY * EASE;
    // Face the direction of travel; tiny leftward drift keeps the old facing.
    if (gapX > 0) {
      agent.facing = 1;
    } else if (gapX < -0.5) {
      agent.facing = -1;
    }
  }

  drawPerson(agent.x, agent.y, isWalking, agent.working, agent.facing || 1);

  // Particles: advance, fade, and drop the dead ones. Walk the array
  // backwards so splice() does not disturb entries still to be visited.
  let slot = particlesLab.length;
  while (slot-- > 0) {
    const bit = particlesLab[slot];
    bit.x += bit.vx;
    bit.y += bit.vy;
    bit.vx *= 0.95;
    bit.vy *= 0.95;
    bit.life -= 0.02;
    if (bit.life <= 0) {
      particlesLab.splice(slot, 1);
      continue;
    }
    ctx.globalAlpha = bit.life * 0.6;
    ctx.fillStyle = bit.color;
    ctx.beginPath();
    ctx.arc(bit.x, bit.y, bit.size * bit.life, 0, Math.PI * 2);
    ctx.fill();
  }
  ctx.globalAlpha = 1;

  // Station name plates; the entrance has none.
  Object.keys(STATIONS).forEach((key) => {
    if (key === 'idle') return;
    const station = STATIONS[key];
    const at = stationPos(key);
    const lit = key === activeStationKey;
    ctx.fillStyle = lit ? station.color : '#334155';
    ctx.font = `600 ${lit ? 9 : 8}px Inter, sans-serif`;
    ctx.textAlign = 'center';
    const onLowerRow = key === 'cohort' || key === 'sequencer';
    ctx.fillText(station.label, at.x, onLowerRow ? at.y + 45 : at.y + 42);
  });

  requestAnimationFrame(drawLab);
}
|
| 852 |
+
|
| 853 |
+
// ---- Draw person (lab coat researcher) ----
|
| 854 |
+
/**
 * Draw the lab-coat researcher sprite.
 *
 * @param {number} x - sprite origin x in canvas pixels
 * @param {number} y - sprite origin y before the walking bob is applied
 * @param {boolean} walking - animate legs and arms and bob the body
 * @param {boolean} working - raise the front arm and draw an open mouth
 * @param {number} facing - positive faces right; otherwise the sprite is mirrored
 */
function drawPerson(x, y, walking, working, facing) {
  const dir = facing;
  const tick = frameCount;

  // Small local wrappers around the repeated path patterns.
  const disc = (cx, cy, radius) => {
    ctx.beginPath();
    ctx.arc(cx, cy, radius, 0, Math.PI * 2);
    ctx.fill();
  };
  const ring = (cx, cy, radius) => {
    ctx.beginPath();
    ctx.arc(cx, cy, radius, 0, Math.PI * 2);
    ctx.stroke();
  };
  const limb = (x1, y1, x2, y2) => {
    ctx.beginPath();
    ctx.moveTo(x1, y1);
    ctx.lineTo(x2, y2);
    ctx.stroke();
  };

  // Animation phases derived from the global frame counter.
  const stride = walking ? Math.sin(tick * 0.15) : 0;
  const bob = walking ? Math.abs(Math.sin(tick * 0.15)) * 2 : 0;
  const reach = working ? Math.sin(tick * 0.08) * 0.3 : 0;

  ctx.save();
  ctx.translate(x, y - bob);

  // Ground shadow.
  ctx.fillStyle = 'rgba(0,0,0,0.25)';
  ctx.beginPath();
  ctx.ellipse(0, 12, 10, 4, 0, 0, Math.PI * 2);
  ctx.fill();

  // Legs swing in opposite directions, with a shoe at the end of each.
  const legSwing = walking ? stride * 5 : 0;
  const leftFootX = -3 + legSwing;
  const rightFootX = 3 - legSwing;
  ctx.strokeStyle = '#1e3a5f';
  ctx.lineWidth = 3;
  ctx.lineCap = 'round';
  limb(-3, 4, leftFootX, 12);
  limb(3, 4, rightFootX, 12);
  ctx.fillStyle = '#1e293b';
  disc(leftFootX, 12, 2.5);
  disc(rightFootX, 12, 2.5);

  // Lab coat: filled silhouette, then outlined.
  ctx.fillStyle = '#e2e8f0';
  ctx.beginPath();
  ctx.moveTo(-7, -4);
  ctx.lineTo(-6, 6);
  ctx.lineTo(6, 6);
  ctx.lineTo(7, -4);
  ctx.quadraticCurveTo(7, -10, 0, -10);
  ctx.quadraticCurveTo(-7, -10, -7, -4);
  ctx.fill();
  ctx.strokeStyle = '#94a3b8';
  ctx.lineWidth = 0.5;
  ctx.stroke();

  // Split at the bottom of the coat.
  ctx.beginPath();
  ctx.moveTo(0, 1);
  ctx.lineTo(0, 6);
  ctx.strokeStyle = '#cbd5e1';
  ctx.lineWidth = 0.5;
  ctx.stroke();

  // Pocket, on the side the sprite is facing.
  ctx.strokeStyle = '#94a3b8';
  ctx.lineWidth = 0.5;
  ctx.strokeRect(dir > 0 ? 1 : -5, -1, 4, 3);

  // Arms.
  ctx.strokeStyle = '#e2e8f0';
  ctx.lineWidth = 3.5;
  ctx.lineCap = 'round';
  const backSwing = walking ? -stride * 4 : 0;
  const backHandX = -dir * 6 + backSwing;
  limb(-dir * 6, -6, backHandX, 2);
  if (working) {
    // Front arm reaches forward and up, ending in the working hand.
    const handX = dir * 10 + reach * 5;
    const handY = -8 + reach * 3;
    limb(dir * 6, -6, handX, handY);
    ctx.fillStyle = '#fde68a';
    disc(handX, handY, 2);
  } else {
    const frontSwing = walking ? stride * 4 : 0;
    limb(dir * 6, -6, dir * 6 + frontSwing, 2);
  }

  // Hands in skin tone (the working hand has already been drawn above).
  ctx.fillStyle = '#fde68a';
  disc(backHandX, 2, 1.8);
  if (!working) {
    const frontSwing = walking ? stride * 4 : 0;
    disc(dir * 6 + frontSwing, 2, 1.8);
  }

  // Head, then a half-disc of hair over the top of it.
  ctx.fillStyle = '#fde68a';
  disc(0, -15, 7);
  ctx.fillStyle = '#1e293b';
  ctx.beginPath();
  ctx.arc(0, -17, 7, Math.PI, 0);
  ctx.fill();

  // Eyes, offset towards the facing side.
  ctx.fillStyle = '#1e293b';
  disc(dir * 2.5, -15.5, 1);
  disc(dir * -1.5, -15.5, 1);

  // Glasses: two rims joined by a bridge.
  ctx.strokeStyle = '#475569';
  ctx.lineWidth = 0.7;
  ring(dir * 2.5, -15.5, 2.5);
  ring(dir * -1.5, -15.5, 2.5);
  limb(dir * 0.5, -15.5, dir * -0.5, -15.5);

  // Mouth is only drawn while working.
  if (working) {
    ctx.fillStyle = '#1e293b';
    ctx.beginPath();
    ctx.arc(dir * 0.5, -12.5, 1, 0, Math.PI);
    ctx.fill();
  }

  // ID badge, on the side opposite the pocket.
  ctx.fillStyle = '#38bdf8';
  ctx.fillRect(dir > 0 ? -6 : 2, -3, 4, 5);
  ctx.fillStyle = '#fff';
  ctx.font = 'bold 3px Inter, sans-serif';
  ctx.textAlign = 'center';
  ctx.fillText('AI', dir > 0 ? -4 : 4, 0.5);

  ctx.restore();
}
|
| 1007 |
+
|
| 1008 |
+
// ---- Draw lab equipment ----
|
| 1009 |
+
/**
 * Draw the furniture and instruments for one lab station.
 *
 * @param {string} stationKey - which station to draw; unknown keys draw nothing
 * @param {number} cx - station anchor x in canvas pixels
 * @param {number} cy - station anchor y in canvas pixels
 * @param {string} color - station accent colour, used for active highlights
 * @param {boolean} active - whether this is the currently active station
 */
function drawEquipment(stationKey, cx, cy, color, active) {
  // Small local wrappers around the repeated path patterns.
  const disc = (x, y, radius) => {
    ctx.beginPath();
    ctx.arc(x, y, radius, 0, Math.PI * 2);
    ctx.fill();
  };
  const ring = (x, y, radius) => {
    ctx.beginPath();
    ctx.arc(x, y, radius, 0, Math.PI * 2);
    ctx.stroke();
  };
  const rule = (x1, y1, x2, y2) => {
    ctx.beginPath();
    ctx.moveTo(x1, y1);
    ctx.lineTo(x2, y2);
    ctx.stroke();
  };

  ctx.save();

  switch (stationKey) {
    case 'idle': {
      // Entrance: door frame, door panel and handle.
      ctx.strokeStyle = '#334155';
      ctx.lineWidth = 2;
      ctx.strokeRect(cx - 12, cy - 30, 24, 40);
      ctx.fillStyle = '#1a2332';
      ctx.fillRect(cx - 10, cy - 28, 20, 36);
      ctx.fillStyle = '#475569';
      disc(cx + 6, cy - 10, 2);
      break;
    }

    case 'sample': {
      // Bench surface and legs.
      ctx.fillStyle = '#1a2332';
      ctx.fillRect(cx - 30, cy - 8, 60, 6);
      ctx.fillStyle = '#253040';
      ctx.fillRect(cx - 28, cy - 2, 4, 20);
      ctx.fillRect(cx + 24, cy - 2, 4, 20);
      // Tube rack.
      ctx.fillStyle = '#253040';
      ctx.fillRect(cx - 18, cy - 18, 36, 10);
      // Six tubes with caps; coloured only while the station is active.
      const palette = ['#34d399', '#22d3ee', '#fbbf24', '#f472b6', '#34d399', '#22d3ee'];
      for (let slot = 0; slot < 6; slot++) {
        const tubeX = cx - 14 + slot * 6;
        ctx.fillStyle = active ? palette[slot] : '#334155';
        ctx.globalAlpha = active ? 0.7 : 0.4;
        ctx.fillRect(tubeX, cy - 28, 4, 12);
        ctx.globalAlpha = 1;
        ctx.fillStyle = active ? palette[slot] : '#475569';
        ctx.fillRect(tubeX - 0.5, cy - 29, 5, 2);
      }
      ctx.globalAlpha = 1;
      if (active) {
        // Pipette bobbing above the rack.
        const tipY = cy - 32 + Math.sin(frameCount * 0.08) * 4;
        ctx.strokeStyle = '#94a3b8';
        ctx.lineWidth = 2;
        rule(cx + 5, tipY, cx + 5, tipY - 14);
        ctx.fillStyle = '#64748b';
        ctx.fillRect(cx + 3, tipY - 18, 5, 6);
        // Droplet shown for 20 frames out of every 60.
        if (frameCount % 60 < 20) {
          ctx.fillStyle = '#34d399';
          ctx.globalAlpha = 0.6;
          disc(cx + 5, tipY + 3, 1.5);
          ctx.globalAlpha = 1;
        }
      }
      break;
    }

    case 'cohort': {
      // Filing cabinet with three drawers.
      ctx.fillStyle = '#1a2332';
      ctx.fillRect(cx - 20, cy - 22, 40, 40);
      ctx.strokeStyle = '#253040';
      ctx.lineWidth = 1;
      for (let drawer = 0; drawer < 3; drawer++) {
        const drawerY = cy - 18 + drawer * 13;
        ctx.strokeRect(cx - 18, drawerY, 36, 11);
        ctx.fillStyle = active ? '#475569' : '#253040';
        ctx.fillRect(cx - 4, drawerY + 4, 8, 3);
      }
      // Clipboard with four ruled lines.
      ctx.fillStyle = '#253040';
      ctx.fillRect(cx + 24, cy - 16, 14, 20);
      ctx.strokeStyle = '#475569';
      ctx.lineWidth = 0.5;
      for (let row = 0; row < 4; row++) {
        const rowY = cy - 12 + row * 4;
        rule(cx + 27, rowY, cx + 35, rowY);
      }
      if (active) {
        ctx.fillStyle = color;
        ctx.globalAlpha = 0.5;
        disc(cx + 31, cy - 14, 2);
        ctx.globalAlpha = 1;
      }
      break;
    }

    case 'prep': {
      // Bench surface and legs.
      ctx.fillStyle = '#1a2332';
      ctx.fillRect(cx - 28, cy - 6, 56, 6);
      ctx.fillStyle = '#253040';
      ctx.fillRect(cx - 26, cy, 4, 18);
      ctx.fillRect(cx + 22, cy, 4, 18);
      // PCR / thermocycler body.
      ctx.fillStyle = active ? '#192535' : '#172030';
      ctx.strokeStyle = active ? color : '#253040';
      ctx.lineWidth = 1;
      roundRect(ctx, cx - 18, cy - 26, 36, 20, 3);
      ctx.fill();
      ctx.stroke();
      // Machine display; temperature readout and LED only when active.
      ctx.fillStyle = active ? 'rgba(45,212,191,0.15)' : 'rgba(30,41,59,0.3)';
      ctx.fillRect(cx - 14, cy - 22, 16, 8);
      if (active) {
        ctx.fillStyle = color;
        ctx.font = '500 6px JetBrains Mono, monospace';
        ctx.textAlign = 'left';
        ctx.fillText('72.0°C', cx - 12, cy - 16);
        ctx.fillStyle = color;
        disc(cx + 12, cy - 18, 2);
      }
      // Microplate: a 3 x 4 grid of wells.
      ctx.fillStyle = '#1e293b';
      ctx.fillRect(cx - 20, cy - 3, 18, 12);
      ctx.strokeStyle = '#334155';
      ctx.lineWidth = 0.3;
      for (let wellRow = 0; wellRow < 3; wellRow++) {
        for (let wellCol = 0; wellCol < 4; wellCol++) {
          ring(cx - 17 + wellCol * 4.5, cy + 1 + wellRow * 3.5, 1.2);
        }
      }
      break;
    }

    case 'sequencer': {
      // Machine body.
      ctx.fillStyle = '#172030';
      ctx.strokeStyle = active ? color : '#253040';
      ctx.lineWidth = active ? 1.5 : 1;
      roundRect(ctx, cx - 24, cy - 28, 48, 44, 4);
      ctx.fill();
      ctx.stroke();
      // Front screen.
      ctx.fillStyle = active ? 'rgba(34,211,238,0.1)' : 'rgba(30,41,59,0.3)';
      roundRect(ctx, cx - 18, cy - 22, 36, 18, 2);
      ctx.fill();
      if (active) {
        // Progress bar that refills every 120 frames, plus a status caption.
        ctx.fillStyle = 'rgba(34,211,238,0.2)';
        ctx.fillRect(cx - 14, cy - 12, 28, 4);
        const done = (frameCount % 120) / 120;
        ctx.fillStyle = color;
        ctx.fillRect(cx - 14, cy - 12, 28 * done, 4);
        ctx.fillStyle = color;
        ctx.font = '500 6px JetBrains Mono, monospace';
        ctx.textAlign = 'center';
        ctx.fillText('SEQUENCING', cx, cy - 16);
      }
      // Loading slot.
      ctx.fillStyle = '#0f1520';
      ctx.fillRect(cx - 10, cy, 20, 4);
      // Status LEDs: the first is steady, the second blinks while active.
      ctx.fillStyle = active ? '#34d399' : '#334155';
      disc(cx - 14, cy + 10, 2);
      const blinkOn = active && frameCount % 30 < 15;
      ctx.fillStyle = blinkOn ? '#fbbf24' : '#334155';
      disc(cx - 8, cy + 10, 2);
      break;
    }

    case 'computer': {
      // Desk surface and legs.
      ctx.fillStyle = '#1a2332';
      ctx.fillRect(cx - 36, cy + 2, 72, 5);
      ctx.fillStyle = '#253040';
      ctx.fillRect(cx - 32, cy + 7, 4, 16);
      ctx.fillRect(cx + 28, cy + 7, 4, 16);
      // Chair seat and post.
      ctx.fillStyle = '#1e293b';
      disc(cx, cy + 28, 8);
      ctx.fillStyle = '#253040';
      ctx.fillRect(cx - 1, cy + 20, 2, 8);
      // Main monitor and its stand.
      ctx.fillStyle = active ? '#0c1219' : '#131c28';
      ctx.strokeStyle = active ? 'rgba(56,189,248,0.4)' : '#253040';
      ctx.lineWidth = 1;
      roundRect(ctx, cx - 30, cy - 28, 32, 24, 2);
      ctx.fill();
      ctx.stroke();
      ctx.fillStyle = '#334155';
      ctx.fillRect(cx - 16, cy - 4, 4, 6);
      ctx.fillRect(cx - 20, cy + 1, 12, 2);
      // Second monitor and its stand.
      ctx.fillStyle = active ? '#0c1219' : '#131c28';
      ctx.strokeStyle = active ? 'rgba(56,189,248,0.3)' : '#253040';
      roundRect(ctx, cx + 2, cy - 24, 26, 20, 2);
      ctx.fill();
      ctx.stroke();
      ctx.fillStyle = '#334155';
      ctx.fillRect(cx + 13, cy - 4, 4, 6);
      ctx.fillRect(cx + 9, cy + 1, 12, 2);
      if (active) {
        // Main screen: glow plus five animated "code" bars.
        ctx.fillStyle = 'rgba(56,189,248,0.08)';
        ctx.fillRect(cx - 28, cy - 26, 28, 20);
        for (let row = 0; row < 5; row++) {
          ctx.fillStyle = `rgba(56,189,248,${0.15 + row * 0.06})`;
          const barWidth = 8 + Math.sin(row * 2.3 + frameCount * 0.02) * 6;
          ctx.fillRect(cx - 26, cy - 24 + row * 4, barWidth, 2);
        }
        // Second screen: glow plus an animated line graph.
        ctx.fillStyle = 'rgba(56,189,248,0.06)';
        ctx.fillRect(cx + 4, cy - 22, 22, 16);
        ctx.strokeStyle = 'rgba(34,211,238,0.3)';
        ctx.lineWidth = 1;
        ctx.beginPath();
        ctx.moveTo(cx + 6, cy - 8);
        for (let pt = 0; pt < 8; pt++) {
          ctx.lineTo(cx + 6 + pt * 2.5, cy - 10 - Math.sin(pt * 0.8 + frameCount * 0.03) * 5);
        }
        ctx.stroke();
      }
      // Keyboard, with a highlight sweeping across it while the agent types.
      ctx.fillStyle = '#1e293b';
      ctx.fillRect(cx - 14, cy + 4, 28, 6);
      if (active && agent.working) {
        const pressedX = cx - 12 + (frameCount % 20) * 1.2;
        ctx.fillStyle = 'rgba(56,189,248,0.4)';
        ctx.fillRect(pressedX, cy + 5, 3, 4);
      }
      break;
    }

    case 'whiteboard': {
      // Board mounted on the wall.
      ctx.fillStyle = '#1e293b';
      ctx.strokeStyle = '#334155';
      ctx.lineWidth = 1;
      ctx.fillRect(cx - 28, cy - 34, 56, 32);
      ctx.strokeRect(cx - 28, cy - 34, 56, 32);
      if (active) {
        // Diagram: three boxes, two connectors and a green checkmark.
        ctx.fillStyle = 'rgba(167,139,250,0.1)';
        ctx.fillRect(cx - 26, cy - 32, 52, 28);
        ctx.strokeStyle = 'rgba(167,139,250,0.4)';
        ctx.lineWidth = 0.8;
        ctx.strokeRect(cx - 20, cy - 28, 14, 8);
        ctx.strokeRect(cx + 6, cy - 28, 14, 8);
        ctx.strokeRect(cx - 8, cy - 16, 16, 8);
        rule(cx - 6, cy - 24, cx + 6, cy - 24);
        rule(cx, cy - 20, cx, cy - 16);
        ctx.strokeStyle = '#34d399';
        ctx.lineWidth = 1.5;
        ctx.beginPath();
        ctx.moveTo(cx - 4, cy - 12);
        ctx.lineTo(cx - 1, cy - 9);
        ctx.lineTo(cx + 5, cy - 15);
        ctx.stroke();
      } else {
        // Idle board: four faint ruled lines.
        ctx.strokeStyle = '#253040';
        ctx.lineWidth = 0.5;
        for (let row = 0; row < 4; row++) {
          const rowY = cy - 28 + row * 7;
          rule(cx - 22, rowY, cx + 22, rowY);
        }
      }
      // Standing desk in front of the board.
      ctx.fillStyle = '#1a2332';
      ctx.fillRect(cx - 16, cy + 2, 32, 4);
      ctx.fillStyle = '#253040';
      ctx.fillRect(cx - 2, cy + 6, 4, 14);
      break;
    }
  }

  ctx.restore();
}
|
| 1300 |
+
|
| 1301 |
+
/**
 * Trace a rounded-rectangle path on a 2D context. The path is only built
 * (beginPath ... closePath); the caller fills and/or strokes it.
 *
 * The corner radius is clamped to half the shorter side and to a minimum of
 * zero, so an oversized radius gives a pill or circle instead of a
 * self-overlapping outline. Radii that already fit are used unchanged.
 *
 * @param {CanvasRenderingContext2D} ctx - context to draw the path on
 * @param {number} x - left edge
 * @param {number} y - top edge
 * @param {number} w - width
 * @param {number} h - height
 * @param {number} r - requested corner radius
 */
function roundRect(ctx, x, y, w, h, r) {
  const rad = Math.max(0, Math.min(r, w / 2, h / 2));
  const right = x + w;
  const bottom = y + h;
  ctx.beginPath();
  ctx.moveTo(x + rad, y);
  ctx.lineTo(right - rad, y);
  ctx.quadraticCurveTo(right, y, right, y + rad);
  ctx.lineTo(right, bottom - rad);
  ctx.quadraticCurveTo(right, bottom, right - rad, bottom);
  ctx.lineTo(x + rad, bottom);
  ctx.quadraticCurveTo(x, bottom, x, bottom - rad);
  ctx.lineTo(x, y + rad);
  ctx.quadraticCurveTo(x, y, x + rad, y);
  ctx.closePath();
}
|
| 1314 |
+
|
| 1315 |
+
drawLab();
|
| 1316 |
+
|
| 1317 |
+
// =====================================================
|
| 1318 |
+
// EPISODE DATA + APP LOGIC
|
| 1319 |
+
// =====================================================
|
| 1320 |
+
// Scripted demo episode: one entry per agent step, replayed in order.
// Each entry carries the action call, the remaining budget/time after the
// step (absolute value plus percentage for the gauges), the terminal output
// lines, the per-component reward breakdown, the step's total reward, and an
// optional `discovery` card. The last entry is flagged `terminal`.
const EPISODE = [
  {
    action: 'collect_sample', params: 'n_samples=8, tissue="lung"', category: 'wet',
    budget: 92400, budgetPct: 92.4, time: 165, timePct: 91.7,
    output: ['Collected 8 lung tissue samples (4 IPF, 4 control)','Tissue quality: excellent | Storage: -80C'],
    reward: { validity: 0.90, ordering: 1.00, info_gain: 0.10, efficiency: 0.72, novelty: 1.00, penalty: 0.0 },
    total: 0.45,
  },
  {
    action: 'select_cohort', params: 'criteria="age_matched, sex_balanced"', category: 'wet',
    budget: 91800, budgetPct: 91.8, time: 162, timePct: 90.0,
    output: ['Cohort selected: 4 IPF patients (2M/2F, age 58-67)','Controls matched: 4 healthy donors (2M/2F, age 55-65)'],
    reward: { validity: 0.85, ordering: 0.90, info_gain: 0.15, efficiency: 0.80, novelty: 0.90, penalty: 0.0 },
    total: 0.38,
  },
  {
    action: 'prepare_library', params: 'protocol="10x_chromium_v3"', category: 'wet',
    budget: 84200, budgetPct: 84.2, time: 155, timePct: 86.1,
    output: ['Library prep complete using 10x Chromium v3','Estimated cell capture: ~12,000 cells','cDNA yield: 42ng (good)'],
    reward: { validity: 0.95, ordering: 1.00, info_gain: 0.20, efficiency: 0.70, novelty: 0.95, penalty: 0.0 },
    total: 0.52,
  },
  {
    action: 'sequence_cells', params: 'depth="standard", platform="NovaSeq"', category: 'wet',
    budget: 68500, budgetPct: 68.5, time: 142, timePct: 78.9,
    output: ['11,847 cells sequenced | 22,438 genes detected','Median reads/cell: 45,200 | Median genes/cell: 3,842','Sequencing saturation: 78.3%'],
    reward: { validity: 0.95, ordering: 1.00, info_gain: 0.55, efficiency: 0.60, novelty: 0.90, penalty: 0.0 },
    total: 0.68,
  },
  {
    action: 'run_qc', params: 'tool="scanpy", min_genes=200', category: 'comp',
    budget: 68100, budgetPct: 68.1, time: 141, timePct: 78.3,
    output: ['QC complete: 10,234 / 11,847 cells passed (86.4%)','Removed: 382 doublets (3.2%), 1,231 low-quality cells','Mitochondrial threshold: 20% (flagged 847 cells)'],
    reward: { validity: 0.95, ordering: 1.00, info_gain: 0.35, efficiency: 0.85, novelty: 0.80, penalty: 0.0 },
    total: 0.55,
  },
  {
    action: 'normalize_data', params: 'method="scran", log_transform=true', category: 'comp',
    budget: 67900, budgetPct: 67.9, time: 140, timePct: 77.8,
    output: ['Size-factor normalization (scran) applied','Log1p transform complete | HVG selection: 3,000 genes'],
    reward: { validity: 0.90, ordering: 1.00, info_gain: 0.25, efficiency: 0.90, novelty: 0.70, penalty: 0.0 },
    total: 0.42,
  },
  {
    action: 'cluster_cells', params: 'algorithm="leiden", resolution=0.8', category: 'comp',
    budget: 67500, budgetPct: 67.5, time: 139, timePct: 77.2,
    output: ['Leiden clustering: 14 clusters identified','AT1 (8.2%), AT2 (12.1%), Fibroblast (15.7%), Macrophage (18.3%)','Endothelial (9.4%), Basal (6.1%), Ciliated (5.8%), NK/T (7.2%)','Smooth Muscle (4.1%), Mast (2.9%), B cell (3.4%), pDC (2.0%)','Mesothelial (2.6%), Aberrant Basaloid (2.2%)'],
    reward: { validity: 0.95, ordering: 1.00, info_gain: 0.65, efficiency: 0.85, novelty: 0.85, penalty: 0.0 },
    total: 0.72,
    discovery: { title: '14 cell populations identified', detail: 'Including Aberrant Basaloid cells (IPF-associated)', color: 'var(--cyan)', bg: 'var(--cyan-dim)' },
  },
  {
    action: 'differential_expression', params: 'method="DESeq2", contrast="IPF_vs_Ctrl"', category: 'comp',
    budget: 67000, budgetPct: 67.0, time: 137, timePct: 76.1,
    output: ['1,847 DE genes (|log2FC| > 1, padj < 0.05)','Top upregulated in IPF:',' SPP1 log2FC=3.42 padj=1.2e-18',' MMP7 log2FC=2.89 padj=3.4e-15',' COL1A1 log2FC=2.67 padj=8.7e-14',' TGFB1 log2FC=1.95 padj=2.1e-09','Top downregulated: AGER (-3.1), SFTPC (-2.8), HOPX (-2.3)'],
    reward: { validity: 0.95, ordering: 1.00, info_gain: 0.78, efficiency: 0.80, novelty: 0.88, penalty: 0.0 },
    total: 0.82,
    discovery: { title: 'SPP1 strongly upregulated in IPF', detail: 'log2FC=3.42, padj=1.2e-18', color: 'var(--pink)', bg: 'rgba(244,114,182,0.10)' },
  },
  {
    action: 'pathway_enrichment', params: 'tool="gseapy", gene_sets="KEGG,Reactome"', category: 'comp',
    budget: 66600, budgetPct: 66.6, time: 136, timePct: 75.6,
    output: ['Top enriched pathways (IPF vs Control):',' ECM-receptor interaction padj=4.2e-12',' TGF-beta signaling padj=1.8e-09',' PI3K-Akt signaling padj=3.1e-07',' Focal adhesion padj=8.9e-07','SPP1 participates in 3/4 top pathways'],
    reward: { validity: 0.90, ordering: 1.00, info_gain: 0.60, efficiency: 0.85, novelty: 0.75, penalty: 0.0 },
    total: 0.58,
    discovery: { title: 'SPP1 in ECM/TGF-beta/PI3K pathways', detail: 'Core fibrosis signaling axis confirmed', color: 'var(--purple)', bg: 'rgba(167,139,250,0.10)' },
  },
  {
    action: 'marker_selection', params: 'candidates=["SPP1","MMP7","COL1A1"]', category: 'comp',
    budget: 66200, budgetPct: 66.2, time: 135, timePct: 75.0,
    output: ['Marker ranking by discriminative power:',' 1. SPP1 - AUROC: 0.94, specificity: 0.89',' 2. MMP7 - AUROC: 0.87, specificity: 0.82',' 3. COL1A1 - AUROC: 0.81, specificity: 0.76','SPP1 selected as primary biomarker candidate'],
    reward: { validity: 0.90, ordering: 1.00, info_gain: 0.50, efficiency: 0.88, novelty: 0.70, penalty: 0.0 },
    total: 0.55,
  },
  {
    action: 'validate_marker', params: 'gene="SPP1", method="cross_validation"', category: 'comp',
    budget: 65200, budgetPct: 65.2, time: 130, timePct: 72.2,
    output: ['SPP1 Biomarker Validation Report:',' 5-fold CV AUROC: 0.91 (+/- 0.03)',' Sensitivity: 0.88',' Specificity: 0.87',' Positive LR: 6.77',' Expression in Aberrant Basaloid: 94.2% of cells',' Status: VALIDATED as IPF biomarker'],
    reward: { validity: 0.95, ordering: 1.00, info_gain: 0.72, efficiency: 0.82, novelty: 0.85, penalty: 0.0 },
    total: 0.76,
    discovery: { title: 'SPP1 validated as IPF biomarker', detail: 'AUROC=0.91, specificity=0.87', color: 'var(--green)', bg: 'var(--green-dim)' },
  },
  {
    action: 'synthesize_conclusion', params: 'confidence=0.85', category: 'meta',
    budget: 65000, budgetPct: 65.0, time: 129, timePct: 71.7,
    // log2FC=3.42 is a ~10.7x change (2^3.42), not "3.42-fold".
    output: ['CONCLUSION (confidence: 0.85):','','SPP1 is a validated biomarker for IPF with strong','discriminative power (AUROC=0.91). It is upregulated','~10.7-fold (log2FC=3.42) in IPF lungs, concentrated in Aberrant Basaloid','cells (94.2%), and participates in ECM-receptor, TGF-beta,','and PI3K-Akt signaling pathways.','','Literature match: 4/5 expected findings confirmed','Calibration: Well-calibrated (no overconfidence penalty)'],
    reward: { validity: 1.00, ordering: 1.00, info_gain: 0.40, efficiency: 0.90, novelty: 0.50, penalty: 0.0 },
    total: 0.91, terminal: true,
  },
];
|
| 1410 |
+
|
| 1411 |
+
// State
let running = false; // true while startDemo() is replaying the episode
let cumReward = 0;   // running sum of step.total for the current run

// DOM refs (looked up once at script load)
const terminalEl = document.getElementById('terminal');
const statusDot = document.getElementById('statusDot');
const statusText = document.getElementById('statusText');
const runBtn = document.getElementById('runBtn');
const labActionLabel = document.getElementById('labActionLabel');
|
| 1421 |
+
|
| 1422 |
+
// Helpers
|
| 1423 |
+
/**
 * Append one line to the terminal panel and scroll the panel to the bottom.
 *
 * @param {string} [html] - markup for the line. It is assigned to innerHTML
 *   unescaped, so only pass trusted strings. A falsy value produces a blank
 *   spacer line.
 */
function addLine(html) {
  const line = document.createElement('div');
  line.className = 't-line';
  // Use a non-breaking space for blank lines: a lone regular space is
  // collapsed by the browser and the spacer line would render with no height.
  line.innerHTML = html || '&nbsp;';
  terminalEl.appendChild(line);
  terminalEl.scrollTop = terminalEl.scrollHeight;
}
|
| 1430 |
+
|
| 1431 |
+
/**
 * Update one gauge: its text label and the width/colour of its fill bar.
 *
 * Looks up the elements `<id>Val` and `<id>Fill` by id.
 *
 * @param {string} id - gauge id prefix (e.g. 'budget', 'time', 'step')
 * @param {string} value - text shown in the `<id>Val` element
 * @param {number} pct - fill percentage; clamped to 0..100
 * @param {string} [color] - CSS background for the fill; left unchanged when falsy
 */
function setGauge(id, value, pct, color) {
  document.getElementById(id + 'Val').textContent = value;
  const fill = document.getElementById(id + 'Fill');
  // Keep the bar inside its track even if a caller passes an out-of-range
  // or non-numeric percentage.
  const clamped = Math.min(100, Math.max(0, Number(pct) || 0));
  fill.style.width = clamped + '%';
  if (color) fill.style.background = color;
}
|
| 1437 |
+
|
| 1438 |
+
/**
 * Render the per-component reward breakdown into the `rw-<key>` bars.
 *
 * @param {Object<string, number>} r - reward components keyed by name
 *   (validity, ordering, info_gain, efficiency, novelty, penalty). A missing
 *   or non-numeric component is shown as an empty bar.
 */
function setRewardBars(r) {
  const components = ['validity', 'ordering', 'info_gain', 'efficiency', 'novelty', 'penalty'];
  for (const name of components) {
    const bar = document.getElementById('rw-' + name);
    // Default to 0 so an absent component cannot yield "NaN%" or make
    // toFixed() throw on undefined.
    const value = Number(r && r[name]) || 0;
    bar.style.width = Math.min(100, Math.max(0, value * 100)) + '%';
    // Values at or below 0.01 get no label.
    bar.textContent = value > 0.01 ? value.toFixed(2) : '';
  }
}
|
| 1445 |
+
|
| 1446 |
+
/**
 * Reset every `rw-<key>` reward bar to zero width with no label.
 */
function clearRewardBars() {
  const components = ['validity', 'ordering', 'info_gain', 'efficiency', 'novelty', 'penalty'];
  components.forEach((name) => {
    const bar = document.getElementById('rw-' + name);
    bar.style.width = '0%';
    bar.textContent = '';
  });
}
|
| 1453 |
+
|
| 1454 |
+
/**
 * Append a step entry to the pipeline sidebar (`#pipelineSteps`).
 *
 * @param {{action: string, category: string}} step - episode step
 * @param {number} index - zero-based step index; shown one-based and used
 *   in the element id `pipe-<index>`
 * @returns {HTMLElement} the created `.pipe-step` element
 */
function addPipeStep(step, index) {
  const el = document.createElement('div');
  el.className = 'pipe-step';
  el.id = 'pipe-' + index;

  // Icon colour by category: wet -> green, comp -> accent, anything else -> pink.
  let catColor;
  if (step.category === 'wet') {
    catColor = 'var(--green)';
  } else if (step.category === 'comp') {
    catColor = 'var(--accent)';
  } else {
    catColor = 'var(--pink)';
  }

  el.innerHTML = `<div class="step-icon" style="color:${catColor};border-color:${catColor};">${index + 1}</div><span>${step.action}</span>`;
  document.getElementById('pipelineSteps').appendChild(el);
  // Add the class on the next frame, after the element has been inserted.
  requestAnimationFrame(() => el.classList.add('visible'));
  return el;
}
|
| 1464 |
+
|
| 1465 |
+
/**
 * Append a discovery card to the `#discoveries` panel.
 *
 * If the panel currently shows its `.empty-state` placeholder, the panel is
 * cleared first.
 *
 * @param {{title: string, detail: string, color: string, bg: string}} d -
 *   card content and icon colours; values are interpolated into innerHTML
 *   unescaped, so they must be trusted strings.
 */
function addDiscovery(d) {
  const panel = document.getElementById('discoveries');
  if (panel.querySelector('.empty-state')) panel.innerHTML = '';

  const card = document.createElement('div');
  card.className = 'discovery';
  card.innerHTML = `<div class="disc-icon" style="background:${d.bg};color:${d.color};">◆</div><div class="disc-body"><div class="disc-title">${d.title}</div><div class="disc-detail">${d.detail}</div></div>`;
  panel.appendChild(card);
  // Add the class on the next frame, after the card has been inserted.
  requestAnimationFrame(() => card.classList.add('visible'));
}
|
| 1474 |
+
|
| 1475 |
+
/**
 * Append a "<n>. <action>  +0.00" row to the `#rewardHistory` panel.
 *
 * If the panel currently shows its `.empty-state` placeholder, the panel is
 * cleared first.
 *
 * @param {{action: string, total: number}} step - episode step
 * @param {number} index - zero-based step index (displayed one-based)
 */
function addRewardHistory(step, index) {
  const panel = document.getElementById('rewardHistory');
  if (panel.querySelector('.empty-state')) panel.innerHTML = '';

  // Non-negative totals get the "pos" class and an explicit "+" sign.
  const positive = step.total >= 0;
  const row = document.createElement('div');
  row.className = 'step-reward-mini';
  row.innerHTML = `<span class="srm-name">${index + 1}. ${step.action}</span><span class="srm-val ${positive ? 'pos' : 'neg'}">${positive ? '+' : ''}${step.total.toFixed(2)}</span>`;
  panel.appendChild(row);
  // Add the class on the next frame, after the row has been inserted.
  requestAnimationFrame(() => row.classList.add('visible'));
}
|
| 1484 |
+
|
| 1485 |
+
/**
 * Mark `el` as the active scenario option and deactivate all others.
 * Does nothing while an episode is running.
 *
 * @param {Element} el - the `.scenario-opt` element to activate
 */
function selectScenario(el) {
  if (running) return;
  for (const opt of document.querySelectorAll('.scenario-opt')) {
    opt.classList.remove('active');
  }
  el.classList.add('active');
}
|
| 1490 |
+
|
| 1491 |
+
/**
 * Return a promise that resolves after `ms` milliseconds (via setTimeout).
 *
 * @param {number} ms - delay in milliseconds
 * @returns {Promise<void>}
 */
function wait(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}
|
| 1492 |
+
|
| 1493 |
+
// ---- Run ----
|
| 1494 |
+
/**
 * Replay the scripted EPISODE in the UI from start to finish.
 *
 * Clears all panels, prints the scenario banner, runs every step through
 * runStep() with a pause between steps, then prints the summary. Re-entrant
 * calls are ignored while a run is in progress.
 *
 * The run button and the `running` flag are always restored, even if a step
 * throws; in that case the status text is set to 'Error' and the error is
 * rethrown.
 *
 * @returns {Promise<void>}
 */
async function startDemo() {
  if (running) return;
  running = true;
  runBtn.disabled = true;
  runBtn.textContent = 'Running...';
  statusDot.classList.add('live');
  statusText.textContent = 'Running';

  try {
    // Reset every panel to its empty state.
    terminalEl.innerHTML = '';
    cumReward = 0;
    document.getElementById('pipelineSteps').innerHTML = '';
    document.getElementById('discoveries').innerHTML = '<div class="empty-state">No discoveries yet</div>';
    document.getElementById('rewardHistory').innerHTML = '<div class="empty-state">No steps yet</div>';
    document.getElementById('violations').innerHTML = '<div class="empty-state">No violations</div>';
    clearRewardBars();
    document.getElementById('cumReward').textContent = '0.00';
    document.getElementById('stepRewardLabel').textContent = '--';
    initAgent();

    // Scenario banner.
    addLine('<span class="t-label">[BioEnv]</span> <span class="t-dim">Initializing environment...</span>');
    await wait(500);
    addLine('<span class="t-label">[BioEnv]</span> Scenario: <span class="t-str">biomarker_validation_lung</span> (Hard)');
    await wait(200);
    addLine('<span class="t-label">[BioEnv]</span> Organism: <span class="t-str">Homo sapiens</span> | Tissue: <span class="t-str">Lung</span>');
    await wait(200);
    addLine('<span class="t-label">[BioEnv]</span> Budget: <span class="t-num">$100,000</span> | Time: <span class="t-num">180 days</span> | Max steps: <span class="t-num">30</span>');
    await wait(200);
    addLine('<span class="t-label">[BioEnv]</span> Task: Validate <span class="t-kw">SPP1</span> as biomarker for idiopathic pulmonary fibrosis');
    await wait(400);
    addLine('');

    for (let i = 0; i < EPISODE.length; i++) {
      await runStep(i);
      await wait(500);
    }

    // Done
    moveAgentTo('idle');
    labActionLabel.classList.remove('visible');

    // Take the remaining budget from the last step so the summary cannot
    // drift from the episode data.
    const lastStep = EPISODE[EPISODE.length - 1];
    const budgetLeft = '$' + (lastStep ? lastStep.budget : 100000).toLocaleString('en-US');

    addLine('');
    addLine('<span class="t-label">[BioEnv]</span> <span class="t-ok">Episode complete!</span>');
    addLine('<span class="t-label">[BioEnv]</span> Total reward: <span class="t-ok">+' + cumReward.toFixed(2) + '</span> | Steps: <span class="t-num">' + EPISODE.length + '</span> | Budget remaining: <span class="t-num">' + budgetLeft + '</span>');
    addLine('<span class="t-label">[BioEnv]</span> Literature match: <span class="t-ok">4/5 expected findings confirmed</span>');
    addLine('<span class="t-label">[BioEnv]</span> Calibration: <span class="t-ok">Well-calibrated</span> (no overconfidence penalty)');

    statusText.textContent = 'Complete';
  } catch (err) {
    statusText.textContent = 'Error';
    throw err;
  } finally {
    // Always unlock the UI; otherwise one failed step would leave the demo
    // permanently stuck in the "running" state.
    statusDot.classList.remove('live');
    runBtn.textContent = 'Run Episode';
    runBtn.disabled = false;
    running = false;
  }
}
|
| 1544 |
+
|
| 1545 |
+
/**
 * Play one episode step: move the lab agent, update the pipeline sidebar,
 * gauges, terminal, reward panels and (if present) the discovery card.
 *
 * @param {number} i - zero-based index into EPISODE
 * @returns {Promise<void>} resolves once the step's output has been printed
 */
async function runStep(i) {
  // Episode limits shown in the gauges (same figures as the scenario banner).
  const MAX_STEPS = 30;
  const TOTAL_DAYS = 180;

  const step = EPISODE[i];
  // Actions without an explicit station mapping fall back to 'computer'.
  const station = ACTION_STATION[step.action] || 'computer';

  // Move agent in lab
  moveAgentTo(station);
  labActionLabel.textContent = step.action + '()';
  labActionLabel.classList.add('visible');
  await wait(800); // wait for agent to travel

  // Start working animation
  setAgentWorking(step.action);
  spawnParticles(agent.targetX, agent.targetY, STATIONS[station].color);

  // Pipeline sidebar: mark the previous step done, this one active.
  const pipeEl = addPipeStep(step, i);
  if (i > 0) {
    const prev = document.getElementById('pipe-' + (i - 1));
    // The previous entry can be absent if the sidebar was cleared mid-run.
    if (prev) {
      prev.classList.remove('active');
      prev.classList.add('done');
      prev.querySelector('.step-icon').innerHTML = '✓';
    }
  }
  pipeEl.classList.add('active');

  // Gauges: budget colour goes green -> amber -> red as it drains.
  let budgetColor;
  if (step.budgetPct > 50) {
    budgetColor = 'var(--green)';
  } else if (step.budgetPct > 25) {
    budgetColor = 'var(--amber)';
  } else {
    budgetColor = 'var(--red)';
  }
  setGauge('budget', '$' + step.budget.toLocaleString(), step.budgetPct, budgetColor);
  setGauge('time', step.time + ' / ' + TOTAL_DAYS + ' days', step.timePct, 'var(--cyan)');
  setGauge('step', (i + 1) + ' / ' + MAX_STEPS, ((i + 1) / MAX_STEPS * 100), 'var(--accent)');

  // Terminal output
  const catTag = step.category === 'wet' ? '<span class="t-ok">WET</span>'
    : step.category === 'comp' ? '<span class="t-label">CMP</span>'
    : '<span class="t-kw">META</span>';
  addLine(`<span class="t-dim">Step ${i + 1}</span> ${catTag} <span class="t-fn">${step.action}</span>(<span class="t-str">${step.params}</span>)`);
  await wait(300);

  for (const line of step.output) {
    addLine(' <span class="t-sub">' + line + '</span>');
    await wait(80);
  }

  // Reward
  cumReward += step.total;
  document.getElementById('stepRewardLabel').textContent = 'Step ' + (i + 1) + ': ' + step.action;
  setRewardBars(step.reward);
  document.getElementById('cumReward').textContent = cumReward.toFixed(2);
  addRewardHistory(step, i);

  const rewardStr = step.total >= 0
    ? '<span class="t-ok">+' + step.total.toFixed(2) + '</span>'
    : '<span class="t-err">' + step.total.toFixed(2) + '</span>';
  addLine(` <span class="t-dim">reward: ${rewardStr} <span class="t-dim">(cumulative: ${cumReward.toFixed(2)})</span></span>`);
  addLine('');

  if (step.discovery) addDiscovery(step.discovery);

  // Done working
  agent.working = false;
  spawnParticles(agent.targetX, agent.targetY, '#34d399', 6);

  // No later step will mark the terminal step done, so do it here.
  if (step.terminal) {
    pipeEl.classList.remove('active');
    pipeEl.classList.add('done');
    pipeEl.querySelector('.step-icon').innerHTML = '✓';
  }
}
|
| 1612 |
+
|
| 1613 |
+
/**
 * Return the whole UI to its pre-run state: empty panels, full gauges,
 * 'Ready' status, agent re-initialised. Ignored while an episode is running.
 */
function resetDemo() {
  if (running) return;

  // Panels
  terminalEl.innerHTML = '';
  cumReward = 0;
  document.getElementById('pipelineSteps').innerHTML = '';
  document.getElementById('discoveries').innerHTML = '<div class="empty-state">No discoveries yet</div>';
  document.getElementById('rewardHistory').innerHTML = '<div class="empty-state">No steps yet</div>';
  document.getElementById('violations').innerHTML = '<div class="empty-state">No violations</div>';
  clearRewardBars();
  document.getElementById('cumReward').textContent = '0.00';
  document.getElementById('stepRewardLabel').textContent = '--';

  // Gauges back to their starting values
  setGauge('budget', '$100,000', 100, 'var(--green)');
  setGauge('time', '180 / 180 days', 100, 'var(--cyan)');
  setGauge('step', '0 / 30', 0, 'var(--accent)');

  // Status and lab scene
  statusDot.classList.remove('live');
  statusText.textContent = 'Ready';
  labActionLabel.classList.remove('visible');
  initAgent();

  addLine('<span class="t-dim">Environment reset. Click "Run Episode" to start.</span>');
}
|
| 1633 |
+
|
| 1634 |
+
// Init: print the idle banner into the terminal at script load.
addLine('<span class="t-dim">BioEnv v1.0 | biomarker_validation_lung</span>');
addLine('<span class="t-dim">Click "Run Episode" to start the demo.</span>');
|
| 1637 |
+
</script>
|
| 1638 |
+
</body>
|
| 1639 |
+
</html>
|
models.py
ADDED
|
@@ -0,0 +1,927 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Data models for the Drug Target Validation RL Environment.
|
| 3 |
+
|
| 4 |
+
Defines the POMDP action and observation contracts for an agent that acts
|
| 5 |
+
as a computational pharma scientist. Given a proposed drug target and a
|
| 6 |
+
disease context, the agent issues bioinformatics / clinical / experimental
|
| 7 |
+
queries one at a time and finally submits a go / no-go validation report.
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
from __future__ import annotations
|
| 11 |
+
|
| 12 |
+
from enum import Enum
|
| 13 |
+
from typing import Any, Dict, List, Optional
|
| 14 |
+
|
| 15 |
+
from pydantic import BaseModel, Field
|
| 16 |
+
|
| 17 |
+
from openenv.core.env_server.types import Action, Observation
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
# ── Action vocabulary ───────────────────────────────────────────────────────
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
class ActionType(str, Enum):
    """Closed vocabulary of actions the agent can issue.

    Members are grouped by category (omics, protein, clinical, literature,
    experimental, meta). The class mixes in ``str``, so each member compares
    equal to its string value.
    """

    # Expression & Omics
    QUERY_EXPRESSION = "query_expression"
    DIFFERENTIAL_EXPRESSION = "differential_expression"
    PATHWAY_ENRICHMENT = "pathway_enrichment"
    COEXPRESSION_NETWORK = "coexpression_network"

    # Protein & Structure
    PROTEIN_STRUCTURE_LOOKUP = "protein_structure_lookup"
    BINDING_SITE_ANALYSIS = "binding_site_analysis"
    PROTEIN_INTERACTION_NETWORK = "protein_interaction_network"
    DRUGGABILITY_SCREEN = "druggability_screen"

    # Clinical & Safety
    CLINICAL_TRIAL_LOOKUP = "clinical_trial_lookup"
    TOXICITY_PANEL = "toxicity_panel"
    OFF_TARGET_SCREEN = "off_target_screen"
    PATIENT_STRATIFICATION = "patient_stratification"

    # Literature & Evidence
    LITERATURE_SEARCH = "literature_search"
    EVIDENCE_SYNTHESIS = "evidence_synthesis"
    COMPETITOR_LANDSCAPE = "competitor_landscape"

    # Experimental (expensive, consume more credits)
    IN_VITRO_ASSAY = "in_vitro_assay"
    IN_VIVO_MODEL = "in_vivo_model"
    CRISPR_KNOCKOUT = "crispr_knockout"
    BIOMARKER_CORRELATION = "biomarker_correlation"

    # Meta
    FLAG_RED_FLAG = "flag_red_flag"
    REQUEST_EXPERT_REVIEW = "request_expert_review"
    SUBMIT_VALIDATION_REPORT = "submit_validation_report"  # terminal action
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
# Immutable groupings of ActionType members, one set per category. They
# follow the section comments inside ActionType.

OMICS_ACTIONS = frozenset({
    ActionType.QUERY_EXPRESSION,
    ActionType.DIFFERENTIAL_EXPRESSION,
    ActionType.PATHWAY_ENRICHMENT,
    ActionType.COEXPRESSION_NETWORK,
})

PROTEIN_ACTIONS = frozenset({
    ActionType.PROTEIN_STRUCTURE_LOOKUP,
    ActionType.BINDING_SITE_ANALYSIS,
    ActionType.PROTEIN_INTERACTION_NETWORK,
    ActionType.DRUGGABILITY_SCREEN,
})

CLINICAL_ACTIONS = frozenset({
    ActionType.CLINICAL_TRIAL_LOOKUP,
    ActionType.TOXICITY_PANEL,
    ActionType.OFF_TARGET_SCREEN,
    ActionType.PATIENT_STRATIFICATION,
})

LITERATURE_ACTIONS = frozenset({
    ActionType.LITERATURE_SEARCH,
    ActionType.EVIDENCE_SYNTHESIS,
    ActionType.COMPETITOR_LANDSCAPE,
})

EXPERIMENTAL_ACTIONS = frozenset({
    ActionType.IN_VITRO_ASSAY,
    ActionType.IN_VIVO_MODEL,
    ActionType.CRISPR_KNOCKOUT,
    ActionType.BIOMARKER_CORRELATION,
})

META_ACTIONS = frozenset({
    ActionType.FLAG_RED_FLAG,
    ActionType.REQUEST_EXPERT_REVIEW,
    ActionType.SUBMIT_VALIDATION_REPORT,
})
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
# ── Tool registry (pharma / bioinformatics) ─────────────────────────────────
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
class ToolCategory(str, Enum):
    """Category assigned to a tool or database in the tool registry.

    The class mixes in ``str``, so each member compares equal to its string
    value.
    """

    EXPRESSION_DB = "expression_db"
    OMICS_ANALYSIS = "omics_analysis"
    PATHWAY_DB = "pathway_db"
    PROTEIN_STRUCTURE = "protein_structure"
    BINDING_SITE = "binding_site"
    INTERACTION_NETWORK = "interaction_network"
    DRUGGABILITY = "druggability"
    CLINICAL_DB = "clinical_db"
    SAFETY_DB = "safety_db"
    OFF_TARGET = "off_target"
    LITERATURE = "literature"
    PATIENT_GENOMICS = "patient_genomics"
    IN_VITRO = "in_vitro"
    IN_VIVO = "in_vivo"
    CRISPR = "crispr"
    BIOMARKER = "biomarker"
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
class ToolSpec(BaseModel):
    """Registry entry describing a pharma / bioinformatics tool or database."""

    # Identity. In TOOL_REGISTRY the name doubles as the dictionary key.
    name: str
    category: ToolCategory

    # Action types this tool supports; tools_for_action() filters on it.
    relevant_actions: List[ActionType] = Field(default_factory=list)

    # Free-text summary plus the kinds of data consumed / produced. All
    # three are rendered into the prompt by describe_tool_for_agent().
    description: str = ""
    input_types: List[str] = Field(default_factory=list)
    output_types: List[str] = Field(default_factory=list)

    # Cost metadata. The credit cost is shown to the agent as "Approx cost".
    typical_runtime_hours: float = 0.1
    typical_credit_cost: int = 1

    # Descriptive flags; nothing in this part of the module reads them.
    requires_compute: bool = False
    open_source: bool = True
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
# Name -> ToolSpec lookup table for every simulated tool / database.
# Each entry is keyed by its own ``name``, so the key is derived from the
# spec instead of being typed twice; insertion order follows the tuple.
TOOL_REGISTRY: Dict[str, ToolSpec] = {
    spec.name: spec
    for spec in (
        # ── Expression & omics databases ──
        ToolSpec(
            name="GTEx",
            category=ToolCategory.EXPRESSION_DB,
            relevant_actions=[ActionType.QUERY_EXPRESSION],
            description="Tissue-level expression atlas across normal human tissues",
            input_types=["gene_symbol"],
            output_types=["tissue_expression"],
            typical_credit_cost=2,
        ),
        ToolSpec(
            name="TCGA",
            category=ToolCategory.EXPRESSION_DB,
            relevant_actions=[
                ActionType.QUERY_EXPRESSION,
                ActionType.DIFFERENTIAL_EXPRESSION,
                ActionType.BIOMARKER_CORRELATION,
            ],
            description="The Cancer Genome Atlas tumor vs normal expression / mutation",
            input_types=["gene_symbol", "indication"],
            output_types=["tumor_expression", "mutation_frequency"],
            typical_credit_cost=2,
        ),
        ToolSpec(
            name="Human_Protein_Atlas",
            category=ToolCategory.EXPRESSION_DB,
            relevant_actions=[ActionType.QUERY_EXPRESSION],
            description="Antibody-based protein expression across normal and cancer tissues",
            input_types=["gene_symbol"],
            output_types=["protein_expression", "tissue_specificity"],
        ),
        ToolSpec(
            name="DepMap",
            category=ToolCategory.OMICS_ANALYSIS,
            relevant_actions=[
                ActionType.CRISPR_KNOCKOUT,
                ActionType.COEXPRESSION_NETWORK,
            ],
            description="Cancer Dependency Map: genome-scale CRISPR essentiality scores",
            input_types=["gene_symbol", "cell_line_panel"],
            output_types=["essentiality_score", "synthetic_lethality"],
            typical_credit_cost=4,
        ),
        ToolSpec(
            name="ARCHS4",
            category=ToolCategory.OMICS_ANALYSIS,
            relevant_actions=[
                ActionType.COEXPRESSION_NETWORK,
                ActionType.QUERY_EXPRESSION,
            ],
            description="Massive RNA-seq compendium for coexpression and tissue queries",
            input_types=["gene_symbol"],
            output_types=["coexpression_partners"],
        ),
        ToolSpec(
            name="GEO",
            category=ToolCategory.OMICS_ANALYSIS,
            relevant_actions=[
                ActionType.DIFFERENTIAL_EXPRESSION,
                ActionType.QUERY_EXPRESSION,
            ],
            description="Gene Expression Omnibus: curated bulk and single-cell datasets",
            input_types=["gene_symbol", "indication"],
            output_types=["de_result"],
        ),
        # ── Pathway / annotation databases ──
        ToolSpec(
            name="Reactome",
            category=ToolCategory.PATHWAY_DB,
            relevant_actions=[ActionType.PATHWAY_ENRICHMENT],
            description="Curated human pathway and reaction database",
            input_types=["gene_list"],
            output_types=["pathway_enrichment"],
        ),
        ToolSpec(
            name="KEGG",
            category=ToolCategory.PATHWAY_DB,
            relevant_actions=[ActionType.PATHWAY_ENRICHMENT],
            description="KEGG metabolic and signalling pathways",
            input_types=["gene_list"],
            output_types=["pathway_enrichment"],
        ),
        ToolSpec(
            name="MSigDB",
            category=ToolCategory.PATHWAY_DB,
            relevant_actions=[ActionType.PATHWAY_ENRICHMENT],
            description="Molecular Signatures Database for GSEA",
            input_types=["ranked_gene_list"],
            output_types=["pathway_enrichment"],
        ),
        # ── Protein structure / binding-site tools ──
        ToolSpec(
            name="AlphaFold",
            category=ToolCategory.PROTEIN_STRUCTURE,
            relevant_actions=[
                ActionType.PROTEIN_STRUCTURE_LOOKUP,
                ActionType.BINDING_SITE_ANALYSIS,
            ],
            description="Predicted full-length 3D protein structures",
            input_types=["uniprot_id", "gene_symbol"],
            output_types=["pdb_structure", "plddt_confidence"],
            typical_credit_cost=3,
        ),
        ToolSpec(
            name="PDB",
            category=ToolCategory.PROTEIN_STRUCTURE,
            relevant_actions=[ActionType.PROTEIN_STRUCTURE_LOOKUP],
            description="Experimentally determined protein structures",
            input_types=["uniprot_id"],
            output_types=["pdb_structure"],
        ),
        ToolSpec(
            name="UniProt",
            category=ToolCategory.PROTEIN_STRUCTURE,
            relevant_actions=[
                ActionType.PROTEIN_STRUCTURE_LOOKUP,
                ActionType.PROTEIN_INTERACTION_NETWORK,
            ],
            description="Curated protein sequence and functional annotation",
            input_types=["gene_symbol"],
            output_types=["uniprot_entry", "domain_annotation"],
        ),
        ToolSpec(
            name="fpocket",
            category=ToolCategory.BINDING_SITE,
            relevant_actions=[ActionType.BINDING_SITE_ANALYSIS],
            description="Geometric pocket detection on protein structures",
            input_types=["pdb_structure"],
            output_types=["pocket_list", "druggability_score"],
            requires_compute=True,
        ),
        ToolSpec(
            name="SiteMap",
            category=ToolCategory.BINDING_SITE,
            relevant_actions=[ActionType.BINDING_SITE_ANALYSIS],
            description="Schrödinger binding-site detection and scoring",
            input_types=["pdb_structure"],
            output_types=["pocket_list", "site_score"],
            open_source=False,
            typical_credit_cost=3,
        ),
        # ── Druggability / chemistry ──
        ToolSpec(
            name="ChEMBL",
            category=ToolCategory.DRUGGABILITY,
            relevant_actions=[
                ActionType.DRUGGABILITY_SCREEN,
                ActionType.COMPETITOR_LANDSCAPE,
            ],
            description="Bioactivity database of drug-like molecules vs targets",
            input_types=["gene_symbol", "uniprot_id"],
            output_types=["bioactivity", "known_ligands"],
            typical_credit_cost=3,
        ),
        ToolSpec(
            name="DrugBank",
            category=ToolCategory.DRUGGABILITY,
            relevant_actions=[
                ActionType.DRUGGABILITY_SCREEN,
                ActionType.COMPETITOR_LANDSCAPE,
            ],
            description="Comprehensive drug and target reference",
            input_types=["gene_symbol"],
            output_types=["approved_drugs", "drug_target_pairs"],
        ),
        ToolSpec(
            name="OpenTargets",
            category=ToolCategory.DRUGGABILITY,
            relevant_actions=[
                ActionType.DRUGGABILITY_SCREEN,
                ActionType.EVIDENCE_SYNTHESIS,
            ],
            description="Integrated target-disease evidence platform",
            input_types=["gene_symbol", "indication"],
            output_types=["target_score", "evidence_summary"],
        ),
        ToolSpec(
            name="canSAR",
            category=ToolCategory.DRUGGABILITY,
            relevant_actions=[ActionType.DRUGGABILITY_SCREEN],
            description="Cancer translational research and drug discovery knowledgebase",
            input_types=["gene_symbol"],
            output_types=["druggability_score", "ligandability"],
        ),
        # ── Interaction networks ──
        ToolSpec(
            name="STRING",
            category=ToolCategory.INTERACTION_NETWORK,
            relevant_actions=[
                ActionType.PROTEIN_INTERACTION_NETWORK,
                ActionType.COEXPRESSION_NETWORK,
            ],
            description="Protein-protein interaction database with confidence scores",
            input_types=["gene_symbol"],
            output_types=["ppi_network"],
        ),
        ToolSpec(
            name="BioGRID",
            category=ToolCategory.INTERACTION_NETWORK,
            relevant_actions=[ActionType.PROTEIN_INTERACTION_NETWORK],
            description="Curated genetic and protein-protein interactions",
            input_types=["gene_symbol"],
            output_types=["ppi_network", "genetic_interactions"],
        ),
        # ── Clinical & safety ──
        ToolSpec(
            name="ClinicalTrials_gov",
            category=ToolCategory.CLINICAL_DB,
            relevant_actions=[
                ActionType.CLINICAL_TRIAL_LOOKUP,
                ActionType.COMPETITOR_LANDSCAPE,
            ],
            description="Registry of human clinical trials worldwide",
            input_types=["gene_symbol", "indication"],
            output_types=["trial_list", "phase_status"],
        ),
        ToolSpec(
            name="FAERS",
            category=ToolCategory.SAFETY_DB,
            relevant_actions=[ActionType.TOXICITY_PANEL],
            description="FDA Adverse Event Reporting System",
            input_types=["drug_name", "gene_symbol"],
            output_types=["adverse_events"],
        ),
        ToolSpec(
            name="ToxCast",
            category=ToolCategory.SAFETY_DB,
            relevant_actions=[ActionType.TOXICITY_PANEL],
            description="EPA high-throughput toxicology assays",
            input_types=["compound", "gene_symbol"],
            output_types=["toxicity_assays"],
            typical_credit_cost=3,
        ),
        ToolSpec(
            name="gnomAD",
            category=ToolCategory.PATIENT_GENOMICS,
            relevant_actions=[
                ActionType.PATIENT_STRATIFICATION,
                ActionType.OFF_TARGET_SCREEN,
            ],
            description="Population variant frequencies and constraint metrics",
            input_types=["gene_symbol"],
            output_types=["pLI_score", "loftool_score"],
        ),
        ToolSpec(
            name="ClinVar",
            category=ToolCategory.PATIENT_GENOMICS,
            relevant_actions=[ActionType.PATIENT_STRATIFICATION],
            description="Clinically interpreted germline and somatic variants",
            input_types=["gene_symbol"],
            output_types=["pathogenic_variants"],
        ),
        # ── Off-target / selectivity ──
        ToolSpec(
            name="Eurofins_DiscoverX",
            category=ToolCategory.OFF_TARGET,
            relevant_actions=[ActionType.OFF_TARGET_SCREEN],
            description="Kinome-wide selectivity profiling panels",
            input_types=["compound"],
            output_types=["kinase_selectivity"],
            open_source=False,
            typical_credit_cost=3,
        ),
        ToolSpec(
            name="SafetyPanel",
            category=ToolCategory.OFF_TARGET,
            relevant_actions=[
                ActionType.OFF_TARGET_SCREEN,
                ActionType.TOXICITY_PANEL,
            ],
            description="Standard secondary pharmacology / off-target assay panel",
            input_types=["compound"],
            output_types=["off_target_hits"],
            typical_credit_cost=3,
        ),
        # ── Literature ──
        ToolSpec(
            name="PubMed",
            category=ToolCategory.LITERATURE,
            relevant_actions=[
                ActionType.LITERATURE_SEARCH,
                ActionType.EVIDENCE_SYNTHESIS,
            ],
            description="Biomedical literature database",
            input_types=["query"],
            output_types=["abstract_list"],
            typical_credit_cost=1,
        ),
        ToolSpec(
            name="Europe_PMC",
            category=ToolCategory.LITERATURE,
            relevant_actions=[ActionType.LITERATURE_SEARCH],
            description="Open biomedical literature search with full-text mining",
            input_types=["query"],
            output_types=["abstract_list", "fulltext_excerpts"],
        ),
        # ── Experimental wet-lab ──
        ToolSpec(
            name="InVitroPanel",
            category=ToolCategory.IN_VITRO,
            relevant_actions=[
                ActionType.IN_VITRO_ASSAY,
                ActionType.BIOMARKER_CORRELATION,
            ],
            description="Cell-line viability / IC50 panel against the proposed target",
            input_types=["compound", "cell_line_panel"],
            output_types=["IC50", "selectivity_window"],
            typical_runtime_hours=72.0,
            typical_credit_cost=5,
            requires_compute=False,
        ),
        ToolSpec(
            name="MouseModel",
            category=ToolCategory.IN_VIVO,
            relevant_actions=[ActionType.IN_VIVO_MODEL],
            description="In-vivo efficacy + tolerability in disease-relevant mouse models",
            input_types=["compound", "indication"],
            output_types=["efficacy_endpoint", "tolerability", "PK_PD"],
            typical_runtime_hours=720.0,
            typical_credit_cost=8,
        ),
        ToolSpec(
            name="CRISPR_screen",
            category=ToolCategory.CRISPR,
            relevant_actions=[ActionType.CRISPR_KNOCKOUT],
            description="Genome- or focused-library CRISPR knockout / dependency screen",
            input_types=["gene_symbol", "cell_line_panel"],
            output_types=["essentiality_score", "synthetic_lethality"],
            typical_credit_cost=4,
        ),
        ToolSpec(
            name="BiomarkerPanel",
            category=ToolCategory.BIOMARKER,
            relevant_actions=[
                ActionType.BIOMARKER_CORRELATION,
                ActionType.PATIENT_STRATIFICATION,
            ],
            description="Patient-derived biomarker correlation with target activity",
            input_types=["gene_symbol", "patient_cohort"],
            output_types=["biomarker_correlation"],
            typical_credit_cost=3,
        ),
    )
}
|
| 481 |
+
|
| 482 |
+
|
| 483 |
+
# ── Registry helper functions ──────────────────────────────────────────────
|
| 484 |
+
|
| 485 |
+
|
| 486 |
+
def tools_by_category(category: ToolCategory) -> List[ToolSpec]:
    """List the registered tools whose ``category`` equals *category*.

    Results follow ``TOOL_REGISTRY`` insertion order; an unused category
    yields an empty list.
    """
    return [
        spec
        for spec in TOOL_REGISTRY.values()
        if spec.category == category
    ]
|
| 489 |
+
|
| 490 |
+
|
| 491 |
+
def tools_for_action(action_type: ActionType) -> List[ToolSpec]:
    """List the registered tools that name *action_type* as relevant.

    A tool matches when *action_type* appears in its ``relevant_actions``.
    Results follow ``TOOL_REGISTRY`` insertion order; an action no tool
    lists yields an empty list.
    """
    return [
        spec
        for spec in TOOL_REGISTRY.values()
        if action_type in spec.relevant_actions
    ]
|
| 494 |
+
|
| 495 |
+
|
| 496 |
+
# ── Action schema ───────────────────────────────────────────────────────────
|
| 497 |
+
|
| 498 |
+
|
| 499 |
+
class DrugTargetAction(Action):
    """Structured action for one drug-target-validation step.

    Hybrid representation: a discrete ``action_type`` plus typed
    ``parameters``, an optional free-text ``reasoning`` string, and the
    terminal-only ``final_decision`` / ``confidence`` fields used when the
    agent submits its validation report.
    """

    # Required: the only field without a default.
    action_type: ActionType = Field(
        ...,
        description=(
            "Discrete simulator step type. Each action type maps to a "
            "specific class of pharma / bioinformatics query, in-vitro / "
            "in-vivo experiment, or terminal report submission."
        ),
    )

    # Free-form per-action arguments; defaults to a fresh empty dict.
    parameters: Dict[str, Any] = Field(
        default_factory=dict,
        description=(
            "Action-specific arguments such as the database to query, the "
            "compound to profile, or include_allosteric flags. Use only "
            "parameters that materially change the simulated output."
        ),
    )

    reasoning: str = Field(
        default="",
        description=(
            "Short scientific rationale explaining why this is the right "
            "next step in the current investigation."
        ),
    )

    # Typed as a plain optional string: the 'go' / 'no_go' vocabulary is
    # stated in the description only and is not enforced by this schema.
    final_decision: Optional[str] = Field(
        default=None,
        description=(
            "'go' or 'no_go' recommendation. Only set on a "
            "SUBMIT_VALIDATION_REPORT action."
        ),
    )

    # Constrained to the closed interval [0, 1] when provided.
    confidence: Optional[float] = Field(
        default=None,
        ge=0.0,
        le=1.0,
        description=(
            "Calibrated confidence in the final decision in [0, 1]. Only "
            "set on a SUBMIT_VALIDATION_REPORT action."
        ),
    )
|
| 547 |
+
|
| 548 |
+
|
| 549 |
+
# ── Intermediate outputs ────────────────────────────────────────────────────
|
| 550 |
+
|
| 551 |
+
|
| 552 |
+
class OutputType(str, Enum):
    # String-valued tag stored on IntermediateOutput.output_type and
    # ValidationStepRecord.output_type. Documented with comments rather
    # than a docstring so the class ``__doc__`` stays as-is.

    # Expression-level results.
    EXPRESSION_RESULT = "expression_result"
    DE_RESULT = "de_result"
    PATHWAY_RESULT = "pathway_result"
    COEXPRESSION_RESULT = "coexpression_result"

    # Protein-level results.
    STRUCTURE_RESULT = "structure_result"
    BINDING_SITE_RESULT = "binding_site_result"
    INTERACTION_RESULT = "interaction_result"
    DRUGGABILITY_RESULT = "druggability_result"

    # Clinical / safety results.
    CLINICAL_RESULT = "clinical_result"
    TOXICITY_RESULT = "toxicity_result"
    OFF_TARGET_RESULT = "off_target_result"
    PATIENT_STRATIFICATION_RESULT = "patient_stratification_result"

    # Literature-derived results.
    LITERATURE_RESULT = "literature_result"
    EVIDENCE_SYNTHESIS_RESULT = "evidence_synthesis_result"
    COMPETITOR_LANDSCAPE_RESULT = "competitor_landscape_result"

    # Experimental results.
    IN_VITRO_RESULT = "in_vitro_result"
    IN_VIVO_RESULT = "in_vivo_result"
    CRISPR_RESULT = "crispr_result"
    BIOMARKER_RESULT = "biomarker_result"

    # Annotations, reviews and report / failure outputs.
    RED_FLAG_NOTE = "red_flag_note"
    EXPERT_REVIEW = "expert_review"
    VALIDATION_REPORT = "validation_report"
    FAILURE_REPORT = "failure_report"
|
| 576 |
+
|
| 577 |
+
|
| 578 |
+
class IntermediateOutput(BaseModel):
    """A single simulated output from one validation step."""

    # Required identification of the output.
    output_type: OutputType
    step_index: int

    # Outcome flag and a quality score validated to lie in [0, 1].
    success: bool = True
    quality_score: float = Field(default=1.0, ge=0.0, le=1.0)

    # Human-readable summary plus the free-form payload.
    summary: str = ""
    data: Dict[str, Any] = Field(default_factory=dict)

    # Uncertainty is validated to lie in [0, 1]; the lists start empty.
    uncertainty: float = Field(default=0.0, ge=0.0, le=1.0)
    warnings: List[str] = Field(default_factory=list)
    artifacts_available: List[str] = Field(default_factory=list)
|
| 590 |
+
|
| 591 |
+
|
| 592 |
+
# ── Observable state components ─────────────────────────────────────────────
|
| 593 |
+
|
| 594 |
+
|
| 595 |
+
class CreditUsage(BaseModel):
    """Agent-visible view of the experimental credit budget."""

    # Defaults describe an untouched budget of 50 credits, the same
    # default that ValidationTaskSpec.credits_limit uses.
    credits_used: int = 0
    credits_remaining: int = 50
    credits_total: int = 50
|
| 601 |
+
|
| 602 |
+
|
| 603 |
+
class ValidationStepRecord(BaseModel):
    """One row of the agent's pipeline history."""

    # What was executed.
    step_index: int
    action_type: ActionType
    parameters: Dict[str, Any] = Field(default_factory=dict)

    # What came back. ``output_type`` is required even though it follows
    # defaulted fields; pydantic permits that ordering.
    output_summary: str = ""
    output_type: OutputType
    success: bool = True
    # NOTE(review): unlike IntermediateOutput.quality_score this value is
    # not range-checked to [0, 1] — confirm whether that is intentional.
    quality_score: float = 1.0

    # Credits charged for the step.
    credit_cost: int = 0
|
| 614 |
+
|
| 615 |
+
|
| 616 |
+
class EvidenceDossier(BaseModel):
    """Structured running dossier of everything the agent has discovered.

    Maintained on the environment side and surfaced verbatim inside each
    ``ValidationObservation``. It is the primary state the agent should
    consult when deciding what to investigate next.
    """

    # One free-form mapping per evidence area; each starts empty.
    expression_findings: Dict[str, Any] = Field(default_factory=dict)
    protein_findings: Dict[str, Any] = Field(default_factory=dict)
    clinical_findings: Dict[str, Any] = Field(default_factory=dict)
    safety_findings: Dict[str, Any] = Field(default_factory=dict)
    literature_findings: Dict[str, Any] = Field(default_factory=dict)

    # Experimental results are a list of mappings rather than one mapping.
    experimental_results: List[Dict[str, Any]] = Field(default_factory=list)

    # Concerns recorded as plain strings.
    flagged_red_flags: List[str] = Field(default_factory=list)

    # Running total of credits spent.
    credits_used: int = 0
|
| 632 |
+
|
| 633 |
+
|
| 634 |
+
class ValidationTaskSpec(BaseModel):
    """Specification of the drug-target-validation problem to solve."""

    # Problem framing; every field has a placeholder default.
    problem_statement: str = "Unspecified drug target validation problem"
    target_gene: str = "UNKNOWN"
    disease_context: str = "unspecified disease"
    indication: str = "unspecified indication"

    # Credit budget for the task.
    credits_limit: int = 50

    success_criteria: List[str] = Field(default_factory=list)
    prior_observations: List[str] = Field(default_factory=list)

    # Defaults to the string value of every ActionType member.
    available_actions: List[str] = Field(
        default_factory=lambda: [member.value for member in ActionType],
    )

    expected_findings: List[Any] = Field(default_factory=list)
    dataset_metadata: Dict[str, Any] = Field(default_factory=dict)
|
| 649 |
+
|
| 650 |
+
|
| 651 |
+
# ── Observation schema ──────────────────────────────────────────────────────
|
| 652 |
+
|
| 653 |
+
|
| 654 |
+
class ValidationObservation(Observation):
    """Full observable state returned to the agent at each timestep.

    Deliberately excludes the hidden ``TargetProfile``, which the agent
    must infer through investigation.
    """

    # Task framing; defaults mirror those of ValidationTaskSpec.
    target_gene: str = "UNKNOWN"
    disease_context: str = "unspecified disease"
    indication: str = "unspecified indication"

    # Budget, rendered as "Credits: <remaining>/<total> remaining" by
    # build_agent_observation_context().
    credits_remaining: int = 50
    credits_total: int = 50

    # Accumulated evidence and per-step history (plain dict rows).
    dossier: EvidenceDossier = Field(default_factory=EvidenceDossier)
    pipeline_history: List[Dict[str, Any]] = Field(default_factory=list)

    # Unlike ValidationTaskSpec.available_actions, this defaults to empty.
    available_actions: List[str] = Field(default_factory=list)

    # Episode progress and reward signal.
    step_index: int = 0
    done: bool = False
    reward: float = 0.0
    step_reward_breakdown: Dict[str, float] = Field(default_factory=dict)
    rule_violations: List[str] = Field(default_factory=list)

    # Most recent step output; None when there is none.
    latest_output: Optional[IntermediateOutput] = None
    metadata: Dict[str, Any] = Field(default_factory=dict)
|
| 676 |
+
|
| 677 |
+
|
| 678 |
+
# ── Agent prompt scaffolding ────────────────────────────────────────────────
|
| 679 |
+
|
| 680 |
+
|
| 681 |
+
# One guidance sentence per action type. build_agent_system_prompt()
# indexes this mapping with every ActionType member, so each member must
# have an entry here or prompt construction raises KeyError.
AGENT_ACTION_GUIDANCE: Dict[ActionType, str] = {
    # Expression-level actions
    ActionType.QUERY_EXPRESSION: (
        "Cheap expression lookup across normal and disease tissues. Run "
        "early to gauge tissue specificity and disease over-expression."
    ),
    ActionType.DIFFERENTIAL_EXPRESSION: (
        "Disease-vs-normal differential expression. Useful to confirm "
        "disease-driven dysregulation of the target."
    ),
    ActionType.PATHWAY_ENRICHMENT: (
        "Find pathways the target participates in. Best after expression / "
        "DE so you have an informative gene context."
    ),
    ActionType.COEXPRESSION_NETWORK: (
        "Identify functionally related genes. Useful for mechanism "
        "hypotheses and synthetic-lethality candidates."
    ),
    # Protein-level actions
    ActionType.PROTEIN_STRUCTURE_LOOKUP: (
        "Pull experimental or AlphaFold structures of the target."
    ),
    ActionType.BINDING_SITE_ANALYSIS: (
        "Detect ligandable pockets. Pass include_allosteric=true for "
        "non-classical sites."
    ),
    ActionType.PROTEIN_INTERACTION_NETWORK: (
        "Map first-degree PPI partners. Useful for off-target reasoning."
    ),
    ActionType.DRUGGABILITY_SCREEN: (
        "High-level druggability assessment. Critical for any go/no_go."
    ),
    # Clinical / safety actions
    ActionType.CLINICAL_TRIAL_LOOKUP: (
        "Look up clinical precedent for this target / indication. Often "
        "decisive for borderline scenarios."
    ),
    ActionType.TOXICITY_PANEL: (
        "Probe target-mediated toxicity. Best after expression so on-target "
        "tissue toxicity can be interpreted."
    ),
    ActionType.OFF_TARGET_SCREEN: (
        "Quantify off-target / paralog selectivity. Always run when "
        "selectivity is plausibly limiting."
    ),
    ActionType.PATIENT_STRATIFICATION: (
        "Identify responder subpopulations and biomarker hypotheses."
    ),
    # Literature actions
    ActionType.LITERATURE_SEARCH: (
        "Cheap PubMed / Europe-PMC scan. Cheap to run and often surfaces "
        "recent precedent that overrides historical priors."
    ),
    ActionType.EVIDENCE_SYNTHESIS: (
        "Aggregate prior findings into a coherent picture. Best run after "
        "several queries have populated the dossier."
    ),
    ActionType.COMPETITOR_LANDSCAPE: (
        "Survey other programs against the same target. Useful for "
        "differentiation strategy."
    ),
    # Experimental actions (the quoted credit costs are literal text)
    ActionType.IN_VITRO_ASSAY: (
        "Expensive cell-line assay (5 credits). Run after computational "
        "evidence justifies wet-lab spend."
    ),
    ActionType.IN_VIVO_MODEL: (
        "Most expensive action (8 credits). Should only follow positive "
        "in-vitro signal."
    ),
    ActionType.CRISPR_KNOCKOUT: (
        "Functional knockout / dependency check (4 credits)."
    ),
    ActionType.BIOMARKER_CORRELATION: (
        "Correlate target activity with patient biomarkers (3 credits)."
    ),
    # Meta actions
    ActionType.FLAG_RED_FLAG: (
        "Free annotation that records a concern in the dossier without "
        "spending credits."
    ),
    ActionType.REQUEST_EXPERT_REVIEW: (
        "Lightweight critique by a simulated reviewer. Use sparingly."
    ),
    ActionType.SUBMIT_VALIDATION_REPORT: (
        "Terminal action. Must include final_decision ('go' / 'no_go') and "
        "a calibrated confidence score; the episode ends immediately."
    ),
}
|
| 764 |
+
|
| 765 |
+
|
| 766 |
+
# Environment rules rendered as the bulleted list under
# "Environment-specific reasoning rules:" in the agent system prompt.
AGENT_ENVIRONMENT_RULES: List[str] = [
    # Budget
    (
        "You start with a fixed pool of experimental credits; every action "
        "deducts a known credit cost and credit-exhaustion ends the episode."
    ),
    # Redundancy
    (
        "Each successful action returns concrete pharma evidence, so "
        "repeated queries of the same type are usually wasteful."
    ),
    # Ordering
    (
        "Some prerequisites apply: e.g. interpret toxicity in light of "
        "expression, and run in-vitro work before in-vivo."
    ),
    # Termination
    (
        "Always finish the episode by submitting a calibrated "
        "submit_validation_report — exhausting credits without a report "
        "yields the worst possible reward."
    ),
]
|
| 785 |
+
|
| 786 |
+
|
| 787 |
+
# Per-category usage hint appended to a tool's description by
# describe_tool_for_agent(). It is read with .get(), so a category that
# has no entry simply contributes no hint.
_TOOL_CATEGORY_AGENT_NOTES: Dict[ToolCategory, str] = {
    # Expression, omics and pathway resources
    ToolCategory.EXPRESSION_DB: (
        "Use early to characterise expression in normal vs disease tissue."
    ),
    ToolCategory.OMICS_ANALYSIS: (
        "Use to mine bulk / single-cell expression compendia for context."
    ),
    ToolCategory.PATHWAY_DB: (
        "Use after gathering a gene list for enrichment / mechanism."
    ),
    # Protein structure, pockets, networks and druggability
    ToolCategory.PROTEIN_STRUCTURE: (
        "Use when reasoning about binding pockets or structure-based design."
    ),
    ToolCategory.BINDING_SITE: (
        "Use to score pocket druggability and detect allosteric sites."
    ),
    ToolCategory.INTERACTION_NETWORK: (
        "Use to reason about partners, paralogs, and pathway context."
    ),
    ToolCategory.DRUGGABILITY: (
        "Use to assess overall ligandability and known chemical matter."
    ),
    # Clinical, safety, selectivity, literature and patient genetics
    ToolCategory.CLINICAL_DB: (
        "Use to gather clinical precedent and competitor activity."
    ),
    ToolCategory.SAFETY_DB: (
        "Use after expression / off-target queries to interpret risk."
    ),
    ToolCategory.OFF_TARGET: (
        "Use whenever paralogs or kinase selectivity could limit the program."
    ),
    ToolCategory.LITERATURE: (
        "Cheap and often decisive — recent literature can flip historical "
        "priors."
    ),
    ToolCategory.PATIENT_GENOMICS: (
        "Use for stratification and human genetics-based de-risking."
    ),
    # Experimental tool classes
    ToolCategory.IN_VITRO: (
        "Expensive; run only after computational evidence justifies it."
    ),
    ToolCategory.IN_VIVO: (
        "Most expensive; only run after in-vitro / target-engagement data."
    ),
    ToolCategory.CRISPR: (
        "Use to test functional dependency or synthetic lethality."
    ),
    ToolCategory.BIOMARKER: (
        "Use to correlate target activity with patient-level biomarkers."
    ),
}
|
| 838 |
+
|
| 839 |
+
|
| 840 |
+
def describe_tool_for_agent(tool_name: str) -> str:
    """Return a compact environment-aware tool description for prompts.

    Args:
        tool_name: Key to look up in ``TOOL_REGISTRY``.

    Returns:
        ``tool_name`` unchanged when it is not registered. Otherwise a
        single line of space-joined sentences: the tool headline, its
        inputs / outputs, the per-category usage note, up to three
        relevant action names and the approximate credit cost. Each
        sentence after the headline is only included when the tool
        provides the corresponding data.
    """
    tool = TOOL_REGISTRY.get(tool_name)
    if tool is None:
        # Unknown tools degrade to their bare name instead of raising.
        return tool_name

    # ``ToolSpec.description`` defaults to "", which would otherwise render
    # as the malformed headline "Name: ."; fall back to just the name.
    if tool.description:
        parts = [f"{tool.name}: {tool.description}."]
    else:
        parts = [f"{tool.name}."]

    if tool.input_types or tool.output_types:
        # Whichever side is empty gets a generic placeholder word.
        inputs = ", ".join(tool.input_types) or "context"
        outputs = ", ".join(tool.output_types) or "evidence"
        parts.append(f"Consumes {inputs}; yields {outputs}.")

    category_note = _TOOL_CATEGORY_AGENT_NOTES.get(tool.category)
    if category_note:
        parts.append(category_note)

    if tool.relevant_actions:
        # Only the first three actions are listed to keep the line short.
        action_names = ", ".join(a.value for a in tool.relevant_actions[:3])
        parts.append(f"Relevant for: {action_names}.")

    if tool.typical_credit_cost > 0:
        parts.append(f"Approx cost: {tool.typical_credit_cost} credits.")

    return " ".join(parts)
|
| 864 |
+
|
| 865 |
+
|
| 866 |
+
def build_agent_system_prompt() -> str:
    """Assemble the system prompt shared by training and inference.

    The prompt is a newline-joined sequence of: a role / task preamble, one
    bullet per entry of ``AGENT_ENVIRONMENT_RULES``, one bullet per
    ``ActionType`` member with its ``AGENT_ACTION_GUIDANCE`` text, and the
    JSON response formats the agent must follow.
    """
    preamble = [
        "You are a computational drug discovery scientist evaluating a "
        "proposed drug target.",
        "",
        "Each turn, you observe the running evidence dossier and remaining "
        "credits, and you must pick the next investigation step. Your goal "
        "is to gather sufficient evidence to submit a calibrated go / no_go "
        "validation report before credits run out.",
        "",
        "Environment-specific reasoning rules:",
    ]
    rule_bullets = [f" - {rule}" for rule in AGENT_ENVIRONMENT_RULES]
    # Indexing (not .get) is deliberate: every ActionType member is expected
    # to have a guidance entry.
    guidance_bullets = [
        f" - {kind.value}: {AGENT_ACTION_GUIDANCE[kind]}"
        for kind in ActionType
    ]
    response_format = [
        "",
        "Respond with ONLY valid JSON, nothing else:",
        '{"action_type": "...", "parameters": {}, "reasoning": "..."}',
        "",
        "When you submit the final report, use this exact shape:",
        '{"action_type": "submit_validation_report", "parameters": {}, '
        '"reasoning": "...", "final_decision": "go", "confidence": 0.8}',
    ]

    prompt_lines = (
        preamble
        + rule_bullets
        + ["", "Action guidance:"]
        + guidance_bullets
        + response_format
    )
    return "\n".join(prompt_lines)
|
| 896 |
+
|
| 897 |
+
|
| 898 |
+
def build_agent_observation_context(
    obs: ValidationObservation,
    *,
    max_tools: int = 6,
) -> str:
    """Build the per-turn context block shown to the agent.

    The block contains the target / indication / disease line, the credit
    budget line, and a list of representative tools: the first registered
    tool of each category, in registry order, capped at ``max_tools``
    entries.
    """
    target_line = (
        f"Target: {obs.target_gene} | Indication: {obs.indication} | "
        f"Disease: {obs.disease_context}"
    )
    credits_line = (
        f"Credits: {obs.credits_remaining}/{obs.credits_total} remaining"
    )

    # Group the registry by category, preserving first-seen order.
    grouped: Dict[ToolCategory, List[ToolSpec]] = {}
    for spec in TOOL_REGISTRY.values():
        if spec.category not in grouped:
            grouped[spec.category] = []
        grouped[spec.category].append(spec)

    out: List[str] = [
        target_line,
        credits_line,
        "Representative tools available (already filtered):",
    ]
    for position, members in enumerate(grouped.values()):
        if position >= max_tools:
            break
        representative = members[0]
        out.append(f" - {describe_tool_for_agent(representative.name)}")

    return "\n".join(out)
|
openenv.yaml
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
spec_version: 1
|
| 2 |
+
name: drug_target_validation
|
| 3 |
+
type: space
|
| 4 |
+
runtime: fastapi
|
| 5 |
+
app: server.app:app
|
| 6 |
+
port: 8000
|
| 7 |
+
description: "RL environment for drug target validation — agent investigates a proposed drug target and makes a go/no-go recommendation"
|
| 8 |
+
tags:
|
| 9 |
+
- biology
|
| 10 |
+
- drug-discovery
|
| 11 |
+
- pharma
|
| 12 |
+
- world-modeling
|
pyproject.toml
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[build-system]
|
| 2 |
+
requires = ["setuptools>=45", "wheel"]
|
| 3 |
+
build-backend = "setuptools.build_meta"
|
| 4 |
+
|
| 5 |
+
[project]
|
| 6 |
+
name = "drugenv"
|
| 7 |
+
version = "0.1.0"
|
| 8 |
+
description = "OpenEnv RL environment for teaching LLMs computational drug-target validation"
|
| 9 |
+
requires-python = ">=3.10,<3.13"
|
| 10 |
+
dependencies = [
|
| 11 |
+
"openenv-core[core]>=0.2.3",
|
| 12 |
+
"numpy>=1.24.0",
|
| 13 |
+
"scipy>=1.10.0",
|
| 14 |
+
"pydantic>=2.0.0",
|
| 15 |
+
"fastapi>=0.110",
|
| 16 |
+
"uvicorn>=0.27",
|
| 17 |
+
]
|
| 18 |
+
|
| 19 |
+
[project.optional-dependencies]
|
| 20 |
+
dev = [
|
| 21 |
+
"pytest>=8.0.0",
|
| 22 |
+
"pytest-cov>=4.0.0",
|
| 23 |
+
]
|
| 24 |
+
train = [
|
| 25 |
+
"torch==2.6.0",
|
| 26 |
+
"torchvision==0.21.0",
|
| 27 |
+
"torchaudio==2.6.0",
|
| 28 |
+
"transformers==4.51.3",
|
| 29 |
+
"trl==0.18.2",
|
| 30 |
+
"peft==0.13.2",
|
| 31 |
+
"accelerate==1.5.0",
|
| 32 |
+
"datasets==3.4.1",
|
| 33 |
+
"bitsandbytes==0.45.5",
|
| 34 |
+
"matplotlib>=3.8",
|
| 35 |
+
"huggingface_hub>=0.26",
|
| 36 |
+
]
|
| 37 |
+
|
| 38 |
+
[project.scripts]
|
| 39 |
+
drugenv-server = "server.app:main"
|
| 40 |
+
|
| 41 |
+
[tool.setuptools]
|
| 42 |
+
include-package-data = true
|
| 43 |
+
packages = [
|
| 44 |
+
"server",
|
| 45 |
+
"server.simulator",
|
| 46 |
+
"server.rules",
|
| 47 |
+
"server.rewards",
|
| 48 |
+
"server.tasks",
|
| 49 |
+
"server.biology",
|
| 50 |
+
"training",
|
| 51 |
+
"tests",
|
| 52 |
+
]
|
| 53 |
+
|
| 54 |
+
[tool.uv]
|
| 55 |
+
package = false
|
server/Dockerfile
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
# Multi-stage build using openenv-base
|
| 8 |
+
# This Dockerfile is flexible and works for both:
|
| 9 |
+
# - In-repo environments (with local OpenEnv sources)
|
| 10 |
+
# - Standalone environments (with openenv from PyPI/Git)
|
| 11 |
+
# The build script (openenv build) handles context detection and sets appropriate build args.
|
| 12 |
+
|
| 13 |
+
ARG BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest
|
| 14 |
+
FROM ${BASE_IMAGE} AS builder
|
| 15 |
+
|
| 16 |
+
WORKDIR /app
|
| 17 |
+
|
| 18 |
+
# Ensure git is available (required for installing dependencies from VCS)
|
| 19 |
+
RUN apt-get update && \
|
| 20 |
+
apt-get install -y --no-install-recommends git && \
|
| 21 |
+
rm -rf /var/lib/apt/lists/*
|
| 22 |
+
|
| 23 |
+
# Build argument to control whether we're building standalone or in-repo
|
| 24 |
+
ARG BUILD_MODE=in-repo
|
| 25 |
+
ARG ENV_NAME=hackathon
|
| 26 |
+
|
| 27 |
+
# Copy environment code (always at root of build context)
|
| 28 |
+
COPY . /app/env
|
| 29 |
+
|
| 30 |
+
# For in-repo builds, openenv is already vendored in the build context
|
| 31 |
+
# For standalone builds, openenv will be installed via pyproject.toml
|
| 32 |
+
WORKDIR /app/env
|
| 33 |
+
|
| 34 |
+
# Ensure uv is available (for local builds where base image lacks it)
|
| 35 |
+
RUN if ! command -v uv >/dev/null 2>&1; then \
|
| 36 |
+
curl -LsSf https://astral.sh/uv/install.sh | sh && \
|
| 37 |
+
mv /root/.local/bin/uv /usr/local/bin/uv && \
|
| 38 |
+
mv /root/.local/bin/uvx /usr/local/bin/uvx; \
|
| 39 |
+
fi
|
| 40 |
+
|
| 41 |
+
# Install dependencies using uv sync
|
| 42 |
+
# If uv.lock exists, use it; otherwise resolve on the fly
|
| 43 |
+
RUN --mount=type=cache,target=/root/.cache/uv \
|
| 44 |
+
if [ -f uv.lock ]; then \
|
| 45 |
+
uv sync --frozen --no-install-project --no-editable; \
|
| 46 |
+
else \
|
| 47 |
+
uv sync --no-install-project --no-editable; \
|
| 48 |
+
fi
|
| 49 |
+
|
| 50 |
+
RUN --mount=type=cache,target=/root/.cache/uv \
|
| 51 |
+
if [ -f uv.lock ]; then \
|
| 52 |
+
uv sync --frozen --no-editable; \
|
| 53 |
+
else \
|
| 54 |
+
uv sync --no-editable; \
|
| 55 |
+
fi
|
| 56 |
+
|
| 57 |
+
# Final runtime stage
|
| 58 |
+
FROM ${BASE_IMAGE}
|
| 59 |
+
|
| 60 |
+
WORKDIR /app
|
| 61 |
+
|
| 62 |
+
# Copy the virtual environment from builder
|
| 63 |
+
COPY --from=builder /app/env/.venv /app/.venv
|
| 64 |
+
|
| 65 |
+
# Copy the environment code
|
| 66 |
+
COPY --from=builder /app/env /app/env
|
| 67 |
+
|
| 68 |
+
# Set PATH to use the virtual environment
|
| 69 |
+
ENV PATH="/app/.venv/bin:$PATH"
|
| 70 |
+
|
| 71 |
+
# Set PYTHONPATH so imports work correctly
|
| 72 |
+
ENV PYTHONPATH="/app/env:$PYTHONPATH"
|
| 73 |
+
|
| 74 |
+
# Health check
|
| 75 |
+
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
|
| 76 |
+
CMD curl -f http://localhost:8000/health || exit 1
|
| 77 |
+
|
| 78 |
+
# Run the FastAPI server
|
| 79 |
+
# The module path is constructed to work with the /app/env structure
|
| 80 |
+
CMD ["sh", "-c", "cd /app/env && uvicorn server.app:app --host 0.0.0.0 --port 8000"]
|
server/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .hackathon_environment import DrugTargetEnvironment
|
| 2 |
+
|
| 3 |
+
__all__ = ["DrugTargetEnvironment"]
|
server/app.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""FastAPI application for the Drug Target Validation Environment.
|
| 2 |
+
|
| 3 |
+
Endpoints:
|
| 4 |
+
- POST /reset: Reset the environment
|
| 5 |
+
- POST /step: Execute an action
|
| 6 |
+
- GET /state: Get current environment state
|
| 7 |
+
- GET /schema: Get action/observation schemas
|
| 8 |
+
- WS /ws: WebSocket endpoint for persistent sessions
|
| 9 |
+
- GET /: Demo UI (serves demo.html when present)
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
import os
|
| 13 |
+
from pathlib import Path
|
| 14 |
+
|
| 15 |
+
try:
|
| 16 |
+
from openenv.core.env_server.http_server import create_app
|
| 17 |
+
except Exception as e: # pragma: no cover
|
| 18 |
+
raise ImportError(
|
| 19 |
+
"openenv is required for the web interface. "
|
| 20 |
+
"Install dependencies with 'uv sync'"
|
| 21 |
+
) from e
|
| 22 |
+
|
| 23 |
+
from fastapi.responses import HTMLResponse
|
| 24 |
+
from models import DrugTargetAction, ValidationObservation
|
| 25 |
+
from .hackathon_environment import DrugTargetEnvironment
|
| 26 |
+
|
| 27 |
+
# Application object produced by OpenEnv's ``create_app`` factory, wired to
# this environment class and its action / observation models.
app = create_app(
    DrugTargetEnvironment,
    DrugTargetAction,
    ValidationObservation,
    env_name="drug_target_validation",
    # Concurrency cap is read from the MAX_ENVS environment variable
    # (defaults to 4 when unset).
    max_concurrent_envs=int(os.environ.get("MAX_ENVS", "4")),
)

# Static demo page, expected in the parent of the directory holding this file.
DEMO_HTML = Path(__file__).resolve().parent.parent / "demo.html"
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
@app.get("/", response_class=HTMLResponse)
async def demo_ui():
    """Serve the landing page at ``/``.

    Returns the contents of ``DEMO_HTML`` when that file exists; otherwise a
    minimal placeholder page pointing at ``/docs``.
    """
    if DEMO_HTML.exists():
        # Decode explicitly as UTF-8 so the page renders identically no
        # matter what the host's locale default encoding is.
        return HTMLResponse(
            content=DEMO_HTML.read_text(encoding="utf-8"),
            status_code=200,
        )
    return HTMLResponse(
        content=(
            "<h1>Drug Target Validation Env API</h1>"
            "<p>Visit /docs for API documentation.</p>"
        ),
        status_code=200,
    )
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def main(host: str = "0.0.0.0", port: int | None = None) -> None:
    """Run the application under uvicorn.

    Args:
        host: Interface to bind to.
        port: Port to listen on. When ``None``, the ``PORT`` environment
            variable is used, falling back to 8000.
    """
    # Imported lazily so importing this module does not require uvicorn.
    import uvicorn

    if port is None:
        port = int(os.environ.get("PORT", "8000"))
    uvicorn.run(app, host=host, port=port)
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
# ── Mount the Gradio demo at /demo (env Space landing) ────────────────────
# Optional: the env Space ships a small Gradio Blocks UI at
# ``space/env/gradio_demo.py``; mounting it here means a single Docker
# image can serve both the OpenEnv HTTP API and the human-friendly
# demo. A server-only deploy (no gradio installed) still boots.
#
# This must run BEFORE the ``__main__`` guard below: ``main()`` blocks inside
# ``uvicorn.run``, so anything placed after the guard would never execute
# when the module is launched as a script.
try:  # pragma: no cover - import-time best-effort
    import gradio as _gr  # type: ignore
    from space.env.gradio_demo import build_gradio_demo as _build_gradio_demo

    _demo = _build_gradio_demo()
    if isinstance(_demo, _gr.Blocks):
        _gr.mount_gradio_app(app, _demo, path="/demo")
except ImportError:  # pragma: no cover
    # gradio or the demo module is not installed: API-only deployment.
    pass
except Exception:  # pragma: no cover
    # Still best-effort, but leave a trace so a broken demo is diagnosable.
    import logging as _logging

    _logging.getLogger(__name__).warning(
        "Gradio demo could not be mounted at /demo; continuing without it.",
        exc_info=True,
    )


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--host", default="0.0.0.0")
    parser.add_argument("--port", type=int, default=None)
    args = parser.parse_args()
    main(host=args.host, port=args.port)
|
server/biology/__init__.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .target_index import (
|
| 2 |
+
score_decision_accuracy,
|
| 3 |
+
score_evidence_coverage,
|
| 4 |
+
score_reasoning_coherence,
|
| 5 |
+
)
|
| 6 |
+
|
| 7 |
+
__all__ = [
|
| 8 |
+
"score_decision_accuracy",
|
| 9 |
+
"score_evidence_coverage",
|
| 10 |
+
"score_reasoning_coherence",
|
| 11 |
+
]
|
server/biology/target_index.py
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Scoring helpers for the drug-target-validation reward function.
|
| 2 |
+
|
| 3 |
+
Implements the three core terminal-reward signals:
|
| 4 |
+
|
| 5 |
+
* ``score_evidence_coverage`` — what fraction of the scenario's
|
| 6 |
+
``key_evidence_dimensions`` did the agent actually investigate?
|
| 7 |
+
* ``score_decision_accuracy`` — was the final go / no_go correct, scaled
|
| 8 |
+
by how confidently it was stated?
|
| 9 |
+
* ``score_reasoning_coherence`` — did the agent's action sequence
|
| 10 |
+
respect light scientific prerequisites (e.g. expression before
|
| 11 |
+
toxicity, in-vitro before in-vivo)?
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
from __future__ import annotations
|
| 15 |
+
|
| 16 |
+
from typing import Iterable, List, Sequence
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
# Soft prerequisite map used by ``score_reasoning_coherence``. Each key
# *should* be preceded by at least one of the listed prerequisite action
# names earlier in the trajectory. Keys and values are action-name strings;
# an action that is not a key here is never checked for prerequisites.
_PREREQUISITES: dict[str, list[str]] = {
    "toxicity_panel": ["query_expression"],
    "in_vivo_model": ["in_vitro_assay"],
    "biomarker_correlation": ["query_expression", "patient_stratification"],
    "off_target_screen": ["druggability_screen", "query_expression"],
}
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def score_evidence_coverage(
    discovered_dimensions: Iterable[str],
    key_dimensions: Sequence[str],
) -> float:
    """Return the share of ``key_dimensions`` the agent has investigated.

    Matching is case-insensitive. Each entry of ``key_dimensions`` counts
    once towards the denominator, and counts as a hit when its lower-cased
    form appears among the lower-cased ``discovered_dimensions``.

    A scenario with no key dimensions scores 1.0, so trivially-easy targets
    are not penalised by the coverage term.
    """
    if not key_dimensions:
        return 1.0

    investigated = set()
    for dim in discovered_dimensions:
        investigated.add(dim.lower())

    matched = [key for key in key_dimensions if key.lower() in investigated]
    return len(matched) / len(key_dimensions)
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def score_decision_accuracy(
    predicted_decision: str | None,
    confidence: float | None,
    correct_decision: str,
) -> float:
    """Signed, confidence-weighted decision score in [-1, 1].

    The magnitude is ``2 * |confidence - 0.5|`` after clamping ``confidence``
    to [0, 1]; the sign is positive for a correct decision and negative for
    an incorrect one. A confidently correct answer therefore scores 1.0, a
    maximally uncertain answer (0.5) scores 0.0 either way, and a confidently
    wrong answer scores -1.0 so the caller can penalise it.

    Args:
        predicted_decision: The submitted decision, compared to
            ``correct_decision`` case-insensitively after stripping
            whitespace. ``None`` (nothing submitted) scores 0.0.
        confidence: Stated confidence. ``None`` or NaN is treated as 0.5
            (no information), which yields a score of 0.0.
        correct_decision: The ground-truth decision.

    Returns:
        The signed score described above.
    """
    if predicted_decision is None:
        return 0.0

    is_correct = (
        predicted_decision.strip().lower() == correct_decision.strip().lower()
    )

    value = 0.5 if confidence is None else float(confidence)
    # NaN is the only float that differs from itself. Without this guard
    # ``min(1.0, nan)`` evaluates to 1.0, so a malformed confidence would be
    # scored as *maximum* confidence.
    if value != value:
        value = 0.5
    value = max(0.0, min(1.0, value))

    confidence_weight = 2.0 * abs(value - 0.5)
    return confidence_weight if is_correct else -confidence_weight
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def score_reasoning_coherence(action_history: List[str]) -> float:
    """Share of prerequisite-gated actions whose prerequisites were met.

    Only actions that appear as keys in ``_PREREQUISITES`` are counted. Such
    an action passes when at least one of its listed prerequisites occurred
    earlier in ``action_history``. Actions without an entry are ignored by
    the ratio but still count as "seen" for later actions.

    Returns 1.0 when the history is empty or contains no gated action.
    """
    if not action_history:
        return 1.0

    performed: set = set()
    gated_total = 0
    gated_ok = 0
    for name in action_history:
        required = _PREREQUISITES.get(name)
        if required is not None:
            gated_total += 1
            # Passes if any prerequisite has already been performed.
            if not performed.isdisjoint(required):
                gated_ok += 1
        performed.add(name)

    return gated_ok / gated_total if gated_total else 1.0
|
server/hackathon_environment.py
ADDED
|
@@ -0,0 +1,325 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Drug Target Validation Environment.
|
| 2 |
+
|
| 3 |
+
Implements the OpenEnv ``Environment`` interface as a POMDP where the
|
| 4 |
+
agent issues one structured pharma / bioinformatics step at a time and
|
| 5 |
+
ultimately submits a go / no_go validation report.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
from typing import Any, Dict, List, Optional
|
| 11 |
+
from uuid import uuid4
|
| 12 |
+
|
| 13 |
+
from openenv.core.env_server.interfaces import Environment
|
| 14 |
+
from openenv.core.env_server.types import State
|
| 15 |
+
|
| 16 |
+
from models import (
|
| 17 |
+
ActionType,
|
| 18 |
+
DrugTargetAction,
|
| 19 |
+
EvidenceDossier,
|
| 20 |
+
IntermediateOutput,
|
| 21 |
+
OutputType,
|
| 22 |
+
ValidationObservation,
|
| 23 |
+
ValidationStepRecord,
|
| 24 |
+
ValidationTaskSpec,
|
| 25 |
+
)
|
| 26 |
+
|
| 27 |
+
from server.rules.engine import RuleEngine
|
| 28 |
+
from server.rewards.reward import RewardBreakdown, RewardComputer
|
| 29 |
+
from server.simulator.latent_state import FullLatentState
|
| 30 |
+
from server.simulator.noise import NoiseModel
|
| 31 |
+
from server.simulator.transition import (
|
| 32 |
+
ACTION_COSTS,
|
| 33 |
+
TransitionEngine,
|
| 34 |
+
compute_action_cost,
|
| 35 |
+
)
|
| 36 |
+
from server.tasks.generator import TaskGenerator
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
MAX_STEPS = 30
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
class DrugTargetEnvironment(Environment):
|
| 43 |
+
"""POMDP environment for drug target validation.
|
| 44 |
+
|
| 45 |
+
The agent observes ``ValidationObservation`` (partial view) while the
|
| 46 |
+
environment maintains a ``FullLatentState`` (hidden ``TargetProfile``
|
| 47 |
+
plus credit / progress state).
|
| 48 |
+
"""
|
| 49 |
+
|
| 50 |
+
SUPPORTS_CONCURRENT_SESSIONS: bool = True
|
| 51 |
+
|
| 52 |
+
def __init__(
|
| 53 |
+
self,
|
| 54 |
+
scenario_name: Optional[str] = None,
|
| 55 |
+
*,
|
| 56 |
+
domain_randomise: bool = True,
|
| 57 |
+
) -> None:
|
| 58 |
+
self._state = State(episode_id=str(uuid4()), step_count=0)
|
| 59 |
+
self._latent: Optional[FullLatentState] = None
|
| 60 |
+
self._task: Optional[ValidationTaskSpec] = None
|
| 61 |
+
self._scenario_name = scenario_name
|
| 62 |
+
self._noise = NoiseModel()
|
| 63 |
+
self._engine = TransitionEngine(self._noise)
|
| 64 |
+
self._rules = RuleEngine()
|
| 65 |
+
self._rewards = RewardComputer()
|
| 66 |
+
self._task_gen = TaskGenerator(domain_randomise=domain_randomise)
|
| 67 |
+
|
| 68 |
+
self._history: List[ValidationStepRecord] = []
|
| 69 |
+
self._dossier: EvidenceDossier = EvidenceDossier()
|
| 70 |
+
self._evidence_dimensions_covered: List[str] = []
|
| 71 |
+
self._action_history: List[str] = []
|
| 72 |
+
self._submitted_decision: Optional[str] = None
|
| 73 |
+
self._submitted_confidence: Optional[float] = None
|
| 74 |
+
self._cumulative_reward: float = 0.0
|
| 75 |
+
|
| 76 |
+
# ── Environment interface ───────────────────────────────────────────
|
| 77 |
+
|
| 78 |
+
def reset(self, seed: Optional[int] = None) -> ValidationObservation:
|
| 79 |
+
seed = seed if seed is not None else hash(uuid4()) % (2**31)
|
| 80 |
+
self._noise.reseed(seed)
|
| 81 |
+
self._state = State(episode_id=str(uuid4()), step_count=0)
|
| 82 |
+
|
| 83 |
+
self._task, self._latent = self._task_gen.generate(
|
| 84 |
+
seed=seed,
|
| 85 |
+
scenario_name=self._scenario_name,
|
| 86 |
+
)
|
| 87 |
+
self._latent.rng_seed = seed
|
| 88 |
+
|
| 89 |
+
self._history.clear()
|
| 90 |
+
self._dossier = EvidenceDossier(
|
| 91 |
+
credits_used=0,
|
| 92 |
+
)
|
| 93 |
+
self._evidence_dimensions_covered.clear()
|
| 94 |
+
self._action_history.clear()
|
| 95 |
+
self._submitted_decision = None
|
| 96 |
+
self._submitted_confidence = None
|
| 97 |
+
self._cumulative_reward = 0.0
|
| 98 |
+
|
| 99 |
+
return self._build_observation(reward=0.0, done=False)
|
| 100 |
+
|
| 101 |
+
def step( # type: ignore[override]
|
| 102 |
+
self, action: DrugTargetAction
|
| 103 |
+
) -> ValidationObservation:
|
| 104 |
+
assert self._latent is not None, "Call reset() before step()"
|
| 105 |
+
assert self._task is not None
|
| 106 |
+
|
| 107 |
+
self._state.step_count += 1
|
| 108 |
+
prev_state = self._latent.model_copy(deep=True)
|
| 109 |
+
prev_history = list(self._action_history)
|
| 110 |
+
|
| 111 |
+
violations = self._rules.check(
|
| 112 |
+
action,
|
| 113 |
+
self._latent,
|
| 114 |
+
evidence_dimensions_covered=self._evidence_dimensions_covered,
|
| 115 |
+
)
|
| 116 |
+
hard_v = self._rules.hard_violations(violations)
|
| 117 |
+
soft_v = self._rules.soft_violations(violations)
|
| 118 |
+
|
| 119 |
+
result = self._engine.step(
|
| 120 |
+
self._latent,
|
| 121 |
+
action,
|
| 122 |
+
hard_violations=hard_v,
|
| 123 |
+
soft_violations=soft_v,
|
| 124 |
+
)
|
| 125 |
+
self._latent = result.next_state
|
| 126 |
+
self._action_history.append(action.action_type.value)
|
| 127 |
+
|
| 128 |
+
step_rb = self._rewards.step_reward(
|
| 129 |
+
action,
|
| 130 |
+
prev_state,
|
| 131 |
+
self._latent,
|
| 132 |
+
result.output,
|
| 133 |
+
hard_v,
|
| 134 |
+
soft_v,
|
| 135 |
+
action_history=prev_history,
|
| 136 |
+
)
|
| 137 |
+
|
| 138 |
+
cost = compute_action_cost(action)
|
| 139 |
+
self._history.append(ValidationStepRecord(
|
| 140 |
+
step_index=self._state.step_count,
|
| 141 |
+
action_type=action.action_type,
|
| 142 |
+
parameters=action.parameters,
|
| 143 |
+
output_summary=result.output.summary,
|
| 144 |
+
output_type=result.output.output_type,
|
| 145 |
+
success=result.output.success,
|
| 146 |
+
quality_score=result.output.quality_score,
|
| 147 |
+
credit_cost=cost,
|
| 148 |
+
))
|
| 149 |
+
self._update_discoveries(action, result.output)
|
| 150 |
+
self._dossier.credits_used = self._latent.credits.credits_used
|
| 151 |
+
|
| 152 |
+
if (
|
| 153 |
+
action.action_type == ActionType.SUBMIT_VALIDATION_REPORT
|
| 154 |
+
and result.output.success
|
| 155 |
+
and not hard_v
|
| 156 |
+
):
|
| 157 |
+
self._submitted_decision = action.final_decision
|
| 158 |
+
self._submitted_confidence = action.confidence
|
| 159 |
+
|
| 160 |
+
done = result.done or self._state.step_count >= MAX_STEPS
|
| 161 |
+
|
| 162 |
+
terminal_rb = RewardBreakdown()
|
| 163 |
+
if done:
|
| 164 |
+
terminal_rb = self._rewards.terminal_reward(
|
| 165 |
+
self._latent,
|
| 166 |
+
final_decision=self._submitted_decision,
|
| 167 |
+
confidence=self._submitted_confidence,
|
| 168 |
+
action_history=list(self._action_history),
|
| 169 |
+
)
|
| 170 |
+
|
| 171 |
+
total_reward = step_rb.total + terminal_rb.total
|
| 172 |
+
self._cumulative_reward += total_reward
|
| 173 |
+
|
| 174 |
+
breakdown = step_rb.to_dict()
|
| 175 |
+
breakdown.update({f"term_{k}": v for k, v in terminal_rb.to_dict().items()})
|
| 176 |
+
|
| 177 |
+
return self._build_observation(
|
| 178 |
+
reward=total_reward,
|
| 179 |
+
done=done,
|
| 180 |
+
latest_output=result.output,
|
| 181 |
+
rule_violations=hard_v + soft_v,
|
| 182 |
+
reward_breakdown=breakdown,
|
| 183 |
+
metadata_extra={"reward_breakdown": breakdown},
|
| 184 |
+
)
|
| 185 |
+
|
| 186 |
+
@property
|
| 187 |
+
def state(self) -> State:
|
| 188 |
+
return self._state
|
| 189 |
+
|
| 190 |
+
def set_scenario(self, scenario_name: Optional[str]) -> None:
|
| 191 |
+
"""Set the scenario used on the next reset."""
|
| 192 |
+
self._scenario_name = scenario_name
|
| 193 |
+
|
| 194 |
+
# ── internal helpers ────────────────────────────────────────────────
|
| 195 |
+
|
| 196 |
+
def _build_observation(
|
| 197 |
+
self,
|
| 198 |
+
*,
|
| 199 |
+
reward: float,
|
| 200 |
+
done: bool,
|
| 201 |
+
latest_output: Optional[IntermediateOutput] = None,
|
| 202 |
+
rule_violations: Optional[List[str]] = None,
|
| 203 |
+
reward_breakdown: Optional[Dict[str, float]] = None,
|
| 204 |
+
metadata_extra: Optional[Dict[str, Any]] = None,
|
| 205 |
+
) -> ValidationObservation:
|
| 206 |
+
assert self._task is not None
|
| 207 |
+
assert self._latent is not None
|
| 208 |
+
meta: Dict[str, Any] = {
|
| 209 |
+
"episode_id": self._state.episode_id,
|
| 210 |
+
"step": self._state.step_count,
|
| 211 |
+
"cumulative_reward": self._cumulative_reward,
|
| 212 |
+
}
|
| 213 |
+
if metadata_extra:
|
| 214 |
+
meta.update(metadata_extra)
|
| 215 |
+
return ValidationObservation(
|
| 216 |
+
target_gene=self._task.target_gene,
|
| 217 |
+
disease_context=self._task.disease_context,
|
| 218 |
+
indication=self._task.indication,
|
| 219 |
+
credits_remaining=self._latent.credits.credits_remaining,
|
| 220 |
+
credits_total=self._latent.credits.credits_total,
|
| 221 |
+
dossier=self._dossier.model_copy(deep=True),
|
| 222 |
+
pipeline_history=[h.model_dump() for h in self._history],
|
| 223 |
+
available_actions=list(self._task.available_actions),
|
| 224 |
+
step_index=self._state.step_count,
|
| 225 |
+
done=done,
|
| 226 |
+
reward=reward,
|
| 227 |
+
step_reward_breakdown=reward_breakdown or {},
|
| 228 |
+
rule_violations=rule_violations or [],
|
| 229 |
+
latest_output=latest_output,
|
| 230 |
+
metadata=meta,
|
| 231 |
+
)
|
| 232 |
+
|
| 233 |
+
def _update_discoveries(
|
| 234 |
+
self,
|
| 235 |
+
action: DrugTargetAction,
|
| 236 |
+
output: IntermediateOutput,
|
| 237 |
+
) -> None:
|
| 238 |
+
"""Fold the latest output into the running ``EvidenceDossier`` and
|
| 239 |
+
the per-dimension coverage tracker."""
|
| 240 |
+
if not output.success:
|
| 241 |
+
return
|
| 242 |
+
|
| 243 |
+
data = dict(output.data or {})
|
| 244 |
+
|
| 245 |
+
if output.output_type in {
|
| 246 |
+
OutputType.EXPRESSION_RESULT,
|
| 247 |
+
OutputType.DE_RESULT,
|
| 248 |
+
OutputType.PATHWAY_RESULT,
|
| 249 |
+
OutputType.COEXPRESSION_RESULT,
|
| 250 |
+
}:
|
| 251 |
+
self._dossier.expression_findings[action.action_type.value] = data
|
| 252 |
+
self._track_dim("expression")
|
| 253 |
+
if output.output_type == OutputType.PATHWAY_RESULT:
|
| 254 |
+
self._track_dim("pathway")
|
| 255 |
+
|
| 256 |
+
if output.output_type in {
|
| 257 |
+
OutputType.STRUCTURE_RESULT,
|
| 258 |
+
OutputType.BINDING_SITE_RESULT,
|
| 259 |
+
OutputType.INTERACTION_RESULT,
|
| 260 |
+
OutputType.DRUGGABILITY_RESULT,
|
| 261 |
+
}:
|
| 262 |
+
self._dossier.protein_findings[action.action_type.value] = data
|
| 263 |
+
if output.output_type in {
|
| 264 |
+
OutputType.DRUGGABILITY_RESULT,
|
| 265 |
+
OutputType.BINDING_SITE_RESULT,
|
| 266 |
+
}:
|
| 267 |
+
self._track_dim("druggability")
|
| 268 |
+
if output.output_type == OutputType.STRUCTURE_RESULT:
|
| 269 |
+
self._track_dim("structure")
|
| 270 |
+
if output.output_type == OutputType.INTERACTION_RESULT:
|
| 271 |
+
self._track_dim("interactions")
|
| 272 |
+
|
| 273 |
+
if output.output_type == OutputType.CLINICAL_RESULT:
|
| 274 |
+
self._dossier.clinical_findings[action.action_type.value] = data
|
| 275 |
+
self._track_dim("clinical")
|
| 276 |
+
if output.output_type == OutputType.PATIENT_STRATIFICATION_RESULT:
|
| 277 |
+
self._dossier.clinical_findings[action.action_type.value] = data
|
| 278 |
+
self._track_dim("patient_stratification")
|
| 279 |
+
|
| 280 |
+
if output.output_type in {
|
| 281 |
+
OutputType.TOXICITY_RESULT,
|
| 282 |
+
OutputType.OFF_TARGET_RESULT,
|
| 283 |
+
}:
|
| 284 |
+
self._dossier.safety_findings[action.action_type.value] = data
|
| 285 |
+
if output.output_type == OutputType.TOXICITY_RESULT:
|
| 286 |
+
self._track_dim("toxicity")
|
| 287 |
+
if output.output_type == OutputType.OFF_TARGET_RESULT:
|
| 288 |
+
self._track_dim("off_target")
|
| 289 |
+
|
| 290 |
+
if output.output_type in {
|
| 291 |
+
OutputType.LITERATURE_RESULT,
|
| 292 |
+
OutputType.EVIDENCE_SYNTHESIS_RESULT,
|
| 293 |
+
OutputType.COMPETITOR_LANDSCAPE_RESULT,
|
| 294 |
+
}:
|
| 295 |
+
self._dossier.literature_findings[action.action_type.value] = data
|
| 296 |
+
self._track_dim("literature")
|
| 297 |
+
|
| 298 |
+
if output.output_type in {
|
| 299 |
+
OutputType.IN_VITRO_RESULT,
|
| 300 |
+
OutputType.IN_VIVO_RESULT,
|
| 301 |
+
OutputType.CRISPR_RESULT,
|
| 302 |
+
OutputType.BIOMARKER_RESULT,
|
| 303 |
+
}:
|
| 304 |
+
entry = {"action": action.action_type.value, **data}
|
| 305 |
+
self._dossier.experimental_results.append(entry)
|
| 306 |
+
if output.output_type == OutputType.IN_VITRO_RESULT:
|
| 307 |
+
self._track_dim("in_vitro")
|
| 308 |
+
if output.output_type == OutputType.IN_VIVO_RESULT:
|
| 309 |
+
self._track_dim("in_vivo")
|
| 310 |
+
if output.output_type == OutputType.CRISPR_RESULT:
|
| 311 |
+
self._track_dim("crispr")
|
| 312 |
+
if output.output_type == OutputType.BIOMARKER_RESULT:
|
| 313 |
+
self._track_dim("biomarker")
|
| 314 |
+
|
| 315 |
+
if output.output_type == OutputType.RED_FLAG_NOTE:
|
| 316 |
+
note = data.get("note", "(no detail)")
|
| 317 |
+
if note not in self._dossier.flagged_red_flags:
|
| 318 |
+
self._dossier.flagged_red_flags.append(str(note))
|
| 319 |
+
|
| 320 |
+
def _track_dim(self, dim: str) -> None:
    """Record ``dim`` as a covered evidence dimension, at most once.

    The dimension name is appended to ``self._evidence_dimensions_covered``
    only when it is not already present, so first-seen order is preserved
    and no duplicates are stored.
    """
    covered = self._evidence_dimensions_covered
    if dim in covered:
        return
    covered.append(dim)
|
| 323 |
+
|
| 324 |
+
|
| 325 |
+
__all__ = ["DrugTargetEnvironment", "MAX_STEPS"]
|
server/requirements.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
-r ../requirements.txt
|
server/rewards/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .reward import RewardBreakdown, RewardComputer
|
| 2 |
+
|
| 3 |
+
__all__ = ["RewardBreakdown", "RewardComputer"]
|
server/rewards/reward.py
ADDED
|
@@ -0,0 +1,265 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Decomposable reward function for the drug-target-validation POMDP.
|
| 2 |
+
|
| 3 |
+
Reward components
|
| 4 |
+
─────────────────
|
| 5 |
+
evidence_coverage — did the agent investigate the
|
| 6 |
+
``key_evidence_dimensions`` for the scenario?
|
| 7 |
+
decision_accuracy — was the final go / no_go correct, weighted by
|
| 8 |
+
the agent's stated confidence? (terminal only)
|
| 9 |
+
credit_efficiency — did the agent avoid redundant or wasteful calls?
|
| 10 |
+
reasoning_coherence — did the action sequence respect light scientific
|
| 11 |
+
prerequisites?
|
| 12 |
+
novelty — bonus for opening a new evidence dimension.
|
| 13 |
+
penalty — hard violations / credit-exhaustion / very-low
|
| 14 |
+
confidence at submission.
|
| 15 |
+
shaping — potential-based shaping over the coverage potential
|
| 16 |
+
so the dense signal telescopes correctly.
|
| 17 |
+
terminal — composite terminal reward.
|
| 18 |
+
|
| 19 |
+
Step reward
|
| 20 |
+
R_t = evidence_novelty_bonus + reasoning_coherence_bonus
|
| 21 |
+
+ credit_efficiency_penalty + rule_violation_penalty
|
| 22 |
+
+ [φ(s_{t+1}) − φ(s_t)]
|
| 23 |
+
|
| 24 |
+
Terminal reward
|
| 25 |
+
R_T = 0.4 * decision_accuracy
|
| 26 |
+
+ 0.35 * evidence_coverage
|
| 27 |
+
+ 0.15 * credit_efficiency
|
| 28 |
+
+ 0.10 * reasoning_coherence
|
| 29 |
+
"""
|
| 30 |
+
|
| 31 |
+
from __future__ import annotations
|
| 32 |
+
|
| 33 |
+
from dataclasses import dataclass, field
|
| 34 |
+
from typing import Dict, List, Optional
|
| 35 |
+
|
| 36 |
+
from models import (
|
| 37 |
+
ActionType,
|
| 38 |
+
DrugTargetAction,
|
| 39 |
+
IntermediateOutput,
|
| 40 |
+
META_ACTIONS,
|
| 41 |
+
)
|
| 42 |
+
|
| 43 |
+
from server.biology.target_index import (
|
| 44 |
+
score_decision_accuracy,
|
| 45 |
+
score_evidence_coverage,
|
| 46 |
+
score_reasoning_coherence,
|
| 47 |
+
)
|
| 48 |
+
from server.simulator.latent_state import FullLatentState
|
| 49 |
+
from server.simulator.transition import TransitionEngine
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
@dataclass
class RewardBreakdown:
    """Named reward components for a single step or for the terminal state.

    Each scalar field holds one component of the reward; ``components``
    carries extra free-form diagnostics keyed by name.
    """

    evidence_coverage: float = 0.0
    decision_accuracy: float = 0.0
    credit_efficiency: float = 0.0
    reasoning_coherence: float = 0.0
    novelty: float = 0.0
    penalty: float = 0.0
    shaping: float = 0.0
    terminal: float = 0.0
    # Auxiliary diagnostics; merged into ``to_dict()`` after the core fields.
    components: Dict[str, float] = field(default_factory=dict)

    @property
    def total(self) -> float:
        """Return the plain sum of all eight scalar components.

        NOTE(review): ``RewardComputer.terminal_reward`` fills ``terminal``
        with a weighted blend of four of the other fields, so on terminal
        breakdowns those fields contribute to this sum twice — confirm that
        callers intend this.
        """
        running = self.evidence_coverage
        # Accumulate left-to-right so the float result matches a chained sum.
        for part in (
            self.decision_accuracy,
            self.credit_efficiency,
            self.reasoning_coherence,
            self.novelty,
            self.penalty,
            self.shaping,
            self.terminal,
        ):
            running = running + part
        return running

    def to_dict(self) -> Dict[str, float]:
        """Return the breakdown as a flat dict.

        Core fields come first, then ``"total"``, then every entry of
        ``components``.  A ``components`` key that collides with a core key
        overwrites the core value.
        """
        core_names = (
            "evidence_coverage",
            "decision_accuracy",
            "credit_efficiency",
            "reasoning_coherence",
            "novelty",
            "penalty",
            "shaping",
            "terminal",
        )
        flat = {name: getattr(self, name) for name in core_names}
        flat["total"] = self.total
        flat.update(self.components)
        return flat
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
class RewardComputer:
    """Computes step-wise and terminal rewards for the POMDP.

    The three constructor weights scale the dense per-step novelty,
    coherence and credit-efficiency terms.  The terminal composite uses the
    fixed 0.40 / 0.35 / 0.15 / 0.10 weights written in ``terminal_reward``.
    """

    def __init__(
        self,
        novelty_weight: float = 0.20,
        coherence_weight: float = 0.10,
        efficiency_weight: float = 0.10,
    ):
        """Store the per-step weights.

        Args:
            novelty_weight: Bonus per newly covered evidence dimension.
            coherence_weight: Scale of the running-coherence term.
            efficiency_weight: Scale of the per-step credit-cost penalty.
        """
        self.w_novelty = novelty_weight
        self.w_coh = coherence_weight
        self.w_eff = efficiency_weight

    # ── step reward ─────────────────────────────────────────────────────

    def step_reward(
        self,
        action: DrugTargetAction,
        prev_state: FullLatentState,
        next_state: FullLatentState,
        output: IntermediateOutput,
        hard_violations: List[str],
        soft_violations: List[str],
        action_history: Optional[List[str]] = None,
    ) -> RewardBreakdown:
        """Compute the dense reward for one transition.

        Args:
            action: The action that was just taken.
            prev_state: Latent state before the action.
            next_state: Latent state after the action.
            output: Simulator output for the action (not read here).
            hard_violations: Messages of hard rule violations; when
                non-empty only the hard penalty is returned.
            soft_violations: Messages of soft rule violations.
            action_history: Action-type values taken before this step.

        Returns:
            A ``RewardBreakdown`` with novelty, coherence, credit-efficiency,
            penalty and shaping terms filled in.
        """
        rb = RewardBreakdown()

        # Hard violations short-circuit the step.
        if hard_violations:
            rb.penalty = -0.5 * len(hard_violations)
            rb.components["hard_violations"] = float(len(hard_violations))
            return rb

        # Novelty bonus: did this action open a new evidence dimension?
        prev_dims = set(TransitionEngine.covered_evidence_dimensions(prev_state))
        next_dims = set(TransitionEngine.covered_evidence_dimensions(next_state))
        new_dims = next_dims - prev_dims
        if new_dims:
            rb.novelty = self.w_novelty * len(new_dims)
            rb.components["new_evidence_dims"] = float(len(new_dims))

        # Reasoning coherence: small bonus / penalty based on the running
        # action history (including this step).  The score is centred on 0.5
        # so values below that become a penalty.
        history = list(action_history or []) + [action.action_type.value]
        coherence = score_reasoning_coherence(history)
        rb.reasoning_coherence = self.w_coh * (coherence - 0.5)
        rb.components["coherence_running"] = coherence

        # Credit efficiency penalty: cost relative to total budget.
        if next_state.credits.credits_total > 0:
            cost = next_state.credits.credits_used - prev_state.credits.credits_used
            spent_frac = cost / max(next_state.credits.credits_total, 1)
            rb.credit_efficiency = -self.w_eff * spent_frac
            rb.components["credit_spent_frac"] = spent_frac

        # Soft-violation penalty (e.g. redundancy, wrong ordering).
        if soft_violations:
            rb.penalty -= 0.15 * len(soft_violations)
            rb.components["soft_violations"] = float(len(soft_violations))

        # Penalise meta-only churn before any evidence was collected.
        if (
            action.action_type in META_ACTIONS
            and action.action_type != ActionType.SUBMIT_VALIDATION_REPORT
            and not next_dims
        ):
            rb.penalty -= 0.20
            rb.components["premature_meta_action_penalty"] = -0.20

        # Potential-based shaping over evidence coverage.
        phi_prev = self._potential(prev_state)
        phi_next = self._potential(next_state)
        rb.shaping = phi_next - phi_prev

        return rb

    # ── terminal reward ─────────────────────────────────────────────────

    def terminal_reward(
        self,
        state: FullLatentState,
        final_decision: Optional[str],
        confidence: Optional[float],
        action_history: Optional[List[str]] = None,
    ) -> RewardBreakdown:
        """Compute the end-of-episode reward.

        Args:
            state: Final latent state.
            final_decision: The submitted decision, or ``None``.
            confidence: The submitted confidence, or ``None``.
            action_history: Action-type values for the whole trajectory.

        Returns:
            A ``RewardBreakdown`` whose ``terminal`` field is the weighted
            composite and whose other fields hold the raw component scores
            and accumulated penalties.
        """
        rb = RewardBreakdown()
        target = state.target

        discovered_dims = TransitionEngine.covered_evidence_dimensions(state)
        coverage = score_evidence_coverage(
            discovered_dims, target.key_evidence_dimensions
        )
        rb.evidence_coverage = coverage
        rb.components["discovered_dims_count"] = float(len(discovered_dims))
        rb.components["required_dims_count"] = float(
            len(target.key_evidence_dimensions)
        )

        decision_signed = score_decision_accuracy(
            final_decision, confidence, target.correct_decision,
        )
        # Map signed score to non-negative ``decision_accuracy`` and route
        # the negative arm into ``penalty`` so the breakdown is readable.
        rb.decision_accuracy = max(0.0, decision_signed)
        if decision_signed < 0:
            rb.penalty += decision_signed  # negative
            rb.components["confident_wrong_answer_penalty"] = decision_signed

        # Credit efficiency.  The remaining-budget fraction is logged for
        # diagnostics only; the score itself depends solely on redundancy.
        credits_total = max(1, state.credits.credits_total)
        credits_remaining_frac = state.credits.credits_remaining / credits_total
        # Penalise running totally redundant calls (count > 2 of same type).
        redundant_calls = sum(
            max(0, count - 2) for count in state.action_call_counts.values()
        )
        total_calls = max(1, sum(state.action_call_counts.values()))
        redundancy_frac = redundant_calls / total_calls
        credit_efficiency = max(0.0, 1.0 - redundancy_frac)
        rb.credit_efficiency = credit_efficiency
        rb.components["credits_remaining_frac"] = credits_remaining_frac
        rb.components["redundancy_frac"] = redundancy_frac

        # Reasoning coherence on the full trajectory.
        coherence = score_reasoning_coherence(action_history or [])
        rb.reasoning_coherence = coherence
        rb.components["final_reasoning_coherence"] = coherence

        # Hard penalties.
        if not state.progress.report_submitted:
            rb.penalty -= 1.0
            rb.components["no_report_submitted_penalty"] = -1.0
        if (
            state.progress.report_submitted
            and (final_decision is None or confidence is None)
        ):
            rb.penalty -= 1.0
            rb.components["malformed_report_penalty"] = -1.0
        if (
            state.progress.report_submitted
            and confidence is not None
            and confidence < 0.30
        ):
            rb.penalty -= 0.30
            rb.components["low_confidence_submission_penalty"] = -0.30

        rb.terminal = (
            0.40 * rb.decision_accuracy
            + 0.35 * coverage
            + 0.15 * credit_efficiency
            + 0.10 * coherence
        )
        return rb

    # ── helpers ─────────────────────────────────────────────────────────

    @staticmethod
    def _potential(state: FullLatentState) -> float:
        """Progress potential φ(s) — fraction of *target* evidence
        dimensions covered.  Returns 0.0 once a report has been submitted
        so the shaping signal telescopes correctly.

        When the target declares no key dimensions, the potential falls back
        to the number of covered dimensions divided by 6, capped at 1.0.
        """
        if state.progress.report_submitted:
            return 0.0
        target = state.target
        dims = TransitionEngine.covered_evidence_dimensions(state)
        if not target.key_evidence_dimensions:
            return min(1.0, len(dims) / 6.0)
        # Build the lookup set once instead of once per required dimension.
        covered = set(dims)
        hits = sum(1 for d in target.key_evidence_dimensions if d in covered)
        return hits / len(target.key_evidence_dimensions)
|
server/rules/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .engine import RuleEngine, RuleViolation
|
| 2 |
+
|
| 3 |
+
__all__ = ["RuleEngine", "RuleViolation"]
|
server/rules/engine.py
ADDED
|
@@ -0,0 +1,210 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Pharma rule engine — hard and soft constraint checking.
|
| 2 |
+
|
| 3 |
+
Hard violations block action execution entirely (the action still
|
| 4 |
+
deducts no credits and the simulator returns a ``FailureReport``).
|
| 5 |
+
Soft violations allow execution but degrade output quality and incur
|
| 6 |
+
penalties.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
from __future__ import annotations
|
| 10 |
+
|
| 11 |
+
from dataclasses import dataclass
|
| 12 |
+
from enum import Enum
|
| 13 |
+
from typing import Iterable, List, Optional
|
| 14 |
+
|
| 15 |
+
from models import ActionType, DrugTargetAction
|
| 16 |
+
|
| 17 |
+
from server.simulator.latent_state import FullLatentState
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
class Severity(str, Enum):
    """Severity level attached to a rule violation.

    Members are also ``str`` instances, so they compare equal to their
    plain string values.
    """

    HARD = "hard"  # reported via RuleEngine.hard_violations
    SOFT = "soft"  # reported via RuleEngine.soft_violations
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
@dataclass
class RuleViolation:
    """A single rule that an action failed, as produced by ``RuleEngine``."""

    # Stable identifier of the rule, e.g. "credits_exhausted".
    rule_id: str
    # Whether the violation is hard or soft.
    severity: Severity
    # Human-readable explanation of the violation.
    message: str
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
class RuleEngine:
    """Checks a proposed action against the drug-target-validation rules,
    using the current latent state, before the action is applied.
    """

    def check(
        self,
        action: DrugTargetAction,
        state: FullLatentState,
        *,
        evidence_dimensions_covered: Optional[Iterable[str]] = None,
    ) -> List[RuleViolation]:
        """Run every rule family and return all violations found.

        Results are concatenated in a fixed order: resource constraints,
        submission validation, redundancy, then ordering.
        """
        found: List[RuleViolation] = []
        found += self._check_resource_constraints(action, state)
        found += self._check_submission(
            action, state, evidence_dimensions_covered or [],
        )
        found += self._check_redundancy(action, state)
        found += self._check_ordering(action, state)
        return found

    @staticmethod
    def hard_violations(violations: List[RuleViolation]) -> List[str]:
        """Return the messages of the hard violations, in input order."""
        messages: List[str] = []
        for violation in violations:
            if violation.severity == Severity.HARD:
                messages.append(violation.message)
        return messages

    @staticmethod
    def soft_violations(violations: List[RuleViolation]) -> List[str]:
        """Return the messages of the soft violations, in input order."""
        messages: List[str] = []
        for violation in violations:
            if violation.severity == Severity.SOFT:
                messages.append(violation.message)
        return messages

    # ── resource / credit constraints ───────────────────────────────────

    def _check_resource_constraints(
        self, action: DrugTargetAction, s: FullLatentState
    ) -> List[RuleViolation]:
        """Reject actions the remaining credit budget cannot pay for.

        Once credits are exhausted, every action except submitting the
        validation report is a hard violation.  Otherwise an action whose
        positive cost exceeds the remaining credits is a hard violation.
        """
        # Imported here rather than at module top, as in the original.
        from server.simulator.transition import compute_action_cost

        cost = compute_action_cost(action)
        is_submit = action.action_type == ActionType.SUBMIT_VALIDATION_REPORT
        if s.credits.exhausted and not is_submit:
            return [RuleViolation(
                rule_id="credits_exhausted",
                severity=Severity.HARD,
                message="Credits exhausted - submit validation report or end episode",
            )]
        if cost > s.credits.credits_remaining and cost > 0:
            remaining = s.credits.credits_remaining
            return [RuleViolation(
                rule_id="credits_insufficient",
                severity=Severity.HARD,
                message=f"Action costs {cost} credits but only {remaining} remain",
            )]
        return []

    # ── submission validation ───────────────────────────────────────────

    def _check_submission(
        self,
        action: DrugTargetAction,
        s: FullLatentState,
        evidence_dimensions_covered: Iterable[str],
    ) -> List[RuleViolation]:
        """Validate a report submission; other actions yield no violations.

        Hard violations: no evidence gathered, missing or invalid
        ``final_decision``, missing ``confidence``.  A confidence below 0.30
        is a soft violation.
        """
        if action.action_type != ActionType.SUBMIT_VALIDATION_REPORT:
            return []

        problems: List[RuleViolation] = []

        # Hard: report with no evidence at all.
        gathered = list(evidence_dimensions_covered)
        if not gathered:
            problems.append(RuleViolation(
                rule_id="report_without_evidence",
                severity=Severity.HARD,
                message="Cannot submit validation report without gathering any evidence",
            ))

        # Hard: report missing decision or confidence.
        decision = action.final_decision
        if decision is None:
            problems.append(RuleViolation(
                rule_id="report_missing_decision",
                severity=Severity.HARD,
                message="Submitting validation report without a final_decision is not allowed",
            ))
        elif decision.lower() not in {"go", "no_go"}:
            problems.append(RuleViolation(
                rule_id="report_invalid_decision",
                severity=Severity.HARD,
                message=f"final_decision must be 'go' or 'no_go', got {decision!r}",
            ))

        confidence = action.confidence
        if confidence is None:
            problems.append(RuleViolation(
                rule_id="report_missing_confidence",
                severity=Severity.HARD,
                message="Submitting validation report without a confidence score is not allowed",
            ))
        elif confidence < 0.30:
            problems.append(RuleViolation(
                rule_id="report_low_confidence",
                severity=Severity.SOFT,
                message=(
                    f"Submitting with very low confidence ({confidence:.2f}) "
                    "— the agent appears poorly calibrated"
                ),
            ))

        return problems

    # ── redundancy checks ───────────────────────────────────────────────

    def _check_redundancy(
        self, action: DrugTargetAction, s: FullLatentState
    ) -> List[RuleViolation]:
        """Flag repeated work.

        Red-flag actions are never redundant.  A second report submission is
        a hard violation.  Any other action type already executed two or more
        times draws a soft violation.
        """
        kind = action.action_type
        if kind == ActionType.FLAG_RED_FLAG:
            return []

        if kind == ActionType.SUBMIT_VALIDATION_REPORT:
            if not s.progress.report_submitted:
                return []
            return [RuleViolation(
                rule_id="duplicate_report",
                severity=Severity.HARD,
                message="Validation report has already been submitted",
            )]

        count = s.action_call_counts.get(kind.value, 0)
        if count < 2:
            return []
        return [RuleViolation(
            rule_id=f"redundant_{kind.value}",
            severity=Severity.SOFT,
            message=(
                f"Action '{kind.value}' has already been executed "
                f"{count} time(s); further repeats are redundant"
            ),
        )]

    # ── ordering checks ─────────────────────────────────────────────────

    def _check_ordering(
        self, action: DrugTargetAction, s: FullLatentState
    ) -> List[RuleViolation]:
        """Emit soft violations for out-of-order steps.

        Covers an in-vivo model run before any in-vitro assay, and a toxicity
        panel run before any expression query.
        """
        out_of_order: List[RuleViolation] = []
        progress = s.progress

        if action.action_type == ActionType.IN_VIVO_MODEL and not progress.in_vitro_done:
            out_of_order.append(RuleViolation(
                rule_id="in_vivo_before_in_vitro",
                severity=Severity.SOFT,
                message="Running in_vivo_model before in_vitro_assay is scientifically backwards",
            ))
        if action.action_type == ActionType.TOXICITY_PANEL and not progress.expression_queried:
            out_of_order.append(RuleViolation(
                rule_id="toxicity_before_expression",
                severity=Severity.SOFT,
                message=(
                    "Toxicity panel before any expression query — tissue-specific "
                    "toxicity will be hard to interpret"
                ),
            ))
        return out_of_order
|
server/simulator/__init__.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .latent_state import (
|
| 2 |
+
CreditState,
|
| 3 |
+
DataQualityState,
|
| 4 |
+
FullLatentState,
|
| 5 |
+
TargetProfile,
|
| 6 |
+
ValidationProgress,
|
| 7 |
+
)
|
| 8 |
+
from .noise import NoiseModel
|
| 9 |
+
from .output_generator import OutputGenerator
|
| 10 |
+
from .transition import TransitionEngine
|
| 11 |
+
|
| 12 |
+
__all__ = [
|
| 13 |
+
"CreditState",
|
| 14 |
+
"DataQualityState",
|
| 15 |
+
"FullLatentState",
|
| 16 |
+
"NoiseModel",
|
| 17 |
+
"OutputGenerator",
|
| 18 |
+
"TargetProfile",
|
| 19 |
+
"TransitionEngine",
|
| 20 |
+
"ValidationProgress",
|
| 21 |
+
]
|
server/simulator/latent_state.py
ADDED
|
@@ -0,0 +1,175 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Hidden ground-truth target state for the drug-target-validation POMDP.
|
| 2 |
+
|
| 3 |
+
The agent never directly observes any of these models; it must infer them
|
| 4 |
+
through investigation. The simulator uses ``FullLatentState`` to generate
|
| 5 |
+
all simulated outputs and to compute terminal rewards.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
from typing import List, Optional
|
| 11 |
+
|
| 12 |
+
from pydantic import BaseModel, Field
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class TargetProfile(BaseModel):
    """Ground-truth properties of the drug target, hidden from the agent."""

    # ── Expression ──────────────────────────────────────────────────────
    expression_level: str = Field(
        default="moderate",
        description="One of 'high_specific', 'high_nonspecific', 'moderate', 'low'.",
    )
    tissue_specificity: float = Field(default=0.5, ge=0.0, le=1.0)
    disease_overexpression: float = Field(
        default=1.0,
        description="Fold change vs. matched normal tissue.",
    )

    # ── Druggability ────────────────────────────────────────────────────
    druggability_score: float = Field(default=0.5, ge=0.0, le=1.0)
    binding_pocket_quality: str = Field(
        default="good",
        description="One of 'excellent', 'good', 'poor', 'undruggable'.",
    )
    has_known_ligands: bool = False
    allosteric_site_available: bool = Field(
        default=False,
        description=(
            "Whether a non-classical (allosteric) druggable site exists. Only "
            "revealed by binding-site analyses with the appropriate parameters."
        ),
    )

    # ── Selectivity ─────────────────────────────────────────────────────
    selectivity_ratio: float = Field(
        default=5.0,
        description="On-target vs off-target activity ratio.",
    )
    off_target_count: int = 0
    off_target_genes: List[str] = Field(default_factory=list)

    # ── Safety ──────────────────────────────────────────────────────────
    toxicity_profile: str = Field(
        default="mild",
        description="One of 'clean', 'mild', 'moderate', 'severe'.",
    )
    toxicity_tissues: List[str] = Field(default_factory=list)

    # ── Clinical ────────────────────────────────────────────────────────
    clinical_precedent: str = Field(
        default="none",
        description="One of 'positive', 'mixed', 'negative', 'none'.",
    )
    clinical_stage_reached: Optional[str] = Field(
        default=None,
        description=(
            "Highest clinical stage previously reached: "
            "'phase1' / 'phase2' / 'phase3' / None."
        ),
    )
    competitor_programs: List[str] = Field(default_factory=list)

    # ── Patient stratification / biomarker context ──────────────────────
    requires_patient_stratification: bool = False
    responder_biomarker: Optional[str] = None

    # ── In-vitro / in-vivo expectations ─────────────────────────────────
    in_vitro_ic50_nM: float = Field(
        default=100.0,
        description="Expected on-target IC50 (nM).",
    )
    in_vivo_efficacy: str = Field(
        default="moderate",
        description=(
            "Expected pharmacological efficacy in disease-relevant "
            "models: 'strong', 'moderate', 'weak', 'none'."
        ),
    )
    crispr_essentiality: float = Field(
        default=-0.3,
        description=(
            "DepMap-style essentiality score "
            "(more negative = more essential)."
        ),
    )

    # ── Hidden truth used for terminal reward computation ───────────────
    true_viability_score: float = Field(default=0.5, ge=0.0, le=1.0)
    correct_decision: str = Field(
        default="no_go",
        description="Either 'go' or 'no_go'.",
    )
    misleading_signals: List[str] = Field(default_factory=list)
    key_evidence_dimensions: List[str] = Field(
        default_factory=list,
        description=(
            "Evidence categories the agent must touch to score well, "
            "e.g. 'expression', 'druggability', 'off_target', "
            "'toxicity', 'clinical', 'literature', 'in_vitro', "
            "'in_vivo', 'patient_stratification'."
        ),
    )
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
class DataQualityState(BaseModel):
    """Noise parameters applied to simulated experimental outputs.

    Every field is constrained to the closed interval [0, 1].
    """

    noise_level: float = Field(default=0.1, ge=0.0, le=1.0)
    false_positive_rate: float = Field(default=0.05, ge=0.0, le=1.0)
    false_negative_rate: float = Field(default=0.05, ge=0.0, le=1.0)
    database_coverage: float = Field(default=0.85, ge=0.0, le=1.0)
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
class CreditState(BaseModel):
    """Bookkeeping for the single, unified experimental-credit budget."""

    credits_total: int = 50
    credits_used: int = 0

    @property
    def credits_remaining(self) -> int:
        """Credits still available; never negative, even if overspent."""
        left = self.credits_total - self.credits_used
        return left if left > 0 else 0

    @property
    def exhausted(self) -> bool:
        """True once the credits used reach or exceed the total budget."""
        return self.credits_total <= self.credits_used
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
class ValidationProgress(BaseModel):
    """Boolean milestones recording which investigations have been done.

    Every flag starts as ``False``.
    """

    # Core evidence dimensions.
    expression_queried: bool = False
    druggability_assessed: bool = False
    selectivity_checked: bool = False
    toxicity_assessed: bool = False
    clinical_checked: bool = False
    literature_reviewed: bool = False

    # Experimental work.
    in_vitro_done: bool = False
    in_vivo_done: bool = False
    patient_stratification_done: bool = False

    # Supporting analyses.
    pathway_analysed: bool = False
    structure_resolved: bool = False
    interactions_mapped: bool = False
    crispr_done: bool = False
    biomarker_correlated: bool = False

    # Synthesis, review and the final report.
    evidence_synthesised: bool = False
    expert_reviewed: bool = False
    report_submitted: bool = False
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
class FullLatentState(BaseModel):
    """The whole hidden state of the simulated drug-target world.

    It bundles the ground-truth target, the data-quality parameters, the
    credit budget and the progress flags; each sub-model gets a fresh
    default instance.
    """

    target: TargetProfile = Field(default_factory=TargetProfile)
    data_quality: DataQualityState = Field(default_factory=DataQualityState)
    credits: CreditState = Field(default_factory=CreditState)
    progress: ValidationProgress = Field(default_factory=ValidationProgress)

    # Per-action-type execution counts, consulted by the rules and rewards.
    action_call_counts: dict = Field(default_factory=dict)
    rng_seed: int = 0
|
server/simulator/noise.py
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Stochastic noise models for the biological simulator."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
from typing import Dict, List, Tuple
|
| 6 |
+
|
| 7 |
+
import numpy as np
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class NoiseModel:
    """Generates calibrated noise for simulated experimental outputs.

    All randomness is funnelled through a single ``numpy.Generator``
    so that episodes are reproducible given the same seed.  The order in
    which the methods draw from ``self.rng`` is therefore part of the
    observable behaviour and must not be changed casually.
    """

    def __init__(self, seed: int = 42):
        """Create the model with a generator seeded by *seed*."""
        self.rng = np.random.default_rng(seed)

    def reseed(self, seed: int) -> None:
        """Discard the current generator and start a fresh one from *seed*."""
        self.rng = np.random.default_rng(seed)

    @staticmethod
    def _standard_error(noise_level: float, sample_size: int) -> float:
        """Return ``noise_level / sqrt(n)`` with ``n`` floored at 1."""
        return noise_level / max(np.sqrt(max(sample_size, 1)), 1e-6)

    # ── expression-level noise ──────────────────────────────────────────

    def add_expression_noise(
        self,
        true_values: Dict[str, float],
        noise_level: float,
        dropout_rate: float,
    ) -> Dict[str, float]:
        """Return a noisy copy of *true_values* with random dropouts.

        For each gene one uniform draw decides whether the value drops out
        (is reported as ``0.0``); otherwise Gaussian noise with standard
        deviation ``noise_level * |value| + 0.1`` is added.
        """
        noisy: Dict[str, float] = {}
        for gene, value in true_values.items():
            # Dropout probability is inversely proportional to expression
            # magnitude: lowly expressed genes drop out much more readily,
            # matching the zero-inflation pattern in real scRNA-seq data.
            p_drop = dropout_rate / (1.0 + abs(value))
            if self.rng.random() < p_drop:
                noisy[gene] = 0.0
            else:
                sigma = noise_level * abs(value) + 0.1
                noisy[gene] = float(value + self.rng.normal(0, sigma))
        return noisy

    # ── effect-size sampling ────────────────────────────────────────────

    def sample_effect_sizes(
        self,
        true_effects: Dict[str, float],
        sample_size: int,
        noise_level: float,
    ) -> Dict[str, float]:
        """Return each true effect perturbed by Gaussian sampling error.

        The standard error shrinks with ``sqrt(sample_size)``; one normal
        draw is made per gene, in the iteration order of *true_effects*.
        """
        se = self._standard_error(noise_level, sample_size)
        return {
            gene: float(effect + self.rng.normal(0, se))
            for gene, effect in true_effects.items()
        }

    def sample_p_values(
        self,
        true_effects: Dict[str, float],
        sample_size: int,
        noise_level: float,
    ) -> Dict[str, float]:
        """Simulate approximate p-values from z-statistics.

        Each p-value is the two-sided normal tail ``2 * sf(|effect| / se)``.
        No random draw is made, so the generator state is left untouched.
        """
        # Imported lazily so scipy is only required when p-values are used.
        from scipy import stats  # type: ignore[import-untyped]

        se = self._standard_error(noise_level, sample_size)
        p_values: Dict[str, float] = {}
        for gene, effect in true_effects.items():
            # Floor the denominator so a zero noise level cannot divide by 0.
            z = abs(effect) / max(se, 1e-8)
            p_values[gene] = float(2 * stats.norm.sf(z))
        return p_values

    # ── false discovery helpers ─────────────────────────────────────────

    def generate_false_positives(
        self, n_background_genes: int, fdr: float
    ) -> List[str]:
        """Return placeholder names for a binomially sampled number of
        false-positive genes (``Binomial(n_background_genes, fdr)``)."""
        n_fp = int(self.rng.binomial(n_background_genes, fdr))
        return [f"FP_GENE_{i}" for i in range(n_fp)]

    def generate_false_negatives(
        self, true_genes: List[str], fnr: float
    ) -> List[str]:
        """Return the subset of *true_genes* that are missed."""
        return [g for g in true_genes if self.rng.random() < fnr]

    # ── quality helpers ─────────────────────────────────────────────────

    def quality_degradation(
        self, base_quality: float, factors: List[float]
    ) -> float:
        """Multiply *base_quality* by every factor, add small Gaussian
        jitter (sd 0.02) and clip the result to ``[0, 1]``."""
        q = base_quality
        for f in factors:
            q *= f
        return float(np.clip(q + self.rng.normal(0, 0.02), 0.0, 1.0))

    def sample_qc_metric(
        self, mean: float, std: float, clip_lo: float = 0.0, clip_hi: float = 1.0
    ) -> float:
        """Draw ``Normal(mean, std)`` and clip it to ``[clip_lo, clip_hi]``."""
        return float(np.clip(self.rng.normal(mean, std), clip_lo, clip_hi))

    def sample_count(self, lam: float) -> int:
        """Draw a Poisson count; a negative rate is treated as 0."""
        return int(self.rng.poisson(max(lam, 0)))

    def coin_flip(self, p: float) -> bool:
        """Return ``True`` with probability *p*."""
        return bool(self.rng.random() < p)

    def sample_cluster_count(
        self, n_true_populations: int, quality: float
    ) -> int:
        """Over- or under-clustering depending on preprocessing quality.

        A uniform offset in ``[-2, 2]`` plus up to three spurious clusters
        (more as *quality* falls) is added to the true population count;
        the result is never below 1.
        """
        # ``Generator.integers`` yields a NumPy integer; convert so the
        # return value is a plain ``int`` as annotated (and serialisable).
        delta = int(self.rng.integers(-2, 3))
        noise_clusters = max(0, int(round((1.0 - quality) * 3)))
        return max(1, int(n_true_populations + delta + noise_clusters))

    def shuffle_ranking(
        self, items: List[str], noise_level: float
    ) -> List[str]:
        """Permute a ranking with Gaussian noise on ordinals.

        Each item's rank is jittered with sd ``noise_level * len(items)``
        and the items are returned sorted by the jittered rank.  An empty
        input returns an empty list without touching the generator.
        """
        n = len(items)
        if n == 0:
            return []
        scores = np.arange(n, dtype=float) + self.rng.normal(
            0, noise_level * n, size=n
        )
        order = np.argsort(scores)
        return [items[int(i)] for i in order]
|
server/simulator/output_generator.py
ADDED
|
@@ -0,0 +1,695 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Generate simulated drug-target-validation outputs from latent state."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
from typing import Any, Dict, List
|
| 6 |
+
|
| 7 |
+
from models import (
|
| 8 |
+
ActionType,
|
| 9 |
+
DrugTargetAction,
|
| 10 |
+
IntermediateOutput,
|
| 11 |
+
OutputType,
|
| 12 |
+
)
|
| 13 |
+
|
| 14 |
+
from .latent_state import FullLatentState, TargetProfile
|
| 15 |
+
from .noise import NoiseModel
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
# Tissues that may be appended to a toxicity panel as a spurious hit when the
# false-positive coin flip fires.
_NOISE_TISSUES: List[str] = [
    "liver",
    "kidney",
    "GI",
    "skin",
    "cardiac",
    "CNS",
    "lung",
]
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class OutputGenerator:
|
| 26 |
+
"""Creates structured ``IntermediateOutput`` objects from the hidden
|
| 27 |
+
``TargetProfile`` plus a stochastic noise model.
|
| 28 |
+
|
| 29 |
+
Every action has a dedicated handler that:
|
| 30 |
+
- reads relevant fields from the ``TargetProfile``
|
| 31 |
+
- applies ``DataQualityState``-driven noise (false positive / false
|
| 32 |
+
negative / database coverage)
|
| 33 |
+
- returns a typed ``IntermediateOutput`` whose ``data`` dict is the
|
| 34 |
+
evidence the agent reasons over.
|
| 35 |
+
"""
|
| 36 |
+
|
| 37 |
+
def __init__(self, noise: NoiseModel):
    """Keep a reference to the noise model used by the handlers."""
    self.noise = noise
|
| 39 |
+
|
| 40 |
+
def generate(
    self,
    action: DrugTargetAction,
    state: FullLatentState,
    step_index: int,
) -> IntermediateOutput:
    """Dispatch *action* to its handler and return the resulting output.

    The handler is looked up in ``_HANDLERS`` by ``action.action_type``
    and invoked as ``handler(self, action, state, step_index)``.  After
    the handler returns, the output's ``quality_score`` is scaled by
    ``0.5 + 0.5 * database_coverage`` (floored at 0) whenever coverage is
    below 1.0.
    """
    # ``_HANDLERS`` entries are called with an explicit ``self``, so the
    # fallback must be the plain function as well: a bound ``self._default``
    # would receive ``self`` twice.  Looking it up on the class yields the
    # unbound function.  (Assumes ``_default`` shares the
    # ``(self, action, state, step_index)`` signature of the other
    # handlers — TODO confirm.)
    handler = _HANDLERS.get(action.action_type, type(self)._default)
    out = handler(self, action, state, step_index)
    # Database coverage globally reduces quality_score for under-curated
    # targets.
    coverage = state.data_quality.database_coverage
    if coverage < 1.0:
        out.quality_score = float(
            max(0.0, out.quality_score * (0.5 + 0.5 * coverage))
        )
    return out
|
| 56 |
+
|
| 57 |
+
# ── Expression & omics ──────────────────────────────────────────────
|
| 58 |
+
|
| 59 |
+
def _query_expression(
    self, action: DrugTargetAction, s: FullLatentState, idx: int
) -> IntermediateOutput:
    """Report the target's expression level with noisy specificity and
    over-expression estimates.

    The random draws happen in a fixed order: the false-positive coin
    flip, then the specificity jitter, then the over-expression jitter.
    """
    profile = s.target
    dq = s.data_quality
    database = action.parameters.get("database", "GTEx")

    # A "flipped" query only lowers the reported quality score.
    low_confidence = self.noise.coin_flip(dq.false_positive_rate)

    raw_specificity = profile.tissue_specificity + self.noise.rng.normal(
        0, dq.noise_level
    )
    specificity = float(max(0.0, min(1.0, raw_specificity)))

    raw_overexpr = profile.disease_overexpression + self.noise.rng.normal(
        0, 0.4 * dq.noise_level
    )
    overexpr = float(max(0.1, raw_overexpr))

    # Soft summary that *can* mislead when expression is high but
    # non-specific: both "high" levels get the fold-change suffix.
    summary = f"{database}: {profile.expression_level} expression"
    if profile.expression_level in {"high_specific", "high_nonspecific"}:
        summary += f" ({overexpr:.2f}× over normal)"

    payload = {
        "expression_level": profile.expression_level,
        "tissue_specificity": round(specificity, 3),
        "disease_overexpression": round(overexpr, 2),
        "specificity_concern": profile.expression_level == "high_nonspecific",
        "database": database,
    }
    return IntermediateOutput(
        output_type=OutputType.EXPRESSION_RESULT,
        step_index=idx,
        quality_score=0.55 if low_confidence else 0.85,
        summary=summary,
        data=payload,
        uncertainty=0.10 + 0.5 * dq.noise_level,
        artifacts_available=["expression_table"],
    )
|
| 103 |
+
|
| 104 |
+
def _differential_expression(
    self, action: DrugTargetAction, s: FullLatentState, idx: int
) -> IntermediateOutput:
    """Simulate a differential-expression call for the target.

    The log2 fold change is drawn around 0 when the target is not
    over-expressed, otherwise around ``max(0.5, 1.5 * (overexpr - 1))``;
    the number of co-regulated genes is a Poisson count.
    """
    profile = s.target
    cohort = action.parameters.get("cohort", "TCGA")

    if profile.disease_overexpression < 1.0:
        mean_fc = 0.0
    else:
        mean_fc = max(0.5, 1.5 * (profile.disease_overexpression - 1.0))
    spread = 0.4 + s.data_quality.noise_level
    log2fc = float(self.noise.rng.normal(mean_fc, spread))

    n_de_genes = self.noise.sample_count(
        40 + int(20 * profile.disease_overexpression)
    )

    # The profile may not carry a ``target`` attribute; fall back to "".
    label = getattr(profile, "target", "")
    summary = (
        f"DE in {cohort}: {label} log2FC≈{log2fc:.2f}, "
        f"{n_de_genes} co-regulated genes"
    )
    return IntermediateOutput(
        output_type=OutputType.DE_RESULT,
        step_index=idx,
        quality_score=0.80,
        summary=summary,
        data={
            "target_log2fc": round(log2fc, 3),
            "n_de_genes": n_de_genes,
            "cohort": cohort,
        },
        uncertainty=0.15 + s.data_quality.noise_level,
        artifacts_available=["de_table"],
    )
|
| 131 |
+
|
| 132 |
+
def _pathway_enrichment(
    self, action: DrugTargetAction, s: FullLatentState, idx: int
) -> IntermediateOutput:
    """Return four fixed pathways with Gaussian-jittered scores.

    The scores do not read the target profile; each is a fixed baseline
    plus one ``Normal(0, 0.1)`` draw, taken in the order listed below.
    """
    # Pathway calls are largely driven by indication-level priors.
    baselines = (
        ("MAPK_signalling", 0.6),
        ("Cell_cycle", 0.55),
        ("Apoptosis", 0.45),
        ("DNA_damage_response", 0.40),
    )
    top_pathways = [
        {"pathway": name, "score": round(base + self.noise.rng.normal(0, 0.1), 3)}
        for name, base in baselines
    ]
    return IntermediateOutput(
        output_type=OutputType.PATHWAY_RESULT,
        step_index=idx,
        quality_score=0.70,
        summary=f"Pathway enrichment: {len(top_pathways)} top pathways",
        data={"top_pathways": top_pathways},
        uncertainty=0.20,
        artifacts_available=["enrichment_table"],
    )
|
| 151 |
+
|
| 152 |
+
def _coexpression_network(
    self, action: DrugTargetAction, s: FullLatentState, idx: int
) -> IntermediateOutput:
    """List up to five of the target's off-target genes followed by two
    placeholder partners."""
    partners = list(s.target.off_target_genes[:5])
    partners.extend(f"PARTNER_{i}" for i in range(2))
    return IntermediateOutput(
        output_type=OutputType.COEXPRESSION_RESULT,
        step_index=idx,
        quality_score=0.65,
        summary=f"{len(partners)} top coexpression partners identified",
        data={"partners": partners},
        uncertainty=0.25,
        artifacts_available=["coexpression_table"],
    )
|
| 167 |
+
|
| 168 |
+
# ── Protein & structure ─────────────────────────────────────────────
|
| 169 |
+
|
| 170 |
+
def _protein_structure_lookup(
    self, action: DrugTargetAction, s: FullLatentState, idx: int
) -> IntermediateOutput:
    """Simulate a structure lookup whose confidence (pLDDT) doubles as the
    output's quality score.

    Draw order: the pLDDT metric first, then the residue count.
    """
    method = action.parameters.get("method", "AlphaFold")
    plddt = float(self.noise.sample_qc_metric(0.78, 0.08, 0.30, 1.0))
    n_residues = int(self.noise.sample_count(420))
    return IntermediateOutput(
        output_type=OutputType.STRUCTURE_RESULT,
        step_index=idx,
        quality_score=plddt,
        summary=f"{method} structure resolved (pLDDT={plddt:.2f})",
        data={
            "method": method,
            "pLDDT": round(plddt, 3),
            "n_residues": n_residues,
        },
        # Uncertainty is the complement of the confidence.
        uncertainty=1.0 - plddt,
        artifacts_available=["pdb_structure"],
    )
|
| 188 |
+
|
| 189 |
+
def _binding_site_analysis(
    self, action: DrugTargetAction, s: FullLatentState, idx: int
) -> IntermediateOutput:
    """Score the classic binding pocket and, on request, an allosteric site.

    The allosteric site is only reported when the action asks for it
    (``include_allosteric``) *and* the profile has one; only then is a
    second random draw made.  A pocket quality outside the four known
    labels raises ``KeyError``.
    """
    profile = s.target
    want_allosteric = bool(action.parameters.get("include_allosteric", False))

    pocket_baseline = {
        "excellent": 0.92,
        "good": 0.70,
        "poor": 0.32,
        "undruggable": 0.10,
    }
    classic = float(
        self.noise.sample_qc_metric(
            pocket_baseline[profile.binding_pocket_quality], 0.05, 0.0, 1.0
        )
    )

    found_allosteric = bool(want_allosteric and profile.allosteric_site_available)
    if found_allosteric:
        allosteric = float(self.noise.sample_qc_metric(0.65, 0.08, 0.0, 1.0))
    else:
        allosteric = 0.0

    summary = f"Binding-site analysis: classic_score={classic:.2f}"
    if found_allosteric:
        summary += f", allosteric_site_score={allosteric:.2f}"

    return IntermediateOutput(
        output_type=OutputType.BINDING_SITE_RESULT,
        step_index=idx,
        # The better of the two sites determines the reported quality.
        quality_score=max(classic, allosteric),
        summary=summary,
        data={
            "binding_pocket_quality": profile.binding_pocket_quality,
            "classic_score": round(classic, 3),
            "allosteric_site_detected": found_allosteric,
            "allosteric_site_score": (
                round(allosteric, 3) if found_allosteric else None
            ),
            "include_allosteric": want_allosteric,
        },
        uncertainty=0.12,
        artifacts_available=["pocket_table"],
    )
|
| 226 |
+
|
| 227 |
+
def _protein_interaction_network(
    self, action: DrugTargetAction, s: FullLatentState, idx: int
) -> IntermediateOutput:
    """Report up to six of the target's off-target genes as interactors.

    No randomness is involved; the source defaults to ``"STRING"``.
    """
    interactors = list(s.target.off_target_genes[:6])
    source = action.parameters.get("source", "STRING")
    return IntermediateOutput(
        output_type=OutputType.INTERACTION_RESULT,
        step_index=idx,
        quality_score=0.70,
        summary=f"{len(interactors)} high-confidence interactors",
        data={"partners": interactors, "source": source},
        uncertainty=0.20,
        artifacts_available=["ppi_network"],
    )
|
| 243 |
+
|
| 244 |
+
def _druggability_screen(
    self, action: DrugTargetAction, s: FullLatentState, idx: int
) -> IntermediateOutput:
    """Report a noisy druggability score plus pocket and ligand facts.

    Draw order: the observed score first, then the known-ligand count
    (Poisson with rate 20 when ligands are known, otherwise 1).
    """
    profile = s.target
    score = float(
        self.noise.sample_qc_metric(profile.druggability_score, 0.06, 0.0, 1.0)
    )
    ligand_rate = 20 if profile.has_known_ligands else 1
    n_ligands = int(self.noise.sample_count(ligand_rate))

    summary = (
        f"Druggability score={score:.2f}, "
        f"pocket={profile.binding_pocket_quality}, "
        f"known_ligands={profile.has_known_ligands}"
    )
    return IntermediateOutput(
        output_type=OutputType.DRUGGABILITY_RESULT,
        step_index=idx,
        quality_score=0.85,
        summary=summary,
        data={
            "druggability_score": round(score, 3),
            "binding_pocket_quality": profile.binding_pocket_quality,
            "has_known_ligands": profile.has_known_ligands,
            "n_known_ligands": n_ligands,
        },
        uncertainty=0.15,
        artifacts_available=["druggability_report"],
    )
|
| 271 |
+
|
| 272 |
+
# ── Clinical & safety ───────────────────────────────────────────────
|
| 273 |
+
|
| 274 |
+
def _clinical_trial_lookup(
    self, action: DrugTargetAction, s: FullLatentState, idx: int
) -> IntermediateOutput:
    """Summarise the target's clinical precedent as positive and negative
    signals.

    "positive" and "mixed" precedents yield a positive signal; "mixed" and
    "negative" yield a failure signal; "negative" adds a second one.  The
    handler is deterministic.
    """
    profile = s.target
    precedent = profile.clinical_precedent
    stage = profile.clinical_stage_reached

    positive_signals: List[str] = []
    if precedent in {"positive", "mixed"}:
        positive_signals.append(
            f"Reached {stage or 'preclinical'} with at least one program"
        )

    negative_signals: List[str] = []
    if precedent in {"mixed", "negative"}:
        negative_signals.append("Prior failures or withdrawals on record")
    if precedent == "negative":
        negative_signals.append("No active programs progressing")

    return IntermediateOutput(
        output_type=OutputType.CLINICAL_RESULT,
        step_index=idx,
        quality_score=0.85,
        summary=f"Clinical precedent: {precedent} (stage={stage})",
        data={
            "clinical_precedent": precedent,
            "clinical_stage_reached": stage,
            "positive_signals": positive_signals,
            "negative_signals": negative_signals,
            "competitor_programs": list(profile.competitor_programs),
        },
        uncertainty=0.10,
        artifacts_available=["trial_table"],
    )
|
| 307 |
+
|
| 308 |
+
def _toxicity_panel(
    self, action: DrugTargetAction, s: FullLatentState, idx: int
) -> IntermediateOutput:
    """Report the toxicity profile and flagged tissues.

    Running the panel before expression has been queried lowers the
    quality score, raises the uncertainty and attaches a warning.  A
    false-positive coin flip may append one tissue from ``_NOISE_TISSUES``.
    """
    profile = s.target
    # Higher uncertainty if the agent jumps to toxicity before expression
    has_expression = s.progress.expression_queried

    flagged = list(profile.toxicity_tissues)
    # False-positive tissue noise
    if self.noise.coin_flip(s.data_quality.false_positive_rate):
        flagged.append(str(self.noise.rng.choice(_NOISE_TISSUES)))

    if has_expression:
        quality, uncertainty, warnings = 0.80, 0.15, []
    else:
        quality, uncertainty = 0.55, 0.45
        warnings = [
            "Toxicity called without prior expression context — "
            "interpret with caution"
        ]

    return IntermediateOutput(
        output_type=OutputType.TOXICITY_RESULT,
        step_index=idx,
        quality_score=quality,
        summary=(
            f"Toxicity profile: {profile.toxicity_profile}, "
            f"flagged tissues: {flagged}"
        ),
        data={
            "toxicity_profile": profile.toxicity_profile,
            "toxicity_tissues": flagged,
            "prerequisite_expression_done": has_expression,
        },
        uncertainty=uncertainty,
        warnings=warnings,
        artifacts_available=["toxicity_panel_report"],
    )
|
| 341 |
+
|
| 342 |
+
def _off_target_screen(
    self, action: DrugTargetAction, s: FullLatentState, idx: int
) -> IntermediateOutput:
    """Report a noisy off-target count, the matching gene list and a noisy
    selectivity ratio.

    Draw order: the Poisson hit count first, then the selectivity ratio.
    """
    profile = s.target
    # A true count of 0 is sampled with rate 1 instead.
    hit_count = max(0, int(self.noise.sample_count(profile.off_target_count or 1)))
    # NOTE(review): at least one gene is always listed, so a sampled count
    # of 0 is reported alongside one gene — confirm this is intended.
    listed = max(1, hit_count)
    hit_genes = list(profile.off_target_genes[:listed])
    ratio = float(
        self.noise.sample_qc_metric(profile.selectivity_ratio, 0.5, 0.0, 100.0)
    )
    return IntermediateOutput(
        output_type=OutputType.OFF_TARGET_RESULT,
        step_index=idx,
        quality_score=0.80,
        summary=(
            f"Off-target screen: selectivity ratio={ratio:.2f}, "
            f"{len(hit_genes)} hits"
        ),
        data={
            "selectivity_ratio": round(ratio, 3),
            "off_target_count": hit_count,
            "off_target_genes": hit_genes,
        },
        uncertainty=0.15,
        artifacts_available=["off_target_table"],
    )
|
| 367 |
+
|
| 368 |
+
def _patient_stratification(
    self, action: DrugTargetAction, s: FullLatentState, idx: int
) -> IntermediateOutput:
    """Report whether stratification is required, the responder biomarker
    and a noisy responder-fraction estimate.

    The estimate is centred on 0.30 when stratification is required and
    on 0.65 otherwise.
    """
    profile = s.target
    needs_stratification = profile.requires_patient_stratification
    centre = 0.30 if needs_stratification else 0.65
    responder_fraction = float(
        self.noise.sample_qc_metric(centre, 0.10, 0.0, 1.0)
    )
    return IntermediateOutput(
        output_type=OutputType.PATIENT_STRATIFICATION_RESULT,
        step_index=idx,
        quality_score=0.78,
        summary=(
            f"Patient stratification: required={needs_stratification}, "
            f"biomarker={profile.responder_biomarker}"
        ),
        data={
            "requires_stratification": needs_stratification,
            "responder_biomarker": profile.responder_biomarker,
            "estimated_responder_fraction": round(responder_fraction, 3),
        },
        uncertainty=0.20,
        artifacts_available=["stratification_report"],
    )
|
| 393 |
+
|
| 394 |
+
# ── Literature & evidence ───────────────────────────────────────────
|
| 395 |
+
|
| 396 |
+
def _literature_search(
    self, action: DrugTargetAction, s: FullLatentState, idx: int
) -> IntermediateOutput:
    """Return a short list of generic abstracts for the query.

    Between three and five generic abstracts are produced (a Poisson draw
    plus three, capped at five).  When the precedent is "positive" or
    "mixed" and the stage reached is "phase2" or "phase3", a
    precedent-changing abstract is placed first.
    """
    profile = s.target
    topic = action.parameters.get("query", "target")
    n_abstracts = int(self.noise.sample_count(4)) + 3

    abstracts: List[Dict[str, Any]] = [
        {
            "title": f"Recent perspective on {topic} ({2020 + i % 6})",
            "snippet": "...findings consistent with a viable program...",
        }
        for i in range(min(5, n_abstracts))
    ]

    # Scenario-specific recent precedent: surface a precedent-changing
    # abstract ahead of the generic ones.
    has_recent_precedent = (
        profile.clinical_precedent in {"positive", "mixed"}
        and profile.clinical_stage_reached in {"phase2", "phase3"}
    )
    if has_recent_precedent:
        headline = {
            "title": (
                "Clinical activity of recent inhibitors against this "
                "target supports renewed interest"
            ),
            "snippet": (
                "...recent programs have demonstrated clinical activity, "
                "overturning prior assumptions of undruggability..."
            ),
        }
        abstracts.insert(0, headline)

    return IntermediateOutput(
        output_type=OutputType.LITERATURE_RESULT,
        step_index=idx,
        quality_score=0.70,
        summary=f"{len(abstracts)} relevant abstracts retrieved",
        data={
            "abstracts": abstracts,
            "query": action.parameters.get("query", ""),
        },
        uncertainty=0.18,
        artifacts_available=["abstract_list"],
    )
|
| 439 |
+
|
| 440 |
+
def _evidence_synthesis(
    self, action: DrugTargetAction, s: FullLatentState, idx: int
) -> IntermediateOutput:
    """Score a synthesis step by how many progress flags are already set.

    Quality starts at 0.20 and gains 0.06 per set flag (capped at 0.85);
    uncertainty starts at 0.80 and loses 0.06 per flag (floored at 0.20).
    The handler is deterministic.
    """
    # Quality grows with the number of evidence dimensions already covered.
    # Only values that are literally ``True`` count as covered.
    progress_flags = s.progress.model_dump()
    covered = sum(value is True for value in progress_flags.values())

    quality = float(min(0.85, 0.20 + 0.06 * covered))
    uncertainty = max(0.20, 0.80 - 0.06 * covered)
    return IntermediateOutput(
        output_type=OutputType.EVIDENCE_SYNTHESIS_RESULT,
        step_index=idx,
        quality_score=quality,
        summary=f"Evidence synthesis (coverage signal={covered})",
        data={
            "evidence_signal_count": covered,
            "notes": (
                "Synthesis is more reliable once multiple evidence "
                "dimensions have been investigated."
            ),
        },
        uncertainty=uncertainty,
        artifacts_available=["synthesis_report"],
    )
|
| 462 |
+
|
| 463 |
+
def _competitor_landscape(
    self, action: DrugTargetAction, s: FullLatentState, idx: int
) -> IntermediateOutput:
    """Report the target's competitor programs and clinical precedent
    exactly as stored in the profile (no noise is applied)."""
    profile = s.target
    programs = list(profile.competitor_programs)
    return IntermediateOutput(
        output_type=OutputType.COMPETITOR_LANDSCAPE_RESULT,
        step_index=idx,
        quality_score=0.75,
        summary=f"{len(profile.competitor_programs)} competitor programs identified",
        data={
            "competitor_programs": programs,
            "clinical_precedent": profile.clinical_precedent,
        },
        uncertainty=0.15,
        artifacts_available=["competitor_report"],
    )
|
| 479 |
+
|
| 480 |
+
# ── Experimental ───────────────────────────────────────────────────
|
| 481 |
+
|
| 482 |
+
def _in_vitro_assay(
    self, action: DrugTargetAction, s: FullLatentState, idx: int
) -> IntermediateOutput:
    """Simulate an in-vitro assay: IC50, selectivity window and viability
    drop, each drawn around the profile's true value.

    Draw order: IC50 (20 % relative sd), selectivity window, viability
    drop (centred on 0.5 for strong/moderate in-vivo efficacy, else 0.2).
    """
    profile = s.target
    sample = self.noise.sample_qc_metric

    true_ic50 = profile.in_vitro_ic50_nM
    ic50 = float(sample(true_ic50, 0.2 * true_ic50, 0.5, 100_000.0))
    window = float(sample(profile.selectivity_ratio, 0.4, 0.0, 100.0))

    if profile.in_vivo_efficacy in {"strong", "moderate"}:
        viability_centre = 0.5
    else:
        viability_centre = 0.2
    viability_drop = float(sample(viability_centre, 0.1, 0.0, 1.0))

    return IntermediateOutput(
        output_type=OutputType.IN_VITRO_RESULT,
        step_index=idx,
        quality_score=0.85,
        summary=(
            f"In-vitro: IC50={ic50:.1f} nM, selectivity_window={window:.2f}, "
            f"viability_drop={viability_drop:.2f}"
        ),
        data={
            "IC50_nM": round(ic50, 2),
            "selectivity_window": round(window, 3),
            "viability_drop": round(viability_drop, 3),
        },
        uncertainty=0.18,
        artifacts_available=["in_vitro_report"],
    )
|
| 512 |
+
|
| 513 |
+
def _in_vivo_model(
    self, action: DrugTargetAction, s: FullLatentState, idx: int
) -> IntermediateOutput:
    """Simulate an in-vivo study: efficacy, tolerability and a PK/PD
    summary.

    Draw order: efficacy, tolerability, half-life, Cmax.  Unknown efficacy
    or toxicity labels fall back to centres of 0.5 and 0.6 respectively.
    """
    profile = s.target
    sample = self.noise.sample_qc_metric

    efficacy_centres = {"strong": 0.85, "moderate": 0.55, "weak": 0.25, "none": 0.05}
    tolerability_centres = {"clean": 0.9, "mild": 0.75, "moderate": 0.5, "severe": 0.25}

    efficacy = float(
        sample(efficacy_centres.get(profile.in_vivo_efficacy, 0.5), 0.08, 0.0, 1.0)
    )
    tolerability = float(
        sample(tolerability_centres.get(profile.toxicity_profile, 0.6), 0.08, 0.0, 1.0)
    )
    halflife_hours = round(float(sample(8.0, 2.0, 0.5, 48.0)), 2)
    cmax_nm = round(float(sample(500.0, 150.0, 1.0, 5000.0)), 2)

    return IntermediateOutput(
        output_type=OutputType.IN_VIVO_RESULT,
        step_index=idx,
        quality_score=0.85,
        summary=f"In-vivo: efficacy={efficacy:.2f}, tolerability={tolerability:.2f}",
        data={
            "efficacy_endpoint": round(efficacy, 3),
            "tolerability": round(tolerability, 3),
            "PK_PD_summary": {
                "halflife_hours": halflife_hours,
                "Cmax_nM": cmax_nm,
            },
        },
        uncertainty=0.20,
        artifacts_available=["in_vivo_report"],
    )
|
| 548 |
+
|
| 549 |
+
def _crispr_knockout(
    self, action: DrugTargetAction, s: FullLatentState, idx: int
) -> IntermediateOutput:
    """Simulate a CRISPR knockout readout for the target.

    Reports a noisy version of the latent ``crispr_essentiality`` score and
    lists up to the first three ``off_target_genes`` as synthetic-lethal
    partner candidates.  ``action`` is not read.
    """
    profile = s.target
    essentiality = float(
        self.noise.sample_qc_metric(profile.crispr_essentiality, 0.15, -3.0, 1.0)
    )
    partners = list(profile.off_target_genes[:3])

    headline = (
        f"CRISPR essentiality score={essentiality:.2f}; "
        f"{len(partners)} synthetic-lethal candidates"
    )
    payload = {
        "essentiality_score": round(essentiality, 3),
        "synthetic_lethal_partners": partners,
    }
    return IntermediateOutput(
        output_type=OutputType.CRISPR_RESULT,
        step_index=idx,
        quality_score=0.80,
        summary=headline,
        data=payload,
        uncertainty=0.18,
        artifacts_available=["crispr_report"],
    )
|
| 572 |
+
|
| 573 |
+
def _biomarker_correlation(
    self, action: DrugTargetAction, s: FullLatentState, idx: int
) -> IntermediateOutput:
    """Simulate a biomarker/response correlation analysis.

    The baseline correlation is 0.6 when the latent target has a
    ``responder_biomarker`` and 0.2 otherwise; the reported value is that
    baseline passed through the noise model.  ``action`` is not read.
    """
    profile = s.target
    biomarker = profile.responder_biomarker

    if biomarker:
        baseline = 0.6
    else:
        baseline = 0.2
    correlation = float(self.noise.sample_qc_metric(baseline, 0.12, -1.0, 1.0))

    headline = (
        f"Biomarker correlation r={correlation:.2f} "
        f"({biomarker or 'no_biomarker'})"
    )
    return IntermediateOutput(
        output_type=OutputType.BIOMARKER_RESULT,
        step_index=idx,
        quality_score=0.78,
        summary=headline,
        data={
            "biomarker": biomarker,
            "correlation": round(correlation, 3),
        },
        uncertainty=0.22,
        artifacts_available=["biomarker_report"],
    )
|
| 595 |
+
|
| 596 |
+
# ── Meta ────────────────────────────────────────────────────────────
|
| 597 |
+
|
| 598 |
+
def _flag_red_flag(
    self, action: DrugTargetAction, s: FullLatentState, idx: int
) -> IntermediateOutput:
    """Record a free-text red-flag note supplied by the agent.

    The note is read from ``action.parameters["note"]`` (falling back to
    ``"(no detail)"``) and coerced to ``str``.  The summary shows at most
    the first 80 characters; the full note is kept in ``data``.  The latent
    state ``s`` is not read.
    """
    raw_note = action.parameters.get("note", "(no detail)")
    note = str(raw_note)
    preview = note[:80]
    return IntermediateOutput(
        output_type=OutputType.RED_FLAG_NOTE,
        step_index=idx,
        quality_score=1.0,
        summary=f"Red flag recorded: {preview}",
        data={"note": note},
        uncertainty=0.0,
        artifacts_available=["dossier_red_flag"],
    )
|
| 611 |
+
|
| 612 |
+
def _request_expert_review(
    self, action: DrugTargetAction, s: FullLatentState, idx: int
) -> IntermediateOutput:
    """Produce an expert-review note whose value scales with progress.

    Counts how many boolean fields of ``s.progress`` are ``True`` and uses
    that count to raise the quality score (capped at 0.75) and lower the
    uncertainty (floored at 0.25).  ``action`` is not read.
    """
    # Only genuine ``bool`` values that are set count as coverage.
    covered = sum(
        1 for value in s.progress.model_dump().values() if value is True
    )

    quality = float(min(0.75, 0.20 + 0.05 * covered))
    uncertainty = max(0.25, 0.80 - 0.05 * covered)
    review_text = (
        "Review more meaningful when more evidence dimensions "
        "have been opened."
    )
    return IntermediateOutput(
        output_type=OutputType.EXPERT_REVIEW,
        step_index=idx,
        quality_score=quality,
        summary=f"Expert review (coverage signal={covered})",
        data={
            "evidence_signal_count": covered,
            "review": review_text,
        },
        uncertainty=uncertainty,
        artifacts_available=["expert_review_note"],
    )
|
| 635 |
+
|
| 636 |
+
def _submit_validation_report(
    self, action: DrugTargetAction, s: FullLatentState, idx: int
) -> IntermediateOutput:
    """Echo the agent's final decision as a validation-report output.

    A missing/empty ``final_decision`` becomes ``"no_decision"``, a missing
    ``confidence`` becomes ``0.0`` and a missing ``reasoning`` becomes the
    empty string.  The latent state ``s`` is not read.
    """
    decision = action.final_decision or "no_decision"

    confidence = 0.0
    if action.confidence is not None:
        confidence = float(action.confidence)

    headline = (
        f"Validation report submitted: decision={decision}, "
        f"confidence={confidence:.2f}"
    )
    report = {
        "decision": decision,
        "confidence": confidence,
        "reasoning": action.reasoning or "",
    }
    return IntermediateOutput(
        output_type=OutputType.VALIDATION_REPORT,
        step_index=idx,
        quality_score=1.0,
        summary=headline,
        data=report,
        uncertainty=0.0,
        artifacts_available=["validation_report"],
    )
|
| 657 |
+
|
| 658 |
+
# ── Default ────────────────────────────────────────────────────────
|
| 659 |
+
|
| 660 |
+
def _default(
    self, action: DrugTargetAction, s: FullLatentState, idx: int
) -> IntermediateOutput:
    """Return a failure report naming the action type that had no handler."""
    message = f"Unhandled action type: {action.action_type}"
    return IntermediateOutput(
        output_type=OutputType.FAILURE_REPORT,
        step_index=idx,
        success=False,
        summary=message,
        data={},
    )
|
| 670 |
+
|
| 671 |
+
|
| 672 |
+
# Dispatch table: one entry per ActionType, pointing at the OutputGenerator
# method that simulates that action.  The methods are looked up on the class
# rather than on an instance, so whoever invokes a handler must pass the
# generator instance explicitly as the first argument.
# NOTE(review): the lookup/fallback logic lives outside this chunk —
# presumably ``_default`` is used for action types missing here; confirm.
_HANDLERS = {
    ActionType.QUERY_EXPRESSION: OutputGenerator._query_expression,
    ActionType.DIFFERENTIAL_EXPRESSION: OutputGenerator._differential_expression,
    ActionType.PATHWAY_ENRICHMENT: OutputGenerator._pathway_enrichment,
    ActionType.COEXPRESSION_NETWORK: OutputGenerator._coexpression_network,
    ActionType.PROTEIN_STRUCTURE_LOOKUP: OutputGenerator._protein_structure_lookup,
    ActionType.BINDING_SITE_ANALYSIS: OutputGenerator._binding_site_analysis,
    ActionType.PROTEIN_INTERACTION_NETWORK: OutputGenerator._protein_interaction_network,
    ActionType.DRUGGABILITY_SCREEN: OutputGenerator._druggability_screen,
    ActionType.CLINICAL_TRIAL_LOOKUP: OutputGenerator._clinical_trial_lookup,
    ActionType.TOXICITY_PANEL: OutputGenerator._toxicity_panel,
    ActionType.OFF_TARGET_SCREEN: OutputGenerator._off_target_screen,
    ActionType.PATIENT_STRATIFICATION: OutputGenerator._patient_stratification,
    ActionType.LITERATURE_SEARCH: OutputGenerator._literature_search,
    ActionType.EVIDENCE_SYNTHESIS: OutputGenerator._evidence_synthesis,
    ActionType.COMPETITOR_LANDSCAPE: OutputGenerator._competitor_landscape,
    ActionType.IN_VITRO_ASSAY: OutputGenerator._in_vitro_assay,
    ActionType.IN_VIVO_MODEL: OutputGenerator._in_vivo_model,
    ActionType.CRISPR_KNOCKOUT: OutputGenerator._crispr_knockout,
    ActionType.BIOMARKER_CORRELATION: OutputGenerator._biomarker_correlation,
    ActionType.FLAG_RED_FLAG: OutputGenerator._flag_red_flag,
    ActionType.REQUEST_EXPERT_REVIEW: OutputGenerator._request_expert_review,
    ActionType.SUBMIT_VALIDATION_REPORT: OutputGenerator._submit_validation_report,
}
|
server/simulator/transition.py
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Transition dynamics engine for the drug-target-validation simulator.
|
| 2 |
+
|
| 3 |
+
Orchestrates latent-state updates, output generation, credit accounting,
|
| 4 |
+
and constraint propagation for every agent action.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from __future__ import annotations
|
| 8 |
+
|
| 9 |
+
from copy import deepcopy
|
| 10 |
+
from dataclasses import dataclass, field
|
| 11 |
+
from typing import Dict, List, Optional, Tuple
|
| 12 |
+
|
| 13 |
+
from models import (
|
| 14 |
+
ActionType,
|
| 15 |
+
DrugTargetAction,
|
| 16 |
+
IntermediateOutput,
|
| 17 |
+
OutputType,
|
| 18 |
+
)
|
| 19 |
+
|
| 20 |
+
from .latent_state import FullLatentState
|
| 21 |
+
from .noise import NoiseModel
|
| 22 |
+
from .output_generator import OutputGenerator
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
# Credit costs per ActionType.
# Each value is the integer number of credits added to ``credits_used`` when
# the action is executed.  Flagging a red flag and submitting the final
# report are free; the wet-lab style actions (in-vitro, in-vivo, CRISPR) are
# the most expensive entries.
_BASE_ACTION_COSTS: Dict[ActionType, int] = {
    ActionType.QUERY_EXPRESSION: 2,
    ActionType.DIFFERENTIAL_EXPRESSION: 2,
    ActionType.PATHWAY_ENRICHMENT: 2,
    ActionType.COEXPRESSION_NETWORK: 2,
    ActionType.PROTEIN_STRUCTURE_LOOKUP: 3,
    ActionType.BINDING_SITE_ANALYSIS: 3,
    ActionType.PROTEIN_INTERACTION_NETWORK: 2,
    ActionType.DRUGGABILITY_SCREEN: 3,
    ActionType.CLINICAL_TRIAL_LOOKUP: 3,
    ActionType.TOXICITY_PANEL: 3,
    ActionType.OFF_TARGET_SCREEN: 3,
    ActionType.PATIENT_STRATIFICATION: 3,
    ActionType.LITERATURE_SEARCH: 1,
    ActionType.EVIDENCE_SYNTHESIS: 1,
    ActionType.COMPETITOR_LANDSCAPE: 1,
    ActionType.IN_VITRO_ASSAY: 5,
    ActionType.IN_VIVO_MODEL: 8,
    ActionType.CRISPR_KNOCKOUT: 4,
    ActionType.BIOMARKER_CORRELATION: 3,
    ActionType.FLAG_RED_FLAG: 0,
    ActionType.REQUEST_EXPERT_REVIEW: 1,
    ActionType.SUBMIT_VALIDATION_REPORT: 0,
}

# Public alias kept for callers that historically imported ACTION_COSTS.
# This is the same dict object, not a copy: mutating one mutates the other.
ACTION_COSTS = _BASE_ACTION_COSTS
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def compute_action_cost(action: DrugTargetAction) -> int:
    """Look up how many credits ``action`` costs.

    Action types that have no entry in the cost table are treated as free
    (cost ``0``).
    """
    try:
        return _BASE_ACTION_COSTS[action.action_type]
    except KeyError:
        return 0
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
# Map action type → progress flag that should be set when it succeeds.
# Values are attribute names on ``FullLatentState.progress``; the transition
# engine sets the named attribute to ``True`` via ``setattr``.  Several
# action types intentionally share one flag (e.g. both expression queries
# set ``expression_queried``).  ``FLAG_RED_FLAG`` has no entry, so it never
# changes progress.
_PROGRESS_MAP: Dict[ActionType, str] = {
    ActionType.QUERY_EXPRESSION: "expression_queried",
    ActionType.DIFFERENTIAL_EXPRESSION: "expression_queried",
    ActionType.PATHWAY_ENRICHMENT: "pathway_analysed",
    ActionType.COEXPRESSION_NETWORK: "interactions_mapped",
    ActionType.PROTEIN_STRUCTURE_LOOKUP: "structure_resolved",
    ActionType.BINDING_SITE_ANALYSIS: "druggability_assessed",
    ActionType.PROTEIN_INTERACTION_NETWORK: "interactions_mapped",
    ActionType.DRUGGABILITY_SCREEN: "druggability_assessed",
    ActionType.CLINICAL_TRIAL_LOOKUP: "clinical_checked",
    ActionType.TOXICITY_PANEL: "toxicity_assessed",
    ActionType.OFF_TARGET_SCREEN: "selectivity_checked",
    ActionType.PATIENT_STRATIFICATION: "patient_stratification_done",
    ActionType.LITERATURE_SEARCH: "literature_reviewed",
    ActionType.EVIDENCE_SYNTHESIS: "evidence_synthesised",
    ActionType.COMPETITOR_LANDSCAPE: "literature_reviewed",
    ActionType.IN_VITRO_ASSAY: "in_vitro_done",
    ActionType.IN_VIVO_MODEL: "in_vivo_done",
    ActionType.CRISPR_KNOCKOUT: "crispr_done",
    ActionType.BIOMARKER_CORRELATION: "biomarker_correlated",
    ActionType.REQUEST_EXPERT_REVIEW: "expert_reviewed",
    ActionType.SUBMIT_VALIDATION_REPORT: "report_submitted",
}
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
@dataclass
class TransitionResult:
    """Bundle returned by the transition engine after one step."""

    # Copy of the input state after the action was applied (or an unchanged
    # copy when the action was blocked by a hard violation).
    next_state: FullLatentState
    # Simulated output for the action, or a failure report when blocked.
    output: IntermediateOutput
    # Not populated by ``TransitionEngine.step`` itself.
    # NOTE(review): presumably filled in later by the reward code — confirm.
    reward_components: Dict[str, float] = field(default_factory=dict)
    # Violations that blocked the action; empty when the action ran.
    hard_violations: List[str] = field(default_factory=list)
    # Violations that did not block the action but degraded its output.
    soft_violations: List[str] = field(default_factory=list)
    # True when the episode ends with this step.
    done: bool = False
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
class TransitionEngine:
    """Advance the simulator by one agent action.

    Each call to :meth:`step` works on a deep copy of the supplied latent
    state, charges credits, asks ``OutputGenerator`` for the simulated
    output and updates progress flags.  The caller's state object is never
    mutated.
    """

    def __init__(self, noise: NoiseModel):
        self.noise = noise
        self.output_gen = OutputGenerator(noise)

    def step(
        self,
        state: FullLatentState,
        action: DrugTargetAction,
        *,
        hard_violations: Optional[List[str]] = None,
        soft_violations: Optional[List[str]] = None,
    ) -> TransitionResult:
        """Apply ``action`` to a copy of ``state`` and return the outcome.

        Args:
            state: Current latent state; left untouched.
            action: The action chosen by the agent.
            hard_violations: Rule violations that block the action.  When
                non-empty, no credits are charged, no counters or progress
                flags change and a failure report is returned.
            soft_violations: Rule violations that let the action run but
                scale its quality score by 0.7 and are appended to the
                output's warnings.

        Returns:
            A ``TransitionResult``.  ``done`` is true when the action is a
            validation-report submission (even if blocked) or when the
            credit state reports exhaustion after charging this action.
        """
        next_state = deepcopy(state)
        # 1-based index of this step among the actions executed so far.
        step_number = sum(next_state.action_call_counts.values()) + 1

        blocking = hard_violations or []
        advisory = soft_violations or []
        is_submission = action.action_type == ActionType.SUBMIT_VALIDATION_REPORT

        if blocking:
            blocked_report = IntermediateOutput(
                output_type=OutputType.FAILURE_REPORT,
                step_index=step_number,
                success=False,
                summary=f"Action blocked: {'; '.join(blocking)}",
            )
            return TransitionResult(
                next_state=next_state,
                output=blocked_report,
                hard_violations=blocking,
                soft_violations=advisory,
                done=is_submission,
            )

        # Bump the per-action call counter first so that later redundancy
        # checks see this call.
        call_counts = next_state.action_call_counts
        action_key = action.action_type.value
        call_counts[action_key] = call_counts.get(action_key, 0) + 1

        # Charge the action against the credit budget.
        next_state.credits.credits_used += compute_action_cost(action)

        # Remember whether the budget is exhausted after this charge; it
        # ends the episode below.  The output is still generated normally.
        out_of_credits = next_state.credits.exhausted

        output = self.output_gen.generate(action, next_state, step_number)

        if advisory:
            output.quality_score = float(max(0.0, output.quality_score * 0.7))
            output.warnings = list(output.warnings) + list(advisory)

        # Only successful outputs mark their evidence dimension as covered.
        progress_flag = _PROGRESS_MAP.get(action.action_type)
        if progress_flag and output.success:
            setattr(next_state.progress, progress_flag, True)

        return TransitionResult(
            next_state=next_state,
            output=output,
            soft_violations=advisory,
            done=is_submission or out_of_credits,
        )

    @staticmethod
    def covered_evidence_dimensions(s: FullLatentState) -> List[str]:
        """List the evidence dimensions whose progress flag is set.

        The returned names follow a fixed order and are meant to line up
        with the keys used in ``TargetProfile.key_evidence_dimensions`` so
        coverage can be computed by simple membership tests.
        """
        dimension_to_flag = (
            ("expression", "expression_queried"),
            ("druggability", "druggability_assessed"),
            ("off_target", "selectivity_checked"),
            ("toxicity", "toxicity_assessed"),
            ("clinical", "clinical_checked"),
            ("literature", "literature_reviewed"),
            ("in_vitro", "in_vitro_done"),
            ("in_vivo", "in_vivo_done"),
            ("patient_stratification", "patient_stratification_done"),
            ("pathway", "pathway_analysed"),
            ("structure", "structure_resolved"),
            ("interactions", "interactions_mapped"),
            ("crispr", "crispr_done"),
            ("biomarker", "biomarker_correlated"),
        )
        progress = s.progress
        return [
            dimension
            for dimension, flag_name in dimension_to_flag
            if getattr(progress, flag_name)
        ]
|
server/tasks/__init__.py
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .generator import TaskGenerator
|
| 2 |
+
from .scenarios import SCENARIO_LIBRARY, Scenario
|
| 3 |
+
|
| 4 |
+
__all__ = ["SCENARIO_LIBRARY", "Scenario", "TaskGenerator"]
|
server/tasks/generator.py
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Task generator — produces (ValidationTaskSpec, FullLatentState) pairs
|
| 2 |
+
for drug-target-validation episodes.
|
| 3 |
+
|
| 4 |
+
Supports two modes:
|
| 5 |
+
1. Select from the curated ``SCENARIO_LIBRARY``.
|
| 6 |
+
2. Add procedurally-generated scenarios on top.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
from __future__ import annotations
|
| 10 |
+
|
| 11 |
+
from typing import List, Optional, Tuple
|
| 12 |
+
|
| 13 |
+
import numpy as np
|
| 14 |
+
|
| 15 |
+
from models import ActionType, ValidationTaskSpec
|
| 16 |
+
|
| 17 |
+
from server.simulator.latent_state import (
|
| 18 |
+
CreditState,
|
| 19 |
+
DataQualityState,
|
| 20 |
+
FullLatentState,
|
| 21 |
+
TargetProfile,
|
| 22 |
+
ValidationProgress,
|
| 23 |
+
)
|
| 24 |
+
from .scenarios import SCENARIO_LIBRARY, Scenario
|
| 25 |
+
from .procedural_generator import generate_procedural_scenarios
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
class TaskGenerator:
    """Builds ``(task, latent state)`` pairs that seed environment episodes."""

    def __init__(
        self,
        scenarios: Optional[List[Scenario]] = None,
        domain_randomise: bool = True,
    ):
        """Set up the scenario pool.

        Args:
            scenarios: Explicit scenario pool.  When ``None`` the curated
                library plus 20 procedurally generated scenarios (seed 42)
                is used.
            domain_randomise: Whether ``generate`` jitters numeric fields.
        """
        if scenarios is None:
            scenarios = list(SCENARIO_LIBRARY) + generate_procedural_scenarios(
                n=20, seed=42,
            )
        self.scenarios = scenarios
        self.domain_randomise = domain_randomise

    def generate(
        self,
        *,
        seed: Optional[int] = None,
        scenario_name: Optional[str] = None,
    ) -> Tuple[ValidationTaskSpec, FullLatentState]:
        """Create a fresh task specification and matching latent state.

        Args:
            seed: Seed for scenario selection and randomisation; also
                stored on the latent state (``0`` when not given).
            scenario_name: Pick this scenario instead of a random one.

        Raises:
            ValueError: If ``scenario_name`` matches no scenario.
        """
        rng = np.random.default_rng(seed)

        if scenario_name:
            chosen = self._find_scenario(scenario_name)
        else:
            chosen = self.scenarios[int(rng.integers(0, len(self.scenarios)))]

        # Deep copies keep the shared scenario pool pristine.
        task, target, data_quality = (
            part.model_copy(deep=True)
            for part in (chosen.task, chosen.target, chosen.data_quality)
        )

        if self.domain_randomise:
            self._randomise(rng, task, target, data_quality)

        # An empty action list means "everything is allowed".
        if not task.available_actions:
            task.available_actions = [kind.value for kind in ActionType]

        latent = FullLatentState(
            target=target,
            data_quality=data_quality,
            progress=ValidationProgress(),
            credits=CreditState(credits_total=task.credits_limit),
            rng_seed=seed or 0,
        )
        return task, latent

    def list_scenarios(self) -> List[str]:
        """Return the names of all scenarios in the pool, in pool order."""
        return [scenario.name for scenario in self.scenarios]

    # ── internals ───────────────────────────────────────────────────────

    def _find_scenario(self, name: str) -> Scenario:
        """Return the first scenario called ``name`` or raise ``ValueError``."""
        match = next(
            (scenario for scenario in self.scenarios if scenario.name == name),
            None,
        )
        if match is None:
            available = ", ".join(self.list_scenarios())
            raise ValueError(f"Unknown scenario '{name}'. Available: {available}")
        return match

    @staticmethod
    def _randomise(
        rng: np.random.Generator,
        task: ValidationTaskSpec,
        target: TargetProfile,
        data_quality: DataQualityState,
    ) -> None:
        """Jitter numeric fields in place, leaving categorical ones alone.

        ``correct_decision`` and ``key_evidence_dimensions`` are never
        touched.  The order of random draws below is part of the seeded
        behaviour and must not be rearranged.
        """

        def factor(low: float, high: float) -> float:
            # Multiplicative jitter factor drawn uniformly from [low, high).
            return float(rng.uniform(low, high))

        def shifted(value: float, sigma: float, lower: float, upper: float) -> float:
            # Additive Gaussian jitter, clipped to [lower, upper].
            return float(np.clip(value + rng.normal(0, sigma), lower, upper))

        # Credit budget: +/-10 %, never below 15.
        task.credits_limit = int(
            max(15, round(task.credits_limit * factor(0.9, 1.1)))
        )

        # Data-quality knobs.
        data_quality.noise_level = shifted(
            data_quality.noise_level, 0.02, 0.02, 0.4
        )
        data_quality.false_positive_rate = shifted(
            data_quality.false_positive_rate, 0.01, 0.0, 0.3
        )
        data_quality.false_negative_rate = shifted(
            data_quality.false_negative_rate, 0.01, 0.0, 0.3
        )
        data_quality.database_coverage = shifted(
            data_quality.database_coverage, 0.03, 0.5, 1.0
        )

        # Target numerics.
        target.tissue_specificity = float(
            np.clip(target.tissue_specificity * factor(0.9, 1.1), 0.0, 1.0)
        )
        target.disease_overexpression = float(
            max(0.1, target.disease_overexpression * factor(0.85, 1.15))
        )
        target.druggability_score = float(
            np.clip(target.druggability_score * factor(0.9, 1.1), 0.0, 1.0)
        )
        target.selectivity_ratio = float(
            max(0.0, target.selectivity_ratio * factor(0.85, 1.15))
        )
        target.in_vitro_ic50_nM = float(
            max(0.5, target.in_vitro_ic50_nM * factor(0.7, 1.3))
        )
|
server/tasks/procedural_generator.py
ADDED
|
@@ -0,0 +1,232 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Procedural drug-target-validation scenario generator.
|
| 2 |
+
|
| 3 |
+
Composes coherent ``Scenario`` objects by sampling from a pool of real
|
| 4 |
+
cancer targets and disease contexts and bundling them with an internally
|
| 5 |
+
consistent ``TargetProfile`` (viable vs non-viable bundles).
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import logging
|
| 11 |
+
from typing import List, Optional
|
| 12 |
+
|
| 13 |
+
import numpy as np
|
| 14 |
+
|
| 15 |
+
from models import ValidationTaskSpec
|
| 16 |
+
|
| 17 |
+
from server.simulator.latent_state import (
|
| 18 |
+
DataQualityState,
|
| 19 |
+
TargetProfile,
|
| 20 |
+
)
|
| 21 |
+
|
| 22 |
+
from .scenarios import Scenario
|
| 23 |
+
|
| 24 |
+
logger = logging.getLogger(__name__)
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
# Gene symbols from which ``generate_scenario`` draws the target gene
# (uniformly, via ``rng.choice``).
_TARGET_POOL: List[str] = [
    "BRAF", "MET", "FGFR1", "PIK3CA", "AKT1", "CDK4", "MDM2", "BCL2",
    "PARP1", "IDH1", "IDH2", "FLT3", "JAK2", "BTK", "MTOR", "ALK",
    "ROS1", "KIT", "ERBB2", "ABL1",
]
|
| 32 |
+
|
| 33 |
+
# Disease contexts from which ``generate_scenario`` draws independently of
# the target gene, so gene/disease pairings are not curated for plausibility.
_DISEASE_POOL: List[str] = [
    "non-small cell lung cancer",
    "colorectal cancer",
    "melanoma",
    "acute myeloid leukemia",
    "chronic myeloid leukemia",
    "glioblastoma",
    "breast cancer",
    "ovarian cancer",
]
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
# Sampling parameters per difficulty tier, consumed by ``generate_scenario``.
#   noise_level / false_*_rate / database_coverage: (low, high) bounds handed
#       to ``rng.uniform``.
#   credits_limit / n_key_evidence: (low, high) bounds handed to
#       ``rng.integers``.
#       NOTE(review): ``rng.integers`` excludes ``high`` by default, so e.g.
#       ``n_key_evidence=(1, 2)`` always yields 1 — confirm this is intended.
#   viable_prob: probability that the generated target is a viable ("go") one.
#   misleading_prob: probability that a misleading signal is attached.
_DIFFICULTY_PARAMS = {
    "easy": {
        "noise_level": (0.05, 0.10),
        "false_positive_rate": (0.02, 0.05),
        "false_negative_rate": (0.02, 0.05),
        "database_coverage": (0.90, 1.0),
        "credits_limit": (45, 60),
        "viable_prob": 0.65,
        "n_key_evidence": (1, 2),
        "misleading_prob": 0.0,
    },
    "medium": {
        "noise_level": (0.08, 0.15),
        "false_positive_rate": (0.04, 0.08),
        "false_negative_rate": (0.04, 0.08),
        "database_coverage": (0.80, 0.95),
        "credits_limit": (40, 55),
        "viable_prob": 0.50,
        "n_key_evidence": (2, 3),
        "misleading_prob": 0.20,
    },
    "hard": {
        "noise_level": (0.12, 0.22),
        "false_positive_rate": (0.06, 0.12),
        "false_negative_rate": (0.06, 0.12),
        "database_coverage": (0.65, 0.90),
        "credits_limit": (35, 50),
        "viable_prob": 0.45,
        "n_key_evidence": (3, 4),
        "misleading_prob": 0.50,
    },
}
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
def _build_viable_target(rng: np.random.Generator) -> TargetProfile:
    """Sample a target profile whose ground-truth decision is ``"go"``.

    All numeric and categorical fields are drawn from favourable ranges.
    Fields are sampled in declaration order; that order is part of the
    seeded behaviour and must be kept.
    """

    def pick(options: List[str]) -> str:
        return str(rng.choice(options))

    def between(low: float, high: float) -> float:
        return float(rng.uniform(low, high))

    def coin() -> bool:
        return bool(rng.choice([True, False]))

    # A dict literal is evaluated top to bottom, which fixes the draw order.
    fields = {
        "expression_level": pick(["high_specific", "moderate"]),
        "tissue_specificity": between(0.55, 0.90),
        "disease_overexpression": between(2.0, 5.0),
        "druggability_score": between(0.55, 0.90),
        "binding_pocket_quality": pick(["excellent", "good"]),
        "has_known_ligands": True,
        "allosteric_site_available": coin(),
        "selectivity_ratio": between(5.0, 20.0),
        "off_target_count": int(rng.integers(0, 4)),
        "off_target_genes": [],
        "toxicity_profile": pick(["clean", "mild", "moderate"]),
        "toxicity_tissues": [],
        "clinical_precedent": pick(["positive", "mixed"]),
        "clinical_stage_reached": pick(["phase1", "phase2", "phase3"]),
        "competitor_programs": [],
        "requires_patient_stratification": coin(),
        "responder_biomarker": None,
        "in_vitro_ic50_nM": between(2.0, 100.0),
        "in_vivo_efficacy": pick(["strong", "moderate"]),
        "crispr_essentiality": between(-1.5, -0.5),
        "true_viability_score": between(0.65, 0.90),
        "correct_decision": "go",
    }
    return TargetProfile(**fields)
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
def _build_nonviable_target(rng: np.random.Generator) -> TargetProfile:
    """Sample a target profile whose ground-truth decision is ``"no_go"``.

    All numeric and categorical fields are drawn from unfavourable ranges.
    Fields are sampled in declaration order; that order is part of the
    seeded behaviour and must be kept.
    """

    def pick(options: List[str]) -> str:
        return str(rng.choice(options))

    def between(low: float, high: float) -> float:
        return float(rng.uniform(low, high))

    # A dict literal is evaluated top to bottom, which fixes the draw order.
    fields = {
        "expression_level": pick(["high_nonspecific", "low", "moderate"]),
        "tissue_specificity": between(0.10, 0.45),
        "disease_overexpression": between(0.5, 1.8),
        "druggability_score": between(0.05, 0.40),
        "binding_pocket_quality": pick(["poor", "undruggable"]),
        "has_known_ligands": False,
        "allosteric_site_available": False,
        "selectivity_ratio": between(0.5, 3.0),
        "off_target_count": int(rng.integers(5, 12)),
        # Placeholder gene names; the list length is sampled independently
        # of ``off_target_count``.
        "off_target_genes": [
            f"OFF_{i}" for i in range(int(rng.integers(2, 6)))
        ],
        "toxicity_profile": pick(["moderate", "severe"]),
        "toxicity_tissues": [
            pick(["liver", "kidney", "cardiac", "CNS", "GI"])
        ],
        "clinical_precedent": pick(["negative", "none", "mixed"]),
        "clinical_stage_reached": None,
        "competitor_programs": [],
        "requires_patient_stratification": False,
        "responder_biomarker": None,
        "in_vitro_ic50_nM": between(500.0, 10_000.0),
        "in_vivo_efficacy": pick(["weak", "none"]),
        "crispr_essentiality": between(-0.3, 0.3),
        "true_viability_score": between(0.05, 0.35),
        "correct_decision": "no_go",
    }
    return TargetProfile(**fields)
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
# Evidence-dimension names from which ``generate_scenario`` samples a
# target's ``key_evidence_dimensions`` (without replacement).
_DIMENSION_POOL: List[str] = [
    "expression",
    "druggability",
    "off_target",
    "toxicity",
    "clinical",
    "literature",
    "in_vitro",
    "in_vivo",
    "patient_stratification",
]
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
def generate_scenario(
    seed: int,
    difficulty: str = "medium",
) -> Scenario:
    """Generate a single procedural scenario with complete latent state.

    Args:
        seed: Seed for the scenario's private random generator; the same
            seed and difficulty always produce the same scenario.
        difficulty: One of the keys of ``_DIFFICULTY_PARAMS``
            (``"easy"``, ``"medium"``, ``"hard"``).

    Returns:
        A ``Scenario`` named ``proc_<gene>_<difficulty>_<seed>``.

    Raises:
        KeyError: If ``difficulty`` is not a known tier.
    """
    rng = np.random.default_rng(seed)
    params = _DIFFICULTY_PARAMS[difficulty]

    target_gene = str(rng.choice(_TARGET_POOL))
    disease = str(rng.choice(_DISEASE_POOL))

    if rng.random() < params["viable_prob"]:
        target = _build_viable_target(rng)
    else:
        target = _build_nonviable_target(rng)

    # NOTE(review): ``rng.integers`` excludes the upper bound by default, so
    # a range such as (1, 2) always yields 1.  Left as-is because changing
    # it would alter every seeded scenario — confirm the intended range.
    n_key = int(rng.integers(*params["n_key_evidence"]))
    # ``rng.choice`` returns an array of ``numpy.str_``; convert each entry
    # to a plain ``str`` like every other sampled label in this module so
    # the profile never carries numpy scalar types.
    target.key_evidence_dimensions = [
        str(dimension)
        for dimension in rng.choice(
            _DIMENSION_POOL,
            size=min(n_key, len(_DIMENSION_POOL)),
            replace=False,
        )
    ]

    if rng.random() < params["misleading_prob"]:
        # Pick the signal that points away from the true decision.
        target.misleading_signals = [
            "high_expression_looks_positive"
            if target.correct_decision == "no_go"
            else "historical_undruggability"
        ]

    data_quality = DataQualityState(
        noise_level=round(float(rng.uniform(*params["noise_level"])), 3),
        false_positive_rate=round(
            float(rng.uniform(*params["false_positive_rate"])), 3
        ),
        false_negative_rate=round(
            float(rng.uniform(*params["false_negative_rate"])), 3
        ),
        database_coverage=round(
            float(rng.uniform(*params["database_coverage"])), 3
        ),
    )

    credits_limit = int(rng.integers(*params["credits_limit"]))

    task = ValidationTaskSpec(
        problem_statement=(
            f"Validate {target_gene} as a drug target in {disease}."
        ),
        target_gene=target_gene,
        disease_context=disease,
        indication=f"{target_gene}-driven {disease}",
        credits_limit=credits_limit,
        success_criteria=[
            f"Investigate the key evidence for {target_gene}",
            "Submit a calibrated go / no_go validation report",
        ],
    )

    name = f"proc_{target_gene}_{difficulty}_{seed}"
    tags = [difficulty, target_gene, disease.replace(" ", "_")]

    return Scenario(
        name=name,
        task=task,
        target=target,
        data_quality=data_quality,
        difficulty=difficulty,
        tags=tags,
    )
|
| 217 |
+
|
| 218 |
+
|
| 219 |
+
def generate_procedural_scenarios(
    n: int = 20,
    seed: int = 42,
) -> List[Scenario]:
    """Build a pool of ``n`` procedurally generated scenarios.

    Difficulty labels are handed out round-robin (``easy``, ``medium``,
    ``hard``, ``easy``, ...).  Each scenario gets its own child seed drawn,
    in order, from one generator seeded with ``seed``, so a given
    ``(n, seed)`` pair always produces the same child seeds and labels.

    Args:
        n: Number of scenarios to generate.
        seed: Seed of the parent generator that hands out child seeds.

    Returns:
        The generated scenarios, in generation order.
    """
    levels = ("easy", "medium", "hard")
    seed_source = np.random.default_rng(seed)
    pool: List[Scenario] = [
        generate_scenario(
            # One fresh child seed per scenario, drawn in iteration order.
            seed=int(seed_source.integers(0, 2**31)),
            difficulty=levels[index % len(levels)],
        )
        for index in range(n)
    ]
    logger.info("Generated %d procedural scenarios.", len(pool))
    return pool
|
server/tasks/scenarios.py
ADDED
|
@@ -0,0 +1,370 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Pre-defined drug-target-validation scenarios.
|
| 2 |
+
|
| 3 |
+
Each ``Scenario`` bundles a ``ValidationTaskSpec`` together with the
|
| 4 |
+
matching hidden ``TargetProfile`` so the simulator can instantiate
|
| 5 |
+
consistent episodes. The library spans the easy → very-hard difficulty
|
| 6 |
+
range and intentionally includes misleading-signal scenarios where the
|
| 7 |
+
naive answer disagrees with the correct decision.
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
from __future__ import annotations
|
| 11 |
+
|
| 12 |
+
from dataclasses import dataclass, field
|
| 13 |
+
from typing import List
|
| 14 |
+
|
| 15 |
+
from models import ValidationTaskSpec
|
| 16 |
+
|
| 17 |
+
from server.simulator.latent_state import (
|
| 18 |
+
DataQualityState,
|
| 19 |
+
TargetProfile,
|
| 20 |
+
)
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
@dataclass
class Scenario:
    """A reproducible (task, ground-truth) pair.

    Bundles a task specification with the matching hidden target profile,
    together with data-quality settings, a difficulty label and tags.
    """

    # Identifier of the scenario, e.g. "egfr_nsclc_viable".
    name: str
    # Task specification: problem statement, target gene, disease context,
    # credit budget, success criteria, ...
    task: ValidationTaskSpec
    # Hidden ground-truth profile of the target, including the correct
    # decision and true viability score.
    target: TargetProfile
    # Noise / error-rate / coverage settings; a default-constructed
    # DataQualityState is used when none is supplied.
    data_quality: DataQualityState = field(default_factory=DataQualityState)
    # Difficulty label (plain string, not validated here).
    difficulty: str = "medium"
    # Free-form labels; each instance gets its own empty list by default.
    tags: List[str] = field(default_factory=list)
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
# ── Scenario library ────────────────────────────────────────────────────────
#
# Hand-written scenarios, ordered roughly from easiest to hardest.  Every
# entry pairs a ValidationTaskSpec with the hidden TargetProfile that holds
# the ground truth (``correct_decision`` / ``true_viability_score``) and a
# DataQualityState whose noise and error rates grow with difficulty.
# ``misleading_signals`` names the cues where the naive reading disagrees
# with the correct decision.
#
# NOTE(review): the difficulty labels "easy_medium" and "very_hard" used
# below are not among the easy/medium/hard buckets cycled through by the
# procedural generator — confirm that any consumer filtering on difficulty
# handles them.

SCENARIO_LIBRARY: List[Scenario] = [
    # ── 1. EGFR / NSCLC — easy go ────────────────────────────────────────
    Scenario(
        name="egfr_nsclc_viable",
        difficulty="easy",
        tags=["oncology", "kinase", "clear_go"],
        task=ValidationTaskSpec(
            problem_statement=(
                "Validate EGFR as a drug target in EGFR-mutant non-small "
                "cell lung cancer."
            ),
            target_gene="EGFR",
            disease_context="EGFR-mutant non-small cell lung cancer (NSCLC)",
            indication="EGFR-mutant NSCLC",
            credits_limit=50,
            success_criteria=[
                "Confirm tumor-selective expression",
                "Confirm druggable kinase pocket",
                "Confirm positive clinical precedent",
                "Submit go recommendation with calibrated confidence",
            ],
            prior_observations=[
                "EGFR mutations are well-established oncogenic drivers in NSCLC",
            ],
        ),
        target=TargetProfile(
            expression_level="high_specific",
            tissue_specificity=0.85,
            disease_overexpression=4.5,
            druggability_score=0.92,
            binding_pocket_quality="excellent",
            has_known_ligands=True,
            allosteric_site_available=True,
            selectivity_ratio=15.0,
            off_target_count=2,
            off_target_genes=["ERBB2", "ERBB4"],
            toxicity_profile="mild",
            toxicity_tissues=["skin", "GI"],
            clinical_precedent="positive",
            clinical_stage_reached="phase3",
            competitor_programs=["erlotinib", "gefitinib", "osimertinib"],
            requires_patient_stratification=True,
            responder_biomarker="EGFR_activating_mutation",
            in_vitro_ic50_nM=2.0,
            in_vivo_efficacy="strong",
            crispr_essentiality=-1.4,
            true_viability_score=0.88,
            correct_decision="go",
            misleading_signals=[],
            key_evidence_dimensions=["expression", "druggability"],
        ),
        data_quality=DataQualityState(
            noise_level=0.08,
            false_positive_rate=0.04,
            false_negative_rate=0.04,
            database_coverage=0.95,
        ),
    ),

    # ── 2. KRAS G12C / PDAC — borderline go ──────────────────────────────
    # The "historically undruggable" prior is the misleading signal here;
    # the correct decision is still "go".
    Scenario(
        name="kras_pdac_borderline",
        difficulty="medium",
        tags=["oncology", "GTPase", "borderline_go", "literature_critical"],
        task=ValidationTaskSpec(
            problem_statement=(
                "Validate KRAS G12C as a drug target in pancreatic ductal "
                "adenocarcinoma (PDAC)."
            ),
            target_gene="KRAS_G12C",
            disease_context="Pancreatic ductal adenocarcinoma (PDAC)",
            indication="KRAS G12C-mutant PDAC",
            credits_limit=50,
            success_criteria=[
                "Re-evaluate druggability given recent inhibitor development",
                "Check clinical precedent for KRAS G12C inhibitors",
                "Submit go recommendation if recent advances support it",
            ],
            prior_observations=[
                "KRAS was historically considered undruggable",
                "Recent G12C-specific inhibitors have entered clinical use",
            ],
        ),
        target=TargetProfile(
            expression_level="high_specific",
            tissue_specificity=0.70,
            disease_overexpression=3.0,
            druggability_score=0.65,
            binding_pocket_quality="good",
            has_known_ligands=True,
            allosteric_site_available=True,
            selectivity_ratio=6.0,
            off_target_count=4,
            off_target_genes=["HRAS", "NRAS", "RRAS", "MRAS"],
            toxicity_profile="moderate",
            toxicity_tissues=["GI", "skin"],
            clinical_precedent="positive",
            clinical_stage_reached="phase2",
            competitor_programs=["sotorasib", "adagrasib"],
            requires_patient_stratification=True,
            responder_biomarker="KRAS_G12C_mutation",
            in_vitro_ic50_nM=15.0,
            in_vivo_efficacy="moderate",
            crispr_essentiality=-1.1,
            true_viability_score=0.62,
            correct_decision="go",
            misleading_signals=["historical_undruggability"],
            key_evidence_dimensions=[
                "druggability",
                "literature",
                "clinical",
            ],
        ),
        data_quality=DataQualityState(
            noise_level=0.12,
            false_positive_rate=0.06,
            false_negative_rate=0.06,
            database_coverage=0.85,
        ),
    ),

    # ── 3. CD33 / AML — misleading no-go ────────────────────────────────
    # High expression and a partial clinical precedent look encouraging,
    # but the ground truth is "no_go".
    Scenario(
        name="cd33_aml_misleading",
        difficulty="hard",
        tags=["oncology", "antibody", "misleading", "selectivity_critical"],
        task=ValidationTaskSpec(
            problem_statement=(
                "Validate CD33 as a drug target in acute myeloid leukemia "
                "(AML)."
            ),
            target_gene="CD33",
            disease_context="Acute myeloid leukemia (AML)",
            indication="CD33-positive AML",
            credits_limit=50,
            success_criteria=[
                "Quantify on-target expression in AML blasts vs normal myeloid",
                "Run off-target / paralog screen",
                "Run toxicity panel and clinical precedent",
                "Submit calibrated go/no_go decision",
            ],
            prior_observations=[
                "CD33 is highly expressed on AML blasts",
                "Gemtuzumab ozogamicin had a complicated regulatory history",
            ],
        ),
        target=TargetProfile(
            expression_level="high_nonspecific",
            tissue_specificity=0.35,
            disease_overexpression=2.0,
            druggability_score=0.55,
            binding_pocket_quality="good",
            has_known_ligands=True,
            allosteric_site_available=False,
            selectivity_ratio=1.6,
            # NOTE(review): count is 8 but only 5 genes are listed below —
            # presumably a representative subset; confirm.
            off_target_count=8,
            off_target_genes=[
                "CD33L",
                "SIGLEC5",
                "SIGLEC6",
                "SIGLEC7",
                "SIGLEC9",
            ],
            toxicity_profile="severe",
            toxicity_tissues=[
                "bone_marrow",
                "myeloid_progenitors",
                "liver",
            ],
            clinical_precedent="mixed",
            clinical_stage_reached="phase3",
            competitor_programs=["gemtuzumab_ozogamicin"],
            requires_patient_stratification=False,
            responder_biomarker=None,
            in_vitro_ic50_nM=120.0,
            in_vivo_efficacy="weak",
            crispr_essentiality=-0.2,
            true_viability_score=0.22,
            correct_decision="no_go",
            misleading_signals=[
                "high_expression_looks_positive",
                "partial_clinical_precedent",
            ],
            key_evidence_dimensions=[
                "off_target",
                "toxicity",
                "clinical",
            ],
        ),
        data_quality=DataQualityState(
            noise_level=0.15,
            false_positive_rate=0.08,
            false_negative_rate=0.08,
            database_coverage=0.85,
        ),
    ),

    # ── 4. TP53 — clear no-go ────────────────────────────────────────────
    Scenario(
        name="tp53_solid_tumors_clear_fail",
        difficulty="easy_medium",
        tags=["oncology", "transcription_factor", "clear_no_go"],
        task=ValidationTaskSpec(
            problem_statement=(
                "Validate TP53 (small-molecule restoration approach) as a "
                "drug target across solid tumors."
            ),
            target_gene="TP53",
            disease_context="Pan-cancer solid tumors with TP53 loss",
            indication="TP53-mutant solid tumors",
            credits_limit=50,
            success_criteria=[
                "Assess druggability honestly",
                "Submit no_go if druggability is poor",
            ],
            prior_observations=[
                "TP53 is the most frequently mutated gene in cancer",
                "Direct small-molecule restoration has historically failed",
            ],
        ),
        target=TargetProfile(
            expression_level="moderate",
            tissue_specificity=0.20,
            disease_overexpression=0.6,
            druggability_score=0.10,
            binding_pocket_quality="undruggable",
            has_known_ligands=False,
            allosteric_site_available=False,
            selectivity_ratio=1.0,
            off_target_count=0,
            off_target_genes=[],
            toxicity_profile="moderate",
            toxicity_tissues=["multiple"],
            clinical_precedent="negative",
            clinical_stage_reached="phase1",
            competitor_programs=["APR-246_eprenetapopt"],
            requires_patient_stratification=False,
            responder_biomarker=None,
            in_vitro_ic50_nM=10000.0,
            in_vivo_efficacy="none",
            crispr_essentiality=0.1,
            true_viability_score=0.08,
            correct_decision="no_go",
            misleading_signals=[],
            key_evidence_dimensions=["druggability"],
        ),
        data_quality=DataQualityState(
            noise_level=0.10,
            false_positive_rate=0.05,
            false_negative_rate=0.05,
            database_coverage=0.90,
        ),
    ),

    # ── 5. SHP2 / JMML — very hard go ────────────────────────────────────
    # A low classic druggability score and a pan-phosphatase toxicity concern
    # point towards no-go, yet the ground truth is "go" (an allosteric site
    # is available and patients can be stratified by GOF mutation).
    Scenario(
        name="ptpn11_juvenile_mml_complex",
        difficulty="very_hard",
        tags=[
            "oncology",
            "phosphatase",
            "allosteric",
            "patient_stratification",
            "complex_go",
        ],
        task=ValidationTaskSpec(
            problem_statement=(
                "Validate SHP2 (PTPN11) as a drug target in juvenile "
                "myelomonocytic leukemia (JMML)."
            ),
            target_gene="PTPN11",
            disease_context="Juvenile myelomonocytic leukemia (JMML)",
            indication="PTPN11 GOF-mutant JMML",
            credits_limit=50,
            success_criteria=[
                "Detect allosteric druggability via dedicated pocket analysis",
                "Quantify pan-phosphatase off-target risk",
                "Identify GOF-mutation-stratified patient population",
                "Run in-vitro confirmation before final go/no_go",
            ],
            prior_observations=[
                "PTPN11 GOF mutations drive JMML",
                "Active site is shallow and considered undruggable; allosteric "
                "inhibitors have changed the landscape",
            ],
        ),
        target=TargetProfile(
            expression_level="moderate",
            tissue_specificity=0.45,
            disease_overexpression=1.6,
            druggability_score=0.40,
            binding_pocket_quality="poor",
            has_known_ligands=True,
            allosteric_site_available=True,
            selectivity_ratio=2.5,
            # NOTE(review): count is 12 but only 5 genes are listed below —
            # presumably a representative subset; confirm.
            off_target_count=12,
            off_target_genes=[
                "PTPN6",
                "PTPN11_paralog",
                "PTPN1",
                "PTPN2",
                "DUSP6",
            ],
            toxicity_profile="moderate",
            toxicity_tissues=["bone_marrow", "GI"],
            clinical_precedent="mixed",
            clinical_stage_reached="phase2",
            competitor_programs=["TNO155", "RMC-4630"],
            requires_patient_stratification=True,
            responder_biomarker="PTPN11_GOF_mutation",
            in_vitro_ic50_nM=45.0,
            in_vivo_efficacy="moderate",
            crispr_essentiality=-0.9,
            true_viability_score=0.58,
            correct_decision="go",
            misleading_signals=[
                "pan-phosphatase_toxicity_concern",
                "low_classic_druggability_score",
            ],
            key_evidence_dimensions=[
                "druggability",
                "off_target",
                "patient_stratification",
                "in_vitro",
            ],
        ),
        data_quality=DataQualityState(
            noise_level=0.18,
            false_positive_rate=0.10,
            false_negative_rate=0.10,
            database_coverage=0.80,
        ),
    ),
]
|
space/__init__.py
ADDED
|
File without changes
|
space/training/Dockerfile
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# DrugEnv trainer Space (Docker, single H200 GPU)
# Serves the FastAPI control panel (space.training.app:app) on port 8000,
# matched by README YAML app_port: 8000.
FROM nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04

# Non-interactive apt, unbuffered Python output, no pip cache.  Hugging Face
# caches live under the unprivileged user's home, and the repo root is put
# on PYTHONPATH so that `space.training.app` is importable from the CMD.
# NOTE(review): TRANSFORMERS_CACHE is deprecated in recent transformers
# releases in favour of HF_HOME — confirm it is still needed.
ENV DEBIAN_FRONTEND=noninteractive \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    HF_HOME=/home/user/.cache/huggingface \
    TRANSFORMERS_CACHE=/home/user/.cache/huggingface/transformers \
    PYTHONPATH=/home/user/app \
    PORT=8000

# Install Python 3.11 plus build tooling, drop the apt lists to keep the
# layer small, and make `python` / `python3` resolve to 3.11.
RUN apt-get update && apt-get install -y --no-install-recommends \
    python3.11 python3.11-venv python3.11-dev python3-pip \
    git curl ca-certificates build-essential \
    && rm -rf /var/lib/apt/lists/* \
    && ln -sf /usr/bin/python3.11 /usr/local/bin/python \
    && ln -sf /usr/bin/python3.11 /usr/local/bin/python3

# Everything below runs as the unprivileged `user`; ~/.local/bin goes on
# PATH because the dependencies are installed with `pip install --user`.
RUN useradd -ms /bin/bash user
USER user
ENV PATH="/home/user/.local/bin:${PATH}"
WORKDIR /home/user/app

# Copy the entire repo first so relative -r references inside the
# trainer requirements file (-r ../../requirements-train.txt etc.)
# resolve correctly. Only after the tree is in place do we install.
COPY --chown=user:user . /home/user/app

# Upgrade pip, then install the trainer requirements into the user site.
RUN python -m pip install --upgrade pip && \
    python -m pip install --user -r /home/user/app/space/training/requirements.txt

# Same port as the PORT variable above and the uvicorn --port below.
EXPOSE 8000

# Exec-form CMD: start uvicorn directly, listening on all interfaces.
CMD ["python", "-m", "uvicorn", "space.training.app:app", "--host", "0.0.0.0", "--port", "8000"]
|
space/training/README.md
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: DrugEnv Trainer
|
| 3 |
+
sdk: docker
|
| 4 |
+
pinned: false
|
| 5 |
+
app_port: 8000
|
| 6 |
+
tags:
|
| 7 |
+
- openenv
|
| 8 |
+
- reinforcement-learning
|
| 9 |
+
- drug-discovery
|
| 10 |
+
- grpo
|
| 11 |
+
---
|
| 12 |
+
|
| 13 |
+
# 🧬 DrugEnv Trainer
|
| 14 |
+
|
| 15 |
+
A self-contained Hugging Face Space that runs **GRPO** (Group-Relative
|
| 16 |
+
Policy Optimization) inside the **DrugEnv** drug-target-validation
|
| 17 |
+
environment, with a live dashboard that streams reward curves,
mid-training checkpoint evals, and a before/after summary as the run
progresses.
|
| 20 |
+
|
| 21 |
+
The trainer is designed to be flipped on with a single `POST /train`
|
| 22 |
+
once the Space has been provisioned and (optionally) given an
|
| 23 |
+
`HF_TOKEN` for pushing the resulting model and evidence artefacts.
|
| 24 |
+
|
| 25 |
+
## Expected hardware
|
| 26 |
+
|
| 27 |
+
| Knob | Value |
|
| 28 |
+
|---|---|
|
| 29 |
+
| Hardware target | **`h200x1`** (single H200 GPU) |
|
| 30 |
+
| Throughput | ~4× A100 on Qwen2.5-3B-class GRPO |
|
| 31 |
+
| Cost (rough) | ~$0.05–0.10 per GRPO step on Qwen2.5-3B |
|
| 32 |
+
|
| 33 |
+
H200 is set via the Space settings page on the Hub — this README and
|
| 34 |
+
the title bar of the dashboard advertise it; the runtime detects what
|
| 35 |
+
it actually got via `torch.cuda.device_count()`.
|
| 36 |
+
|
| 37 |
+
## Configuration
|
| 38 |
+
|
| 39 |
+
Every knob is an environment variable so the Space can be reconfigured
|
| 40 |
+
without a redeploy. Defaults match a sensible single-H200 run.
|
| 41 |
+
|
| 42 |
+
| Variable | Default | Description |
|
| 43 |
+
|---|---|---|
|
| 44 |
+
| `MODEL_NAME` | `Qwen/Qwen2.5-3B-Instruct` | Base model loaded by GRPO. |
|
| 45 |
+
| `TRAINING_BACKEND` | `vanilla` | `vanilla` (transformers) or `unsloth`. |
|
| 46 |
+
| `DIFFICULTY` | `easy` | Default difficulty bucket. |
|
| 47 |
+
| `TOTAL_EPISODES` | `120` | Prompt budget for GRPO. |
|
| 48 |
+
| `MAX_STEPS` | `20` | Max env steps per rollout (DrugEnv allows up to 30). |
|
| 49 |
+
| `NUM_GENERATIONS` | `4` | GRPO group size. |
|
| 50 |
+
| `CHECKPOINT_EVAL_STEPS` | `50` | Run a held-out eval every N updates. |
|
| 51 |
+
| `CHECKPOINT_EVAL_EPISODES` | `4` | Episodes per mid-training eval. |
|
| 52 |
+
| `EVAL_EPISODES` | `8` | Pre/post-training eval size. |
|
| 53 |
+
| `OUTPUT_DIR` | `runs/grpo-output` | Trained model directory. |
|
| 54 |
+
| `EVIDENCE_DIR` | `evidence` | Where CSV/PNG artefacts land. |
|
| 55 |
+
| `PUSH_REPO` | `anugrahteesdollar/drugenv-grpo-qwen3b` | Hub repo to upload to. |
|
| 56 |
+
| `SFT_WARMSTART` | `true` | Run an oracle-driven SFT phase before GRPO. |
|
| 57 |
+
| `SFT_NUM_EPISODES` | `200` | Oracle trajectories collected for SFT. |
|
| 58 |
+
| `SFT_MAX_STEPS` | `25` | Per-episode cap for SFT trajectories. |
|
| 59 |
+
| `SFT_EPOCHS` | `1` | SFT epochs over the collected dataset. |
|
| 60 |
+
| `SFT_LR` | `1e-5` | SFT learning rate. |
|
| 61 |
+
| `AUTOSTART` | `0` | Auto-launch a run on Space startup. |
|
| 62 |
+
|
| 63 |
+
## Endpoints
|
| 64 |
+
|
| 65 |
+
The control panel is served at `/` and refreshes every 5 s. The
|
| 66 |
+
underlying JSON API surface:
|
| 67 |
+
|
| 68 |
+
```
|
| 69 |
+
GET / status page (HTML)
|
| 70 |
+
GET /status run state
|
| 71 |
+
GET /metrics pre / post evaluation metrics
|
| 72 |
+
GET /sft_summary SFT warm-start summary (404 if not yet run)
|
| 73 |
+
GET /evidence JSON index of evidence artefacts
|
| 74 |
+
GET /evidence/<name> serve an artefact (with on-demand PNG synth fallback)
|
| 75 |
+
GET /logs?tail=N last N lines of training.log
|
| 76 |
+
POST /train start a run; body is a JSON object of CONFIG overrides
|
| 77 |
+
GET /health liveness probe
|
| 78 |
+
```
|
| 79 |
+
|
| 80 |
+
## Triggering a run
|
| 81 |
+
|
| 82 |
+
```bash
|
| 83 |
+
# Start with defaults
|
| 84 |
+
curl -X POST https://<your-space>.hf.space/train
|
| 85 |
+
|
| 86 |
+
# Override a few knobs for this run only
|
| 87 |
+
curl -X POST https://<your-space>.hf.space/train \
|
| 88 |
+
-H 'Content-Type: application/json' \
|
| 89 |
+
-d '{"total_episodes": 240, "difficulty": "medium"}'
|
| 90 |
+
```
|
| 91 |
+
|
| 92 |
+
## Why this exists
|
| 93 |
+
|
| 94 |
+
DrugEnv's reward function decomposes the terminal grade across
|
| 95 |
+
`decision_accuracy`, `evidence_coverage`, `credit_efficiency`, and
|
| 96 |
+
`reasoning_coherence`, with potential-based per-step shaping over the
|
| 97 |
+
evidence-coverage potential and rule-driven redundancy / prerequisite
|
| 98 |
+
penalties. With those guard-rails in place, the dominant failure mode
|
| 99 |
+
of a small base model is *not* reward hacking — it's that the policy
|
| 100 |
+
never sees a positive-reward rollout in the first place, because
|
| 101 |
+
zero-shot Qwen2.5-3B cannot solve the drug-target-validation pipeline.
|
| 102 |
+
|
| 103 |
+
The trainer Space addresses this with an optional SFT warm-start
|
| 104 |
+
phase (`SFT_WARMSTART=true` by default): a short pass on oracle
|
| 105 |
+
trajectories gives the policy a non-zero prior over the correct
|
| 106 |
+
sequence, which GRPO then refines. The control panel surfaces both
|
| 107 |
+
phases so you can see the warm-start loss, the GRPO reward curve, and
|
| 108 |
+
the before / after summary in one view.
|
| 109 |
+
|
| 110 |
+
## Evolution note
|
| 111 |
+
|
| 112 |
+
The deployment scaffolding here — control panel, on-demand PNG synth,
|
| 113 |
+
auto-refresh, evidence index — was originally validated against a
|
| 114 |
+
particle-physics-themed prototype before being carried forward to
|
| 115 |
+
DrugEnv. The reward shape, action space, and scenario library it
|
| 116 |
+
targets are entirely drug-domain native.
|
space/training/__init__.py
ADDED
|
File without changes
|
space/training/app.py
ADDED
|
@@ -0,0 +1,943 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""FastAPI control panel for the DrugEnv trainer Space.
|
| 2 |
+
|
| 3 |
+
Endpoints
|
| 4 |
+
---------
|
| 5 |
+
GET / → status page (HTML)
|
| 6 |
+
GET /status → JSON status of the current training run
|
| 7 |
+
GET /metrics → JSON snapshot of pre / post evaluation metrics
|
| 8 |
+
GET /logs → tail of the training log
|
| 9 |
+
GET /sft_summary → SFT warm-start summary if available
|
| 10 |
+
GET /evidence → JSON index of evidence/ directory
|
| 11 |
+
GET /evidence/<name> → individual artefact (PNG/CSV/JSON/MD), with
|
| 12 |
+
on-demand PNG synthesis as a fallback
|
| 13 |
+
POST /train → start (or restart) a training run
|
| 14 |
+
GET /health → liveness probe
|
| 15 |
+
|
| 16 |
+
Designed to run on a Hugging Face Space with ``sdk: docker``. Heavy
|
| 17 |
+
training work runs in a background thread so the HTTP server stays
|
| 18 |
+
responsive.
|
| 19 |
+
"""
|
| 20 |
+
|
| 21 |
+
from __future__ import annotations
|
| 22 |
+
|
| 23 |
+
import ast
|
| 24 |
+
import io
|
| 25 |
+
import json
|
| 26 |
+
import logging
|
| 27 |
+
import os
|
| 28 |
+
import re
|
| 29 |
+
import subprocess
|
| 30 |
+
import sys
|
| 31 |
+
import threading
|
| 32 |
+
import time
|
| 33 |
+
from datetime import datetime, timezone
|
| 34 |
+
from pathlib import Path
|
| 35 |
+
from typing import Any, Dict, List, Optional
|
| 36 |
+
|
| 37 |
+
from fastapi import FastAPI, HTTPException, Request
|
| 38 |
+
from fastapi.responses import (
|
| 39 |
+
FileResponse,
|
| 40 |
+
HTMLResponse,
|
| 41 |
+
JSONResponse,
|
| 42 |
+
PlainTextResponse,
|
| 43 |
+
Response,
|
| 44 |
+
)
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
|
| 48 |
+
logger = logging.getLogger(__name__)
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
# Expected hardware target for the trainer Space. The actual hardware
|
| 52 |
+
# is set via the Space settings page on the Hub — this constant is just
|
| 53 |
+
# what the dashboard advertises in its title bar so reviewers know what
|
| 54 |
+
# to provision. H200 ≈ 4× A100 throughput and is comfortably the
|
| 55 |
+
# cheapest viable target for Qwen2.5-3B-class GRPO.
|
| 56 |
+
EXPECTED_HARDWARE = "h200x1"
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def _resolve_repo_root() -> Path:
    """Pick the repository root directory.

    Candidates are tried in order: ``$DRUGENV_ROOT`` (when set and
    non-empty), ``/home/user/app``, then the third ancestor of this file's
    resolved path. The first candidate that exists is returned resolved;
    if none exists, the last candidate is returned anyway.
    """
    override = os.environ.get("DRUGENV_ROOT")
    search_order: List[Path] = [Path(override)] if override else []
    search_order += [
        Path("/home/user/app"),
        Path(__file__).resolve().parent.parent.parent,
    ]
    for candidate in search_order:
        try:
            resolved = candidate.resolve() if candidate.exists() else None
        except OSError:
            # A candidate that cannot be inspected is treated as missing.
            resolved = None
        if resolved is not None:
            return resolved
    return search_order[-1].resolve()
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
# Working directories, resolved once at import time. Both the log and the
# evidence directory fall back to a location under /tmp when they cannot be
# created under the repo root (e.g. a read-only filesystem), and both
# fallbacks are logged so the relocation is visible to the operator.
REPO_ROOT = _resolve_repo_root()
LOG_DIR = REPO_ROOT / "training" / "runs"
try:
    LOG_DIR.mkdir(parents=True, exist_ok=True)
except OSError as exc:  # pragma: no cover - read-only filesystem fallback
    logger.warning("could not create %s (%s); using /tmp", LOG_DIR, exc)
    LOG_DIR = Path("/tmp/drugenv-runs")
    LOG_DIR.mkdir(parents=True, exist_ok=True)
LOG_FILE = LOG_DIR / "training.log"
EVIDENCE_DIR = REPO_ROOT / "evidence"
try:
    EVIDENCE_DIR.mkdir(parents=True, exist_ok=True)
except OSError as exc:  # pragma: no cover - read-only filesystem fallback
    logger.warning("could not create %s (%s); using /tmp", EVIDENCE_DIR, exc)
    EVIDENCE_DIR = Path("/tmp/drugenv-evidence")
    EVIDENCE_DIR.mkdir(parents=True, exist_ok=True)
METRICS_FILE = EVIDENCE_DIR / "before_after_metrics.json"
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def _env(name: str, default: str) -> str:
    """Return environment variable *name*, or *default* when it is unset."""
    try:
        return os.environ[name]
    except KeyError:
        return default
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
def _detect_gpus() -> int:
    """Return the number of visible NVIDIA GPUs, or 0 when none are found.

    ``torch.cuda`` is consulted first; if torch is unavailable or reports
    no CUDA device, the non-empty lines printed by ``nvidia-smi`` are
    counted instead. Detection is best-effort: any failure yields 0.
    """
    try:
        import torch  # type: ignore
        if torch.cuda.is_available():
            return torch.cuda.device_count()
    except Exception:
        # torch missing or broken: fall through to the nvidia-smi probe.
        pass
    try:
        out = subprocess.run(
            ["nvidia-smi", "--query-gpu=name", "--format=csv,noheader"],
            capture_output=True, text=True, timeout=5,
        )
    except Exception:
        # Binary not installed, timed out, not executable, ...
        return 0
    if out.returncode != 0:
        # A failing nvidia-smi may still print diagnostic text on stdout;
        # those lines must not be counted as GPUs.
        return 0
    return sum(1 for line in out.stdout.splitlines() if line.strip())
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
# GPU count probed once at import time.
# NOTE(review): CONFIG["num_gpus"] is read from the NUM_GPUS env var and does
# not use this value - confirm whether that is intended.
_NUM_GPUS = _detect_gpus()
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
def _bool_env(name: str, default: str) -> bool:
    """Read *name* via ``_env`` and interpret it as a boolean flag.

    ``1``, ``true``, ``yes`` and ``on`` (case-insensitive, surrounding
    whitespace ignored) mean ``True``; every other value means ``False``.
    """
    truthy = ("1", "true", "yes", "on")
    normalised = _env(name, default).strip().lower()
    return normalised in truthy
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
# Default run configuration, read from environment variables once at import
# time. Numeric values are parsed eagerly, so a malformed value fails at
# startup rather than in the middle of a run.
CONFIG = {
    "training_backend": _env("TRAINING_BACKEND", "vanilla"),
    "model_name": _env("MODEL_NAME", "Qwen/Qwen2.5-3B-Instruct"),
    "difficulty": _env("DIFFICULTY", "easy"),
    "total_episodes": int(_env("TOTAL_EPISODES", "120")),
    "max_steps": int(_env("MAX_STEPS", "20")),
    "num_generations": int(_env("NUM_GENERATIONS", "4")),
    "checkpoint_eval_steps": int(_env("CHECKPOINT_EVAL_STEPS", "50")),
    "checkpoint_eval_episodes": int(_env("CHECKPOINT_EVAL_EPISODES", "4")),
    "eval_episodes": int(_env("EVAL_EPISODES", "8")),
    "output_dir": _env("OUTPUT_DIR", "runs/grpo-output"),
    "evidence_dir": _env("EVIDENCE_DIR", "evidence"),
    "num_gpus": int(_env("NUM_GPUS", "1")),
    "hf_username": _env("HF_USERNAME", "anugrahteesdollar"),
    "push_repo": _env(
        "PUSH_REPO",
        f"{_env('HF_USERNAME', 'anugrahteesdollar')}/drugenv-grpo-qwen3b",
    ),
    # Parsed like every other boolean flag: "1" still enables autostart,
    # and "true" / "yes" / "on" are now accepted as well.
    "autostart": _bool_env("AUTOSTART", "0"),
    # ── SFT warm-start phase (defeats the no-submit avoidance hack
    #    by giving GRPO a non-zero prior over correct trajectories) ─────
    "sft_warmstart": _bool_env("SFT_WARMSTART", "true"),
    "sft_num_episodes": int(_env("SFT_NUM_EPISODES", "200")),
    "sft_max_steps": int(_env("SFT_MAX_STEPS", "25")),
    "sft_epochs": int(_env("SFT_EPOCHS", "1")),
    "sft_lr": float(_env("SFT_LR", "1e-5")),
    "sft_difficulty": _env("SFT_DIFFICULTY", "mixed"),
    "sft_out_dir": _env("SFT_OUT_DIR", "runs/sft-warmstart"),
}
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
# ── Run state ────────────────────────────────────────────────────────────
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
class RunState:
    """Mutable record of the current (or most recent) training run.

    ``lock`` is meant to be held while the status fields are read or
    written; :meth:`to_dict` takes it to produce a consistent snapshot.
    """

    def __init__(self) -> None:
        self.lock = threading.Lock()
        # Background thread running the pipeline, if one was started.
        self.thread: Optional[threading.Thread] = None
        # Child process currently being streamed, if any.
        self.process: Optional[subprocess.Popen] = None
        self.status: str = "idle"  # idle | running | finished | failed
        # ISO-8601 timestamps of the last run's start and end.
        self.started_at: Optional[str] = None
        self.finished_at: Optional[str] = None
        # Message of the exception that failed the last run, if any.
        self.last_error: Optional[str] = None
        # Configuration the last run was started with.
        self.last_config: Dict[str, Any] = {}

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-friendly snapshot of the run state.

        ``last_config`` is copied so the caller never holds a reference
        to the live dict once the lock has been released.
        """
        with self.lock:
            return {
                "status": self.status,
                "started_at": self.started_at,
                "finished_at": self.finished_at,
                "last_error": self.last_error,
                "last_config": dict(self.last_config),
                "expected_hardware": EXPECTED_HARDWARE,
            }
|
| 178 |
+
|
| 179 |
+
|
| 180 |
+
# Module-level run-state instance.
STATE = RunState()
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
# ── Training pipeline ────────────────────────────────────────────────────
|
| 184 |
+
|
| 185 |
+
|
| 186 |
+
def _stream_subprocess(cmd: list[str], log_handle) -> int:
    """Run *cmd* from the repo root, copying its output into *log_handle*.

    stdout and stderr are merged and written line by line, flushing after
    every line. While the child is alive it is published as
    ``STATE.process``.

    Args:
        cmd: Argument vector to execute (no shell is involved).
        log_handle: Writable text object exposing ``write`` and ``flush``.

    Returns:
        The child's exit code.
    """
    log_handle.write(f"\n$ {' '.join(cmd)}\n")
    log_handle.flush()
    proc = subprocess.Popen(
        cmd,
        cwd=str(REPO_ROOT),
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        bufsize=1,
        universal_newlines=True,
        env={**os.environ, "PYTHONPATH": str(REPO_ROOT)},
    )
    STATE.process = proc
    try:
        assert proc.stdout is not None
        for line in proc.stdout:
            log_handle.write(line)
            log_handle.flush()
        rc = proc.wait()
    finally:
        # Also runs when streaming fails (e.g. the log write raises): do not
        # leave an orphaned child, an open pipe or a stale STATE.process.
        if proc.poll() is None:
            proc.kill()
            proc.wait()
        if proc.stdout is not None:
            proc.stdout.close()
        STATE.process = None
    log_handle.write(f"[exit code {rc}]\n")
    log_handle.flush()
    return rc
|
| 208 |
+
|
| 209 |
+
|
| 210 |
+
def _build_sft_warmstart_cmd(config: Dict[str, Any]) -> list[str]:
    """Return the argv that launches ``training.sft_warmstart`` for *config*.

    ``/usr/local/bin/python`` is used as the interpreter when it exists,
    otherwise the interpreter running this process.
    """
    interpreter = sys.executable
    if Path("/usr/local/bin/python").exists():
        interpreter = "/usr/local/bin/python"

    options = [
        ("--out_dir", config["sft_out_dir"]),
        ("--num_episodes", str(config["sft_num_episodes"])),
        ("--max_steps", str(config["sft_max_steps"])),
        ("--epochs", str(config["sft_epochs"])),
        ("--lr", str(config["sft_lr"])),
        ("--base_model", config["model_name"]),
        ("--difficulty", config["sft_difficulty"]),
        ("--evidence_dir", config["evidence_dir"]),
    ]
    argv = [interpreter, "-m", "training.sft_warmstart"]
    for flag, value in options:
        argv.extend((flag, value))
    return argv
|
| 224 |
+
|
| 225 |
+
|
| 226 |
+
def _build_training_cmd(config: Dict[str, Any]) -> list[str]:
    """Return the argv for the GRPO launcher selected by *config*.

    The ``vanilla`` backend runs ``training.training_script``; the
    ``unsloth`` backend runs ``training.training_unsloth``, wrapped in
    ``accelerate launch`` when more than one GPU process is requested.

    This function knows nothing about the SFT phase: when SFT warm-start
    is enabled, the caller (``_training_pipeline``) is expected to have
    already replaced ``model_name`` with the SFT output directory, and
    GRPO simply trains from whatever path is in ``model_name``.

    Raises:
        ValueError: if ``training_backend`` is neither ``vanilla`` nor
            ``unsloth``.
    """
    backend = str(config.get("training_backend", "vanilla")).lower()
    interpreter = sys.executable
    if Path("/usr/local/bin/python").exists():
        interpreter = "/usr/local/bin/python"

    shared_flags = [
        "--model-id", config["model_name"],
        "--evidence-dir", config["evidence_dir"],
        "--output-dir", config["output_dir"],
        "--checkpoint-eval-steps", str(config["checkpoint_eval_steps"]),
        "--checkpoint-eval-episodes", str(config["checkpoint_eval_episodes"]),
    ]

    if backend not in ("vanilla", "unsloth"):
        raise ValueError(f"unknown TRAINING_BACKEND={backend!r}")
    if backend == "vanilla":
        return [interpreter, "-m", "training.training_script"] + shared_flags

    module_args = ["-m", "training.training_unsloth"] + shared_flags
    processes = max(int(config.get("num_gpus", 1)), 1)
    if processes <= 1:
        return [interpreter] + module_args
    launcher = [
        "accelerate", "launch", "--num_processes", str(processes),
        "--mixed_precision", "bf16",
    ]
    return launcher + module_args
|
| 259 |
+
|
| 260 |
+
|
| 261 |
+
def _push_model_folder_to_hub(*, output_dir: Path, repo_id: str, base_model: str, log) -> None:
    """Best-effort upload of the trained model directory to the Hub.

    Needs ``HF_TOKEN`` in the environment; without it the push is skipped.
    The outcome (skip / ok / warn) is written to *log*; upload errors are
    reported there and never raised.
    """

    def emit(message: str) -> None:
        log.write(message)
        log.flush()

    hub_token = os.environ.get("HF_TOKEN")
    if not hub_token:
        emit("\n[skip] HF_TOKEN not set — model not pushed\n")
        return
    try:
        from huggingface_hub import HfApi
        client = HfApi(token=hub_token)
        client.create_repo(repo_id=repo_id, repo_type="model", exist_ok=True)
        client.upload_folder(
            folder_path=str(output_dir),
            repo_id=repo_id,
            repo_type="model",
            commit_message=f"Upload DrugEnv GRPO model based on {base_model}",
        )
        emit(f"\n[ok] uploaded model → https://huggingface.co/{repo_id}\n")
    except Exception as exc:
        emit(f"\n[warn] model push failed: {exc}\n")
|
| 283 |
+
|
| 284 |
+
|
| 285 |
+
def _push_evidence_to_hub(*, evidence_dir: Path, repo_id: str, log) -> None:
    """Best-effort upload of the evidence directory into the model repo.

    Files land under ``evidence/`` in *repo_id*. Needs ``HF_TOKEN`` in the
    environment; without it the push is skipped. The outcome is written
    to *log*; upload errors are reported there and never raised.
    """

    def emit(message: str) -> None:
        log.write(message)
        log.flush()

    hub_token = os.environ.get("HF_TOKEN")
    if not hub_token:
        emit("\n[skip] HF_TOKEN not set — evidence not pushed\n")
        return
    try:
        from huggingface_hub import HfApi
        client = HfApi(token=hub_token)
        client.upload_folder(
            folder_path=str(evidence_dir),
            repo_id=repo_id,
            repo_type="model",
            path_in_repo="evidence",
            commit_message="Upload DrugEnv training evidence (curves, evals, plots)",
        )
        emit(f"\n[ok] uploaded evidence/ → https://huggingface.co/{repo_id}/tree/main/evidence\n")
    except Exception as exc:
        emit(f"\n[warn] evidence push failed: {exc}\n")
|
| 307 |
+
|
| 308 |
+
|
| 309 |
+
def _training_pipeline(config: Dict[str, Any]) -> None:
    """Run optional SFT warm-start, GRPO training and the Hub upload.

    Progress is appended to ``LOG_FILE`` and the outcome is recorded on
    ``STATE`` (``running`` → ``finished`` / ``failed``). Exceptions are
    logged and stored in ``STATE.last_error`` instead of propagating.

    Args:
        config: Run settings (the keys read here match those in
            ``CONFIG``). The dict is copied, so the caller's object is
            never modified.
    """
    # Private copy: the SFT phase rewrites ``model_name`` below, and that
    # must not leak into the caller's dict (which may be shared between runs).
    config = dict(config)
    started = datetime.now(timezone.utc).isoformat()
    with STATE.lock:
        STATE.status = "running"
        STATE.started_at = started
        STATE.finished_at = None
        STATE.last_error = None
        STATE.last_config = dict(config)

    finished: Optional[str] = None
    try:
        # Relative paths are anchored at REPO_ROOT because that is the
        # working directory the training subprocesses run in. Absolute
        # paths pass through unchanged.
        evidence_dir = (REPO_ROOT / config["evidence_dir"]).resolve()
        evidence_dir.mkdir(parents=True, exist_ok=True)
        output_dir = REPO_ROOT / config["output_dir"]

        LOG_FILE.parent.mkdir(parents=True, exist_ok=True)
        # Explicit UTF-8: the log lines below contain non-ASCII characters.
        with open(LOG_FILE, "a", encoding="utf-8") as log:
            try:
                log.write(f"\n=== Training started {started} ===\n")
                log.write(json.dumps(config, indent=2) + "\n")
                log.flush()

                model_name = config["model_name"]
                push_repo = config["push_repo"]

                if config.get("sft_warmstart"):
                    # Phase 1 — SFT warm-start. Produces a *full* causal-LM
                    # checkpoint at config['sft_out_dir'] (LoRA adapters are
                    # merged in by training/sft_warmstart.py) so we can hand
                    # it to GRPO as a drop-in --model-id.
                    sft_out = config["sft_out_dir"]
                    log.write(
                        f"\n--- SFT warm-start ({config['sft_num_episodes']} oracle "
                        f"episodes, epochs={config['sft_epochs']}, → {sft_out}) ---\n"
                    )
                    log.flush()
                    sft_rc = _stream_subprocess(_build_sft_warmstart_cmd(config), log)
                    if sft_rc != 0:
                        raise RuntimeError(f"SFT warm-start failed (rc={sft_rc})")
                    log.write(
                        f"\n[ok] SFT done; switching GRPO base model "
                        f"{config['model_name']} → {sft_out}\n"
                    )
                    log.flush()
                    config["model_name"] = sft_out

                # Phase 2 — GRPO training.
                backend = str(config.get("training_backend", "vanilla")).lower()
                log.write(
                    f"\n--- GRPO training ({backend}, "
                    f"{config['num_gpus']} GPU process(es), expected hardware "
                    f"{EXPECTED_HARDWARE}) ---\n"
                )
                log.flush()
                rc = _stream_subprocess(_build_training_cmd(config), log)
                if rc != 0:
                    raise RuntimeError(f"training failed (rc={rc})")

                log.write(
                    "\n--- evidence: training/training_script.save_training_plots already "
                    "ran on train-end via the LiveTrainingCallback; CSVs + PNGs live in "
                    f"{config['evidence_dir']} ---\n"
                )
                log.flush()

                # Phase 3 — best-effort upload of the model and evidence.
                if os.environ.get("HF_TOKEN"):
                    log.write("\n--- push trained model to Hub ---\n")
                    log.flush()
                    _push_model_folder_to_hub(
                        output_dir=output_dir,
                        repo_id=push_repo,
                        # Report the original base model, not the SFT directory.
                        base_model=model_name,
                        log=log,
                    )
                    _push_evidence_to_hub(
                        evidence_dir=evidence_dir,
                        repo_id=push_repo,
                        log=log,
                    )
                else:
                    log.write("\n[skip] HF_TOKEN not set — not pushing to Hub\n")
                    log.flush()
            finally:
                finished = datetime.now(timezone.utc).isoformat()
                log.write(f"\n=== Training ended {finished} ===\n")
                log.flush()
        with STATE.lock:
            STATE.status = "finished"
    except Exception as exc:
        # Covers setup failures (mkdir / open) as well as failed phases, so
        # the status can never stay stuck at "running".
        logger.exception("training pipeline failed")
        with STATE.lock:
            STATE.status = "failed"
            STATE.last_error = str(exc)
    finally:
        with STATE.lock:
            STATE.finished_at = finished or datetime.now(timezone.utc).isoformat()
|
| 400 |
+
|
| 401 |
+
|
| 402 |
+
def _start_training(config: Dict[str, Any]) -> None:
    """Launch ``_training_pipeline`` for *config* on a daemon thread.

    Raises:
        RuntimeError: if a training run is already in progress.
    """
    with STATE.lock:
        if STATE.status == "running":
            raise RuntimeError("a training run is already in progress")
        previous_status = STATE.status
        # Claim the slot while still holding the lock. Otherwise two callers
        # arriving close together could both pass the check above before the
        # worker thread gets a chance to mark the run as started.
        STATE.status = "running"
        worker = threading.Thread(
            target=_training_pipeline,
            args=(config,),
            name="drugenv-trainer",
            daemon=True,
        )
        STATE.thread = worker
    try:
        worker.start()
    except Exception:
        # The thread never ran, so release the slot again.
        with STATE.lock:
            STATE.status = previous_status
            STATE.thread = None
        raise
|
| 413 |
+
|
| 414 |
+
|
| 415 |
+
# ── On-demand evidence-PNG synthesis ─────────────────────────────────────
|
| 416 |
+
#
|
| 417 |
+
# When the GRPO loop hasn't had a chance to write training_log.csv /
|
| 418 |
+
# reward_components.csv yet, we still want the dashboard to show
|
| 419 |
+
# *something* meaningful. The on-demand handlers below parse the
|
| 420 |
+
# captured TRL stdout log (training/runs/training.log) for log dicts
|
| 421 |
+
# and synthesise the corresponding PNG on the fly, returning it to the
|
| 422 |
+
# browser with ``Cache-Control: no-store`` so cards refresh smoothly.
|
| 423 |
+
|
| 424 |
+
_TQDM_PROGRESS_RE = re.compile(r"\b(\d+)\s*/\s*(\d+)\s*\[")


def _first_present(record: Dict[str, Any], *keys: str) -> Any:
    """Return the first value in *record* under *keys* that is not ``None``."""
    for key in keys:
        value = record.get(key)
        if value is not None:
            return value
    return None


def _parse_training_log_dicts(text: str) -> List[Dict[str, Any]]:
    """Extract per-step metric rows from captured trainer stdout.

    Lines matching the progress-bar pattern (``<done>/<total> [``) only
    update the current step counter. Lines that are Python dict literals
    mentioning ``'loss'``, ``'reward'`` or ``'kl'`` become one row each,
    tagged with the most recently seen step (or the row index when no
    progress line has been seen yet). Everything else is ignored.

    Args:
        text: Full contents of the training log.

    Returns:
        One dict per parsed log event; metrics missing from the event
        are ``None``.
    """
    rows: List[Dict[str, Any]] = []
    last_step: Optional[int] = None
    for raw in text.splitlines():
        progress = _TQDM_PROGRESS_RE.search(raw)
        if progress:
            try:
                last_step = int(progress.group(1))
            except ValueError:
                pass
            continue
        stripped = raw.strip()
        if not (stripped.startswith("{") and stripped.endswith("}")):
            continue
        # Cheap pre-filter before paying for literal_eval.
        if "'loss'" not in stripped and "'reward'" not in stripped and "'kl'" not in stripped:
            continue
        try:
            record = ast.literal_eval(stripped)
        except (ValueError, SyntaxError):
            continue
        if not isinstance(record, dict):
            continue
        # Pick by "is not None" rather than truthiness: a reward or std of
        # exactly 0.0 is a real measurement and must not be discarded.
        reward = _first_present(
            record, "reward", "rewards/mean", "rewards/reward_fn/mean",
        )
        reward_std = _first_present(
            record, "reward_std", "rewards/std", "rewards/reward_fn/std",
        )
        rows.append({
            "step": last_step if last_step is not None else len(rows),
            "loss": record.get("loss"),
            "reward": reward,
            "reward_std": reward_std,
            "kl": record.get("kl"),
            "grad_norm": record.get("grad_norm"),
            "learning_rate": record.get("learning_rate"),
            "epoch": record.get("epoch"),
            "frac_reward_zero_std": record.get("frac_reward_zero_std"),
            "completions_mean_length": record.get("completions/mean_length"),
            "completions_clipped_ratio": record.get("completions/clipped_ratio"),
        })
    return rows
|
| 473 |
+
|
| 474 |
+
|
| 475 |
+
def _try_matplotlib():
    """Return ``matplotlib.pyplot`` on the ``Agg`` backend, or ``None``.

    Any failure while importing or selecting the backend is logged as a
    warning and reported as ``None`` so callers can skip plotting.
    """
    try:
        import matplotlib  # type: ignore
        matplotlib.use("Agg")
        from matplotlib import pyplot  # type: ignore
    except Exception as exc:  # pragma: no cover - plotting is best-effort
        logger.warning("matplotlib unavailable: %s", exc)
        return None
    return pyplot
|
| 484 |
+
|
| 485 |
+
|
| 486 |
+
def _png_bytes(fig) -> bytes:
    """Render *fig* as a PNG at 140 dpi and return the encoded bytes."""
    with io.BytesIO() as sink:
        fig.savefig(sink, format="png", dpi=140)
        return sink.getvalue()
|
| 490 |
+
|
| 491 |
+
|
| 492 |
+
def _read_log_text() -> Optional[str]:
    """Return the contents of ``LOG_FILE``, or ``None`` if it is unavailable.

    Undecodable bytes are replaced rather than raising. ``None`` is
    returned when the file does not exist or cannot be read.
    """
    if LOG_FILE.exists():
        try:
            return LOG_FILE.read_text(errors="replace")
        except OSError:
            pass
    return None
|
| 499 |
+
|
| 500 |
+
|
| 501 |
+
def _synth_training_curve_png() -> Optional[bytes]:
    """Build the reward / loss training-curve PNG from the stdout log.

    Returns:
        PNG bytes, or ``None`` when the log is missing or empty, holds no
        parsable metric dicts, or matplotlib is unavailable.
    """
    text = _read_log_text()
    if not text:
        return None
    rows = _parse_training_log_dicts(text)
    if not rows:
        return None
    plt = _try_matplotlib()
    if plt is None:
        return None

    # Keep only the events that actually carry the metric being plotted.
    rewards = [(r["step"], r["reward"]) for r in rows if r["reward"] is not None]
    losses = [(r["step"], r["loss"]) for r in rows if r["loss"] is not None]

    fig, axes = plt.subplots(2, 1, figsize=(8, 6), sharex=True)
    # Everything after figure creation sits inside try/finally so the
    # figure is closed even when plotting or layout raises; otherwise each
    # failed request would leave a figure registered with pyplot.
    try:
        if rewards:
            axes[0].plot([x for x, _ in rewards], [y for _, y in rewards],
                         lw=1.6, color="#1d4ed8")
        axes[0].set_ylabel("mean reward")
        axes[0].set_title(
            "DrugEnv GRPO training — reward over steps "
            f"(synthesised from {len(rewards)} log events)"
        )
        axes[0].grid(alpha=0.25)
        if losses:
            axes[1].plot([x for x, _ in losses], [y for _, y in losses],
                         lw=1.6, color="#c026d3")
        axes[1].set_ylabel("GRPO loss")
        axes[1].set_xlabel("training step")
        axes[1].grid(alpha=0.25)
        fig.tight_layout()
        return _png_bytes(fig)
    finally:
        plt.close(fig)
|
| 537 |
+
|
| 538 |
+
|
| 539 |
+
def _synth_reward_components_png() -> Optional[bytes]:
    """Build the reward-dispersion / KL diagnostics PNG from the stdout log.

    Top panel: reward with a ±1 std band. Bottom panel: KL on the left
    axis, plus the zero-std fraction and the completion length (scaled by
    its maximum) on a secondary right axis when those metrics were logged.

    Returns:
        PNG bytes, or ``None`` when the log is missing or empty, holds no
        parsable metric dicts, or matplotlib is unavailable.
    """
    text = _read_log_text()
    if not text:
        return None
    rows = _parse_training_log_dicts(text)
    if not rows:
        return None
    plt = _try_matplotlib()
    if plt is None:
        return None

    steps = [r["step"] for r in rows]
    rmean = [r.get("reward") for r in rows]
    rstd = [r.get("reward_std") for r in rows]
    kls = [r.get("kl") for r in rows]
    fzero = [r.get("frac_reward_zero_std") for r in rows]
    clen = [r.get("completions_mean_length") for r in rows]

    fig, axes = plt.subplots(2, 1, figsize=(8, 6.5), sharex=True)
    # Everything after figure creation sits inside try/finally so the
    # figure is closed even when plotting or layout raises.
    try:
        band = [(s, m, sd) for s, m, sd in zip(steps, rmean, rstd) if m is not None]
        if band:
            sx = [b[0] for b in band]
            rm = [b[1] for b in band]
            # A missing std is drawn as a zero-width band.
            rs = [b[2] if b[2] is not None else 0.0 for b in band]
            axes[0].plot(sx, rm, lw=2.0, color="#0f172a", label="reward (group mean)")
            axes[0].fill_between(
                sx,
                [m - s for m, s in zip(rm, rs)],
                [m + s for m, s in zip(rm, rs)],
                alpha=0.18, color="#1d4ed8", label="±1 std (group dispersion)",
            )
            axes[0].set_ylabel("reward at logging step")
            axes[0].set_title(
                "DrugEnv reward — group mean ± dispersion "
                "(stdout-derived; install EvidenceCallback for terminal vs shaping split)"
            )
            axes[0].grid(alpha=0.25)
            axes[0].legend(loc="lower right", fontsize=9)

        kl_pts = [(s, k) for s, k in zip(steps, kls) if k is not None]
        if kl_pts:
            axes[1].plot([p[0] for p in kl_pts], [p[1] for p in kl_pts],
                         lw=1.5, color="#9333ea", label="KL divergence")
            axes[1].set_ylabel("KL", color="#9333ea")
        fz_pts = [(s, f) for s, f in zip(steps, fzero) if f is not None]
        cl_pts = [(s, c) for s, c in zip(steps, clen) if c is not None]
        if fz_pts or cl_pts:
            ax2 = axes[1].twinx()
            if fz_pts:
                ax2.plot([p[0] for p in fz_pts], [p[1] for p in fz_pts],
                         "o-", lw=1.0, ms=3, color="#ea580c",
                         label="frac rollouts with zero-std (saturation)")
                ax2.set_ylim(-0.02, 1.05)
            if cl_pts:
                # Scale lengths by their maximum so they share the right
                # axis; "or 1.0" avoids dividing by a zero maximum.
                cmax = max(p[1] for p in cl_pts) or 1.0
                ax2.plot([p[0] for p in cl_pts], [p[1] / cmax for p in cl_pts],
                         "x:", lw=1.0, ms=4, color="#16a34a",
                         label=f"completion mean length / {cmax:.0f}")
            ax2.set_ylabel("auxiliary (right axis, normalised)", color="#475569")
            ax2.legend(loc="upper right", fontsize=8)
        axes[1].set_xlabel("training step")
        axes[1].grid(alpha=0.25)
        fig.tight_layout()
        return _png_bytes(fig)
    finally:
        plt.close(fig)
|
| 606 |
+
|
| 607 |
+
|
| 608 |
+
# Maps an evidence file name to the function that can synthesise that PNG
# from the training log.
_SYNTH_HANDLERS = {
    "training_curve.png": _synth_training_curve_png,
    "reward_components.png": _synth_reward_components_png,
}
|
| 612 |
+
|
| 613 |
+
|
| 614 |
+
# ── FastAPI app ──────────────────────────────────────────────────────────
|
| 615 |
+
|
| 616 |
+
|
| 617 |
+
# FastAPI application instance for the trainer control panel.
app = FastAPI(title="DrugEnv Trainer", version="0.1.0")
|
| 618 |
+
|
| 619 |
+
|
| 620 |
+
_HTML = """\
|
| 621 |
+
<!doctype html>
|
| 622 |
+
<html lang=en>
|
| 623 |
+
<head>
|
| 624 |
+
<meta charset=utf-8>
|
| 625 |
+
<title>DrugEnv Trainer</title>
|
| 626 |
+
<meta http-equiv="refresh" content="60">
|
| 627 |
+
<style>
|
| 628 |
+
body { font-family: ui-sans-serif, system-ui, sans-serif; margin: 2rem auto;
|
| 629 |
+
max-width: 1000px; color:#111; padding: 0 1rem; line-height:1.5 }
|
| 630 |
+
h1 { margin-bottom: 0 }
|
| 631 |
+
h2 { margin-top: 2rem; border-bottom:1px solid #eee; padding-bottom:.25rem }
|
| 632 |
+
.muted { color:#666 }
|
| 633 |
+
pre { background:#0e1116; color:#e6edf3; padding:1rem; border-radius:6px;
|
| 634 |
+
overflow-x:auto; max-height:40vh; font-size:.85em }
|
| 635 |
+
button { font-size:1rem; padding:.6rem 1rem; border-radius:6px; border:1px solid #888;
|
| 636 |
+
background:#fff; cursor:pointer; margin-right:.4rem }
|
| 637 |
+
.pill { display:inline-block; padding:.1rem .55rem; border-radius:999px;
|
| 638 |
+
background:#eef; color:#225; font-size:.85em }
|
| 639 |
+
.ok { background:#dfd; color:#272 }
|
| 640 |
+
.fail { background:#fdd; color:#822 }
|
| 641 |
+
.run { background:#fdf6d8; color:#774 }
|
| 642 |
+
table { border-collapse:collapse; margin:.5rem 0 }
|
| 643 |
+
td, th { padding:.25rem .8rem .25rem 0; vertical-align: top; text-align:left }
|
| 644 |
+
th { color:#444; font-weight:600 }
|
| 645 |
+
.grid { display:grid; grid-template-columns:1fr 1fr; gap:1rem }
|
| 646 |
+
.card { border:1px solid #e5e7eb; border-radius:8px; padding:.75rem; background:#fafafa }
|
| 647 |
+
.card img { max-width:100%; border-radius:4px }
|
| 648 |
+
.delta-pos { color:#15803d; font-weight:600 }
|
| 649 |
+
.delta-neg { color:#b91c1c; font-weight:600 }
|
| 650 |
+
code { background:#f4f4f4; padding:.05rem .35rem; border-radius:4px }
|
| 651 |
+
a { color:#1d4ed8 }
|
| 652 |
+
</style>
|
| 653 |
+
</head>
|
| 654 |
+
<body>
|
| 655 |
+
<h1>🧬 DrugEnv Trainer</h1>
|
| 656 |
+
<p class=muted>GRPO + LoRA on the DrugEnv drug-target-validation environment.
|
| 657 |
+
Expected hardware: <code>__HW__</code>. H200 ≈ 4× A100 throughput,
|
| 658 |
+
~$0.05–0.10 per step on small models like Qwen2.5-3B.</p>
|
| 659 |
+
|
| 660 |
+
<h2>Run status</h2>
|
| 661 |
+
<p>Status: <span id=status class=pill>?</span></p>
|
| 662 |
+
<table id=meta></table>
|
| 663 |
+
<p>
|
| 664 |
+
<button onclick="startRun()">▶ Start training</button>
|
| 665 |
+
<button onclick="refresh()">↻ Refresh</button>
|
| 666 |
+
<a href="/evidence" target=_blank><button>📁 Evidence index</button></a>
|
| 667 |
+
<a href="/docs" target=_blank><button>🛠 API</button></a>
|
| 668 |
+
</p>
|
| 669 |
+
|
| 670 |
+
<h2>Training-progress evidence</h2>
|
| 671 |
+
<p class=muted>Auto-updated as training runs. All artifacts are also saved to <code>evidence/</code> and pushed to the model repo on the Hub.</p>
|
| 672 |
+
<div class=grid>
|
| 673 |
+
<div class=card><b>Per-step training curve</b><br>
|
| 674 |
+
<img id=curve src="/evidence/training_curve.png" onerror="this.style.display='none'">
|
| 675 |
+
<div id=curve_missing class=muted style="display:none">(not yet — waiting for first GRPO step)</div>
|
| 676 |
+
</div>
|
| 677 |
+
<div class=card><b>Reward components (terminal vs shaping)</b><br>
|
| 678 |
+
<img id=components src="/evidence/reward_components.png" onerror="this.style.display='none'">
|
| 679 |
+
<div id=components_missing class=muted style="display:none">(populated after a few rollouts — watches verifier hacks)</div>
|
| 680 |
+
</div>
|
| 681 |
+
<div class=card><b>Mid-training checkpoint progression</b><br>
|
| 682 |
+
<img id=ckpt src="/evidence/checkpoint_progression.png" onerror="this.style.display='none'">
|
| 683 |
+
<div id=ckpt_missing class=muted style="display:none">(not yet — waiting for first checkpoint eval)</div>
|
| 684 |
+
</div>
|
| 685 |
+
<div class=card><b>Before vs after summary</b><br>
|
| 686 |
+
<img id=summary src="/evidence/before_after_summary.png" onerror="this.style.display='none'">
|
| 687 |
+
<div id=summary_missing class=muted style="display:none">(generated after post-train eval)</div>
|
| 688 |
+
</div>
|
| 689 |
+
<div class=card><b>Reward distribution: pre vs post</b><br>
|
| 690 |
+
<img id=dist src="/evidence/reward_distribution.png" onerror="this.style.display='none'">
|
| 691 |
+
<div id=dist_missing class=muted style="display:none">(generated after post-train eval)</div>
|
| 692 |
+
</div>
|
| 693 |
+
<div class=card><b>Decision accuracy progression</b><br>
|
| 694 |
+
<img id=decision src="/evidence/checkpoint_progression.png" onerror="this.style.display='none'">
|
| 695 |
+
<div id=decision_missing class=muted style="display:none">(progression chart includes decision accuracy line)</div>
|
| 696 |
+
</div>
|
| 697 |
+
<div class=card><b>Evidence coverage progression</b><br>
|
| 698 |
+
<img id=coverage src="/evidence/checkpoint_progression.png" onerror="this.style.display='none'">
|
| 699 |
+
<div id=coverage_missing class=muted style="display:none">(progression chart includes evidence coverage line)</div>
|
| 700 |
+
</div>
|
| 701 |
+
<div class=card><b>Warm-start (SFT)</b><br>
|
| 702 |
+
<div id=sft_card class=muted>(SFT_WARMSTART disabled — set the env var to enable)</div>
|
| 703 |
+
</div>
|
| 704 |
+
</div>
|
| 705 |
+
|
| 706 |
+
<h2>Before / after metrics</h2>
|
| 707 |
+
<table id=metrics_table>
|
| 708 |
+
<tr><th>metric</th><th>pre</th><th>post</th><th>Δ</th></tr>
|
| 709 |
+
</table>
|
| 710 |
+
|
| 711 |
+
<h2>Live logs (tail)</h2>
|
| 712 |
+
<pre id=logs>loading…</pre>
|
| 713 |
+
|
| 714 |
+
<script>
|
| 715 |
+
function fmt(v) {
|
| 716 |
+
if (v == null) return '–';
|
| 717 |
+
if (typeof v === 'number') return v.toFixed(3);
|
| 718 |
+
return v;
|
| 719 |
+
}
|
| 720 |
+
function fmtDelta(d) {
|
| 721 |
+
if (d == null || isNaN(d)) return '–';
|
| 722 |
+
const sign = d >= 0 ? '+' : '';
|
| 723 |
+
const cls = d >= 0 ? 'delta-pos' : 'delta-neg';
|
| 724 |
+
return `<span class="${cls}">${sign}${d.toFixed(3)}</span>`;
|
| 725 |
+
}
|
| 726 |
+
|
| 727 |
+
async function refresh() {
|
| 728 |
+
const s = await fetch('/status').then(r => r.json());
|
| 729 |
+
const pill = document.getElementById('status');
|
| 730 |
+
pill.textContent = s.status;
|
| 731 |
+
pill.className = 'pill ' + ({idle:'',running:'run',finished:'ok',failed:'fail'}[s.status] || '');
|
| 732 |
+
|
| 733 |
+
const meta = document.getElementById('meta');
|
| 734 |
+
meta.innerHTML = '';
|
| 735 |
+
const obj = {
|
| 736 |
+
started_at: s.started_at, finished_at: s.finished_at, error: s.last_error,
|
| 737 |
+
expected_hardware: s.expected_hardware,
|
| 738 |
+
...(s.last_config || {}),
|
| 739 |
+
};
|
| 740 |
+
for (const [k, v] of Object.entries(obj)) {
|
| 741 |
+
if (v == null || v === '') continue;
|
| 742 |
+
const tr = document.createElement('tr');
|
| 743 |
+
tr.innerHTML = `<td><b>${k}</b></td><td><code>${v}</code></td>`;
|
| 744 |
+
meta.appendChild(tr);
|
| 745 |
+
}
|
| 746 |
+
|
| 747 |
+
const m = await fetch('/metrics').then(r => r.json()).catch(() => ({pre:null, post:null}));
|
| 748 |
+
const tbody = document.getElementById('metrics_table');
|
| 749 |
+
tbody.innerHTML = '<tr><th>metric</th><th>pre</th><th>post</th><th>Δ</th></tr>';
|
| 750 |
+
const fields = [
|
| 751 |
+
'mean_reward', 'success_rate', 'decision_accuracy_rate',
|
| 752 |
+
'evidence_coverage_rate', 'median_reward',
|
| 753 |
+
];
|
| 754 |
+
for (const f of fields) {
|
| 755 |
+
const pre = m.pre && m.pre[f];
|
| 756 |
+
const post = m.post && m.post[f];
|
| 757 |
+
const delta = m.delta && m.delta[f];
|
| 758 |
+
const tr = document.createElement('tr');
|
| 759 |
+
tr.innerHTML = `<td><code>${f}</code></td><td>${fmt(pre)}</td><td>${fmt(post)}</td><td>${fmtDelta(delta)}</td>`;
|
| 760 |
+
tbody.appendChild(tr);
|
| 761 |
+
}
|
| 762 |
+
|
| 763 |
+
const bust = '?t=' + Date.now();
|
| 764 |
+
for (const [imgId, missingId] of [
|
| 765 |
+
['curve', 'curve_missing'],
|
| 766 |
+
['components', 'components_missing'],
|
| 767 |
+
['ckpt', 'ckpt_missing'],
|
| 768 |
+
['summary', 'summary_missing'],
|
| 769 |
+
['dist', 'dist_missing'],
|
| 770 |
+
['decision', 'decision_missing'],
|
| 771 |
+
['coverage', 'coverage_missing'],
|
| 772 |
+
]) {
|
| 773 |
+
const img = document.getElementById(imgId);
|
| 774 |
+
const miss = document.getElementById(missingId);
|
| 775 |
+
const baseSrc = img.getAttribute('src').split('?')[0];
|
| 776 |
+
const probe = new Image();
|
| 777 |
+
probe.onload = () => { img.src = baseSrc + bust; img.style.display=''; miss.style.display='none'; };
|
| 778 |
+
probe.onerror = () => { img.style.display='none'; miss.style.display=''; };
|
| 779 |
+
probe.src = baseSrc + bust;
|
| 780 |
+
}
|
| 781 |
+
|
| 782 |
+
const sft_resp = await fetch('/sft_summary');
|
| 783 |
+
const sft_card = document.getElementById('sft_card');
|
| 784 |
+
if (sft_resp.ok) {
|
| 785 |
+
try {
|
| 786 |
+
const sft = await sft_resp.json();
|
| 787 |
+
sft_card.classList.remove('muted');
|
| 788 |
+
sft_card.innerHTML =
|
| 789 |
+
`<table>` +
|
| 790 |
+
`<tr><td><b>final loss</b></td><td><code>${fmt(sft.final_loss)}</code></td></tr>` +
|
| 791 |
+
`<tr><td><b>oracle success</b></td><td><code>${fmt(sft.oracle_success_rate)}</code></td></tr>` +
|
| 792 |
+
`<tr><td><b>transitions trained</b></td><td><code>${sft.num_train_rows ?? '–'}</code></td></tr>` +
|
| 793 |
+
`<tr><td><b>duration</b></td><td><code>${fmt(sft.duration_s)} s</code></td></tr>` +
|
| 794 |
+
`<tr><td><b>base → SFT dir</b></td><td><code>${sft.base_model} → ${sft.out_dir}</code></td></tr>` +
|
| 795 |
+
`</table>`;
|
| 796 |
+
} catch (e) { /* keep placeholder */ }
|
| 797 |
+
}
|
| 798 |
+
|
| 799 |
+
const logs = await fetch('/logs?tail=200').then(r => r.text());
|
| 800 |
+
document.getElementById('logs').textContent = logs || '(no logs yet)';
|
| 801 |
+
}
|
| 802 |
+
async function startRun() {
|
| 803 |
+
const r = await fetch('/train', {method:'POST'});
|
| 804 |
+
if (!r.ok) alert((await r.json()).detail || 'failed');
|
| 805 |
+
setTimeout(refresh, 500);
|
| 806 |
+
}
|
| 807 |
+
refresh();
|
| 808 |
+
setInterval(refresh, 5000);
|
| 809 |
+
</script>
|
| 810 |
+
</body>
|
| 811 |
+
</html>
|
| 812 |
+
""".replace("__HW__", EXPECTED_HARDWARE)
|
| 813 |
+
|
| 814 |
+
|
| 815 |
+
@app.get("/", response_class=HTMLResponse)
def index() -> HTMLResponse:
    """Serve the control-panel page held in the module-level ``_HTML`` string."""
    page = HTMLResponse(content=_HTML)
    return page
|
| 818 |
+
|
| 819 |
+
|
| 820 |
+
@app.get("/health")
def health() -> Dict[str, str]:
    """Liveness probe: always answers with the constant ``{"status": "ok"}``."""
    payload: Dict[str, str] = dict(status="ok")
    return payload
|
| 823 |
+
|
| 824 |
+
|
| 825 |
+
@app.get("/status")
def status() -> JSONResponse:
    """Return the current ``STATE`` snapshot (``STATE.to_dict()``) as JSON."""
    snapshot = STATE.to_dict()
    return JSONResponse(snapshot)
|
| 828 |
+
|
| 829 |
+
|
| 830 |
+
@app.get("/metrics")
def metrics() -> JSONResponse:
    """Return the contents of ``METRICS_FILE`` as JSON.

    Returns:
        The parsed file when it can be read and serialized; a placeholder
        object with ``pre``/``post``/``delta`` set to ``None`` when the file
        does not exist; a 500 response carrying an ``error`` field when the
        file exists but cannot be read, decoded or serialized.
    """
    try:
        # Read directly rather than checking exists() first: the file can
        # appear or vanish between the check and the read.
        payload = json.loads(METRICS_FILE.read_text(encoding="utf-8"))
        return JSONResponse(payload)
    except (FileNotFoundError, NotADirectoryError):
        return JSONResponse({"pre": None, "post": None, "delta": None})
    except (OSError, ValueError) as exc:
        # ValueError covers malformed JSON and undecodable bytes; log the
        # cause so an unreadable file is diagnosable from the server log.
        logger.warning("could not serve %s: %s", METRICS_FILE, exc)
        return JSONResponse({"error": "metrics file unreadable"}, status_code=500)
|
| 838 |
+
|
| 839 |
+
|
| 840 |
+
@app.get("/sft_summary")
def sft_summary() -> JSONResponse:
    """Return the SFT warm-start summary if it exists.

    Powers the dashboard's "Warm-start (SFT)" card.

    Returns:
        The parsed ``sft_summary.json`` from ``EVIDENCE_DIR`` when it can be
        read; an empty object with status 404 when the file does not exist;
        a 500 response carrying an ``error`` field when the file exists but
        cannot be read, decoded or serialized.
    """
    path = EVIDENCE_DIR / "sft_summary.json"
    try:
        # Read directly rather than checking exists() first: the file can
        # appear or vanish between the check and the read.
        payload = json.loads(path.read_text(encoding="utf-8"))
        return JSONResponse(payload)
    except (FileNotFoundError, NotADirectoryError):
        return JSONResponse({}, status_code=404)
    except (OSError, ValueError) as exc:
        # ValueError covers malformed JSON and undecodable bytes; log the
        # cause so an unreadable file is diagnosable from the server log.
        logger.warning("could not serve %s: %s", path, exc)
        return JSONResponse({"error": "sft_summary unreadable"}, status_code=500)
|
| 853 |
+
|
| 854 |
+
|
| 855 |
+
@app.get("/evidence")
def evidence_index() -> JSONResponse:
    """Enumerate the regular files currently present in ``EVIDENCE_DIR``.

    Each entry carries the file's name, its size in bytes, and the URL under
    ``/evidence/`` at which it is served. Entries are ordered by sorted path.
    The list is empty when the directory does not exist.
    """
    entries = []
    if EVIDENCE_DIR.exists():
        entries = [
            {
                "name": item.name,
                "size": item.stat().st_size,
                "url": f"/evidence/{item.name}",
            }
            for item in sorted(EVIDENCE_DIR.iterdir())
            if item.is_file()
        ]
    return JSONResponse({"dir": str(EVIDENCE_DIR), "files": entries})
|
| 868 |
+
|
| 869 |
+
|
| 870 |
+
@app.get("/evidence/{name}")
def evidence_file(name: str):
    """Serve one evidence artifact from ``EVIDENCE_DIR`` by filename.

    When no such file is on disk and ``name`` has an entry in
    ``_SYNTH_HANDLERS``, the handler is invoked and its result is served as
    an uncached PNG. The original note names ``training_curve.png`` and
    ``reward_components.png`` as the artifacts synthesized on demand from
    the captured TRL stdout log; the registry itself is defined elsewhere.

    Raises:
        HTTPException: 400 when ``name`` contains ``/`` or ``..``; 404 when
            neither a file nor a synthesized image is available.
    """
    # Reject anything that could step outside EVIDENCE_DIR.
    if any(token in name for token in ("/", "..")):
        raise HTTPException(status_code=400, detail="invalid name")

    artifact = EVIDENCE_DIR / name
    if artifact.exists() and artifact.is_file():
        return FileResponse(artifact)

    synthesize = _SYNTH_HANDLERS.get(name)
    image_bytes = None
    if synthesize is not None:
        try:
            image_bytes = synthesize()
        except Exception as exc:  # pragma: no cover - synthesis is best-effort
            logger.warning("on-demand synthesis of %s failed: %s", name, exc)

    if not image_bytes:
        raise HTTPException(status_code=404, detail=f"{name} not found")

    return Response(
        content=image_bytes,
        media_type="image/png",
        headers={"Cache-Control": "no-store, max-age=0"},
    )
|
| 898 |
+
|
| 899 |
+
|
| 900 |
+
@app.get("/logs", response_class=PlainTextResponse)
def logs(tail: int = 400) -> PlainTextResponse:
    """Return the last ``tail`` lines of ``LOG_FILE`` as plain text.

    Args:
        tail: Number of trailing lines to return; values below 1 are
            treated as 1.

    Returns:
        The selected lines joined with ``\\n``, or an empty body when the
        log file does not exist.
    """
    try:
        # errors="replace": this endpoint is polled while the log may still
        # be appended to, so a partial or invalid byte sequence must not turn
        # the request into a 500.
        text = LOG_FILE.read_text(encoding="utf-8", errors="replace")
    except (FileNotFoundError, NotADirectoryError):
        return PlainTextResponse("")
    keep = max(tail, 1)
    return PlainTextResponse("\n".join(text.splitlines()[-keep:]))
|
| 907 |
+
|
| 908 |
+
|
| 909 |
+
@app.post("/train")
async def train(request: Request) -> JSONResponse:
    """Kick off a training run, optionally with per-run config overrides.

    A non-empty request body is parsed as JSON and must be an object; its
    keys are layered over a copy of the global ``CONFIG`` for this run only,
    so callers can flip ``sft_warmstart`` (or any other config key) without
    redeploying the Space. ``CONFIG`` itself is not modified here.

    Raises:
        HTTPException: 400 when the body is not valid JSON or not a JSON
            object; 409 when ``_start_training`` raises ``RuntimeError``.
    """
    try:
        raw = await request.body()
        overrides: Dict[str, Any] = json.loads(raw) if raw else {}
        if not isinstance(overrides, dict):
            raise ValueError("request body must be a JSON object")
    except ValueError as exc:  # json.JSONDecodeError is a ValueError subclass
        raise HTTPException(status_code=400, detail=f"bad request body: {exc}")

    run_config = {**CONFIG, **overrides}
    try:
        _start_training(run_config)
    except RuntimeError as exc:
        raise HTTPException(status_code=409, detail=str(exc))
    return JSONResponse({"status": "started", "config": run_config})
|
| 934 |
+
|
| 935 |
+
|
| 936 |
+
@app.on_event("startup")
def _maybe_autostart() -> None:
    """Start a training run at application startup when ``CONFIG["autostart"]`` is truthy.

    A ``RuntimeError`` from ``_start_training`` is logged as a warning and
    otherwise ignored, so startup proceeds.
    """
    if not CONFIG["autostart"]:
        return
    try:
        _start_training(dict(CONFIG))
    except RuntimeError as exc:
        logger.warning("autostart skipped: %s", exc)
    else:
        logger.info("autostarted training run")
|
space/training/requirements.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
-r ../../requirements-train.txt
|