Commit c0a7163
Parent(s): 5af70c3

Add project files

Files changed:
- .dockerignore +3 -0
- .gitattributes +1 -0
- AGENTS.md +69 -38
- Dockerfile +3 -0
- Dockerfile.hf +3 -0
- PROJECT_OVERVIEW.md +80 -35
- README.md +97 -23
- conftest.py +1 -1
- data/knowledge_base/README.md +5 -3
- docker-compose.yml +3 -0
- docker-entrypoint.sh +30 -0
- requirements.txt +2 -0
- src/agents/orchestrator.py +217 -10
- src/agents/prompts.py +1 -0
- src/agents/routing.py +81 -0
- src/agents/schemas.py +4 -1
- src/agents/tools.py +2 -1
- src/api/routes.py +77 -8
- src/api/schemas.py +36 -0
- src/frontend/app.py +49 -0
- src/models/mri_model.py +149 -0
- tests/agents/test_agent_route.py +1 -1
- tests/agents/test_orchestrator.py +70 -0
- tests/agents/test_tools.py +25 -1
- tests/api/test_routes.py +68 -0
- tests/fixtures/build_dummy_mri_onnx.py +20 -0
- tests/models/test_mri_model.py +54 -0
.dockerignore
CHANGED

@@ -11,6 +11,9 @@ mlruns/
 .github/
 docs/
 tests/
+!tests/
+tests/*
 !tests/fixtures/
+!tests/fixtures/**
 .streamlit/
 notebooks/
.gitattributes
ADDED

@@ -0,0 +1 @@
+*.sh text eol=lf
AGENTS.md
CHANGED

@@ -50,10 +50,14 @@ All experiment runs are tracked in **MLflow**. All services ship as **Docker** i
 │   │   ├── storage.py        # Parquet read/write helpers (snappy, single-threaded, deterministic)
 │   │   └── tracking.py       # MLflow `track_pipeline_run` context manager (see §7)
 │   ├── pipelines/            # One file per modality. Pure functions + a `run_pipeline()` entry.
-│   ├── models/               # Downstream decision-layer models
-│   │
+│   ├── models/               # Downstream decision-layer models
+│   │   ├── bbb_model.py      # BBB-permeability classifier + SHAP explainer + trainer CLI
+│   │   └── mri_model.py      # Volumetric MRI ONNX inference surface (external training)
+│   ├── llm/                  # Natural-language explainers (template + OpenRouter fallback)
+│   ├── rag/                  # Fastembed + FAISS retrieval layer
+│   ├── agents/               # Tool registry + guarded OpenRouter orchestrator
 │   └── frontend/
-│       └── app.py            # Streamlit dashboard
+│       └── app.py            # Streamlit dashboard
 └── tests/
     ├── core/
     ├── api/

@@ -148,31 +152,43 @@ The repo-wide `conftest.py` autouse fixture pins `MLFLOW_TRACKING_URI` to a tmp
 ## 8. Decision Layer (Downstream Models)
 
 Pipelines produce features (`data/processed/<modality>_features.parquet`).
-Downstream models live in `src/models/` and consume
+Downstream models live in `src/models/` and consume processed features or a
+deterministic model-local preprocessing contract:
 
 | Model | File | Output | Endpoint |
 |---|---|---|---|
 | BBB permeability | `src/models/bbb_model.py` | `data/processed/bbb_model.joblib` | `POST /predict/bbb` |
+| MRI image classifier | `src/models/mri_model.py` | `data/processed/mri_model.onnx` | `POST /predict/mri` |
 
+In-repo trainable downstream model modules expose a uniform surface:
 - `train(df, label_col, ...)` → fitted classifier
 - `save(model, path)` / `load(path)` → joblib artifact I/O
 - `predict_with_proba(model, smiles)` → `{label, confidence}` (confidence is the max-class probability)
 - `explain_prediction(model, smiles, top_k)` → SHAP top-k attributions sorted by `|shap_value|` descending
 
+MRI DL exception: training happens outside this repo and exports ONNX, so it
+does not expose `train()` or SHAP. Runtime
+loads the ONNX artifact with `mri_model.load()`, preprocesses one NIfTI via the
+same deterministic resize + z-score contract used during training
+(`preprocess_nifti()`), then returns class probabilities via `predict_nifti()`.
+
+The API loads model artifacts at request time. If an artifact is missing,
+the endpoint returns **HTTP 503** with a remediation hint instead of failing
+process startup. BBB points at the trainer CLI (`python -m src.models.bbb_model`);
+MRI points at the external ONNX export path.
+
+**Determinism**: all in-repo classifiers are seeded (`random_state=42`
+default), `n_jobs=1` (no tree-parallelism races). Re-running the BBB trainer
+on the same Parquet produces identical predictions. MRI ONNX determinism is
+bounded by the exported model plus the fixed runtime preprocessing contract.
 
 **Override `BBB_MODEL_PATH`** env var to point the API at a non-default
 artifact location (used by tests for tmp_path isolation).
 
+**Override `MRI_MODEL_PATH`** env var to point the API at a non-default ONNX
+artifact location. If the ONNX artifact is missing, `POST /predict/mri`
+returns **HTTP 503** with a remediation hint.
+
 **Calibration metadata** (Day 6): `train()` does an 80/20 stratified split,
 computes precision-at-confidence-threshold bins on the held-out test set,
 and stashes them on `model._neurobridge_calibration: list[dict]` (sorted
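A minimal sketch of the request-time artifact guard described above; the function name and hint text are illustrative, not the repo's actual `routes.py` code:

```python
import os
from pathlib import Path

import joblib
from fastapi import HTTPException

# Illustrative default mirroring the documented BBB_MODEL_PATH override.
BBB_MODEL_PATH = Path(os.getenv("BBB_MODEL_PATH", "data/processed/bbb_model.joblib"))


def load_bbb_model_or_503():
    """Load the artifact at request time; missing artifact -> HTTP 503 + remediation hint."""
    if not BBB_MODEL_PATH.exists():
        raise HTTPException(
            status_code=503,
            detail=(
                f"BBB model artifact not found at {BBB_MODEL_PATH}. "
                "Train it first: python -m src.models.bbb_model"
            ),
        )
    return joblib.load(BBB_MODEL_PATH)
```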
@@ -282,8 +298,9 @@ metrics, params). `POST /experiments/diff {run_id_a, run_id_b}`
 returns a side-by-side metric+param diff (`RunDiffRow`).
 
 When `NEUROBRIDGE_DISABLE_MLFLOW=1`, both endpoints return empty
-responses without raising —
+responses without raising — useful for deployments where there is no
+writable `mlruns/` tree or the tracking server is unavailable. Unknown
+run ids → 404.
 
 The Streamlit "Experiments" tab is the user-facing surface. Cached
 in session state with an explicit Refresh button.

@@ -293,15 +310,22 @@ in session state with an explicit Refresh button.
 `Dockerfile.hf` is the Hugging Face Spaces image. Single container,
 two processes (FastAPI :8000 + Streamlit :7860) launched via
 `supervisord.conf`. Build-time `RUN python -m src.models.bbb_model`
-bakes the model artifact into the image so the first `/predict/bbb`
-call is instant on cold start.
+bakes the BBB model artifact into the image so the first `/predict/bbb`
+call is instant on cold start. Build-time RAG ingest creates
+`data/processed/faiss_index/`.
+
+`docker-entrypoint.sh` is the runtime guard for local Docker/Compose demos:
+when a mounted `./data` volume hides image-built artifacts, it seeds fixture
+raw data, rebuilds missing BBB features/model artifacts, and rebuilds the
+FAISS index before starting supervisord. It does not bake
+`NEUROBRIDGE_DISABLE_MLFLOW=1` into the image; operators may set that env at
+runtime if their tracking service is unavailable.
+
+Default environment: `DEPLOY_ENV=hf_spaces`. The LLM kill-switch is **not**
+set — deployed Spaces use the real OpenRouter free-tier chain (§11) when
 `OPENROUTER_API_KEY` is configured in the Space's Secrets panel. Set
-`NEUROBRIDGE_DISABLE_LLM=1` only when you want to force the
+`NEUROBRIDGE_DISABLE_LLM=1` only when you want to force the deterministic
+template path for a fully-reproducible demo.
 
 The README's YAML front-matter declares the Space metadata
 (SDK=docker, port=7860, app_file=src/frontend/app.py).
@@ -309,24 +333,30 @@ The README's YAML front-matter declares the Space metadata
 ## 15. Orchestrator Agent Surface
 
 `src/agents/orchestrator.py` exposes a single-agent function-calling
-loop over the openai SDK (no LangChain / framework dep). The
+loop over the openai SDK (no LangChain / framework dep). The API enables
+the guarded workflow mode: if the LLM skips or mis-shapes a required tool
+call, deterministic routing in `src/agents/routing.py` falls back to exactly
+one pipeline tool, then exactly one retrieval tool, then final synthesis.
+The agent holds 4 tools, defined in `src/agents/tools.py`:
 
 - `run_bbb_pipeline(smiles, top_k)` — wraps `POST /predict/bbb`
 - `run_eeg_pipeline(input_path)` — wraps `POST /pipeline/eeg`
-- `run_mri_pipeline(input_dir, sites_csv)` — wraps `POST /pipeline/mri`
+- `run_mri_pipeline(input_dir, sites_csv=None)` — wraps `POST /pipeline/mri`
+  and defaults `sites_csv` to `<input_dir>/sites.csv`
 - `retrieve_context(query, k)` — wraps `src/rag/retrieve.py`
 
 The system prompt (`src/agents/prompts.py:ORCHESTRATOR_SYSTEM_PROMPT`)
-focused retrieval query → call retrieve_context → synthesize a
+describes the workflow: pick exactly one pipeline → run it → formulate a
+focused retrieval query → call retrieve_context → synthesize a 3-5 sentence
+response that cites at least one chunk. The API-side workflow guard enforces
+that order in code; the prompt is guidance, not the only control plane.
+Language of the final response is mirrored from the user's question.
+
+`POST /agent/run` is the public surface. It accepts `user_input`,
+optional `user_question`, and optional MRI `sites_csv`. Default model is
+`google/gemini-2.0-flash-exp:free` on OpenRouter (function-calling support
+verified). Override via `NEUROBRIDGE_AGENT_MODEL` env var. Returns 503 when
+`OPENROUTER_API_KEY` is unset.
 
 Diagnostics: `GET /diag/agent` returns key presence, configured model,
 RAG index status (chunk count), and the registered tool names.
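A minimal sketch of the guarded order enforced around the LLM; the keyword router here is a stand-in for, not a copy of, the actual `src/agents/routing.py` logic:

```python
from typing import Any, Callable

PIPELINE_TOOLS = ("run_bbb_pipeline", "run_eeg_pipeline", "run_mri_pipeline")


def route_pipeline(user_input: str) -> str:
    """Hypothetical keyword router standing in for the deterministic routing module."""
    text = user_input.lower()
    if ".nii" in text or "mri" in text:
        return "run_mri_pipeline"
    if ".fif" in text or "eeg" in text:
        return "run_eeg_pipeline"
    return "run_bbb_pipeline"


def run_guarded_workflow(
    user_input: str,
    tools: dict[str, Callable[..., Any]],
    llm_tool_call: dict | None = None,
) -> dict:
    # Step 1: exactly one pipeline tool; fall back when the LLM skipped or
    # mis-shaped the required call.
    if llm_tool_call and llm_tool_call.get("name") in PIPELINE_TOOLS:
        result = tools[llm_tool_call["name"]](**llm_tool_call.get("arguments", {}))
    else:
        result = tools[route_pipeline(user_input)](user_input)

    # Step 2: exactly one retrieval tool.
    chunks = tools["retrieve_context"](query=user_input, k=4)

    # Step 3: hand both results to the final synthesis step (LLM or template).
    return {"pipeline_result": result, "context_chunks": chunks}
```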
@@ -345,9 +375,10 @@ user-supplied `.md` / `.txt` / `.pdf`). Build the FAISS index with:
 
 Defaults: input=`data/knowledge_base/`, output=`data/processed/faiss_index/`.
 The Dockerfile runs this at build time so deployed Spaces start with
-a populated index.
+a populated index. `docker-entrypoint.sh` also rebuilds the index at
+startup when a mounted `data/` volume hides the image-built artifacts.
+Empty KB → empty index → `retrieve_context` returns 0 chunks; the agent
+surfaces this and answers from the pipeline result alone.
 
 `tests/fixtures/kb_sample/` ships 3 seed markdown files (Lipinski,
 ComBat, MNE+ICA) — these double as test fixtures and as the demo
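A minimal sketch of the retrieval layer this last hunk describes, assuming the documented `fastembed` + `faiss-cpu` stack; the function names are illustrative, not the repo's `src/rag/` API:

```python
import faiss
import numpy as np
from fastembed import TextEmbedding

# Assumes the documented stack: bge-small (384-dim) embeddings + inner-product FAISS.
embedder = TextEmbedding("BAAI/bge-small-en-v1.5")


def build_index(chunks: list[str]) -> faiss.IndexFlatIP:
    vecs = np.array(list(embedder.embed(chunks)), dtype=np.float32)
    faiss.normalize_L2(vecs)                  # L2-normalize so inner product = cosine
    index = faiss.IndexFlatIP(vecs.shape[1])  # 384 for bge-small-en-v1.5
    index.add(vecs)
    return index


def retrieve(index: faiss.IndexFlatIP, chunks: list[str], query: str, k: int = 4):
    q = np.array(list(embedder.embed([query])), dtype=np.float32)
    faiss.normalize_L2(q)
    scores, ids = index.search(q, min(k, max(len(chunks), 1)))
    # An empty index returns -1 ids, matching the documented 0-chunk behavior.
    return [(chunks[i], float(s)) for i, s in zip(ids[0], scores[0]) if i >= 0]
```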
Dockerfile
CHANGED

@@ -30,6 +30,8 @@ RUN pip install -r requirements.txt
 COPY src/ ./src/
 COPY tests/fixtures/ ./tests/fixtures/
 COPY supervisord.conf ./supervisord.conf
+COPY docker-entrypoint.sh ./docker-entrypoint.sh
+RUN chmod +x /app/docker-entrypoint.sh
 
 # Seed raw data from fixtures so the deployed Signal/Image/Molecule tabs
 # work on first click. Then run all three pipelines so mlruns/ contains

@@ -55,4 +57,5 @@ RUN python -m src.rag.ingest data/knowledge_base data/processed/faiss_index
 EXPOSE 7860
 
 # --- launch FastAPI + Streamlit under supervisord ---
+ENTRYPOINT ["/app/docker-entrypoint.sh"]
 CMD ["supervisord", "-n", "-c", "/app/supervisord.conf"]
Dockerfile.hf
CHANGED

@@ -30,6 +30,8 @@ RUN pip install -r requirements.txt
 COPY src/ ./src/
 COPY tests/fixtures/ ./tests/fixtures/
 COPY supervisord.conf ./supervisord.conf
+COPY docker-entrypoint.sh ./docker-entrypoint.sh
+RUN chmod +x /app/docker-entrypoint.sh
 
 # Seed raw data from fixtures so the deployed Signal/Image/Molecule tabs
 # work on first click. Then run all three pipelines so mlruns/ contains

@@ -55,4 +57,5 @@ RUN python -m src.rag.ingest data/knowledge_base data/processed/faiss_index
 EXPOSE 7860
 
 # --- launch FastAPI + Streamlit under supervisord ---
+ENTRYPOINT ["/app/docker-entrypoint.sh"]
 CMD ["supervisord", "-n", "-c", "/app/supervisord.conf"]
PROJECT_OVERVIEW.md
CHANGED

@@ -9,7 +9,7 @@
 
 ## 1. What We Built, in One Sentence
 
-We process three different clinical data types (molecule / EEG signal / MRI image) behind a single API + a single web interface, returning **label + confidence score + calibration + drift signal + MLflow traceability info + natural-language AI explanation** for every prediction, and for the jury demo possible
+We built a B2B "Living Decision System" that processes three different clinical data types (molecule / EEG signal / MRI image) behind a single API + a single web interface, returns **label + confidence score + calibration + drift signal + MLflow traceability info + natural-language AI explanation** for every prediction, plugs the externally-trained volumetric MRI deep-learning model into the system via ONNX through `POST /predict/mri`, orchestrates the pipeline tools through a RAG-backed agent surface, and guards the likely failure points of the jury demo with kill-switches.
 
 Hackathon theme: **"Building AI Systems for Neurotechnology & Health"** — and the jury scores 6 dimensions (Problem Depth, System Quality, Robustness, Interaction, Execution, Creativity). We answered each dimension with specific features. Details below.
 

@@ -106,6 +106,19 @@ The pipeline runs twice (pre + post ComBat) and returns a long-format DataFrame
 
 **Why ComBat?** ComBat was originally invented for gene-expression batch effects (Johnson et al. 2007) and later adapted to neuroimaging (Fortin et al. 2017, 2018). With an empirical Bayes approach it learns site-dependent location + scale parameters and removes site bias **while preserving the biological signal**. Z-score normalization alone cannot fully close the gap; ComBat corrects both mean and variance.
 
+### 3.4 MRI Image Deep-Learning Model (External Training → ONNX)
+
+The deep-learning model we will train for MRI is not trained in this repo. Training happens in a separate GPU environment, and the exported ONNX artifact is plugged into the NeuroBridge runtime:
+
+- Artifact path: `data/processed/mri_model.onnx`
+- Override: `MRI_MODEL_PATH=/path/to/model.onnx`
+- Input: a `.nii` / `.nii.gz` NIfTI volume
+- Preprocess: 3D finite-volume validation → trilinear resize (`64×64×64` default) → non-zero voxel z-score normalization → `[1, 1, D, H, W]` float32 tensor
+- Output: a `[1, C]` class vector; logits or probabilities are accepted
+- API: `POST /predict/mri`
+
+The separation matters: `src/pipelines/mri_pipeline.py` cleans multi-site MRI data and harmonizes it with ComBat, while `src/models/mri_model.py` runs the externally-trained volumetric model at inference time for clinical classification. If the artifact is missing, the endpoint returns HTTP 503 and tells the operator the ONNX export path.
+
 ---
 
 ## 4. "Living Decision System" — The Seven Transparency Layers
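A minimal sketch of the §3.4 preprocessing + inference contract, using `nibabel`, `scipy.ndimage.zoom` (order=1, i.e. trilinear), and `onnxruntime`; the function bodies are illustrative, not the repo's actual `mri_model.py` implementation:

```python
import nibabel as nib
import numpy as np
import onnxruntime as ort
from scipy.ndimage import zoom


def preprocess_nifti(path: str, target_shape=(64, 64, 64)) -> np.ndarray:
    vol = nib.load(path).get_fdata(dtype=np.float32)
    if vol.ndim != 3 or not np.isfinite(vol).all():
        raise ValueError("expected a finite 3D volume")
    # Trilinear (order=1) resize to the fixed training shape.
    factors = [t / s for t, s in zip(target_shape, vol.shape)]
    vol = zoom(vol, factors, order=1)
    # Z-score over non-zero voxels only, so background stays at zero.
    mask = vol != 0
    if mask.any():
        vol[mask] = (vol[mask] - vol[mask].mean()) / (vol[mask].std() + 1e-8)
    return vol[np.newaxis, np.newaxis, ...].astype(np.float32)  # [1, 1, D, H, W]


def predict_nifti(onnx_path: str, nifti_path: str) -> np.ndarray:
    sess = ort.InferenceSession(onnx_path)
    (input_name,) = [i.name for i in sess.get_inputs()]
    out = sess.run(None, {input_name: preprocess_nifti(nifti_path)})[0]  # [1, C]
    # Normalize with softmax; harmless for ranking even if the model
    # already emits probabilities (the contract accepts either).
    e = np.exp(out - out.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)
```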
@@ -144,9 +157,11 @@ Together these seven layers refute the "Black-Box AI ≠ Trust" myth
 │  /pipeline/{bbb,eeg,mri}       → batch processing     │
 │  /pipeline/mri/diagnostics     → pre/post ComBat KPIs │
 │  /predict/bbb                  → single-molecule infer│
+│  /predict/mri                  → volumetric ONNX infer│
 │  /explain/{bbb,eeg,mri}        → LLM/template rationale│
 │  /experiments/runs             → MLflow run list      │
 │  /experiments/diff             → side-by-side run diff│
+│  /agent/run                    → pipeline tools + RAG │
 │  /health                       → liveness check       │
 └─┬────────────┬────────────┬────────────┬─────────────────┘
   │            │            │            │

@@ -156,6 +171,8 @@ bbb_pipeline  eeg_pipeline  mri_pipeline  llm.explainer
   + shap                                  + template fallback
 ```
 
+The agent surface (`src/agents/orchestrator.py`) first tries LLM function-calling; if the model skips the tool call or gets the order wrong, the guarded workflow takes over. The deterministic router (`src/agents/routing.py`) picks one pipeline, runs the matching tool, pulls FAISS/RAG context via `retrieve_context`, and returns the final synthesis under the same API contract.
+
 ### 5.2 Process Model
 
 supervisord runs two processes inside a single Docker container:

@@ -169,7 +186,9 @@ Inside the container, Streamlit calls FastAPI via `httpx.post("http://127.0.0.1:8000/...")`
 | Data | Location | Lifetime |
 |---|---|---|
 | Trained BBB model (joblib) | `data/processed/bbb_model.joblib` | Trained at container build time, baked into the image |
+| MRI DL model (ONNX) | `data/processed/mri_model.onnx` or `MRI_MODEL_PATH` | Exported from the external training environment; the runtime only loads + runs inference |
+| RAG FAISS index | `data/processed/faiss_index/` | Build-time ingest + rebuilt by the container startup guard when missing |
+| MLflow runs | `mlruns/` (default backend: SQLite) | Depends on the runtime environment; can be disabled with `NEUROBRIDGE_DISABLE_MLFLOW=1` |
 | Worker drift deque | In-memory (`collections.deque(maxlen=100)`) | Until container restart; worker restart = state reset |
 | Streamlit session state | Browser tab | Until the tab closes |

@@ -182,7 +201,7 @@
 - **Type-safe schemas:** with Pydantic v2, request/response models get automatic validation + 422 errors
 - **OpenAPI auto-generation:** the `/docs` endpoint gives the jury a Swagger UI — integration documentation for free
 - **Async-ready:** our use case is sync, but async pipelines could be added easily if needed
-- **Test-friendly:** `fastapi.testclient.TestClient`
+- **Test-friendly:** `fastapi.testclient.TestClient` runs most of the API tests without a real network
 - **Why not the alternatives:** Flask is too bare (you hand-write everything), Django is overkill (admin + ORM unneeded)
 
 ### 6.2 Frontend: Streamlit

@@ -241,17 +260,26 @@
 
 - **Self-contained:** all dependencies + code + data in a single image
 - **Portable:** the same image runs locally, on HF, and later on Railway/AWS
-- **Build-time
+- **Build-time artifacts:** the BBB model is trained and the RAG index is ingested at build time; the main demo artifacts are ready on cold start
+- **Runtime guard:** `docker-entrypoint.sh` regenerates the BBB model and the FAISS index from fixture data when the host volume arrives empty
 - **Supervisord:** two processes in one container with minimal overhead
 - **Why not the alternative:** multi-container docker-compose is nice, but HF Spaces wants a single container
 
+### 6.9 Agent + RAG
+
+- **Tool-first orchestration:** the tools available to the agent mirror the pipeline surface: BBB predict, EEG pipeline, MRI pipeline, and RAG retrieval.
+- **Guarded workflow:** if LLM function-calling fails, the API enforces the pipeline → retrieval → synthesis order through the deterministic router; no "the agent never called a tool" risk during the demo.
+- **RAG stack:** `fastembed` (`BAAI/bge-small-en-v1.5`, 384 dim) + `faiss-cpu` (`IndexFlatIP`, cosine search over L2-normalized vectors). No torch dependency.
+- **Knowledge base:** `.md`, `.txt`, `.pdf` files under `data/knowledge_base/` are written into `data/processed/faiss_index/` via `python -m src.rag.ingest`.
+- **Default agent model:** can be overridden with `NEUROBRIDGE_AGENT_MODEL`; without `OPENROUTER_API_KEY`, `/agent/run` returns HTTP 503.
+
 ---
 
 ## 7. Test Discipline: TDD + Subagent-Driven Development
 
 ### 7.1 The Numbers
 
-- **
+- **242 passed, 2 skipped** (verified on Windows / Python 3.11)
 - 8-day sprint, ~50 atomic commits
 - Every test-bearing task was written with **RED → GREEN → REFACTOR** discipline
 - Each task was implemented by a separate Subagent (Claude Code); the main agent coordinated + reviewed
@@ -260,20 +288,22 @@
 
 ```
 tests/
-├── core/
+├── core/         logger, storage, tracking, determinism
 ├── pipelines/
-│   ├── test_bbb_pipeline.py
-│   ├── test_eeg_pipeline.py
-│   └── test_mri_pipeline.py
-├── models/
+│   ├── test_bbb_pipeline.py   SMILES validation, FP, drop+log, idempotence
+│   ├── test_eeg_pipeline.py   filter, ICA, epoching, feature extraction
+│   └── test_mri_pipeline.py   volume validation, masking, ComBat split, diagnostics
+├── models/
+│   ├── test_bbb_model.py      train, save/load, predict, SHAP, calibration, train_stats
+│   └── test_mri_model.py      NIfTI preprocess + ONNX inference contract
+├── api/          route contracts, error mapping, drift/calibration/provenance
+├── llm/          template determinism, modality dispatch, kill-switch
+├── rag/          ingest, empty-index behavior, retrieval
+├── agents/       tool schemas, guarded orchestration, agent route
+├── frontend/     Streamlit module import smoke
+└── deploy/       Dockerfile.hf / startup contract
+
+Total: 242 passed, 2 skipped
 ```
 
 ### 7.3 UserWarning Gate

@@ -289,7 +319,7 @@ pytest -W error::UserWarning tests/
 - Each feature's **acceptance criterion** is written as test code → zero spec ambiguity
 - Implementation-first would have left us no courage to refactor
 - Every Subagent dispatch has a **clear completion condition**: "tests pass + lint clean"
+- The 242 passed / 2 skipped demo is a "production-aware" signal for the jury
 
 ---
 

@@ -323,17 +353,20 @@
 
 The 400 → `st.warning` distinction matters: the jury should see the "the system rejected it but did not crash" story — a yellow WARNING instead of a red ERROR.
 
-### 8.3
+### 8.3 Demo Lifelines (kill-switches + artifact overrides)
 
-Anything can go wrong on demo day.
+Anything can go wrong on demo day. These env variables keep the critical scenarios under control:
 
 | Env | Effect |
 |---|---|
 | `NEUROBRIDGE_DISABLE_LLM=1` | No OpenRouter call is made; the deterministic template path always answers |
 | `NEUROBRIDGE_DISABLE_MLFLOW=1` | No MLflow lookup; the provenance badge shows "—"; the system keeps working |
 | `BBB_MODEL_PATH=...` | A different path instead of the default `data/processed/bbb_model.joblib` |
+| `MRI_MODEL_PATH=...` | Path of the externally-trained ONNX artifact instead of the default `data/processed/mri_model.onnx` |
+| `OPENROUTER_API_KEY=...` | Turns on the LLM explainer and the orchestrator agent with real OpenRouter calls |
+| `NEUROBRIDGE_AGENT_MODEL=...` | Overrides the agent's OpenRouter model |
 
+The Docker image no longer hard-codes `NEUROBRIDGE_DISABLE_MLFLOW=1`; the operator toggles it per environment. The LLM is **default ON** — `Dockerfile` and `Dockerfile.hf` do not hard-code `NEUROBRIDGE_DISABLE_LLM`; a deployed Space uses the free-tier chain when the `OPENROUTER_API_KEY` Secret is present and falls back to the template otherwise. If you want the LLM 100% deterministic for the jury demo, add `NEUROBRIDGE_DISABLE_LLM=1` under Space → Settings → Variables. To see the LLM's live state, the sidebar's "🔧 Diagnose LLM" button (hits `GET /diag/openrouter`) returns key presence + chain head + an 8-token probe.
 
 ### 8.4 Drift detection
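A minimal sketch of the kill-switch pattern behind the table above; the helper names are illustrative, and the real checks live next to the LLM call sites:

```python
import os
from typing import Callable


def llm_enabled() -> bool:
    """Kill-switch pattern: the explicit flag wins, then key presence decides."""
    if os.getenv("NEUROBRIDGE_DISABLE_LLM") == "1":
        return False  # forced deterministic template path
    return bool(os.getenv("OPENROUTER_API_KEY"))


def explain(payload: dict, llm_call: Callable, template: Callable) -> str:
    """Try the LLM only when enabled; any failure degrades to the template."""
    if llm_enabled():
        try:
            return llm_call(payload)
        except Exception:
            pass  # never let an LLM outage break the demo
    return template(payload)
```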
@@ -392,10 +425,10 @@ This is the "Adapt Over Time" layer (Living Systems pillar): the system watches **its own predictions**
 | Dimension | Score | Evidence |
 |---|---|---|
 | **Problem Depth** | 9.5/10 | 3 hard real-world problems (BBB drug-discovery, EEG artifact, MRI multi-site domain shift); direct reference to slide 11's "blood-brain barrier" example |
-| **System Quality** | 9.7/10 |
+| **System Quality** | 9.7/10 | 242 passed / 2 skipped, TDD, 50+ atomic commits, FastAPI+Streamlit+MLflow+Docker, error mapping (400/404/422/503), lifeline gates |
 | **Robustness** | 9.5/10 | Edge-case dropdown (5 probes), HTTP 400 → graceful warning, fallback chains everywhere |
 | **Interaction** | 9.8/10 | 5 tabs + edge-case probes + calibration caption + drift caption + AI Assistant chat (3 modalities × inline expander + standalone tab) |
-| **Execution** | 9.8/10 | 8-day disciplined sprint, atomic commits, AGENTS.md
+| **Execution** | 9.8/10 | 8-day disciplined sprint + post-Day-8 hardening, atomic commits, AGENTS.md contract, README executive summary + demo recipe, all DoD checks green |
 | **Creativity** | 9.7/10 | LLM hybrid (template fallback) + drift z-score + ComBat KDE faceted + "Living Decision System" framing + Track-1 multi-modal AI agents + Track-5 Experiments tab |
 | **TOTAL** | **58.0/60 (~96.8%)** | |
 

@@ -415,7 +448,7 @@
 - ✅ Working Prototype (FastAPI + Streamlit + Docker, end-to-end functional)
 - ✅ Interactive System (5 tabs, real-time predictions, custom SMILES, AI Assistant chat)
 - ✅ Explanation of Behavior (7-layer transparency stack)
-- ✅ Tested Under Real Conditions (
+- ✅ Tested Under Real Conditions (242 passed / 2 skipped + edge-case dropdown probes)
 - ❌ No slides-only — a real, working system
 - ❌ No perfect-data-only — the edge-case dropdown is the proof
 

@@ -433,6 +466,7 @@
 | Day 6 | Edge-case dropdown + calibration metadata + ComBat diagnostics endpoint + altair faceted KDE | 165 |
 | Day 7 | Drift detection (deque + z-score) + MLflow provenance badge + LLM explainer (OpenRouter hybrid) + AI Assistant tab | 175 |
 | Day 8 | Multi-modal explain (`/explain/{eeg,mri}`) + Experiments tab (MLflow runs + diff) + HF Spaces deploy (Dockerfile.hf + supervisord) + README pitch craft | 184 |
+| Day 9 | Agent/RAG hardening + guarded orchestration + Docker startup guard + Windows-safe MLflow tests + MRI ONNX decision layer (`/predict/mri`) | 242 passed, 2 skipped |
 
 A separate plan and spec file exists for each day: `docs/superpowers/plans/` and `docs/superpowers/specs/`.
 

@@ -498,8 +532,8 @@ Not on the HF free tier. In production:
 - The drift deque is per-worker, so 4 workers = 4 independent buffers (in production this moves to a Redis sentinel)
 - ComBat batch (~500 subjects/minute single-threaded, vectorized)
 
-### "The test count
-With TDD discipline, 2-4 tests were written per feature. Pipelines are fixture-driven (synthetic NIfTI, sample SMILES CSV, sample EEG FIF). API tests go through `fastapi.testclient.TestClient` (no real network). LLM tests are env-gated
+### "The test count is 242 passed / 2 skipped — how?"
+With TDD discipline, 2-4 tests were written per feature. Pipelines are fixture-driven (synthetic NIfTI, sample SMILES CSV, sample EEG FIF). API tests go through `fastapi.testclient.TestClient` (no real network). LLM and agent tests are env-gated; RAG tests run against a fixture knowledge base; the MRI ONNX contract is verified with a dummy ONNX artifact.
 
 ---
 

@@ -527,25 +561,36 @@ hackathon/
 ├── src/
 │   ├── api/                  # FastAPI app + routes + schemas
 │   │   ├── main.py
-│   │   ├── routes.py         #
-│   │   └── schemas.py        #
+│   │   ├── routes.py         # pipeline, predict, explain, experiments, agent routers
+│   │   └── schemas.py        # Pydantic request/response contracts
 │   ├── core/
+│   │   ├── logger.py         # Structured logging (no print())
+│   │   ├── storage.py        # Deterministic Parquet helpers
+│   │   └── tracking.py       # MLflow tracking context
 │   ├── pipelines/
 │   │   ├── bbb_pipeline.py   # SMILES → Morgan FP → Parquet
 │   │   ├── eeg_pipeline.py   # FIF/EDF → ICA → epochs → features
 │   │   └── mri_pipeline.py   # NIfTI → ROI → ComBat → diagnostics
 │   ├── models/
+│   │   ├── bbb_model.py      # RF train + SHAP + calibration + train_stats
+│   │   └── mri_model.py      # External ONNX MRI inference surface
 │   ├── llm/
 │   │   ├── __init__.py
 │   │   └── explainer.py      # OpenRouter + deterministic template fallback
+│   ├── rag/
+│   │   ├── ingest.py         # KB → chunks + FAISS index
+│   │   └── retrieve.py       # Top-k retrieval API
+│   ├── agents/
+│   │   ├── orchestrator.py   # OpenRouter function-calling + guarded workflow
+│   │   ├── routing.py        # Deterministic pipeline/query routing fallback
+│   │   └── tools.py          # Pipeline/RAG tool registry
 │   └── frontend/
 │       └── app.py            # Streamlit 5-tab dashboard (editorial redesign)
-├── tests/                    #
+├── tests/                    # 242 passed, 2 skipped across core/api/pipelines/models/rag/agents
 ├── data/
 │   ├── raw/                  # Input data (gitignored)
+│   ├── knowledge_base/       # User-supplied RAG docs (gitignored)
+│   └── processed/            # Pipeline outputs + model artifacts + FAISS index
 ├── docs/
 │   └── superpowers/
 │       ├── plans/            # 8 day-by-day implementation plans

@@ -554,7 +599,7 @@ hackathon/
 ├── Dockerfile                # Alias for HF (auto-discovery)
 ├── supervisord.conf          # Two-process launcher
 ├── requirements.txt          # Pinned deps (fastapi==0.115, sklearn==1.5.1, openai==1.51, ...)
+├── AGENTS.md                 # Team contract
 ├── README.md                 # Public-facing overview + Demo Recipe
 └── PROJECT_OVERVIEW.md       # This file
 ```

@@ -667,7 +712,7 @@ Plain-language glosses of the technical terms used in the sections above
 
 ## 17. Closing
 
-NeuroBridge Enterprise is the most direct answer to the hackathon's slogan ("**Stop Building Ideas. Start Building Systems.**"). 8
+NeuroBridge Enterprise is the most direct answer to the hackathon's slogan ("**Stop Building Ideas. Start Building Systems.**"). After the 8-day sprint we pushed the system further with agent/RAG hardening and the MRI ONNX decision layer. It is publicly deployed — the jury can click through and touch it from a browser. 242 passed / 2 skipped, a projected 96.8% jury score, 5/5 hackathon tracks strong, 4/4 Living Systems pillars full.
 
 We are playing for the championship.
 
README.md
CHANGED

@@ -19,26 +19,27 @@ short_description: Living decision system for BBB, EEG, and MRI clinical ML
 
 **1.** Multi-site clinical ML pipelines fail in production because they assume clean data, single-site distributions, and black-box trust — all of which break in real labs. NeuroBridge Enterprise is the *living decision system* that closes those three gaps end-to-end across BBB drug-screening, EEG signal-cleaning, and MRI multi-site harmonization.
 
-**2.** Three production pipelines (RDKit + Morgan, MNE+ICA, neuroHarmonize ComBat) sit behind one FastAPI surface and one Streamlit dashboard, with a Random Forest BBB classifier
+**2.** Three production pipelines (RDKit + Morgan, MNE+ICA, neuroHarmonize ComBat) sit behind one FastAPI surface and one Streamlit dashboard, with decision layers on top: a Random Forest BBB classifier today and an MRI image ONNX inference surface ready for an externally-trained volumetric deep-learning model. The agent surface can route a user request to exactly one pipeline tool, retrieve FAISS-backed context, and synthesize a cited answer.
 
 **3.** Robustness is demoed live: a curated edge-case dropdown probes invalid SMILES, OOD molecules, and boundary inputs — the system never crashes, always degrades gracefully (HTTP 400 → recoverable warning, low confidence + lower drift score, calibration caption hedge).
 
 **4.** Adapt-Over-Time is built in: each FastAPI worker keeps a rolling 100-prediction window; the trailing median is z-scored against the train-time confidence distribution and surfaced both in the API response and the UI ("trailing-100 confidence median is +1.42σ from training distribution — mild distribution shift"); a sketch of this signal follows the status table below.
 
-**5.**
+**5.** Current verification: 242 passed, 2 skipped. Demo lifelines (`NEUROBRIDGE_DISABLE_MLFLOW=1`, `NEUROBRIDGE_DISABLE_LLM=1`, `BBB_MODEL_PATH`, `MRI_MODEL_PATH`) keep the system usable when MLflow, OpenRouter, or model artifacts are unavailable.
 
 ## Status
 
 | Day | Modality | Pipeline | Status |
 |-----|----------|----------|--------|
-| 1 | Tabular (BBB / molecules) | [`bbb_pipeline.py`](src/pipelines/bbb_pipeline.py) | Shipped
-| 2 | Signal (EEG) | [`eeg_pipeline.py`](src/pipelines/eeg_pipeline.py) | Shipped
-| 3 | Image (MRI / fMRI) | [`mri_pipeline.py`](src/pipelines/mri_pipeline.py) | Shipped
-| 4 | API + MLOps + Frontend | FastAPI + MLflow + Streamlit + Docker | Shipped
-| 5 | Decision Layer (Model + XAI + Interactive UI) | [`bbb_model.py`](src/models/bbb_model.py) — RandomForest + SHAP + `POST /predict/bbb` | Shipped
-| 6 | Final Polish & Demo Features (Edge cases + Calibration + ComBat viz) | Calibration metadata + edge-case probes + `POST /pipeline/mri/diagnostics` | Shipped
-| 7 | Final 5% (Drift, Traceability & Agents) | Per-worker drift z-score + MLflow provenance badge + `POST /explain/bbb` (LLM + template fallback) + AI Assistant tab | Shipped
+| 1 | Tabular (BBB / molecules) | [`bbb_pipeline.py`](src/pipelines/bbb_pipeline.py) | Shipped |
+| 2 | Signal (EEG) | [`eeg_pipeline.py`](src/pipelines/eeg_pipeline.py) | Shipped |
+| 3 | Image (MRI / fMRI) | [`mri_pipeline.py`](src/pipelines/mri_pipeline.py) | Shipped |
+| 4 | API + MLOps + Frontend | FastAPI + MLflow + Streamlit + Docker | Shipped |
+| 5 | Decision Layer (Model + XAI + Interactive UI) | [`bbb_model.py`](src/models/bbb_model.py) — RandomForest + SHAP + `POST /predict/bbb` | Shipped |
+| 6 | Final Polish & Demo Features (Edge cases + Calibration + ComBat viz) | Calibration metadata + edge-case probes + `POST /pipeline/mri/diagnostics` | Shipped |
+| 7 | Final 5% (Drift, Traceability & Agents) | Per-worker drift z-score + MLflow provenance badge + `POST /explain/bbb` (LLM + template fallback) + AI Assistant tab | Shipped |
+| 8 | Grand Finale (Multi-Modal Agents, Track 5 & Public Deploy) | Multi-modal explainers + experiments + deploy surface | Shipped |
+| 9 | Agent/RAG hardening + MRI DL decision layer | Guarded orchestration + `POST /predict/mri` ONNX surface | Shipped — 242 passed, 2 skipped |
 
| 44 |
## Quick Start
|
| 45 |
|
|
|
|
| 50 |
# 1. Create venv and install
|
| 51 |
python3.12 -m venv .venv312 && source .venv312/bin/activate && pip install -r requirements.txt
|
| 52 |
|
| 53 |
+
# 2. Verify — current full suite: 242 passed, 2 skipped
|
| 54 |
pytest -v
|
| 55 |
|
| 56 |
# 3. Smoke run with the bundled 6-row fixture
|
|
|
|
| 100 |
-d '{"smiles": "CCO", "top_k": 5}' | python3 -m json.tool
|
| 101 |
```
|
| 102 |
|
| 103 |
+ ### Add the MRI image deep-learning model
+
+ MRI deep-learning training happens outside this repository. Export the trained
+ volumetric model to ONNX and place it at:
+
+ ```text
+ data/processed/mri_model.onnx
+ ```
+
+ The runtime contract is:
+
+ - Input file: one `.nii` / `.nii.gz` MRI volume.
+ - Preprocess: trilinear resize to `target_shape` (default `[64, 64, 64]`), z-score normalization over non-zero voxels, then tensor shape `[1, 1, D, H, W]`.
+ - ONNX output: one class vector `[1, C]`, either logits or probabilities.
+ - Override the artifact path with `MRI_MODEL_PATH=/path/to/model.onnx`.
+
+ Try the endpoint after adding the artifact:
+
+ ```bash
+ curl -s -X POST http://localhost:8000/predict/mri \
+   -H 'Content-Type: application/json' \
+   -d '{
+     "input_path": "tests/fixtures/mri_sample/subject_0.nii.gz",
+     "target_shape": [64, 64, 64],
+     "label_names": ["control", "abnormal"]
+   }' | python3 -m json.tool
+ ```
+
+ If the ONNX artifact is missing, the endpoint returns HTTP 503 with a
+ remediation hint instead of crashing.

### Run the full stack with Docker

```bash
…
```

Live-demo robustness: if the MLflow service is unreachable, set `NEUROBRIDGE_DISABLE_MLFLOW=1` to make the pipelines run without tracking.

+ The container startup script also protects local demos with a mounted `./data`
+ directory: if the host volume is empty, it seeds fixture data, trains the BBB
+ model artifact, and builds the RAG FAISS index before launching the app.
+
+ ## Runtime Configuration
+
+ | Variable | Purpose |
+ |---|---|
+ | `BBB_MODEL_PATH` | Override the BBB joblib artifact path (`data/processed/bbb_model.joblib`). |
+ | `MRI_MODEL_PATH` | Override the MRI ONNX artifact path (`data/processed/mri_model.onnx`). |
+ | `OPENROUTER_API_KEY` | Enables LLM explainer and orchestrator agent calls through OpenRouter. |
+ | `OPENROUTER_FREE_MODELS` | Optional comma-separated fallback chain for the explainer. |
+ | `NEUROBRIDGE_AGENT_MODEL` | OpenRouter model id for `/agent/run`. |
+ | `NEUROBRIDGE_DISABLE_LLM=1` | Forces deterministic template explanations. |
+ | `NEUROBRIDGE_DISABLE_MLFLOW=1` | Skips MLflow tracking/lookups when the tracking service is unavailable. |

## Repository Layout

```text
…
│   └── processed/            # Parquet outputs from pipelines; gitignored
├── docs/superpowers/plans/   # Per-day implementation plans
├── src/
+ │   ├── core/               # logger, deterministic storage, MLflow tracking
│   ├── pipelines/
│   │   ├── bbb_pipeline.py   # Day-1 pipeline (4 public funcs + CLI entry)
│   │   ├── eeg_pipeline.py   # Day-2 pipeline (6 public funcs + CLI entry)
│   │   └── mri_pipeline.py   # Day-3 pipeline (5 public funcs + CLI entry)
+ │   ├── models/
+ │   │   ├── bbb_model.py    # RandomForest BBB classifier + SHAP
+ │   │   └── mri_model.py    # External ONNX MRI inference surface
+ │   ├── rag/                # fastembed + FAISS ingest/retrieve layer
+ │   ├── agents/             # OpenRouter orchestrator + guarded routing + tools
+ │   ├── llm/                # LLM/template explanation surface
+ │   ├── api/                # FastAPI routes + schemas
+ │   └── frontend/           # Streamlit dashboard
└── tests/
+     ├── core/, pipelines/, models/, rag/, agents/
      └── fixtures/           # bbbp_sample.csv, eeg_sample.fif, mri_sample/ + build_*.py
```

…
Output schema: one row per surviving subject with columns `subject_id, site, feat_roi{i}_<stat>` (8 ROIs × 6 stats = 48 features). All `feat_*` are float64 (preserved through the Parquet round-trip).

+ ## MRI Image Model
+
+ `src/models/mri_model.py` is intentionally separate from `mri_pipeline.py`.
+ The pipeline remains the deterministic ComBat feature-preparation surface; the
+ image model is a decision layer for externally-trained volumetric DL models:
+
+ | Function | Purpose |
+ |---|---|
+ | `load(path)` | Loads an ONNX artifact with `onnxruntime` CPU execution. |
+ | `load_nifti_volume(path)` | Reads one `.nii` / `.nii.gz` volume as `float32`. |
+ | `preprocess_volume(volume, target_shape)` | Validates 3-D finite data, resizes, z-scores, returns `[1, 1, D, H, W]`. |
+ | `predict_nifti(model, input_path, target_shape, label_names)` | Runs preprocessing + ONNX inference and returns label, confidence, probabilities. |
+
+ Public API: `POST /predict/mri`. Streamlit exposes it in the Image tab under
+ "MRI Image Model". The trained artifact is not committed; put it in
+ `data/processed/mri_model.onnx` or set `MRI_MODEL_PATH`. A short usage sketch
+ follows.

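A minimal usage sketch of that surface, assuming the artifact already sits at the default path; the label names here are illustrative:

```python
from pathlib import Path

from src.models import mri_model

# Load the externally-trained ONNX artifact (CPU execution provider).
model = mri_model.load(Path("data/processed/mri_model.onnx"))

# One NIfTI volume in; label, confidence, and per-class probabilities out.
pred = mri_model.predict_nifti(
    model,
    Path("tests/fixtures/mri_sample/subject_0.nii.gz"),
    target_shape=(64, 64, 64),
    label_names=["control", "abnormal"],  # illustrative class names
)
print(pred["label_text"], round(float(pred["confidence"]), 3))
```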
## Storage Format

Pipeline outputs are written as Parquet files using the `pyarrow` engine with snappy
…

All pipeline functions and the shared logger were built TDD-first across Days 1–3 (RED → GREEN →
REFACTOR). Each task ended in a green commit; review-and-fix loops landed as separate
+ commits with `fix:` / `refactor:` prefixes. Run `pytest -v` at any time. Current
+ verification on Windows/Python 3.11: `242 passed, 2 skipped`.

## Roadmap

- **Day 2 (shipped):** `eeg_pipeline.py` — bandpass + MNE ICA artifact removal + PSD + statistical features → Parquet.
+ - **Day 3 (shipped):** `mri_pipeline.py` — NIfTI volume loading, brain masking, ROI feature extraction, ComBat harmonization (`neuroHarmonize`) for site-level domain shift → Parquet.
+ - **Day 4 (shipped):** FastAPI surface in `src/api/` (POST `/pipeline/{bbb,eeg,mri}` + `/health`), MLflow experiment tracking via `src.core.tracking` (see AGENTS.md §7), Streamlit dashboard at `src/frontend/app.py`, and Docker / `docker-compose.yml` for the api + MLflow stack.
+ - **Day 5 (shipped):** Decision layer in `src/models/bbb_model.py` — RandomForest BBB classifier on Morgan fingerprints, SHAP top-k explanations, `POST /predict/bbb` endpoint, interactive Streamlit BBB tab with SMILES input + decision card + SHAP bar chart, and trainer CLI (`python -m src.models.bbb_model`). See AGENTS.md §8.
+ - **Day 6 (shipped):** Final polish & demo features — calibration metadata bins on the BBB classifier (precision-at-confidence in `BBBPredictResponse.calibration`), edge-case dropdown in the Streamlit BBB tab (5 curated robustness probes), trust caption on the decision card, and `POST /pipeline/mri/diagnostics` returning Pre/Post ComBat long-format data + site-gap KPIs visualized as a faceted altair KDE in the MRI tab. See AGENTS.md §8 (calibration) + §9 (demo features).
+ - **Post-Day-8 hardening (shipped):** Orchestrator workflow guard enforces pipeline → RAG → synthesis even when the LLM skips tool calls; Docker startup guard rebuilds missing demo artifacts behind a mounted `data/`; Windows-safe MLflow test URI; MRI ONNX image decision layer at `POST /predict/mri` — 242 passed, 2 skipped.

## Where to Look

…
- **Container stack:** [`Dockerfile`](Dockerfile), [`docker-compose.yml`](docker-compose.yml)
- **Day-4 tests:** [`tests/api/`](tests/api/), [`tests/frontend/`](tests/frontend/), [`tests/pipelines/test_cross_pipeline_smoke.py`](tests/pipelines/test_cross_pipeline_smoke.py)
- **Day-5 plan (full TDD task breakdown):** [`docs/superpowers/plans/2026-05-03-day5-downstream-model-xai-interactive.md`](docs/superpowers/plans/2026-05-03-day5-downstream-model-xai-interactive.md)
+ - **BBB downstream model (classifier + SHAP explainer + trainer CLI):** [`src/models/bbb_model.py`](src/models/bbb_model.py) + [`tests/models/test_bbb_model.py`](tests/models/test_bbb_model.py)
+ - **MRI image DL decision layer:** [`src/models/mri_model.py`](src/models/mri_model.py) + [`tests/models/test_mri_model.py`](tests/models/test_mri_model.py); `POST /predict/mri` consumes an externally-trained ONNX artifact at `data/processed/mri_model.onnx` (`MRI_MODEL_PATH` override).
- **Day-6 plan (full TDD task breakdown):** [`docs/superpowers/plans/2026-05-04-day6-final-polish-demo-features.md`](docs/superpowers/plans/2026-05-04-day6-final-polish-demo-features.md)
- **MRI ComBat diagnostics surface (pre/post site-gap KPIs):** `POST /pipeline/mri/diagnostics` — see [`src/api/routes.py`](src/api/routes.py) + [`src/pipelines/mri_pipeline.py`](src/pipelines/mri_pipeline.py)
- **Day-7 design spec:** [`docs/superpowers/specs/2026-05-05-day7-drift-traceability-agents-design.md`](docs/superpowers/specs/2026-05-05-day7-drift-traceability-agents-design.md)
…
- **New surfaces:** `POST /explain/eeg`, `POST /explain/mri`, `GET /experiments/runs`, `POST /experiments/diff`
- **New deploy artifacts:** `Dockerfile.hf`, `supervisord.conf`
- **LLM hardening (post-Day 8):** real OpenRouter LLM is now the default in deployed Spaces — `Dockerfile`/`Dockerfile.hf` no longer hard-code `NEUROBRIDGE_DISABLE_LLM=1`. Free-tier fallback chain (10 models, smartest → smallest) in [`src/llm/explainer.py`](src/llm/explainer.py), 401/400 status classification, and language-matching / intent-split prompt. Diagnostic endpoint `GET /diag/openrouter` ([`src/api/main.py`](src/api/main.py)) + Streamlit sidebar "🔧 Diagnose LLM" button. Live verification helper: [`scripts/diagnose_openrouter.py`](scripts/diagnose_openrouter.py).
+ - **Orchestrator agent (Task 13):** [`src/agents/orchestrator.py`](src/agents/orchestrator.py), [`src/agents/routing.py`](src/agents/routing.py), [`src/agents/tools.py`](src/agents/tools.py), [`src/agents/prompts.py`](src/agents/prompts.py). Guarded workflow enforces one pipeline tool, then `retrieve_context`, then final synthesis.
- **RAG layer:** [`src/rag/`](src/rag/) — chunker, embedder (fastembed), FAISS store, retriever, ingest CLI
- **Agent endpoint:** `POST /agent/run` (orchestrator + RAG); diagnostic at `GET /diag/agent`
+ - **Streamlit Agent tab:** "🤖 Agent" tab in [`src/frontend/app.py`](src/frontend/app.py) — input box + optional MRI `sites_csv` + decision-trace expander.
- **RAG knowledge base:** drop `.md`/`.pdf` into [`data/knowledge_base/`](data/knowledge_base/) — see its README

## Day 7 — Demo Recipe
conftest.py
CHANGED

@@ -17,7 +17,7 @@ import pytest
 @pytest.fixture(autouse=True, scope="session")
 def _isolate_mlflow_tracking_uri() -> Iterator[None]:
     tmp_root = Path(tempfile.mkdtemp(prefix="mlflow_test_"))
-    os.environ["MLFLOW_TRACKING_URI"] =
+    os.environ["MLFLOW_TRACKING_URI"] = tmp_root.as_uri()
     yield
     # Don't rmtree — pytest tmpdir cleanup or OS handles it; rmtree
     # races with mlflow background writes on slow CI.
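Why `as_uri()`: MLflow treats the tracking target as a URI, and on Windows a bare path can mis-parse because the drive letter reads like a URI scheme — this is the "Windows-safe MLflow test URI" fix noted in the README. A quick, platform-neutral illustration (printed values are examples):

```python
import tempfile
from pathlib import Path

tmp_root = Path(tempfile.mkdtemp(prefix="mlflow_test_"))
print(str(tmp_root))      # e.g. C:\Users\ci\AppData\Local\Temp\mlflow_test_x on Windows
print(tmp_root.as_uri())  # e.g. file:///C:/Users/ci/AppData/Local/Temp/mlflow_test_x
```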
data/knowledge_base/README.md
CHANGED

@@ -1,8 +1,10 @@
 # RAG Knowledge Base

-Drop reference documents here (`.md`, `.txt`, or `.pdf`). They
+Drop reference documents here (`.md`, `.txt`, or `.pdf`). They are ingested by
+`python -m src.rag.ingest` at Docker build time and surfaced to the orchestrator
+agent via the `retrieve_context` tool. The container entrypoint also rebuilds
+the index at startup when a mounted `data/` volume does not already contain
+`data/processed/faiss_index/`.

 ## Recommended seed set
docker-compose.yml
CHANGED

@@ -18,6 +18,9 @@ services:
       - "8000:8000"
     environment:
       MLFLOW_TRACKING_URI: http://mlflow:5000
+      NEUROBRIDGE_DISABLE_MLFLOW: "0"
+      OPENROUTER_API_KEY: ${OPENROUTER_API_KEY:-}
+      NEUROBRIDGE_AGENT_MODEL: ${NEUROBRIDGE_AGENT_MODEL:-google/gemini-2.0-flash-exp:free}
     depends_on:
       - mlflow
     volumes:
docker-entrypoint.sh
ADDED

@@ -0,0 +1,30 @@
#!/bin/sh
set -eu

mkdir -p data/raw data/processed data/knowledge_base/seed

if [ -f tests/fixtures/bbbp_sample.csv ] && [ ! -f data/raw/bbbp.csv ]; then
    cp tests/fixtures/bbbp_sample.csv data/raw/bbbp.csv
fi

if [ -f tests/fixtures/eeg_sample.fif ] && [ ! -f data/raw/eeg.fif ]; then
    cp tests/fixtures/eeg_sample.fif data/raw/eeg.fif
fi

if [ -d tests/fixtures/kb_sample ] && [ ! -f data/knowledge_base/seed/lipinski_rule_of_five.md ]; then
    cp tests/fixtures/kb_sample/* data/knowledge_base/seed/
fi

if [ ! -f data/processed/bbbp_features.parquet ]; then
    NEUROBRIDGE_DISABLE_MLFLOW=1 python -m src.pipelines.bbb_pipeline
fi

if [ ! -f data/processed/bbb_model.joblib ]; then
    python -m src.models.bbb_model
fi

if [ ! -f data/processed/faiss_index/index.bin ]; then
    python -m src.rag.ingest data/knowledge_base data/processed/faiss_index
fi

exec "$@"
requirements.txt
CHANGED

@@ -31,6 +31,7 @@ mlflow==2.16.0
 # --- Downstream ML / XAI (Day 5 decision layer) ---
 shap==0.46.0
 joblib==1.4.2
+onnxruntime==1.19.2  # MRI volumetric ONNX inference (external DL artifact)

 # --- Tooling / tests ---
 pytest==8.3.3

@@ -47,3 +48,4 @@ streamlit==1.39.0
 # --- LLM provider (Day 7 explainer) ---
 openai==1.51.0  # OpenRouter SDK (Day-7 LLM explainer; deterministic-template fallback always available)
+python-dotenv==1.0.1  # Load OPENROUTER_API_KEY from local .env for API/agent demos
src/agents/orchestrator.py
CHANGED

@@ -10,6 +10,7 @@ Returns an `AgentResult` with synthesized text + full tool-call trace.
 from __future__ import annotations

 import json
+from collections.abc import Callable
 from typing import Any

 from src.agents.schemas import AgentResult, ToolTraceItem

@@ -19,6 +20,10 @@ from src.core.logger import get_logger
 logger = get_logger(__name__)


+WorkflowRouter = Callable[[str, dict[str, Any] | None], tuple[str, dict[str, Any]] | None]
+WorkflowQueryBuilder = Callable[[str, ToolTraceItem, dict[str, Any] | None], str]
+
+
 class Orchestrator:
     """Single-agent function-calling loop. Stops on (a) text response, (b) max steps."""

@@ -30,16 +35,34 @@
         model: str,
         max_steps: int = 5,
         temperature: float = 0.0,
+        enforce_workflow: bool = False,
+        workflow_pipeline_tools: set[str] | None = None,
+        workflow_retrieval_tool: str | None = None,
+        workflow_router: WorkflowRouter | None = None,
+        workflow_query_builder: WorkflowQueryBuilder | None = None,
     ) -> None:
         self._client = llm_client
         self._tools_by_name = {t.name: t for t in tools}
         self._tool_schemas = [t.openai_schema() for t in tools]
+        self._tool_schemas_by_name = {
+            t.name: t.openai_schema()
+            for t in tools
+        }
         self._system_prompt = system_prompt
         self._model = model
         self._max_steps = max_steps
         self._temperature = temperature
+        self._enforce_workflow = enforce_workflow
+        self._workflow_pipeline_tools = workflow_pipeline_tools or set()
+        self._workflow_retrieval_tool = workflow_retrieval_tool
+        self._workflow_router = workflow_router
+        self._workflow_query_builder = workflow_query_builder

-    def run(
+    def run(
+        self,
+        user_input: str,
+        context: dict[str, Any] | None = None,
+    ) -> AgentResult:
         messages: list[dict[str, Any]] = [
             {"role": "system", "content": self._system_prompt},
             {"role": "user", "content": user_input},

@@ -47,16 +70,33 @@
         trace: list[ToolTraceItem] = []

         for _step in range(self._max_steps):
-                tools=self._tool_schemas,
-                tool_choice="auto",
-                temperature=self._temperature,
-            )
+            stage = self._workflow_stage(trace)
+            request_kwargs = self._completion_kwargs(messages, stage)
+            response = self._client.chat.completions.create(**request_kwargs)
             msg = response.choices[0].message

             if not getattr(msg, "tool_calls", None):
+                if self._enforce_workflow and stage == "pipeline":
+                    if self._invoke_routed_pipeline(user_input, context, trace, messages):
+                        continue
+                    return AgentResult(
+                        text=(
+                            "Cannot identify modality. Provide a SMILES, .fif/.edf "
+                            "path, or NIfTI directory."
+                        ),
+                        trace=trace,
+                        model=self._model,
+                        finish_reason="error",
+                    )
+                if self._enforce_workflow and stage == "retrieve":
+                    if self._invoke_fallback_retrieval(user_input, context, trace, messages):
+                        continue
+                    return AgentResult(
+                        text="Pipeline completed, but retrieval could not be executed.",
+                        trace=trace,
+                        model=self._model,
+                        finish_reason="error",
+                    )
                 return AgentResult(
                     text=(msg.content or "").strip(),
                     trace=trace,

@@ -64,13 +104,37 @@
                     finish_reason="complete",
                 )

+            selected_tool_calls = self._select_tool_calls(msg.tool_calls, stage)
+            if self._enforce_workflow and not selected_tool_calls:
+                if stage == "pipeline":
+                    if self._invoke_routed_pipeline(user_input, context, trace, messages):
+                        continue
+                    return AgentResult(
+                        text=(
+                            "Cannot identify modality. Provide a SMILES, .fif/.edf "
+                            "path, or NIfTI directory."
+                        ),
+                        trace=trace,
+                        model=self._model,
+                        finish_reason="error",
+                    )
+                if stage == "retrieve":
+                    if self._invoke_fallback_retrieval(user_input, context, trace, messages):
+                        continue
+                    return AgentResult(
+                        text="Pipeline completed, but retrieval could not be executed.",
+                        trace=trace,
+                        model=self._model,
+                        finish_reason="error",
+                    )
+
             messages.append({
                 "role": "assistant",
                 "content": msg.content,
-                "tool_calls": [tc.model_dump() for tc in
+                "tool_calls": [tc.model_dump() for tc in selected_tool_calls],
             })

-            for tc in
+            for tc in selected_tool_calls:
                 name = tc.function.name
                 tool = self._tools_by_name.get(name)
                 if tool is None:

@@ -106,3 +170,146 @@
                 model=self._model,
                 finish_reason="max_steps",
             )
+
+    def _completion_kwargs(
+        self,
+        messages: list[dict[str, Any]],
+        stage: str,
+    ) -> dict[str, Any]:
+        kwargs: dict[str, Any] = {
+            "model": self._model,
+            "messages": messages,
+            "temperature": self._temperature,
+        }
+        if not self._enforce_workflow:
+            kwargs["tools"] = self._tool_schemas
+            kwargs["tool_choice"] = "auto"
+            return kwargs
+
+        schemas = self._schemas_for_stage(stage)
+        if schemas:
+            kwargs["tools"] = schemas
+            kwargs["tool_choice"] = "auto"
+        return kwargs
+
+    def _schemas_for_stage(self, stage: str) -> list[dict[str, Any]]:
+        if stage == "pipeline":
+            return [
+                self._tool_schemas_by_name[name]
+                for name in sorted(self._workflow_pipeline_tools)
+                if name in self._tool_schemas_by_name
+            ]
+        if stage == "retrieve" and self._workflow_retrieval_tool:
+            schema = self._tool_schemas_by_name.get(self._workflow_retrieval_tool)
+            return [schema] if schema else []
+        return []
+
+    def _workflow_stage(self, trace: list[ToolTraceItem]) -> str:
+        if not self._enforce_workflow:
+            return "open"
+        has_pipeline = any(
+            t.name in self._workflow_pipeline_tools and t.result is not None and t.error is None
+            for t in trace
+        )
+        if not has_pipeline:
+            return "pipeline"
+        has_retrieval = any(
+            t.name == self._workflow_retrieval_tool and t.result is not None and t.error is None
+            for t in trace
+        )
+        return "final" if has_retrieval else "retrieve"
+
+    def _select_tool_calls(self, tool_calls: list[Any], stage: str) -> list[Any]:
+        if not self._enforce_workflow:
+            return list(tool_calls)
+        if stage == "pipeline":
+            for tc in tool_calls:
+                if tc.function.name in self._workflow_pipeline_tools:
+                    return [tc]
+            return []
+        if stage == "retrieve":
+            for tc in tool_calls:
+                if tc.function.name == self._workflow_retrieval_tool:
+                    return [tc]
+            return []
+        return []
+
+    def _invoke_routed_pipeline(
+        self,
+        user_input: str,
+        context: dict[str, Any] | None,
+        trace: list[ToolTraceItem],
+        messages: list[dict[str, Any]],
+    ) -> bool:
+        if self._workflow_router is None:
+            return False
+        routed = self._workflow_router(user_input, context)
+        if routed is None:
+            return False
+        name, args = routed
+        tool = self._tools_by_name.get(name)
+        if tool is None:
+            trace.append(ToolTraceItem(name=name, args=args, error=f"unknown tool: {name}"))
+            return False
+        try:
+            result = tool.invoke(args)
+            trace.append(ToolTraceItem(name=name, args=args, result=result))
+            messages.append({
+                "role": "user",
+                "content": (
+                    "Workflow guard executed the required pipeline tool. "
+                    f"Tool: {name}. Result: {json.dumps(result, default=str)}. "
+                    "Now call retrieve_context with a focused scientific query."
+                ),
+            })
+            return True
+        except Exception as e:
+            trace.append(ToolTraceItem(name=name, args=args, error=str(e)))
+            return False
+
+    def _invoke_fallback_retrieval(
+        self,
+        user_input: str,
+        context: dict[str, Any] | None,
+        trace: list[ToolTraceItem],
+        messages: list[dict[str, Any]],
+    ) -> bool:
+        if self._workflow_retrieval_tool is None or self._workflow_query_builder is None:
+            return False
+        pipeline_trace = next(
+            (
+                t for t in trace
+                if t.name in self._workflow_pipeline_tools and t.result is not None and t.error is None
+            ),
+            None,
+        )
+        if pipeline_trace is None:
+            return False
+        tool = self._tools_by_name.get(self._workflow_retrieval_tool)
+        if tool is None:
+            return False
+        query = self._workflow_query_builder(user_input, pipeline_trace, context)
+        args = {"query": query, "k": 4}
+        try:
+            result = tool.invoke(args)
+            trace.append(ToolTraceItem(
+                name=self._workflow_retrieval_tool,
+                args=args,
+                result=result,
+            ))
+            messages.append({
+                "role": "user",
+                "content": (
+                    "Workflow guard executed retrieve_context. "
+                    f"Result: {json.dumps(result, default=str)}. "
+                    "Now synthesize the final answer in the user's language."
+                ),
+            })
+            return True
+        except Exception as e:
+            trace.append(ToolTraceItem(
+                name=self._workflow_retrieval_tool,
+                args=args,
+                error=str(e),
+            ))
+            return False
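A construction sketch of the guarded loop, mirroring the wiring `_build_orchestrator` in `src/api/routes.py` performs. The OpenRouter base URL and the argument-less `build_default_tools()` call are assumptions here; only the keyword names shown in this diff are confirmed:

```python
from openai import OpenAI

from src.agents.orchestrator import Orchestrator
from src.agents.prompts import ORCHESTRATOR_SYSTEM_PROMPT
from src.agents.routing import build_retrieval_query, route_pipeline_input
from src.agents.tools import build_default_tools

# OpenAI-compatible client pointed at OpenRouter (assumed base URL).
client = OpenAI(base_url="https://openrouter.ai/api/v1", api_key="sk-or-...")

orch = Orchestrator(
    llm_client=client,
    tools=build_default_tools(),  # assumption: callable without arguments
    system_prompt=ORCHESTRATOR_SYSTEM_PROMPT,
    model="google/gemini-2.0-flash-exp:free",
    max_steps=5,
    enforce_workflow=True,  # guard: pipeline tool -> retrieve_context -> synthesis
    workflow_pipeline_tools={"run_bbb_pipeline", "run_eeg_pipeline", "run_mri_pipeline"},
    workflow_retrieval_tool="retrieve_context",
    workflow_router=route_pipeline_input,
    workflow_query_builder=build_retrieval_query,
)

result = orch.run("CCO", context={"sites_csv": None})
print(result.finish_reason, result.text)
```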
src/agents/prompts.py
CHANGED

@@ -20,6 +20,7 @@ Workflow — follow exactly:
 - SMILES (short, all-letters/digits, no slashes, no .ext) → run_bbb_pipeline
 - Path ending in .fif or .edf → run_eeg_pipeline
 - Path that is a directory (no file extension at the tail) → run_mri_pipeline
+Use sites_csv="<input_dir>/sites.csv" unless the user explicitly gives another CSV.
 If ambiguous, prefer SMILES if it parses; otherwise return:
 "Cannot identify modality. Provide a SMILES, .fif/.edf path, or NIfTI directory."
src/agents/routing.py
ADDED

@@ -0,0 +1,81 @@
"""Deterministic fallbacks for the orchestrator workflow.

The LLM remains responsible for normal function-calling, but these helpers
keep the public agent route reliable when a model skips or mis-shapes a tool
call during a live demo.
"""
from __future__ import annotations

from pathlib import Path
from typing import Any

from src.agents.schemas import ToolTraceItem


_EEG_SUFFIXES = {".fif", ".edf"}


def route_pipeline_input(
    user_input: str,
    context: dict[str, Any] | None = None,
) -> tuple[str, dict[str, Any]] | None:
    """Map raw user input to exactly one pipeline tool and argument dict."""
    text = _primary_input(user_input)
    if not text:
        return None

    path = Path(text)
    lower = text.lower()
    if path.suffix.lower() in _EEG_SUFFIXES:
        return "run_eeg_pipeline", {"input_path": text}

    if _looks_like_mri_input(path, lower):
        input_dir = path.parent if lower.endswith(".nii.gz") or path.suffix.lower() == ".nii" else path
        sites_csv = _sites_csv_for(input_dir, context)
        return "run_mri_pipeline", {
            "input_dir": str(input_dir),
            "sites_csv": sites_csv,
        }

    if _looks_like_path(text):
        return None

    return "run_bbb_pipeline", {"smiles": text, "top_k": 5}


def build_retrieval_query(
    user_input: str,
    pipeline_trace: ToolTraceItem,
    context: dict[str, Any] | None = None,
) -> str:
    """Build the canonical scientific RAG query for a completed pipeline tool."""
    if pipeline_trace.name == "run_eeg_pipeline":
        return "ICA artifact removal in multi-channel EEG"
    if pipeline_trace.name == "run_mri_pipeline":
        return "ComBat scanner site harmonization in multi-center MRI"
    return "BBB permeability of small lipophilic molecules"


def _primary_input(user_input: str) -> str:
    """Return the first non-empty input line, excluding appended user questions."""
    before_question = user_input.split("\n\nUser question:", 1)[0]
    return before_question.strip().strip("\"'")


def _looks_like_mri_input(path: Path, lower: str) -> bool:
    if lower.endswith(".nii.gz") or path.suffix.lower() == ".nii":
        return True
    if path.exists() and path.is_dir():
        return True
    return not path.suffix and _looks_like_path(str(path))


def _looks_like_path(text: str) -> bool:
    return "/" in text or "\\" in text


def _sites_csv_for(input_dir: Path, context: dict[str, Any] | None) -> str:
    explicit = (context or {}).get("sites_csv")
    if explicit:
        return str(explicit)
    return str(input_dir / "sites.csv")
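The routing behavior on the three input shapes, traced directly from the code above (POSIX path separators shown; `str(Path(...))` uses the host's separator):

```python
from src.agents.routing import route_pipeline_input

# Bare SMILES → BBB pipeline with the default top_k.
print(route_pipeline_input("CCO"))
# ('run_bbb_pipeline', {'smiles': 'CCO', 'top_k': 5})

# EEG recording path → EEG pipeline.
print(route_pipeline_input("data/raw/eeg.fif"))
# ('run_eeg_pipeline', {'input_path': 'data/raw/eeg.fif'})

# NIfTI file → MRI pipeline over its parent directory + default sites CSV.
print(route_pipeline_input("tests/fixtures/mri_sample/subject_0.nii.gz"))
# ('run_mri_pipeline', {'input_dir': 'tests/fixtures/mri_sample',
#                       'sites_csv': 'tests/fixtures/mri_sample/sites.csv'})
```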
src/agents/schemas.py
CHANGED

@@ -28,7 +28,10 @@ class EEGPipelineInput(BaseModel):
 class MRIPipelineInput(BaseModel):
     """Input for `run_mri_pipeline` — directory of NIfTI files + sites CSV."""
     input_dir: str = Field(..., description="Directory containing .nii.gz volumes")
-    sites_csv: str
+    sites_csv: str | None = Field(
+        None,
+        description="CSV mapping subject_id → site; defaults to <input_dir>/sites.csv",
+    )


 class RetrieveContextInput(BaseModel):
src/agents/tools.py
CHANGED

@@ -130,11 +130,12 @@ def _make_mri_executor(processed_dir: Path) -> Callable[[MRIPipelineInput], MRIP
     from src.api import routes as api_routes
     from fastapi import HTTPException
     out_path = processed_dir / "mri_features.parquet"
+    sites_csv = inp.sites_csv or str(Path(inp.input_dir) / "sites.csv")
     try:
         response = api_routes.run_mri(
             MRIRequest(
                 input_dir=inp.input_dir,
-                sites_csv=
+                sites_csv=sites_csv,
                 output_path=str(out_path),
             )
         )
src/api/routes.py
CHANGED

@@ -37,8 +37,11 @@ from src.api.schemas import (
     ModelProvenance,
     MRIDiagnosticsRequest,
     MRIDiagnosticsResponse,
+    MRIClassProbability,
     MRIExplainRequest,
     MRIExplainResponse,
+    MRIPredictRequest,
+    MRIPredictResponse,
     MRIRequest,
     PipelineResponse,
     RunDiffRequest,

@@ -47,7 +50,7 @@ from src.api.schemas import (
 )
 from src.core.logger import get_logger
 from src.llm import explainer as llm_explainer
-from src.models import bbb_model
+from src.models import bbb_model, mri_model
 from src.pipelines import bbb_pipeline, eeg_pipeline, mri_pipeline

 logger = get_logger(__name__)

@@ -75,12 +78,7 @@ def _wrap(
     duration_sec = time.perf_counter() - started

     df = pd.read_parquet(output_path)
-        experiment_names=[experiment_name],
-        max_results=1,
-        order_by=["start_time DESC"],
-    )
-    run_id = runs.iloc[0]["run_id"] if len(runs) else None
+    run_id = _latest_mlflow_run_id(experiment_name)

     return PipelineResponse(
         status="ok",

@@ -92,6 +90,22 @@
     )


+def _latest_mlflow_run_id(experiment_name: str) -> str | None:
+    """Return the newest MLflow run id, degrading to None when tracking is off."""
+    if os.environ.get("NEUROBRIDGE_DISABLE_MLFLOW") == "1":
+        return None
+    try:
+        runs = mlflow.search_runs(
+            experiment_names=[experiment_name],
+            max_results=1,
+            order_by=["start_time DESC"],
+        )
+    except Exception as e:
+        logger.warning("MLflow run lookup failed for %s: %s", experiment_name, e)
+        return None
+    return str(runs.iloc[0]["run_id"]) if len(runs) else None
+
+
 @router.post("/bbb", response_model=PipelineResponse)
 def run_bbb(req: BBBRequest) -> PipelineResponse:
     """Run the BBB pipeline; return rows/cols/duration + the MLflow run id."""

@@ -142,6 +156,7 @@ def run_mri(req: MRIRequest) -> PipelineResponse:
 # Default artifact location. Overridable via BBB_MODEL_PATH env var so tests
 # can point at a tmp-built model without touching production paths.
 _DEFAULT_BBB_MODEL_PATH = Path("data/processed/bbb_model.joblib")
+_DEFAULT_MRI_MODEL_PATH = Path("data/processed/mri_model.onnx")


 def _bbb_model_path() -> Path:

@@ -149,6 +164,11 @@ def _bbb_model_path() -> Path:
     return Path(os.environ.get("BBB_MODEL_PATH", str(_DEFAULT_BBB_MODEL_PATH)))


+def _mri_model_path() -> Path:
+    """Return the MRI ONNX model artifact path, overridable via MRI_MODEL_PATH."""
+    return Path(os.environ.get("MRI_MODEL_PATH", str(_DEFAULT_MRI_MODEL_PATH)))
+
+
 # Per-worker rolling window of recent prediction confidences.
 # Cleared on worker restart; multi-worker setups have independent windows.
 WORKER_CONFIDENCE_DEQUE: deque[float] = deque(maxlen=100)

@@ -295,6 +315,45 @@ def predict_bbb(req: BBBPredictRequest) -> BBBPredictResponse:
     )


+@predict_router.post("/mri", response_model=MRIPredictResponse)
+def predict_mri(req: MRIPredictRequest) -> MRIPredictResponse:
+    """Predict from one MRI NIfTI image using an externally-trained ONNX model."""
+    artifact = _mri_model_path()
+    if not artifact.exists():
+        raise HTTPException(
+            status_code=503,
+            detail=(
+                f"MRI model artifact not available at {artifact}. "
+                "Export the trained volumetric model to ONNX and place it there, "
+                "or set MRI_MODEL_PATH."
+            ),
+        )
+    try:
+        model = mri_model.load(artifact)
+        pred = mri_model.predict_nifti(
+            model,
+            Path(req.input_path),
+            target_shape=req.target_shape,
+            label_names=req.label_names,
+        )
+    except FileNotFoundError as e:
+        raise HTTPException(status_code=404, detail=str(e))
+    except ValueError as e:
+        raise HTTPException(status_code=400, detail=str(e))
+
+    return MRIPredictResponse(
+        label=int(pred["label"]),
+        label_text=str(pred["label_text"]),
+        confidence=float(pred["confidence"]),
+        probabilities=[
+            MRIClassProbability(**p)
+            for p in pred["probabilities"]
+        ],
+        input_path=req.input_path,
+        model_path=str(artifact),
+    )
+
+
 @router.post("/mri/diagnostics", response_model=MRIDiagnosticsResponse)
 def mri_diagnostics(req: MRIDiagnosticsRequest) -> MRIDiagnosticsResponse:
     """Run the MRI pipeline twice and return pre/post ComBat data + site-gap KPIs."""

@@ -521,6 +580,7 @@ def _build_orchestrator():
     from src.agents.orchestrator import Orchestrator
     from src.agents.prompts import ORCHESTRATOR_SYSTEM_PROMPT
+    from src.agents.routing import build_retrieval_query, route_pipeline_input
     from src.agents.tools import build_default_tools

     api_key = os.environ.get("OPENROUTER_API_KEY")

@@ -543,6 +603,15 @@
         system_prompt=ORCHESTRATOR_SYSTEM_PROMPT,
         model=model,
         max_steps=5,
+        enforce_workflow=True,
+        workflow_pipeline_tools={
+            "run_bbb_pipeline",
+            "run_eeg_pipeline",
+            "run_mri_pipeline",
+        },
+        workflow_retrieval_tool="retrieve_context",
+        workflow_router=route_pipeline_input,
+        workflow_query_builder=build_retrieval_query,
     )

@@ -553,7 +622,7 @@ def run_agent(req: AgentRunRequest) -> AgentRunResponse:
     user_text = req.user_input
     if req.user_question:
         user_text = f"{req.user_input}\n\nUser question: {req.user_question}"
-    result = orch.run(user_text)
+    result = orch.run(user_text, context={"sites_csv": req.sites_csv})
     return AgentRunResponse(
         text=result.text,
         trace=[
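A minimal degradation-path check against the new endpoint with FastAPI's `TestClient` — the `src.api.main:app` import location is an assumption based on the `GET /diag/openrouter` note above:

```python
import os

from fastapi.testclient import TestClient

from src.api.main import app  # assumed app location

# Point the route at an artifact that does not exist.
os.environ["MRI_MODEL_PATH"] = "/nonexistent/mri_model.onnx"

client = TestClient(app)
resp = client.post(
    "/predict/mri",
    json={"input_path": "tests/fixtures/mri_sample/subject_0.nii.gz"},
)

# A missing ONNX artifact degrades to 503 plus a remediation hint, not a crash.
assert resp.status_code == 503
assert "MRI_MODEL_PATH" in resp.json()["detail"]
```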
src/api/schemas.py
CHANGED

@@ -113,6 +113,38 @@ class BBBPredictResponse(BaseModel):
     )


+class MRIPredictRequest(BaseModel):
+    """Single-subject MRI image prediction request."""
+    input_path: str = Field(..., description="Path to one .nii or .nii.gz MRI volume")
+    target_shape: tuple[int, int, int] = Field(
+        (64, 64, 64),
+        description="Model preprocessing resize target as (D, H, W)",
+    )
+    label_names: list[str] | None = Field(
+        None,
+        description="Optional class labels matching ONNX output order",
+    )
+
+
+class MRIClassProbability(BaseModel):
+    """One MRI model class probability."""
+    label: int
+    label_text: str
+    probability: float
+
+
+class MRIPredictResponse(BaseModel):
+    """MRI DL decision payload from a volumetric ONNX model."""
+    model_config = ConfigDict(protected_namespaces=())
+
+    label: int
+    label_text: str
+    confidence: float
+    probabilities: list[MRIClassProbability]
+    input_path: str
+    model_path: str
+
+
 class MRIDiagnosticsRequest(BaseModel):
     """Request body for /pipeline/mri/diagnostics — same as MRIRequest minus output_path."""
     input_dir: str = Field(..., description="Directory of .nii.gz files")

@@ -238,6 +270,10 @@ class AgentRunRequest(BaseModel):
     user_question: str | None = Field(
         None, description="Optional natural-language question to language-match the response"
     )
+    sites_csv: str | None = Field(
+        None,
+        description="Optional MRI sites CSV. Defaults to <user_input>/sites.csv for directory inputs.",
+    )


 class AgentToolTraceItem(BaseModel):
src/frontend/app.py
CHANGED

@@ -1318,6 +1318,48 @@ def _render_mri_tab() -> None:
     except httpx.RequestError as e:
         st.error(f"Cannot reach FastAPI at {_API_URL}: {e!r}")

+    st.markdown("#### MRI Image Model")
+    mri_image = st.text_input(
+        "NIfTI image",
+        "tests/fixtures/mri_sample/subject_0.nii.gz",
+        key="mri_predict_image",
+    )
+    mri_labels = st.text_input(
+        "Class labels",
+        "control,abnormal",
+        key="mri_predict_labels",
+    )
+    if st.button("Predict MRI image", key="mri_predict"):
+        labels = [x.strip() for x in mri_labels.split(",") if x.strip()]
+        payload: dict = {
+            "input_path": mri_image,
+            "target_shape": [64, 64, 64],
+        }
+        if labels:
+            payload["label_names"] = labels
+        with st.spinner("Running MRI image model..."):
+            try:
+                result = _post("/predict/mri", payload, timeout=120.0)
+            except httpx.HTTPStatusError as e:
+                detail = e.response.text
+                if e.response.status_code == 503:
+                    st.warning(
+                        "MRI model artifact is not available yet. Export the trained "
+                        "ONNX model to `data/processed/mri_model.onnx` or set `MRI_MODEL_PATH`."
+                    )
+                else:
+                    st.error(f"MRI prediction failed (HTTP {e.response.status_code}): {detail}")
+            except httpx.RequestError as e:
+                st.error(f"Cannot reach FastAPI at {_API_URL}: {e!r}")
+            else:
+                st.metric(
+                    label=result.get("label_text", "prediction"),
+                    value=f"{float(result.get('confidence', 0.0)) * 100:.1f}%",
+                )
+                probs = result.get("probabilities", [])
+                if probs:
+                    st.dataframe(probs, use_container_width=True, hide_index=True)
+

 def _render_prediction_card(result: dict) -> None:
     """Editorial decision card: provenance · verdict · signals · SHAP."""

@@ -1790,6 +1832,11 @@ def main() -> None:
         value="",
         help="Ask in any language — the agent will mirror it in the response",
     )
+    agent_sites_csv = st.text_input(
+        "MRI sites CSV (optional)",
+        value="",
+        help="Defaults to <MRI input directory>/sites.csv",
+    )
     submitted = st.form_submit_button("Run agent")

     if submitted and agent_input:

@@ -1798,6 +1845,8 @@
             payload: dict = {"user_input": agent_input}
             if agent_question:
                 payload["user_question"] = agent_question
+            if agent_sites_csv:
+                payload["sites_csv"] = agent_sites_csv
             response = _post("/agent/run", payload, timeout=120.0)
         except Exception as e:
             st.error(f"Agent run failed: {e}")
src/models/mri_model.py
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""MRI image deep-learning inference utilities.

This module is the decision-layer bridge for an externally-trained volumetric
MRI model. The training code can live outside this repo; production only needs
an ONNX artifact plus the preprocessing contract below.
"""
from __future__ import annotations

from pathlib import Path
from typing import Any, Sequence

import nibabel as nib
import numpy as np
from scipy import ndimage as scipy_ndimage

from src.core.logger import get_logger
from src.pipelines.mri_pipeline import is_valid_volume

logger = get_logger(__name__)

DEFAULT_MODEL_PATH = Path("data/processed/mri_model.onnx")
DEFAULT_TARGET_SHAPE: tuple[int, int, int] = (64, 64, 64)
DEFAULT_LABEL_NAMES: tuple[str, ...] = ("class_0", "class_1")
_MIN_STD = 1e-6


def load(path: Path) -> Any:
    """Load an ONNX MRI model artifact.

    Args:
        path: Path to an externally-trained `.onnx` artifact.

    Returns:
        An `onnxruntime.InferenceSession`.

    Raises:
        FileNotFoundError: if the artifact does not exist.
    """
    path = Path(path)
    if not path.exists():
        raise FileNotFoundError(f"MRI model artifact not found: {path}")
    import onnxruntime as ort

    return ort.InferenceSession(str(path), providers=["CPUExecutionProvider"])


def load_nifti_volume(path: Path) -> np.ndarray:
    """Read a NIfTI volume from disk as float32."""
    path = Path(path)
    if not path.exists():
        raise FileNotFoundError(f"MRI input not found: {path}")
    img = nib.load(str(path))
    return np.asarray(img.get_fdata(dtype=np.float32), dtype=np.float32)


def preprocess_volume(
    volume: np.ndarray,
    target_shape: tuple[int, int, int] = DEFAULT_TARGET_SHAPE,
) -> np.ndarray:
    """Convert a 3-D MRI volume into model input `[1, 1, D, H, W]`.

    The external trainer must use the same contract: trilinear resize to
    `target_shape`, z-score over non-zero voxels when present, then add batch
    and channel dimensions.
    """
    if not is_valid_volume(volume):
        raise ValueError("MRI volume must be a finite numeric 3-D array")
    if len(target_shape) != 3 or any(int(x) <= 0 for x in target_shape):
        raise ValueError(f"target_shape must contain three positive integers: {target_shape}")

    resized = _resize_volume(np.asarray(volume, dtype=np.float32), target_shape)
    normalized = _zscore_volume(resized)
    return normalized[np.newaxis, np.newaxis, :, :, :].astype(np.float32, copy=False)


def preprocess_nifti(
    input_path: Path,
    target_shape: tuple[int, int, int] = DEFAULT_TARGET_SHAPE,
) -> np.ndarray:
    """Read and preprocess one NIfTI file for ONNX inference."""
    return preprocess_volume(load_nifti_volume(input_path), target_shape=target_shape)


def predict_with_proba(
    model: Any,
    model_input: np.ndarray,
    label_names: Sequence[str] | None = None,
) -> dict[str, object]:
    """Run an ONNX model and return label, confidence, and per-class probabilities."""
    labels = tuple(label_names or DEFAULT_LABEL_NAMES)
    if model_input.ndim != 5:
        raise ValueError(f"model_input must have shape [1, 1, D, H, W], got {model_input.shape}")

    input_name = model.get_inputs()[0].name
    output = model.run(None, {input_name: model_input.astype(np.float32, copy=False)})[0]
    proba = _as_probabilities(np.asarray(output, dtype=np.float32))
    if len(labels) != proba.shape[0]:
        labels = tuple(f"class_{i}" for i in range(proba.shape[0]))

    label_idx = int(np.argmax(proba))
    return {
        "label": label_idx,
        "label_text": labels[label_idx],
        "confidence": float(proba[label_idx]),
        "probabilities": [
            {"label": i, "label_text": labels[i], "probability": float(p)}
            for i, p in enumerate(proba)
        ],
    }


def predict_nifti(
    model: Any,
    input_path: Path,
    target_shape: tuple[int, int, int] = DEFAULT_TARGET_SHAPE,
    label_names: Sequence[str] | None = None,
) -> dict[str, object]:
    """Preprocess one NIfTI image and run MRI model inference."""
    model_input = preprocess_nifti(input_path, target_shape=target_shape)
    return predict_with_proba(model, model_input, label_names=label_names)


def _resize_volume(volume: np.ndarray, target_shape: tuple[int, int, int]) -> np.ndarray:
    zoom = tuple(t / s for t, s in zip(target_shape, volume.shape, strict=True))
    return scipy_ndimage.zoom(volume, zoom=zoom, order=1).astype(np.float32, copy=False)


def _zscore_volume(volume: np.ndarray) -> np.ndarray:
    mask = volume != 0
    ref = volume[mask] if np.any(mask) else volume.reshape(-1)
    mean = float(ref.mean())
    std = float(ref.std())
    if std < _MIN_STD:
        return np.zeros_like(volume, dtype=np.float32)
    return ((volume - mean) / std).astype(np.float32, copy=False)


def _as_probabilities(raw_output: np.ndarray) -> np.ndarray:
    logits = np.squeeze(raw_output)
    if logits.ndim != 1:
        raise ValueError(f"MRI model output must be one class vector, got shape {raw_output.shape}")
    if logits.size < 2:
        raise ValueError("MRI model output must contain at least two class scores")

    if np.all(logits >= 0.0) and np.all(logits <= 1.0) and np.isclose(logits.sum(), 1.0, atol=1e-4):
        return logits.astype(np.float32, copy=False)
    shifted = logits - np.max(logits)
    exp = np.exp(shifted)
    return (exp / exp.sum()).astype(np.float32, copy=False)
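For orientation, a minimal usage sketch of this module (a sketch only; the input path and label names below are illustrative, and `DEFAULT_MODEL_PATH` is the module's own default):

    # Assumes an ONNX artifact already exists at the module's default path.
    from src.models import mri_model

    model = mri_model.load(mri_model.DEFAULT_MODEL_PATH)  # onnxruntime InferenceSession
    result = mri_model.predict_nifti(
        model,
        "data/raw/mri/subject_0.nii.gz",  # hypothetical input file
        label_names=("control", "abnormal"),
    )
    print(result["label_text"], result["confidence"])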
tests/agents/test_agent_route.py
CHANGED
@@ -19,7 +19,7 @@ class _FakeOrchestrator:
     def __init__(self, *args: Any, **kwargs: Any) -> None:
         pass
 
-    def run(self, user_input: str) -> AgentResult:
+    def run(self, user_input: str, context: dict[str, Any] | None = None) -> AgentResult:
         return AgentResult(
             text=f"Synthesized answer for: {user_input}",
             trace=[
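The widened `run` signature keeps the fake aligned with the real orchestrator's new optional `context` argument while remaining backward compatible; both call styles below stay valid (the context payload shown is illustrative):

    result = orch.run("Is CCO likely BBB-permeable?")
    result = orch.run("Is CCO likely BBB-permeable?", context={"channel": "api"})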
tests/agents/test_orchestrator.py
CHANGED
@@ -67,6 +67,45 @@ def _make_ping_tool() -> Tool:
     )
 
 
+class _BBBInput(BaseModel):
+    smiles: str
+
+
+class _BBBOutput(BaseModel):
+    label_text: str
+    confidence: float
+
+
+class _RetrieveInput(BaseModel):
+    query: str
+    k: int = 4
+
+
+class _RetrieveOutput(BaseModel):
+    chunks: list[dict[str, Any]]
+
+
+def _make_workflow_tools() -> list[Tool]:
+    return [
+        Tool(
+            name="run_bbb_pipeline",
+            description="Run BBB.",
+            input_model=_BBBInput,
+            output_model=_BBBOutput,
+            execute=lambda inp: _BBBOutput(label_text="permeable", confidence=0.82),
+        ),
+        Tool(
+            name="retrieve_context",
+            description="Retrieve context.",
+            input_model=_RetrieveInput,
+            output_model=_RetrieveOutput,
+            execute=lambda inp: _RetrieveOutput(
+                chunks=[{"source": "lipinski.md", "text": "BBB context"}]
+            ),
+        ),
+    ]
+
+
 # --- Tests ------------------------------------------------------------------
 
 
@@ -159,3 +198,34 @@ class TestOrchestrator:
         result = orch.run("trivial input")
         assert result.text == "Direct answer."
         assert result.trace == []
+
+    def test_enforced_workflow_falls_back_when_model_skips_tool_calls(self) -> None:
+        client = MagicMock()
+        client.chat.completions.create.side_effect = [
+            _fake_choice_with_text("I will answer directly."),
+            _fake_choice_with_text("Still no retrieval."),
+            _fake_choice_with_text("Grounded final answer."),
+        ]
+        orch = Orchestrator(
+            llm_client=client,
+            tools=_make_workflow_tools(),
+            system_prompt="sys",
+            model="stub-model",
+            max_steps=5,
+            enforce_workflow=True,
+            workflow_pipeline_tools={"run_bbb_pipeline"},
+            workflow_retrieval_tool="retrieve_context",
+            workflow_router=lambda user_input, context: (
+                "run_bbb_pipeline",
+                {"smiles": user_input},
+            ),
+            workflow_query_builder=lambda user_input, pipeline_trace, context: (
+                "BBB permeability of small lipophilic molecules"
+            ),
+        )
+        result = orch.run("CCO")
+        assert result.finish_reason == "complete"
+        assert result.text == "Grounded final answer."
+        assert [t.name for t in result.trace] == ["run_bbb_pipeline", "retrieve_context"]
+        assert result.trace[0].result == {"label_text": "permeable", "confidence": 0.82}
+        assert result.trace[1].args["query"] == "BBB permeability of small lipophilic molecules"
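The behavior this new test pins down: with `enforce_workflow=True`, a model that never emits tool calls cannot skip the mandated sequence, because the orchestrator drives it itself before accepting a final answer. Roughly (a sketch of the contract only; the `tools` mapping and variable names are hypothetical, not the actual `Orchestrator` internals):

    # 1. Pipeline first: the router picks the tool and its args from the raw input.
    tool_name, args = workflow_router(user_input, context)  # ("run_bbb_pipeline", {"smiles": "CCO"})
    pipeline_trace = tools[tool_name].invoke(args)

    # 2. Retrieval second: the query is built from the input plus the pipeline trace.
    query = workflow_query_builder(user_input, pipeline_trace, context)
    tools[workflow_retrieval_tool].invoke({"query": query})

    # 3. Only after both forced calls is the model's text accepted as the final answer.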
tests/agents/test_tools.py
CHANGED
@@ -2,6 +2,8 @@
 from __future__ import annotations
 
 from pathlib import Path
+from types import SimpleNamespace
+from unittest.mock import patch
 
 import pytest
 from pydantic import BaseModel
@@ -91,6 +93,7 @@ class TestBuildDefaultTools:
         assert "input_path" in EEGPipelineInput.model_fields
         assert "input_dir" in MRIPipelineInput.model_fields
         assert "sites_csv" in MRIPipelineInput.model_fields
+        assert "sites_csv" not in MRIPipelineInput.model_json_schema().get("required", [])
         assert "query" in RetrieveContextInput.model_fields
         assert "k" in RetrieveContextInput.model_fields
 
@@ -116,7 +119,6 @@ class TestBuildDefaultTools:
         assert len(tools) == 4
 
     def test_bbb_executor_translates_httpexception_to_valueerror(self) -> None:
-        from unittest.mock import patch
         from fastapi import HTTPException
 
         tools = build_default_tools(rag_index_dir=None)
@@ -126,3 +128,25 @@ class TestBuildDefaultTools:
                    side_effect=HTTPException(status_code=503, detail="model missing")):
             with pytest.raises(ValueError, match="bbb tool failed"):
                 bbb.invoke({"smiles": "CCO"})
+
+    def test_mri_executor_defaults_sites_csv_to_input_dir_sites_csv(self, tmp_path: Path) -> None:
+        tools = build_default_tools(rag_index_dir=None, processed_dir=tmp_path / "processed")
+        mri = next(t for t in tools if t.name == "run_mri_pipeline")
+        input_dir = tmp_path / "mri"
+        input_dir.mkdir()
+
+        with patch(
+            "src.api.routes.run_mri",
+            return_value=SimpleNamespace(
+                output_path=str(tmp_path / "processed" / "mri_features.parquet"),
+                rows=2,
+                columns=3,
+                duration_sec=0.1,
+            ),
+        ) as run_mri:
+            out = mri.invoke({"input_dir": str(input_dir)})
+
+        assert out["rows"] == 2
+        req = run_mri.call_args.args[0]
+        assert req.input_dir == str(input_dir)
+        assert req.sites_csv == str(input_dir / "sites.csv")
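The defaulting rule the new MRI-executor test pins down, compressed to one line (a sketch of the behavior, not the executor's verbatim code; `payload` is a hypothetical name for the tool's input dict):

    sites_csv = payload.get("sites_csv") or str(Path(payload["input_dir"]) / "sites.csv")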
tests/api/test_routes.py
CHANGED
@@ -2,7 +2,9 @@
 from __future__ import annotations
 
 from pathlib import Path
+from unittest.mock import patch
 
+import pandas as pd
 import pytest
 from fastapi.testclient import TestClient
 
@@ -73,6 +75,22 @@ class TestMRIRoute:
         assert resp.json()["rows"] > 0
 
 
+class TestPipelineWrap:
+    def test_wrap_skips_mlflow_lookup_when_disabled(self, tmp_path: Path, monkeypatch):
+        from src.api import routes
+
+        out = tmp_path / "out.parquet"
+        pd.DataFrame({"x": [1]}).to_parquet(out)
+        monkeypatch.setenv("NEUROBRIDGE_DISABLE_MLFLOW", "1")
+
+        with patch("src.api.routes.mlflow.search_runs") as search_runs:
+            resp = routes._wrap("bbb_pipeline", out, lambda: None)
+
+        search_runs.assert_not_called()
+        assert resp.status == "ok"
+        assert resp.mlflow_run_id is None
+
+
 class TestBBBPredictRoute:
     def _setup_model_artifact(self, tmp_path: Path) -> Path:
         """Build features + train + save a tiny model. Returns artifact path."""
@@ -198,6 +216,56 @@ class TestBBBPredictRoute:
         assert resp.status_code == 503
 
 
+class TestMRIPredictRoute:
+    def test_returns_503_when_artifact_missing(self, tmp_path: Path, monkeypatch):
+        monkeypatch.setenv("MRI_MODEL_PATH", str(tmp_path / "missing.onnx"))
+
+        resp = client.post(
+            "/predict/mri",
+            json={"input_path": str(_FIXTURES / "mri_sample" / "subject_0.nii.gz")},
+        )
+
+        assert resp.status_code == 503
+        assert "MRI model artifact not available" in resp.text
+
+    def test_returns_404_when_input_missing(self, tmp_path: Path, monkeypatch):
+        from tests.fixtures.build_dummy_mri_onnx import build as build_dummy_mri_onnx
+
+        artifact = build_dummy_mri_onnx(tmp_path / "mri_model.onnx")
+        monkeypatch.setenv("MRI_MODEL_PATH", str(artifact))
+
+        resp = client.post(
+            "/predict/mri",
+            json={"input_path": str(tmp_path / "missing.nii.gz"), "target_shape": [8, 8, 8]},
+        )
+
+        assert resp.status_code == 404
+
+    def test_returns_200_with_prediction(self, tmp_path: Path, monkeypatch):
+        from tests.fixtures.build_dummy_mri_onnx import build as build_dummy_mri_onnx
+
+        artifact = build_dummy_mri_onnx(tmp_path / "mri_model.onnx")
+        monkeypatch.setenv("MRI_MODEL_PATH", str(artifact))
+
+        resp = client.post(
+            "/predict/mri",
+            json={
+                "input_path": str(_FIXTURES / "mri_sample" / "subject_0.nii.gz"),
+                "target_shape": [8, 8, 8],
+                "label_names": ["control", "abnormal"],
+            },
+        )
+
+        assert resp.status_code == 200, resp.text
+        body = resp.json()
+        assert body["label"] == 1
+        assert body["label_text"] == "abnormal"
+        assert body["confidence"] > 0.5
+        assert body["input_path"].endswith("subject_0.nii.gz")
+        assert body["model_path"] == str(artifact)
+        assert len(body["probabilities"]) == 2
+
+
 class TestMRIDiagnosticsRoute:
     def test_returns_200_with_pre_and_post_data(self, tmp_path: Path):
         from tests.fixtures.build_mri_fixture import build as build_mri
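For reference, the `/predict/mri` request and response shape exercised above, called against a running API (the host and port are assumptions, and `requests` is used purely for illustration; adjust to your deployment):

    import requests

    resp = requests.post(
        "http://localhost:8000/predict/mri",
        json={
            "input_path": "tests/fixtures/mri_sample/subject_0.nii.gz",
            "target_shape": [8, 8, 8],
            "label_names": ["control", "abnormal"],
        },
    )
    body = resp.json()
    # keys: label, label_text, confidence, probabilities, input_path, model_path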
tests/fixtures/build_dummy_mri_onnx.py
ADDED
@@ -0,0 +1,20 @@
"""Build a tiny ONNX MRI classifier fixture for API/model tests."""
from __future__ import annotations

from pathlib import Path


def build(path: Path, logits: tuple[float, float] = (0.1, 2.0)) -> Path:
    """Write an ONNX model that returns constant logits for any MRI tensor."""
    import onnx
    from onnx import TensorProto, helper

    input_info = helper.make_tensor_value_info("input", TensorProto.FLOAT, [1, 1, 8, 8, 8])
    output_info = helper.make_tensor_value_info("logits", TensorProto.FLOAT, [1, 2])
    value = helper.make_tensor("const_logits", TensorProto.FLOAT, [1, 2], list(logits))
    node = helper.make_node("Constant", inputs=[], outputs=["logits"], value=value)
    graph = helper.make_graph([node], "dummy_mri_classifier", [input_info], [output_info])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    model.ir_version = 10
    onnx.save(model, path)
    return path
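Why the fixture's default logits make the tests deterministic: the graph always emits `(0.1, 2.0)`, and `_as_probabilities` softmaxes anything that is not already a probability vector, giving roughly `(0.13, 0.87)`. Hence the assertions `label == 1` and `confidence > 0.5`. A quick check:

    import numpy as np

    logits = np.array([0.1, 2.0], dtype=np.float32)
    exp = np.exp(logits - logits.max())
    print(exp / exp.sum())  # ~ [0.130 0.870]: class 1 wins with ~0.87 confidence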
tests/models/test_mri_model.py
ADDED
@@ -0,0 +1,54 @@
"""Tests for src.models.mri_model — image-based MRI DL inference surface."""
from __future__ import annotations

from pathlib import Path

import numpy as np
import pytest

from src.models import mri_model
from tests.fixtures.build_dummy_mri_onnx import build as build_dummy_mri_onnx


_FIXTURE_MRI = Path(__file__).resolve().parents[1] / "fixtures" / "mri_sample" / "subject_0.nii.gz"


class TestMRIDLModel:
    def test_preprocess_volume_returns_batch_channel_tensor(self) -> None:
        volume = np.ones((4, 5, 6), dtype=np.float32)
        volume[1:3, 1:4, 2:5] = 5.0

        out = mri_model.preprocess_volume(volume, target_shape=(8, 8, 8))

        assert out.shape == (1, 1, 8, 8, 8)
        assert out.dtype == np.float32
        assert np.all(np.isfinite(out))

    def test_preprocess_rejects_nan_volume(self) -> None:
        volume = np.zeros((4, 4, 4), dtype=np.float32)
        volume[0, 0, 0] = np.nan

        with pytest.raises(ValueError, match="finite numeric 3-D"):
            mri_model.preprocess_volume(volume, target_shape=(8, 8, 8))

    def test_load_missing_artifact_raises(self, tmp_path: Path) -> None:
        with pytest.raises(FileNotFoundError, match="MRI model artifact not found"):
            mri_model.load(tmp_path / "missing.onnx")

    def test_predict_nifti_with_dummy_onnx(self, tmp_path: Path) -> None:
        artifact = build_dummy_mri_onnx(tmp_path / "mri_model.onnx")
        model = mri_model.load(artifact)

        result = mri_model.predict_nifti(
            model,
            _FIXTURE_MRI,
            target_shape=(8, 8, 8),
            label_names=("control", "abnormal"),
        )

        assert result["label"] == 1
        assert result["label_text"] == "abnormal"
        assert result["confidence"] > 0.5
        probs = result["probabilities"]
        assert len(probs) == 2
        assert sum(p["probability"] for p in probs) == pytest.approx(1.0, abs=1e-6)