test: cross-pipeline smoke run for all three modalities
Browse files
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
tests/pipelines/test_cross_pipeline_smoke.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""End-to-end smoke test exercising all three pipelines back-to-back.
|
| 2 |
+
|
| 3 |
+
Asserts each pipeline produces a non-empty Parquet at its expected schema —
|
| 4 |
+
the hackathon-judge "does the whole stack still work?" check. Each pipeline
|
| 5 |
+
uses its own fixture (no cross-modality data sharing).
|
| 6 |
+
"""
|
| 7 |
+
from __future__ import annotations
|
| 8 |
+
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
|
| 11 |
+
import pandas as pd
|
| 12 |
+
import pytest
|
| 13 |
+
|
| 14 |
+
from src.pipelines import bbb_pipeline, eeg_pipeline, mri_pipeline
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
# Repository root: this file lives at tests/pipelines/<name>.py, so
# parents[0] is tests/pipelines, parents[1] is tests, parents[2] is the root.
_REPO_ROOT = Path(__file__).resolve().parents[2]
|
| 18 |
+
# Directory the smoke tests read their fixture inputs from (tests/fixtures).
_FIXTURES = _REPO_ROOT / "tests" / "fixtures"
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def test_bbb_pipeline_smoke(tmp_path: Path) -> None:
    """Smoke-test the BBB pipeline on the committed CSV fixture.

    Runs ``bbb_pipeline.run_pipeline`` on ``bbbp_sample.csv`` from the fixtures
    directory, writing ``bbb.parquet`` under ``tmp_path``, then reads the
    Parquet back and checks that it holds 4 rows and exactly 2048 columns
    whose names start with ``fp_``.

    Args:
        tmp_path: Existing directory the output Parquet is written into.
    """
    parquet_path = tmp_path / "bbb.parquet"
    csv_fixture = _FIXTURES / "bbbp_sample.csv"
    bbb_pipeline.run_pipeline(input_path=csv_fixture, output_path=parquet_path)

    frame = pd.read_parquet(parquet_path)
    assert len(frame) == 4

    fp_columns = [name for name in frame.columns if name.startswith("fp_")]
    assert len(fp_columns) == 2048
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def test_eeg_pipeline_smoke(tmp_path: Path) -> None:
    """Smoke-test the EEG pipeline on the committed ``eeg_sample.fif`` fixture.

    The committed file is used directly (the original note records that
    ``build_eeg_fixture.build()`` takes no args). If the file is absent the
    test is skipped rather than failed. Otherwise the pipeline writes
    ``eeg.parquet`` under ``tmp_path``, and the result must hold 5 rows and at
    least one column whose name starts with ``feat_``.

    Args:
        tmp_path: Existing directory the output Parquet is written into.
    """
    fif = _FIXTURES / "eeg_sample.fif"
    if not fif.exists():
        pytest.skip(f"Committed EEG fixture missing: {fif}")

    parquet_path = tmp_path / "eeg.parquet"
    eeg_pipeline.run_pipeline(input_path=fif, output_path=parquet_path)

    frame = pd.read_parquet(parquet_path)
    assert len(frame) == 5

    feature_names = [name for name in frame.columns if name.startswith("feat_")]
    assert feature_names
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def test_mri_pipeline_smoke(tmp_path: Path) -> None:
    """Smoke-test the MRI pipeline on inputs produced by the fixture builder.

    The builder is asked to materialize its NIfTI inputs and ``sites.csv``
    into ``tmp_path / "mri_fixture"``; the directory it returns is used both
    as the pipeline's input directory and as the location of ``sites.csv``.
    The resulting ``mri.parquet`` must hold 6 rows and include the
    ``subject_id`` and ``site`` columns.

    Args:
        tmp_path: Existing directory for the generated fixture and the output.
    """
    # Imported lazily, as in the original, so the builder is only loaded when
    # this test actually runs.
    from tests.fixtures.build_mri_fixture import build as build_mri

    mri_inputs = build_mri(out_dir=tmp_path / "mri_fixture")
    parquet_path = tmp_path / "mri.parquet"
    mri_pipeline.run_pipeline(
        input_dir=mri_inputs,
        sites_csv=mri_inputs / "sites.csv",
        output_path=parquet_path,
    )

    frame = pd.read_parquet(parquet_path)
    assert len(frame) == 6
    for required in ("subject_id", "site"):
        assert required in frame.columns
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def test_all_three_pipelines_run_in_one_process(tmp_path: Path) -> None:
    """Sanity: nothing in pipeline A leaks state that breaks pipeline B.

    Runs the BBB, EEG and MRI smoke tests back-to-back inside a single test
    call, each in its own sub-directory of ``tmp_path``.

    Note: the EEG smoke test calls ``pytest.skip`` when its fixture file is
    missing; in that case the skip propagates out of this test and the MRI
    step is not reached.

    Args:
        tmp_path: Existing directory under which one working directory per
            pipeline is created.
    """
    runs = (
        ("bbb", test_bbb_pipeline_smoke),
        ("eeg", test_eeg_pipeline_smoke),
        ("mri", test_mri_pipeline_smoke),
    )
    for name, smoke_test in runs:
        work_dir = tmp_path / name
        # pytest creates tmp_path itself but not these children, and the
        # individual smoke tests write their Parquet straight into the
        # directory they are handed, so it has to exist first.
        work_dir.mkdir(parents=True, exist_ok=True)
        smoke_test(work_dir)
|