Spaces:

mekosotto
/

hackathon

Running

App Files Files Community

mekosotto committed 4 days ago

Commit

a3f2882

1 Parent(s): 27a97bf

feat(models): EEG classifier loader + predict (stub-able for hackathon demo)

Browse files

Files changed (3) hide show

src/models/eeg_model.py +72 -0
tests/fixtures/build_dummy_eeg_clf.py +34 -0
tests/models/test_eeg_model.py +56 -0

src/models/eeg_model.py ADDED Viewed

	@@ -0,0 +1,72 @@

+"""EEG classifier inference utilities.
+Loads any sklearn-style classifier (object with `predict_proba`) from joblib
+and emits the same dict shape as src.models.mri_model.predict_with_proba so
+the API surface and fusion engine treat MRI and EEG predictions identically.
+The real pretrained artifact swaps in at data/processed/eeg_clf.joblib (or
+override via EEG_CLF_ARTIFACT env). Tests use a stub fixture; the real model
+drops in without code changes.
+"""
+from __future__ import annotations
+import os
+from pathlib import Path
+from typing import Any, Sequence
+import joblib
+import numpy as np
+from src.core.logger import get_logger
+logger = get_logger(__name__)
+DEFAULT_LABELS: tuple[str, ...] = ("control", "alzheimers")
def _resolve_labels() -> tuple[str, ...]:
    """Return the class-label names used to annotate EEG predictions.

    Reads the ``EEG_CLF_LABELS`` environment variable as a comma-separated
    list, stripping whitespace around each entry and dropping empty entries.

    Returns:
        The parsed labels, or ``DEFAULT_LABELS`` when the variable is unset,
        empty, or contains no usable entry (for example ``" "`` or ``","``),
        so a blank override never produces an empty label tuple.
    """
    raw = os.environ.get("EEG_CLF_LABELS", "")
    stripped = (part.strip() for part in raw.split(","))
    parsed = tuple(name for name in stripped if name)
    # An override that parses to nothing is treated the same as "not set".
    return parsed or DEFAULT_LABELS
def load(path: Path) -> Any:
    """Load a serialized EEG classifier from ``path``.

    Args:
        path: Location of the joblib artifact; anything ``Path`` accepts.

    Returns:
        The object that ``joblib.load`` deserializes from the file.

    Raises:
        FileNotFoundError: If ``path`` is not an existing regular file
            (the path is missing, or it points at a directory).
    """
    path = Path(path)
    # is_file() instead of exists(): a directory at this location is not a
    # usable artifact and should surface as the same, clear error.
    if not path.is_file():
        raise FileNotFoundError(f"EEG classifier artifact not found: {path}")
    # SECURITY: joblib artifacts are pickle-based, so loading one can execute
    # arbitrary code. Only point this at artifacts from a trusted source.
    return joblib.load(str(path))
+def predict_features(
+    model: Any,
+    features: np.ndarray,
+    labels: Sequence[str] | None = None,
+) -> dict[str, Any]:
+    """Run inference on one row of EEG features."""
+    arr = np.asarray(features, dtype=np.float32).reshape(-1)
+    expected = int(getattr(model, "n_features_in_", arr.size))
+    if arr.size != expected:
+        raise ValueError(
+            f"EEG feature count mismatch: model expects {expected}, got {arr.size}"
+        )
+    proba = np.asarray(model.predict_proba(arr.reshape(1, -1))[0], dtype=np.float32)
+    label_names = tuple(labels or _resolve_labels())
+    if len(label_names) != proba.shape[0]:
+        logger.warning(
+            "EEG label count (%d) != model output dim (%d); falling back to class_0..N",
+            len(label_names), proba.shape[0],
+        )
+        label_names = tuple(f"class_{i}" for i in range(proba.shape[0]))
+    label_idx = int(np.argmax(proba))
+    return {
+        "label": label_idx,
+        "label_text": label_names[label_idx],
+        "confidence": float(proba[label_idx]),
+        "probabilities": [
+            {"label": i, "label_text": label_names[i], "probability": float(p)}
+            for i, p in enumerate(proba)
+        ],
+    }

tests/fixtures/build_dummy_eeg_clf.py ADDED Viewed

	@@ -0,0 +1,34 @@

+"""Build a stub EEG classifier (sklearn RF) for tests.
+Demo-time placeholder — produces a 2-class probability output matching the
+eeg_model.predict_features contract. Replace with the real artifact when
+the user provides it; tests don't change.
+"""
+from __future__ import annotations
+from pathlib import Path
+import joblib
+import numpy as np
+from sklearn.ensemble import RandomForestClassifier
def build(path: Path, n_features: int = 16, seed: int = 0) -> Path:
    """Write a fitted stub ``RandomForestClassifier`` to ``path`` and return it.

    If something already exists at ``path`` it is left untouched and the path
    is returned immediately. Otherwise the classifier is fitted on 200
    synthetic rows: half drawn from N(0, 1) with class 0 and half from
    N(2, 1) with class 1, each row having ``n_features`` columns.

    Args:
        path: Destination of the joblib file; parent folders are created.
        n_features: Number of feature columns in the synthetic data.
        seed: Seed for both the data generator and the forest.

    Returns:
        ``path`` as a ``Path``.
    """
    target = Path(path)
    if not target.exists():
        target.parent.mkdir(parents=True, exist_ok=True)

        generator = np.random.default_rng(seed)
        total_rows = 200
        alz_rows = total_rows // 2
        ctrl_rows = total_rows - alz_rows

        # Draw order is part of the output: class-0 rows first, then class-1.
        ctrl_samples = generator.normal(0.0, 1.0, size=(ctrl_rows, n_features))
        alz_samples = generator.normal(2.0, 1.0, size=(alz_rows, n_features))
        samples = np.vstack([ctrl_samples, alz_samples])
        targets = np.array([0] * ctrl_rows + [1] * alz_rows)

        forest = RandomForestClassifier(
            n_estimators=12, max_depth=6, random_state=seed
        )
        forest.fit(samples, targets)
        joblib.dump(forest, str(target))
    return target

tests/models/test_eeg_model.py ADDED Viewed

	@@ -0,0 +1,56 @@

+"""Tests for src.models.eeg_model."""
+from __future__ import annotations
+from pathlib import Path
+import numpy as np
+import pytest
+from src.models import eeg_model
+from tests.fixtures.build_dummy_eeg_clf import build as build_dummy_eeg
class TestEEGModel:
    """Tests for ``eeg_model.load`` and ``eeg_model.predict_features``.

    Tests that assert on the default label names clear ``EEG_CLF_LABELS``
    first, so a value exported in the developer's or CI shell cannot change
    the outcome.
    """

    N_FEATURES = 16

    @staticmethod
    def _stub_classifier(tmp_path: Path):
        """Build the dummy classifier under ``tmp_path`` and load it back."""
        ckpt = build_dummy_eeg(
            tmp_path / "eeg.joblib", n_features=TestEEGModel.N_FEATURES
        )
        return eeg_model.load(ckpt)

    def test_load_missing_artifact_raises(self, tmp_path: Path) -> None:
        with pytest.raises(FileNotFoundError, match="EEG classifier artifact not found"):
            eeg_model.load(tmp_path / "nope.joblib")

    def test_predict_returns_full_dict(self, tmp_path: Path, monkeypatch) -> None:
        # The label_text assertion below is only valid for the default labels.
        monkeypatch.delenv("EEG_CLF_LABELS", raising=False)
        clf = self._stub_classifier(tmp_path)
        features = np.zeros((self.N_FEATURES,), dtype=np.float32)
        out = eeg_model.predict_features(clf, features)
        assert set(out) == {"label", "label_text", "confidence", "probabilities"}
        assert out["label"] in {0, 1}
        assert out["label_text"] in eeg_model.DEFAULT_LABELS
        assert 0.0 <= out["confidence"] <= 1.0
        probs = out["probabilities"]
        assert len(probs) == 2
        assert abs(sum(p["probability"] for p in probs) - 1.0) < 1e-5

    def test_alzheimers_separation_with_synthetic_features(
        self, tmp_path: Path, monkeypatch
    ) -> None:
        # Expected names are the defaults, so ignore any ambient override.
        monkeypatch.delenv("EEG_CLF_LABELS", raising=False)
        clf = self._stub_classifier(tmp_path)
        alz_features = np.full((self.N_FEATURES,), 2.0, dtype=np.float32)
        ctrl_features = np.zeros((self.N_FEATURES,), dtype=np.float32)
        alz_pred = eeg_model.predict_features(clf, alz_features)
        ctrl_pred = eeg_model.predict_features(clf, ctrl_features)
        assert alz_pred["label_text"] == "alzheimers"
        assert ctrl_pred["label_text"] == "control"

    def test_label_override_via_env(self, tmp_path: Path, monkeypatch) -> None:
        overridden = ("no_disease", "alzheimers")
        monkeypatch.setenv("EEG_CLF_LABELS", ",".join(overridden))
        clf = self._stub_classifier(tmp_path)
        out = eeg_model.predict_features(
            clf, np.zeros((self.N_FEATURES,), dtype=np.float32)
        )
        assert out["label_text"] in set(overridden)
        # The name must be the overridden entry at the predicted index.
        assert out["label_text"] == overridden[out["label"]]

    def test_feature_count_mismatch_raises(self, tmp_path: Path) -> None:
        clf = self._stub_classifier(tmp_path)
        with pytest.raises(ValueError, match="feature count"):
            eeg_model.predict_features(clf, np.zeros((8,), dtype=np.float32))