Spaces:

mekosotto
/

hackathon

Running

App Files Files Community

mekosotto committed 7 days ago

Commit

ae883d4

1 Parent(s): 7dad1a9

feat(api): POST /predict/bbb with prediction, uncertainty, SHAP top-k

Browse files

Files changed (4) hide show

src/api/main.py +2 -1
src/api/routes.py +53 -0
src/api/schemas.py +23 -0
tests/api/test_routes.py +54 -0

src/api/main.py CHANGED Viewed

@@ -6,7 +6,7 @@ from __future__ import annotations
 from fastapi import FastAPI
-from src.api.routes import router as pipeline_router
 from src.api.schemas import HealthResponse
 app = FastAPI(
@@ -16,6 +16,7 @@ app = FastAPI(
 )
 app.include_router(pipeline_router)
 @app.get("/health", response_model=HealthResponse)

 from fastapi import FastAPI
+from src.api.routes import router as pipeline_router, predict_router
 from src.api.schemas import HealthResponse
 app = FastAPI(
 )
 app.include_router(pipeline_router)
+app.include_router(predict_router)
 @app.get("/health", response_model=HealthResponse)

src/api/routes.py CHANGED Viewed

@@ -7,6 +7,7 @@ codes: FileNotFoundError -> 404, ValueError -> 400, anything else -> 500.
 """
 from __future__ import annotations
 import time
 from pathlib import Path
 from typing import Callable
@@ -16,16 +17,21 @@ import pandas as pd
 from fastapi import APIRouter, HTTPException
 from src.api.schemas import (
     BBBRequest,
     EEGRequest,
     MRIRequest,
     PipelineResponse,
 )
 from src.core.logger import get_logger
 from src.pipelines import bbb_pipeline, eeg_pipeline, mri_pipeline
 logger = get_logger(__name__)
 router = APIRouter(prefix="/pipeline")
 def _wrap(
@@ -108,3 +114,50 @@ def run_mri(req: MRIRequest) -> PipelineResponse:
             output_path=Path(req.output_path),
         ),
     )

 """
 from __future__ import annotations
+import os
 import time
 from pathlib import Path
 from typing import Callable
 from fastapi import APIRouter, HTTPException
 from src.api.schemas import (
+    BBBPredictRequest,
+    BBBPredictResponse,
     BBBRequest,
     EEGRequest,
+    FeatureAttribution,
     MRIRequest,
     PipelineResponse,
 )
 from src.core.logger import get_logger
+from src.models import bbb_model
 from src.pipelines import bbb_pipeline, eeg_pipeline, mri_pipeline
 logger = get_logger(__name__)
 router = APIRouter(prefix="/pipeline")
+predict_router = APIRouter(prefix="/predict")
 def _wrap(
             output_path=Path(req.output_path),
         ),
     )
+# Default artifact location. Overridable via BBB_MODEL_PATH env var so tests
+# can point at a tmp-built model without touching production paths.
+_DEFAULT_BBB_MODEL_PATH = Path("data/processed/bbb_model.joblib")
+def _bbb_model_path() -> Path:
+    """Resolve the on-disk location of the BBB model artifact.
+    The ``BBB_MODEL_PATH`` environment variable takes precedence when it is
+    set; otherwise the module-level default path is used. The environment is
+    read on every call, so a change to the variable is picked up by the next
+    lookup.
+    """
+    override = os.environ.get("BBB_MODEL_PATH")
+    if override is not None:
+        return Path(override)
+    return Path(str(_DEFAULT_BBB_MODEL_PATH))
+@predict_router.post("/bbb", response_model=BBBPredictResponse)
+def predict_bbb(req: BBBPredictRequest) -> BBBPredictResponse:
+    """Predict BBB permeability + return SHAP attributions for one SMILES.
+    Returns 503 if the model artifact is missing (operator hasn't run the
+    trainer CLI yet); 400 on invalid SMILES; 200 with the decision payload
+    on success.
+    Raises:
+        HTTPException: 503 when the artifact path does not exist or loading
+            it raises ``FileNotFoundError``; 400 when prediction or
+            explanation raises ``ValueError``.
+    """
+    artifact = _bbb_model_path()
+    # Checked up front so the operator gets an actionable hint instead of a
+    # bare loader error.
+    if not artifact.exists():
+        raise HTTPException(
+            status_code=503,
+            detail=(
+                f"BBB model artifact not available at {artifact}. "
+                f"Run `python -m src.models.bbb_model` to train it."
+            ),
+        )
+    # NOTE(review): the artifact is loaded on every request; consider caching
+    # the loaded model if this shows up in request latency.
+    try:
+        model = bbb_model.load(artifact)
+    except FileNotFoundError as e:
+        # The file can still vanish between the exists() check and the load.
+        # Chain the cause so the original traceback is preserved.
+        raise HTTPException(status_code=503, detail=str(e)) from e
+    try:
+        pred = bbb_model.predict_with_proba(model, req.smiles)
+        attributions = bbb_model.explain_prediction(model, req.smiles, top_k=req.top_k)
+    except ValueError as e:
+        # A ValueError from the model layer is reported as a client error.
+        raise HTTPException(status_code=400, detail=str(e)) from e
+    label_text = "permeable" if pred["label"] == 1 else "non-permeable"
+    return BBBPredictResponse(
+        label=pred["label"],
+        label_text=label_text,
+        confidence=pred["confidence"],
+        top_features=[FeatureAttribution(**a) for a in attributions],
+    )

src/api/schemas.py CHANGED Viewed

@@ -46,3 +46,26 @@ class PipelineResponse(BaseModel):
 class HealthResponse(BaseModel):
     status: str
     pipelines: list[str]

 class HealthResponse(BaseModel):
     status: str
     pipelines: list[str]
+class BBBPredictRequest(BaseModel):
+    """Single-molecule BBB-permeability prediction request."""
+    # Required: the molecule to score, given as a SMILES string.
+    smiles: str = Field(
+        default=...,
+        description="SMILES string; e.g. 'CCO' for ethanol",
+    )
+    # Optional: how many attributions to return; validated to 1..20, default 5.
+    top_k: int = Field(
+        default=5,
+        ge=1,
+        le=20,
+        description="Top-k SHAP features to return",
+    )
+class FeatureAttribution(BaseModel):
+    """A single SHAP attribution: which fingerprint bit contributed and by how much."""
+    # Required: name of the feature column this attribution refers to.
+    feature: str = Field(
+        default=...,
+        description="Fingerprint column name, e.g. 'fp_1234'",
+    )
+    # Required: signed contribution of that feature to the prediction.
+    shap_value: float = Field(
+        default=...,
+        description="Signed SHAP value for the predicted class (positive pushed model toward, negative away)",
+    )
+class BBBPredictResponse(BaseModel):
+    """Decision-system payload: prediction + uncertainty + explanation."""
+    # Predicted class as an integer; the route maps 1 to "permeable".
+    label: int
+    # Human-readable form of ``label``.
+    label_text: str = Field(
+        default=...,
+        description="'permeable' or 'non-permeable'",
+    )
+    # Confidence reported by the model for the predicted label.
+    confidence: float
+    # Feature attributions returned alongside the prediction.
+    top_features: list[FeatureAttribution]

tests/api/test_routes.py CHANGED Viewed

@@ -70,3 +70,57 @@ class TestMRIRoute:
         )
         assert resp.status_code == 200
         assert resp.json()["rows"] > 0

         )
         assert resp.status_code == 200
         assert resp.json()["rows"] > 0
+class TestBBBPredictRoute:
+    """Route-level checks for ``POST /predict/bbb``: success, bad input, missing model."""
+    def _setup_model_artifact(self, tmp_path: Path) -> Path:
+        """Featurize the sample fixture, fit a tiny model, save it, and return its path."""
+        from src.pipelines import bbb_pipeline
+        from src.models import bbb_model
+        import pandas as pd
+        parquet_file = tmp_path / "features.parquet"
+        bbb_pipeline.run_pipeline(
+            input_path=_FIXTURES / "bbbp_sample.csv",
+            output_path=parquet_file,
+        )
+        # Small estimator count keeps the fixture fast; the seed keeps it repeatable.
+        trained = bbb_model.train(
+            pd.read_parquet(parquet_file),
+            label_col="p_np",
+            n_estimators=10,
+            random_state=42,
+        )
+        model_file = tmp_path / "bbb_model.joblib"
+        bbb_model.save(trained, model_file)
+        return model_file
+    def test_returns_200_with_prediction_and_attributions(self, tmp_path: Path, monkeypatch):
+        """A valid SMILES yields a label, a confidence in [0, 1], and five attributions."""
+        model_file = self._setup_model_artifact(tmp_path)
+        monkeypatch.setenv("BBB_MODEL_PATH", str(model_file))
+        payload = {"smiles": "CCO", "top_k": 5}
+        resp = client.post("/predict/bbb", json=payload)
+        assert resp.status_code == 200
+        body = resp.json()
+        assert body["label"] in (0, 1)
+        assert body["label_text"] in ("permeable", "non-permeable")
+        assert 0.0 <= body["confidence"] <= 1.0
+        attributions = body["top_features"]
+        assert len(attributions) == 5
+        for entry in attributions:
+            assert entry["feature"].startswith("fp_")
+            assert isinstance(entry["shap_value"], float)
+    def test_returns_400_on_invalid_smiles(self, tmp_path: Path, monkeypatch):
+        """An unparseable SMILES string is rejected as a client error."""
+        model_file = self._setup_model_artifact(tmp_path)
+        monkeypatch.setenv("BBB_MODEL_PATH", str(model_file))
+        payload = {"smiles": "this_is_not_a_smiles", "top_k": 5}
+        resp = client.post("/predict/bbb", json=payload)
+        assert resp.status_code == 400
+    def test_returns_503_when_artifact_missing(self, tmp_path: Path, monkeypatch):
+        """Pointing the route at a non-existent artifact reports service unavailable."""
+        missing_file = tmp_path / "does_not_exist.joblib"
+        monkeypatch.setenv("BBB_MODEL_PATH", str(missing_file))
+        payload = {"smiles": "CCO", "top_k": 5}
+        resp = client.post("/predict/bbb", json=payload)
+        assert resp.status_code == 503