feat(api): expose calibration bin in /predict/bbb response
- Adds CalibrationContext schema (threshold/precision/support).
- BBBPredictResponse gains optional calibration field; populated by
_matching_calibration_bin helper that picks the highest-threshold
bin whose threshold <= confidence.
- Returns None for legacy models without _neurobridge_calibration or
when confidence < lowest threshold (< 0.50).
- Extends existing 200-happy-path test with calibration assertions.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
- src/api/routes.py +23 -0
- src/api/schemas.py +11 -0
- tests/api/test_routes.py +13 -0
src/api/routes.py
CHANGED
|
@@ -20,6 +20,7 @@ from src.api.schemas import (
|
|
| 20 |
BBBPredictRequest,
|
| 21 |
BBBPredictResponse,
|
| 22 |
BBBRequest,
|
|
|
|
| 23 |
EEGRequest,
|
| 24 |
FeatureAttribution,
|
| 25 |
MRIRequest,
|
|
@@ -126,6 +127,26 @@ def _bbb_model_path() -> Path:
|
|
| 126 |
return Path(os.environ.get("BBB_MODEL_PATH", str(_DEFAULT_BBB_MODEL_PATH)))
|
| 127 |
|
| 128 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
@predict_router.post("/bbb", response_model=BBBPredictResponse)
|
| 130 |
def predict_bbb(req: BBBPredictRequest) -> BBBPredictResponse:
|
| 131 |
"""Predict BBB permeability + return SHAP attributions for one SMILES.
|
|
@@ -155,9 +176,11 @@ def predict_bbb(req: BBBPredictRequest) -> BBBPredictResponse:
|
|
| 155 |
raise HTTPException(status_code=400, detail=str(e))
|
| 156 |
|
| 157 |
label_text = "permeable" if pred["label"] == 1 else "non-permeable"
|
|
|
|
| 158 |
return BBBPredictResponse(
|
| 159 |
label=pred["label"],
|
| 160 |
label_text=label_text,
|
| 161 |
confidence=pred["confidence"],
|
| 162 |
top_features=[FeatureAttribution(**a) for a in attributions],
|
|
|
|
| 163 |
)
|
|
|
|
| 20 |
BBBPredictRequest,
|
| 21 |
BBBPredictResponse,
|
| 22 |
BBBRequest,
|
| 23 |
+
CalibrationContext,
|
| 24 |
EEGRequest,
|
| 25 |
FeatureAttribution,
|
| 26 |
MRIRequest,
|
|
|
|
| 127 |
return Path(os.environ.get("BBB_MODEL_PATH", str(_DEFAULT_BBB_MODEL_PATH)))
|
| 128 |
|
| 129 |
|
| 130 |
+
def _matching_calibration_bin(model, confidence: float) -> CalibrationContext | None:
    """Return the calibration bin that applies to *confidence*, or ``None``.

    Scans the ``_neurobridge_calibration`` metadata attached to *model* and
    selects the bin with the highest threshold that is still <= *confidence*.

    Returns ``None`` when the model carries no calibration metadata (legacy
    artifacts) or when *confidence* falls below every bin's threshold.
    """
    # Legacy models trained before calibration metadata existed lack this
    # attribute entirely; an empty list is treated the same way.
    bins = getattr(model, "_neurobridge_calibration", None)
    if not bins:
        return None
    # Keep every bin the confidence clears, then pick the tightest one.
    # Unlike a sorted-scan-with-break, this stays correct even if the
    # metadata list is not sorted ascending by threshold.
    eligible = [b for b in bins if b["threshold"] <= confidence]
    if not eligible:
        return None
    matched = max(eligible, key=lambda b: b["threshold"])
    return CalibrationContext(
        threshold=matched["threshold"],
        precision=matched["precision"],
        support=matched["support"],
    )
|
| 148 |
+
|
| 149 |
+
|
| 150 |
@predict_router.post("/bbb", response_model=BBBPredictResponse)
|
| 151 |
def predict_bbb(req: BBBPredictRequest) -> BBBPredictResponse:
|
| 152 |
"""Predict BBB permeability + return SHAP attributions for one SMILES.
|
|
|
|
| 176 |
raise HTTPException(status_code=400, detail=str(e))
|
| 177 |
|
| 178 |
label_text = "permeable" if pred["label"] == 1 else "non-permeable"
|
| 179 |
+
calibration = _matching_calibration_bin(model, pred["confidence"])
|
| 180 |
return BBBPredictResponse(
|
| 181 |
label=pred["label"],
|
| 182 |
label_text=label_text,
|
| 183 |
confidence=pred["confidence"],
|
| 184 |
top_features=[FeatureAttribution(**a) for a in attributions],
|
| 185 |
+
calibration=calibration,
|
| 186 |
)
|
src/api/schemas.py
CHANGED
|
@@ -63,9 +63,20 @@ class FeatureAttribution(BaseModel):
|
|
| 63 |
)
|
| 64 |
|
| 65 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
class BBBPredictResponse(BaseModel):
    """Decision-system payload: prediction + uncertainty + explanation."""
    # Binary class index: 1 = permeable, 0 = non-permeable.
    label: int
    label_text: str = Field(..., description="'permeable' or 'non-permeable'")
    # Model confidence for the predicted label; presumably in [0, 1] — confirm against the predictor.
    confidence: float
    # SHAP attributions for the most influential input features.
    top_features: list[FeatureAttribution]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
)
|
| 64 |
|
| 65 |
|
| 66 |
+
class CalibrationContext(BaseModel):
    """Precision-at-confidence-threshold bin matched to a single prediction."""

    # Lower edge of the bin: a prediction matches when confidence >= threshold.
    threshold: float = Field(
        ...,
        description="Lowest confidence threshold this bin covers (0.0-1.0)",
    )
    precision: float = Field(
        ...,
        description="Precision on the held-out test set among predictions ≥ threshold",
    )
    support: int = Field(
        ...,
        description="Number of held-out predictions falling in this bin",
    )
|
| 71 |
+
|
| 72 |
+
|
| 73 |
class BBBPredictResponse(BaseModel):
    """Decision-system payload: prediction + uncertainty + explanation."""

    label: int
    label_text: str = Field(..., description="'permeable' or 'non-permeable'")
    confidence: float
    top_features: list[FeatureAttribution]
    # Optional: absent (None) for legacy model artifacts without calibration metadata.
    calibration: CalibrationContext | None = Field(
        default=None,
        description=(
            "Statistical context: how often the model is right when this "
            "confident on held-out data."
        ),
    )
|
tests/api/test_routes.py
CHANGED
|
@@ -90,6 +90,7 @@ class TestBBBPredictRoute:
|
|
| 90 |
return artifact
|
| 91 |
|
| 92 |
def test_returns_200_with_prediction_and_attributions(self, tmp_path: Path, monkeypatch):
|
|
|
|
| 93 |
artifact = self._setup_model_artifact(tmp_path)
|
| 94 |
monkeypatch.setenv("BBB_MODEL_PATH", str(artifact))
|
| 95 |
|
|
@@ -106,6 +107,18 @@ class TestBBBPredictRoute:
|
|
| 106 |
for f in body["top_features"]:
|
| 107 |
assert f["feature"].startswith("fp_")
|
| 108 |
assert isinstance(f["shap_value"], float)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
|
| 110 |
def test_returns_400_on_invalid_smiles(self, tmp_path: Path, monkeypatch):
|
| 111 |
artifact = self._setup_model_artifact(tmp_path)
|
|
|
|
| 90 |
return artifact
|
| 91 |
|
| 92 |
def test_returns_200_with_prediction_and_attributions(self, tmp_path: Path, monkeypatch):
|
| 93 |
+
import pytest
|
| 94 |
artifact = self._setup_model_artifact(tmp_path)
|
| 95 |
monkeypatch.setenv("BBB_MODEL_PATH", str(artifact))
|
| 96 |
|
|
|
|
| 107 |
for f in body["top_features"]:
|
| 108 |
assert f["feature"].startswith("fp_")
|
| 109 |
assert isinstance(f["shap_value"], float)
|
| 110 |
+
# Day-6 calibration assertions: trained test fixture model has
|
| 111 |
+
# _neurobridge_calibration metadata, so calibration must be populated.
|
| 112 |
+
assert body["calibration"] is not None
|
| 113 |
+
cal = body["calibration"]
|
| 114 |
+
valid_thresholds = [0.50, 0.60, 0.70, 0.75, 0.80, 0.90]
|
| 115 |
+
assert any(
|
| 116 |
+
cal["threshold"] == pytest.approx(t) for t in valid_thresholds
|
| 117 |
+
), f"threshold {cal['threshold']} not in {valid_thresholds}"
|
| 118 |
+
assert cal["threshold"] <= body["confidence"]
|
| 119 |
+
assert 0.0 <= cal["precision"] <= 1.0
|
| 120 |
+
assert isinstance(cal["support"], int)
|
| 121 |
+
assert cal["support"] >= 0
|
| 122 |
|
| 123 |
def test_returns_400_on_invalid_smiles(self, tmp_path: Path, monkeypatch):
|
| 124 |
artifact = self._setup_model_artifact(tmp_path)
|