feat(api+frontend): MLflow provenance badge in decision card
Browse files
- ModelProvenance schema (mlflow_run_id, model_version, train_date,
n_examples). BBBPredictResponse.provenance is always populated; failed
MLflow lookup degrades to None fields without breaking the response.
- _build_provenance() module-level cache: one MLflow query per worker.
NEUROBRIDGE_DISABLE_MLFLOW=1 short-circuits to None fields. n_examples
pulled per-request from model._neurobridge_train_stats.
- Streamlit decision card renders a one-line audit badge above the
label: run id (first 8 chars), model version, train date, n_examples.
- 1 new test: provenance field present in /predict/bbb body with the
fixture model (n_examples ≥ 1 from train stats).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
- src/api/routes.py +56 -0
- src/api/schemas.py +12 -0
- src/frontend/app.py +12 -0
- tests/api/test_routes.py +19 -0
src/api/routes.py
CHANGED
|
@@ -25,6 +25,7 @@ from src.api.schemas import (
|
|
| 25 |
EEGRequest,
|
| 26 |
FeatureAttribution,
|
| 27 |
HarmonizationRow,
|
|
|
|
| 28 |
MRIDiagnosticsRequest,
|
| 29 |
MRIDiagnosticsResponse,
|
| 30 |
MRIRequest,
|
|
@@ -160,6 +161,59 @@ def _compute_drift_z(model, confidence: float) -> tuple[float | None, int]:
|
|
| 160 |
return float(drift_z), rolling_n
|
| 161 |
|
| 162 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
def _matching_calibration_bin(model, confidence: float) -> CalibrationContext | None:
|
| 164 |
"""Pick the highest-threshold bin whose threshold <= confidence. None if no match or no metadata."""
|
| 165 |
bins = getattr(model, "_neurobridge_calibration", None)
|
|
@@ -211,6 +265,7 @@ def predict_bbb(req: BBBPredictRequest) -> BBBPredictResponse:
|
|
| 211 |
label_text = "permeable" if pred["label"] == 1 else "non-permeable"
|
| 212 |
calibration = _matching_calibration_bin(model, pred["confidence"])
|
| 213 |
drift_z, rolling_n = _compute_drift_z(model, pred["confidence"])
|
|
|
|
| 214 |
return BBBPredictResponse(
|
| 215 |
label=pred["label"],
|
| 216 |
label_text=label_text,
|
|
@@ -219,6 +274,7 @@ def predict_bbb(req: BBBPredictRequest) -> BBBPredictResponse:
|
|
| 219 |
calibration=calibration,
|
| 220 |
drift_z=drift_z,
|
| 221 |
rolling_n=rolling_n,
|
|
|
|
| 222 |
)
|
| 223 |
|
| 224 |
|
|
|
|
| 25 |
EEGRequest,
|
| 26 |
FeatureAttribution,
|
| 27 |
HarmonizationRow,
|
| 28 |
+
ModelProvenance,
|
| 29 |
MRIDiagnosticsRequest,
|
| 30 |
MRIDiagnosticsResponse,
|
| 31 |
MRIRequest,
|
|
|
|
| 161 |
return float(drift_z), rolling_n
|
| 162 |
|
| 163 |
|
| 164 |
+
_PROVENANCE_CACHE: ModelProvenance | None = None
|
| 165 |
+
_MODEL_VERSION = "v1" # bump manually per train cycle
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
def _build_provenance(model) -> ModelProvenance:
|
| 169 |
+
"""Look up the most recent BBB MLflow run; build a ModelProvenance.
|
| 170 |
+
|
| 171 |
+
Cached at module level so we hit MLflow once per worker. Failures (no
|
| 172 |
+
runs found, MLflow unreachable, NEUROBRIDGE_DISABLE_MLFLOW=1) all
|
| 173 |
+
degrade to a partial ModelProvenance with mlflow_run_id=None — the
|
| 174 |
+
badge still renders, just without a run id.
|
| 175 |
+
"""
|
| 176 |
+
global _PROVENANCE_CACHE
|
| 177 |
+
if _PROVENANCE_CACHE is not None:
|
| 178 |
+
# Refresh n_examples each call from the model (cheap lookup).
|
| 179 |
+
n_train = None
|
| 180 |
+
stats = getattr(model, "_neurobridge_train_stats", None)
|
| 181 |
+
if stats is not None:
|
| 182 |
+
n_train = int(stats.get("n_train", 0)) or None
|
| 183 |
+
return _PROVENANCE_CACHE.model_copy(update={"n_examples": n_train})
|
| 184 |
+
|
| 185 |
+
run_id: str | None = None
|
| 186 |
+
train_date: str | None = None
|
| 187 |
+
if os.environ.get("NEUROBRIDGE_DISABLE_MLFLOW") != "1":
|
| 188 |
+
try:
|
| 189 |
+
runs = mlflow.search_runs(
|
| 190 |
+
experiment_names=["bbb_pipeline"],
|
| 191 |
+
max_results=1,
|
| 192 |
+
order_by=["start_time DESC"],
|
| 193 |
+
)
|
| 194 |
+
if len(runs):
|
| 195 |
+
row = runs.iloc[0]
|
| 196 |
+
run_id = str(row["run_id"])
|
| 197 |
+
ts = row.get("start_time")
|
| 198 |
+
if ts is not None:
|
| 199 |
+
train_date = str(pd.Timestamp(ts).isoformat())
|
| 200 |
+
except Exception as e: # broad: MLflow store unreachable, schema mismatch, etc.
|
| 201 |
+
logger.warning("MLflow provenance lookup failed: %s", e)
|
| 202 |
+
|
| 203 |
+
n_train = None
|
| 204 |
+
stats = getattr(model, "_neurobridge_train_stats", None)
|
| 205 |
+
if stats is not None:
|
| 206 |
+
n_train = int(stats.get("n_train", 0)) or None
|
| 207 |
+
|
| 208 |
+
_PROVENANCE_CACHE = ModelProvenance(
|
| 209 |
+
mlflow_run_id=run_id,
|
| 210 |
+
model_version=_MODEL_VERSION,
|
| 211 |
+
train_date=train_date,
|
| 212 |
+
n_examples=n_train,
|
| 213 |
+
)
|
| 214 |
+
return _PROVENANCE_CACHE
|
| 215 |
+
|
| 216 |
+
|
| 217 |
def _matching_calibration_bin(model, confidence: float) -> CalibrationContext | None:
|
| 218 |
"""Pick the highest-threshold bin whose threshold <= confidence. None if no match or no metadata."""
|
| 219 |
bins = getattr(model, "_neurobridge_calibration", None)
|
|
|
|
| 265 |
label_text = "permeable" if pred["label"] == 1 else "non-permeable"
|
| 266 |
calibration = _matching_calibration_bin(model, pred["confidence"])
|
| 267 |
drift_z, rolling_n = _compute_drift_z(model, pred["confidence"])
|
| 268 |
+
provenance = _build_provenance(model)
|
| 269 |
return BBBPredictResponse(
|
| 270 |
label=pred["label"],
|
| 271 |
label_text=label_text,
|
|
|
|
| 274 |
calibration=calibration,
|
| 275 |
drift_z=drift_z,
|
| 276 |
rolling_n=rolling_n,
|
| 277 |
+
provenance=provenance,
|
| 278 |
)
|
| 279 |
|
| 280 |
|
src/api/schemas.py
CHANGED
|
@@ -70,6 +70,14 @@ class CalibrationContext(BaseModel):
|
|
| 70 |
support: int = Field(..., description="Number of held-out predictions falling in this bin")
|
| 71 |
|
| 72 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
class BBBPredictResponse(BaseModel):
|
| 74 |
"""Decision-system payload: prediction + uncertainty + explanation + drift."""
|
| 75 |
label: int
|
|
@@ -95,6 +103,10 @@ class BBBPredictResponse(BaseModel):
|
|
| 95 |
"rolling window (max 100). Zero on a fresh worker."
|
| 96 |
),
|
| 97 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
|
| 99 |
|
| 100 |
class MRIDiagnosticsRequest(BaseModel):
|
|
|
|
| 70 |
support: int = Field(..., description="Number of held-out predictions falling in this bin")
|
| 71 |
|
| 72 |
|
| 73 |
+
class ModelProvenance(BaseModel):
|
| 74 |
+
"""Auditable provenance of the BBB model that produced a prediction."""
|
| 75 |
+
mlflow_run_id: str | None = Field(None, description="MLflow run id of the most recent training run, if any")
|
| 76 |
+
model_version: str = Field("v1", description="Manually-bumped model version label")
|
| 77 |
+
train_date: str | None = Field(None, description="ISO 8601 train timestamp from MLflow run start_time")
|
| 78 |
+
n_examples: int | None = Field(None, description="Training set size (from model._neurobridge_train_stats[\"n_train\"])")
|
| 79 |
+
|
| 80 |
+
|
| 81 |
class BBBPredictResponse(BaseModel):
|
| 82 |
"""Decision-system payload: prediction + uncertainty + explanation + drift."""
|
| 83 |
label: int
|
|
|
|
| 103 |
"rolling window (max 100). Zero on a fresh worker."
|
| 104 |
),
|
| 105 |
)
|
| 106 |
+
provenance: ModelProvenance | None = Field(
|
| 107 |
+
None,
|
| 108 |
+
description="Auditing metadata (MLflow run id, train date, n_examples).",
|
| 109 |
+
)
|
| 110 |
|
| 111 |
|
| 112 |
class MRIDiagnosticsRequest(BaseModel):
|
src/frontend/app.py
CHANGED
|
@@ -457,6 +457,18 @@ def _render_mri_tab() -> None:
|
|
| 457 |
def _render_prediction_card(result: dict) -> None:
|
| 458 |
"""Render a B2B-styled decision card: label badge + confidence + SHAP bars."""
|
| 459 |
st.session_state["last_bbb_prediction"] = result
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 460 |
label_text = _html.escape(str(result["label_text"]))
|
| 461 |
badge_color = "#166534" if result["label"] == 1 else "#991B1B"
|
| 462 |
badge_bg = "#DCFCE7" if result["label"] == 1 else "#FEE2E2"
|
|
|
|
| 457 |
def _render_prediction_card(result: dict) -> None:
|
| 458 |
"""Render a B2B-styled decision card: label badge + confidence + SHAP bars."""
|
| 459 |
st.session_state["last_bbb_prediction"] = result
|
| 460 |
+
provenance = result.get("provenance")
|
| 461 |
+
if provenance is not None:
|
| 462 |
+
run_id = provenance.get("mlflow_run_id")
|
| 463 |
+
run_label = run_id[:8] if run_id else "—"
|
| 464 |
+
train_date = provenance.get("train_date") or "—"
|
| 465 |
+
n_examples = provenance.get("n_examples")
|
| 466 |
+
n_label = f"n={n_examples}" if n_examples else "n=—"
|
| 467 |
+
st.caption(
|
| 468 |
+
f"🔎 MLflow run **{run_label}** · "
|
| 469 |
+
f"Model **{provenance.get('model_version', 'v1')}** · "
|
| 470 |
+
f"trained {train_date} · {n_label}"
|
| 471 |
+
)
|
| 472 |
label_text = _html.escape(str(result["label_text"]))
|
| 473 |
badge_color = "#166534" if result["label"] == 1 else "#991B1B"
|
| 474 |
badge_bg = "#DCFCE7" if result["label"] == 1 else "#FEE2E2"
|
tests/api/test_routes.py
CHANGED
|
@@ -160,6 +160,25 @@ class TestBBBPredictRoute:
|
|
| 160 |
# By call 105, drift_z is computable (≥10 samples) — assert numeric.
|
| 161 |
assert isinstance(last_body["drift_z"], float)
|
| 162 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
def test_returns_400_on_invalid_smiles(self, tmp_path: Path, monkeypatch):
|
| 164 |
artifact = self._setup_model_artifact(tmp_path)
|
| 165 |
monkeypatch.setenv("BBB_MODEL_PATH", str(artifact))
|
|
|
|
| 160 |
# By call 105, drift_z is computable (≥10 samples) — assert numeric.
|
| 161 |
assert isinstance(last_body["drift_z"], float)
|
| 162 |
|
| 163 |
+
def test_predict_response_includes_provenance(self, _set_bbb_model_path):
|
| 164 |
+
"""T2: provenance field is present in body (fields may be None)."""
|
| 165 |
+
from src.api import routes
|
| 166 |
+
routes.WORKER_CONFIDENCE_DEQUE.clear()
|
| 167 |
+
|
| 168 |
+
resp = client.post("/predict/bbb", json={"smiles": "CCO", "top_k": 3})
|
| 169 |
+
assert resp.status_code == 200, resp.text
|
| 170 |
+
body = resp.json()
|
| 171 |
+
assert "provenance" in body
|
| 172 |
+
assert body["provenance"] is not None, "provenance should be populated even when MLflow is empty"
|
| 173 |
+
prov = body["provenance"]
|
| 174 |
+
assert "mlflow_run_id" in prov
|
| 175 |
+
assert "model_version" in prov
|
| 176 |
+
assert prov["model_version"] == "v1" # default until bumped manually
|
| 177 |
+
assert "train_date" in prov
|
| 178 |
+
assert "n_examples" in prov
|
| 179 |
+
# n_examples comes from train_stats — must be a positive int for the test fixture
|
| 180 |
+
assert isinstance(prov["n_examples"], int) and prov["n_examples"] >= 1
|
| 181 |
+
|
| 182 |
def test_returns_400_on_invalid_smiles(self, tmp_path: Path, monkeypatch):
|
| 183 |
artifact = self._setup_model_artifact(tmp_path)
|
| 184 |
monkeypatch.setenv("BBB_MODEL_PATH", str(artifact))
|