"""Tests for src.models.bbb_model — train, save/load, predict, uncertainty."""
from __future__ import annotations
from pathlib import Path
import numpy as np
import pandas as pd
import pytest
from src.models import bbb_model
# Directory holding committed test data (e.g. bbbp_sample.csv), resolved
# relative to this file — presumably <tests>/fixtures; confirm against repo layout.
_FIXTURES = Path(__file__).resolve().parents[1] / "fixtures"
@pytest.fixture(scope="module")
def trained_model_and_features(tmp_path_factory):
    """Train one tiny model from the committed BBBP fixture; cache for the module.

    Returns:
        (model, df): the fitted classifier and the feature DataFrame it was
        trained on.

    Uses pytest's session-managed ``tmp_path_factory`` (available at module
    scope) instead of a raw ``tempfile.mkdtemp`` — the original left an
    uncleaned temp directory behind on every run, whereas pytest prunes its
    base temp directories automatically.
    """
    from src.pipelines import bbb_pipeline

    out = tmp_path_factory.mktemp("bbb_model_test") / "features.parquet"
    bbb_pipeline.run_pipeline(
        input_path=_FIXTURES / "bbbp_sample.csv",
        output_path=out,
    )
    df = pd.read_parquet(out)
    # Tiny n_estimators for test speed; real training uses default 100.
    model = bbb_model.train(df, label_col="p_np", n_estimators=10, random_state=42)
    return model, df
class TestTrain:
    """bbb_model.train: fitted output, label validation, seed determinism."""

    def test_returns_fitted_classifier(self, trained_model_and_features):
        """Training yields a fitted binary classifier (sklearn `classes_`)."""
        model, _ = trained_model_and_features
        assert hasattr(model, "classes_")
        assert len(model.classes_) == 2

    def test_raises_on_missing_label_column(self, trained_model_and_features):
        """A frame without the label column must surface as KeyError."""
        _, df = trained_model_and_features
        unlabeled = df.drop(columns=["p_np"])
        with pytest.raises(KeyError):
            bbb_model.train(unlabeled, label_col="p_np")

    def test_deterministic_with_random_state(self, trained_model_and_features):
        """Two fits with identical seeds produce identical probabilities."""
        _, df = trained_model_and_features
        feature_cols = [c for c in df.columns if c.startswith("fp_")]
        features = df[feature_cols].to_numpy()
        first = bbb_model.train(df, label_col="p_np", n_estimators=10, random_state=42)
        second = bbb_model.train(df, label_col="p_np", n_estimators=10, random_state=42)
        np.testing.assert_array_equal(
            first.predict_proba(features), second.predict_proba(features)
        )
class TestSaveLoad:
    """Persistence: joblib round-trip fidelity and missing-file behavior."""

    def test_save_then_load_roundtrip(self, trained_model_and_features, tmp_path: Path):
        """A saved-then-reloaded model predicts identically to the original."""
        model, df = trained_model_and_features
        target = tmp_path / "bbb_model.joblib"
        bbb_model.save(model, target)
        assert target.exists()
        restored = bbb_model.load(target)
        cols = [c for c in df.columns if c.startswith("fp_")]
        matrix = df[cols].to_numpy()
        np.testing.assert_array_equal(model.predict(matrix), restored.predict(matrix))

    def test_load_raises_on_missing_path(self, tmp_path: Path):
        """Loading a nonexistent artifact raises FileNotFoundError."""
        missing = tmp_path / "does_not_exist.joblib"
        with pytest.raises(FileNotFoundError):
            bbb_model.load(missing)
class TestPredictWithProba:
    """bbb_model.predict_with_proba: result schema, input validation, math."""

    def test_returns_label_and_confidence(self, trained_model_and_features):
        """Result dict carries a binary label and a bounded confidence."""
        model, _ = trained_model_and_features
        result = bbb_model.predict_with_proba(model, "CCO")
        for key in ("label", "confidence"):
            assert key in result
        assert result["label"] in (0, 1)
        assert 0.0 <= result["confidence"] <= 1.0

    def test_raises_on_invalid_smiles(self, trained_model_and_features):
        """Garbage SMILES input must raise ValueError."""
        model, _ = trained_model_and_features
        with pytest.raises(ValueError):
            bbb_model.predict_with_proba(model, "this_is_not_a_smiles_AT_ALL")

    def test_confidence_equals_max_class_probability(self, trained_model_and_features):
        """confidence is the max class probability — verifies against raw predict_proba."""
        model, _ = trained_model_and_features
        from src.pipelines.bbb_pipeline import compute_morgan_fingerprint

        features = compute_morgan_fingerprint("CCO").reshape(1, -1)
        class_probs = model.predict_proba(features)[0]
        expected = float(max(class_probs))
        result = bbb_model.predict_with_proba(model, "CCO")
        assert abs(result["confidence"] - expected) < 1e-9
class TestExplainPrediction:
    """bbb_model.explain_prediction: SHAP attribution schema and ordering."""

    def test_returns_top_k_features(self, trained_model_and_features):
        """Exactly top_k rows come back, each with a feature name and float SHAP."""
        model, _ = trained_model_and_features
        rows = bbb_model.explain_prediction(model, "CCO", top_k=5)
        assert len(rows) == 5
        for row in rows:
            assert "feature" in row
            assert "shap_value" in row
            assert isinstance(row["shap_value"], float)

    def test_features_sorted_by_absolute_shap_value_descending(
        self, trained_model_and_features,
    ):
        """Rows are ordered by |SHAP| from most to least influential."""
        model, _ = trained_model_and_features
        rows = bbb_model.explain_prediction(model, "CCO", top_k=10)
        magnitudes = [abs(row["shap_value"]) for row in rows]
        assert magnitudes == sorted(magnitudes, reverse=True)

    def test_features_named_fp_INDEX(self, trained_model_and_features):
        """Feature names follow the fp_<index> convention with an integer index."""
        model, _ = trained_model_and_features
        rows = bbb_model.explain_prediction(model, "CCO", top_k=3)
        for row in rows:
            name = row["feature"]
            assert name.startswith("fp_")
            int(name.split("_")[1])  # index part must parse as an integer

    def test_raises_on_invalid_smiles(self, trained_model_and_features):
        """Garbage SMILES input must raise ValueError."""
        model, _ = trained_model_and_features
        with pytest.raises(ValueError):
            bbb_model.explain_prediction(model, "still_not_a_smiles", top_k=5)

    def test_deterministic_output(self, trained_model_and_features):
        """AGENTS.md §4 rule 3: identical input → identical SHAP attributions."""
        model, _ = trained_model_and_features
        first = bbb_model.explain_prediction(model, "CCO", top_k=5)
        second = bbb_model.explain_prediction(model, "CCO", top_k=5)
        assert first == second
class TestCalibrationMetadata:
    """Calibration bins that train() attaches and save/load must preserve."""

    def test_train_attaches_calibration_attribute(self, trained_model_and_features):
        """Model carries a list of bins with bounded threshold/precision/support."""
        model, _ = trained_model_and_features
        assert hasattr(model, "_neurobridge_calibration")
        bins = model._neurobridge_calibration
        assert isinstance(bins, list)
        # Always at least one bin (the lowest-threshold one)
        assert len(bins) >= 1
        for entry in bins:
            for key in ("threshold", "precision", "support"):
                assert key in entry
            assert 0.0 <= entry["threshold"] <= 1.0
            assert 0.0 <= entry["precision"] <= 1.0
            assert entry["support"] >= 0

    def test_calibration_thresholds_are_sorted_ascending(
        self, trained_model_and_features,
    ):
        """Bin thresholds come back in ascending order."""
        model, _ = trained_model_and_features
        values = [entry["threshold"] for entry in model._neurobridge_calibration]
        assert values == sorted(values)

    def test_calibration_survives_save_load_roundtrip(
        self, trained_model_and_features, tmp_path: Path,
    ):
        """Calibration metadata must persist unchanged through save/load."""
        model, _ = trained_model_and_features
        target = tmp_path / "calibrated.joblib"
        bbb_model.save(model, target)
        restored = bbb_model.load(target)
        assert hasattr(restored, "_neurobridge_calibration")
        assert restored._neurobridge_calibration == model._neurobridge_calibration
class TestTrainStatsMetadata:
    """Day 7 — T1A: train()-time confidence distribution stash."""

    def test_train_attaches_train_stats_attribute(self, trained_model_and_features):
        """train() stashes a dict with bounded median/std/n_train on the model."""
        model, _ = trained_model_and_features
        assert hasattr(model, "_neurobridge_train_stats")
        stats = model._neurobridge_train_stats
        assert isinstance(stats, dict)
        for key in ("median", "std", "n_train"):
            assert key in stats, f"missing key {key!r} in train stats"
        assert 0.0 <= stats["median"] <= 1.0
        assert stats["std"] >= 0.0
        assert stats["n_train"] >= 1

    def test_train_stats_survives_save_load_roundtrip(
        self, trained_model_and_features, tmp_path: Path,
    ):
        """Train stats must persist unchanged through bbb_model.save/load."""
        # Fix: dropped the redundant local `from src.models import bbb_model`;
        # the module-level import already binds this name.
        model, _ = trained_model_and_features
        path = tmp_path / "m.joblib"
        bbb_model.save(model, path)
        reloaded = bbb_model.load(path)
        assert hasattr(reloaded, "_neurobridge_train_stats")
        assert reloaded._neurobridge_train_stats == model._neurobridge_train_stats