"""Tests for src.models.bbb_model — train, save/load, predict, uncertainty."""
from __future__ import annotations

from pathlib import Path

import numpy as np
import pandas as pd
import pytest

from src.models import bbb_model


_FIXTURES = Path(__file__).resolve().parents[1] / "fixtures"
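# fixtures/ is resolved as a sibling of this module's parent directory and holds
# the small, committed bbbp_sample.csv used by the training fixture below.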


@pytest.fixture(scope="module")
def trained_model_and_features(tmp_path_factory: pytest.TempPathFactory):
    """Train one tiny model from the committed BBBP fixture; cache for the module."""
    from src.pipelines import bbb_pipeline

    out = tmp_path_factory.mktemp("bbb_model_test") / "features.parquet"
    bbb_pipeline.run_pipeline(
        input_path=_FIXTURES / "bbbp_sample.csv",
        output_path=out,
    )
    df = pd.read_parquet(out)
    # Tiny n_estimators for test speed; real training uses default 100.
    model = bbb_model.train(df, label_col="p_np", n_estimators=10, random_state=42)
    return model, df


class TestTrain:
    def test_returns_fitted_classifier(self, trained_model_and_features):
        model, _ = trained_model_and_features
        assert hasattr(model, "classes_")
        assert len(model.classes_) == 2

    def test_raises_on_missing_label_column(self, trained_model_and_features):
        _, df = trained_model_and_features
        with pytest.raises(KeyError):
            bbb_model.train(df.drop(columns=["p_np"]), label_col="p_np")

    def test_deterministic_with_random_state(self, trained_model_and_features):
        _, df = trained_model_and_features
        m1 = bbb_model.train(df, label_col="p_np", n_estimators=10, random_state=42)
        m2 = bbb_model.train(df, label_col="p_np", n_estimators=10, random_state=42)
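        # The pipeline emits one fp_<i> column per Morgan fingerprint bit; those
        # columns form the feature matrix used throughout these tests.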
        fp_cols = [c for c in df.columns if c.startswith("fp_")]
        X = df[fp_cols].to_numpy()
        np.testing.assert_array_equal(m1.predict_proba(X), m2.predict_proba(X))


class TestSaveLoad:
    def test_save_then_load_roundtrip(self, trained_model_and_features, tmp_path: Path):
        model, df = trained_model_and_features
        artifact = tmp_path / "bbb_model.joblib"
        bbb_model.save(model, artifact)
        assert artifact.exists()

        reloaded = bbb_model.load(artifact)
        fp_cols = [c for c in df.columns if c.startswith("fp_")]
        X = df[fp_cols].to_numpy()
        np.testing.assert_array_equal(model.predict(X), reloaded.predict(X))

    def test_load_raises_on_missing_path(self, tmp_path: Path):
        with pytest.raises(FileNotFoundError):
            bbb_model.load(tmp_path / "does_not_exist.joblib")


class TestPredictWithProba:
    def test_returns_label_and_confidence(self, trained_model_and_features):
        model, _ = trained_model_and_features
        result = bbb_model.predict_with_proba(model, "CCO")
        assert "label" in result
        assert "confidence" in result
        assert result["label"] in (0, 1)
        assert 0.0 <= result["confidence"] <= 1.0

    def test_raises_on_invalid_smiles(self, trained_model_and_features):
        model, _ = trained_model_and_features
        with pytest.raises(ValueError):
            bbb_model.predict_with_proba(model, "this_is_not_a_smiles_AT_ALL")

    def test_confidence_equals_max_class_probability(self, trained_model_and_features):
        """confidence is the max class probability — verifies against raw predict_proba."""
        model, _ = trained_model_and_features
        from src.pipelines.bbb_pipeline import compute_morgan_fingerprint
        fp = compute_morgan_fingerprint("CCO").reshape(1, -1)
        raw_proba = model.predict_proba(fp)[0]
        result = bbb_model.predict_with_proba(model, "CCO")
        assert abs(result["confidence"] - float(max(raw_proba))) < 1e-9


class TestExplainPrediction:
    def test_returns_top_k_features(self, trained_model_and_features):
        model, _ = trained_model_and_features
        attributions = bbb_model.explain_prediction(model, "CCO", top_k=5)
        assert len(attributions) == 5
        for a in attributions:
            assert "feature" in a
            assert "shap_value" in a
            assert isinstance(a["shap_value"], float)

    def test_features_sorted_by_absolute_shap_value_descending(
        self, trained_model_and_features,
    ):
        model, _ = trained_model_and_features
        attributions = bbb_model.explain_prediction(model, "CCO", top_k=10)
        abs_vals = [abs(a["shap_value"]) for a in attributions]
        assert abs_vals == sorted(abs_vals, reverse=True)

    def test_features_named_fp_INDEX(self, trained_model_and_features):
        model, _ = trained_model_and_features
        attributions = bbb_model.explain_prediction(model, "CCO", top_k=3)
        for a in attributions:
            assert a["feature"].startswith("fp_")
            int(a["feature"].split("_")[1])  # parses cleanly

    def test_raises_on_invalid_smiles(self, trained_model_and_features):
        model, _ = trained_model_and_features
        with pytest.raises(ValueError):
            bbb_model.explain_prediction(model, "still_not_a_smiles", top_k=5)

    def test_deterministic_output(self, trained_model_and_features):
        """AGENTS.md §4 rule 3: identical input → identical SHAP attributions."""
        model, _ = trained_model_and_features
        r1 = bbb_model.explain_prediction(model, "CCO", top_k=5)
        r2 = bbb_model.explain_prediction(model, "CCO", top_k=5)
        assert r1 == r2


class TestCalibrationMetadata:
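    """train() attaches per-threshold precision bins under model._neurobridge_calibration;
    these tests pin down that shape and its persistence through save/load."""
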
    def test_train_attaches_calibration_attribute(self, trained_model_and_features):
        model, _ = trained_model_and_features
        assert hasattr(model, "_neurobridge_calibration")
        bins = model._neurobridge_calibration
        assert isinstance(bins, list)
        # Always at least one bin (the lowest-threshold one)
        assert len(bins) >= 1
        for b in bins:
            assert "threshold" in b
            assert "precision" in b
            assert "support" in b
            assert 0.0 <= b["threshold"] <= 1.0
            assert 0.0 <= b["precision"] <= 1.0
            assert b["support"] >= 0

    def test_calibration_thresholds_are_sorted_ascending(
        self, trained_model_and_features,
    ):
        model, _ = trained_model_and_features
        thresholds = [b["threshold"] for b in model._neurobridge_calibration]
        assert thresholds == sorted(thresholds)

    def test_calibration_survives_save_load_roundtrip(
        self, trained_model_and_features, tmp_path: Path,
    ):
        model, _ = trained_model_and_features
        artifact = tmp_path / "calibrated.joblib"
        bbb_model.save(model, artifact)
        reloaded = bbb_model.load(artifact)
        assert hasattr(reloaded, "_neurobridge_calibration")
        assert reloaded._neurobridge_calibration == model._neurobridge_calibration


class TestTrainStatsMetadata:
    """Day 7 — T1A: train()-time confidence distribution stash."""

    def test_train_attaches_train_stats_attribute(self, trained_model_and_features):
        model, _ = trained_model_and_features
        assert hasattr(model, "_neurobridge_train_stats")
        stats = model._neurobridge_train_stats
        assert isinstance(stats, dict)
        for key in ("median", "std", "n_train"):
            assert key in stats, f"missing key {key!r} in train stats"
        assert 0.0 <= stats["median"] <= 1.0
        assert stats["std"] >= 0.0
        assert stats["n_train"] >= 1

    def test_train_stats_survives_save_load_roundtrip(
        self, trained_model_and_features, tmp_path: Path,
    ):
        model, _ = trained_model_and_features
        path = tmp_path / "m.joblib"
        bbb_model.save(model, path)
        reloaded = bbb_model.load(path)
        assert hasattr(reloaded, "_neurobridge_train_stats")
        assert reloaded._neurobridge_train_stats == model._neurobridge_train_stats