refactor(bbb): migrate to MorganGenerator API; consolidate test imports
Browse files
src/pipelines/bbb_pipeline.py
CHANGED
|
@@ -59,7 +59,8 @@ def compute_morgan_fingerprint(
|
|
| 59 |
`is_valid_smiles` first if the source is untrusted.
|
| 60 |
n_bits: Length of the bit vector. 2048 is the de-facto default
|
| 61 |
for downstream scikit-learn classifiers.
|
| 62 |
-
radius: Morgan radius (2 ≈ ECFP4).
|
|
|
|
| 63 |
|
| 64 |
Returns:
|
| 65 |
A 1-D `np.ndarray` of length `n_bits` and dtype `uint8`, where
|
|
@@ -72,7 +73,8 @@ def compute_morgan_fingerprint(
|
|
| 72 |
if mol is None:
|
| 73 |
raise ValueError(f"invalid SMILES: {smiles!r}")
|
| 74 |
|
| 75 |
-
|
|
|
|
| 76 |
arr = np.zeros((n_bits,), dtype=np.uint8)
|
| 77 |
ConvertToNumpyArray(bit_vect, arr)
|
| 78 |
return arr
|
|
|
|
| 59 |
`is_valid_smiles` first if the source is untrusted.
|
| 60 |
n_bits: Length of the bit vector. 2048 is the de-facto default
|
| 61 |
for downstream scikit-learn classifiers.
|
| 62 |
+
radius: Morgan radius (2 ≈ ECFP4). Passed to RDKit's modern
|
| 63 |
+
MorganGenerator API.
|
| 64 |
|
| 65 |
Returns:
|
| 66 |
A 1-D `np.ndarray` of length `n_bits` and dtype `uint8`, where
|
|
|
|
| 73 |
if mol is None:
|
| 74 |
raise ValueError(f"invalid SMILES: {smiles!r}")
|
| 75 |
|
| 76 |
+
generator = AllChem.GetMorganGenerator(radius=radius, fpSize=n_bits)
|
| 77 |
+
bit_vect = generator.GetFingerprint(mol)
|
| 78 |
arr = np.zeros((n_bits,), dtype=np.uint8)
|
| 79 |
ConvertToNumpyArray(bit_vect, arr)
|
| 80 |
return arr
|
tests/pipelines/test_bbb_pipeline.py
CHANGED
|
@@ -3,10 +3,14 @@ from __future__ import annotations
|
|
| 3 |
|
| 4 |
from pathlib import Path
|
| 5 |
|
|
|
|
| 6 |
import pandas as pd
|
| 7 |
import pytest
|
| 8 |
|
| 9 |
-
from src.pipelines.bbb_pipeline import is_valid_smiles
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
|
| 12 |
FIXTURE = Path(__file__).parent.parent / "fixtures" / "bbbp_sample.csv"
|
|
@@ -33,11 +37,6 @@ class TestIsValidSmiles:
|
|
| 33 |
assert is_valid_smiles(math.nan) is False
|
| 34 |
|
| 35 |
|
| 36 |
-
import numpy as np
|
| 37 |
-
|
| 38 |
-
from src.pipelines.bbb_pipeline import compute_morgan_fingerprint
|
| 39 |
-
|
| 40 |
-
|
| 41 |
class TestComputeMorganFingerprint:
|
| 42 |
def test_returns_numpy_array_of_correct_length(self) -> None:
|
| 43 |
fp = compute_morgan_fingerprint("CCCO", n_bits=2048, radius=2)
|
|
|
|
| 3 |
|
| 4 |
from pathlib import Path
|
| 5 |
|
| 6 |
+
import numpy as np
|
| 7 |
import pandas as pd
|
| 8 |
import pytest
|
| 9 |
|
| 10 |
+
from src.pipelines.bbb_pipeline import (
|
| 11 |
+
compute_morgan_fingerprint,
|
| 12 |
+
is_valid_smiles,
|
| 13 |
+
)
|
| 14 |
|
| 15 |
|
| 16 |
FIXTURE = Path(__file__).parent.parent / "fixtures" / "bbbp_sample.csv"
|
|
|
|
| 37 |
assert is_valid_smiles(math.nan) is False
|
| 38 |
|
| 39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
class TestComputeMorganFingerprint:
|
| 41 |
def test_returns_numpy_array_of_correct_length(self) -> None:
|
| 42 |
fp = compute_morgan_fingerprint("CCCO", n_bits=2048, radius=2)
|