mekosotto Claude Sonnet 4.6 committed on
Commit
80528e7
·
1 Parent(s): b1bd8db

refactor(bbb): migrate to MorganGenerator API; consolidate test imports

Browse files
src/pipelines/bbb_pipeline.py CHANGED
@@ -59,7 +59,8 @@ def compute_morgan_fingerprint(
59
  `is_valid_smiles` first if the source is untrusted.
60
  n_bits: Length of the bit vector. 2048 is the de-facto default
61
  for downstream scikit-learn classifiers.
62
- radius: Morgan radius (2 ≈ ECFP4).
 
63
 
64
  Returns:
65
  A 1-D `np.ndarray` of length `n_bits` and dtype `uint8`, where
@@ -72,7 +73,8 @@ def compute_morgan_fingerprint(
72
  if mol is None:
73
  raise ValueError(f"invalid SMILES: {smiles!r}")
74
 
75
- bit_vect = AllChem.GetMorganFingerprintAsBitVect(mol, radius=radius, nBits=n_bits)
 
76
  arr = np.zeros((n_bits,), dtype=np.uint8)
77
  ConvertToNumpyArray(bit_vect, arr)
78
  return arr
 
59
  `is_valid_smiles` first if the source is untrusted.
60
  n_bits: Length of the bit vector. 2048 is the de-facto default
61
  for downstream scikit-learn classifiers.
62
+ radius: Morgan radius (2 ≈ ECFP4). Passed to RDKit's modern
63
+ MorganGenerator API.
64
 
65
  Returns:
66
  A 1-D `np.ndarray` of length `n_bits` and dtype `uint8`, where
 
73
  if mol is None:
74
  raise ValueError(f"invalid SMILES: {smiles!r}")
75
 
76
+ generator = AllChem.GetMorganGenerator(radius=radius, fpSize=n_bits)
77
+ bit_vect = generator.GetFingerprint(mol)
78
  arr = np.zeros((n_bits,), dtype=np.uint8)
79
  ConvertToNumpyArray(bit_vect, arr)
80
  return arr
tests/pipelines/test_bbb_pipeline.py CHANGED
@@ -3,10 +3,14 @@ from __future__ import annotations
3
 
4
  from pathlib import Path
5
 
 
6
  import pandas as pd
7
  import pytest
8
 
9
- from src.pipelines.bbb_pipeline import is_valid_smiles
 
 
 
10
 
11
 
12
  FIXTURE = Path(__file__).parent.parent / "fixtures" / "bbbp_sample.csv"
@@ -33,11 +37,6 @@ class TestIsValidSmiles:
33
  assert is_valid_smiles(math.nan) is False
34
 
35
 
36
- import numpy as np
37
-
38
- from src.pipelines.bbb_pipeline import compute_morgan_fingerprint
39
-
40
-
41
  class TestComputeMorganFingerprint:
42
  def test_returns_numpy_array_of_correct_length(self) -> None:
43
  fp = compute_morgan_fingerprint("CCCO", n_bits=2048, radius=2)
 
3
 
4
  from pathlib import Path
5
 
6
+ import numpy as np
7
  import pandas as pd
8
  import pytest
9
 
10
+ from src.pipelines.bbb_pipeline import (
11
+ compute_morgan_fingerprint,
12
+ is_valid_smiles,
13
+ )
14
 
15
 
16
  FIXTURE = Path(__file__).parent.parent / "fixtures" / "bbbp_sample.csv"
 
37
  assert is_valid_smiles(math.nan) is False
38
 
39
 
 
 
 
 
 
40
  class TestComputeMorganFingerprint:
41
  def test_returns_numpy_array_of_correct_length(self) -> None:
42
  fp = compute_morgan_fingerprint("CCCO", n_bits=2048, radius=2)