docs(bbb): fix stale CSV mentions in run_pipeline + module docstrings
Browse files
src/pipelines/bbb_pipeline.py
CHANGED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
|
| 3 |
Reads the Kaggle BBBP dataset (SMILES strings + binary penetration label),
|
| 4 |
filters chemically invalid SMILES, computes Morgan circular fingerprints with
|
| 5 |
-
RDKit, and writes a model-ready feature table to `data/processed/`.
|
| 6 |
|
| 7 |
This module follows the Data Readiness contract in AGENTS.md §4:
|
| 8 |
schema validity, domain validity (drop invalid SMILES), determinism,
|
|
@@ -192,7 +192,7 @@ def run_pipeline(
|
|
| 192 |
n_bits: int = 2048,
|
| 193 |
radius: int = 2,
|
| 194 |
) -> None:
|
| 195 |
-
"""Run the BBB pipeline end-to-end: raw CSV → processed feature CSV.
|
| 196 |
|
| 197 |
Reads the Kaggle BBBP CSV at `input_path`, validates and converts
|
| 198 |
SMILES into Morgan fingerprints, and writes the model-ready table
|
|
|
|
| 2 |
|
| 3 |
Reads the Kaggle BBBP dataset (SMILES strings + binary penetration label),
|
| 4 |
filters chemically invalid SMILES, computes Morgan circular fingerprints with
|
| 5 |
+
RDKit, and writes a model-ready Parquet feature table to `data/processed/`.
|
| 6 |
|
| 7 |
This module follows the Data Readiness contract in AGENTS.md §4:
|
| 8 |
schema validity, domain validity (drop invalid SMILES), determinism,
|
|
|
|
| 192 |
n_bits: int = 2048,
|
| 193 |
radius: int = 2,
|
| 194 |
) -> None:
|
| 195 |
+
"""Run the BBB pipeline end-to-end: raw CSV → processed feature Parquet.
|
| 196 |
|
| 197 |
Reads the Kaggle BBBP CSV at `input_path`, validates and converts
|
| 198 |
SMILES into Morgan fingerprints, and writes the model-ready table
|