feat(eeg): add is_valid_epoch guard for NaN/inf/shape/dtype
Browse files
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
src/pipelines/eeg_pipeline.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""EEG (electroencephalography) pipeline.
|
| 2 |
+
|
| 3 |
+
Loads raw recordings (FIF/EDF), bandpass-filters, removes EOG artifacts via
|
| 4 |
+
ICA, slices into fixed-duration epochs, computes per-band PSD + statistical
|
| 5 |
+
features, flattens to a 2D table, and writes a model-ready Parquet at
|
| 6 |
+
`data/processed/eeg_features.parquet`.
|
| 7 |
+
|
| 8 |
+
Follows the Data Readiness contract in AGENTS.md §4 and the Parquet storage
|
| 9 |
+
convention in §6: schema validity, domain validity (drop NaN/inf epochs with
|
| 10 |
+
a logged WARNING), determinism (seeded ICA + sklearn RNG), traceability
|
| 11 |
+
(in/out/dropped counts at INFO), and idempotent overwrite output.
|
| 12 |
+
"""
|
| 13 |
+
from __future__ import annotations
|
| 14 |
+
|
| 15 |
+
import numpy as np
|
| 16 |
+
|
| 17 |
+
from src.core.logger import get_logger
|
| 18 |
+
|
| 19 |
+
logger = get_logger(__name__)
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def is_valid_epoch(epoch: object) -> bool:
    """Return True iff `epoch` is a non-empty 2-D float array with no NaN/inf.

    Used to drop corrupted segments before feature extraction. Defensive
    against the full set of garbage we expect from real recordings: lists,
    None, non-float dtypes, NaN/inf samples, zero-sized arrays.

    Args:
        epoch: Candidate epoch. Any object is accepted; anything that is not
            a `numpy.ndarray` is rejected rather than coerced.

    Returns:
        True only when `epoch` is a `numpy.ndarray` with exactly two
        dimensions, at least one element, a floating-point dtype, and only
        finite values. False in every other case.
    """
    if not isinstance(epoch, np.ndarray):
        return False
    if epoch.ndim != 2 or epoch.size == 0:
        return False
    # Enforce the documented float contract. This also keeps object/str
    # arrays away from np.isfinite, which raises TypeError on those dtypes
    # instead of returning a mask.
    if not np.issubdtype(epoch.dtype, np.floating):
        return False
    # bool(...) turns the numpy scalar into a plain Python bool so callers
    # can rely on `is True` / `is False`.
    return bool(np.isfinite(epoch).all())
|
tests/pipelines/test_eeg_pipeline.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Unit + integration tests for the EEG pipeline."""
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
|
| 6 |
+
import numpy as np
|
| 7 |
+
import pytest
|
| 8 |
+
|
| 9 |
+
from src.pipelines.eeg_pipeline import is_valid_epoch
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
FIXTURE = Path(__file__).parent.parent / "fixtures" / "eeg_sample.fif"
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class TestIsValidEpoch:
    """Checks for the `is_valid_epoch` guard: one accept case, five reject cases."""

    def test_accepts_2d_finite_array(self) -> None:
        """An all-zero 4x256 float64 array is accepted."""
        clean = np.zeros((4, 256), dtype=np.float64)
        verdict = is_valid_epoch(clean)
        assert verdict is True

    def test_rejects_wrong_dimension(self) -> None:
        """1-D and 3-D arrays are both rejected."""
        one_dim = np.zeros((4,))
        three_dim = np.zeros((4, 256, 2))
        assert is_valid_epoch(one_dim) is False
        assert is_valid_epoch(three_dim) is False

    def test_rejects_nan(self) -> None:
        """A single NaN sample invalidates the whole epoch."""
        corrupted = np.zeros((4, 256))
        corrupted[0, 10] = np.nan
        verdict = is_valid_epoch(corrupted)
        assert verdict is False

    def test_rejects_inf(self) -> None:
        """A single +inf sample invalidates the whole epoch."""
        corrupted = np.zeros((4, 256))
        corrupted[1, 5] = np.inf
        verdict = is_valid_epoch(corrupted)
        assert verdict is False

    def test_rejects_empty(self) -> None:
        """Zero-sized 2-D arrays are rejected whichever axis is empty."""
        no_rows = np.zeros((0, 256))
        no_cols = np.zeros((4, 0))
        assert is_valid_epoch(no_rows) is False
        assert is_valid_epoch(no_cols) is False

    def test_rejects_non_array(self) -> None:
        """Inputs that are not numpy arrays (nested list, None) are rejected."""
        nested_list = [[1, 2, 3]]
        assert is_valid_epoch(nested_list) is False
        assert is_valid_epoch(None) is False
|