mekosotto Claude Sonnet 4.6 committed on
Commit
7e0ed24
·
1 Parent(s): ac25478

feat(eeg): add is_valid_epoch guard for NaN/inf/shape/dtype

Browse files

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

src/pipelines/eeg_pipeline.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """EEG (electroencephalography) pipeline.
2
+
3
+ Loads raw recordings (FIF/EDF), bandpass-filters, removes EOG artifacts via
4
+ ICA, slices into fixed-duration epochs, computes per-band PSD + statistical
5
+ features, flattens to a 2D table, and writes a model-ready Parquet at
6
+ `data/processed/eeg_features.parquet`.
7
+
8
+ Follows the Data Readiness contract in AGENTS.md §4 and the Parquet storage
9
+ convention in §6: schema validity, domain validity (drop NaN/inf epochs with
10
+ a logged WARNING), determinism (seeded ICA + sklearn RNG), traceability
11
+ (in/out/dropped counts at INFO), and idempotent overwrite output.
12
+ """
13
+ from __future__ import annotations
14
+
15
+ import numpy as np
16
+
17
+ from src.core.logger import get_logger
18
+
19
+ logger = get_logger(__name__)
20
+
21
+
22
def is_valid_epoch(epoch: object) -> bool:
    """Return True iff `epoch` is a non-empty 2-D float array with no NaN/inf.

    Used to drop corrupted segments before feature extraction. Defensive
    against the full set of garbage we expect from real recordings: lists,
    None, non-float dtypes, NaN/inf samples, zero-sized arrays.

    Args:
        epoch: Candidate object to validate; any type is accepted.

    Returns:
        True only when `epoch` is a `numpy.ndarray` with exactly two
        dimensions, at least one element, a floating-point dtype, and
        exclusively finite values. False in every other case.
    """
    if not isinstance(epoch, np.ndarray):
        return False
    if epoch.ndim != 2 or epoch.size == 0:
        return False
    # Enforce the documented float contract. Checking the dtype first also
    # keeps np.isfinite from raising TypeError on object/string arrays, so
    # such inputs are rejected with False instead of crashing the caller.
    if not np.issubdtype(epoch.dtype, np.floating):
        return False
    # Convert numpy's bool_ into a plain Python bool for the caller.
    return bool(np.isfinite(epoch).all())
tests/pipelines/test_eeg_pipeline.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Unit + integration tests for the EEG pipeline."""
2
+ from __future__ import annotations
3
+
4
+ from pathlib import Path
5
+
6
+ import numpy as np
7
+ import pytest
8
+
9
+ from src.pipelines.eeg_pipeline import is_valid_epoch
10
+
11
+
12
+ FIXTURE = Path(__file__).parent.parent / "fixtures" / "eeg_sample.fif"
13
+
14
+
15
class TestIsValidEpoch:
    """Checks that `is_valid_epoch` accepts clean 2-D arrays and rejects the rest."""

    def test_accepts_2d_finite_array(self) -> None:
        # An all-zero float64 array of shape (4, 256) is the happy path.
        assert is_valid_epoch(np.zeros((4, 256), dtype=np.float64)) is True

    def test_rejects_wrong_dimension(self) -> None:
        # Both 1-D and 3-D inputs must be refused.
        for bad_shape in ((4,), (4, 256, 2)):
            assert is_valid_epoch(np.zeros(bad_shape)) is False

    def test_rejects_nan(self) -> None:
        # A single NaN sample is enough to invalidate the array.
        data = np.zeros((4, 256))
        data[0, 10] = np.nan
        assert is_valid_epoch(data) is False

    def test_rejects_inf(self) -> None:
        # A single infinite sample is enough to invalidate the array.
        data = np.zeros((4, 256))
        data[1, 5] = np.inf
        assert is_valid_epoch(data) is False

    def test_rejects_empty(self) -> None:
        # Zero-sized along either axis counts as empty.
        for empty_shape in ((0, 256), (4, 0)):
            assert is_valid_epoch(np.zeros(empty_shape)) is False

    def test_rejects_non_array(self) -> None:
        # Plain Python objects are never valid, even when list-shaped.
        for not_an_array in ([[1, 2, 3]], None):
            assert is_valid_epoch(not_an_array) is False