docs(eeg): correct float64 dtype claim; tighten test match; clarify CLI default
Browse files
- AGENTS.md +1 -1
- src/pipelines/eeg_pipeline.py +5 -1
- tests/pipelines/test_eeg_pipeline.py +1 -1
AGENTS.md
CHANGED
|
@@ -89,7 +89,7 @@ refactored into a pipeline.
|
|
| 89 |
## 6. Storage Format Convention
|
| 90 |
|
| 91 |
All `data/processed/` outputs MUST be **Parquet** (`pyarrow` engine, `compression="snappy"`):
|
| 92 |
-
- Preserves dtypes (uint8 fingerprints stay uint8; […]) [remainder of the removed line was truncated in extraction]
|
| 93 |
- Byte-deterministic with fixed compression and single-threaded writes (satisfies §4 Determinism).
|
| 94 |
- Read with `pd.read_parquet(path)`; no dtype hints required.
|
| 95 |
|
|
|
|
| 89 |
## 6. Storage Format Convention
|
| 90 |
|
| 91 |
All `data/processed/` outputs MUST be **Parquet** (`pyarrow` engine, `compression="snappy"`):
|
| 92 |
+
- Preserves dtypes (uint8 fingerprints stay uint8; float64 EEG features stay float64) — CSV stores no dtype information, so numeric columns are silently widened when re-read (e.g. a uint8 column comes back as int64), which makes it unsuitable for the high-dimensional float arrays produced by the EEG and MRI pipelines.
|
| 93 |
- Byte-deterministic with fixed compression and single-threaded writes (satisfies §4 Determinism).
|
| 94 |
- Read with `pd.read_parquet(path)`; no dtype hints required.
|
| 95 |
|
src/pipelines/eeg_pipeline.py
CHANGED
|
@@ -433,6 +433,8 @@ def run_pipeline(
|
|
| 433 |
raise FileNotFoundError(f"Raw EEG file not found: {input_path}")
|
| 434 |
|
| 435 |
logger.info("Reading raw EEG from %s", input_path)
|
|
|
|
|
|
|
| 436 |
if input_path.suffix.lower() == ".edf":
|
| 437 |
raw = mne.io.read_raw_edf(input_path, preload=True, verbose="ERROR")
|
| 438 |
else:
|
|
@@ -468,6 +470,8 @@ def run_pipeline(
|
|
| 468 |
|
| 469 |
if __name__ == "__main__":
|
| 470 |
# Day-2 CLI entrypoint — runs with default paths against `data/raw/eeg.fif`.
|
| 471 |
-
#
|
|
|
|
|
|
|
| 472 |
# python -m src.pipelines.eeg_pipeline
|
| 473 |
run_pipeline()
|
|
|
|
| 433 |
raise FileNotFoundError(f"Raw EEG file not found: {input_path}")
|
| 434 |
|
| 435 |
logger.info("Reading raw EEG from %s", input_path)
|
| 436 |
+
# Format dispatch: .edf via read_raw_edf, anything else (FIF, gzipped FIF)
|
| 437 |
+
# via read_raw_fif. .bdf / .set / .vhdr support can be added here.
|
| 438 |
if input_path.suffix.lower() == ".edf":
|
| 439 |
raw = mne.io.read_raw_edf(input_path, preload=True, verbose="ERROR")
|
| 440 |
else:
|
|
|
|
| 470 |
|
| 471 |
if __name__ == "__main__":
|
| 472 |
# Day-2 CLI entrypoint — runs with default paths against `data/raw/eeg.fif`.
|
| 473 |
+
# Defaults to `eog_ch_name=None` (ICA disabled). Pass an EOG channel
|
| 474 |
+
# name programmatically via run_pipeline(eog_ch_name=...) to enable
|
| 475 |
+
# artifact rejection. Argument parsing (argparse / click) lands later.
|
| 476 |
# python -m src.pipelines.eeg_pipeline
|
| 477 |
run_pipeline()
|
tests/pipelines/test_eeg_pipeline.py
CHANGED
|
@@ -408,7 +408,7 @@ class TestRunPipeline:
|
|
| 408 |
assert first == second, "EEG pipeline output must be byte-deterministic"
|
| 409 |
|
| 410 |
def test_run_pipeline_raises_when_input_missing(self, tmp_path: Path) -> None:
|
| 411 |
-
with pytest.raises(FileNotFoundError):
|
| 412 |
run_pipeline(
|
| 413 |
input_path=tmp_path / "nope.fif",
|
| 414 |
output_path=tmp_path / "out.parquet",
|
|
|
|
| 408 |
assert first == second, "EEG pipeline output must be byte-deterministic"
|
| 409 |
|
| 410 |
def test_run_pipeline_raises_when_input_missing(self, tmp_path: Path) -> None:
|
| 411 |
+
with pytest.raises(FileNotFoundError, match="Raw EEG file not found"):
|
| 412 |
run_pipeline(
|
| 413 |
input_path=tmp_path / "nope.fif",
|
| 414 |
output_path=tmp_path / "out.parquet",
|