mekosotto Claude Sonnet 4.6 committed on
Commit
ea055f0
·
1 Parent(s): c4c7642

docs(eeg): correct float64 dtype claim; tighten test match; clarify CLI default

Browse files
AGENTS.md CHANGED
@@ -89,7 +89,7 @@ refactored into a pipeline.
89
  ## 6. Storage Format Convention
90
 
91
  All `data/processed/` outputs MUST be **Parquet** (`pyarrow` engine, `compression="snappy"`):
92
- - Preserves dtypes (uint8 fingerprints stay uint8; float32 EEG features stay float32) — CSV silently widens numeric columns and is unsuitable for the high-dimensional float arrays produced by the EEG and MRI pipelines.
93
  - Byte-deterministic with fixed compression and single-threaded writes (satisfies §4 Determinism).
94
  - Read with `pd.read_parquet(path)`; no dtype hints required.
95
 
 
89
  ## 6. Storage Format Convention
90
 
91
  All `data/processed/` outputs MUST be **Parquet** (`pyarrow` engine, `compression="snappy"`):
92
+ - Preserves dtypes (uint8 fingerprints stay uint8; float64 EEG features stay float64) — CSV silently widens numeric columns and is unsuitable for the high-dimensional float arrays produced by the EEG and MRI pipelines.
93
  - Byte-deterministic with fixed compression and single-threaded writes (satisfies §4 Determinism).
94
  - Read with `pd.read_parquet(path)`; no dtype hints required.
95
 
src/pipelines/eeg_pipeline.py CHANGED
@@ -433,6 +433,8 @@ def run_pipeline(
433
  raise FileNotFoundError(f"Raw EEG file not found: {input_path}")
434
 
435
  logger.info("Reading raw EEG from %s", input_path)
 
 
436
  if input_path.suffix.lower() == ".edf":
437
  raw = mne.io.read_raw_edf(input_path, preload=True, verbose="ERROR")
438
  else:
@@ -468,6 +470,8 @@ def run_pipeline(
468
 
469
  if __name__ == "__main__":
470
  # Day-2 CLI entrypoint — runs with default paths against `data/raw/eeg.fif`.
471
- # Argument parsing (argparse / click) will land in a later task.
 
 
472
  # python -m src.pipelines.eeg_pipeline
473
  run_pipeline()
 
433
  raise FileNotFoundError(f"Raw EEG file not found: {input_path}")
434
 
435
  logger.info("Reading raw EEG from %s", input_path)
436
+ # Format dispatch: .edf via read_raw_edf, anything else (FIF, gzipped FIF)
437
+ # via read_raw_fif. .bdf / .set / .vhdr support can be added here.
438
  if input_path.suffix.lower() == ".edf":
439
  raw = mne.io.read_raw_edf(input_path, preload=True, verbose="ERROR")
440
  else:
 
470
 
471
  if __name__ == "__main__":
472
  # Day-2 CLI entrypoint — runs with default paths against `data/raw/eeg.fif`.
473
+ # Defaults to `eog_ch_name=None` (ICA disabled). Pass an EOG channel
474
+ # name programmatically via run_pipeline(eog_ch_name=...) to enable
475
+ # artifact rejection. Argument parsing (argparse / click) lands later.
476
  # python -m src.pipelines.eeg_pipeline
477
  run_pipeline()
tests/pipelines/test_eeg_pipeline.py CHANGED
@@ -408,7 +408,7 @@ class TestRunPipeline:
408
  assert first == second, "EEG pipeline output must be byte-deterministic"
409
 
410
  def test_run_pipeline_raises_when_input_missing(self, tmp_path: Path) -> None:
411
- with pytest.raises(FileNotFoundError):
412
  run_pipeline(
413
  input_path=tmp_path / "nope.fif",
414
  output_path=tmp_path / "out.parquet",
 
408
  assert first == second, "EEG pipeline output must be byte-deterministic"
409
 
410
  def test_run_pipeline_raises_when_input_missing(self, tmp_path: Path) -> None:
411
+ with pytest.raises(FileNotFoundError, match="Raw EEG file not found"):
412
  run_pipeline(
413
  input_path=tmp_path / "nope.fif",
414
  output_path=tmp_path / "out.parquet",