Spaces:

mekosotto
/

hackathon

Running

App Files Files Community

mekosotto Claude Sonnet 4.6 committed 7 days ago

Commit

4d00e0f

1 Parent(s): 853cb9e

refactor(mri): bind ROI_STATS to callables; guard volume/mask shape mismatch

Browse files

Files changed (2) hide show

src/pipelines/mri_pipeline.py +39 -11
tests/pipelines/test_mri_pipeline.py +14 -0

src/pipelines/mri_pipeline.py CHANGED Viewed

@@ -98,7 +98,6 @@ def mask_brain(
 # Default ROI partition: split a (D, H, W) volume into 2×2×2 = 8 octant ROIs.
 # Octant index follows binary (z, y, x) ordering: 0..7.
 DEFAULT_N_ROI_AXES: tuple[int, int, int] = (2, 2, 2)
-ROI_STATS: tuple[str, ...] = ("mean", "std", "p10", "p50", "p90", "voxel_count")
 def _roi_slices(
@@ -123,18 +122,32 @@ def _roi_slices(
     return out
 def _roi_stats_for(values: np.ndarray) -> dict[str, float]:
-    """Compute the 6 ROI stats. Empty array → all 0.0 (no-NaN contract)."""
     if values.size == 0:
-        return {stat: 0.0 for stat in ROI_STATS}
-    return {
-        "mean": float(values.mean()),
-        "std": float(values.std()),
-        "p10": float(np.percentile(values, 10)),
-        "p50": float(np.percentile(values, 50)),
-        "p90": float(np.percentile(values, 90)),
-        "voxel_count": float(values.size),
-    }
 def extract_features_from_volume(
@@ -150,6 +163,13 @@ def extract_features_from_volume(
     90th percentile / voxel count. Empty ROIs (no mask voxels) report all
     zeros so the resulting Parquet has no NaN values.
     Args:
         volume: 3-D numeric `np.ndarray` (already validated).
         mask: Boolean `np.ndarray` of the same shape (from `mask_brain`).
@@ -158,7 +178,15 @@ def extract_features_from_volume(
     Returns:
         Flat dict `{"feat_roi{i}_{stat}": float}` of length
         ``prod(n_roi_axes) * len(ROI_STATS)``.
     """
     feats: dict[str, float] = {}
     slices = _roi_slices(volume.shape, n_roi_axes)
     for i, sl in enumerate(slices):

 # Default ROI partition: split a (D, H, W) volume into 2×2×2 = 8 octant ROIs.
 # Octant index follows binary (z, y, x) ordering: 0..7.
 DEFAULT_N_ROI_AXES: tuple[int, int, int] = (2, 2, 2)
 def _roi_slices(
     return out
+# Statistical functions, each paired with its column-label name. The
+# `ROI_STATS` tuple below is derived from these (name, function) pairs, so
+# labels and computations cannot drift out of sync (a class of bug the prior
+# parallel-list design was vulnerable to — same pattern as EEG's
+# _STATS_FUNCS).
+#
+# `mean`/`std` use NumPy with `ddof=0` (biased / population estimators).
+# `p10`/`p50`/`p90` use `np.percentile` default linear interpolation.
+# `voxel_count` is stored as float for column-uniformity in the eventual
+# Parquet, but always represents a whole number (assertable via
+# `v == float(int(v))`).
+_ROI_STATS_FUNCS: tuple[tuple[str, "object"], ...] = (
+    ("mean", lambda v: float(v.mean())),
+    ("std", lambda v: float(v.std())),
+    ("p10", lambda v: float(np.percentile(v, 10))),
+    ("p50", lambda v: float(np.percentile(v, 50))),
+    ("p90", lambda v: float(np.percentile(v, 90))),
+    ("voxel_count", lambda v: float(v.size)),
+)
+ROI_STATS: tuple[str, ...] = tuple(name for name, _ in _ROI_STATS_FUNCS)
 def _roi_stats_for(values: np.ndarray) -> dict[str, float]:
+    """Compute the ROI stats. Empty array → all 0.0 (no-NaN contract)."""
     if values.size == 0:
+        return {name: 0.0 for name, _ in _ROI_STATS_FUNCS}
+    return {name: fn(values) for name, fn in _ROI_STATS_FUNCS}
 def extract_features_from_volume(
     90th percentile / voxel count. Empty ROIs (no mask voxels) report all
     zeros so the resulting Parquet has no NaN values.
+    Statistical conventions:
+      - ``mean`` / ``std`` use ``ddof=0`` (biased / population estimators).
+      - ``p10`` / ``p50`` / ``p90`` use ``np.percentile`` with the default
+        linear interpolation.
+      - ``voxel_count`` is stored as float for column uniformity but always
+        represents a whole number.
     Args:
         volume: 3-D numeric `np.ndarray` (already validated).
         mask: Boolean `np.ndarray` of the same shape (from `mask_brain`).
     Returns:
         Flat dict `{"feat_roi{i}_{stat}": float}` of length
         ``prod(n_roi_axes) * len(ROI_STATS)``.
+    Raises:
+        ValueError: if `volume.shape` and `mask.shape` differ.
     """
+    if volume.shape != mask.shape:
+        raise ValueError(
+            f"volume.shape {volume.shape} != mask.shape {mask.shape}"
+        )
     feats: dict[str, float] = {}
     slices = _roi_slices(volume.shape, n_roi_axes)
     for i, sl in enumerate(slices):

tests/pipelines/test_mri_pipeline.py CHANGED Viewed

@@ -189,3 +189,17 @@ class TestExtractFeaturesFromVolume:
         a = extract_features_from_volume(vol, mask)
         b = extract_features_from_volume(vol, mask)
         assert a == b

         a = extract_features_from_volume(vol, mask)
         b = extract_features_from_volume(vol, mask)
         assert a == b
+    def test_roi_stats_labels_and_funcs_stay_in_sync(self) -> None:
+        """ROI_STATS labels must equal the names in _ROI_STATS_FUNCS — single source of truth."""
+        from src.pipelines.mri_pipeline import _ROI_STATS_FUNCS
+        derived_names = tuple(name for name, _ in _ROI_STATS_FUNCS)
+        assert derived_names == ROI_STATS
+    def test_raises_on_shape_mismatch(self) -> None:
+        """volume.shape and mask.shape must agree — the contract is enforced."""
+        vol = np.zeros((8, 8, 8), dtype=np.float64)
+        bad_mask = np.zeros((4, 4, 4), dtype=bool)
+        with pytest.raises(ValueError, match=r"volume\.shape .* != mask\.shape"):
+            extract_features_from_volume(vol, bad_mask)