| import sys |
| import h5py |
| import numpy as np |
| from pathlib import Path |
|
|
| |
| |
| |
# Filesystem layout (machine-specific absolute paths).
# Checkout of the algonauts2025 code base; put on sys.path so `src.data` is importable.
ALGONAUTS_REPO = Path("/raid/lttung05/fmri_encoder/code/algonauts2025")
# Root directory handed to the fMRI loaders.
FMRI_PATH = Path("/raid/lttung05/fmri_encoder/data/fmri/algonauts_2025.competitors")
# Base directory of the extracted feature shards (<FEAT_PATH>/<model>/<series>/**/*.h5).
FEAT_PATH = Path("/raid/lttung05/fmri_encoder/data/features")
| |
|
|
| |
| |
| |
# Experiment configuration.
SUBJECTS = [1, 2, 3, 5]  # subject IDs passed to both fMRI loaders
MODEL_NAME = "Llama-3.2-3B"  # feature sub-directory under the features root
LAYER = "model.layers.11"  # HDF5 dataset key read from every shard
# 20 log-spaced values from 1e-2 to 1e10.
# NOTE(review): presumably regularisation strengths for a later fit; not used in this part of the file.
ALPHAS = np.logspace(-2, 10, 20)

# Friends seasons per split (the three lists are disjoint) and the Movie10 titles used for fitting.
TRAIN_SEASONS = list(range(1, 5))  # seasons 1-4
ALPHA_SEASONS = [5]
VAL_SEASONS = [6]
TRAIN_MOVIES = ["bourne", "wolf"]
|
|
| |
| |
| |
# Must run before the `src.data` import below: `src` is resolved relative to the
# repository checkout, which is given top priority on the module search path.
sys.path.insert(0, str(ALGONAUTS_REPO))
|
|
| from src.data import ( |
| load_algonauts2025_friends_fmri, |
| load_algonauts2025_movie10_fmri, |
| episode_filter, |
| ) |
|
|
| |
|
|
| |
| |
| |
|
|
class FeatureStore:
    """Reads one episode from its HDF5 shard on demand; holds no arrays.

    At construction every ``*.h5`` file below ``root / model / series`` is
    indexed by the episode name parsed from its filename. Only the
    episode -> path mapping is kept in memory; each lookup re-opens the shard
    and reads the requested layer from disk.

    Lookups accept either an episode string or a tuple whose first element is
    the episode string (the remaining tuple elements are ignored).

    Parameters
    ----------
    root : base features directory (e.g. .../features)
    model : model name (e.g. "Llama-3.2-3B")
    layer : HDF5 key (e.g. "model.layers.11")
    series : "friends" | "movie10" | "ood"
    """

    def __init__(self, root: Path, model: str, layer: str, series: str):
        self._layer = layer
        self._paths: dict[str, Path] = {}

        # Sorted traversal keeps the index deterministic: if two shards parse
        # to the same episode name, the path that sorts last wins.
        for path in sorted((root / model / series).rglob("*.h5")):
            self._paths[self._episode_from_stem(path.stem)] = path

    @staticmethod
    def _episode_from_stem(stem: str) -> str:
        """Extract the episode name from a shard filename stem."""
        if stem.endswith("_video"):
            # "<prefix>-<episode>_video": text after the last "-" up to the next "_".
            return stem.split("-")[-1].split("_")[0]
        # Otherwise the episode is whatever follows the last underscore.
        return stem.rsplit("_", 1)[-1]

    @staticmethod
    def _key(ep: "str | tuple") -> str:
        """Reduce an episode identifier to the string used as index key."""
        return ep[0] if isinstance(ep, tuple) else ep

    def __getitem__(self, ep: "str | tuple") -> np.ndarray:
        """Return the configured layer for one episode as a float32 array.

        Raises
        ------
        KeyError : no shard was indexed for the episode, or the layer key is
            absent from the shard.
        """
        key = self._key(ep)
        try:
            path = self._paths[key]
        except KeyError:
            raise KeyError(
                f"no feature shard indexed for episode {key!r}"
            ) from None
        # Explicit read-only mode: the default differed in h5py < 3, and this
        # class must never create or modify a shard.
        with h5py.File(path, "r") as f:
            # squeeze() drops every length-1 axis of the stored dataset.
            # NOTE(review): that includes a length-1 leading axis - confirm
            # that no shard holds a single time step.
            return f[self._layer][:].squeeze().astype(np.float32)

    def __contains__(self, ep: "str | tuple") -> bool:
        """Report whether a shard was indexed for the episode."""
        return self._key(ep) in self._paths

    def keys(self):
        """Return a view of all indexed episode names."""
        return self._paths.keys()
|
|
|
|
| |
| |
| |
print("Loading fMRI data...")

# Friends: every season that any split (fit / alpha / validation) needs.
friends_fmri = load_algonauts2025_friends_fmri(
    root=FMRI_PATH,
    subjects=SUBJECTS,
    seasons=TRAIN_SEASONS + ALPHA_SEASONS + VAL_SEASONS,
)
# Movie10: only the training movies, run 1.
movie10_fmri = load_algonauts2025_movie10_fmri(
    root=FMRI_PATH,
    subjects=SUBJECTS,
    movies=TRAIN_MOVIES,
    runs=[1],
)
# Single lookup table over both series; on a key clash the Movie10 entry wins.
all_fmri = {**friends_fmri, **movie10_fmri}
print(f" {len(friends_fmri)} Friends + {len(movie10_fmri)} Movie10 episodes")

# Fail with a clear message instead of a bare StopIteration from next() below.
if not all_fmri:
    raise RuntimeError(f"No fMRI episodes were loaded from {FMRI_PATH}")

# Every entry is unpacked as a 3-D array; the first and last axis sizes are
# taken from an arbitrary episode.
# NOTE(review): axis meaning (subjects, time, parcels) is inferred from the
# variable names - confirm against the loaders.
_sample = next(iter(all_fmri.values()))
n_subs, _, n_parcels = _sample.shape
print(f" n_subs={n_subs}, n_parcels={n_parcels}")
|
|
| |
| |
| |
print(f"\nBuilding FeatureStore: {MODEL_NAME} / {LAYER}")
# One lazy store per series; only shard paths are indexed here, no arrays are read.
_friends_store = FeatureStore(FEAT_PATH, MODEL_NAME, LAYER, "friends")
_movie10_store = FeatureStore(FEAT_PATH, MODEL_NAME, LAYER, "movie10")
|
|
|
|
def get_features(ep) -> np.ndarray:
    """Load features for one episode (float32, shape: time × feat_dim).

    The Friends store is consulted first; any episode it does not contain is
    delegated to the Movie10 store, so an episode unknown to both surfaces as
    the Movie10 store's lookup error.
    """
    store = _friends_store if ep in _friends_store else _movie10_store
    return store[ep]
|
|
|
|
| |
# Probe the first episode key of the fMRI table to learn the size of the
# last feature axis.
feat_dim = get_features(next(iter(all_fmri))).shape[-1]
print(f" feat_dim={feat_dim}")
|
|
| |
| |
| |
# Keys are ordered by their string form so the split lists are reproducible
# across runs (and sortable even if the key types differ between series).
all_episodes = sorted(all_fmri.keys(), key=str)

# One predicate per split, built by the project helper `episode_filter`.
# NOTE(review): `movies=[]` presumably excludes all Movie10 episodes from the
# alpha and validation splits - confirm against episode_filter.
fit_filter = episode_filter(seasons=TRAIN_SEASONS, movies=TRAIN_MOVIES, runs=[1])
alpha_filter = episode_filter(seasons=ALPHA_SEASONS, movies=[], runs=[1])
val_filter = episode_filter(seasons=VAL_SEASONS, movies=[], runs=[1])

fit_episodes = [ep for ep in all_episodes if fit_filter(ep)]
alpha_episodes = [ep for ep in all_episodes if alpha_filter(ep)]
val_episodes = [ep for ep in all_episodes if val_filter(ep)]

print(f"\nFit episodes: {len(fit_episodes)}")
print(f"Alpha episodes: {len(alpha_episodes)}")
print(f"Val episodes: {len(val_episodes)}")