cortexlab-dashboard / synthetic.py
SID2000's picture
Upload folder using huggingface_hub
f2e4921 verified
"""Biologically realistic synthetic fMRI data generation.
Generates data with hemodynamic response convolution, modality-specific
activation patterns, spatial autocorrelation, temporal noise structure,
and scanner drift - mimicking real fMRI recordings.
"""
import numpy as np
# --- Hemodynamic Response Function ---
def generate_hrf(tr_seconds=1.0, duration=30.0):
"""Canonical double-gamma hemodynamic response function.
Models the BOLD signal: a positive peak at ~5-6s followed by a
smaller negative undershoot at ~15s.
"""
t = np.arange(0, duration, tr_seconds)
# Double gamma parameters (SPM canonical)
a1, b1 = 6.0, 1.0 # positive peak
a2, b2 = 16.0, 1.0 # undershoot
c = 1.0 / 6.0 # undershoot ratio
from scipy.stats import gamma as gamma_dist
h = gamma_dist.pdf(t, a1, scale=b1) - c * gamma_dist.pdf(t, a2, scale=b2)
h = h / np.max(np.abs(h)) # normalize to [-1, 1]
return h
def generate_stimulus_events(n_timepoints, tr_seconds=1.0, n_events=5, seed=42):
"""Generate random stimulus onset times as a binary event train.
Returns a (n_timepoints,) array with 1s at stimulus onsets.
Events are spaced at least 8 seconds apart.
"""
rng = np.random.default_rng(seed)
total_seconds = n_timepoints * tr_seconds
min_gap = 8.0 # minimum inter-stimulus interval
events = np.zeros(n_timepoints)
onsets = []
attempts = 0
while len(onsets) < n_events and attempts < 1000:
t = rng.uniform(2.0, total_seconds - 10.0)
if all(abs(t - o) > min_gap for o in onsets):
onsets.append(t)
attempts += 1
for onset in onsets:
idx = int(onset / tr_seconds)
if 0 <= idx < n_timepoints:
events[idx] = 1.0
return events
# --- Modality-Specific Activation Weights ---
# Weight for each ROI given a stimulus modality (0 = no response, 1 = maximum)
MODALITY_WEIGHTS = {
"visual": {
# Strong visual cortex activation
"V1": 1.0, "V2": 0.95, "V3": 0.85, "V4": 0.8,
"MT": 0.75, "MST": 0.7, "FFC": 0.65, "VVC": 0.6,
# Weak cross-modal
"A1": 0.05, "LBelt": 0.04, "MBelt": 0.03, "PBelt": 0.03, "A4": 0.02, "A5": 0.02,
# Minimal language
"44": 0.08, "45": 0.07, "IFJa": 0.06, "IFJp": 0.05,
"TPOJ1": 0.1, "TPOJ2": 0.08, "STV": 0.07, "PSL": 0.06,
# Moderate executive (attention)
"46": 0.3, "9-46d": 0.25, "8Av": 0.35, "8Ad": 0.3,
"FEF": 0.4, "p32pr": 0.15, "a32pr": 0.12,
},
"auditory": {
"V1": 0.03, "V2": 0.03, "V3": 0.02, "V4": 0.02,
"MT": 0.02, "MST": 0.01, "FFC": 0.01, "VVC": 0.01,
"A1": 1.0, "LBelt": 0.95, "MBelt": 0.9, "PBelt": 0.85, "A4": 0.75, "A5": 0.7,
"44": 0.15, "45": 0.12, "IFJa": 0.1, "IFJp": 0.08,
"TPOJ1": 0.25, "TPOJ2": 0.2, "STV": 0.3, "PSL": 0.2,
"46": 0.2, "9-46d": 0.15, "8Av": 0.12, "8Ad": 0.1,
"FEF": 0.08, "p32pr": 0.1, "a32pr": 0.08,
},
"language": {
"V1": 0.05, "V2": 0.04, "V3": 0.03, "V4": 0.03,
"MT": 0.02, "MST": 0.02, "FFC": 0.1, "VVC": 0.08,
"A1": 0.3, "LBelt": 0.25, "MBelt": 0.2, "PBelt": 0.15, "A4": 0.2, "A5": 0.15,
"44": 1.0, "45": 0.95, "IFJa": 0.85, "IFJp": 0.8,
"TPOJ1": 0.9, "TPOJ2": 0.85, "STV": 0.75, "PSL": 0.7,
"46": 0.5, "9-46d": 0.45, "8Av": 0.3, "8Ad": 0.25,
"FEF": 0.15, "p32pr": 0.35, "a32pr": 0.3,
},
"multimodal": {
"V1": 0.7, "V2": 0.65, "V3": 0.55, "V4": 0.5,
"MT": 0.5, "MST": 0.45, "FFC": 0.4, "VVC": 0.35,
"A1": 0.7, "LBelt": 0.65, "MBelt": 0.55, "PBelt": 0.5, "A4": 0.45, "A5": 0.4,
"44": 0.65, "45": 0.6, "IFJa": 0.5, "IFJp": 0.45,
"TPOJ1": 0.6, "TPOJ2": 0.55, "STV": 0.5, "PSL": 0.45,
"46": 0.4, "9-46d": 0.35, "8Av": 0.3, "8Ad": 0.25,
"FEF": 0.3, "p32pr": 0.25, "a32pr": 0.2,
},
}
def generate_realistic_predictions(
n_timepoints,
roi_indices,
stimulus_type="visual",
tr_seconds=1.0,
n_events=5,
snr=2.0,
seed=42,
):
"""Generate biologically realistic fMRI-like predictions.
Parameters
----------
n_timepoints : int
Number of TRs.
roi_indices : dict[str, np.ndarray]
ROI name -> vertex indices mapping.
stimulus_type : str
One of "visual", "auditory", "language", "multimodal".
tr_seconds : float
Repetition time in seconds.
n_events : int
Number of stimulus events.
snr : float
Signal-to-noise ratio (higher = cleaner signal).
seed : int
Random seed.
"""
rng = np.random.default_rng(seed)
n_vertices = max(max(v) for v in roi_indices.values()) + 1
predictions = np.zeros((n_timepoints, n_vertices))
# 1. Generate stimulus-evoked signal
events = generate_stimulus_events(n_timepoints, tr_seconds, n_events, seed)
hrf = generate_hrf(tr_seconds)
# Convolve events with HRF
bold_signal = np.convolve(events, hrf)[:n_timepoints]
# 2. Apply modality-specific weights per ROI
weights = MODALITY_WEIGHTS.get(stimulus_type, MODALITY_WEIGHTS["multimodal"])
for roi_name, vertices in roi_indices.items():
w = weights.get(roi_name, 0.1)
# Add per-ROI latency jitter (higher-order areas respond later)
latency_shift = 0
if roi_name in ["44", "45", "IFJa", "IFJp", "46", "9-46d"]:
latency_shift = int(2.0 / tr_seconds) # ~2s later for association cortex
elif roi_name in ["TPOJ1", "TPOJ2", "STV", "PSL"]:
latency_shift = int(1.5 / tr_seconds)
shifted = np.roll(bold_signal, latency_shift) * w
# Add per-vertex variation within ROI
for v in vertices:
if v < n_vertices:
vertex_scale = 0.8 + 0.4 * rng.random()
predictions[:, v] = shifted * vertex_scale
# 3. Add temporal autocorrelation (AR(1) noise)
ar_coeff = 0.5
noise = rng.standard_normal(predictions.shape)
for t in range(1, n_timepoints):
noise[t] += ar_coeff * noise[t - 1]
# 4. Add scanner drift (low-frequency sinusoidal)
t_axis = np.arange(n_timepoints) * tr_seconds
drift = 0.1 * np.sin(2 * np.pi * t_axis / (n_timepoints * tr_seconds * 0.8))
drift = drift[:, np.newaxis]
# 5. Combine signal + noise + drift
signal_power = np.std(predictions[predictions != 0]) if np.any(predictions != 0) else 1.0
noise_power = signal_power / max(snr, 0.1)
predictions = predictions + noise * noise_power + drift
# 6. Spatial smoothing (average with neighbors within same ROI)
for roi_name, vertices in roi_indices.items():
valid = vertices[vertices < n_vertices]
if len(valid) > 1:
roi_data = predictions[:, valid].copy()
kernel = np.ones(min(3, len(valid))) / min(3, len(valid))
for t in range(n_timepoints):
predictions[t, valid] = np.convolve(roi_data[t], kernel, mode="same")
return predictions
def generate_correlated_features(
brain_predictions,
alignment_strength=0.5,
feature_dim=512,
seed=42,
):
"""Generate model features with controllable correlation to brain data.
Parameters
----------
brain_predictions : np.ndarray
Brain data of shape (n_stimuli, n_vertices).
alignment_strength : float
0.0 = random features, 1.0 = perfectly correlated with brain.
feature_dim : int
Output feature dimensionality.
seed : int
Random seed.
Returns
-------
np.ndarray
Features of shape (n_stimuli, feature_dim).
"""
rng = np.random.default_rng(seed)
n_stimuli = brain_predictions.shape[0]
# Project brain data to feature_dim via random projection
n_vertices = brain_predictions.shape[1]
projection = rng.standard_normal((n_vertices, feature_dim)) / np.sqrt(n_vertices)
brain_projected = brain_predictions @ projection
# Generate random features
random_features = rng.standard_normal((n_stimuli, feature_dim))
# Mix: strength controls brain-alignment vs randomness
strength = np.clip(alignment_strength, 0.0, 1.0)
features = strength * brain_projected + (1 - strength) * random_features
# Standardize
features = (features - features.mean(axis=0)) / (features.std(axis=0) + 1e-8)
return features