Upload brain_virality_predictor/features.py with huggingface_hub
Browse files
brain_virality_predictor/features.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Distill 8 temporal UX signals into a 40-feature vector per video.
|
| 3 |
+
|
| 4 |
+
Per signal (8 signals × 5 features = 40):
|
| 5 |
+
1. mean — average activation
|
| 6 |
+
2. peak — maximum activation
|
| 7 |
+
3. variability — std dev of activation
|
| 8 |
+
4. hook — mean of first 4.5 seconds (first impression)
|
| 9 |
+
5. slope — linear trend (coef) across full video
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
import numpy as np
|
| 13 |
+
from typing import Dict, List, Tuple
|
| 14 |
+
|
| 15 |
+
# The 8 temporal UX signals this module summarizes (one time series each).
SIGNAL_NAMES: List[str] = [
    "aesthetic_appeal",
    "visual_fluency",
    "cognitive_load",
    "trust_affinity",
    "reward_anticipation",
    "motor_readiness",
    "surprise_novelty",
    "friction_anxiety",
]

# Summary statistics computed per signal (see module docstring for meanings).
_STAT_NAMES: List[str] = ["mean", "peak", "variability", "hook", "slope"]

# Flat feature ordering: signal-major, stat-minor — "{signal}__{stat}".
# Comprehension (vs. the append loop) also avoids leaking loop variables
# into the module namespace.
FEATURE_NAMES: List[str] = [
    f"{sig}__{stat}" for sig in SIGNAL_NAMES for stat in _STAT_NAMES
]

# Module-level sanity check: 8 signals x 5 stats = 40 features.
assert len(FEATURE_NAMES) == len(SIGNAL_NAMES) * len(_STAT_NAMES) == 40

HOOK_SECONDS = 4.5  # first impression window
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def extract_features(signals: Dict[str, np.ndarray], tr: float = 1.5) -> np.ndarray:
    """
    Distill per-signal time series into one flat feature vector.

    Parameters
    ----------
    signals : dict mapping signal_name -> (n_timesteps,) array.
        Signals may have different lengths. A missing signal is treated
        as a single zero sample (all its features become 0).
    tr : float
        Sampling interval in seconds between timesteps (default 1.5).

    Returns
    -------
    (40,) float32 vector, ordered per FEATURE_NAMES.
    """
    feats: List[float] = []
    # Number of leading samples covering the first HOOK_SECONDS; at least 1
    # so the hook mean is always defined. Loop-invariant, so hoisted.
    hook_idx = max(1, int(np.ceil(HOOK_SECONDS / tr)))

    for sig_name in SIGNAL_NAMES:
        ts = np.asarray(signals.get(sig_name, np.zeros(1)), dtype=float)

        mean = float(np.mean(ts))
        peak = float(np.max(ts))
        variability = float(np.std(ts))

        # hook = mean over the first-impression window
        hook = float(np.mean(ts[:hook_idx]))

        # slope = linear regression coefficient over time.
        # BUGFIX: the time axis is built per signal. The original cached it
        # from the FIRST signal's length, so np.polyfit raised a shape
        # mismatch whenever a later signal was longer than the first.
        if len(ts) > 1:
            t = np.arange(len(ts)) * tr
            slope = float(np.polyfit(t, ts, 1)[0])
        else:
            slope = 0.0  # trend undefined for a single sample

        feats.extend([mean, peak, variability, hook, slope])

    return np.array(feats, dtype=np.float32)
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def feature_vector_to_dict(vec: np.ndarray) -> Dict[str, float]:
    """Map the flat (40,) feature vector back to {feature_name: value}."""
    return dict(zip(FEATURE_NAMES, (float(v) for v in vec)))
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def top_positive_negative(feat_dict: Dict[str, float], n: int = 3) -> Tuple[List[str], List[str]]:
    """Return (top_n_positive_features, top_n_negative_features) by value."""
    # Rank names once, highest value first (stable sort preserves tie order),
    # then slice the same ranking from both ends.
    ranked_names = [
        name
        for name, _ in sorted(feat_dict.items(), key=lambda kv: kv[1], reverse=True)
    ]
    return ranked_names[:n], ranked_names[-n:]
|