ek100-retrieval-demo / data_loader.py
jsurrea's picture
Update demo: text-to-narration retrieval, add precomputed assets
3671814 verified
"""
Loads pre-computed narration embeddings and metadata from the Space repo.
No GPU, no video downloads needed at runtime.
"""
import json
import numpy as np
from pathlib import Path
_PRECOMPUTED = Path(__file__).parent / "precomputed"
_metadata: list | None = None
_embeddings: np.ndarray | None = None
def get_demo_assets() -> tuple[list, np.ndarray]:
    """Return the pre-computed narration metadata and embedding array.

    Both assets are read from the ``_PRECOMPUTED`` directory on the first
    successful call and stored in the module-level ``_metadata`` and
    ``_embeddings`` caches; subsequent calls return the cached objects
    without touching the disk.

    Returns:
        A ``(metadata, embeddings)`` tuple: the object decoded from
        ``narrations_metadata.json`` and the array loaded from
        ``narration_embeddings.npy``.
        NOTE(review): the embeddings are expected to be shaped [N, 256] with
        one row per metadata entry; this is not validated here -- confirm
        against the script that produced the assets.

    Raises:
        FileNotFoundError: If either asset file is missing from
            ``_PRECOMPUTED``.
    """
    global _metadata, _embeddings

    if _metadata is not None and _embeddings is not None:
        return _metadata, _embeddings

    emb_path = _PRECOMPUTED / "narration_embeddings.npy"
    meta_path = _PRECOMPUTED / "narrations_metadata.json"
    if not emb_path.exists() or not meta_path.exists():
        raise FileNotFoundError(
            f"Pre-computed assets not found in {_PRECOMPUTED}. "
            "Make sure precomputed/ folder is included in the Space repo."
        )

    # Load into locals first so a failure while parsing the metadata cannot
    # leave the module-level caches half-populated.
    embeddings = np.load(emb_path)
    # JSON is UTF-8 by specification; being explicit keeps non-ASCII narration
    # text intact on hosts whose default locale encoding is something else.
    with open(meta_path, encoding="utf-8") as f:
        metadata = json.load(f)

    _metadata, _embeddings = metadata, embeddings
    print(f"Loaded {len(_metadata)} narrations, embeddings {_embeddings.shape}")
    return _metadata, _embeddings