| """Fastembed wrapper — ONNX-based, CPU-only, no torch dep. | |
| Public entry: `Embedder().encode(texts) -> np.ndarray[N, D]`. Model is | |
| loaded lazily on first call. Output is float32 to match FAISS's expected | |
| input dtype. | |
| """ | |
| from __future__ import annotations | |
| import numpy as np | |
| from src.core.logger import get_logger | |
# Module-level logger obtained from the project's logging helper, keyed by
# this module's name.
logger = get_logger(__name__)

# Default embedding model.
# bge-small-en-v1.5: 384-dim, ~33MB ONNX, MTEB top-tier for size class.
_MODEL_NAME = "BAAI/bge-small-en-v1.5"

# Vector width of the default model above; Embedder.encode() uses it as the
# second dimension of the empty array it returns for empty input.
EMBEDDING_DIM = 384
class Embedder:
    """Lazy-loaded fastembed wrapper. One instance per process is enough.

    The underlying ``TextEmbedding`` model is only constructed on the first
    non-empty ``encode()`` call, so creating an ``Embedder`` is cheap.

    Args:
        model_name: fastembed model identifier. Defaults to ``_MODEL_NAME``.
    """

    def __init__(self, model_name: str = _MODEL_NAME) -> None:
        self._model_name = model_name
        self._model = None  # lazy-loaded on first encode()
        # Width used for empty results. Starts at the default model's width
        # and is replaced by the width actually observed on the first real
        # encode, so a custom model does not keep reporting 384 columns.
        self._dim = EMBEDDING_DIM

    def _ensure_model(self) -> None:
        """Construct the fastembed model if it has not been loaded yet."""
        if self._model is not None:
            return
        # Imported here so that importing this module does not import
        # fastembed until an embedding is actually requested.
        from fastembed import TextEmbedding

        logger.info("Loading fastembed model %s (one-time)", self._model_name)
        self._model = TextEmbedding(model_name=self._model_name)

    def _empty(self) -> np.ndarray:
        """Return a float32 matrix with zero rows and the known width."""
        return np.zeros((0, self._dim), dtype=np.float32)

    def encode(self, texts: list[str]) -> np.ndarray:
        """Embed ``texts`` and return the vectors as one float32 matrix.

        Args:
            texts: Strings to embed.

        Returns:
            A 2-D ``float32`` array with one row per input text. For empty
            input the array has zero rows; its width is ``EMBEDDING_DIM``
            until a non-empty call has revealed the model's real width.
        """
        if not texts:
            # Nothing to embed: skip loading the model entirely.
            return self._empty()

        self._ensure_model()
        vectors = list(self._model.embed(texts))
        if not vectors:
            # ``texts`` was truthy but yielded nothing (e.g. an exhausted
            # generator). np.array([]) would be 1-D, so return the 2-D form.
            return self._empty()

        matrix = np.array(vectors, dtype=np.float32)
        if matrix.ndim == 2:
            # Remember the real width for subsequent empty-input calls.
            self._dim = matrix.shape[1]
        return matrix