"""Embedding model — sentence-transformers, runs locally, offline-friendly.
Default: ``BAAI/bge-m3`` (2.27 GB, 1024 dim, multilingual incl. EN/HU/DE/FR/...).
Pre-downloaded at Docker build time → no network call at runtime.
Implements LangChain's ``Embeddings`` interface so the Chroma store and the
RAG subgraph can use it natively.
"""
from __future__ import annotations
from functools import lru_cache
from langchain_core.embeddings import Embeddings
from sentence_transformers import SentenceTransformer
from config import settings
@lru_cache(maxsize=1)
def _get_model() -> SentenceTransformer:
    """Lazily load and cache the embedding model.

    The first invocation pays the load cost (~2-5 s); every later call
    returns the same cached ``SentenceTransformer`` instance.
    """
    model_name = settings.embedding_model
    return SentenceTransformer(model_name)
class SentenceTransformerEmbeddings(Embeddings):
    """LangChain ``Embeddings`` adapter backed by sentence-transformers."""

    def embed_documents(self, texts: list[str]) -> list[list[float]]:
        """Encode *texts* in a single batch and return plain float lists.

        Batching is noticeably faster than encoding chunk-by-chunk.
        """
        encoded = _get_model().encode(
            texts,
            convert_to_numpy=True,   # ndarray result → cheap .tolist() below
            show_progress_bar=False,
        )
        return encoded.tolist()

    def embed_query(self, text: str) -> list[float]:
        """Encode one query string (used by the chat search_documents tool)."""
        [vector] = self.embed_documents([text])
        return vector
def build_embeddings() -> SentenceTransformerEmbeddings:
    """Factory for the embeddings adapter used by the vector store."""
    adapter = SentenceTransformerEmbeddings()
    return adapter