File size: 1,466 Bytes
6d9c72b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
"""Sentence-transformers wrapper for async embedding generation."""
import asyncio
import logging
from typing import Any

from sentence_transformers import SentenceTransformer

# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)


class Embedder:
    """Async wrapper around a lazily loaded sentence-transformers model.

    The model is constructed on first use. Both model construction and the
    ``encode`` calls are blocking, so they are dispatched to the event
    loop's default executor instead of running on the loop itself.
    """

    def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
        """Remember the model name; the model itself is loaded on first use.

        Args:
            model_name: Name passed to ``SentenceTransformer`` when loading.
        """
        self._model_name = model_name
        self._model: SentenceTransformer | None = None
        # Serializes the first load so concurrent callers build the model once.
        self._lock = asyncio.Lock()

    async def load(self) -> None:
        """Load the embedding model if it has not been loaded yet.

        Safe to call repeatedly: once the model is set, later calls return
        immediately without touching the lock.
        """
        if self._model is not None:
            return
        async with self._lock:
            # Re-check under the lock: another task may have finished the
            # load while this one was waiting to acquire it.
            if self._model is not None:
                return
            logger.info("Loading embedder model: %s", self._model_name)
            loop = asyncio.get_running_loop()
            # Constructing the model is blocking work; run it in the default
            # executor so the event loop stays responsive meanwhile.
            self._model = await loop.run_in_executor(
                None, SentenceTransformer, self._model_name
            )

    async def encode(self, text: str) -> list[float]:
        """Encode a single text into an embedding vector.

        Args:
            text: The text to embed.

        Returns:
            The result of the model's ``encode`` call converted with
            ``.tolist()``.
        """
        await self.load()
        loop = asyncio.get_running_loop()
        embedding = await loop.run_in_executor(
            None, self._model.encode, text
        )
        return embedding.tolist()

    async def encode_batch(self, texts: list[str]) -> list[list[float]]:
        """Encode multiple texts with a single model call.

        Args:
            texts: The texts to embed.

        Returns:
            One ``.tolist()``-converted embedding per input text, in the
            order the model returns them. An empty input yields ``[]``
            without loading the model.
        """
        if not texts:
            # Nothing to embed: skip the model load and the thread hop.
            return []
        await self.load()
        loop = asyncio.get_running_loop()
        embeddings = await loop.run_in_executor(
            None, self._model.encode, texts
        )
        return [vector.tolist() for vector in embeddings]