# NOTE(review): the three lines below were residual Hugging Face Spaces page
# chrome ("Spaces: Running Running") captured when this file was scraped from
# a web page — they are not code and have been converted to this comment.
import numpy as np
from sentence_transformers import SentenceTransformer
# Default number of chunks returned by ChunkRetriever.get_top_chunks.
TOP_K = 5


class ChunkRetriever:
    """Stage 1 bi-encoder retriever.

    Quickly narrows hundreds of candidate text chunks down to the few that
    are semantically relevant to a given LLM output, using MiniLM sentence
    embeddings ranked by cosine similarity. Intended as the fast first stage
    ahead of a more expensive reranker.
    """

    def __init__(self):
        # all-MiniLM-L6-v2: a small, fast bi-encoder suited to first-stage
        # retrieval. Loaded once per instance.
        self.model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
        print("Retriever (MiniLM-L6-v2) loaded.")

    def get_top_chunks(self, llm_output: str, chunks: list[str], top_k: int = TOP_K) -> list[str]:
        """Return the up-to-``top_k`` chunks most similar to ``llm_output``.

        Embeds the query and every chunk, ranks chunks by cosine similarity
        (descending), and returns the best ``top_k``.

        Args:
            llm_output: Query text the chunks are compared against.
            chunks: Candidate text chunks.
            top_k: Maximum number of chunks to return (defaults to ``TOP_K``).

        Returns:
            At most ``top_k`` chunks. When ``len(chunks) <= top_k`` the
            embedding step is skipped entirely and the chunks come back in
            their original (unranked) order.
        """
        if len(chunks) <= top_k:
            # Nothing to narrow down — skip the (relatively costly) encode
            # calls. Return a copy so callers cannot mutate our input list
            # through the returned reference (the original aliased it).
            return list(chunks)
        query_embedding = self.model.encode(llm_output, normalize_embeddings=True)
        chunk_embeddings = self.model.encode(chunks, normalize_embeddings=True, batch_size=32)
        # Cosine similarity reduces to a plain dot product because both sides
        # were L2-normalized by encode(..., normalize_embeddings=True).
        similarities = np.dot(chunk_embeddings, query_embedding)
        # argsort ascending, reversed for descending, then keep the top_k.
        top_indices = np.argsort(similarities)[::-1][:top_k]
        return [chunks[i] for i in top_indices]