| """Vector store module for document embedding and retrieval""" |
|
|
| from typing import List |
| from langchain_community.vectorstores import FAISS |
| from langchain_openai import OpenAIEmbeddings |
| from langchain_community.embeddings import HuggingFaceEmbeddings |
| |
| from langchain_core.documents import Document |
|
|
|
|
class VectorStore:
    """Manage a FAISS vector store and the MMR retriever built on top of it.

    Typical use: construct the object, call ``create_vectorstore`` once with
    the documents to index, then query through ``retrieve`` or hand the
    object returned by ``get_retriever`` to other components.
    """

    # Retrieval settings shared by every retriever this class builds.
    _SEARCH_TYPE = "mmr"
    _DEFAULT_RETRIEVER_K = 5
    # Per the LangChain retriever docs, values closer to 0 favour diversity
    # over pure similarity in MMR re-ranking.
    _MMR_LAMBDA_MULT = 0.25

    _NOT_INITIALIZED_MSG = (
        "Vector store not initialized. Call create_vectorstore first."
    )

    def __init__(self) -> None:
        """Set up the HuggingFace embedding model; no index is built yet.

        The ``sentence-transformers/all-MiniLM-L6-v2`` model is configured to
        run on CPU and to normalize the embeddings it produces.
        ``vectorstore`` and ``retriever`` stay ``None`` until
        ``create_vectorstore`` is called.
        """
        self.embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={"device": "cpu"},
            encode_kwargs={"normalize_embeddings": True},
        )
        self.vectorstore = None
        self.retriever = None

    def _build_retriever(self, k: int):
        """Return an MMR retriever over the current store that yields ``k`` documents."""
        return self.vectorstore.as_retriever(
            search_type=self._SEARCH_TYPE,
            search_kwargs={"k": k, "lambda_mult": self._MMR_LAMBDA_MULT},
        )

    def _require_initialized(self) -> None:
        """Raise ``ValueError`` unless ``create_vectorstore`` has populated the store."""
        if self.retriever is None:
            raise ValueError(self._NOT_INITIALIZED_MSG)

    def create_vectorstore(self, documents: "List[Document]") -> None:
        """
        Create the vector store (and its default retriever) from documents

        Args:
            documents: List of documents to embed

        Raises:
            ValueError: If ``documents`` is empty.
        """
        if not documents:
            raise ValueError("Cannot create a vector store from an empty document list.")
        # Must be ``self.embeddings`` -- the attribute assigned in __init__.
        self.vectorstore = FAISS.from_documents(documents, self.embeddings)
        self.retriever = self._build_retriever(self._DEFAULT_RETRIEVER_K)

    def get_retriever(self):
        """
        Get the retriever instance

        Returns:
            The retriever created by ``create_vectorstore`` (MMR search,
            k=5, lambda_mult=0.25)

        Raises:
            ValueError: If ``create_vectorstore`` has not been called yet.
        """
        self._require_initialized()
        return self.retriever

    def retrieve(self, query: str, k: int = 4) -> "List[Document]":
        """
        Retrieve relevant documents for a query

        Args:
            query: Search query
            k: Number of documents to retrieve

        Returns:
            List of relevant documents

        Raises:
            ValueError: If the store has not been created yet, or if ``k``
                is not a positive number.
        """
        self._require_initialized()
        if self.vectorstore is None:
            raise ValueError(self._NOT_INITIALIZED_MSG)
        if k < 1:
            raise ValueError(f"k must be a positive integer, got {k}")
        # The stored retriever is pinned to its own k, so build a retriever
        # for this call to honour the caller's ``k``. The retriever returned
        # by get_retriever() is left untouched.
        return self._build_retriever(k).invoke(query)