from typing import Dict, List, Optional, Tuple

import numpy as np

from .openai_utils.embedding import EmbeddingModel


class VectorDatabase:
    """In-memory mapping from text to embedding vector with cosine search.

    Vectors are kept in ``self.vectors`` keyed by their source text, and the
    distinct texts are tracked in insertion order in ``self.texts``.
    """

    def __init__(self, embedding_model: Optional[EmbeddingModel] = None):
        """Create an empty database.

        Args:
            embedding_model: Object used to embed texts and queries. When
                ``None``, a default ``EmbeddingModel()`` is constructed.
        """
        self.vectors: Dict[str, np.ndarray] = {}
        self.texts: List[str] = []
        # Explicit None check so that a supplied model is never replaced just
        # because it happens to evaluate as falsy.
        self.embedding_model = (
            embedding_model if embedding_model is not None else EmbeddingModel()
        )

    async def abuild_from_list(self, list_of_text: List[str]) -> 'VectorDatabase':
        """Embed every text in ``list_of_text`` and insert it into the database.

        The embeddings are obtained with a single awaited call to
        ``embedding_model.async_get_embeddings`` and paired with the texts
        positionally.

        Args:
            list_of_text: Texts to embed and store.

        Returns:
            This database instance, so calls can be chained.
        """
        embeddings = await self.embedding_model.async_get_embeddings(list_of_text)
        for text, embedding in zip(list_of_text, embeddings):
            self.insert(text, np.array(embedding))
        return self

    def insert(self, text: str, vector: np.ndarray):
        """Store ``vector`` under ``text``.

        Inserting a text that is already present overwrites its vector and
        does not add a second entry to ``self.texts``, which keeps the list
        and the dict describing the same set of texts.

        Args:
            text: Key for the vector.
            vector: Embedding associated with ``text``.
        """
        if text not in self.vectors:
            self.texts.append(text)
        self.vectors[text] = vector

    def search_by_text(self, query: str, k: int = 4) -> List[Tuple[str, float]]:
        """Return the ``k`` stored texts most similar to ``query``.

        Similarity is the cosine similarity between the query embedding
        (from ``embedding_model.get_embedding``) and each stored vector.

        Args:
            query: Text to embed and compare against the stored vectors.
            k: Maximum number of results to return. Values ``<= 0`` yield an
                empty list.

        Returns:
            Up to ``k`` ``(text, similarity)`` pairs sorted by descending
            similarity. Pairs involving a zero-norm vector score ``0.0``.
        """
        # Nothing to rank: return before spending a call on the embedding model.
        if k <= 0 or not self.vectors:
            return []

        query_embedding = np.asarray(self.embedding_model.get_embedding(query))
        # Loop-invariant, so compute the query norm once.
        query_norm = np.linalg.norm(query_embedding)

        similarities: List[Tuple[str, float]] = []
        for text, vector in self.vectors.items():
            denominator = query_norm * np.linalg.norm(vector)
            if denominator == 0:
                # Cosine similarity is undefined for a zero vector; use 0.0
                # rather than letting NaN corrupt the sort order.
                score = 0.0
            else:
                score = float(np.dot(query_embedding, vector) / denominator)
            similarities.append((text, score))

        # sorted() is stable, so ties keep their insertion order.
        return sorted(similarities, key=lambda pair: pair[1], reverse=True)[:k]