| import uuid |
| from typing import Literal, Any |
|
|
| import pandas as pd |
| from qdrant_client import models |
|
|
| from src.config import qdrant_client |
|
|
|
|
| def qdrant_create_index( |
| index_name: str, |
| dim: int, |
| distance: Literal["cosine", "euclid", "manhattan"], |
| ): |
| distance_mode = None |
| match distance: |
| case "cosine": |
| distance_mode = models.Distance.COSINE |
| case "euclid": |
| distance_mode = models.Distance.EUCLID |
| case "manhattan": |
| distance_mode = models.Distance.MANHATTAN |
| case _: |
| return ValueError(distance) |
| |
| return qdrant_client.create_collection( |
| collection_name=index_name, |
| vectors_config=models.VectorParams( |
| size=dim, |
| distance=distance_mode, |
| ) |
| ) |
|
|
|
|
def qdrant_insert(df: pd.DataFrame, index_name: str) -> Any:
    """Upsert every row of *df* as a point in the collection *index_name*.

    The frame must expose ``doc_id``, ``text`` and ``vector`` columns. Each
    row is stored under a freshly generated random UUID; ``doc_id`` and
    ``text`` go into the point payload and ``vector`` becomes the point
    vector.

    Returns:
        Whatever ``qdrant_client.upsert`` returns.
    """
    points = []
    for record in df.itertuples(index=False):
        point_id = str(uuid.uuid4())
        embedding = list(record.vector)
        metadata = {
            "doc_id": record.doc_id,
            "text": record.text,
        }
        points.append(
            models.PointStruct(id=point_id, vector=embedding, payload=metadata)
        )

    return qdrant_client.upsert(collection_name=index_name, points=points)
|
|
|
|
def qdrant_search(
    index_name: str,
    vector: list,
    limit: int = 5,
) -> "models.QueryResponse":
    """Query the collection *index_name* for the points nearest to *vector*.

    Args:
        index_name: Name of the collection to search.
        vector: Query vector, passed as ``query`` to ``query_points``.
        limit: Maximum number of results requested (default 5).

    Returns:
        The response object from ``qdrant_client.query_points``, returned
        unmodified. This is not a plain list: per the Qdrant client API the
        scored hits are on its ``points`` attribute.
    """
    return qdrant_client.query_points(
        collection_name=index_name,
        query=vector,
        limit=limit,
    )
|
|