Spaces:
Runtime error
Runtime error
| import asyncio | |
| import json | |
| import os | |
| import sys | |
| import numpy as np | |
# Put the parent of this script's directory on sys.path so that the
# project's `app` package can be imported when the script is run directly.
_script_dir = os.path.dirname(__file__)
server_dir = os.path.abspath(os.path.join(_script_dir, '..'))
sys.path.append(server_dir)

# These imports must come after the sys.path tweak above.
from app.services.vector_db import VectorDB
from app.services.vector_operations import VectorOperations
async def repair_index(website_id: int = 22) -> None:
    """Rebuild the FAISS index of one website from its stored chunk metadata.

    Reads ``metadata_<website_id>.json`` from ``<server_dir>/vector_db``,
    re-embeds every chunk that carries text, removes the existing index via
    ``VectorDB.delete_index`` and stores the new vectors with
    ``VectorDB.add_vectors`` followed by ``VectorDB.save``.

    Args:
        website_id: Identifier used to build the metadata and index file
            names. Defaults to 22, the value this script was hard-coded to.

    Returns:
        None. Progress is printed to stdout. The function returns early
        (without touching the existing index) when the metadata file does
        not exist or when no chunk produced an embedding.
    """
    vector_dir = os.path.join(server_dir, "vector_db")
    metadata_path = os.path.join(vector_dir, f"metadata_{website_id}.json")
    index_path = os.path.join(vector_dir, f"index_{website_id}.faiss")

    print(f"Repairing index for website {website_id}...")

    if not os.path.exists(metadata_path):
        print(f"Error: Metadata not found at {metadata_path}")
        return

    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(metadata_path, 'r', encoding='utf-8') as f:
        metadata_list = json.load(f)

    print(f"Found {len(metadata_list)} chunks in metadata.")

    new_embeddings = []
    # Metadata entries that actually received an embedding. Kept in lockstep
    # with new_embeddings so vector i always belongs to kept_metadata[i];
    # passing the unfiltered metadata_list would shift every entry after the
    # first skipped (empty) chunk onto the wrong vector.
    kept_metadata = []

    for i, meta in enumerate(metadata_list):
        text = meta.get('text', meta.get('content', ''))
        if not text:
            print(f"Warning: Empty text in chunk {i}")
            continue

        if i % 5 == 0:
            print(f"Embedding chunk {i}/{len(metadata_list)}...")

        # Use is_query=False to apply "passage: " prefix for indexing
        emb = await VectorOperations.get_embedding(text, is_query=False)
        new_embeddings.append(emb)
        kept_metadata.append(meta)

    if not new_embeddings:
        print("No embeddings generated.")
        return

    print("Saving to new FAISS index...")
    vdb = VectorDB()

    # Delete old index to ensure a fresh start in memory cache
    vdb.delete_index(website_id)

    # Re-add only the chunks that were embedded, then persist the result.
    vdb.add_vectors(
        np.array(new_embeddings, dtype=np.float32),
        kept_metadata,
        website_id,
    )
    vdb.save(website_id)

    print(f"✓ Repair complete. New index saved to {index_path}")
if __name__ == "__main__":
    # Script entry point: build the coroutine and run it to completion on a
    # fresh event loop. Nothing happens when the module is merely imported.
    repair_coro = repair_index()
    asyncio.run(repair_coro)