| """ |
| Indexer Module for Module C |
| Ingests templates from the data directory into the Vector DB. |
| """ |
|
|
| import logging |
| import sys |
| from pathlib import Path |
|
|
| |
| sys.path.append(str(Path(__file__).parent.parent)) |
|
|
| from module_c.config import TEMPLATE_DIR |
| from module_c.template_loader import TemplateLoader |
| from module_c.vector_db import TemplateVectorDB |
| from module_a.embeddings import EmbeddingGenerator |
|
|
| |
# Module-level logger, named after this module so its records can be
# filtered by origin.
logger = logging.getLogger(__name__)

# Root logging is configured at import time with INFO as the threshold, so
# the progress messages emitted during indexing are shown.
logging.basicConfig(level=logging.INFO)
|
|
def build_index():
    """Rebuild the template index in the vector DB.

    Steps, in order:

    1. List the templates known to ``TemplateLoader(TEMPLATE_DIR)``.
    2. Load each template, extract its placeholders, and build the text
       that will be embedded (template name followed by its content).
    3. Generate embeddings for all texts in a single batch call.
    4. Delete and recreate the vector DB collection, then store the
       templates together with their embeddings.

    Returns:
        None. Returns early, after logging a warning, when the loader
        reports no templates.
    """
    logger.info("Starting Template Indexing...")

    loader = TemplateLoader(TEMPLATE_DIR)
    template_files = loader.list_templates()

    if not template_files:
        logger.warning("No templates found to index.")
        return

    templates_data = []
    texts = []

    for filename in template_files:
        content = loader.load_template(filename)
        placeholders = loader.extract_placeholders(content)

        # The stored document ("text") is the raw template, while the text
        # that gets embedded also carries the template name.
        text_for_embedding = f"Template Name: {filename}\nContent:\n{content}"

        templates_data.append({
            "id": filename,
            "text": content,
            "metadata": {
                "filename": filename,
                # Flattened to one comma-separated string.
                # NOTE(review): presumably because the vector DB only accepts
                # scalar metadata values -- confirm against TemplateVectorDB.
                "placeholders": ", ".join(placeholders),
            },
        })
        texts.append(text_for_embedding)
        logger.info("Loaded: %s", filename)

    logger.info("Generating embeddings...")
    embedder = EmbeddingGenerator()
    embeddings = embedder.generate_embeddings_batch(texts)

    logger.info("Storing in Vector DB...")
    db = TemplateVectorDB()

    # Drop and recreate the collection before inserting, presumably so that
    # entries from a previous run do not linger. The reset stays best-effort:
    # on failure we continue with whatever collection `db` currently holds,
    # but the failure is logged (with traceback) instead of being swallowed.
    try:
        db.client.delete_collection(db.collection_name)
        db.collection = db.client.create_collection(db.collection_name)
    except Exception:
        logger.warning(
            "Could not reset collection %r; continuing without a clean reset.",
            db.collection_name,
            exc_info=True,
        )

    # NOTE(review): `.tolist()` assumes the embedder returns an array-like
    # object (e.g. a NumPy array) -- confirm against EmbeddingGenerator.
    db.add_templates(templates_data, embeddings.tolist())

    logger.info("Indexing Complete!")
|
|
if __name__ == "__main__":
    # Executed directly as a script (not imported): run the indexer once.
    build_index()
|
|