"""Local retrieval index builder.""" from __future__ import annotations import json from pathlib import Path from app.models.retrieval.chunker import chunk_text def build_local_index(source_dir: Path, out_file: Path) -> int: docs: list[dict[str, str]] = [] for path in source_dir.rglob("*"): if path.is_file() and path.suffix.lower() in {".txt", ".md", ".json"}: text = path.read_text(encoding="utf-8", errors="ignore") for idx, chunk in enumerate(chunk_text(text)): docs.append({"id": f"{path.stem}_{idx}", "path": str(path), "text": chunk}) out_file.parent.mkdir(parents=True, exist_ok=True) out_file.write_text(json.dumps(docs, ensure_ascii=True, indent=2), encoding="utf-8") return len(docs)