polyguard-openenv / scripts /build_retrieval_index.py
TheJackBright's picture
Deploy PolyGuard OpenEnv Space
877add7 verified
#!/usr/bin/env python3
"""Build retrieval index from local corpus."""
from __future__ import annotations
from pathlib import Path
from app.models.retrieval.index import build_local_index
def main() -> None:
    """Build the retrieval index and report the resulting count.

    Passes ``<base>/data/raw`` as ``source_dir`` and
    ``<base>/data/retrieval_index/index.json`` as ``out_file`` to
    ``build_local_index``, where ``<base>`` is the directory one level above
    the folder containing this script. The value returned by
    ``build_local_index`` is printed as ``retrieval_chunks=<value>``.
    """
    # parents[1] of the resolved script path: the parent of the script's folder.
    data_dir = Path(__file__).resolve().parents[1] / "data"
    count = build_local_index(
        source_dir=data_dir / "raw",
        out_file=data_dir / "retrieval_index" / "index.json",
    )
    print(f"retrieval_chunks={count}")
# Run the build only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()