| """ | |
| Phase 0 ingestion: RERA Act 2016 (Central). | |
| Idempotent — safe to re-run, upserts on conflict. | |
| Run: | |
| uv run python scripts/ingest_phase0.py | |
| """ | |
| from __future__ import annotations | |
| import asyncio | |
| import sys | |
| from pathlib import Path | |
| sys.path.insert(0, str(Path(__file__).parent.parent / "src")) | |
| import structlog | |
| from civicsetu.config.document_registry import DOCUMENT_REGISTRY | |
| from civicsetu.ingestion.pipeline import IngestionPipeline | |
| from civicsetu.ingestion.graph_seeder import GraphSeeder | |
| log = structlog.get_logger(__name__) | |
| def main() -> None: | |
| spec = DOCUMENT_REGISTRY["rera_act_2016"] | |
| pipeline = IngestionPipeline() | |
| doc = pipeline.ingest_document( | |
| source_url=spec.url, | |
| doc_name=spec.name, | |
| jurisdiction=spec.jurisdiction, | |
| doc_type=spec.doc_type, | |
| effective_date=spec.effective_date, | |
| dest_subdir=spec.dest_subdir, | |
| filename=spec.filename, | |
| ) | |
| log.info("phase0_ingest_complete", doc_id=str(doc.doc_id), chunks=doc.total_chunks) | |
| stats = asyncio.run(GraphSeeder.seed_from_postgres(doc_id=str(doc.doc_id))) | |
| log.info("phase0_graph_seed_complete", stats=stats) | |
| if __name__ == "__main__": | |
| main() | |