File size: 2,923 Bytes
fce9fad
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# scripts/ingest_phase5.py
"""
Phase 5 ingestion — Karnataka RERA:
  1. Karnataka RERA Rules 2017

Run:
    uv run python scripts/ingest_phase5.py
"""
from __future__ import annotations

import asyncio
import sys
from pathlib import Path

# Prepend the ``src`` directory that sits next to this script's parent folder
# to the import path. This has to run before the ``civicsetu`` imports below;
# presumably it lets the script run without the package being installed —
# confirm against the project layout.
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))

import structlog
from civicsetu.config.document_registry import DOCUMENT_REGISTRY
from civicsetu.ingestion.pipeline import IngestionPipeline
from civicsetu.ingestion.graph_seeder import GraphSeeder

log = structlog.get_logger(__name__)

# Karnataka Rule → RERA Act Section (hand-mapped)
# Karnataka Rules 2017 implements RERA Act 2016 under Section 84
_KA_DERIVED_FROM_MAP = {
    "3":  "4",   # Application for registration of project → promoter obligations
    "4":  "5",   # Grant / rejection of registration
    "5":  "6",   # Extension of registration
    "6":  "7",   # Revocation of registration
    "7":  "9",   # Registration of real estate agents
    "8":  "9",   # Grant / rejection of agent registration
    "9":  "10",  # Functions of real estate agents
    "10": "11",  # Agreement for Sale
    "11": "13",  # Responsibility of promoter post-possession
    "12": "17",  # Conveyance deed
    "13": "18",  # Timelines for refund
    "14": "19",  # Rate of interest
    "15": "25",  # Functions of regulatory authority
    "16": "31",  # Complaints to authority
    "17": "43",  # Appellate Tribunal composition
    "18": "58",  # Offences by companies
    "19": "66",  # Compounding of offences
}


def main() -> None:
    """Run the Phase 5 ingestion for the Karnataka RERA Rules 2017.

    Looks up the ``karnataka_rera_rules_2017`` entry in ``DOCUMENT_REGISTRY``
    and forwards its fields to ``IngestionPipeline.ingest_document``, logging
    the start and the end of the ingestion.

    If the ingested document reports zero chunks, a fallback hint is printed
    and the function returns before any graph seeding. Otherwise
    ``GraphSeeder.seed_from_postgres`` is driven to completion with
    ``asyncio.run`` for the new document id, and a summary is printed.
    """
    ingestion = IngestionPipeline()
    entry = DOCUMENT_REGISTRY["karnataka_rera_rules_2017"]

    log.info("phase5_ingest_start", doc=entry.name)
    ingested = ingestion.ingest_document(
        source_url=entry.url,
        doc_name=entry.name,
        jurisdiction=entry.jurisdiction,
        doc_type=entry.doc_type,
        effective_date=entry.effective_date,
        dest_subdir=entry.dest_subdir,
        filename=entry.filename,
        max_pages=entry.max_pages,
    )

    # Read once; both values are reused for logging, seeding and the summary.
    chunk_count = ingested.total_chunks
    doc_id = str(ingested.doc_id)
    log.info(
        "phase5_ingest_complete",
        doc=entry.name,
        chunks=chunk_count,
        doc_id=doc_id,
    )

    # Guard clause: with no chunks there is nothing to seed, so stop here.
    if chunk_count == 0:
        abort_lines = (
            "\n✗ ABORT: 0 chunks — PDF is likely fully scanned.",
            "  Fallback: use NAREDCO mirror and re-run.",
            "  URL: https://naredco.in/notification/pdfs/Karnataka%20Real%20Estate%20(Regulation%20and%20Development)%20Rules,%202017.pdf",
        )
        for line in abort_lines:
            print(line)
        return

    seed_stats = asyncio.run(GraphSeeder.seed_from_postgres(doc_id=doc_id))

    banner = "=" * 60
    print("\n" + banner)
    print("Phase 5 — Karnataka RERA ingestion complete")
    print(f"  Chunks       : {chunk_count}")
    print(f"  Graph stats  : {seed_stats}")
    print(banner)

    # Fewer than 10 chunks is reported as suspicious rather than as success.
    if chunk_count >= 10:
        print("\n✓ Karnataka wired.")
    else:
        print("\n⚠ WARNING: Very few chunks — check scanned_pages in log above")


# Run the ingestion only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()