File size: 2,640 Bytes
661eb14
c7224df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
"""Step 11 — runs the full pipeline and writes data/precomputed/*.json."""

import json
import sys
from pathlib import Path

# Two levels up from this file: the directory that contains this script's
# own folder. It is put at the front of sys.path before the `core.*` imports
# below — presumably so they resolve when the script is run directly
# (assumes the `core` package lives in that directory; confirm against the
# repository layout).
BASE_DIR = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(BASE_DIR))

from core.config import DATA_DIR, DEEPSEEK_API_KEY, PRECOMPUTED_DIR
from core.criteria_extractor import extract_criteria
from core.bidder_processor import process_bidder
from core.evaluator import evaluate_bidder
from core.fallback import _HARDCODED_CRITERIA
from core.schemas import Criterion


def main() -> None:
    """Run the pre-computation pipeline and write its JSON outputs.

    Stages, in order:

    1. Exit with status 1 if ``DEEPSEEK_API_KEY`` is falsy.
    2. Create ``PRECOMPUTED_DIR`` if needed.
    3. Extract criteria from the tender PDF; if that raises any
       ``Exception``, build them from ``_HARDCODED_CRITERIA`` instead.
       The result is written to ``criteria.json``.
    4. For each of the three fixed bidder ids, collect the bidder's
       ``.pdf`` / ``.png`` / ``.jpg`` files, pass them to
       ``process_bidder``, evaluate the bidder against the criteria and
       write the verdicts to ``eval_<bidder_id>.json``.
    5. Print a listing of every ``*.json`` file in ``PRECOMPUTED_DIR``.

    Progress is reported on stdout throughout.
    """

    def _save_json(target: Path, payload: object) -> None:
        # Both outputs share the same formatting: 2-space indent, non-ASCII
        # characters kept as-is, UTF-8 on disk.
        serialized = json.dumps(payload, indent=2, ensure_ascii=False)
        target.write_text(serialized, encoding="utf-8")
        print(f"  Saved {target}")

    # Nothing below can work without the API key, so bail out early.
    if not DEEPSEEK_API_KEY:
        print("ERROR: DEEPSEEK_API_KEY is not set.")
        print("Set it in .env or export it before running this script.")
        sys.exit(1)

    PRECOMPUTED_DIR.mkdir(parents=True, exist_ok=True)

    # Step 1 — Extract criteria
    tender_pdf = DATA_DIR / "tender" / "crpf_construction_tender.pdf"
    print(f"Extracting criteria from {tender_pdf.name}...")
    try:
        criteria = extract_criteria(tender_pdf)
        # Kept inside the try on purpose: a result without a length also
        # triggers the hardcoded fallback.
        print(f"  Got {len(criteria)} criteria from LLM.")
    except Exception as exc:
        print(f"  LLM extraction failed ({exc}), using hardcoded criteria.")
        criteria = [Criterion(**spec) for spec in _HARDCODED_CRITERIA]

    _save_json(
        PRECOMPUTED_DIR / "criteria.json",
        {"criteria": [item.model_dump() for item in criteria]},
    )

    # Step 2 — Process + evaluate each bidder
    accepted_suffixes = {".pdf", ".png", ".jpg"}
    for bidder_id in ("bidder_a", "bidder_b", "bidder_c"):
        source_dir = DATA_DIR / "bidders" / bidder_id
        # Sorted so the documents are handed over in a stable order.
        documents = [
            entry
            for entry in sorted(source_dir.glob("*"))
            if entry.suffix.lower() in accepted_suffixes
        ]

        print(f"\nProcessing {bidder_id} ({len(documents)} files)...")
        process_bidder(bidder_id, documents)

        print(f"  Evaluating {bidder_id} against {len(criteria)} criteria...")
        verdicts = evaluate_bidder(bidder_id, criteria)

        _save_json(
            PRECOMPUTED_DIR / f"eval_{bidder_id}.json",
            [verdict.model_dump() for verdict in verdicts],
        )
        for verdict in verdicts:
            print(
                f"    {verdict.criterion_id}: {verdict.verdict} "
                f"(conf={verdict.combined_confidence:.2f})"
            )

    # Final summary of everything now present in the output directory.
    print("\nPre-computation complete. Files in data/precomputed/:")
    for output in sorted(PRECOMPUTED_DIR.glob("*.json")):
        print(f"  {output.name} ({output.stat().st_size} bytes)")


# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()