| from __future__ import annotations |
|
|
| import json |
| import sys |
| from pathlib import Path |
|
|
# Absolute, symlink-resolved location of this script.
_SCRIPT_PATH = Path(__file__).resolve()

# ROOT is the parent of the directory that contains this script.
ROOT = _SCRIPT_PATH.parents[1]

# Put ROOT at the front of the module search path; presumably this is what
# lets the `app` package imported further down be found when the script is
# run directly -- confirm against the repository layout.
sys.path.insert(0, str(ROOT))
|
|
| from app.decision_engine import build_candidates, extract_evidence |
|
|
|
|
def load(path: Path) -> dict:
    """Parse the UTF-8 encoded JSON file at ``path`` and return the decoded value.

    Args:
        path: Location of the JSON document to read.

    Returns:
        Whatever the document decodes to. The annotation says ``dict``, but
        no check is made that the top-level value is a JSON object.

    Raises:
        OSError: If the file cannot be opened or read.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with path.open(encoding="utf-8") as handle:
        return json.load(handle)
|
|
|
|
def ensure_expanded() -> Path:
    """Return the ``expanded`` directory under ROOT, generating it if needed.

    The directory counts as already generated when it contains
    ``test_pairs.json``. Otherwise ``dataset/generate_dataset.py`` is run
    with the current interpreter, using ROOT as the working directory so the
    relative paths in the command resolve against it.

    Returns:
        ``ROOT / "expanded"``.

    Raises:
        subprocess.CalledProcessError: If the generator exits with a
            non-zero status (``check=True``).
    """
    expanded_dir = ROOT / "expanded"
    marker = expanded_dir / "test_pairs.json"
    if marker.exists():
        return expanded_dir

    # Imported lazily: only needed when the dataset has to be generated.
    import subprocess

    generator_cmd = [
        sys.executable,
        "dataset/generate_dataset.py",
        "--seed-dir",
        "dataset",
        "--out",
        "expanded",
    ]
    subprocess.run(generator_cmd, cwd=ROOT, check=True)
    return expanded_dir
|
|
|
|
def score_pair(out: Path, pair: dict) -> tuple[int, dict]:
    """Score one test pair using the best candidate the decision engine builds.

    Args:
        out: Dataset directory containing the ``merchants``, ``categories``,
            ``triggers`` and ``customers`` sub-directories of JSON files.
        pair: Pair description. ``merchant_id`` and ``trigger_id`` are
            required; ``customer_id`` is optional and skipped when missing
            or falsy.

    Returns:
        ``(total_score, detail)`` for the candidate with the highest
        ``total_score``, where ``detail`` carries that candidate's rubric
        scores, primary signal, selected lever and body. When no candidates
        are produced the result is ``(0, {"reason": "no candidates"})``.
    """
    merchant = load(out / "merchants" / f"{pair['merchant_id']}.json")
    # The category file is selected by the merchant record, not by the pair.
    category = load(out / "categories" / f"{merchant['category_slug']}.json")
    trigger = load(out / "triggers" / f"{pair['trigger_id']}.json")

    customer = None
    if pair.get("customer_id"):
        customer = load(out / "customers" / f"{pair['customer_id']}.json")

    # Both engine calls take the same four leading arguments.
    context = (category, merchant, trigger, customer)
    evidence = extract_evidence(*context)
    candidates = build_candidates(*context, evidence)
    if not candidates:
        return 0, {"reason": "no candidates"}

    winner = max(candidates, key=lambda cand: cand.total_score)
    score = winner.total_score
    detail = {
        "scores": winner.rubric_scores,
        "signal": winner.primary_signal,
        "lever": winner.selected_lever,
        "body": winner.body,
    }
    return score, detail
|
|
|
|
def main() -> int:
    """Score every pair in ``test_pairs.json`` and report those below threshold.

    The threshold is taken from the first command-line argument (parsed as an
    integer) and defaults to 36 when no argument is given. A one-line summary
    is always printed; each failing pair is then listed with its score and
    detail.

    Returns:
        1 if at least one pair scored below the threshold, otherwise 0.

    Raises:
        ValueError: If the command-line argument is not a valid integer.
    """
    if len(sys.argv) > 1:
        threshold = int(sys.argv[1])
    else:
        threshold = 36

    out = ensure_expanded()
    manifest = load(out / "test_pairs.json")

    totals = []
    failures = []
    for pair in manifest["pairs"]:
        total, detail = score_pair(out, pair)
        totals.append(total)
        if total < threshold:
            failures.append((pair["test_id"], total, detail))

    # max(1, ...) keeps the average defined (0.0) when there are no pairs.
    avg = sum(totals) / max(1, len(totals))
    print(f"proxy scored {len(totals)} canonical pairs; avg={avg:.1f}/50 min={min(totals) if totals else 0}/50 threshold={threshold}")

    if not failures:
        return 0

    print("proxy score failures:")
    for test_id, total, detail in failures:
        print(f"- {test_id}: {total}/50 {detail}")
    return 1
|
|
|
|
if __name__ == "__main__":
    # Use main()'s return value (0 = no failures, 1 = failures) as the
    # process exit status.
    sys.exit(main())
|
|
|
|