# File size: 2,274 Bytes (aec2fdf)
from __future__ import annotations
import json
import sys
from pathlib import Path
# Directory one level above the directory that contains this file
# (parents[0] is the containing directory, parents[1] is its parent).
ROOT = Path(__file__).resolve().parents[1]
# Put ROOT first on the import search path; presumably this is what lets the
# `app` package import that follows resolve when the script is run directly.
sys.path.insert(0, str(ROOT))
from app.decision_engine import build_candidates, extract_evidence # noqa: E402
def load(path: Path) -> dict:
    """Parse the UTF-8 encoded JSON file at ``path`` and return its content.

    Raises whatever opening the file or decoding the JSON raises (for
    example ``FileNotFoundError`` or ``json.JSONDecodeError``).
    """
    with path.open(encoding="utf-8") as handle:
        return json.load(handle)
def ensure_expanded() -> Path:
    """Return ``ROOT / "expanded"``, running the dataset generator if needed.

    The existence of ``test_pairs.json`` inside that directory is the only
    check performed. When the file is missing, ``dataset/generate_dataset.py``
    is run with the current interpreter from ``ROOT``; a non-zero exit status
    raises ``subprocess.CalledProcessError`` because ``check=True`` is used.
    """
    expanded_dir = ROOT / "expanded"
    marker = expanded_dir / "test_pairs.json"
    if marker.exists():
        return expanded_dir

    # Imported lazily: only needed when the dataset has to be generated.
    import subprocess

    command = [
        sys.executable,
        "dataset/generate_dataset.py",
        "--seed-dir",
        "dataset",
        "--out",
        "expanded",
    ]
    # Relative paths in the command are resolved against ROOT via cwd.
    subprocess.run(command, cwd=ROOT, check=True)
    return expanded_dir
def score_pair(out: Path, pair: dict) -> tuple[int, dict]:
    """Score one test pair and describe the best candidate found.

    Loads the merchant, its category (via the merchant's ``category_slug``),
    the trigger and, when ``pair`` carries a truthy ``customer_id``, the
    customer from JSON files under ``out``. These are passed to
    ``extract_evidence`` and ``build_candidates``.

    Returns:
        ``(0, {"reason": "no candidates"})`` when no candidates are built;
        otherwise the ``total_score`` of the highest-scoring candidate plus a
        dict with its ``scores``, ``signal``, ``lever`` and ``body``.
    """

    def read(kind: str, ident) -> dict:
        # Every entity lives at <out>/<kind>/<id>.json.
        return load(out / kind / f"{ident}.json")

    merchant = read("merchants", pair["merchant_id"])
    category = read("categories", merchant["category_slug"])
    trigger = read("triggers", pair["trigger_id"])

    customer_id = pair.get("customer_id")
    if customer_id:
        customer = read("customers", customer_id)
    else:
        customer = None

    evidence = extract_evidence(category, merchant, trigger, customer)
    candidates = build_candidates(category, merchant, trigger, customer, evidence)

    if candidates:
        winner = max(candidates, key=lambda cand: cand.total_score)
        return winner.total_score, {
            "scores": winner.rubric_scores,
            "signal": winner.primary_signal,
            "lever": winner.selected_lever,
            "body": winner.body,
        }

    return 0, {"reason": "no candidates"}
def main() -> int:
    """Score every pair in ``test_pairs.json`` and report those below threshold.

    The threshold is the first command-line argument parsed as an int, or 36
    when no argument is given. A one-line summary is always printed, followed
    by one line per failing pair.

    Returns:
        1 if at least one pair scored below the threshold, otherwise 0
        (including when the pair list is empty).
    """
    cli_args = sys.argv[1:]
    threshold = int(cli_args[0]) if cli_args else 36

    out = ensure_expanded()
    pairs = load(out / "test_pairs.json")["pairs"]

    scores = []
    below = []
    for pair in pairs:
        score, info = score_pair(out, pair)
        scores.append(score)
        if score < threshold:
            below.append((pair["test_id"], score, info))

    count = len(scores)
    # Fall back to a divisor of 1 and a minimum of 0 when nothing was scored.
    mean = sum(scores) / (count or 1)
    lowest = min(scores, default=0)
    print(f"proxy scored {count} canonical pairs; avg={mean:.1f}/50 min={lowest}/50 threshold={threshold}")

    if not below:
        return 0

    print("proxy score failures:")
    for test_id, score, info in below:
        print(f"- {test_id}: {score}/50 {info}")
    return 1
# Script entry point: the value returned by main() becomes the exit status.
if __name__ == "__main__":
    sys.exit(main())
|