narcolepticchicken
/

agent-cost-optimizer

Model card Files Files and versions

narcolepticchicken committed about 13 hours ago

Commit

617b314

·

verified ·

1 Parent(s): 7a843a8

Upload eval/eval_bert_part4.py

Files changed (1) hide show

eval/eval_bert_part4.py +61 -0

eval/eval_bert_part4.py ADDED Viewed

	@@ -0,0 +1,61 @@

+# ── Part 4: Print results ──
+print(f"\n\n{'='*70}")
+print("BERT vs XGBoost ROUTER COMPARISON ON SWE-BENCH")
+print(f"{'='*70}")
+fr = policies['frontier']
+fr_cost = fr['cost'] / fr['n']
+fr_succ = fr['success'] / fr['n']
+print(f"\n{'Policy':<20} {'Success':>10} {'AvgCost':>10} {'CostRed':>10}")
+print("-"*52)
+order = ['oracle','bert_feedback','v11_feedback','bert','v11_xgboost','frontier','always_cheap']
+for name in order:
+    if name not in policies:
+        continue
+    r = policies[name]
+    sr = r['success']/r['n'] if r['n'] > 0 else 0
+    ac = r['cost']/r['n'] if r['n'] > 0 else 0
+    cr = (1 - ac/fr_cost)*100 if fr_cost > 0 else 0
+    print(f"{name:<20} {sr:>10.3f} {ac:>10.4f} {cr:>9.1f}%")
+# BERT tier distribution
+print(f"\n\nBERT tier distribution:")
+bert_tiers = defaultdict(int)
+for iid, model_results in traces.items():
+    problem = next(iter(model_results.values()))['problem']
+    t, c = route_bert(problem)
+    bert_tiers[t] += 1
+for t in sorted(bert_tiers):
+    print(f"  Tier {t}: {bert_tiers[t]}")
+# Quality gap analysis
+print(f"\n\nQuality gap vs frontier:")
+for name in ['bert','bert_feedback','v11_xgboost','v11_feedback']:
+    r = policies[name]
+    sr = r['success']/r['n'] if r['n'] > 0 else 0
+    gap = (sr - fr_succ) * 100
+    print(f"  {name}: {gap:+.1f}pp vs frontier")
+# Save results
+results = {}
+for name, r in policies.items():
+    sr = r['success']/r['n'] if r['n'] > 0 else 0
+    ac = r['cost']/r['n'] if r['n'] > 0 else 0
+    cr = (1 - ac/fr_cost)*100 if fr_cost > 0 else 0
+    results[name] = {"success": round(sr, 4), "avg_cost": round(ac, 4), "costRed": round(cr, 1)}
+# Upload results to Hub
+from huggingface_hub import HfApi
+api = HfApi()
+import tempfile, json
+with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f:
+    json.dump(results, f, indent=2)
+    api.upload_file(path_or_fileobj=f.name, path_in_repo="eval/bert_vs_xgboost_results.json",
+                    repo_id="narcolepticchicken/agent-cost-optimizer", repo_type="model")
+    os.unlink(f.name)
+print(f"\nResults saved to eval/bert_vs_xgboost_results.json")
+print("DONE!")