File size: 2,122 Bytes
617b314
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62

# ── Part 4: Print results ──

print(f"\n\n{'='*70}")
print("BERT vs XGBoost ROUTER COMPARISON ON SWE-BENCH")
print(f"{'='*70}")

# The 'frontier' policy is the baseline: its average cost is the denominator
# of the cost-reduction column and its success rate is the reference for the
# quality gap. The averages are guarded against n == 0 in the same way as
# every other policy below, so an empty run prints zeros instead of raising
# ZeroDivisionError. (n is presumably the number of evaluated instances.)
fr = policies['frontier']
fr_cost = fr['cost'] / fr['n'] if fr['n'] > 0 else 0
fr_succ = fr['success'] / fr['n'] if fr['n'] > 0 else 0

print(f"\n{'Policy':<20} {'Success':>10} {'AvgCost':>10} {'CostRed':>10}")
print("-"*52)
# Row order of the table; names missing from `policies` are silently skipped.
order = ['oracle','bert_feedback','v11_feedback','bert','v11_xgboost','frontier','always_cheap']
for name in order:
    if name not in policies:
        continue
    r = policies[name]
    sr = r['success']/r['n'] if r['n'] > 0 else 0   # success rate
    ac = r['cost']/r['n'] if r['n'] > 0 else 0      # average cost
    # Cost reduction relative to frontier, in percent (0 when there is no
    # positive baseline cost to compare against).
    cr = (1 - ac/fr_cost)*100 if fr_cost > 0 else 0
    print(f"{name:<20} {sr:>10.3f} {ac:>10.4f} {cr:>9.1f}%")

# BERT tier distribution
# Count how many trace entries route_bert assigns to each tier, then print
# the counts in ascending tier order.
print("\n\nBERT tier distribution:")
bert_tiers = defaultdict(int)
for model_results in traces.values():
    # Each entry's results carry the problem under 'problem'; the first
    # value is used to obtain it.
    first_result = next(iter(model_results.values()))
    tier, _ = route_bert(first_result['problem'])
    bert_tiers[tier] += 1
for tier in sorted(bert_tiers):
    count = bert_tiers[tier]
    print(f"  Tier {tier}: {count}")

# Quality gap analysis
# Success-rate difference of each listed policy relative to the frontier
# baseline (fr_succ), reported in percentage points.
print("\n\nQuality gap vs frontier:")
for name in ['bert','bert_feedback','v11_xgboost','v11_feedback']:
    # Skip policies that were not evaluated, matching the results table,
    # rather than aborting the report with a KeyError.
    if name not in policies:
        continue
    r = policies[name]
    sr = r['success']/r['n'] if r['n'] > 0 else 0
    gap = (sr - fr_succ) * 100
    print(f"  {name}: {gap:+.1f}pp vs frontier")

# Save results
# Build a JSON-serialisable summary per policy: success rate, average cost,
# and cost reduction relative to the frontier baseline (percent).
results = {}
for policy_name, stats in policies.items():
    if stats['n'] > 0:
        success_rate = stats['success'] / stats['n']
        avg_cost = stats['cost'] / stats['n']
    else:
        success_rate = 0
        avg_cost = 0

    if fr_cost > 0:
        cost_reduction = (1 - avg_cost / fr_cost) * 100
    else:
        cost_reduction = 0

    results[policy_name] = {
        "success": round(success_rate, 4),
        "avg_cost": round(avg_cost, 4),
        "costRed": round(cost_reduction, 1),
    }

# Upload results to Hub
import json
import os
import tempfile

from huggingface_hub import HfApi

api = HfApi()

# Serialise to a temporary file and let the `with` block close it BEFORE the
# upload. upload_file reads the file by path, so the JSON must be flushed to
# disk first; uploading while the handle is still open can send an empty or
# truncated file. delete=False keeps the file on disk after it is closed.
with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f:
    json.dump(results, f, indent=2)

try:
    api.upload_file(path_or_fileobj=f.name, path_in_repo="eval/bert_vs_xgboost_results.json",
                    repo_id="narcolepticchicken/agent-cost-optimizer", repo_type="model")
finally:
    # Always remove the temporary file, even when the upload raises.
    os.unlink(f.name)

print("\nResults saved to eval/bert_vs_xgboost_results.json")
print("DONE!")