narcolepticchicken
/

agent-cost-optimizer

Safetensors

Model card Files Files and versions

xet

Community

narcolepticchicken committed about 9 hours ago

Commit

8a5c26f

verified ·

1 Parent(s): 3bed4ff

Upload eval/eval_bert_partD.py

Browse files

Files changed (1) hide show

eval/eval_bert_partD.py +45 -18

eval/eval_bert_partD.py CHANGED Viewed

@@ -10,8 +10,15 @@ print(f"{'='*70}")
 print(f"\n{'Policy':<20} {'Success':>10} {'AvgCost':>10} {'CostRed':>10}")
 print("-"*52)
 for name in ['oracle','bert_feedback','v11_feedback','bert','v11_xgboost','frontier','always_cheap']:
-    if name not in policies: continue
-    r = policies[name]
     sr = r['success']/r['n'] if r['n'] > 0 else 0
     ac = r['cost']/r['n'] if r['n'] > 0 else 0
     cr = (1-ac/fr_cost)*100 if fr_cost > 0 else 0
@@ -19,22 +26,37 @@ for name in ['oracle','bert_feedback','v11_feedback','bert','v11_xgboost','front
 print(f"\nQuality gap vs frontier:")
 for name in ['bert','bert_feedback','v11_xgboost','v11_feedback']:
-    r = policies[name]
     sr = r['success']/r['n'] if r['n'] > 0 else 0
     gap = (sr - fr_succ) * 100
     print(f"  {name}: {gap:+.1f}pp vs frontier")
 # BERT tier distribution
-print(f"\nBERT tier distribution:")
 bert_tiers = defaultdict(int)
-for iid, model_results in list(traces.items())[:500]:
     problem = next(iter(model_results.values()))['problem']
-    t, c = route_bert(problem)
     bert_tiers[t] += 1
 for t in sorted(bert_tiers):
-    print(f"  Tier {t}: {bert_tiers[t]}")
-# Save results
 results = {}
 for name, r in policies.items():
     sr = r['success']/r['n'] if r['n'] > 0 else 0
@@ -42,13 +64,18 @@ for name, r in policies.items():
     cr = (1-ac/fr_cost)*100 if fr_cost > 0 else 0
     results[name] = {"success": round(sr, 4), "avg_cost": round(ac, 4), "costRed": round(cr, 1)}
-from huggingface_hub import HfApi
-import tempfile
-api = HfApi()
-with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f:
-    json.dump(results, f, indent=2)
-    api.upload_file(path_or_fileobj=f.name, path_in_repo="eval/bert_vs_xgboost_results.json",
-                    repo_id="narcolepticchicken/agent-cost-optimizer", repo_type="model")
-    os.unlink(f.name)
-print(f"\nResults uploaded to eval/bert_vs_xgboost_results.json")
-print("DONE!")

 print(f"\n{'Policy':<20} {'Success':>10} {'AvgCost':>10} {'CostRed':>10}")
 print("-"*52)
 for name in ['oracle','bert_feedback','v11_feedback','bert','v11_xgboost','frontier','always_cheap']:
+    # Map v11 names to v10 if v11 not available
+    actual_name = name
+    if name == 'v11_xgboost' and 'v11_xgboost' not in policies and 'v10_xgboost' in policies:
+        actual_name = 'v10_xgboost'
+    if name == 'v11_feedback' and 'v11_feedback' not in policies and 'v10_feedback' in policies:
+        actual_name = 'v10_feedback'
+    if actual_name not in policies:
+        continue
+    r = policies[actual_name]
     sr = r['success']/r['n'] if r['n'] > 0 else 0
     ac = r['cost']/r['n'] if r['n'] > 0 else 0
     cr = (1-ac/fr_cost)*100 if fr_cost > 0 else 0
 print(f"\nQuality gap vs frontier:")
 for name in ['bert','bert_feedback','v11_xgboost','v11_feedback']:
+    actual_name = name
+    if name == 'v11_xgboost' and 'v11_xgboost' not in policies and 'v10_xgboost' in policies:
+        actual_name = 'v10_xgboost'
+    if name == 'v11_feedback' and 'v11_feedback' not in policies and 'v10_feedback' in policies:
+        actual_name = 'v10_feedback'
+    if actual_name not in policies:
+        continue
+    r = policies[actual_name]
     sr = r['success']/r['n'] if r['n'] > 0 else 0
     gap = (sr - fr_succ) * 100
     print(f"  {name}: {gap:+.1f}pp vs frontier")
 # BERT tier distribution
+print(f"\nBERT tier distribution (first 100 tasks):")
 bert_tiers = defaultdict(int)
+bert_probs_dist = defaultdict(list)
+for iid, model_results in list(traces.items())[:100]:
     problem = next(iter(model_results.values()))['problem']
+    t, conf, probs = route_bert(problem)
     bert_tiers[t] += 1
+    for tier, p in probs.items():
+        bert_probs_dist[tier].append(p)
+print(f"  Tier routing counts:")
 for t in sorted(bert_tiers):
+    print(f"    Tier {t}: {bert_tiers[t]}")
+print(f"  Per-tier P(success) stats:")
+for t in sorted(bert_probs_dist):
+    ps = bert_probs_dist[t]
+    print(f"    Tier {t}: mean={np.mean(ps):.3f}, std={np.std(ps):.3f}, min={np.min(ps):.3f}, max={np.max(ps):.3f}")
+# Save results to local file and try to upload to Hub
 results = {}
 for name, r in policies.items():
     sr = r['success']/r['n'] if r['n'] > 0 else 0
     cr = (1-ac/fr_cost)*100 if fr_cost > 0 else 0
     results[name] = {"success": round(sr, 4), "avg_cost": round(ac, 4), "costRed": round(cr, 1)}
+import tempfile, json
+try:
+    from huggingface_hub import HfApi
+    api = HfApi()
+    with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f:
+        json.dump(results, f, indent=2)
+        api.upload_file(path_or_fileobj=f.name, path_in_repo="eval/bert_vs_xgboost_results.json",
+                        repo_id="narcolepticchicken/agent-cost-optimizer", repo_type="model")
+        os.unlink(f.name)
+    print(f"\nResults uploaded to eval/bert_vs_xgboost_results.json on Hub")
+except Exception as e:
+    print(f"\nCould not upload to Hub: {e}")
+    print(f"Results JSON:\n{json.dumps(results, indent=2)}")
+print("\nDONE!")