narcolepticchicken committed on
Commit
e152f8c
·
verified ·
1 Parent(s): 9221a43

Upload training/router_v7_tuned.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. training/router_v7_tuned.py +95 -0
training/router_v7_tuned.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """v7: Tuned hybrid with conservative downgrades + aggressive safety net."""
3
+ import json, os, sys, random, uuid, pickle
4
+ import numpy as np
5
+ from collections import defaultdict
6
+
7
# ─── Reuse v6 infrastructure ──────────────────────────────────────────
# Execute everything in the v6 script that precedes its "Save" section, in
# this module's namespace. The helpers used further down (results,
# eval_router, TASK_FLOOR, ...) are presumably defined there.
# NOTE(review): exec() runs that file's code verbatim; this is only acceptable
# because /app/router_v6_hybrid.py is a trusted, project-local script.
# The file is read explicitly as UTF-8 (Python's default source encoding) so
# the non-ASCII split marker matches regardless of the locale, and the `with`
# block closes the handle instead of leaking it.
with open("/app/router_v6_hybrid.py", encoding="utf-8") as _v6_file:
    _v6_source = _v6_file.read()
exec(_v6_source.split("# ─── Save ─")[0])

# Tuned-threshold replacement for v6's route_hybrid (see route_v7 below)
print("\n\n[EXTRA] Fine-tuned threshold sweep...")
12
+
13
# Only downgrade when very confident (0.90+), but escalate when P(success) < 0.30
def route_v7(request, task_type, difficulty, safety=0.30, downgrade=0.90):
    """Choose a model tier for one request with the tuned v7 hybrid policy.

    The starting point is the heuristic tier ``min(difficulty + 1, 5)``,
    raised to the task type's floor from ``TASK_FLOOR`` (2 when the task type
    is not listed). Two adjustments are then considered, in this order:

    1. Safety net: if ``get_calibrated_psuccess`` at the heuristic tier is
       below ``safety`` and the tier is below 5, move up exactly one tier.
       An escalated request is never downgraded afterwards.
    2. Cost saver: otherwise, if the tier one step down is still at or above
       the floor and its ``get_calibrated_psuccess`` is at least
       ``downgrade``, move down one tier.

    Args:
        request: The request passed through to ``extract_features``.
        task_type: Key used to look up the minimum tier in ``TASK_FLOOR``.
        difficulty: Numeric difficulty; the heuristic tier is one above it.
        safety: Escalate when the success estimate is below this value.
        downgrade: Downgrade only when the cheaper tier's success estimate
            reaches this value.

    Returns:
        The selected tier.
    """
    floor = TASK_FLOOR.get(task_type, 2)
    heuristic_tier = max(min(difficulty + 1, 5), floor)
    feats = extract_features(request, task_type, difficulty)
    x = f2v(feats).reshape(1, -1)

    # Safety net: a single-step escalation. Returning here replaces the
    # original's post-escalation re-query, whose result was never read.
    ps = get_calibrated_psuccess(x, heuristic_tier)
    if ps < safety and heuristic_tier < 5:
        return heuristic_tier + 1

    # Cost saver (conservative: only downgrade when very confident).
    # Checking the floor first avoids querying a tier that is not allowed.
    cheaper = heuristic_tier - 1
    if cheaper >= floor and get_calibrated_psuccess(x, cheaper) >= downgrade:
        return cheaper

    return heuristic_tier
33
+
34
# Sweep: evaluate route_v7 over a 3x3 grid of (safety, downgrade) thresholds
def _make_v7_router(safety, downgrade):
    """Build a task -> tier callable that applies route_v7 with fixed thresholds."""
    def _route(task):
        return route_v7(task["req"], task["tt"], task["diff"], safety, downgrade)
    return _route


for s in (0.25, 0.30, 0.35):
    for d in (0.85, 0.90, 0.95):
        name = f"v7_s{s:.2f}_d{d:.2f}"
        results[name] = eval_router(name, _make_v7_router(s, d))
39
+
40
# Also try: no downgrade at all, only safety net
def route_v7_safety_only(request, task_type, difficulty, safety=0.30):
    """Choose a tier using only the escalation rule, never a downgrade.

    Starts at ``min(difficulty + 1, 5)`` raised to the ``TASK_FLOOR`` entry
    for ``task_type`` (2 when absent), then keeps moving up one tier at a
    time while ``get_calibrated_psuccess`` stays below ``safety`` and the
    tier is below 5.

    Returns:
        The first tier whose success estimate reaches ``safety``, or the
        tier at which the tier-5 cap stopped the escalation.
    """
    tier = min(difficulty + 1, 5)
    tier = max(tier, TASK_FLOOR.get(task_type, 2))
    x = f2v(extract_features(request, task_type, difficulty)).reshape(1, -1)

    # The estimate is re-queried for the current tier on every pass, so each
    # escalation is judged against the tier just moved to.
    while get_calibrated_psuccess(x, tier) < safety and tier < 5:
        tier += 1
    return tier
53
+
54
# Evaluate the escalate-only router across a range of safety thresholds
def _make_safety_only_router(safety):
    """Build a task -> tier callable that applies route_v7_safety_only."""
    def _route(task):
        return route_v7_safety_only(task["req"], task["tt"], task["diff"], safety)
    return _route


for s in (0.25, 0.30, 0.35, 0.40, 0.45):
    name = f"v7_safety_s{s:.2f}"
    results[name] = eval_router(name, _make_safety_only_router(s))
57
+
58
# Print final results
def _cost_reduction(stats, frontier_cost):
    """Return the percentage cost saving of a router relative to frontier_cost."""
    return (1 - stats["avg_cost"] / frontier_cost) * 100


def _print_table_row(label, stats, frontier_cost):
    """Print one fixed-width row of the comparison table for a router."""
    cr = _cost_reduction(stats, frontier_cost)
    print(f"{label:<30} {stats['success']:>10.3f} {stats['avg_cost']:>10.4f} {cr:>9.1f}% {stats['unsafe_rate']:>10.3f} {stats['false_done']:>10.3f}")


def _print_pick(label, stats, frontier_cost):
    """Print the one-line summary used in the "best router" section."""
    cr = _cost_reduction(stats, frontier_cost)
    print(f" {label:<30} success={stats['success']:.3f} cost={stats['avg_cost']:.4f} costRed={cr:.1f}%")


print(f"\n\n{'='*80}")
print("FINAL v7 COMPARISON")
print(f"{'='*80}")
print(f"\n{'Router':<30} {'Success':>10} {'AvgCost':>10} {'CostRed':>10} {'Unsafe':>10} {'F-DONE':>10}")
print("-" * 80)
# Every cost-reduction figure below is relative to the always-frontier router
fc = results["always_frontier"]["avg_cost"]

# Key baselines
for baseline in ("always_frontier", "heuristic_diff+1", "oracle"):
    _print_table_row(baseline, results[baseline], fc)

# Best v7 variants: highest success first, lower average cost breaks ties
ranked = sorted(results.items(), key=lambda item: (-item[1]["success"], item[1]["avg_cost"]))
for label, stats in ranked:
    if label.startswith("v7"):
        _print_table_row(label, stats, fc)

# Find the winner
print("\n\nBEST ROUTER SELECTION:")
print(" Iso-quality (success >= 0.84):")
by_cost = sorted(results.items(), key=lambda item: item[1]["avg_cost"])
# Only the cheapest router meeting the quality bar is shown; "always_cheap"
# is excluded by name. Nothing is printed when no router qualifies.
cheapest_ok = next(
    (
        (label, stats)
        for label, stats in by_cost
        if stats["success"] >= 0.84 and label != "always_cheap"
    ),
    None,
)
if cheapest_ok is not None:
    _print_pick(cheapest_ok[0], cheapest_ok[1], fc)

print("\n Best quality (max success):")
best = max(results.items(), key=lambda item: item[1]["success"])
_print_pick(best[0], best[1], fc)

print("\n Best composite (success*20 - cost*30 - unsafe*100):")
best_comp = max(
    results.items(),
    key=lambda item: item[1]["success"] * 20 - item[1]["avg_cost"] * 30 - item[1]["unsafe_rate"] * 100,
)
_print_pick(best_comp[0], best_comp[1], fc)