Upload training/router_v7_tuned.py with huggingface_hub
Browse files- training/router_v7_tuned.py +95 -0
training/router_v7_tuned.py
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""v7: Tuned hybrid with conservative downgrades + aggressive safety net."""
|
| 3 |
+
import json, os, sys, random, uuid, pickle
|
| 4 |
+
import numpy as np
|
| 5 |
+
from collections import defaultdict
|
| 6 |
+
|
| 7 |
+
# --- Reuse v6 infrastructure ---
# Run the part of the v6 script that precedes its "Save" marker inside this
# module's globals; the helpers used below (e.g. ``results``, ``eval_router``,
# ``TASK_FLOOR``) are presumably defined there -- verify against the v6 file.
#
# NOTE(review): exec() runs arbitrary code. The path is a fixed local file, so
# this is only acceptable while that file is trusted.
# NOTE(review): the marker literal below looks mis-encoded (probably
# box-drawing characters originally) -- confirm it matches the v6 file. If the
# marker is not found, split() returns the whole text and the Save section
# is executed as well.
#
# The file is read through a context manager so the handle is closed, and
# decoded explicitly as UTF-8 (the default encoding of Python source files)
# rather than with the platform's locale encoding.
with open("/app/router_v6_hybrid.py", encoding="utf-8") as _v6_file:
    _v6_source = _v6_file.read()
exec(_v6_source.split("# βββ Save β")[0])

# Override route_hybrid with tuned thresholds
print("\n\n[EXTRA] Fine-tuned threshold sweep...")
|
| 12 |
+
|
| 13 |
+
# Only downgrade when very confident (0.90+), but escalate when P(success) < 0.30
|
| 14 |
+
def route_v7(request, task_type, difficulty, safety=0.30, downgrade=0.90):
    """Choose a tier from the heuristic tier plus two calibrated checks.

    The starting tier is ``difficulty + 1`` capped at 5, then raised to the
    task type's floor from ``TASK_FLOOR`` (2 when the type is not listed).

    * Safety net: when the calibrated P(success) at the starting tier is
      below ``safety`` and the tier is below 5, move up exactly one tier.
    * Cost saver: only when no escalation happened and the starting tier is
      above the floor, move down one tier if the calibrated P(success) of
      that cheaper tier is at least ``downgrade`` and it does not fall
      below the floor.

    Args:
        request: Forwarded to ``extract_features``.
        task_type: Key into ``TASK_FLOOR``; also forwarded to
            ``extract_features``.
        difficulty: Numeric difficulty; the heuristic tier is one above it.
        safety: Escalate when P(success) is below this value.
        downgrade: Downgrade only when the cheaper tier's P(success) is at
            least this value.

    Returns:
        The selected tier.
    """
    h = min(difficulty + 1, 5)
    floor = TASK_FLOOR.get(task_type, 2)
    h = max(h, floor)
    x = f2v(extract_features(request, task_type, difficulty)).reshape(1, -1)

    # Safety net: a single one-tier escalation. An escalated tier is never
    # considered for a downgrade, so its P(success) does not need to be
    # evaluated again and the result can be returned straight away.
    if get_calibrated_psuccess(x, h) < safety and h < 5:
        return h + 1

    # Cost saver (conservative: only downgrade when very confident).
    if h > floor:
        cheaper = h - 1
        # The floor check still matters for non-integer difficulty values,
        # where ``h - 1`` can drop below the floor even though ``h > floor``.
        if get_calibrated_psuccess(x, cheaper) >= downgrade and cheaper >= floor:
            return cheaper
    return h
|
| 33 |
+
|
| 34 |
+
# Sweep
|
| 35 |
+
# Evaluate every (safety, downgrade) pair as its own named router and store
# the outcome in ``results`` under that name.
for s in (0.25, 0.30, 0.35):
    for d in (0.85, 0.90, 0.95):
        name = f"v7_s{s:.2f}_d{d:.2f}"
        # Default arguments freeze the current thresholds inside the lambda,
        # so the router keeps them after the loop variables change.
        results[name] = eval_router(
            name,
            lambda t, s=s, d=d: route_v7(t["req"], t["tt"], t["diff"], s, d),
        )
|
| 39 |
+
|
| 40 |
+
# Also try: no downgrade at all, only safety net
|
| 41 |
+
def route_v7_safety_only(request, task_type, difficulty, safety=0.30):
    """Choose a tier using only the safety net (no downgrades).

    Starts from ``difficulty + 1`` capped at 5 and raised to the task type's
    floor from ``TASK_FLOOR`` (2 when the type is not listed), then keeps
    moving up one tier at a time while the calibrated P(success) is below
    ``safety`` and the tier is below 5.

    Args:
        request: Forwarded to ``extract_features``.
        task_type: Key into ``TASK_FLOOR``; also forwarded to
            ``extract_features``.
        difficulty: Numeric difficulty; the heuristic tier is one above it.
        safety: Keep escalating while P(success) is below this value.

    Returns:
        The selected tier.
    """
    heuristic = min(difficulty + 1, 5)
    tier = max(heuristic, TASK_FLOOR.get(task_type, 2))
    vec = f2v(extract_features(request, task_type, difficulty)).reshape(1, -1)

    while True:
        # P(success) is evaluated once per visited tier, including the one
        # that is finally returned.
        p_success = get_calibrated_psuccess(vec, tier)
        must_escalate = p_success < safety and tier < 5
        if not must_escalate:
            return tier
        tier += 1
|
| 53 |
+
|
| 54 |
+
# Evaluate the safety-only router at each safety threshold and store the
# outcome in ``results`` under a name derived from that threshold.
for s in (0.25, 0.30, 0.35, 0.40, 0.45):
    name = f"v7_safety_s{s:.2f}"
    # The default argument freezes the current threshold inside the lambda.
    results[name] = eval_router(
        name,
        lambda t, s=s: route_v7_safety_only(t["req"], t["tt"], t["diff"], s),
    )
|
| 57 |
+
|
| 58 |
+
# Print final results
|
| 59 |
+
# Banner and column headings of the comparison table.
rule = "=" * 80
print("\n\n" + rule)
print("FINAL v7 COMPARISON")
print(rule)
headings = ("Success", "AvgCost", "CostRed", "Unsafe", "F-DONE")
print("\n" + " ".join([f"{'Router':<30}"] + [f"{title:>10}" for title in headings]))
print("-" * 80)

# Average cost of the always-frontier router; every cost reduction below is
# expressed as a percentage relative to it.
fc = results["always_frontier"]["avg_cost"]

# Key baselines
row_fmt = "{:<30} {:>10.3f} {:>10.4f} {:>9.1f}% {:>10.3f} {:>10.3f}"
for name in ("always_frontier", "heuristic_diff+1", "oracle"):
    r = results[name]
    cr = (1 - r["avg_cost"] / fc) * 100
    print(row_fmt.format(name, r["success"], r["avg_cost"], cr, r["unsafe_rate"], r["false_done"]))
|
| 71 |
+
|
| 72 |
+
# Best v7 variants
|
| 73 |
+
# Order all routers by success (highest first), breaking ties by lower
# average cost, then print a table row for the v7 variants only.
ranked = sorted(
    results.items(),
    key=lambda item: (-item[1]["success"], item[1]["avg_cost"]),
)
for name, r in ranked:
    if name.startswith("v7"):
        cr = (1 - r["avg_cost"] / fc) * 100
        print(
            "{:<30} {:>10.3f} {:>10.4f} {:>9.1f}% {:>10.3f} {:>10.3f}".format(
                name, r["success"], r["avg_cost"], cr, r["unsafe_rate"], r["false_done"]
            )
        )
|
| 77 |
+
|
| 78 |
+
# Find the winner
|
| 79 |
+
def _print_pick(name, r):
    """Print one selection line: success, average cost, cost reduction vs ``fc``."""
    cr = (1 - r["avg_cost"] / fc) * 100
    print(f" {name:<30} success={r['success']:.3f} cost={r['avg_cost']:.4f} costRed={cr:.1f}%")


print("\n\nBEST ROUTER SELECTION:")

# Cheapest router that still reaches the target success rate. The target is
# defined once so the heading and the filter cannot drift apart.
iso_target = 0.84
print(f" Iso-quality (success >= {iso_target}):")
for name, r in sorted(results.items(), key=lambda x: x[1]["avg_cost"]):
    if r["success"] >= iso_target and name != "always_cheap":
        _print_pick(name, r)
        break  # just show the cheapest at that quality
else:
    # Without this the heading would be followed by nothing at all when no
    # router meets the target.
    print(f" (no router reaches success >= {iso_target})")

# Router with the highest success rate, regardless of cost.
print("\n Best quality (max success):")
best = max(results.items(), key=lambda x: x[1]["success"])
_print_pick(*best)

# Router maximising the weighted score named in the heading.
print("\n Best composite (success*20 - cost*30 - unsafe*100):")
best_comp = max(
    results.items(),
    key=lambda x: x[1]["success"] * 20 - x[1]["avg_cost"] * 30 - x[1]["unsafe_rate"] * 100,
)
_print_pick(*best_comp)
|