#!/usr/bin/env python3
# Source listing metadata: file size 4,080 bytes, e152f8c
"""v7: Tuned hybrid with conservative downgrades + aggressive safety net."""
import json, os, sys, random, uuid, pickle
import numpy as np
from collections import defaultdict
# --- Reuse v6 infrastructure ---
# Execute, in this module's namespace, the part of router_v6_hybrid.py that
# precedes its "Save" section header. The rest of this script relies on names
# that are not defined here (results, eval_router, TASK_FLOOR, ...), which are
# presumably created by that code -- verify against router_v6_hybrid.py.
#
# NOTE(review): exec() runs the file's contents as code; this is only
# acceptable because the path is a fixed, trusted project file.
# NOTE(review): the marker literal below looks mis-encoded (it was probably
# written with box-drawing characters). It has to match the v6 section header
# exactly: when the marker is absent, split() returns the whole text and the
# part after the header is executed as well.
#
# The file is read as UTF-8 (Python's default source encoding) instead of the
# locale default so the non-ASCII marker compares correctly, and the handle is
# closed before the code runs.
with open("/app/router_v6_hybrid.py", encoding="utf-8") as _v6_file:
    _v6_source = _v6_file.read()
exec(_v6_source.split("# βββ Save β")[0])

# Override route_hybrid with tuned thresholds
print("\n\n[EXTRA] Fine-tuned threshold sweep...")
def route_v7(request, task_type, difficulty, safety=0.30, downgrade=0.90):
    """Choose a tier: heuristic baseline, then at most one step up or down.

    The baseline tier is ``difficulty + 1`` capped at 5 and raised to the
    task-type floor taken from ``TASK_FLOOR`` (2 when the task type has no
    entry). From there exactly one of the following happens:

    * Safety net: if the calibrated P(success) at the baseline is below
      ``safety`` and the baseline is below 5, return ``baseline + 1``.
    * Cost saver: otherwise, if the baseline is above the floor and the
      calibrated P(success) one tier lower is at least ``downgrade``, return
      ``baseline - 1``.
    * Otherwise return the baseline.

    Args:
        request: Forwarded to ``extract_features``.
        task_type: Key into ``TASK_FLOOR``; also forwarded to
            ``extract_features``.
        difficulty: Numeric difficulty; the baseline tier is derived from it.
        safety: Escalation threshold on P(success) at the baseline tier.
        downgrade: Minimum P(success) at the cheaper tier required to step
            down (kept high so downgrades stay conservative).

    Returns:
        The selected tier.
    """
    floor = TASK_FLOOR.get(task_type, 2)
    baseline = max(min(difficulty + 1, 5), floor)
    x = f2v(extract_features(request, task_type, difficulty)).reshape(1, -1)

    # Safety net. The tier cap is tested first so that no probability query is
    # made for a tier that cannot be escalated anyway.
    # NOTE(review): assumes get_calibrated_psuccess is a pure query with no
    # side effects -- confirm against its definition.
    if baseline < 5 and get_calibrated_psuccess(x, baseline) < safety:
        # An escalated tier is final: the cost saver below only ever applies
        # to an un-escalated baseline, so P(success) at the new tier is not
        # needed.
        return baseline + 1

    # Cost saver (conservative: only downgrade when very confident).
    if baseline > floor:
        cheaper = baseline - 1
        if cheaper >= floor and get_calibrated_psuccess(x, cheaper) >= downgrade:
            return cheaper

    return baseline
# Sweep route_v7 over a 3x3 grid of (safety, downgrade) thresholds; the safety
# threshold varies slowest. Each configuration is evaluated with eval_router
# and its return value is stored in `results` under a name encoding both
# thresholds.
_V7_GRID = [
    (s_thr, d_thr)
    for s_thr in (0.25, 0.30, 0.35)
    for d_thr in (0.85, 0.90, 0.95)
]
for s_thr, d_thr in _V7_GRID:
    label = f"v7_s{s_thr:.2f}_d{d_thr:.2f}"
    # Thresholds are bound as lambda defaults so each router keeps its own
    # values rather than the loop's final ones.
    results[label] = eval_router(
        label,
        lambda t, s=s_thr, d=d_thr: route_v7(t["req"], t["tt"], t["diff"], s, d),
    )
# Variant with no downgrade step at all: only the safety net, applied
# repeatedly.
def route_v7_safety_only(request, task_type, difficulty, safety=0.30):
    """Choose a tier using the heuristic baseline plus repeated escalation.

    The starting tier is ``difficulty + 1`` capped at 5 and raised to the
    ``TASK_FLOOR`` entry for ``task_type`` (2 when there is none). The tier is
    then increased one step at a time for as long as the calibrated
    P(success) at the current tier is below ``safety`` and the tier is still
    below 5. The tier is never lowered.

    Args:
        request: Forwarded to ``extract_features``.
        task_type: Key into ``TASK_FLOOR``; also forwarded to
            ``extract_features``.
        difficulty: Numeric difficulty; the starting tier is derived from it.
        safety: Escalation threshold on P(success).

    Returns:
        The selected tier.
    """
    tier = max(min(difficulty + 1, 5), TASK_FLOOR.get(task_type, 2))
    x = f2v(extract_features(request, task_type, difficulty)).reshape(1, -1)

    # The probability is queried once per tier visited, including the tier at
    # which the loop finally stops.
    while get_calibrated_psuccess(x, tier) < safety and tier < 5:
        tier += 1
    return tier
# Evaluate the safety-only router at five escalation thresholds and store
# each eval_router result in `results` under a name encoding the threshold.
for threshold in (0.25, 0.30, 0.35, 0.40, 0.45):
    label = f"v7_safety_s{threshold:.2f}"
    # Bind the threshold as a lambda default so every router keeps its own.
    results[label] = eval_router(
        label,
        lambda t, s=threshold: route_v7_safety_only(t["req"], t["tt"], t["diff"], s),
    )
# Print final results: a comparison table (three baselines, then every v7
# variant ordered by success descending and cost ascending), followed by
# three "best router" picks.


def _cost_reduction_pct(entry):
    """Return the percent cost reduction of *entry* relative to ``fc``."""
    return (1 - entry["avg_cost"] / fc) * 100


def _print_table_row(label, entry):
    """Print one comparison-table row for the router named *label*."""
    saved = _cost_reduction_pct(entry)
    print(f"{label:<30} {entry['success']:>10.3f} {entry['avg_cost']:>10.4f} {saved:>9.1f}% {entry['unsafe_rate']:>10.3f} {entry['false_done']:>10.3f}")


def _print_pick(label, entry):
    """Print the one-line summary used in the selection section."""
    saved = _cost_reduction_pct(entry)
    print(f" {label:<30} success={entry['success']:.3f} cost={entry['avg_cost']:.4f} costRed={saved:.1f}%")


_RULE = "=" * 80
print(f"\n\n{_RULE}")
print("FINAL v7 COMPARISON")
print(_RULE)
print(f"\n{'Router':<30} {'Success':>10} {'AvgCost':>10} {'CostRed':>10} {'Unsafe':>10} {'F-DONE':>10}")
print("-" * 80)

# Average cost of the always_frontier baseline; every cost reduction is
# expressed relative to it.
fc = results["always_frontier"]["avg_cost"]

# Key baselines
for baseline_name in ("always_frontier", "heuristic_diff+1", "oracle"):
    _print_table_row(baseline_name, results[baseline_name])

# v7 variants, highest success first; ties broken by lower average cost.
v7_rows = [(label, entry) for label, entry in results.items() if label.startswith("v7")]
v7_rows.sort(key=lambda kv: (-kv[1]["success"], kv[1]["avg_cost"]))
for label, entry in v7_rows:
    _print_table_row(label, entry)

# Find the winner
# NOTE(review): apart from "always_cheap" in the iso-quality pick, every entry
# of `results` is eligible below, including baselines such as "oracle" --
# confirm that this is intended.
print("\n\nBEST ROUTER SELECTION:")

print(" Iso-quality (success >= 0.84):")
by_cost = sorted(results.items(), key=lambda kv: kv[1]["avg_cost"])
cheapest_ok = next(
    (
        (label, entry)
        for label, entry in by_cost
        if entry["success"] >= 0.84 and label != "always_cheap"
    ),
    None,
)
# Only the cheapest qualifying router is shown; nothing is printed when no
# router reaches the success bar.
if cheapest_ok is not None:
    _print_pick(*cheapest_ok)

print("\n Best quality (max success):")
best = max(results.items(), key=lambda kv: kv[1]["success"])
_print_pick(best[0], best[1])

print("\n Best composite (success*20 - cost*30 - unsafe*100):")
best_comp = max(
    results.items(),
    key=lambda kv: kv[1]["success"] * 20 - kv[1]["avg_cost"] * 30 - kv[1]["unsafe_rate"] * 100,
)
_print_pick(best_comp[0], best_comp[1])
|