narcolepticchicken
/

agent-cost-optimizer

Safetensors

Model card Files Files and versions

xet

Community

narcolepticchicken committed about 17 hours ago

Commit

e152f8c

verified ·

1 Parent(s): 9221a43

Upload training/router_v7_tuned.py with huggingface_hub

Browse files

Files changed (1) hide show

training/router_v7_tuned.py +95 -0

training/router_v7_tuned.py ADDED Viewed

	@@ -0,0 +1,95 @@

+#!/usr/bin/env python3
+"""v7: Tuned hybrid with conservative downgrades + aggressive safety net."""
+import json, os, sys, random, uuid, pickle
+import numpy as np
+from collections import defaultdict
+# ─── Reuse v6 infrastructure ──────────────────────────────────────────
+exec(open("/app/router_v6_hybrid.py").read().split("# ─── Save ─")[0])
+# Override route_hybrid with tuned thresholds
+print("\n\n[EXTRA] Fine-tuned threshold sweep...")
+# Only downgrade when very confident (0.90+), but escalate when P(success) < 0.30
+def route_v7(request, task_type, difficulty, safety=0.30, downgrade=0.90):
+    h = min(difficulty + 1, 5)
+    floor = TASK_FLOOR.get(task_type, 2)
+    h = max(h, floor)
+    feats = extract_features(request, task_type, difficulty)
+    x = f2v(feats).reshape(1, -1)
+    tier = h
+    # Safety net
+    ps = get_calibrated_psuccess(x, tier)
+    if ps < safety and tier < 5:
+        tier += 1
+        ps = get_calibrated_psuccess(x, tier)
+    # Cost saver (conservative: only downgrade when very confident)
+    if tier > floor and tier == h:
+        cheaper = tier - 1
+        pc = get_calibrated_psuccess(x, cheaper)
+        if pc >= downgrade and cheaper >= floor:
+            tier = cheaper
+    return tier
+# Sweep
+for s in [0.25, 0.30, 0.35]:
+    for d in [0.85, 0.90, 0.95]:
+        name = f"v7_s{s:.2f}_d{d:.2f}"
+        results[name] = eval_router(name, lambda t, s=s, d=d: route_v7(t["req"], t["tt"], t["diff"], s, d))
+# Also try: no downgrade at all, only safety net
+def route_v7_safety_only(request, task_type, difficulty, safety=0.30):
+    h = min(difficulty + 1, 5)
+    floor = TASK_FLOOR.get(task_type, 2)
+    h = max(h, floor)
+    feats = extract_features(request, task_type, difficulty)
+    x = f2v(feats).reshape(1, -1)
+    ps = get_calibrated_psuccess(x, h)
+    tier = h
+    while ps < safety and tier < 5:
+        tier += 1
+        ps = get_calibrated_psuccess(x, tier)
+    return tier
+for s in [0.25, 0.30, 0.35, 0.40, 0.45]:
+    name = f"v7_safety_s{s:.2f}"
+    results[name] = eval_router(name, lambda t, s=s: route_v7_safety_only(t["req"], t["tt"], t["diff"], s))
+# Print final results
+print(f"\n\n{'='*80}")
+print("FINAL v7 COMPARISON")
+print(f"{'='*80}")
+print(f"\n{'Router':<30} {'Success':>10} {'AvgCost':>10} {'CostRed':>10} {'Unsafe':>10} {'F-DONE':>10}")
+print("-"*80)
+fc = results["always_frontier"]["avg_cost"]
+# Key baselines
+for name in ["always_frontier","heuristic_diff+1","oracle"]:
+    r = results[name]
+    cr = (1-r["avg_cost"]/fc)*100
+    print(f"{name:<30} {r['success']:>10.3f} {r['avg_cost']:>10.4f} {cr:>9.1f}% {r['unsafe_rate']:>10.3f} {r['false_done']:>10.3f}")
+# Best v7 variants
+for name, r in sorted(results.items(), key=lambda x: (-x[1]["success"], x[1]["avg_cost"])):
+    if not name.startswith("v7"): continue
+    cr = (1-r["avg_cost"]/fc)*100
+    print(f"{name:<30} {r['success']:>10.3f} {r['avg_cost']:>10.4f} {cr:>9.1f}% {r['unsafe_rate']:>10.3f} {r['false_done']:>10.3f}")
+# Find the winner
+print(f"\n\nBEST ROUTER SELECTION:")
+print(f"  Iso-quality (success >= 0.84):")
+for name, r in sorted(results.items(), key=lambda x: (x[1]["avg_cost"])):
+    if r["success"] >= 0.84 and name not in ("always_cheap",):
+        cr = (1-r["avg_cost"]/fc)*100
+        print(f"    {name:<30} success={r['success']:.3f} cost={r['avg_cost']:.4f} costRed={cr:.1f}%")
+        break  # just show the cheapest at that quality
+print(f"\n  Best quality (max success):")
+best = max(results.items(), key=lambda x: x[1]["success"])
+cr = (1-best[1]["avg_cost"]/fc)*100
+print(f"    {best[0]:<30} success={best[1]['success']:.3f} cost={best[1]['avg_cost']:.4f} costRed={cr:.1f}%")
+print(f"\n  Best composite (success*20 - cost*30 - unsafe*100):")
+best_comp = max(results.items(), key=lambda x: x[1]["success"]*20 - x[1]["avg_cost"]*30 - x[1]["unsafe_rate"]*100)
+cr = (1-best_comp[1]["avg_cost"]/fc)*100
+print(f"    {best_comp[0]:<30} success={best_comp[1]['success']:.3f} cost={best_comp[1]['avg_cost']:.4f} costRed={cr:.1f}%")