Upload training/tune_feedback.py with huggingface_hub
Browse files- training/tune_feedback.py +98 -0
training/tune_feedback.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""Tune execution-feedback thresholds for optimal cost-quality tradeoff."""
|
| 3 |
+
import sys,json,random,math,pickle
|
| 4 |
+
sys.path.insert(0,"/app")
|
| 5 |
+
from collections import defaultdict
|
| 6 |
+
from aco.classifier import TaskCostClassifier
|
| 7 |
+
from aco.router import ModelCascadeRouter
|
| 8 |
+
from aco.execution_feedback import ExecutionFeedbackRouter
|
| 9 |
+
|
| 10 |
+
# ---------------------------------------------------------------------------
# Simulation constants
# ---------------------------------------------------------------------------

# Base "strength" of each model tier; higher tiers succeed more often.
TIER_STR = {
    1: 0.35,
    2: 0.55,
    3: 0.80,
    4: 0.93,
    5: 0.97,
}

# Relative cost of serving one request on each tier.
TIER_COST = {
    1: 0.05,
    2: 0.15,
    3: 0.75,
    4: 1.0,
    5: 1.5,
}

# Minimum tier a task type may be escalated to (escalation floor).
TASK_FLOOR = {
    "legal_regulated": 4,
    "long_horizon": 3,
    "research": 3,
    "coding": 3,
    "unknown_ambiguous": 3,
    "quick_answer": 1,
    "document_drafting": 2,
    "tool_heavy": 2,
    "retrieval_heavy": 2,
}

# Representative sample requests per task type, used to drive the synthetic
# sweep below.
TASKS = {
    "quick_answer": [
        "What is 2+2?",
        "Explain quantum computing briefly.",
        "Convert 100F to Celsius.",
    ],
    "coding": [
        "Write a Python function to reverse a linked list.",
        "Fix a typo in the README.",
        "Debug this critical production segfault NOW.",
        "Just fix the typo in line 42.",
    ],
    "research": [
        "Research latest transformer advances.",
        "Find sources comparing LoRA and full FT briefly.",
    ],
    "document_drafting": [
        "Draft project proposal for ML pipeline.",
    ],
    "legal_regulated": [
        "Review this contract for liability clauses.",
        "Check GDPR compliance.",
    ],
    "tool_heavy": [
        "Search open issues and create summary.",
    ],
    "retrieval_heavy": [
        "Answer based on 50-page document.",
    ],
    "long_horizon": [
        "Plan 3-month roadmap.",
        "Orchestrate multi-region deployment.",
    ],
    "unknown_ambiguous": [
        "Help me with this thing.",
    ],
}
|
| 28 |
+
|
| 29 |
+
# Classifier that predicts task properties (including "difficulty") from the
# raw request text. NOTE(review): behavior defined in aco.classifier — not
# visible here.
classifier = TaskCostClassifier()
# Cascade router loaded from the pre-trained v8 model bundle; chooses an
# initial tier per request.
router = ModelCascadeRouter(model_path="/app/router_models/router_bundle_v8.pkl")

# Dedicated RNG so the whole sweep is reproducible; it is re-seeded to 42
# before each threshold configuration so every config sees identical traffic.
rng = random.Random(42)
# Number of simulated requests evaluated per threshold configuration.
N = 2000
|
| 34 |
+
|
| 35 |
+
def sim_logprobs(tier, diff, success, rng):
    """Simulate a per-token logprob trace for one generation.

    Draws a random token count, then samples each token's logprob as
    gaussian noise around a tier-dependent base value. Difficulty pulls
    the base further from zero and widens the noise; failed generations
    get a larger noise multiplier than successful ones.

    Args:
        tier: model tier (1-5); stronger tiers sit closer to 0.
        diff: difficulty scalar scaling base and noise width.
        success: whether the simulated generation succeeded.
        rng: random.Random instance (consumed: 1 randint + n gauss draws).

    Returns:
        List of simulated token logprobs (length 20-150).
    """
    token_count = rng.randint(20, 150)
    # Tier base logprob, dragged down proportionally to difficulty.
    mean_lp = {1: -3.5, 2: -2.5, 3: -1.5, 4: -0.7, 5: -0.3}[tier]
    mean_lp *= (1 + diff * 0.15)
    spread = 0.3 if success else 0.8
    return [
        mean_lp + rng.gauss(0, 1.0 + diff * 0.3) * spread
        for _ in range(token_count)
    ]
|
| 44 |
+
|
| 45 |
+
# Sweep thresholds
# Grid-search (entropy_threshold x low_conf_ratio_threshold) for the
# execution-feedback router, replaying the same N simulated requests per
# configuration and tracking success rate vs average cost.
print("="*80)
print("FEEDBACK THRESHOLD SWEEP")
print("="*80)
print(f"\n{'EntropyThr':>12} {'LowConfThr':>12} {'Success':>10} {'AvgCost':>10} {'CostRed':>10} {'Gap':>10}")
print("-"*65)

# Reference point used for the cost-reduction and quality-gap columns.
# NOTE(review): presumably the always-frontier-model baseline — confirm.
frontier_sr = 0.901
frontier_cost = 1.0

best_score = -999
best_config = None

for ent_thr in [1.5, 2.0, 2.5, 3.0, 3.5, 4.0]:
    for lc_thr in [0.05, 0.10, 0.15, 0.20, 0.25, 0.30]:
        # Fresh feedback router for this threshold pair.
        ef = ExecutionFeedbackRouter(entropy_threshold=ent_thr,
            low_conf_ratio_threshold=lc_thr, tier_costs=TIER_COST,
            task_floors=TASK_FLOOR)
        # Re-seed so every configuration replays identical traffic and
        # identical random draws.
        rng.seed(42)
        succ = 0; cost = 0.0
        for i in range(N):
            # Sample a task type and a concrete request for it.
            tt = rng.choice(list(TASKS.keys()))
            req = rng.choice(TASKS[tt])
            pred = classifier.classify(req)
            r = router.route(req, tt, pred["difficulty"], pred)
            tier = r.tier; diff = r.dynamic_difficulty
            # Success probability decays with difficulty; stronger tiers
            # (TIER_STR closer to 1) decay more slowly.
            ps = TIER_STR[tier]**(diff*0.6)
            initial_success = rng.random() < ps
            # Simulated token logprobs feed the feedback signal.
            lps = sim_logprobs(tier, diff, initial_success, rng)
            signal = ef.analyze_output(lps, task_type=tt, current_tier=tier)
            if signal.should_escalate and tier < 5:
                # Escalate one tier, but never below the task's floor.
                final_tier = min(tier+1, 5)
                final_tier = max(final_tier, TASK_FLOOR.get(tt,1))
                ps2 = TIER_STR[final_tier]**(diff*0.6)
                # Escalation re-rolls the outcome; the initial attempt's
                # result is discarded but its cost is still paid.
                final_success = rng.random() < ps2
                c = TIER_COST[tier] + TIER_COST[final_tier]
                if final_success: succ += 1
            else:
                # No escalation: pay only the initial tier's cost.
                c = TIER_COST[tier]
                if initial_success: succ += 1
            cost += c
        # Per-configuration aggregates.
        sr = succ/N; ac = cost/N
        cr = (1-ac/frontier_cost)*100
        gap = frontier_sr - sr
        # Score: maximize success, minimize cost
        score = sr*20 - ac*10
        if score > best_score:
            best_score = score
            best_config = (ent_thr, lc_thr, sr, ac, cr, gap)
        # Only print the middle entropy slices to keep the table short.
        if ent_thr == 2.5 or ent_thr == 3.0:
            print(f"{ent_thr:>12.1f} {lc_thr:>12.2f} {sr:>10.3f} {ac:>10.4f} {cr:>9.1f}% {gap:>10.3f}")

print(f"\n\nBest config: entropy_thr={best_config[0]}, low_conf_thr={best_config[1]}")
print(f" success={best_config[2]:.3f}, cost={best_config[3]:.4f}, costRed={best_config[4]:.1f}%, gap={best_config[5]:.3f}")
|