narcolepticchicken committed on
Commit
9234215
·
verified ·
1 Parent(s): 3d0ebe0

Upload training/tune_feedback.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. training/tune_feedback.py +98 -0
training/tune_feedback.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Tune execution-feedback thresholds for optimal cost-quality tradeoff."""
3
+ import sys,json,random,math,pickle
4
+ sys.path.insert(0,"/app")
5
+ from collections import defaultdict
6
+ from aco.classifier import TaskCostClassifier
7
+ from aco.router import ModelCascadeRouter
8
+ from aco.execution_feedback import ExecutionFeedbackRouter
9
+
10
# Per-tier strength in (0, 1); the sweep raises it to a difficulty-scaled
# power to obtain a simulated success probability.
TIER_STR = {
    1: 0.35,
    2: 0.55,
    3: 0.80,
    4: 0.93,
    5: 0.97,
}

# Cost charged for one call at each tier.
TIER_COST = {
    1: 0.05,
    2: 0.15,
    3: 0.75,
    4: 1.0,
    5: 1.5,
}

# Lowest tier an escalated request may land on, keyed by task type.
TASK_FLOOR = {
    "legal_regulated": 4,
    "long_horizon": 3,
    "research": 3,
    "coding": 3,
    "unknown_ambiguous": 3,
    "quick_answer": 1,
    "document_drafting": 2,
    "tool_heavy": 2,
    "retrieval_heavy": 2,
}

# Sample requests per task type.  Key order and list order matter: the sweep
# draws from them with a seeded RNG, so reordering changes the simulation.
TASKS = {
    "quick_answer": [
        "What is 2+2?",
        "Explain quantum computing briefly.",
        "Convert 100F to Celsius.",
    ],
    "coding": [
        "Write a Python function to reverse a linked list.",
        "Fix a typo in the README.",
        "Debug this critical production segfault NOW.",
        "Just fix the typo in line 42.",
    ],
    "research": [
        "Research latest transformer advances.",
        "Find sources comparing LoRA and full FT briefly.",
    ],
    "document_drafting": [
        "Draft project proposal for ML pipeline.",
    ],
    "legal_regulated": [
        "Review this contract for liability clauses.",
        "Check GDPR compliance.",
    ],
    "tool_heavy": [
        "Search open issues and create summary.",
    ],
    "retrieval_heavy": [
        "Answer based on 50-page document.",
    ],
    "long_horizon": [
        "Plan 3-month roadmap.",
        "Orchestrate multi-region deployment.",
    ],
    "unknown_ambiguous": [
        "Help me with this thing.",
    ],
}
28
+
29
# Number of simulated requests evaluated for every threshold configuration.
N = 2000

# Dedicated seeded generator so the simulation is reproducible and does not
# touch the global `random` module state.
rng = random.Random(42)

# Project components that classify a request and pick its initial tier.
classifier = TaskCostClassifier()
router = ModelCascadeRouter(
    model_path="/app/router_models/router_bundle_v8.pkl",
)
34
+
35
def sim_logprobs(tier, diff, success, rng):
    """Simulate the per-token log-probabilities of one model response.

    The sequence length is drawn uniformly from 20..150.  Each value is a
    tier-specific mean (scaled by difficulty) plus Gaussian noise; failed
    responses get a wider noise spread than successful ones.

    Args:
        tier: Model tier, an integer key in 1..5.
        diff: Task difficulty; scales both the mean and the noise.
        success: Whether the simulated response succeeded.
        rng: A ``random.Random``-like generator providing ``randint`` and
            ``gauss``.  It is advanced by one ``randint`` draw followed by
            one ``gauss`` draw per token.

    Returns:
        A list of floats, each ``<= 0.0``.

    Raises:
        KeyError: If ``tier`` is not one of 1..5.
    """
    n_tokens = rng.randint(20, 150)
    mean = {1: -3.5, 2: -2.5, 3: -1.5, 4: -0.7, 5: -0.3}[tier]
    mean *= (1 + diff * 0.15)
    sigma = 1.0 + diff * 0.3
    spread = 0.3 if success else 0.8
    # A log-probability can never exceed 0 (p <= 1).  Without the clamp the
    # noise regularly pushes high-tier samples above zero, handing consumers
    # values that correspond to probabilities greater than one.  Clamping
    # does not change how many random draws are consumed.
    return [
        min(0.0, mean + rng.gauss(0, sigma) * spread)
        for _ in range(n_tokens)
    ]
44
+
45
# Sweep thresholds
#
# Grid-search the two ExecutionFeedbackRouter thresholds.  For every pair,
# N requests are simulated from the same seed; a request is escalated once
# to a higher tier when the feedback signal asks for it.  Each configuration
# is scored on success rate versus average cost and the best one is reported.
print("=" * 80)
print("FEEDBACK THRESHOLD SWEEP")
print("=" * 80)
print(f"\n{'EntropyThr':>12} {'LowConfThr':>12} {'Success':>10} {'AvgCost':>10} {'CostRed':>10} {'Gap':>10}")
print("-" * 65)

# Reference point the sweep is compared against: `gap` is measured from
# frontier_sr and the cost reduction is relative to frontier_cost.
frontier_sr = 0.901
frontier_cost = 1.0

# -inf guarantees the first evaluated configuration becomes the incumbent.
best_score = float("-inf")
best_config = None

# Loop-invariant: build the list of task types once instead of on every
# simulated request.  Order is the dict's insertion order, as before.
task_types = list(TASKS)

for ent_thr in [1.5, 2.0, 2.5, 3.0, 3.5, 4.0]:
    for lc_thr in [0.05, 0.10, 0.15, 0.20, 0.25, 0.30]:
        ef = ExecutionFeedbackRouter(
            entropy_threshold=ent_thr,
            low_conf_ratio_threshold=lc_thr,
            tier_costs=TIER_COST,
            task_floors=TASK_FLOOR,
        )
        # Re-seed so every configuration starts from the same random stream.
        rng.seed(42)
        succ = 0
        cost = 0.0
        for _ in range(N):
            # NOTE: the order of rng draws below is part of the simulation;
            # do not reorder these statements.
            tt = rng.choice(task_types)
            req = rng.choice(TASKS[tt])
            pred = classifier.classify(req)
            r = router.route(req, tt, pred["difficulty"], pred)
            tier = r.tier
            diff = r.dynamic_difficulty
            # Success probability falls as difficulty rises.
            ps = TIER_STR[tier] ** (diff * 0.6)
            initial_success = rng.random() < ps
            lps = sim_logprobs(tier, diff, initial_success, rng)
            signal = ef.analyze_output(lps, task_type=tt, current_tier=tier)
            if signal.should_escalate and tier < 5:
                # Retry one tier up, but never below the task-type floor.
                final_tier = min(tier + 1, 5)
                final_tier = max(final_tier, TASK_FLOOR.get(tt, 1))
                ps2 = TIER_STR[final_tier] ** (diff * 0.6)
                final_success = rng.random() < ps2
                # An escalated request pays for both attempts.
                c = TIER_COST[tier] + TIER_COST[final_tier]
                if final_success:
                    succ += 1
            else:
                c = TIER_COST[tier]
                if initial_success:
                    succ += 1
            cost += c
        sr = succ / N
        ac = cost / N
        cr = (1 - ac / frontier_cost) * 100
        gap = frontier_sr - sr
        # Score: maximize success, minimize cost
        score = sr * 20 - ac * 10
        if score > best_score:
            best_score = score
            best_config = (ent_thr, lc_thr, sr, ac, cr, gap)
        # Only the rows for these two entropy thresholds are printed.
        if ent_thr == 2.5 or ent_thr == 3.0:
            print(f"{ent_thr:>12.1f} {lc_thr:>12.2f} {sr:>10.3f} {ac:>10.4f} {cr:>9.1f}% {gap:>10.3f}")

print(f"\n\nBest config: entropy_thr={best_config[0]}, low_conf_thr={best_config[1]}")
print(f" success={best_config[2]:.3f}, cost={best_config[3]:.4f}, costRed={best_config[4]:.1f}%, gap={best_config[5]:.3f}")