#!/usr/bin/env python3
"""Tune execution-feedback thresholds for optimal cost-quality tradeoff."""
import random
import sys
sys.path.insert(0, "/app")  # make the local aco package importable
from aco.classifier import TaskCostClassifier
from aco.router import ModelCascadeRouter
from aco.execution_feedback import ExecutionFeedbackRouter
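# TIER_STR: per-tier base success strength used by the simulated outcome model
# (higher tiers succeed more often); TIER_COST: relative cost of one call at each
# tier; TASK_FLOOR: minimum tier a task type may land on when it escalates.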
TIER_STR={1:0.35,2:0.55,3:0.80,4:0.93,5:0.97}
TIER_COST={1:0.05,2:0.15,3:0.75,4:1.0,5:1.5}
TASK_FLOOR={"legal_regulated":4,"long_horizon":3,"research":3,"coding":3,
"unknown_ambiguous":3,"quick_answer":1,"document_drafting":2,
"tool_heavy":2,"retrieval_heavy":2}
TASKS = {
"quick_answer":["What is 2+2?","Explain quantum computing briefly.","Convert 100F to Celsius."],
"coding":["Write a Python function to reverse a linked list.","Fix a typo in the README.",
"Debug this critical production segfault NOW.","Just fix the typo in line 42."],
"research":["Research latest transformer advances.","Find sources comparing LoRA and full FT briefly."],
"document_drafting":["Draft project proposal for ML pipeline."],
"legal_regulated":["Review this contract for liability clauses.","Check GDPR compliance."],
"tool_heavy":["Search open issues and create summary."],
"retrieval_heavy":["Answer based on 50-page document."],
"long_horizon":["Plan 3-month roadmap.","Orchestrate multi-region deployment."],
"unknown_ambiguous":["Help me with this thing."],
}
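# Shared pipeline pieces: the classifier and cascade router are loaded once and
# reused for every threshold configuration; only the feedback router changes.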
classifier = TaskCostClassifier()
router = ModelCascadeRouter(model_path="/app/router_models/router_bundle_v8.pkl")
rng = random.Random(42)
N = 2000
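# Synthesize per-token logprobs for one response: higher tiers and easier tasks
# yield logprobs closer to zero, and failed generations get noisier (wider) logprobs.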
def sim_logprobs(tier, diff, success, rng):
    n = rng.randint(20, 150)
    base = {1:-3.5,2:-2.5,3:-1.5,4:-0.7,5:-0.3}[tier]
    base *= (1 + diff * 0.15)
    lps = []
    for _ in range(n):
        noise = rng.gauss(0, 1.0 + diff*0.3)
        lps.append(base + noise * (0.3 if success else 0.8))
    return lps
# Sweep thresholds
print("="*80)
print("FEEDBACK THRESHOLD SWEEP")
print("="*80)
print(f"\n{'EntropyThr':>12} {'LowConfThr':>12} {'Success':>10} {'AvgCost':>10} {'CostRed':>10} {'Gap':>10}")
print("-"*65)
frontier_sr = 0.901
frontier_cost = 1.0
best_score = -999
best_config = None
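# Grid-sweep the two escalation thresholds; each (entropy, low-confidence-ratio)
# pair replays the same seeded N-request workload so results are comparable.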
for ent_thr in [1.5, 2.0, 2.5, 3.0, 3.5, 4.0]:
    for lc_thr in [0.05, 0.10, 0.15, 0.20, 0.25, 0.30]:
        ef = ExecutionFeedbackRouter(entropy_threshold=ent_thr,
                                     low_conf_ratio_threshold=lc_thr, tier_costs=TIER_COST,
                                     task_floors=TASK_FLOOR)
        rng.seed(42)
        succ = 0; cost = 0.0
        for _ in range(N):
            tt = rng.choice(list(TASKS.keys()))
            req = rng.choice(TASKS[tt])
            pred = classifier.classify(req)
            r = router.route(req, tt, pred["difficulty"], pred)
            tier = r.tier; diff = r.dynamic_difficulty
            # Base success probability for the routed tier at this difficulty.
            ps = TIER_STR[tier]**(diff*0.6)
            initial_success = rng.random() < ps
            lps = sim_logprobs(tier, diff, initial_success, rng)
            signal = ef.analyze_output(lps, task_type=tt, current_tier=tier)
            if signal.should_escalate and tier < 5:
                # Escalate one tier (respecting the task floor) and retry;
                # the run pays for both the initial attempt and the retry.
                final_tier = min(tier+1, 5)
                final_tier = max(final_tier, TASK_FLOOR.get(tt,1))
                ps2 = TIER_STR[final_tier]**(diff*0.6)
                final_success = rng.random() < ps2
                c = TIER_COST[tier] + TIER_COST[final_tier]
                if final_success: succ += 1
            else:
                c = TIER_COST[tier]
                if initial_success: succ += 1
            cost += c
        sr = succ/N; ac = cost/N
        cr = (1-ac/frontier_cost)*100
        gap = frontier_sr - sr
        # Score: maximize success, minimize cost
        score = sr*20 - ac*10
        if score > best_score:
            best_score = score
            best_config = (ent_thr, lc_thr, sr, ac, cr, gap)
        # Only print rows for two representative entropy thresholds to keep the table compact.
        if ent_thr == 2.5 or ent_thr == 3.0:
            print(f"{ent_thr:>12.1f} {lc_thr:>12.2f} {sr:>10.3f} {ac:>10.4f} {cr:>9.1f}% {gap:>10.3f}")
print(f"\n\nBest config: entropy_thr={best_config[0]}, low_conf_thr={best_config[1]}")
print(f" success={best_config[2]:.3f}, cost={best_config[3]:.4f}, costRed={best_config[4]:.1f}%, gap={best_config[5]:.3f}")