narcolepticchicken committed on
Commit
ee673f9
·
verified ·
1 Parent(s): f575279

Upload aco/pareto.py

Browse files
Files changed (1) hide show
  1. aco/pareto.py +97 -0
aco/pareto.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Cost-Quality Pareto Frontier analysis.
2
+
3
+ Implements RouterBench NDCH (arxiv:2403.12031) and
4
+ RouteLLM CPT/APGR (arxiv:2406.18665) metrics.
5
+ """
6
+ import numpy as np
7
+ from typing import Dict, List, Optional, Tuple
8
+
9
+
10
def compute_ndch(points: np.ndarray) -> np.ndarray:
    """Non-Decreasing Convex Hull from RouterBench Sec 3.2.

    Builds the upper convex hull of the (cost, quality) points with a
    monotone-chain sweep and keeps only its non-decreasing part, i.e. the
    vertices from the cheapest point up to the best-quality point.

    Only the *upper* hull is used: vertices of the lower hull are dominated
    configurations and must never appear on the frontier, even when their
    quality happens to be non-decreasing in cost.

    Args:
        points: Nx2 array-like of (cost, quality) pairs.
    Returns:
        NDCH vertices sorted by cost, shape (K, 2). An empty input yields an
        array of shape (0, 2).
    """
    pts = np.asarray(points, dtype=float)
    if pts.size == 0:
        return np.empty((0, 2))
    pts = pts.reshape(-1, 2)

    # Primary key: cost ascending; secondary key: quality descending, so the
    # first point seen for any given cost is the best one at that cost.
    order = np.lexsort((-pts[:, 1], pts[:, 0]))

    hull = []
    for pt in pts[order]:
        # Same cost as the last kept vertex -> dominated duplicate, skip it
        # (also keeps the cost axis strictly increasing for np.interp users).
        if hull and pt[0] == hull[-1][0]:
            continue
        # Pop the previous vertex while it lies on or below the chord from
        # its predecessor to the new point (non-clockwise turn).
        while len(hull) >= 2:
            (x1, y1), (x2, y2) = hull[-2], hull[-1]
            turn = (x2 - x1) * (pt[1] - y1) - (y2 - y1) * (pt[0] - x1)
            if turn < 0:
                break
            hull.pop()
        hull.append(pt)

    # The upper hull is concave: quality rises to its maximum and then falls.
    # Keeping only non-decreasing vertices trims the falling tail.
    frontier = [hull[0]]
    for pt in hull[1:]:
        if pt[1] >= frontier[-1][1]:
            frontier.append(pt)
    return np.array(frontier)
30
+
31
+
32
def cost_at_quality(ndch: np.ndarray, target_q: float) -> Optional[float]:
    """Find minimum cost on NDCH achieving target_quality.

    Args:
        ndch: Kx2 array-like of (cost, quality) vertices sorted by cost with
            non-decreasing quality.
        target_q: quality level that must be reached.
    Returns:
        The smallest cost on the frontier whose quality is at least
        ``target_q`` (linearly interpolated between vertices), or ``None``
        when the frontier is empty or never reaches ``target_q``.
    """
    frontier = np.asarray(ndch, dtype=float)
    if frontier.size == 0:
        return None
    frontier = frontier.reshape(-1, 2)

    # The cheapest vertex already achieves the target (this also covers a
    # single-vertex frontier, which has no segment to interpolate on).
    if target_q <= frontier[0, 1]:
        return float(frontier[0, 0])

    for (c1, q1), (c2, q2) in zip(frontier[:-1], frontier[1:]):
        # Strict lower bound guarantees q2 > q1, so flat segments can never
        # cause a division by zero here.
        if q1 < target_q <= q2:
            return float(c1 + (c2 - c1) * (target_q - q1) / (q2 - q1))
    return None
40
+
41
+
42
def cost_savings_at_iso(ndch_opt, ndch_base, target_q) -> Optional[float]:
    """Cost reduction at iso-quality: 1 - c_opt/c_base.

    Args:
        ndch_opt: NDCH vertices of the optimized system.
        ndch_base: NDCH vertices of the baseline system.
        target_q: quality level at which both costs are compared.
    Returns:
        Fractional saving relative to the baseline, or ``None`` when either
        frontier cannot reach ``target_q`` or the baseline cost is zero (the
        ratio is undefined in that case).
    """
    c_opt = cost_at_quality(ndch_opt, target_q)
    c_base = cost_at_quality(ndch_base, target_q)
    if c_opt is None or c_base is None:
        return None
    # A free baseline makes the relative saving undefined; report "no value"
    # instead of dividing by zero.
    if c_base == 0:
        return None
    return float(1.0 - c_opt / c_base)
49
+
50
+
51
def compute_aiq(ndch, c_min, c_max, n_samples: int = 100) -> float:
    """Average Improvement in Quality (RouterBench).

    Averages the frontier quality over the cost interval [c_min, c_max] by
    sampling the piecewise-linear frontier and integrating with the
    trapezoidal rule.

    Args:
        ndch: Kx2 array-like of (cost, quality) vertices sorted by cost.
        c_min: lower end of the cost interval.
        c_max: upper end of the cost interval.
        n_samples: number of evenly spaced cost samples used for integration.
    Returns:
        Mean frontier quality over the interval. When the interval has zero
        width the frontier quality at that single cost is returned.
    """
    frontier = np.asarray(ndch, dtype=float).reshape(-1, 2)
    if c_max == c_min:
        # Degenerate interval (e.g. a single policy): the average over a
        # point is the value at that point, not 0/0.
        return float(np.interp(c_min, frontier[:, 0], frontier[:, 1]))
    costs = np.linspace(c_min, c_max, n_samples)
    quals = np.interp(costs, frontier[:, 0], frontier[:, 1])
    # np.trapz was renamed to np.trapezoid in NumPy 2.0; prefer the new name
    # and fall back for older releases.
    integrate = getattr(np, "trapezoid", None) or np.trapz
    return float(integrate(quals, costs) / (c_max - c_min))
56
+
57
+
58
def compute_apgr(ndch_router, ndch_random, c_max) -> float:
    """Average Performance Gap Recovery (RouteLLM).

    APGR = integral of PGR over cost fraction [0,1].
    PGR(c) = (q_router(c) - q_weak) / (q_strong - q_weak)

    ``q_weak`` and ``q_strong`` are taken from the first and last vertices of
    ``ndch_random``.

    Args:
        ndch_router: Kx2 array-like of (cost, quality) vertices of the router.
        ndch_random: Mx2 array-like of (cost, quality) vertices of the
            reference frontier, sorted by cost.
        c_max: upper end of the cost range; integration runs over [0, c_max].
    Returns:
        PGR averaged over [0, c_max]. When ``c_max`` is 0 the PGR at cost 0
        is returned.
    """
    router = np.asarray(ndch_router, dtype=float).reshape(-1, 2)
    reference = np.asarray(ndch_random, dtype=float).reshape(-1, 2)

    q_weak = reference[0, 1]
    q_strong = reference[-1, 1]
    # Floor the gap so identical weak/strong qualities do not divide by zero.
    gap = max(q_strong - q_weak, 1e-6)

    if c_max == 0:
        # Zero-width cost range: the average collapses to the value at 0.
        q_at_zero = np.interp(0.0, router[:, 0], router[:, 1])
        return float((q_at_zero - q_weak) / gap)

    costs = np.linspace(0, c_max, 100)
    q_r = np.interp(costs, router[:, 0], router[:, 1])
    pgr = (q_r - q_weak) / gap
    # np.trapz was renamed to np.trapezoid in NumPy 2.0; prefer the new name
    # and fall back for older releases.
    integrate = getattr(np, "trapezoid", None) or np.trapz
    return float(integrate(pgr, costs) / c_max)
70
+
71
+
72
def build_frontier_report(
    policies: Dict[str, Tuple[float, float]],
    quality_levels: Optional[List[float]] = None,
):
    """Build full Pareto frontier report.

    Args:
        policies: {name: (success_rate, cost_per_task)}
        quality_levels: quality targets for which the minimum frontier cost
            is reported; defaults to
            [0.65, 0.70, 0.75, 0.78, 0.80, 0.85, 0.87].
    Returns:
        Dict with keys:
            "policies": per-policy {"success", "cost"} as given,
            "ndch_vertices": frontier vertices as [cost, quality] lists,
            "aiq": average frontier quality rounded to 4 decimals
                (``None`` when ``policies`` is empty),
            "frontier_quality_levels": {"q=<level>": cost} for every target
                the frontier can reach.
    """
    if quality_levels is None:
        quality_levels = [0.65, 0.70, 0.75, 0.78, 0.80, 0.85, 0.87]

    report = {
        "policies": {n: {"success": q, "cost": c} for n, (q, c) in policies.items()},
        "ndch_vertices": [],
        "aiq": None,
        "frontier_quality_levels": {},
    }
    # Nothing to analyse: return the empty skeleton instead of failing on
    # indexing a zero-length array.
    if not policies:
        return report

    # Input tuples are (quality, cost); the frontier helpers expect
    # (cost, quality), hence the swap.
    points = np.array([(c, q) for q, c in policies.values()], dtype=float)

    ndch = compute_ndch(points)
    c_min, c_max = points[:, 0].min(), points[:, 0].max()
    report["ndch_vertices"] = ndch.tolist()
    report["aiq"] = round(compute_aiq(ndch, c_min, c_max), 4)

    for q_target in quality_levels:
        c = cost_at_quality(ndch, q_target)
        if c is not None:
            report["frontier_quality_levels"][f"q={q_target:.2f}"] = round(float(c), 4)

    return report