Upload aco/pareto.py
Browse files- aco/pareto.py +97 -0
aco/pareto.py
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Cost-Quality Pareto Frontier analysis.
|
| 2 |
+
|
| 3 |
+
Implements RouterBench NDCH (arxiv:2403.12031) and
|
| 4 |
+
RouteLLM CPT/APGR (arxiv:2406.18665) metrics.
|
| 5 |
+
"""
|
| 6 |
+
import numpy as np
|
| 7 |
+
from typing import Dict, List, Optional, Tuple
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def compute_ndch(points: np.ndarray) -> np.ndarray:
    """Non-Decreasing Convex Hull from RouterBench Sec 3.2.

    Returns the vertices of the upper concave frontier of a set of
    (cost, quality) points, truncated at the cheapest point of maximum
    quality. Along the returned vertices both cost and quality strictly
    increase, and no input point lies above the piecewise-linear curve
    through them.

    The frontier is built with a monotone-chain sweep instead of
    ``scipy.spatial.ConvexHull`` so that degenerate inputs (fewer than three
    points, collinear points, duplicates) do not raise, and so that
    lower-hull vertices can never end up in the result.

    Args:
        points: Nx2 array of (cost, quality) pairs
    Returns:
        NDCH vertices sorted by cost, as a Kx2 float array (K <= N).
        An empty input yields an empty (0, 2) array.
    Raises:
        ValueError: if ``points`` is non-empty and not of shape (N, 2).
    """
    pts = np.asarray(points, dtype=float)
    if pts.size == 0:
        return np.empty((0, 2), dtype=float)
    if pts.ndim != 2 or pts.shape[1] != 2:
        raise ValueError("points must be an Nx2 array of (cost, quality) pairs")

    # Sort by cost ascending; break ties by quality descending so that the
    # best point at each cost is visited first and its duplicates are skipped.
    order = np.lexsort((-pts[:, 1], pts[:, 0]))

    frontier = []
    for cost, quality in pts[order]:
        # A point that does not improve on the best quality seen so far is
        # dominated by a cheaper (or equally priced) point.
        if frontier and quality <= frontier[-1][1]:
            continue
        # Pop vertices that fall on or below the chord to the new point:
        # a non-negative cross product means the chain would not be concave.
        while len(frontier) >= 2:
            (c0, q0), (c1, q1) = frontier[-2], frontier[-1]
            cross = (c1 - c0) * (quality - q0) - (q1 - q0) * (cost - c0)
            if cross < 0:
                break
            frontier.pop()
        frontier.append((cost, quality))

    return np.array(frontier, dtype=float)
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def cost_at_quality(ndch: np.ndarray, target_q: float) -> Optional[float]:
    """Find minimum cost on NDCH achieving target_quality.

    Args:
        ndch: Kx2 array of (cost, quality) frontier vertices sorted by cost.
        target_q: quality level that must be reached.
    Returns:
        The cost at which the frontier reaches ``target_q``, linearly
        interpolated between the two surrounding vertices. If ``target_q`` is
        at or below the quality of the cheapest vertex, that vertex's cost is
        returned, since it already achieves the target. Returns ``None`` when
        the frontier is empty or never reaches ``target_q``.
    """
    if len(ndch) == 0:
        return None

    # The cheapest vertex already meets any target at or below its quality.
    if target_q <= ndch[0, 1]:
        return float(ndch[0, 0])

    for (c1, q1), (c2, q2) in zip(ndch[:-1], ndch[1:]):
        if q1 <= target_q <= q2:
            if q2 == q1:
                # Flat segment: the cheaper end already achieves the target;
                # interpolating would divide by zero.
                return float(c1)
            return float(c1 + (c2 - c1) * (target_q - q1) / (q2 - q1))
    return None
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def cost_savings_at_iso(
    ndch_opt: np.ndarray, ndch_base: np.ndarray, target_q: float
) -> Optional[float]:
    """Cost reduction at iso-quality: 1 - c_opt/c_base.

    Args:
        ndch_opt: frontier vertices of the optimised system, as accepted by
            ``cost_at_quality``.
        ndch_base: frontier vertices of the baseline system.
        target_q: quality level at which the two costs are compared.
    Returns:
        The fractional saving ``1 - c_opt / c_base``, or ``None`` when either
        frontier has no cost for ``target_q`` or the baseline cost is zero
        (the ratio is undefined in that case).
    """
    c_opt = cost_at_quality(ndch_opt, target_q)
    c_base = cost_at_quality(ndch_base, target_q)
    if c_opt is None or c_base is None:
        return None
    # A zero baseline cost would make the ratio a division by zero.
    if c_base == 0:
        return None
    return float(1.0 - c_opt / c_base)
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def compute_aiq(ndch: np.ndarray, c_min: float, c_max: float) -> float:
    """Average Improvement in Quality (RouterBench).

    Computes the mean quality of the frontier over the cost interval
    ``[c_min, c_max]``: the area under the piecewise-linear curve through
    ``ndch`` divided by the interval width. Outside the frontier's cost range
    the curve is held constant at the nearest vertex's quality
    (``np.interp`` behaviour).

    Args:
        ndch: Kx2 array of (cost, quality) vertices sorted by cost.
        c_min: lower bound of the cost interval.
        c_max: upper bound of the cost interval.
    Returns:
        The average quality as a Python float. When ``c_min == c_max`` the
        interval is a single point and the frontier quality at that cost is
        returned instead of dividing by zero.
    """
    span = c_max - c_min
    if span == 0:
        return float(np.interp(c_min, ndch[:, 0], ndch[:, 1]))

    costs = np.linspace(c_min, c_max, 100)
    quals = np.interp(costs, ndch[:, 0], ndch[:, 1])
    # Trapezoidal rule written out explicitly: np.trapz is deprecated since
    # NumPy 2.0, and np.trapezoid does not exist on older releases.
    area = np.sum(np.diff(costs) * (quals[1:] + quals[:-1]) / 2.0)
    return float(area / span)
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def compute_apgr(
    ndch_router: np.ndarray, ndch_random: np.ndarray, c_max: float
) -> float:
    """Average Performance Gap Recovery (RouteLLM).

    APGR is the mean of PGR over the cost range ``[0, c_max]``, where

        PGR(c) = (q_router(c) - q_weak) / (q_strong - q_weak)

    ``q_router(c)`` is the router frontier linearly interpolated at cost
    ``c``; ``q_weak`` and ``q_strong`` are the qualities of the first and
    last vertices of ``ndch_random``. The denominator is floored at 1e-6 so
    that a baseline with no quality gap does not divide by zero.

    Args:
        ndch_router: Kx2 array of (cost, quality) vertices for the router,
            sorted by cost.
        ndch_random: Mx2 array of (cost, quality) vertices for the baseline,
            sorted by cost; only its first and last qualities are used.
        c_max: upper bound of the cost range.
    Returns:
        The APGR as a Python float. When ``c_max == 0`` the range is a single
        point and PGR at cost 0 is returned instead of dividing by zero.
    """
    q_weak = ndch_random[0, 1]
    q_strong = ndch_random[-1, 1]
    gap = max(q_strong - q_weak, 1e-6)

    costs = np.linspace(0, c_max, 100)
    q_r = np.interp(costs, ndch_router[:, 0], ndch_router[:, 1])
    pgr = (q_r - q_weak) / gap

    if c_max == 0:
        # Degenerate range: every sample sits at cost 0.
        return float(pgr[0])

    # Trapezoidal rule written out explicitly: np.trapz is deprecated since
    # NumPy 2.0, and np.trapezoid does not exist on older releases.
    area = np.sum(np.diff(costs) * (pgr[1:] + pgr[:-1]) / 2.0)
    return float(area / c_max)
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def build_frontier_report(
    policies: Dict[str, Tuple[float, float]],
    quality_levels: Tuple[float, ...] = (0.65, 0.70, 0.75, 0.78, 0.80, 0.85, 0.87),
) -> Dict[str, object]:
    """Build full Pareto frontier report.

    Args:
        policies: {name: (success_rate, cost_per_task)}
        quality_levels: quality targets for which the frontier cost is
            reported. Defaults to the levels this report has always used.
    Returns:
        A dict with the keys:
            "policies": per-policy ``{"success": ..., "cost": ...}``;
            "ndch_vertices": frontier vertices as nested (cost, quality) lists;
            "aiq": average frontier quality over the observed cost range,
                rounded to 4 decimals;
            "frontier_quality_levels": ``{"q=0.xx": cost}`` rounded to 4
                decimals, only for targets where ``cost_at_quality`` returns
                a cost.
    """
    # policies stores (success_rate, cost); the frontier helpers work on
    # (cost, quality) pairs, so the order is swapped here.
    points = np.array([(cost, quality) for quality, cost in policies.values()])

    ndch = compute_ndch(points)
    c_min, c_max = points[:, 0].min(), points[:, 0].max()
    aiq = compute_aiq(ndch, c_min, c_max)

    frontier_costs = {}
    for q_target in quality_levels:
        cost = cost_at_quality(ndch, q_target)
        # Targets the frontier cannot provide a cost for are left out.
        if cost is not None:
            frontier_costs[f"q={q_target:.2f}"] = round(cost, 4)

    return {
        "policies": {n: {"success": q, "cost": c} for n, (q, c) in policies.items()},
        "ndch_vertices": ndch.tolist(),
        "aiq": round(aiq, 4),
        "frontier_quality_levels": frontier_costs,
    }
|