narcolepticchicken committed on
Commit
91c6791
·
verified ·
1 Parent(s): 911104d

Upload aco/conformal.py

Browse files
Files changed (1) hide show
  1. aco/conformal.py +112 -0
aco/conformal.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Conformal calibration for escalation thresholds.
2
+
3
+ Implements RouteNLP-style conformal risk control:
4
+ P(failure AND no escalation) <= alpha
5
+
6
+ Method:
7
+ 1. Compute nonconformity scores from calibrated P(success)
8
+ 2. Find conformal quantile threshold
9
+ 3. Guarantee coverage under exchangeability
10
+ """
11
+ import numpy as np
12
+ from typing import Dict, List, Optional, Tuple
13
+
14
+
15
class ConformalEscalationCalibrator:
    """Calibrate escalation thresholds with distribution-free coverage guarantees.

    Based on RouteNLP (arxiv 2604.23577) and Conformal Risk Control
    (Angelopoulos et al., arxiv 2208.02814).

    Guarantee: P(y=fail AND no_escalation) <= alpha
    """

    def __init__(self, alpha: float = 0.05):
        """Args:
            alpha: target bound on the rate of un-escalated failures.
        """
        self.alpha = alpha
        # Per-tier escalation thresholds on calibrated P(success);
        # populated by calibrate().
        self.thresholds: Dict[int, float] = {}
        self.calibrated = False

    def calibrate(
        self,
        psuccess: Dict[int, np.ndarray],
        outcomes: Dict[int, np.ndarray],
    ) -> Dict[int, float]:
        """Calibrate per-tier escalation thresholds.

        Args:
            psuccess: {tier: array of calibrated P(success)}
            outcomes: {tier: array of binary outcomes (1=success, 0=fail)}

        Returns:
            {tier: conformal_threshold}
        """
        for tier in sorted(psuccess.keys()):
            p = psuccess[tier]
            y = outcomes[tier]

            # Nonconformity score of a failed example is its (over-confident)
            # P(success); we bound these with a conformal quantile.
            failed_mask = y == 0
            n_fail = int(failed_mask.sum())
            if n_fail == 0:
                # No observed failures at this tier: stay maximally
                # conservative (threshold 1.0 => always escalate), matching
                # the original behavior; we cannot certify coverage from
                # zero failure examples.
                self.thresholds[tier] = 1.0
                continue

            sorted_p = np.sort(p[failed_mask])

            # Split-conformal quantile with finite-sample correction:
            # take the ceil((1 - alpha) * (n_fail + 1))-th smallest failure
            # score (1-indexed), clamped to [1, n_fail].
            # BUGFIX: the previous formula ceil((1-alpha)*(n+1)/n) is ~1 for
            # any nontrivial n, so the threshold always collapsed to the
            # minimum failure score and alpha had no effect; it also mixed
            # the total count n with the failure-score array being indexed.
            rank = int(np.ceil((1 - self.alpha) * (n_fail + 1)))
            rank = max(1, min(rank, n_fail))

            # Escalate whenever P(success) falls below this threshold.
            self.thresholds[tier] = float(sorted_p[rank - 1])

        self.calibrated = True
        return self.thresholds

    def should_escalate(self, tier: int, psuccess: float) -> bool:
        """Decide whether to escalate from this tier.

        Returns True if P(success) is below the conformal threshold,
        meaning we can't guarantee success at this tier with 1-alpha coverage.
        """
        if not self.calibrated:
            return psuccess < 0.65  # fallback to heuristic
        # Unknown tiers fall back to the same heuristic cutoff.
        threshold = self.thresholds.get(tier, 0.65)
        return psuccess < threshold

    def coverage_check(
        self,
        psuccess: Dict[int, np.ndarray],
        outcomes: Dict[int, np.ndarray],
    ) -> Dict[int, Dict[str, float]]:
        """Verify conformal coverage on test data.

        Args:
            psuccess: {tier: array of calibrated P(success)}
            outcomes: {tier: array of binary outcomes (1=success, 0=fail)}

        Returns:
            {tier: {"violation_rate", "escalation_rate", "threshold",
                    "n_no_escalate", "n_violations", "covered"}}
        """
        results = {}
        for tier in sorted(psuccess.keys()):
            p = psuccess[tier]
            y = outcomes[tier]
            threshold = self.thresholds.get(tier, 0.65)

            # A violation is a failed example that was NOT escalated.
            no_escalate = p >= threshold
            failed_no_escalate = (y == 0) & no_escalate
            n_no_escalate = no_escalate.sum()

            # max(..., 1) guards the division when everything escalates.
            violation_rate = failed_no_escalate.sum() / max(n_no_escalate, 1)
            escalation_rate = 1 - no_escalate.mean()

            results[tier] = {
                "violation_rate": float(violation_rate),
                "escalation_rate": float(escalation_rate),
                "threshold": float(threshold),
                "n_no_escalate": int(n_no_escalate),
                "n_violations": int(failed_no_escalate.sum()),
                "covered": violation_rate <= self.alpha,
            }
        return results