narcolepticchicken
/

agent-cost-optimizer

Safetensors

Model card Files Files and versions

xet

Community

narcolepticchicken committed about 16 hours ago

Commit

5d30266

verified ·

1 Parent(s): a7e3035

Upload aco/verifier_budgeter.py with huggingface_hub

Browse files

Files changed (1) hide show

aco/verifier_budgeter.py +66 -330

aco/verifier_budgeter.py CHANGED Viewed

@@ -1,338 +1,74 @@
-"""Verifier Budgeter - Module 7.
-Do not call verifiers everywhere.
-Call verifiers when:
-- task is high-risk
-- confidence is low
-- retrieval evidence is weak
-- output is irreversible
-- prior failures exist
-- cheap model was used
-- final answer is likely to be hallucination-prone
-Compare:
-A. no verifier
-B. verifier on every output
-C. heuristic verifier
-D. learned verifier budgeter
-E. risk-weighted verifier
-"""
-from typing import Dict, List, Optional
 from dataclasses import dataclass
-from enum import Enum
-from .trace_schema import TaskType, Outcome
-from .config import ACOConfig, VerifierConfig
-class VerifierDecision(Enum):
-    CALL_VERIFIER = "call_verifier"
-    SKIP = "skip"
-    USE_LIGHT_VERIFIER = "use_light_verifier"
-    SELF_CHECK = "self_check"
 @dataclass
-class VerifierBudgetDecision:
-    decision: VerifierDecision
-    verifier_model_id: Optional[str]
-    reasoning: str
-    estimated_verifier_cost: float
-    estimated_value: float
     confidence: float
-    checks: List[str]  # what aspects to verify
 class VerifierBudgeter:
-    """Selectively calls verifiers based on risk and value."""
-    # Risk thresholds for mandatory verification
-    HIGH_RISK_TASKS = {TaskType.LEGAL_REGULATED}
-    HALLUCINATION_PRONE_TASKS = {TaskType.RESEARCH, TaskType.DOCUMENT_DRAFTING}
-    # Confidence thresholds
-    LOW_CONFIDENCE_THRESHOLD = 0.6
-    MEDIUM_CONFIDENCE_THRESHOLD = 0.8
-    def __init__(self, config: Optional[ACOConfig] = None):
-        self.config = config or ACOConfig()
-        self.verifier_stats: Dict[str, Dict] = {}
-        self.decision_history: List[Dict] = []
-    def decide(
-        self,
-        task_type: TaskType,
-        model_tier_used: int,
-        confidence: float,
-        has_prior_failures: bool,
-        is_irreversible: bool,
-        output_length_tokens: int,
-        retrieval_evidence_count: int,
-        step_number: int,
-        total_steps: int,
-        mode: str = "risk_weighted",
-    ) -> VerifierBudgetDecision:
-        """Decide whether and how to verify an agent output."""
-        if mode == "no_verifier":
-            return VerifierBudgetDecision(
-                decision=VerifierDecision.SKIP,
-                verifier_model_id=None,
-                reasoning="No verifier mode",
-                estimated_verifier_cost=0.0,
-                estimated_value=0.0,
-                confidence=1.0,
-                checks=[],
-            )
-        if mode == "always":
-            verifier = list(self.config.verifiers.values())[0] if self.config.verifiers else None
-            return VerifierBudgetDecision(
-                decision=VerifierDecision.CALL_VERIFIER,
-                verifier_model_id=verifier.verifier_model_id if verifier else None,
-                reasoning="Verifier on every output",
-                estimated_verifier_cost=verifier.cost_per_call if verifier else 0.0,
-                estimated_value=0.5,
-                confidence=0.5,
-                checks=["all"],
-            )
-        if mode == "heuristic":
-            return self._heuristic_decide(
-                task_type, model_tier_used, confidence, has_prior_failures,
-                is_irreversible, output_length_tokens, retrieval_evidence_count,
-                step_number, total_steps,
-            )
-        if mode in ("learned", "risk_weighted"):
-            return self._risk_weighted_decide(
-                task_type, model_tier_used, confidence, has_prior_failures,
-                is_irreversible, output_length_tokens, retrieval_evidence_count,
-                step_number, total_steps,
-            )
-        return self._risk_weighted_decide(
-            task_type, model_tier_used, confidence, has_prior_failures,
-            is_irreversible, output_length_tokens, retrieval_evidence_count,
-            step_number, total_steps,
-        )
-    def _heuristic_decide(
-        self,
-        task_type: TaskType,
-        model_tier_used: int,
-        confidence: float,
-        has_prior_failures: bool,
-        is_irreversible: bool,
-        output_length_tokens: int,
-        retrieval_evidence_count: int,
-        step_number: int,
-        total_steps: int,
-    ) -> VerifierBudgetDecision:
-        """Simple heuristic-based verifier selection."""
-        should_verify = False
-        checks = []
-        if task_type in self.HIGH_RISK_TASKS:
-            should_verify = True
-            checks.append("legal_compliance")
-        if confidence < self.LOW_CONFIDENCE_THRESHOLD:
-            should_verify = True
-            checks.append("factual_accuracy")
         if has_prior_failures:
-            should_verify = True
-            checks.append("failure_pattern")
-        if is_irreversible:
-            should_verify = True
-            checks.append("safety")
-        if model_tier_used <= 2 and task_type in (TaskType.CODING, TaskType.RESEARCH):
-            should_verify = True
-            checks.append("output_quality")
-        if retrieval_evidence_count == 0 and task_type in self.HALLUCINATION_PRONE_TASKS:
-            should_verify = True
-            checks.append("hallucination")
-        if not should_verify:
-            return VerifierBudgetDecision(
-                decision=VerifierDecision.SKIP,
-                verifier_model_id=None,
-                reasoning="No heuristic triggers met",
-                estimated_verifier_cost=0.0,
-                estimated_value=0.0,
-                confidence=confidence,
-                checks=[],
-            )
-        # Pick verifier
-        verifier = self._select_verifier(checks)
-        return VerifierBudgetDecision(
-            decision=VerifierDecision.CALL_VERIFIER,
-            verifier_model_id=verifier.verifier_model_id if verifier else None,
-            reasoning=f"Heuristic triggered: {', '.join(checks)}",
-            estimated_verifier_cost=verifier.cost_per_call if verifier else 0.0,
-            estimated_value=0.3 + (1.0 - confidence) * 0.5,
-            confidence=confidence,
-            checks=checks,
-        )
-    def _risk_weighted_decide(
-        self,
-        task_type: TaskType,
-        model_tier_used: int,
-        confidence: float,
-        has_prior_failures: bool,
-        is_irreversible: bool,
-        output_length_tokens: int,
-        retrieval_evidence_count: int,
-        step_number: int,
-        total_steps: int,
-    ) -> VerifierBudgetDecision:
-        """Risk-weighted verifier selection with value estimation."""
-        # Compute risk score
-        risk = 0.0
-        checks = []
-        # Task type risk
-        if task_type in self.HIGH_RISK_TASKS:
-            risk += 0.4
-            checks.append("legal_compliance")
-        elif task_type in {TaskType.CODING, TaskType.RESEARCH}:
-            risk += 0.15
-        # Confidence risk
-        risk += max(0.0, (self.MEDIUM_CONFIDENCE_THRESHOLD - confidence) * 0.5)
-        if confidence < self.LOW_CONFIDENCE_THRESHOLD:
-            checks.append("factual_accuracy")
-        # Model tier risk (cheap models are riskier)
-        risk += max(0.0, (3 - model_tier_used) * 0.05)
-        if model_tier_used <= 2 and task_type in (TaskType.CODING, TaskType.RESEARCH, TaskType.DOCUMENT_DRAFTING):
-            checks.append("output_quality")
-        # Prior failures
-        if has_prior_failures:
-            risk += 0.2
-            checks.append("failure_pattern")
-        # Irreversibility
-        if is_irreversible:
-            risk += 0.25
-            checks.append("safety")
-        # Evidence weakness
-        if retrieval_evidence_count == 0 and task_type in self.HALLUCINATION_PRONE_TASKS:
-            risk += 0.2
-            checks.append("hallucination")
-        # Output size (longer outputs are more error-prone)
-        if output_length_tokens > 2048:
-            risk += min(0.1, (output_length_tokens - 2048) / 50000)
-        # Step position (first and last steps are more critical)
-        if step_number == 1:
-            risk += 0.05
-        if step_number == total_steps and total_steps > 1:
-            risk += 0.1
-            checks.append("final_output")
-        risk = min(risk, 1.0)
-        # Cost-benefit analysis
-        verifier = self._select_verifier(checks)
-        verifier_cost = verifier.cost_per_call if verifier else 0.01
-        # Expected value of verification
-        # If risk is high, catching an error is very valuable
-        error_cost = self._estimate_error_cost(task_type, is_irreversible)
-        p_error = risk
-        p_catch = 0.7  # verifier catches error with 70% probability
-        expected_value = p_error * p_catch * error_cost - verifier_cost
-        if expected_value > 0 or risk > 0.6:
-            return VerifierBudgetDecision(
-                decision=VerifierDecision.CALL_VERIFIER,
-                verifier_model_id=verifier.verifier_model_id if verifier else None,
-                reasoning=f"Risk={risk:.2f}, expected_value={expected_value:.4f}, checks={checks}",
-                estimated_verifier_cost=verifier_cost,
-                estimated_value=expected_value,
-                confidence=confidence,
-                checks=list(set(checks)),
-            )
-        # For medium risk, use a lighter self-check
-        if risk > 0.3:
-            return VerifierBudgetDecision(
-                decision=VerifierDecision.SELF_CHECK,
-                verifier_model_id=None,
-                reasoning=f"Medium risk ({risk:.2f}) — use lightweight self-check instead of full verifier",
-                estimated_verifier_cost=0.0,
-                estimated_value=p_error * 0.3 * error_cost,
-                confidence=confidence,
-                checks=["self_consistency"],
-            )
-        return VerifierBudgetDecision(
-            decision=VerifierDecision.SKIP,
-            verifier_model_id=None,
-            reasoning=f"Low risk ({risk:.2f}), expected_value={expected_value:.4f} — skip verification",
-            estimated_verifier_cost=0.0,
-            estimated_value=0.0,
-            confidence=confidence,
-            checks=[],
-        )
-    def _select_verifier(self, checks: List[str]) -> Optional[VerifierConfig]:
-        """Select appropriate verifier based on checks needed."""
-        if not self.config.verifiers:
-            return None
-        # For now, return the first verifier
-        # In production, map check types to specialist verifiers
-        return list(self.config.verifiers.values())[0]
-    def _estimate_error_cost(self, task_type: TaskType, is_irreversible: bool) -> float:
-        """Estimate the cost of an undetected error."""
-        base = 1.0
-        if task_type == TaskType.LEGAL_REGULATED:
-            base = 10.0
-        elif task_type == TaskType.CODING:
-            base = 3.0
-        elif task_type == TaskType.RESEARCH:
-            base = 2.0
-        if is_irreversible:
-            base *= 2.0
-        return base
-    def record_verifier_outcome(
-        self,
-        verifier_model_id: str,
-        target_step: str,
-        passed: bool,
-        cost: float,
-        was_actually_correct: bool,
-    ) -> None:
-        """Record verifier performance for calibration."""
-        stats = self.verifier_stats.setdefault(verifier_model_id, {
-            "calls": 0, "passed": 0, "true_positives": 0, "false_positives": 0,
-            "false_negatives": 0, "total_cost": 0.0,
-        })
-        stats["calls"] += 1
-        if passed:
-            stats["passed"] += 1
-        if passed and was_actually_correct:
-            stats["true_positives"] += 1
-        if passed and not was_actually_correct:
-            stats["false_positives"] += 1
-        if not passed and was_actually_correct:
-            stats["false_negatives"] += 1
-        stats["total_cost"] += cost

+"""Verifier Budgeter: Selective verification for high-risk outputs only."""
+from typing import Dict, Optional, Tuple
 from dataclasses import dataclass
 @dataclass
+class VerifierDecision:
+    should_verify: bool
+    verifier_type: str  # "full", "spot_check", "skip"
     confidence: float
+    reasoning: str
+    estimated_cost: float
+RISK_VERIFIER_MAP = {
+    "critical": {"min_confidence": 0.95, "verifier_type": "full"},
+    "high": {"min_confidence": 0.85, "verifier_type": "full"},
+    "medium": {"min_confidence": 0.70, "verifier_type": "spot_check"},
+    "low": {"min_confidence": 0.50, "verifier_type": "spot_check"},
+}
 class VerifierBudgeter:
+    def __init__(self, verifier_cost: float = 0.02, max_verifications_per_run: int = 3):
+        self.verifier_cost = verifier_cost
+        self.max_per_run = max_verifications_per_run
+        self.verifications_this_run = 0
+        self.stats = {"verified":0,"skipped":0,"spot_checked":0,"false_passes":0,"false_rejects":0}
+    def should_verify(self, task_type: str, risk: str, model_confidence: float,
+                      is_irreversible: bool = False, has_prior_failures: bool = False,
+                      model_tier: int = 4, step_num: int = 0,
+                      total_steps: int = 1) -> VerifierDecision:
+        if self.verifications_this_run >= self.max_per_run:
+            self.stats["skipped"] += 1
+            return VerifierDecision(False, "skip", model_confidence,
+                                   "max verifications reached", 0.0)
+        # Check if this is the final answer
+        is_final = (step_num == total_steps or step_num == -1)
+        # Risk-based threshold
+        risk_config = RISK_VERIFIER_MAP.get(risk, RISK_VERIFIER_MAP["medium"])
+        min_conf = risk_config["min_confidence"]
+        default_type = risk_config["verifier_type"]
+        # Adjust for context
+        should = False
+        reasons = []
+        if model_confidence < min_conf:
+            should = True
+            reasons.append(f"low confidence ({model_confidence:.2f} < {min_conf})")
+        if is_irreversible and risk in ("high", "critical"):
+            should = True
+            reasons.append("irreversible + high risk")
         if has_prior_failures:
+            should = True
+            reasons.append("prior failures exist")
+        if model_tier <= 2 and risk in ("high", "critical"):
+            should = True
+            reasons.append("cheap model on high-risk task")
+        if is_final and risk in ("high", "critical"):
+            should = True
+            reasons.append("final answer on high-risk task")
+        if not should:
+            self.stats["skipped"] += 1
+            return VerifierDecision(False, "skip", model_confidence,
+                                   "no verification needed", 0.0)
+        self.verifications_this_run += 1
+        vtype = default_type
+        if model_confidence > min_conf and not is_irreversible:
+            vtype = "spot_check"
+            self.stats["spot_checked"] += 1
+        else:
+            self.stats["verified"] += 1
+        return VerifierDecision(True, vtype, model_confidence,
+                               "; ".join(reasons), self.verifier_cost)
+    def reset_run(self):
+        self.verifications_this_run = 0