narcolepticchicken commited on
Commit
33a5f28
·
verified ·
1 Parent(s): 7d60df1

Upload aco/verifier_budgeter.py

Browse files
Files changed (1) hide show
  1. aco/verifier_budgeter.py +338 -0
aco/verifier_budgeter.py ADDED
@@ -0,0 +1,338 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Verifier Budgeter - Module 7.
2
+
3
+ Do not call verifiers everywhere.
4
+
5
+ Call verifiers when:
6
+ - task is high-risk
7
+ - confidence is low
8
+ - retrieval evidence is weak
9
+ - output is irreversible
10
+ - prior failures exist
11
+ - cheap model was used
12
+ - final answer is likely to be hallucination-prone
13
+
14
+ Compare:
15
+ A. no verifier
16
+ B. verifier on every output
17
+ C. heuristic verifier
18
+ D. learned verifier budgeter
19
+ E. risk-weighted verifier
20
+ """
21
+
22
+ from typing import Dict, List, Optional
23
+ from dataclasses import dataclass
24
+ from enum import Enum
25
+
26
+ from .trace_schema import TaskType, Outcome
27
+ from .config import ACOConfig, VerifierConfig
28
+
29
+
30
class VerifierDecision(Enum):
    """How a single agent output should be verified, if at all."""

    # Run a full external verifier model on the output.
    CALL_VERIFIER = "call_verifier"
    # Skip verification entirely.
    SKIP = "skip"
    # Run a cheaper, lightweight verifier instead of a full one.
    USE_LIGHT_VERIFIER = "use_light_verifier"
    # Ask the producing model to check its own output.
    SELF_CHECK = "self_check"
35
+
36
+
37
@dataclass
class VerifierBudgetDecision:
    """Outcome of a verifier-budgeting decision for one agent output.

    Field order is part of the positional-construction interface and must
    not change.
    """

    # The chosen action (call / skip / light verifier / self-check).
    decision: VerifierDecision
    # Model id of the selected verifier, or None when no verifier is used.
    verifier_model_id: Optional[str]
    # Human-readable explanation of why this decision was made.
    reasoning: str
    # Expected cost of running the chosen verifier (0.0 when skipping).
    estimated_verifier_cost: float
    # Expected net value of verification (benefit of catching errors minus cost).
    estimated_value: float
    # Confidence carried through from the producing model.
    confidence: float
    checks: List[str]  # what aspects to verify
46
+
47
+
48
class VerifierBudgeter:
    """Selectively calls verifiers based on risk and value.

    Supported budgeting policies (the ``mode`` argument of :meth:`decide`):

    - ``"no_verifier"``: never verify (baseline A).
    - ``"always"``: verify every output (baseline B).
    - ``"heuristic"``: verify when any fixed trigger fires (C).
    - ``"learned"`` / ``"risk_weighted"``: cost-benefit analysis over an
      aggregated risk score (D / E — currently share one implementation).
      Any unrecognized mode also falls back to this policy.
    """

    # Task types that always warrant compliance-focused verification.
    HIGH_RISK_TASKS = {TaskType.LEGAL_REGULATED}
    # Task types prone to hallucination when retrieval evidence is weak.
    HALLUCINATION_PRONE_TASKS = {TaskType.RESEARCH, TaskType.DOCUMENT_DRAFTING}

    # Confidence thresholds shared by the heuristic and risk-weighted modes.
    LOW_CONFIDENCE_THRESHOLD = 0.6
    MEDIUM_CONFIDENCE_THRESHOLD = 0.8

    def __init__(self, config: Optional[ACOConfig] = None):
        self.config = config or ACOConfig()
        # Per-verifier calibration stats, keyed by verifier model id.
        self.verifier_stats: Dict[str, Dict] = {}
        # Chronological log of budgeting decisions for offline analysis.
        # (Fix: this was previously declared but never populated.)
        self.decision_history: List[Dict] = []

    def decide(
        self,
        task_type: TaskType,
        model_tier_used: int,
        confidence: float,
        has_prior_failures: bool,
        is_irreversible: bool,
        output_length_tokens: int,
        retrieval_evidence_count: int,
        step_number: int,
        total_steps: int,
        mode: str = "risk_weighted",
    ) -> VerifierBudgetDecision:
        """Decide whether and how to verify an agent output.

        Args:
            task_type: Category of the task being executed.
            model_tier_used: Tier of the model that produced the output
                (lower tiers are treated as riskier).
            confidence: Producer's self-reported confidence in [0, 1].
            has_prior_failures: Whether earlier attempts/steps failed.
            is_irreversible: Whether the output triggers an irreversible action.
            output_length_tokens: Token length of the produced output.
            retrieval_evidence_count: Number of retrieved evidence items
                backing the output.
            step_number: 1-based index of the current step.
            total_steps: Total number of steps in the task.
            mode: Budgeting policy name; see class docstring.

        Returns:
            A :class:`VerifierBudgetDecision`; the decision is also appended
            to :attr:`decision_history`.
        """
        if mode == "no_verifier":
            decision = VerifierBudgetDecision(
                decision=VerifierDecision.SKIP,
                verifier_model_id=None,
                reasoning="No verifier mode",
                estimated_verifier_cost=0.0,
                estimated_value=0.0,
                confidence=1.0,
                checks=[],
            )
        elif mode == "always":
            # Fix: reuse the shared selection helper instead of duplicating
            # the "first configured verifier" logic inline.
            verifier = self._select_verifier(["all"])
            decision = VerifierBudgetDecision(
                decision=VerifierDecision.CALL_VERIFIER,
                verifier_model_id=verifier.verifier_model_id if verifier else None,
                reasoning="Verifier on every output",
                estimated_verifier_cost=verifier.cost_per_call if verifier else 0.0,
                estimated_value=0.5,
                confidence=0.5,
                checks=["all"],
            )
        elif mode == "heuristic":
            decision = self._heuristic_decide(
                task_type, model_tier_used, confidence, has_prior_failures,
                is_irreversible, output_length_tokens, retrieval_evidence_count,
                step_number, total_steps,
            )
        else:
            # "learned", "risk_weighted", and any unrecognized mode use the
            # risk-weighted policy. (Fix: the original duplicated this call
            # in two separate branches with identical arguments.)
            decision = self._risk_weighted_decide(
                task_type, model_tier_used, confidence, has_prior_failures,
                is_irreversible, output_length_tokens, retrieval_evidence_count,
                step_number, total_steps,
            )

        # Record every decision so policies can be compared offline.
        self.decision_history.append({
            "mode": mode,
            "task_type": task_type,
            "decision": decision.decision,
            "estimated_verifier_cost": decision.estimated_verifier_cost,
            "estimated_value": decision.estimated_value,
        })
        return decision

    def _heuristic_decide(
        self,
        task_type: TaskType,
        model_tier_used: int,
        confidence: float,
        has_prior_failures: bool,
        is_irreversible: bool,
        output_length_tokens: int,
        retrieval_evidence_count: int,
        step_number: int,
        total_steps: int,
    ) -> VerifierBudgetDecision:
        """Simple heuristic-based verifier selection.

        Verifies whenever at least one fixed trigger fires; each trigger
        contributes a named check. Skips otherwise.
        """
        should_verify = False
        checks: List[str] = []

        # High-risk (regulated) work always gets a compliance check.
        if task_type in self.HIGH_RISK_TASKS:
            should_verify = True
            checks.append("legal_compliance")

        # Low producer confidence -> verify facts.
        if confidence < self.LOW_CONFIDENCE_THRESHOLD:
            should_verify = True
            checks.append("factual_accuracy")

        # A prior failure suggests a repeating error pattern.
        if has_prior_failures:
            should_verify = True
            checks.append("failure_pattern")

        # Irreversible outputs get a safety check before release.
        if is_irreversible:
            should_verify = True
            checks.append("safety")

        # Cheap-model output on demanding task types -> quality check.
        if model_tier_used <= 2 and task_type in (TaskType.CODING, TaskType.RESEARCH):
            should_verify = True
            checks.append("output_quality")

        # No supporting evidence on hallucination-prone tasks.
        if retrieval_evidence_count == 0 and task_type in self.HALLUCINATION_PRONE_TASKS:
            should_verify = True
            checks.append("hallucination")

        if not should_verify:
            return VerifierBudgetDecision(
                decision=VerifierDecision.SKIP,
                verifier_model_id=None,
                reasoning="No heuristic triggers met",
                estimated_verifier_cost=0.0,
                estimated_value=0.0,
                confidence=confidence,
                checks=[],
            )

        # Pick verifier
        verifier = self._select_verifier(checks)

        return VerifierBudgetDecision(
            decision=VerifierDecision.CALL_VERIFIER,
            verifier_model_id=verifier.verifier_model_id if verifier else None,
            reasoning=f"Heuristic triggered: {', '.join(checks)}",
            estimated_verifier_cost=verifier.cost_per_call if verifier else 0.0,
            # Value grows as confidence shrinks: 0.3 base + up to 0.5.
            estimated_value=0.3 + (1.0 - confidence) * 0.5,
            confidence=confidence,
            checks=checks,
        )

    def _risk_weighted_decide(
        self,
        task_type: TaskType,
        model_tier_used: int,
        confidence: float,
        has_prior_failures: bool,
        is_irreversible: bool,
        output_length_tokens: int,
        retrieval_evidence_count: int,
        step_number: int,
        total_steps: int,
    ) -> VerifierBudgetDecision:
        """Risk-weighted verifier selection with value estimation.

        Aggregates additive risk contributions (capped at 1.0), then runs a
        cost-benefit comparison: full verification when expected value is
        positive or risk is high, a lightweight self-check at medium risk,
        and a skip at low risk.
        """
        # Compute risk score
        risk = 0.0
        checks: List[str] = []

        # Task type risk
        if task_type in self.HIGH_RISK_TASKS:
            risk += 0.4
            checks.append("legal_compliance")
        elif task_type in {TaskType.CODING, TaskType.RESEARCH}:
            risk += 0.15

        # Confidence risk: linear penalty below the medium threshold.
        risk += max(0.0, (self.MEDIUM_CONFIDENCE_THRESHOLD - confidence) * 0.5)
        if confidence < self.LOW_CONFIDENCE_THRESHOLD:
            checks.append("factual_accuracy")

        # Model tier risk (cheap models are riskier)
        risk += max(0.0, (3 - model_tier_used) * 0.05)
        if model_tier_used <= 2 and task_type in (TaskType.CODING, TaskType.RESEARCH, TaskType.DOCUMENT_DRAFTING):
            checks.append("output_quality")

        # Prior failures
        if has_prior_failures:
            risk += 0.2
            checks.append("failure_pattern")

        # Irreversibility
        if is_irreversible:
            risk += 0.25
            checks.append("safety")

        # Evidence weakness
        if retrieval_evidence_count == 0 and task_type in self.HALLUCINATION_PRONE_TASKS:
            risk += 0.2
            checks.append("hallucination")

        # Output size (longer outputs are more error-prone)
        if output_length_tokens > 2048:
            risk += min(0.1, (output_length_tokens - 2048) / 50000)

        # Step position (first and last steps are more critical)
        if step_number == 1:
            risk += 0.05
        if step_number == total_steps and total_steps > 1:
            risk += 0.1
            checks.append("final_output")

        risk = min(risk, 1.0)

        # Cost-benefit analysis
        verifier = self._select_verifier(checks)
        verifier_cost = verifier.cost_per_call if verifier else 0.01

        # Expected value of verification
        # If risk is high, catching an error is very valuable
        error_cost = self._estimate_error_cost(task_type, is_irreversible)
        p_error = risk
        p_catch = 0.7  # verifier catches error with 70% probability
        expected_value = p_error * p_catch * error_cost - verifier_cost

        if expected_value > 0 or risk > 0.6:
            return VerifierBudgetDecision(
                decision=VerifierDecision.CALL_VERIFIER,
                verifier_model_id=verifier.verifier_model_id if verifier else None,
                reasoning=f"Risk={risk:.2f}, expected_value={expected_value:.4f}, checks={checks}",
                estimated_verifier_cost=verifier_cost,
                estimated_value=expected_value,
                confidence=confidence,
                # Fix: dedupe while preserving insertion order — list(set(...))
                # produced nondeterministic ordering across runs.
                checks=list(dict.fromkeys(checks)),
            )

        # For medium risk, use a lighter self-check
        if risk > 0.3:
            return VerifierBudgetDecision(
                decision=VerifierDecision.SELF_CHECK,
                verifier_model_id=None,
                reasoning=f"Medium risk ({risk:.2f}) — use lightweight self-check instead of full verifier",
                estimated_verifier_cost=0.0,
                # Self-check assumed to catch errors at a lower 30% rate.
                estimated_value=p_error * 0.3 * error_cost,
                confidence=confidence,
                checks=["self_consistency"],
            )

        return VerifierBudgetDecision(
            decision=VerifierDecision.SKIP,
            verifier_model_id=None,
            reasoning=f"Low risk ({risk:.2f}), expected_value={expected_value:.4f} — skip verification",
            estimated_verifier_cost=0.0,
            estimated_value=0.0,
            confidence=confidence,
            checks=[],
        )

    def _select_verifier(self, checks: List[str]) -> Optional[VerifierConfig]:
        """Select appropriate verifier based on checks needed.

        Returns None when no verifiers are configured.
        """
        if not self.config.verifiers:
            return None

        # For now, return the first verifier without building a throwaway
        # list. In production, map check types to specialist verifiers.
        return next(iter(self.config.verifiers.values()))

    def _estimate_error_cost(self, task_type: TaskType, is_irreversible: bool) -> float:
        """Estimate the relative cost of an undetected error.

        Base cost by task type (legal 10x, coding 3x, research 2x, else 1x),
        doubled when the action is irreversible.
        """
        base = 1.0
        if task_type == TaskType.LEGAL_REGULATED:
            base = 10.0
        elif task_type == TaskType.CODING:
            base = 3.0
        elif task_type == TaskType.RESEARCH:
            base = 2.0
        if is_irreversible:
            base *= 2.0
        return base

    def record_verifier_outcome(
        self,
        verifier_model_id: str,
        target_step: str,
        passed: bool,
        cost: float,
        was_actually_correct: bool,
    ) -> None:
        """Record verifier performance for calibration.

        Args:
            verifier_model_id: Which verifier produced the verdict.
            target_step: Identifier of the verified step (currently unused
                in aggregation; kept for future per-step calibration).
            passed: Whether the verifier approved the output.
            cost: Actual cost of the verifier call.
            was_actually_correct: Ground truth for the output.

        NOTE(review): true negatives (rejected AND actually incorrect) are
        not tracked, so precision/recall cannot both be derived — confirm
        whether that is intentional.
        """
        stats = self.verifier_stats.setdefault(verifier_model_id, {
            "calls": 0, "passed": 0, "true_positives": 0, "false_positives": 0,
            "false_negatives": 0, "total_cost": 0.0,
        })
        stats["calls"] += 1
        if passed:
            stats["passed"] += 1
            # "passed but wrong" = false positive; "passed and right" = TP.
            if was_actually_correct:
                stats["true_positives"] += 1
            else:
                stats["false_positives"] += 1
        elif was_actually_correct:
            # Verifier rejected a correct output.
            stats["false_negatives"] += 1
        stats["total_cost"] += cost