narcolepticchicken
/

agent-cost-optimizer

Safetensors

Model card Files Files and versions

xet

Community

narcolepticchicken committed 1 day ago

Commit

7d60df1

verified ·

1 Parent(s): 07f0bb4

Upload aco/tool_gate.py

Browse files

Files changed (1) hide show

aco/tool_gate.py +260 -0

aco/tool_gate.py ADDED Viewed

	@@ -0,0 +1,260 @@

+"""Tool-Use Cost Gate - Module 6.
+Predicts whether a tool call is worth its cost.
+Decisions (see ToolDecision):
+- use tool
+- skip tool
+- batch tool calls
+- run in parallel
+- use cheaper tool
+- use cached result
+- ask user instead
+- escalate
+Tracks:
+- unnecessary tool calls
+- missed tool calls
+- failed tool calls
+- ignored tool results
+- repeated tool calls
+- tool cost
+- tool latency
+"""
+from typing import Dict, List, Tuple, Optional
+from dataclasses import dataclass
+from enum import Enum
+from .trace_schema import TaskType, ToolCall
+from .config import ACOConfig, ToolConfig
+class ToolDecision(Enum):
+    """Possible outcomes of gating a proposed tool call."""
+    # Make the tool call.
+    USE = "use"
+    # Do not make the tool call.
+    SKIP = "skip"
+    # Batch tool calls together.
+    BATCH = "batch"
+    # Run tool calls in parallel.
+    PARALLEL = "parallel"
+    # Use a cheaper tool instead.
+    USE_CHEAPER = "use_cheaper"
+    # Reuse a cached result instead of calling the tool again.
+    USE_CACHE = "use_cache"
+    # Ask the user instead of calling the tool.
+    ASK_USER = "ask_user"
+    # Escalate.
+    ESCALATE = "escalate"
+@dataclass
+class ToolGateDecision:
+    """Outcome of gating one proposed tool call, with the estimates behind it."""
+    # Action chosen for the call.
+    decision: ToolDecision
+    # Name of the tool the decision applies to.
+    tool_name: str
+    # Human-readable explanation of why the decision was made.
+    reasoning: str
+    # Cost attributed to following the decision; ToolUseCostGate.decide uses the
+    # tool's configured cost_per_call for USE and 0.0 for SKIP / USE_CACHE.
+    estimated_cost: float
+    estimated_benefit: float  # 0-1 probability of useful result
+    # Confidence in the decision; every value assigned in this file lies in 0-1.
+    confidence: float
+    # Not populated by any code visible in this file.
+    # NOTE(review): presumably the substitute tool for USE_CHEAPER — confirm.
+    alternative_tool: Optional[str] = None
+    # Set by ToolUseCostGate.decide_batch on the first decision of a parallel
+    # group: the tool names of the other members (an empty list when alone).
+    batched_with: Optional[List[str]] = None
+class ToolUseCostGate:
+    """Gates tool calls based on predicted value vs. cost."""
+    # Tool necessity by task type (probability that task needs this tool class)
+    TOOL_NECESSITY = {
+        TaskType.QUICK_ANSWER: {
+            "search": 0.3,
+            "retrieve": 0.1,
+            "calculator": 0.2,
+        },
+        TaskType.CODING: {
+            "code_execution": 0.8,
+            "linter": 0.6,
+            "test_runner": 0.7,
+            "file_read": 0.9,
+            "file_write": 0.5,
+            "search": 0.4,
+        },
+        TaskType.RESEARCH: {
+            "search": 0.95,
+            "retrieve": 0.9,
+            "fetch": 0.7,
+            "summarize": 0.8,
+        },
+        TaskType.LEGAL_REGULATED: {
+            "document_retrieval": 0.95,
+            "compliance_check": 0.9,
+            "search": 0.6,
+        },
+        TaskType.TOOL_HEAVY: {
+            "search": 0.7,
+            "fetch": 0.6,
+            "api_call": 0.8,
+            "database_query": 0.7,
+        },
+        TaskType.RETRIEVAL_HEAVY: {
+            "retrieve": 0.95,
+            "search": 0.8,
+            "fetch": 0.7,
+        },
+        TaskType.LONG_HORIZON: {
+            "task_planner": 0.7,
+            "progress_tracker": 0.5,
+            "file_read": 0.6,
+            "search": 0.4,
+        },
+    }
+    # Cost-benefit threshold
+    MIN_BENEFIT_COST_RATIO = 2.0  # benefit must be > 2x cost to call
+    def __init__(self, config: Optional[ACOConfig] = None):
+        self.config = config or ACOConfig()
+        self.tool_stats: Dict[str, Dict] = {}  # tool_name -> stats
+        self.decision_history: List[Dict] = []
+    def decide(
+        self,
+        tool_name: str,
+        task_type: TaskType,
+        tool_input: Dict,
+        previous_tool_calls: Optional[List[ToolCall]] = None,
+        current_cost_so_far: float = 0.0,
+        predicted_task_cost: float = 0.1,
+    ) -> ToolGateDecision:
+        """Decide whether to make a tool call."""
+        tool_cfg = self.config.tools.get(tool_name, ToolConfig(tool_name=tool_name))
+        tool_cost = tool_cfg.cost_per_call
+        tool_latency = tool_cfg.latency_ms_estimate
+        previous = previous_tool_calls or []
+        # Check for repeated identical calls
+        if self._is_repeated(tool_name, tool_input, previous):
+            return ToolGateDecision(
+                decision=ToolDecision.USE_CACHE,
+                tool_name=tool_name,
+                reasoning="Repeated identical tool call detected — use cached result",
+                estimated_cost=0.0,
+                estimated_benefit=0.9,
+                confidence=0.95,
+            )
+        # Check if tool result was ignored in previous steps
+        if previous and self._was_ignored(tool_name, previous):
+            # If tool results are consistently ignored, skip
+            return ToolGateDecision(
+                decision=ToolDecision.SKIP,
+                tool_name=tool_name,
+                reasoning="Previous results from this tool were ignored by the agent",
+                estimated_cost=0.0,
+                estimated_benefit=0.1,
+                confidence=0.8,
+            )
+        # Estimate necessity
+        necessity = self.TOOL_NECESSITY.get(task_type, {}).get(tool_name, 0.5)
+        # Adjust by historical success rate
+        stats = self.tool_stats.get(tool_name, {"calls": 0, "useful": 0})
+        if stats["calls"] > 5:
+            historical_useful_rate = stats["useful"] / stats["calls"]
+            necessity = (necessity + historical_useful_rate) / 2
+        # Cost escalation check: if we're already over predicted cost, be more selective
+        cost_ratio = current_cost_so_far / max(predicted_task_cost, 0.001)
+        if cost_ratio > 1.5:
+            necessity *= 0.7
+        if cost_ratio > 2.5:
+            necessity *= 0.5
+        # Normalize cost to benefit scale (assume $0.01 = 1.0 benefit unit)
+        normalized_cost = tool_cost / 0.01
+        benefit_cost_ratio = necessity / max(normalized_cost, 0.001)
+        if benefit_cost_ratio < self.MIN_BENEFIT_COST_RATIO and necessity < 0.5:
+            return ToolGateDecision(
+                decision=ToolDecision.SKIP,
+                tool_name=tool_name,
+                reasoning=f"Low benefit/cost ratio ({benefit_cost_ratio:.2f}) and low necessity ({necessity:.2f})",
+                estimated_cost=0.0,
+                estimated_benefit=necessity,
+                confidence=0.75,
+            )
+        # Check if we can batch with other pending tool calls
+        # (simplified: if multiple reads, batch them)
+        if tool_name in ("file_read", "search", "retrieve"):
+            return ToolGateDecision(
+                decision=ToolDecision.USE,
+                tool_name=tool_name,
+                reasoning=f"Tool is necessary (necessity={necessity:.2f}) and cacheable — proceed",
+                estimated_cost=tool_cost,
+                estimated_benefit=necessity,
+                confidence=min(necessity + 0.2, 1.0),
+            )
+        return ToolGateDecision(
+            decision=ToolDecision.USE,
+            tool_name=tool_name,
+            reasoning=f"Tool justified: necessity={necessity:.2f}, cost={tool_cost:.4f}",
+            estimated_cost=tool_cost,
+            estimated_benefit=necessity,
+            confidence=min(necessity + 0.1, 1.0),
+        )
+    def decide_batch(
+        self,
+        tool_requests: List[Tuple[str, Dict]],
+        task_type: TaskType,
+        previous_tool_calls: Optional[List[ToolCall]] = None,
+    ) -> List[ToolGateDecision]:
+        """Decide on a batch of tool calls, potentially grouping parallel ones."""
+        decisions = []
+        for tool_name, tool_input in tool_requests:
+            d = self.decide(tool_name, task_type, tool_input, previous_tool_calls)
+            decisions.append(d)
+        # Group independent tool calls for parallel execution
+        # (reads can be parallel, writes should be sequential)
+        read_tools = ["file_read", "search", "retrieve", "fetch", "database_query"]
+        parallel_group = []
+        sequential = []
+        for d in decisions:
+            if d.tool_name in read_tools and d.decision == ToolDecision.USE:
+                parallel_group.append(d)
+            else:
+                if parallel_group:
+                    # Mark the first as parallel with the rest
+                    parallel_group[0].batched_with = [p.tool_name for p in parallel_group[1:]]
+                    sequential.append(parallel_group[0])
+                    parallel_group = []
+                sequential.append(d)
+        if parallel_group:
+            parallel_group[0].batched_with = [p.tool_name for p in parallel_group[1:]]
+            sequential.append(parallel_group[0])
+        return sequential
+    def _is_repeated(self, tool_name: str, tool_input: Dict, previous: List[ToolCall]) -> bool:
+        """Check if this exact tool call was made before in this trace."""
+        for call in reversed(previous[-5:]):  # check last 5
+            if call.tool_name == tool_name and call.tool_input == tool_input:
+                return True
+        return False
+    def _was_ignored(self, tool_name: str, previous: List[ToolCall]) -> bool:
+        """Check if previous calls to this tool had their results ignored."""
+        relevant = [c for c in previous if c.tool_name == tool_name]
+        if len(relevant) < 2:
+            return False
+        ignored_count = sum(1 for c in relevant if c.ignored_result)
+        return ignored_count / len(relevant) > 0.5
+    def record_outcome(self, tool_name: str, was_useful: bool, cost: float) -> None:
+        """Record whether a tool call was actually useful."""
+        stats = self.tool_stats.setdefault(tool_name, {"calls": 0, "useful": 0, "total_cost": 0.0})
+        stats["calls"] += 1
+        if was_useful:
+            stats["useful"] += 1
+        stats["total_cost"] += cost
+        stats["useful_rate"] = stats["useful"] / stats["calls"]