narcolepticchicken
/

agent-cost-optimizer

Safetensors

Model card Files Files and versions

xet

Community

narcolepticchicken committed about 16 hours ago

Commit

c8ece28

verified ·

1 Parent(s): e6100b5

Upload aco/context_budgeter.py with huggingface_hub

Browse files

Files changed (1) hide show

aco/context_budgeter.py +116 -200

aco/context_budgeter.py CHANGED Viewed

@@ -1,209 +1,125 @@
-"""Context Budgeter - Module 4.
-Decides what context is needed, what can be omitted/summarized, and what should be retrieved.
-Context sources:
-- system rules
-- tool descriptions
-- user preferences
-- project memory
-- retrieved docs
-- prior trace failures
-- examples
-- recent messages
-- artifacts
-- task plan
-"""
-from typing import Dict, List, Tuple, Optional, Any
-from dataclasses import dataclass, field
-from .trace_schema import TaskType
-from .config import ACOConfig
-@dataclass
-class ContextSource:
-    name: str
-    tokens: int
-    importance: float  # 0-1
-    staleness: float  # 0=current, 1=very stale
-    mutable: bool  # True if content changes per turn
-    cacheable: bool  # True if can be prefix-cached
-    summary: Optional[str] = None
 @dataclass
 class ContextBudget:
-    total_budget_tokens: int
-    allocated_sources: List[ContextSource]
-    omitted_sources: List[ContextSource]
-    summarized_sources: List[Tuple[ContextSource, str]]  # (source, summary_text)
-    retrieval_queries: List[str]
-    cache_prefix_tokens: int
-    dynamic_suffix_tokens: int
-    estimated_cost: float
 class ContextBudgeter:
-    """Intelligently budgets context window to minimize cost while preserving quality."""
-    # Task-specific context budgets (tokens)
-    DEFAULT_BUDGETS = {
-        TaskType.QUICK_ANSWER: 2048,
-        TaskType.UNKNOWN_AMBIGUOUS: 4096,
-        TaskType.TOOL_HEAVY: 8192,
-        TaskType.RETRIEVAL_HEAVY: 16384,
-        TaskType.DOCUMENT_DRAFTING: 8192,
-        TaskType.CODING: 12288,
-        TaskType.RESEARCH: 16384,
-        TaskType.LONG_HORIZON: 32768,
-        TaskType.LEGAL_REGULATED: 24576,
-    }
-    # Importance weights by task type
-    IMPORTANCE_RULES = {
-        TaskType.QUICK_ANSWER: {
-            "system_rules": 0.9,
-            "recent_messages": 0.9,
-            "user_preferences": 0.5,
-        },
-        TaskType.CODING: {
-            "system_rules": 0.8,
-            "tool_descriptions": 0.9,
-            "artifacts": 0.9,
-            "recent_messages": 0.7,
-            "examples": 0.6,
-        },
-        TaskType.RESEARCH: {
-            "retrieved_docs": 0.95,
-            "recent_messages": 0.6,
-            "system_rules": 0.5,
-            "task_plan": 0.8,
-        },
-        TaskType.LEGAL_REGULATED: {
-            "retrieved_docs": 0.95,
-            "system_rules": 0.9,
-            "user_preferences": 0.7,
-            "artifacts": 0.8,
-        },
-        TaskType.LONG_HORIZON: {
-            "task_plan": 0.95,
-            "recent_messages": 0.8,
-            "artifacts": 0.85,
-            "system_rules": 0.7,
-            "prior_trace_failures": 0.6,
-        },
-    }
-    def __init__(self, config: Optional[ACOConfig] = None):
-        self.config = config or ACOConfig()
-    def budget(
-        self,
-        task_type: TaskType,
-        available_sources: List[ContextSource],
-        model_max_context: int = 128000,
-        cost_per_1k_input: float = 0.01,
-    ) -> ContextBudget:
-        """Allocate context budget across sources."""
-        budget_tokens = self.DEFAULT_BUDGETS.get(task_type, 8192)
-        # Don't exceed model limit
-        budget_tokens = min(budget_tokens, int(model_max_context * 0.8))
-        # Apply importance rules
-        importance_map = self.IMPORTANCE_RULES.get(task_type, {})
-        for source in available_sources:
-            source.importance = importance_map.get(source.name, source.importance)
-        # Separate stable (cacheable) vs dynamic
-        stable_sources = [s for s in available_sources if s.cacheable and not s.mutable]
-        dynamic_sources = [s for s in available_sources if s.mutable or not s.cacheable]
-        # Always include stable sources in prefix (they're cache-efficient)
-        prefix_tokens = sum(s.tokens for s in stable_sources)
-        remaining = budget_tokens - prefix_tokens
-        # Sort dynamic by importance / staleness ratio
-        dynamic_sources.sort(key=lambda s: s.importance / (1 + s.staleness), reverse=True)
-        allocated = list(stable_sources)
-        omitted = []
-        summarized = []
-        retrieval_queries = []
-        for source in dynamic_sources:
-            if source.tokens <= remaining:
-                allocated.append(source)
-                remaining -= source.tokens
-            elif source.importance > 0.7 and source.tokens > remaining * 1.5:
-                # Source is important but too big — summarize it
-                summary_tokens = min(int(remaining * 0.3), 512)
-                if summary_tokens > 50:
-                    summary = self._summarize(source, summary_tokens)
-                    summarized.append((source, summary))
-                    remaining -= summary_tokens
-            elif source.importance > 0.8:
-                # Critical but doesn't fit — mark for retrieval instead
-                omitted.append(source)
-                retrieval_queries.append(f"retrieve:{source.name}")
             else:
-                omitted.append(source)
-        dynamic_used = sum(s.tokens for s in allocated if s in dynamic_sources)
-        dynamic_used += sum(len(s[1].split()) for s in summarized)
-        total_tokens = prefix_tokens + dynamic_used
-        estimated_cost = (total_tokens / 1000) * cost_per_1k_input
         return ContextBudget(
-            total_budget_tokens=budget_tokens,
-            allocated_sources=allocated,
-            omitted_sources=omitted,
-            summarized_sources=summarized,
-            retrieval_queries=retrieval_queries,
-            cache_prefix_tokens=prefix_tokens,
-            dynamic_suffix_tokens=dynamic_used,
-            estimated_cost=estimated_cost,
         )
-    def _summarize(self, source: ContextSource, max_tokens: int) -> str:
-        """Produce a token-budgeted summary of a context source."""
-        # In production, use a summarization model
-        # Here we return a placeholder
-        return f"[SUMMARY:{source.name}:{max_tokens}tokens]"
-    def should_retrieve(self, source: ContextSource, task_type: TaskType) -> bool:
-        """Decide if a source should be retrieved on-demand vs. kept in context."""
-        if source.staleness > 0.5:
-            return True
-        if source.tokens > 4096 and source.importance < 0.8:
-            return True
-        if task_type in (TaskType.RESEARCH, TaskType.RETRIEVAL_HEAVY):
-            return True
-        return False
-    def compress_history(
-        self,
-        messages: List[Dict[str, str]],
-        max_messages: int = 10,
-        summarize_older: bool = True,
-    ) -> List[Dict[str, str]]:
-        """Compress message history by summarizing older messages."""
-        if len(messages) <= max_messages:
-            return messages
-        keep = messages[-max_messages:]
-        older = messages[:-max_messages]
-        if summarize_older and older:
-            summary = self._summarize_messages(older)
-            return [{"role": "system", "content": f"[Earlier context summary]: {summary}"}] + keep
-        return keep
-    def _summarize_messages(self, messages: List[Dict[str, str]]) -> str:
-        """Summarize a list of messages."""
-        # In production, use a summarization model
-        return f"{len(messages)} earlier messages summarized."

+"""Context Budgeter: Decides what context to include/exclude/summarize/retrieve."""
+from typing import Dict, List, Tuple, Optional
+from dataclasses import dataclass
 @dataclass
 class ContextBudget:
+    total_tokens: int
+    sources: Dict[str, int]  # source_name -> token_count
+    keep_exact: List[str]
+    summarize: List[str]
+    omit: List[str]
+    retrieve_on_demand: List[str]
+    cache_prefix: List[str]
+    dynamic_suffix: List[str]
+SOURCE_PRIORITIES = {
+    "system_rules": 1.0,       # Always include
+    "tool_descriptions": 0.9,  # Almost always
+    "recent_messages": 0.8,    # Important for coherence
+    "task_plan": 0.7,          # Usually important
+    "user_preferences": 0.6,
+    "project_memory": 0.5,
+    "prior_trace_failures": 0.5,
+    "examples": 0.4,
+    "retrieved_docs": 0.3,     # Retrieve on demand
+    "artifacts": 0.3,
+}
+SOURCE_TOKEN_ESTIMATES = {
+    "system_rules": 500,
+    "tool_descriptions": 2000,
+    "recent_messages": 1500,
+    "task_plan": 300,
+    "user_preferences": 100,
+    "project_memory": 500,
+    "prior_trace_failures": 300,
+    "examples": 1000,
+    "retrieved_docs": 3000,
+    "artifacts": 1000,
+}
+TASK_CONTEXT_MULTIPLIERS = {
+    "quick_answer": 0.3,
+    "document_drafting": 0.6,
+    "tool_heavy": 0.7,
+    "retrieval_heavy": 1.2,
+    "research": 1.0,
+    "coding": 0.8,
+    "unknown_ambiguous": 0.5,
+    "long_horizon": 1.0,
+    "legal_regulated": 1.3,
+}
 class ContextBudgeter:
+    def __init__(self, max_context: int = 128000, default_budget: int = 8000):
+        self.max_context = max_context
+        self.default_budget = default_budget
+    def budget(self, task_type: str, difficulty: int, needs_retrieval: bool,
+               needs_tools: bool, has_prior_failures: bool = False,
+               model_context_limit: int = None) -> ContextBudget:
+        limit = model_context_limit or self.max_context
+        mult = TASK_CONTEXT_MULTIPLIERS.get(task_type, 0.7)
+        budget = int(self.default_budget * mult * (1 + difficulty * 0.2))
+        budget = min(budget, limit)
+        sources = {}
+        keep_exact = []
+        summarize = []
+        omit = []
+        retrieve_on_demand = []
+        cache_prefix = []
+        dynamic_suffix = []
+        remaining = budget
+        # Sort sources by priority
+        sorted_sources = sorted(SOURCE_PRIORITIES.items(), key=lambda x: -x[1])
+        for source, priority in sorted_sources:
+            est_tokens = SOURCE_TOKEN_ESTIMATES.get(source, 500)
+            # Check if this source is needed for this task
+            needed = self._is_needed(source, task_type, needs_retrieval, needs_tools, has_prior_failures)
+            if not needed:
+                omit.append(source)
+                continue
+            if remaining >= est_tokens:
+                if priority >= 0.7:
+                    keep_exact.append(source)
+                    cache_prefix.append(source) if priority >= 0.9 else dynamic_suffix.append(source)
+                elif priority >= 0.4:
+                    # Summarize high-token sources
+                    if est_tokens > 800:
+                        summarize.append(source)
+                        est_tokens = min(300, est_tokens // 3)
+                    else:
+                        keep_exact.append(source)
+                    dynamic_suffix.append(source)
+                else:
+                    retrieve_on_demand.append(source)
+                    est_tokens = 0
+                sources[source] = est_tokens
+                remaining -= est_tokens
             else:
+                if priority >= 0.7:
+                    # Always include high-priority, even if truncated
+                    keep_exact.append(source)
+                    sources[source] = remaining
+                    remaining = 0
+                else:
+                    retrieve_on_demand.append(source)
         return ContextBudget(
+            total_tokens=budget,
+            sources=sources,
+            keep_exact=keep_exact,
+            summarize=summarize,
+            omit=omit,
+            retrieve_on_demand=retrieve_on_demand,
+            cache_prefix=cache_prefix,
+            dynamic_suffix=dynamic_suffix,
         )
+    def _is_needed(self, source: str, task_type: str, needs_retrieval: bool,
+                   needs_tools: bool, has_failures: bool) -> bool:
+        if source == "retrieved_docs" and not needs_retrieval: return False
+        if source == "tool_descriptions" and not needs_tools: return False
+        if source == "prior_trace_failures" and not has_failures: return False
+        if source == "examples" and task_type == "quick_answer": return False
+        return True