narcolepticchicken
/

agent-cost-optimizer

Safetensors

Model card Files Files and versions

xet

Community

narcolepticchicken committed about 16 hours ago

Commit

5569d72

verified ·

1 Parent(s): d807f4a

Upload aco/trace_schema.py with huggingface_hub

Browse files

Files changed (1) hide show

aco/trace_schema.py +86 -207

aco/trace_schema.py CHANGED Viewed

@@ -1,234 +1,113 @@
-"""Normalized trace schema for Agent Cost Optimizer."""
 from dataclasses import dataclass, field
-from typing import Optional, List, Dict, Any
-from enum import Enum
 from datetime import datetime
-class TaskType(Enum):
-    QUICK_ANSWER = "quick_answer"
-    RESEARCH = "research"
-    CODING = "coding"
-    DOCUMENT_DRAFTING = "document_drafting"
-    LEGAL_REGULATED = "legal_regulated"
-    TOOL_HEAVY = "tool_heavy"
-    RETRIEVAL_HEAVY = "retrieval_heavy"
-    LONG_HORIZON = "long_horizon"
-    UNKNOWN_AMBIGUOUS = "unknown_ambiguous"
-class Outcome(Enum):
-    SUCCESS = "success"
-    PARTIAL_SUCCESS = "partial_success"
-    FAILURE = "failure"
-    FALSE_DONE = "false_done"
-    BLOCKED = "blocked"
-    ESCALATED_HUMAN = "escalated_human"
-    STOPPED_DOOM = "stopped_doom"
-class FailureTag(Enum):
-    MODEL_TOO_WEAK = "model_too_weak"
-    CONTEXT_TOO_SMALL = "context_too_small"
-    TOOL_FAILED = "tool_failed"
-    TOOL_UNNECESSARY = "tool_unnecessary"
-    TOOL_MISSED = "tool_missed"
-    VERIFIER_FALSE_PASS = "verifier_false_pass"
-    VERIFIER_FALSE_REJECT = "verifier_false_reject"
-    RETRY_LOOP = "retry_loop"
-    CACHE_BREAK = "cache_break"
-    HALLUCINATION = "hallucination"
-    TIMEOUT = "timeout"
-    COST_EXCEEDED = "cost_exceeded"
-    UNSAFE_CHEAP_MODEL = "unsafe_cheap_model"
-    MISSED_ESCALATION = "missed_escalation"
-@dataclass
-class ToolCall:
-    tool_name: str
-    tool_input: Dict[str, Any]
-    tool_output: Optional[str] = None
-    tool_cost: float = 0.0
-    tool_latency_ms: float = 0.0
-    cache_hit: bool = False
-    repeated: bool = False
-    ignored_result: bool = False
-    failed: bool = False
 @dataclass
 class ModelCall:
     model_id: str
     provider: str
     input_tokens: int = 0
     output_tokens: int = 0
     reasoning_tokens: int = 0
-    cost_per_1k_input: float = 0.0
-    cost_per_1k_output: float = 0.0
-    cache_hit_input_tokens: int = 0
     latency_ms: float = 0.0
-    @property
-    def total_cost(self) -> float:
-        input_cost = (self.input_tokens / 1000) * self.cost_per_1k_input
-        output_cost = (self.output_tokens / 1000) * self.cost_per_1k_output
-        cache_discount = (self.cache_hit_input_tokens / 1000) * self.cost_per_1k_input * 0.5  # 50% discount
-        return input_cost + output_cost - cache_discount
 @dataclass
-class VerifierCall:
-    verifier_model_id: str
-    target_step_id: str
-    passed: bool = False
-    confidence: float = 0.0
-    cost: float = 0.0
     latency_ms: float = 0.0
 @dataclass
 class TraceStep:
-    step_id: str
-    timestamp: datetime
-    task_type: TaskType
-    model_call: ModelCall
     tool_calls: List[ToolCall] = field(default_factory=list)
-    verifier_calls: List[VerifierCall] = field(default_factory=list)
-    context_size_tokens: int = 0
     context_sources: List[str] = field(default_factory=list)
-    cache_boundary_reached: bool = False
-    retry_count: int = 0
     recovery_action: Optional[str] = None
-    planned_next: Optional[str] = None
-    user_correction: Optional[str] = None
-    artifacts_created: List[str] = field(default_factory=list)
-    step_outcome: Optional[Outcome] = None
-    @property
-    def step_cost(self) -> float:
-        mc = self.model_call.total_cost if self.model_call else 0.0
-        tc = sum(t.tool_cost for t in self.tool_calls)
-        vc = sum(v.cost for v in self.verifier_calls)
-        return mc + tc + vc
-    @property
-    def step_latency_ms(self) -> float:
-        ml = self.model_call.latency_ms if self.model_call else 0.0
-        tl = sum(t.tool_latency_ms for t in self.tool_calls)
-        vl = sum(v.latency_ms for v in self.verifier_calls)
-        return ml + tl + vl
 @dataclass
 class AgentTrace:
-    trace_id: str
-    user_request: str
-    task_type: TaskType
     steps: List[TraceStep] = field(default_factory=list)
-    final_outcome: Optional[Outcome] = None
-    final_artifacts: List[str] = field(default_factory=list)
-    failure_tags: List[FailureTag] = field(default_factory=list)
-    user_satisfaction: Optional[float] = None  # 0-1
-    total_cost_saved_vs_frontier: Optional[float] = None
-    total_cost: Optional[float] = None
-    optimal_cost: Optional[float] = None  # oracle routing cost
-    metadata: Dict[str, Any] = field(default_factory=dict)
-    @property
-    def total_cost_computed(self) -> float:
-        return sum(s.step_cost for s in self.steps)
-    @property
-    def total_latency_ms(self) -> float:
-        return sum(s.step_latency_ms for s in self.steps)
-    @property
-    def total_retries(self) -> int:
-        return sum(s.retry_count for s in self.steps)
-    @property
-    def total_tool_calls(self) -> int:
-        return sum(len(s.tool_calls) for s in self.steps)
-    @property
-    def total_verifier_calls(self) -> int:
-        return sum(len(s.verifier_calls) for s in self.steps)
-    @property
-    def total_context_tokens(self) -> int:
-        return sum(s.context_size_tokens for s in self.steps)
-    @property
-    def cache_hit_rate(self) -> float:
-        model_calls = [s.model_call for s in self.steps if s.model_call]
-        if not model_calls:
-            return 0.0
-        total_input = sum(m.input_tokens for m in model_calls)
-        total_cache_hit = sum(m.cache_hit_input_tokens for m in model_calls)
-        return total_cache_hit / total_input if total_input > 0 else 0.0
-    def to_dict(self) -> Dict[str, Any]:
         return {
             "trace_id": self.trace_id,
-            "user_request": self.user_request,
-            "task_type": self.task_type.value,
-            "steps": [
-                {
-                    "step_id": s.step_id,
-                    "timestamp": s.timestamp.isoformat(),
-                    "task_type": s.task_type.value,
-                    "model_call": {
-                        "model_id": s.model_call.model_id,
-                        "provider": s.model_call.provider,
-                        "input_tokens": s.model_call.input_tokens,
-                        "output_tokens": s.model_call.output_tokens,
-                        "reasoning_tokens": s.model_call.reasoning_tokens,
-                        "cost": s.model_call.total_cost,
-                        "latency_ms": s.model_call.latency_ms,
-                        "cache_hit_input_tokens": s.model_call.cache_hit_input_tokens,
-                    },
-                    "tool_calls": [
-                        {
-                            "tool_name": t.tool_name,
-                            "tool_cost": t.tool_cost,
-                            "tool_latency_ms": t.tool_latency_ms,
-                            "cache_hit": t.cache_hit,
-                            "repeated": t.repeated,
-                            "ignored_result": t.ignored_result,
-                            "failed": t.failed,
-                        }
-                        for t in s.tool_calls
-                    ],
-                    "verifier_calls": [
-                        {
-                            "verifier_model_id": v.verifier_model_id,
-                            "passed": v.passed,
-                            "confidence": v.confidence,
-                            "cost": v.cost,
-                        }
-                        for v in s.verifier_calls
-                    ],
-                    "context_size_tokens": s.context_size_tokens,
-                    "retry_count": s.retry_count,
-                    "recovery_action": s.recovery_action,
-                    "step_outcome": s.step_outcome.value if s.step_outcome else None,
-                    "step_cost": s.step_cost,
-                    "step_latency_ms": s.step_latency_ms,
-                }
-                for s in self.steps
-            ],
-            "final_outcome": self.final_outcome.value if self.final_outcome else None,
-            "failure_tags": [f.value for f in self.failure_tags],
-            "total_cost": self.total_cost_computed,
-            "total_latency_ms": self.total_latency_ms,
-            "total_retries": self.total_retries,
-            "total_tool_calls": self.total_tool_calls,
-            "total_verifier_calls": self.total_verifier_calls,
-            "total_context_tokens": self.total_context_tokens,
-            "cache_hit_rate": self.cache_hit_rate,
-            "user_satisfaction": self.user_satisfaction,
-            "total_cost_saved_vs_frontier": self.total_cost_saved_vs_frontier,
-            "optimal_cost": self.optimal_cost,
         }

+"""Normalized Agent Trace Schema for ACO."""
 from dataclasses import dataclass, field
+from typing import Dict, List, Optional, Any
 from datetime import datetime
+import json, uuid
 @dataclass
 class ModelCall:
     model_id: str
     provider: str
+    tier: int
     input_tokens: int = 0
     output_tokens: int = 0
     reasoning_tokens: int = 0
+    cache_hit: bool = False
     latency_ms: float = 0.0
+    cost: float = 0.0
+    success: bool = True
+    error: Optional[str] = None
 @dataclass
+class ToolCall:
+    tool_name: str
+    args: Dict[str, Any] = field(default_factory=dict)
+    result: Optional[str] = None
     latency_ms: float = 0.0
+    cost: float = 0.0
+    success: bool = True
+    cached: bool = False
+    unnecessary: bool = False
+    error: Optional[str] = None
 @dataclass
 class TraceStep:
+    step_num: int
+    model_call: Optional[ModelCall] = None
     tool_calls: List[ToolCall] = field(default_factory=list)
+    context_size: int = 0
     context_sources: List[str] = field(default_factory=list)
+    context_budget_used: float = 0.0
+    cache_prefix_tokens: int = 0
+    cache_suffix_tokens: int = 0
+    verifier_called: bool = False
+    verifier_result: Optional[str] = None
+    retry_num: int = 0
     recovery_action: Optional[str] = None
+    timestamp: str = field(default_factory=lambda: datetime.utcnow().isoformat())
 @dataclass
 class AgentTrace:
+    trace_id: str = field(default_factory=lambda: str(uuid.uuid4())[:12])
+    request: str = ""
+    task_type: str = "unknown_ambiguous"
+    difficulty: int = 3
+    predicted_tier: int = 4
     steps: List[TraceStep] = field(default_factory=list)
+    final_outcome: str = "unknown"
+    task_success: bool = False
+    total_cost: float = 0.0
+    total_tokens: int = 0
+    total_tool_calls: int = 0
+    total_retries: int = 0
+    total_verifier_calls: int = 0
+    cache_hit_rate: float = 0.0
+    latency_total_ms: float = 0.0
+    user_correction: bool = False
+    failure_tags: List[str] = field(default_factory=list)
+    artifacts_created: List[str] = field(default_factory=list)
+    meta_tool_used: bool = False
+    early_terminated: bool = False
+    escalation_occurred: bool = False
+    timestamp: str = field(default_factory=lambda: datetime.utcnow().isoformat())
+    def to_dict(self) -> dict:
+        import dataclasses
+        return dataclasses.asdict(self)
+    def to_json(self) -> str:
+        return json.dumps(self.to_dict(), indent=2)
+    def compute_summary(self) -> dict:
+        total_cost = sum(
+            (s.model_call.cost if s.model_call else 0) +
+            sum(tc.cost for tc in s.tool_calls)
+            for s in self.steps
+        )
+        total_tokens = sum(
+            (s.model_call.input_tokens + s.model_call.output_tokens if s.model_call else 0)
+            for s in self.steps
+        )
+        total_tool_calls = sum(len(s.tool_calls) for s in self.steps)
+        total_retries = sum(s.retry_num for s in self.steps)
+        total_verifiers = sum(1 for s in self.steps if s.verifier_called)
+        cache_hits = sum(1 for s in self.steps if s.model_call and s.model_call.cache_hit)
+        cache_total = sum(1 for s in self.steps if s.model_call)
         return {
             "trace_id": self.trace_id,
+            "task_type": self.task_type,
+            "difficulty": self.difficulty,
+            "predicted_tier": self.predicted_tier,
+            "success": self.task_success,
+            "total_cost": total_cost,
+            "total_tokens": total_tokens,
+            "total_tool_calls": total_tool_calls,
+            "total_retries": total_retries,
+            "total_verifier_calls": total_verifiers,
+            "cache_hit_rate": cache_hits / max(cache_total, 1),
+            "num_steps": len(self.steps),
+            "outcome": self.final_outcome,
+            "failure_tags": self.failure_tags,
+            "meta_tool_used": self.meta_tool_used,
+            "early_terminated": self.early_terminated,
         }