| """End-to-end demo: ACO in action with a simulated agent harness. |
| |
| This script demonstrates how to bolt ACO onto any agent harness. |
| No actual LLM calls are made β decisions are simulated with realistic parameters. |
| """ |
|
|
| import json |
| from typing import Dict, Any |
| from datetime import datetime |
|
|
| from aco import AgentCostOptimizer |
| from aco.config import ACOConfig, ModelConfig, ToolConfig, VerifierConfig, RoutingPolicy |
| from aco.trace_schema import ModelCall, ToolCall, Outcome, FailureTag |
|
|
|
|
def build_demo_config() -> ACOConfig:
    """Assemble the demo ``ACOConfig`` with realistic provider pricing.

    Returns:
        An ``ACOConfig`` covering six models across four providers
        (OpenAI, Anthropic, DeepSeek, local), six tools, one verifier,
        and the ``"demo"`` routing policy.
    """
    # Model catalog: a spread of strength tiers and price points so the
    # router has meaningful cheap-vs-frontier choices to make.
    model_catalog = {
        "gpt-4o-mini": ModelConfig(
            provider="openai",
            model_id="gpt-4o-mini",
            strength_tier=2,
            max_context=128000,
            cost_per_1k_input=0.00015,
            cost_per_1k_output=0.0006,
            latency_ms_estimate=400,
        ),
        "gpt-4o": ModelConfig(
            provider="openai",
            model_id="gpt-4o",
            strength_tier=4,
            max_context=128000,
            cost_per_1k_input=0.0025,
            cost_per_1k_output=0.01,
            latency_ms_estimate=1500,
        ),
        "claude-3.5-sonnet": ModelConfig(
            provider="anthropic",
            model_id="claude-3-5-sonnet-20241022",
            strength_tier=3,
            max_context=200000,
            cost_per_1k_input=0.003,
            cost_per_1k_output=0.015,
            latency_ms_estimate=1200,
        ),
        "claude-3.5-haiku": ModelConfig(
            provider="anthropic",
            model_id="claude-3-5-haiku-20241022",
            strength_tier=2,
            max_context=200000,
            cost_per_1k_input=0.00025,
            cost_per_1k_output=0.00125,
            latency_ms_estimate=300,
        ),
        "deepseek-chat": ModelConfig(
            provider="deepseek",
            model_id="deepseek-chat",
            strength_tier=3,
            max_context=64000,
            cost_per_1k_input=0.00014,
            cost_per_1k_output=0.00028,
            latency_ms_estimate=800,
            # DeepSeek bills cached prefix tokens at half price.
            cache_discount_rate=0.5,
        ),
        "local-qwen-7b": ModelConfig(
            provider="local",
            model_id="Qwen/Qwen2.5-7B-Instruct",
            strength_tier=3,
            max_context=131072,
            # Locally hosted, so per-token cost is zero.
            cost_per_1k_input=0.0,
            cost_per_1k_output=0.0,
            latency_ms_estimate=600,
        ),
    }

    # Tools: (name, per-call cost, latency-ms estimate) plus flags.
    tool_catalog = {
        "search": ToolConfig("search", 0.002, 500, cacheable=False),
        "code_execution": ToolConfig("code_execution", 0.005, 1000, requires_verification=True),
        "file_read": ToolConfig("file_read", 0.0005, 100, cacheable=True),
        "linter": ToolConfig("linter", 0.001, 200),
        "document_retrieval": ToolConfig("document_retrieval", 0.001, 300, cacheable=True),
        "compliance_check": ToolConfig("compliance_check", 0.01, 1500, requires_verification=True),
    }

    # Single mid-strength verifier backed by the cheap Anthropic model.
    verifier_catalog = {
        "verifier_medium": VerifierConfig("claude-3.5-haiku", 0.005, 800, 0.8),
    }

    return ACOConfig(
        models=model_catalog,
        tools=tool_catalog,
        verifiers=verifier_catalog,
        routing_policy=RoutingPolicy("demo"),
    )
|
|
|
|
def demo_task(optimizer: AgentCostOptimizer, request: str, expected_difficulty: int = 3):
    """Run ACO optimization for a single task and print every decision.

    Args:
        optimizer: Configured ``AgentCostOptimizer`` to exercise.
        request: Natural-language task description to optimize.
        expected_difficulty: Nominal 1-5 difficulty. Kept for interface
            compatibility; not currently passed into ``optimizer.optimize``.

    Returns:
        The result object produced by ``optimizer.optimize``.
    """
    print(f"\n{'='*80}")
    print(f"TASK: {request}")
    print(f"{'='*80}")

    # Simulated mid-run agent state, mimicking what a real harness would
    # supply. NOTE: hash() is salted per process, so the trace_id is only
    # stable within a single interpreter run.
    run_state = {
        "trace_id": f"demo-{hash(request) % 10000:04d}",
        "planned_tools": [("file_read", {"path": "project.md"}), ("code_execution", {"code": "test"})],
        "previous_tool_calls": [],
        "current_cost": 0.0,
        "step_number": 1,
        "total_steps": 3,
        "is_irreversible": False,
        "context_pieces": {
            "system_rules": "You are a helpful coding assistant.",
            "tool_descriptions": "Available: file_read, code_execution, linter",
            "user_preferences": "Prefer Python, type hints, docstrings",
            "recent_messages": "",
        },
        "retrieved_docs": [],
        "routing_mode": "cascade",
    }

    result = optimizer.optimize(request, run_state)

    # --- Report the optimizer's decisions ---------------------------------
    # (Emoji labels restored here; they were mojibake-corrupted in a prior
    # encoding round-trip.)
    print(f"\n📊 OPTIMIZATION DECISIONS")
    print(f" Trace ID: {result.trace_id}")
    print(f" Estimated Cost: ${result.estimated_cost:.4f}")
    print(f" Estimated Latency: {result.estimated_latency_ms:.0f}ms")
    print(f" Confidence: {result.confidence:.2f}")
    print(f"\n 🎯 Model Routing")
    print(f" Selected: {result.routing_decision.model_id} (tier {result.routing_decision.tier})")
    print(f" Provider: {result.routing_decision.provider}")
    print(f" Max Tokens: {result.routing_decision.max_tokens}")
    print(f" Temperature: {result.routing_decision.temperature}")
    print(f" Reasoning: {result.routing_decision.reasoning}")
    if result.routing_decision.fallback_model_id:
        print(f" Fallback: {result.routing_decision.fallback_model_id}")

    if result.context_budget:
        cb = result.context_budget
        print(f"\n 📝 Context Budget ({cb.total_budget_tokens:,} tokens)")
        print(f" Prefix (cacheable): {cb.cache_prefix_tokens:,} tokens")
        print(f" Suffix (dynamic): {cb.dynamic_suffix_tokens:,} tokens")
        if cb.omitted_sources:
            print(f" Omitted: {[s.name for s in cb.omitted_sources]}")
        if cb.summarized_sources:
            print(f" Summarized: {[s.name for s, _ in cb.summarized_sources]}")
        if cb.retrieval_queries:
            print(f" Retrieval: {cb.retrieval_queries}")

    if result.prompt_layout:
        pl = result.prompt_layout
        print(f"\n 💾 Cache Layout")
        print(f" Cold Cost: ${pl.estimated_cold_cost:.4f}")
        print(f" Warm Cost: ${pl.estimated_warm_cost:.4f}")
        print(f" Cache Discount: ${pl.cache_discount:.4f}")

    print(f"\n 🔧 Tool Decisions ({len(result.tool_decisions)} tools)")
    for td in result.tool_decisions:
        # Checkmark only for decisions that actually invoke the tool.
        # (Fixed: the original literal contained a mojibake-embedded NEL
        # newline where the checkmark emoji should be.)
        icon = "✅" if td.decision.value in ("use", "batch", "parallel") else "❌"
        print(f" {icon} {td.tool_name}: {td.decision.value} (cost: ${td.estimated_cost:.4f}, benefit: {td.estimated_benefit:.2f})")

    if result.verifier_decision:
        vd = result.verifier_decision
        print(f"\n 🔍 Verifier Decision")
        print(f" Decision: {vd.decision.value}")
        print(f" Checks: {vd.checks}")
        print(f" Estimated Cost: ${vd.estimated_verifier_cost:.4f}")

    if result.meta_tool_match:
        mm = result.meta_tool_match
        print(f"\n ⚡ Meta-Tool Match")
        print(f" ID: {mm['meta_tool_id']}")
        print(f" Est. Savings: ${mm['estimated_cost_savings']:.4f}")

    if result.doom_assessment:
        da = result.doom_assessment
        print(f"\n ⚠️ Doom Assessment")
        print(f" Action: {da.action.value}")
        print(f" Confidence: {da.confidence:.2f}")
        if da.signals_triggered:
            print(f" Signals: {da.signals_triggered}")

    # --- Simulated execution cost roll-up ----------------------------------
    print(f"\n🎬 SIMULATED EXECUTION")
    # Approximate model spend: assume the full max_tokens budget is billed
    # at the selected model's input rate.
    model_cost = (result.routing_decision.max_tokens / 1000) * optimizer.config.models[result.routing_decision.model_id].cost_per_1k_input
    tool_cost = sum(d.estimated_cost for d in result.tool_decisions if d.decision.value in ("use", "batch"))
    verifier_cost = result.verifier_decision.estimated_verifier_cost if result.verifier_decision else 0.0
    total_cost = model_cost + tool_cost + verifier_cost

    print(f" Model call: ${model_cost:.4f}")
    print(f" Tool calls: ${tool_cost:.4f}")
    print(f" Verifier: ${verifier_cost:.4f}")
    print(f" TOTAL: ${total_cost:.4f}")

    # Compare against naively sending everything to the frontier model.
    frontier_cfg = optimizer.config.models.get("gpt-4o")
    if frontier_cfg:
        frontier_cost = (result.routing_decision.max_tokens / 1000) * frontier_cfg.cost_per_1k_input + tool_cost + verifier_cost
        savings = frontier_cost - total_cost
        print(f"\n💰 vs Frontier Model (gpt-4o)")
        print(f" Frontier cost: ${frontier_cost:.4f}")
        # max(..., 0.001) guards against division by zero on a free route.
        print(f" Savings: ${savings:.4f} ({savings/max(frontier_cost,0.001)*100:.1f}%)")

    return result
|
|
|
|
def main():
    """Build the demo config and run the optimizer over a batch of sample tasks.

    Prints per-task decisions (via ``demo_task``), then a summary of total
    estimated cost and the model-routing distribution.
    """
    print("=" * 80)
    print("AGENT COST OPTIMIZER - End-to-End Demo")
    print("=" * 80)

    config = build_demo_config()
    optimizer = AgentCostOptimizer(config)

    # (request, nominal difficulty 1-5) pairs spanning trivial lookups,
    # coding, research, compliance, and a deliberately vague request.
    tasks = [
        ("What is the capital of France?", 1),
        ("Write a Python function to reverse a linked list", 3),
        ("Research the latest advancements in transformer architectures and summarize key findings", 4),
        ("Review this contract for liability clauses and check GDPR compliance", 5),
        ("Help me with this thing", 3),
        ("Debug this segfault in our C++ thread pool implementation", 4),
        ("Draft an email to the team about the deployment schedule for next week", 2),
        ("Plan a 3-month roadmap for migrating our ML infrastructure to Kubernetes", 4),
        ("Search for open issues in the repo and create a summary report", 2),
        ("Query the database for Q3 sales data broken down by region, then produce a chart", 3),
    ]

    results = []
    for request, difficulty in tasks:
        result = demo_task(optimizer, request, difficulty)
        results.append({
            "request": request,
            "model": result.routing_decision.model_id,
            "tier": result.routing_decision.tier,
            "estimated_cost": result.estimated_cost,
            "verifier": result.verifier_decision.decision.value if result.verifier_decision else "none",
        })

    print(f"\n{'='*80}")
    print("SUMMARY")
    print(f"{'='*80}")
    total_est = sum(r["estimated_cost"] for r in results)
    print(f"Total estimated cost for {len(tasks)} tasks: ${total_est:.4f}")

    # Counter tallies how often each model was selected by the router.
    from collections import Counter
    model_counts = Counter(r["model"] for r in results)
    print(f"\nModel distribution:")
    for model, count in model_counts.most_common():
        print(f" {model}: {count} tasks ({count/len(tasks)*100:.0f}%)")

    # Fixed: the checkmark emoji was mojibake-corrupted into an embedded
    # newline in the original literal.
    print(f"\n✅ Demo complete!")
    print(f" Repo: https://huggingface.co/narcolepticchicken/agent-cost-optimizer")
|
|
|
|
| if __name__ == "__main__": |
| main() |
|
|