# agent-cost-optimizer / examples/end_to_end_demo.py
# Uploaded by narcolepticchicken (commit 74d6f4b, verified)
"""End-to-end demo: ACO in action with a simulated agent harness.
This script demonstrates how to bolt ACO onto any agent harness.
No actual LLM calls are made — decisions are simulated with realistic parameters.
"""
import json
from typing import Dict, Any
from datetime import datetime
from aco import AgentCostOptimizer
from aco.config import ACOConfig, ModelConfig, ToolConfig, VerifierConfig, RoutingPolicy
from aco.trace_schema import ModelCall, ToolCall, Outcome, FailureTag
def build_demo_config() -> ACOConfig:
    """Assemble the demo ACOConfig with realistic provider pricing.

    Prices are per 1k tokens and mirror real-world rates for a mix of
    hosted and local models, plus tool and verifier cost estimates.
    """

    def _model(model_id, provider, cost_in, cost_out, latency, tier, ctx, **extra):
        # Thin factory so the model table below stays compact and uniform.
        return ModelConfig(
            model_id=model_id,
            provider=provider,
            cost_per_1k_input=cost_in,
            cost_per_1k_output=cost_out,
            latency_ms_estimate=latency,
            strength_tier=tier,
            max_context=ctx,
            **extra,
        )

    models = {
        "gpt-4o-mini": _model("gpt-4o-mini", "openai", 0.00015, 0.0006, 400, 2, 128000),
        "gpt-4o": _model("gpt-4o", "openai", 0.0025, 0.01, 1500, 4, 128000),
        "claude-3.5-sonnet": _model(
            "claude-3-5-sonnet-20241022", "anthropic", 0.003, 0.015, 1200, 3, 200000
        ),
        "claude-3.5-haiku": _model(
            "claude-3-5-haiku-20241022", "anthropic", 0.00025, 0.00125, 300, 2, 200000
        ),
        "deepseek-chat": _model(
            "deepseek-chat", "deepseek", 0.00014, 0.00028, 800, 3, 64000,
            cache_discount_rate=0.5,
        ),
        # Self-hosted model: zero marginal token cost.
        "local-qwen-7b": _model(
            "Qwen/Qwen2.5-7B-Instruct", "local", 0.0, 0.0, 600, 3, 131072
        ),
    }

    tools = {
        "search": ToolConfig("search", 0.002, 500, cacheable=False),
        "code_execution": ToolConfig("code_execution", 0.005, 1000, requires_verification=True),
        "file_read": ToolConfig("file_read", 0.0005, 100, cacheable=True),
        "linter": ToolConfig("linter", 0.001, 200),
        "document_retrieval": ToolConfig("document_retrieval", 0.001, 300, cacheable=True),
        "compliance_check": ToolConfig("compliance_check", 0.01, 1500, requires_verification=True),
    }

    verifiers = {
        # Positional args follow VerifierConfig's signature — presumably
        # (model, cost, latency_ms, threshold); confirm against aco.config.
        "verifier_medium": VerifierConfig("claude-3.5-haiku", 0.005, 800, 0.8),
    }

    return ACOConfig(
        models=models,
        tools=tools,
        verifiers=verifiers,
        routing_policy=RoutingPolicy("demo"),
    )
def stable_trace_id(request: str) -> str:
    """Return a deterministic trace id of the form ``demo-NNNN`` for *request*.

    Built-in ``hash()`` is salted per process (PYTHONHASHSEED), so the original
    ``hash(request) % 10000`` produced a different id on every run; CRC32 keeps
    the demo reproducible across runs.
    """
    import zlib  # stdlib; local import avoids touching the module header
    return f"demo-{zlib.crc32(request.encode('utf-8')) % 10000:04d}"


def demo_task(optimizer: "AgentCostOptimizer", request: str, expected_difficulty: int = 3):
    """Run ACO optimization for a single task and print every decision it made.

    Args:
        optimizer: A configured ``AgentCostOptimizer``.
        request: Natural-language description of the task to route.
        expected_difficulty: Rough 1-5 difficulty estimate, displayed so
            routing choices can be eyeballed against it. (Previously the
            parameter was accepted but never used.)

    Returns:
        The result object produced by ``optimizer.optimize``.
    """
    print(f"\n{'='*80}")
    print(f"TASK: {request}")
    print(f"Expected difficulty: {expected_difficulty}/5")
    print(f"{'='*80}")
    # Build run state for a fresh task (simulates harness-side bookkeeping).
    run_state = {
        "trace_id": stable_trace_id(request),  # deterministic; was hash()-based
        "planned_tools": [("file_read", {"path": "project.md"}), ("code_execution", {"code": "test"})],
        "previous_tool_calls": [],
        "current_cost": 0.0,
        "step_number": 1,
        "total_steps": 3,
        "is_irreversible": False,
        "context_pieces": {
            "system_rules": "You are a helpful coding assistant.",
            "tool_descriptions": "Available: file_read, code_execution, linter",
            "user_preferences": "Prefer Python, type hints, docstrings",
            "recent_messages": "",
        },
        "retrieved_docs": [],
        "routing_mode": "cascade",
    }
    # Call optimizer
    result = optimizer.optimize(request, run_state)
    # Display decisions (f-prefix dropped where a string has no placeholders).
    print("\nπŸ“Š OPTIMIZATION DECISIONS")
    print(f" Trace ID: {result.trace_id}")
    print(f" Estimated Cost: ${result.estimated_cost:.4f}")
    print(f" Estimated Latency: {result.estimated_latency_ms:.0f}ms")
    print(f" Confidence: {result.confidence:.2f}")
    print("\n 🎯 Model Routing")
    print(f" Selected: {result.routing_decision.model_id} (tier {result.routing_decision.tier})")
    print(f" Provider: {result.routing_decision.provider}")
    print(f" Max Tokens: {result.routing_decision.max_tokens}")
    print(f" Temperature: {result.routing_decision.temperature}")
    print(f" Reasoning: {result.routing_decision.reasoning}")
    if result.routing_decision.fallback_model_id:
        print(f" Fallback: {result.routing_decision.fallback_model_id}")
    if result.context_budget:
        cb = result.context_budget
        print(f"\n πŸ“„ Context Budget ({cb.total_budget_tokens:,} tokens)")
        print(f" Prefix (cacheable): {cb.cache_prefix_tokens:,} tokens")
        print(f" Suffix (dynamic): {cb.dynamic_suffix_tokens:,} tokens")
        if cb.omitted_sources:
            print(f" Omitted: {[s.name for s in cb.omitted_sources]}")
        if cb.summarized_sources:
            print(f" Summarized: {[s.name for s, _ in cb.summarized_sources]}")
        if cb.retrieval_queries:
            print(f" Retrieval: {cb.retrieval_queries}")
    if result.prompt_layout:
        pl = result.prompt_layout
        print("\n πŸ’Ύ Cache Layout")
        print(f" Cold Cost: ${pl.estimated_cold_cost:.4f}")
        print(f" Warm Cost: ${pl.estimated_warm_cost:.4f}")
        print(f" Cache Discount: ${pl.cache_discount:.4f}")
    print(f"\n πŸ”§ Tool Decisions ({len(result.tool_decisions)} tools)")
    for td in result.tool_decisions:
        icon = "βœ…" if td.decision.value in ("use", "batch", "parallel") else "❌"
        print(f" {icon} {td.tool_name}: {td.decision.value} (cost: ${td.estimated_cost:.4f}, benefit: {td.estimated_benefit:.2f})")
    if result.verifier_decision:
        vd = result.verifier_decision
        print("\n πŸ” Verifier Decision")
        print(f" Decision: {vd.decision.value}")
        print(f" Checks: {vd.checks}")
        print(f" Estimated Cost: ${vd.estimated_verifier_cost:.4f}")
    if result.meta_tool_match:
        mm = result.meta_tool_match
        print("\n ⚑ Meta-Tool Match")
        print(f" ID: {mm['meta_tool_id']}")
        print(f" Est. Savings: ${mm['estimated_cost_savings']:.4f}")
    if result.doom_assessment:
        da = result.doom_assessment
        print("\n ⚠️ Doom Assessment")
        print(f" Action: {da.action.value}")
        print(f" Confidence: {da.confidence:.2f}")
        if da.signals_triggered:
            print(f" Signals: {da.signals_triggered}")
    # Simulate execution.
    # NOTE(review): model cost prices max_tokens at the INPUT rate — a rough
    # simulation shortcut, not a real input/output split; confirm if reused.
    print("\n🎬 SIMULATED EXECUTION")
    model_cost = (result.routing_decision.max_tokens / 1000) * optimizer.config.models[result.routing_decision.model_id].cost_per_1k_input
    tool_cost = sum(d.estimated_cost for d in result.tool_decisions if d.decision.value in ("use", "batch"))
    verifier_cost = result.verifier_decision.estimated_verifier_cost if result.verifier_decision else 0.0
    total_cost = model_cost + tool_cost + verifier_cost
    print(f" Model call: ${model_cost:.4f}")
    print(f" Tool calls: ${tool_cost:.4f}")
    print(f" Verifier: ${verifier_cost:.4f}")
    print(f" TOTAL: ${total_cost:.4f}")
    # Estimate what routing everything to the frontier model would have cost.
    frontier_cfg = optimizer.config.models.get("gpt-4o")
    if frontier_cfg:
        frontier_cost = (result.routing_decision.max_tokens / 1000) * frontier_cfg.cost_per_1k_input + tool_cost + verifier_cost
        savings = frontier_cost - total_cost
        print("\nπŸ’° vs Frontier Model (gpt-4o)")
        print(f" Frontier cost: ${frontier_cost:.4f}")
        # max() guards the percentage against division by zero.
        print(f" Savings: ${savings:.4f} ({savings/max(frontier_cost,0.001)*100:.1f}%)")
    return result
def main():
    """Run the end-to-end demo over a batch of representative tasks."""
    from collections import Counter  # hoisted from mid-body to function top

    banner = "=" * 80
    print(banner)
    print("AGENT COST OPTIMIZER - End-to-End Demo")
    print(banner)

    optimizer = AgentCostOptimizer(build_demo_config())

    # (request, expected difficulty) pairs spanning trivial to hard tasks.
    tasks = [
        ("What is the capital of France?", 1),
        ("Write a Python function to reverse a linked list", 3),
        ("Research the latest advancements in transformer architectures and summarize key findings", 4),
        ("Review this contract for liability clauses and check GDPR compliance", 5),
        ("Help me with this thing", 3),
        ("Debug this segfault in our C++ thread pool implementation", 4),
        ("Draft an email to the team about the deployment schedule for next week", 2),
        ("Plan a 3-month roadmap for migrating our ML infrastructure to Kubernetes", 4),
        ("Search for open issues in the repo and create a summary report", 2),
        ("Query the database for Q3 sales data broken down by region, then produce a chart", 3),
    ]

    summaries = []
    for request, difficulty in tasks:
        outcome = demo_task(optimizer, request, difficulty)
        verifier = outcome.verifier_decision.decision.value if outcome.verifier_decision else "none"
        summaries.append(
            {
                "request": request,
                "model": outcome.routing_decision.model_id,
                "tier": outcome.routing_decision.tier,
                "estimated_cost": outcome.estimated_cost,
                "verifier": verifier,
            }
        )

    # Aggregate summary across the whole batch.
    print("\n" + banner)
    print("SUMMARY")
    print(banner)
    total_est = sum(entry["estimated_cost"] for entry in summaries)
    print(f"Total estimated cost for {len(tasks)} tasks: ${total_est:.4f}")

    # Show which models the router favored.
    model_counts = Counter(entry["model"] for entry in summaries)
    print("\nModel distribution:")
    for model, count in model_counts.most_common():
        print(f" {model}: {count} tasks ({count/len(tasks)*100:.0f}%)")

    print("\nβœ… Demo complete!")
    print(" Repo: https://huggingface.co/narcolepticchicken/agent-cost-optimizer")
# Script entry point: run the demo when executed directly.
if __name__ == "__main__":
    main()