narcolepticchicken committed
Commit
74d6f4b
verified
1 Parent(s): fed7e5a

Upload examples/end_to_end_demo.py

Files changed (1)
  1. examples/end_to_end_demo.py +255 -0
examples/end_to_end_demo.py ADDED
@@ -0,0 +1,255 @@
+"""End-to-end demo: ACO in action with a simulated agent harness.
+
+This script demonstrates how to bolt ACO onto any agent harness.
+No actual LLM calls are made — decisions are simulated with realistic parameters.
+"""
+
+from collections import Counter
+
+from aco import AgentCostOptimizer
+from aco.config import ACOConfig, ModelConfig, ToolConfig, VerifierConfig, RoutingPolicy
+
+
+def build_demo_config() -> ACOConfig:
+    """Build a demo config with realistic provider pricing."""
+    return ACOConfig(
+        models={
+            "gpt-4o-mini": ModelConfig(
+                model_id="gpt-4o-mini",
+                provider="openai",
+                cost_per_1k_input=0.00015,
+                cost_per_1k_output=0.0006,
+                latency_ms_estimate=400,
+                strength_tier=2,
+                max_context=128000,
+            ),
+            "gpt-4o": ModelConfig(
+                model_id="gpt-4o",
+                provider="openai",
+                cost_per_1k_input=0.0025,
+                cost_per_1k_output=0.01,
+                latency_ms_estimate=1500,
+                strength_tier=4,
+                max_context=128000,
+            ),
+            "claude-3.5-sonnet": ModelConfig(
+                model_id="claude-3-5-sonnet-20241022",
+                provider="anthropic",
+                cost_per_1k_input=0.003,
+                cost_per_1k_output=0.015,
+                latency_ms_estimate=1200,
+                strength_tier=3,
+                max_context=200000,
+            ),
+            "claude-3.5-haiku": ModelConfig(
+                model_id="claude-3-5-haiku-20241022",
+                provider="anthropic",
+                cost_per_1k_input=0.00025,
+                cost_per_1k_output=0.00125,
+                latency_ms_estimate=300,
+                strength_tier=2,
+                max_context=200000,
+            ),
+            "deepseek-chat": ModelConfig(
+                model_id="deepseek-chat",
+                provider="deepseek",
+                cost_per_1k_input=0.00014,
+                cost_per_1k_output=0.00028,
+                latency_ms_estimate=800,
+                strength_tier=3,
+                max_context=64000,
+                cache_discount_rate=0.5,
+            ),
+            "local-qwen-7b": ModelConfig(
+                model_id="Qwen/Qwen2.5-7B-Instruct",
+                provider="local",
+                cost_per_1k_input=0.0,
+                cost_per_1k_output=0.0,
+                latency_ms_estimate=600,
+                strength_tier=3,
+                max_context=131072,
+            ),
+        },
+        tools={
+            "search": ToolConfig("search", 0.002, 500, cacheable=False),
+            "code_execution": ToolConfig("code_execution", 0.005, 1000, requires_verification=True),
+            "file_read": ToolConfig("file_read", 0.0005, 100, cacheable=True),
+            "linter": ToolConfig("linter", 0.001, 200),
+            "document_retrieval": ToolConfig("document_retrieval", 0.001, 300, cacheable=True),
+            "compliance_check": ToolConfig("compliance_check", 0.01, 1500, requires_verification=True),
+        },
+        verifiers={
+            "verifier_medium": VerifierConfig("claude-3.5-haiku", 0.005, 800, 0.8),
+        },
+        routing_policy=RoutingPolicy("demo"),
+    )
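+# The model roster deliberately mixes paid APIs with a zero-cost local model,
+# so the router has a full cost/strength spectrum to choose from.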
+
+
+def demo_task(optimizer: AgentCostOptimizer, request: str, expected_difficulty: int = 3):
+    """Run ACO optimization for a single task and show decisions."""
+
+    print(f"\n{'='*80}")
+    print(f"TASK: {request} (expected difficulty: {expected_difficulty})")
+    print(f"{'='*80}")
+
+    # Build run state for a fresh task
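+    # This dict is the harness-side contract: ACO reads the planned tool
+    # calls, spend so far, step position, and available context pieces to
+    # make its routing and budgeting decisions below.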
+    run_state = {
+        "trace_id": f"demo-{hash(request) % 10000:04d}",
+        "planned_tools": [("file_read", {"path": "project.md"}), ("code_execution", {"code": "test"})],
+        "previous_tool_calls": [],
+        "current_cost": 0.0,
+        "step_number": 1,
+        "total_steps": 3,
+        "is_irreversible": False,
+        "context_pieces": {
+            "system_rules": "You are a helpful coding assistant.",
+            "tool_descriptions": "Available: file_read, code_execution, linter",
+            "user_preferences": "Prefer Python, type hints, docstrings",
+            "recent_messages": "",
+        },
+        "retrieved_docs": [],
+        "routing_mode": "cascade",
+    }
+
+    # Call optimizer
+    result = optimizer.optimize(request, run_state)
+
+    # Display decisions
+    print("\n📊 OPTIMIZATION DECISIONS")
+    print(f"  Trace ID: {result.trace_id}")
+    print(f"  Estimated Cost: ${result.estimated_cost:.4f}")
+    print(f"  Estimated Latency: {result.estimated_latency_ms:.0f}ms")
+    print(f"  Confidence: {result.confidence:.2f}")
+    print("\n  🎯 Model Routing")
+    print(f"    Selected: {result.routing_decision.model_id} (tier {result.routing_decision.tier})")
+    print(f"    Provider: {result.routing_decision.provider}")
+    print(f"    Max Tokens: {result.routing_decision.max_tokens}")
+    print(f"    Temperature: {result.routing_decision.temperature}")
+    print(f"    Reasoning: {result.routing_decision.reasoning}")
+    if result.routing_decision.fallback_model_id:
+        print(f"    Fallback: {result.routing_decision.fallback_model_id}")
+
+    if result.context_budget:
+        cb = result.context_budget
+        print(f"\n  📄 Context Budget ({cb.total_budget_tokens:,} tokens)")
+        print(f"    Prefix (cacheable): {cb.cache_prefix_tokens:,} tokens")
+        print(f"    Suffix (dynamic): {cb.dynamic_suffix_tokens:,} tokens")
+        if cb.omitted_sources:
+            print(f"    Omitted: {[s.name for s in cb.omitted_sources]}")
+        if cb.summarized_sources:
+            print(f"    Summarized: {[s.name for s, _ in cb.summarized_sources]}")
+        if cb.retrieval_queries:
+            print(f"    Retrieval: {cb.retrieval_queries}")
+
+    if result.prompt_layout:
+        pl = result.prompt_layout
+        print("\n  💾 Cache Layout")
+        print(f"    Cold Cost: ${pl.estimated_cold_cost:.4f}")
+        print(f"    Warm Cost: ${pl.estimated_warm_cost:.4f}")
+        print(f"    Cache Discount: ${pl.cache_discount:.4f}")
+
+    print(f"\n  🔧 Tool Decisions ({len(result.tool_decisions)} tools)")
+    for td in result.tool_decisions:
+        icon = "✅" if td.decision.value in ("use", "batch", "parallel") else "❌"
+        print(f"    {icon} {td.tool_name}: {td.decision.value} (cost: ${td.estimated_cost:.4f}, benefit: {td.estimated_benefit:.2f})")
+
+    if result.verifier_decision:
+        vd = result.verifier_decision
+        print("\n  🔍 Verifier Decision")
+        print(f"    Decision: {vd.decision.value}")
+        print(f"    Checks: {vd.checks}")
+        print(f"    Estimated Cost: ${vd.estimated_verifier_cost:.4f}")
+
+    if result.meta_tool_match:
+        mm = result.meta_tool_match
+        print("\n  ⚡ Meta-Tool Match")
+        print(f"    ID: {mm['meta_tool_id']}")
+        print(f"    Est. Savings: ${mm['estimated_cost_savings']:.4f}")
+
+    if result.doom_assessment:
+        da = result.doom_assessment
+        print("\n  ⚠️ Doom Assessment")
+        print(f"    Action: {da.action.value}")
+        print(f"    Confidence: {da.confidence:.2f}")
+        if da.signals_triggered:
+            print(f"    Signals: {da.signals_triggered}")
+
+    # Simulate execution
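+    # (Costing below is a rough proxy: the output-token cap is priced at the
+    # input rate. A real harness would meter actual prompt/completion tokens.)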
+    print("\n🎬 SIMULATED EXECUTION")
+    model_cfg = optimizer.config.models[result.routing_decision.model_id]
+    model_cost = (result.routing_decision.max_tokens / 1000) * model_cfg.cost_per_1k_input
+    tool_cost = sum(d.estimated_cost for d in result.tool_decisions if d.decision.value in ("use", "batch"))
+    verifier_cost = result.verifier_decision.estimated_verifier_cost if result.verifier_decision else 0.0
+    total_cost = model_cost + tool_cost + verifier_cost
+
+    print(f"  Model call: ${model_cost:.4f}")
+    print(f"  Tool calls: ${tool_cost:.4f}")
+    print(f"  Verifier: ${verifier_cost:.4f}")
+    print(f"  TOTAL: ${total_cost:.4f}")
+
+    # Estimate what frontier-only would cost
+    frontier_cfg = optimizer.config.models.get("gpt-4o")
+    if frontier_cfg:
+        frontier_cost = (result.routing_decision.max_tokens / 1000) * frontier_cfg.cost_per_1k_input + tool_cost + verifier_cost
+        savings = frontier_cost - total_cost
+        print("\n💰 vs Frontier Model (gpt-4o)")
+        print(f"  Frontier cost: ${frontier_cost:.4f}")
+        print(f"  Savings: ${savings:.4f} ({savings / max(frontier_cost, 0.001) * 100:.1f}%)")
+
+    return result
+
+
+def main():
+    print("=" * 80)
+    print("AGENT COST OPTIMIZER - End-to-End Demo")
+    print("=" * 80)
+
+    config = build_demo_config()
+    optimizer = AgentCostOptimizer(config)
+
+    tasks = [
+        ("What is the capital of France?", 1),
+        ("Write a Python function to reverse a linked list", 3),
+        ("Research the latest advancements in transformer architectures and summarize key findings", 4),
+        ("Review this contract for liability clauses and check GDPR compliance", 5),
+        ("Help me with this thing", 3),
+        ("Debug this segfault in our C++ thread pool implementation", 4),
+        ("Draft an email to the team about the deployment schedule for next week", 2),
+        ("Plan a 3-month roadmap for migrating our ML infrastructure to Kubernetes", 4),
+        ("Search for open issues in the repo and create a summary report", 2),
+        ("Query the database for Q3 sales data broken down by region, then produce a chart", 3),
+    ]
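+    # Tasks deliberately range from trivial lookups to high-stakes review,
+    # plus one underspecified request, to exercise routing across tiers.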
+
+    results = []
+    for request, difficulty in tasks:
+        result = demo_task(optimizer, request, difficulty)
+        results.append({
+            "request": request,
+            "model": result.routing_decision.model_id,
+            "tier": result.routing_decision.tier,
+            "estimated_cost": result.estimated_cost,
+            "verifier": result.verifier_decision.decision.value if result.verifier_decision else "none",
+        })
+
+    # Summary
+    print(f"\n{'='*80}")
+    print("SUMMARY")
+    print(f"{'='*80}")
+    total_est = sum(r["estimated_cost"] for r in results)
+    print(f"Total estimated cost for {len(tasks)} tasks: ${total_est:.4f}")
+
+    # Show model distribution (Counter is imported at the top of the file)
+    model_counts = Counter(r["model"] for r in results)
+    print("\nModel distribution:")
+    for model, count in model_counts.most_common():
+        print(f"  {model}: {count} tasks ({count / len(tasks) * 100:.0f}%)")
+
+    print("\n✅ Demo complete!")
+    print("  Repo: https://huggingface.co/narcolepticchicken/agent-cost-optimizer")
+
+
+if __name__ == "__main__":
+    main()
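
To try the demo (assuming the aco package from this repo is importable), run the script directly; no API keys are needed since all decisions are simulated:

    python examples/end_to_end_demo.py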