narcolepticchicken committed on
Commit
a0449c9
·
verified ·
1 Parent(s): 74d6f4b

Upload docs/deployment_guide.md

Browse files
Files changed (1) hide show
  1. docs/deployment_guide.md +160 -390
docs/deployment_guide.md CHANGED
@@ -1,441 +1,211 @@
1
- # Agent Cost Optimizer Deployment Guide
2
 
3
- ## Quick Start
4
-
5
- ### Installation
6
 
7
- ```bash
8
- pip install git+https://huggingface.co/narcolepticchicken/agent-cost-optimizer
9
- ```
10
 
11
- Or clone and install locally:
12
 
13
  ```bash
 
14
  git clone https://huggingface.co/narcolepticchicken/agent-cost-optimizer
15
  cd agent-cost-optimizer
 
 
16
  pip install -e .
 
 
 
 
 
 
17
  ```
18
 
19
- ### Basic Usage
20
 
21
  ```python
22
  from aco import AgentCostOptimizer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
- # Load default configuration
25
- optimizer = AgentCostOptimizer()
26
-
27
- # Optimize a single agent request
28
- result = optimizer.optimize(
29
- "Write a Python function to reverse a linked list",
30
- run_state={
31
- "trace_id": "run-001",
32
- "planned_tools": [("code_execution", {"code": "test"})],
33
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  )
35
 
36
- print(f"Model: {result.routing_decision.model_id}")
37
- print(f"Tier: {result.routing_decision.tier}")
38
- print(f"Estimated Cost: ${result.estimated_cost:.4f}")
39
- print(f"Tool Decisions: {[d.decision.value for d in result.tool_decisions]}")
40
  ```
41
 
42
  ## Configuration
43
 
44
- ### Config File
45
-
46
- Create a `config.yaml`:
47
-
48
- ```yaml
49
- project_name: "my-agent-optimizer"
50
- trace_storage_path: "./traces"
51
-
52
- models:
53
- gpt-4o-mini:
54
- model_id: "gpt-4o-mini"
55
- provider: "openai"
56
- cost_per_1k_input: 0.00015
57
- cost_per_1k_output: 0.0006
58
- strength_tier: 2
59
- max_context: 128000
60
- cache_discount_rate: 0.5
61
-
62
- gpt-4o:
63
- model_id: "gpt-4o"
64
- provider: "openai"
65
- cost_per_1k_input: 0.0025
66
- cost_per_1k_output: 0.01
67
- strength_tier: 4
68
- max_context: 128000
69
- cache_discount_rate: 0.5
70
-
71
- tools:
72
- search:
73
- tool_name: "search"
74
- cost_per_call: 0.002
75
- latency_ms_estimate: 500
76
-
77
- code_execution:
78
- tool_name: "code_execution"
79
- cost_per_call: 0.005
80
- latency_ms_estimate: 1000
81
- requires_verification: true
82
-
83
- verifiers:
84
- verifier_medium:
85
- verifier_model_id: "gpt-4o-mini"
86
- cost_per_call: 0.005
87
- confidence_threshold: 0.8
88
-
89
- # Enable/disable modules
90
- enable_router: true
91
- enable_context_budgeter: true
92
- enable_cache_layout: true
93
- enable_tool_gate: true
94
- enable_verifier_budgeter: true
95
- enable_retry_optimizer: true
96
- enable_meta_tool_miner: true
97
- enable_early_termination: true
98
- ```
99
 
100
- Load it:
 
 
 
 
 
 
101
 
102
- ```python
103
- optimizer = AgentCostOptimizer.from_config("config.yaml")
104
- ```
105
 
106
- ## Integration with Agent Harness
 
 
 
 
107
 
108
- ### Generic Integration Pattern
109
 
110
- ```python
111
- class MyAgentHarness:
112
- def __init__(self):
113
- self.optimizer = AgentCostOptimizer.from_config("config.yaml")
114
-
115
- def execute(self, user_request: str, context: dict):
116
- # 1. Build run state
117
- run_state = {
118
- "trace_id": f"run-{uuid.uuid4()}",
119
- "planned_tools": self.plan_tools(user_request),
120
- "context_pieces": context,
121
- "current_cost": 0.0,
122
- "step_number": 1,
123
- "total_steps": self.estimate_steps(user_request),
124
- "is_irreversible": False,
125
- }
126
-
127
- # 2. Call optimizer BEFORE execution
128
- decision = self.optimizer.optimize(user_request, run_state)
129
-
130
- # 3. Apply optimizer decisions
131
- selected_model = decision.routing_decision.model_id
132
-
133
- # Apply tool gate
134
- approved_tools = [
135
- td for td in decision.tool_decisions
136
- if td.decision.value in ("use", "batch", "parallel")
137
- ]
138
-
139
- # Apply context budget
140
- if decision.context_budget:
141
- context = self._apply_context_budget(context, decision.context_budget)
142
-
143
- # Apply cache layout
144
- if decision.prompt_layout:
145
- prompt = self._apply_cache_layout(decision.prompt_layout)
146
-
147
- # Check doom assessment
148
- if decision.doom_assessment and decision.doom_assessment.action.value == "mark_blocked":
149
- return {"status": "BLOCKED", "reason": decision.doom_assessment.reasoning}
150
-
151
- # 4. Execute with optimized parameters
152
- result = self.llm_call(
153
- model=selected_model,
154
- prompt=prompt,
155
- tools=approved_tools,
156
- max_tokens=decision.routing_decision.max_tokens,
157
- )
158
-
159
- # 5. Record step
160
- self.optimizer.record_step(
161
- trace_id=decision.trace_id,
162
- model_call=ModelCall(
163
- model_id=selected_model,
164
- provider="openai",
165
- input_tokens=result.input_tokens,
166
- output_tokens=result.output_tokens,
167
- cost_per_1k_input=0.0025,
168
- cost_per_1k_output=0.01,
169
- ),
170
- tool_calls=[...],
171
- context_size_tokens=len(prompt) // 4,
172
- step_outcome=Outcome.SUCCESS if result.success else Outcome.FAILURE,
173
- )
174
-
175
- # 6. Finalize trace
176
- self.optimizer.finalize_trace(
177
- trace_id=decision.trace_id,
178
- outcome=Outcome.SUCCESS if result.success else Outcome.FAILURE,
179
- user_satisfaction=1.0 if result.success else 0.0,
180
- )
181
-
182
- return result
183
  ```
184
-
185
- ### LangChain Integration
186
-
187
- ```python
188
- from aco import AgentCostOptimizer
189
- from langchain.agents import AgentExecutor
190
-
191
- class ACOWrapper:
192
- def __init__(self, agent_executor, optimizer):
193
- self.agent = agent_executor
194
- self.optimizer = optimizer
195
-
196
- def invoke(self, input_data):
197
- # Pre-optimize
198
- decision = self.optimizer.optimize(
199
- input_data["input"],
200
- run_state={
201
- "planned_tools": [(t.name, {}) for t in self.agent.tools],
202
- "trace_id": input_data.get("run_id", str(uuid.uuid4())),
203
- }
204
- )
205
-
206
- # Override agent LLM based on routing decision
207
- self.agent.llm = self.get_llm(decision.routing_decision.model_id)
208
-
209
- # Filter tools based on tool gate
210
- self.agent.tools = [
211
- t for t in self.agent.tools
212
- if any(d.tool_name == t.name and d.decision.value == "use"
213
- for d in decision.tool_decisions)
214
- ]
215
-
216
- # Execute
217
- result = self.agent.invoke(input_data)
218
-
219
- # Record and finalize
220
- # ... (see generic pattern above)
221
-
222
- return result
223
  ```
224
 
225
- ### OpenAI Assistants Integration
226
-
227
- ```python
228
- from aco import AgentCostOptimizer
229
-
230
- class ACOAssistantWrapper:
231
- def __init__(self, assistant_id, optimizer):
232
- self.assistant_id = assistant_id
233
- self.optimizer = optimizer
234
-
235
- def create_run(self, thread_id, instructions):
236
- # Optimize instructions (context budgeter)
237
- decision = self.optimizer.optimize(
238
- instructions,
239
- run_state={
240
- "trace_id": f"assistant-run-{thread_id}",
241
- "context_pieces": {"system_rules": instructions},
242
- }
243
- )
244
-
245
- # Use cache-aware prompt layout
246
- if decision.prompt_layout:
247
- optimized_instructions = decision.prompt_layout.prefix + "\n\n" + decision.prompt_layout.suffix
248
- else:
249
- optimized_instructions = instructions
250
-
251
- # Create run with optimized parameters
252
- return openai.beta.threads.runs.create(
253
- thread_id=thread_id,
254
- assistant_id=self.assistant_id,
255
- instructions=optimized_instructions,
256
- model=decision.routing_decision.model_id,
257
- )
258
  ```
259
-
260
- ## Multi-Provider Support
261
-
262
- ACO supports any provider with cost metadata:
263
-
264
- ```yaml
265
- models:
266
- claude-3-haiku:
267
- model_id: "claude-3-haiku-20240307"
268
- provider: "anthropic"
269
- cost_per_1k_input: 0.00025
270
- cost_per_1k_output: 0.00125
271
- strength_tier: 2
272
-
273
- claude-3-opus:
274
- model_id: "claude-3-opus-20240229"
275
- provider: "anthropic"
276
- cost_per_1k_input: 0.015
277
- cost_per_1k_output: 0.075
278
- strength_tier: 4
279
-
280
- gemini-pro:
281
- model_id: "gemini-1.5-pro"
282
- provider: "google"
283
- cost_per_1k_input: 0.0035
284
- cost_per_1k_output: 0.0105
285
- strength_tier: 3
286
-
287
- deepseek-chat:
288
- model_id: "deepseek-chat"
289
- provider: "deepseek"
290
- cost_per_1k_input: 0.00014
291
- cost_per_1k_output: 0.00028
292
- strength_tier: 2
293
- cache_discount_rate: 0.5
294
  ```
295
 
296
- ## Local Model Support
297
-
298
- For self-hosted models:
299
-
300
- ```yaml
301
- models:
302
- llama-3.2-1b:
303
- model_id: "meta-llama/Llama-3.2-1B-Instruct"
304
- provider: "local"
305
- cost_per_1k_input: 0.0
306
- cost_per_1k_output: 0.0
307
- strength_tier: 1
308
- max_context: 128000
309
-
310
- qwen2.5-7b:
311
- model_id: "Qwen/Qwen2.5-7B-Instruct"
312
- provider: "local"
313
- cost_per_1k_input: 0.0
314
- cost_per_1k_output: 0.0
315
- strength_tier: 3
316
- max_context: 131072
317
  ```
318
 
319
- Use `cost_per_1k_input: 0.0` for local models. ACO will still optimize latency and context size.
320
-
321
- ## Benchmarking
322
-
323
- Run the benchmark suite:
324
 
325
  ```bash
326
- python eval_runner.py --tasks 1000 --output ./eval_results
327
- ```
328
 
329
- With ablations:
 
330
 
331
- ```bash
332
- python eval_runner.py --tasks 1000 --ablations --output ./eval_results
333
  ```
334
 
335
- Generate report:
336
 
337
  ```bash
338
- python -m aco.cli report --input ./eval_results/baseline_results.json
 
339
  ```
340
 
341
- ## Telemetry and Monitoring
342
-
343
- Traces are stored as JSON in `trace_storage_path`:
344
 
345
  ```python
346
- # List all traces
347
- traces = optimizer.telemetry.list_traces()
348
-
349
- # Get statistics
350
- stats = optimizer.telemetry.get_stats()
351
- print(f"Total traces: {stats['count']}")
352
- print(f"Avg cost: ${stats['avg_cost']:.4f}")
353
- print(f"Success rate: {stats['success_rate']:.1%}")
354
-
355
- # Full optimizer stats
356
- all_stats = optimizer.get_stats()
357
- print(json.dumps(all_stats, indent=2))
358
- ```
359
 
360
- ## Advanced: Training a Custom Router
361
 
362
- To train a model-specific router using your trace data:
363
-
364
- ```python
365
- from aco.optimizer import AgentCostOptimizer
366
- from aco.config import ACOConfig, ModelConfig
367
-
368
- # 1. Collect traces
369
- optimizer = AgentCostOptimizer()
370
- # ... run agent tasks ...
371
-
372
- # 2. Extract features and labels from traces
373
- traces = [optimizer.telemetry.load_trace(tid) for tid in optimizer.telemetry.list_traces()]
374
-
375
- # 3. Train a simple classifier (example with sklearn)
376
- from sklearn.ensemble import RandomForestClassifier
377
- import numpy as np
378
-
379
- X = []
380
- y = []
381
- for trace in traces:
382
- # Features: task_type, request_length, predicted_cost, prior_success_rate
383
- features = [
384
- hash(trace["task_type"]) % 1000,
385
- len(trace["user_request"]),
386
- trace.get("total_cost", 0.01),
387
- ]
388
- # Label: optimal model tier (from oracle comparison)
389
- optimal_tier = trace.get("metadata", {}).get("optimal_tier", 3)
390
- X.append(features)
391
- y.append(optimal_tier)
392
-
393
- clf = RandomForestClassifier(n_estimators=100)
394
- clf.fit(X, y)
395
-
396
- # 4. Deploy: override router decisions
397
- # In production, integrate the classifier into ModelCascadeRouter._route_learned()
398
  ```
399
 
400
- For RL-based routing (GRPO/DPO), see the literature review for BAAR and xRouter approaches.
401
 
402
- ## Production Checklist
403
-
404
- - [ ] Configure all models with accurate cost metadata
405
- - [ ] Configure all tools with cost/latency estimates
406
- - [ ] Set appropriate tier mappings for your use case
407
- - [ ] Enable telemetry to collect traces for learning
408
- - [ ] Set doom thresholds appropriate for your SLA
409
- - [ ] Configure verifier thresholds for safety-critical tasks
410
- - [ ] Test with small synthetic benchmark before deployment
411
- - [ ] Monitor regression rate and false-DONE rate
412
- - [ ] Review and adjust routing policy monthly
413
- - [ ] Mine meta-tools after collecting 100+ successful traces
414
-
415
- ## Troubleshooting
416
-
417
- ### High regression rate
418
- - Check if model tier mappings match your actual model capabilities
419
- - Increase `unsafe_cheap_model_penalty` in config
420
- - Enable verifier on more task types
421
-
422
- ### Low cost savings
423
- - Verify cache layout is enabled (check cache hit rate)
424
- - Ensure tool gate is catching repeated/unnecessary calls
425
- - Check if meta-tool miner is enabled and has enough traces
426
-
427
- ### High false-DONE rate
428
- - Increase verifier threshold for final-step verification
429
- - Enable doom detector with stricter `doom_no_progress_steps`
430
- - Add more failure patterns to retry optimizer
431
-
432
- ### Slow routing decisions
433
- - Use prompt-only or static routing instead of learned
434
- - Cache classification results for repeated request patterns
435
- - Pre-compute meta-tools during off-peak hours
436
-
437
- ## Support
438
 
439
- - Repository: https://huggingface.co/narcolepticchicken/agent-cost-optimizer
440
- - Issues: Open a discussion on the Hugging Face Hub
441
- - Literature Review: See `docs/literature_review.md`
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Agent Cost Optimizer - Deployment Guide
2
 
3
+ ## Overview
 
 
4
 
5
+ The Agent Cost Optimizer (ACO) is a control layer that sits **in front of, around, or inside** any agent harness. It does not replace your agent — it optimizes how your agent runs.
 
 
6
 
7
+ ## Installation
8
 
9
  ```bash
10
+ # Clone the repository
11
  git clone https://huggingface.co/narcolepticchicken/agent-cost-optimizer
12
  cd agent-cost-optimizer
13
+
14
+ # Install dependencies
15
  pip install -e .
16
+
17
+ # Optional: Gradio dashboard
18
+ pip install gradio
19
+
20
+ # Optional: Trackio monitoring
21
+ pip install trackio
22
  ```
23
 
24
+ ## Quick Start
25
 
26
  ```python
27
  from aco import AgentCostOptimizer
28
+ from aco.config import ACOConfig, ModelConfig, RoutingPolicy
29
+
30
+ # 1. Define your available models with real pricing
31
+ config = ACOConfig(
32
+ models={
33
+ "gpt-4o-mini": ModelConfig(
34
+ model_id="gpt-4o-mini", provider="openai",
35
+ cost_per_1k_input=0.00015, cost_per_1k_output=0.0006,
36
+ strength_tier=2, max_context=128000,
37
+ ),
38
+ "gpt-4o": ModelConfig(
39
+ model_id="gpt-4o", provider="openai",
40
+ cost_per_1k_input=0.0025, cost_per_1k_output=0.01,
41
+ strength_tier=4, max_context=128000,
42
+ ),
43
+ "deepseek-chat": ModelConfig(
44
+ model_id="deepseek-chat", provider="deepseek",
45
+ cost_per_1k_input=0.00014, cost_per_1k_output=0.00028,
46
+ strength_tier=3, max_context=64000,
47
+ cache_discount_rate=0.5,
48
+ ),
49
+ },
50
+ routing_policy=RoutingPolicy("cascade"),
51
+ )
52
 
53
+ # 2. Initialize optimizer
54
+ optimizer = AgentCostOptimizer(config)
55
+
56
+ # 3. Before each agent step, call optimize()
57
+ request = "Write a Python function to reverse a linked list"
58
+ run_state = {
59
+ "trace_id": "run-001",
60
+ "planned_tools": [("file_read", {"path": "linked_list.py"})],
61
+ "previous_tool_calls": [],
62
+ "current_cost": 0.0,
63
+ "step_number": 1,
64
+ "total_steps": 3,
65
+ "is_irreversible": False,
66
+ "routing_mode": "cascade",
67
+ }
68
+
69
+ result = optimizer.optimize(request, run_state)
70
+
71
+ # 4. Use the decisions
72
+ print(f"Use model: {result.routing_decision.model_id}")
73
+ print(f"Max tokens: {result.routing_decision.max_tokens}")
74
+ print(f"Temperature: {result.routing_decision.temperature}")
75
+ print(f"Estimated cost: ${result.estimated_cost:.4f}")
76
+
77
+ # 5. After execution, record actual costs (import ModelCall, ToolCall, and Outcome first; their module is not shown in this guide)
78
+ optimizer.record_step(
79
+ trace_id=result.trace_id,
80
+ model_call=ModelCall(
81
+ model_id=result.routing_decision.model_id,
82
+ provider=result.routing_decision.provider,
83
+ input_tokens=2000,
84
+ output_tokens=800,
85
+ latency_ms=1200,
86
+ ),
87
+ tool_calls=[ToolCall(tool_name="file_read", tool_input={"path": "linked_list.py"},
88
+ tool_cost=0.001, tool_latency_ms=300)],
89
+ context_size_tokens=2500,
90
+ step_outcome=Outcome.SUCCESS,
91
  )
92
 
93
+ # 6. Finalize trace
94
+ optimizer.finalize_trace(result.trace_id, outcome=Outcome.SUCCESS)
 
 
95
  ```
96
 
97
  ## Configuration
98
 
99
+ ### Model Tiers
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
 
101
+ | Tier | Typical Models | Cost | Strength | When to Use |
102
+ |------|---------------|------|----------|-------------|
103
+ | 1 | Local Qwen-0.5B, Phi-1 | Near-zero | 35% | Factual QA, simple extraction |
104
+ | 2 | GPT-4o-mini, Claude-3.5-Haiku, DeepSeek | $0.15/M tok | 55% | Drafting, classification, parsing |
105
+ | 3 | Claude-3.5-Sonnet, DeepSeek-V2 | $1.5-3/M tok | 80% | Coding, reasoning, research |
106
+ | 4 | GPT-4o, Claude-3-Opus | $2.5-5/M tok | 93% | Complex analysis, legal, creative |
107
+ | 5 | o1, o3-mini, specialist | $3-15/M tok | 97% | Math, safety-critical, adversarial |
108
 
109
+ ### Routing Modes
 
 
110
 
111
+ - **`cheapest`**: Always use the lowest-cost model (dangerous; only for internal tools)
112
+ - **`strongest`**: Always use the frontier model (expensive, maximum quality)
113
+ - **`cascade`**: Try cheap first, escalate on low confidence
114
+ - **`risk_based`**: Route by predicted task risk
115
+ - **`adaptive`**: Learn from trace history
116
 
117
+ ## Integration Patterns
118
 
119
+ ### Pattern A: Front Proxy (Pre-Step)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  ```
121
+ User Request → ACO.optimize() → [Decisions] → Agent Harness → LLM API
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  ```
123
 
124
+ ### Pattern B: Around Wrapper (Pre + Post)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  ```
126
+ User Request → ACO.optimize() → Agent Step → ACO.record_step() → Next Step
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  ```
128
 
129
+ ### Pattern C: Inside Agent (Per-Step)
130
+ ```
131
+ Agent Loop:
132
+ if step == 0: ACO.optimize()
133
+ else: ACO.reassess() # mid-run adjustment
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  ```
135
 
136
+ ## Benchmarking Your Own Traces
 
 
 
 
137
 
138
  ```bash
139
+ # Generate benchmark
140
+ python -m aco.benchmark --tasks 1000 --output ./results
141
 
142
+ # Compare baselines
143
+ python -m aco.benchmark --compare always_frontier always_cheap cascade full_optimizer
144
 
145
+ # Run ablation study
146
+ python -m aco.benchmark --ablate all
147
  ```
148
 
149
+ ## Dashboard
150
 
151
  ```bash
152
+ # Launch Gradio dashboard
153
+ python dashboard.py --results ./eval_results_v2/baseline_results.json
154
  ```
155
 
156
+ ## Trackio Integration
 
 
157
 
158
  ```python
159
+ from aco.trackio_integration import ACOTrackioLogger
 
 
 
 
 
 
 
 
 
 
 
 
160
 
161
+ logger = ACOTrackioLogger(project="aco-production", space_id="your-space")
162
 
163
+ # Inside your agent loop
164
+ logger.log_decision(run_id, decision, cost, success)
165
+ logger.alert(run_id, "Cost spike", f"Step {step} cost ${cost:.3f}", "WARN")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
  ```
167
 
168
+ ## Multi-Provider Setup
169
 
170
+ ```python
171
+ config = ACOConfig(
172
+ models={
173
+ "gpt-4o": ModelConfig(..., provider="openai", api_key_env="OPENAI_API_KEY"),
174
+ "claude-3.5-sonnet": ModelConfig(..., provider="anthropic", api_key_env="ANTHROPIC_API_KEY"),
175
+ "deepseek-chat": ModelConfig(..., provider="deepseek", api_key_env="DEEPSEEK_API_KEY"),
176
+ "local-qwen": ModelConfig(..., provider="local", base_url="http://localhost:8000/v1"),
177
+ }
178
+ )
179
+ ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
 
181
+ ## Safety Rules
182
+
183
+ 1. **Legal/regulated tasks never go below tier 4** without explicit override
184
+ 2. **Tool calls marked `requires_verification` always get a verifier**
185
+ 3. **Irreversible actions trigger automatic frontier escalation**
186
+ 4. **All routing decisions include reasoning strings for audit**
187
+ 5. **The doom detector stops runs whose cost exceeds 3x the estimate** (see `doom_max_cost_ratio`)
188
+
189
+ ## Performance Tuning
190
+
191
+ | Parameter | Default | Tune When... |
192
+ |-----------|---------|-------------|
193
+ | `doom_max_cost_ratio` | 3.0 | Runs often terminate too early |
194
+ | `doom_no_progress_steps` | 5 | Long-horizon tasks get killed |
195
+ | `verifier_confidence_threshold` | 0.7 | Too many or too few verifier calls are triggered |
196
+ | `max_context_fraction` | 0.8 | Context truncation issues |
197
+ | `cache_prefix_max_tokens` | 8000 | Cache hit rate is low |
198
+
199
+ ## Monitoring
200
+
201
+ Track these metrics in production:
202
+ - Cost per successful task (primary)
203
+ - Cost per artifact (secondary)
204
+ - Task success rate by tier
205
+ - Cache hit rate
206
+ - Tool call efficiency (tool results actually used vs. tool calls made)
207
+ - Verifier pass rate
208
+ - Retry rate
209
+ - False-DONE rate
210
+ - Escalation rate
211
+ - Doom detector precision/recall