---
# Configuration for the "agent-cost-optimizer" project.
#
# NOTE(review): this file was restored from a copy whose lines were wrapped in
# table pipes and had lost their indentation. Nesting is inferred from key
# order: keys following a parent with no value are its children, and each
# group after a blank separator is top-level. Confirm against the loader's
# schema before relying on it.

# General project settings.
project_name: "agent-cost-optimizer"
trace_storage_path: "./traces"

# Model catalogue. Each entry is keyed by the same string as its `model_id`.
# `strength_tier` increases from 1 (tiny_local) to 5 (specialist).
# NOTE(review): cost fields are presumably a currency amount per 1k tokens and
# latency is in milliseconds (per the key names) — confirm units with the
# consumer.
models:
  tiny_local:
    model_id: "tiny_local"
    provider: "local"
    cost_per_1k_input: 0.0001
    cost_per_1k_output: 0.0002
    cost_per_1k_reasoning: 0.0
    latency_ms_estimate: 200
    strength_tier: 1
    max_context: 32768
    supports_tools: true
    supports_reasoning: false
    cache_discount_rate: 0.5

  cheap_cloud:
    model_id: "cheap_cloud"
    provider: "cloud"
    cost_per_1k_input: 0.0005
    cost_per_1k_output: 0.001
    cost_per_1k_reasoning: 0.0
    latency_ms_estimate: 500
    strength_tier: 2
    max_context: 128000
    supports_tools: true
    supports_reasoning: false
    cache_discount_rate: 0.5

  medium:
    model_id: "medium"
    provider: "cloud"
    cost_per_1k_input: 0.003
    cost_per_1k_output: 0.006
    cost_per_1k_reasoning: 0.0
    latency_ms_estimate: 800
    strength_tier: 3
    max_context: 128000
    supports_tools: true
    supports_reasoning: false
    cache_discount_rate: 0.5

  # The two entries below are the only ones with supports_reasoning: true and
  # a non-zero cost_per_1k_reasoning.
  frontier:
    model_id: "frontier"
    provider: "cloud"
    cost_per_1k_input: 0.01
    cost_per_1k_output: 0.03
    cost_per_1k_reasoning: 0.01
    latency_ms_estimate: 1500
    strength_tier: 4
    max_context: 200000
    supports_tools: true
    supports_reasoning: true
    cache_discount_rate: 0.5

  specialist:
    model_id: "specialist"
    provider: "cloud"
    cost_per_1k_input: 0.015
    cost_per_1k_output: 0.045
    cost_per_1k_reasoning: 0.015
    latency_ms_estimate: 2000
    strength_tier: 5
    max_context: 128000
    supports_tools: true
    supports_reasoning: true
    cache_discount_rate: 0.5

# Tool catalogue. Each entry is keyed by the same string as its `tool_name`
# and carries a flat per-call cost, a latency estimate, and retry/caching
# flags.
tools:
  search:
    tool_name: "search"
    cost_per_call: 0.002
    latency_ms_estimate: 500
    cacheable: false
    requires_verification: false
    max_retries: 3

  retrieve:
    tool_name: "retrieve"
    cost_per_call: 0.001
    latency_ms_estimate: 300
    cacheable: true
    requires_verification: false
    max_retries: 3

  code_execution:
    tool_name: "code_execution"
    cost_per_call: 0.005
    latency_ms_estimate: 1000
    cacheable: false
    requires_verification: true
    max_retries: 2

  linter:
    tool_name: "linter"
    cost_per_call: 0.001
    latency_ms_estimate: 200
    cacheable: false
    requires_verification: false
    max_retries: 1

  file_read:
    tool_name: "file_read"
    cost_per_call: 0.0005
    latency_ms_estimate: 100
    cacheable: true
    requires_verification: false
    max_retries: 3

  compliance_check:
    tool_name: "compliance_check"
    cost_per_call: 0.01
    latency_ms_estimate: 1500
    cacheable: false
    requires_verification: true
    max_retries: 2

# Verifier catalogue.
# NOTE(review): "verifier_medium" does not match any key under `models` in
# this file — confirm the id is resolved elsewhere.
verifiers:
  verifier_medium:
    verifier_model_id: "verifier_medium"
    cost_per_call: 0.005
    latency_ms_estimate: 800
    confidence_threshold: 0.8

# Routing policy settings.
routing_policy:
  name: "default"
  type: "cascade"
  threshold_confidence: 0.7
  max_cascade_depth: 3
  enable_verifier_fallback: true
  enable_escalation: true

# Cost weights and penalties.
# NOTE(review): placed at top level because no parent key precedes them in
# the recovered text — confirm they do not belong under a parent mapping.
model_cost_weight: 1.0
tool_cost_weight: 1.0
verifier_cost_weight: 1.0
latency_weight: 0.1
retry_penalty_weight: 2.0
false_done_penalty: 10.0
unsafe_cheap_model_penalty: 20.0
missed_escalation_penalty: 15.0

# Feature toggles; every module is enabled in this configuration.
enable_telemetry: true
enable_classifier: true
enable_router: true
enable_context_budgeter: true
enable_cache_layout: true
enable_tool_gate: true
enable_verifier_budgeter: true
enable_retry_optimizer: true
enable_meta_tool_miner: true
enable_early_termination: true

# Cache layout: named sections listed as stable prefix vs. dynamic suffix.
cache_prefix_stable:
  - system_rules
  - tool_descriptions
  - user_preferences
cache_suffix_dynamic:
  - user_message
  - retrieved_docs
  - recent_trace
  - artifacts

# "doom" thresholds.
# NOTE(review): presumably the limits used by early termination — confirm
# with the consumer.
doom_max_cost_ratio: 3.0
doom_max_retries: 3
doom_no_progress_steps: 5
doom_verifier_disagreement_threshold: 2

# Meta-tool mining thresholds.
meta_tool_min_frequency: 5
meta_tool_min_success_rate: 0.8