# Agent Cost Optimizer Configuration
# Project identifier for this configuration.
project_name: "agent-cost-optimizer"
# Location for stored traces, given as a relative path.
# NOTE(review): presumably resolved against the process working directory — confirm with the loader.
trace_storage_path: "./traces"
# Model catalog, keyed by model name; each entry's model_id repeats its key.
# strength_tier legend: 1=tiny, 2=cheap, 3=medium, 4=frontier, 5=specialist,
# 6=verifier (no entry in this catalog uses tier 6).
# NOTE(review): cost_per_1k_* is presumably a price per 1,000 tokens; the
# currency is not stated in this file — confirm with the consumer.
models:
  tiny_local:
    model_id: "tiny_local"
    provider: "local"
    cost_per_1k_input: 0.0001
    cost_per_1k_output: 0.0002
    cost_per_1k_reasoning: 0.0
    latency_ms_estimate: 200
    strength_tier: 1
    max_context: 32768
    supports_tools: true
    supports_reasoning: false
    cache_discount_rate: 0.5
  cheap_cloud:
    model_id: "cheap_cloud"
    provider: "cloud"
    cost_per_1k_input: 0.0005
    cost_per_1k_output: 0.001
    cost_per_1k_reasoning: 0.0
    latency_ms_estimate: 500
    strength_tier: 2
    max_context: 128000
    supports_tools: true
    supports_reasoning: false
    cache_discount_rate: 0.5
  medium:
    model_id: "medium"
    provider: "cloud"
    cost_per_1k_input: 0.003
    cost_per_1k_output: 0.006
    cost_per_1k_reasoning: 0.0
    latency_ms_estimate: 800
    strength_tier: 3
    max_context: 128000
    supports_tools: true
    supports_reasoning: false
    cache_discount_rate: 0.5
  # Only frontier and specialist enable reasoning and carry a non-zero
  # reasoning cost.
  frontier:
    model_id: "frontier"
    provider: "cloud"
    cost_per_1k_input: 0.01
    cost_per_1k_output: 0.03
    cost_per_1k_reasoning: 0.01
    latency_ms_estimate: 1500
    strength_tier: 4
    max_context: 200000
    supports_tools: true
    supports_reasoning: true
    cache_discount_rate: 0.5
  specialist:
    model_id: "specialist"
    provider: "cloud"
    cost_per_1k_input: 0.015
    cost_per_1k_output: 0.045
    cost_per_1k_reasoning: 0.015
    latency_ms_estimate: 2000
    strength_tier: 5
    max_context: 128000
    supports_tools: true
    supports_reasoning: true
    cache_discount_rate: 0.5
# Tool catalog, keyed by tool name; each entry's tool_name repeats its key.
# Every entry declares a per-call cost, a latency estimate in milliseconds,
# whether it is cacheable, whether it requires verification, and a retry cap.
tools:
  search:
    tool_name: "search"
    cost_per_call: 0.002
    latency_ms_estimate: 500
    cacheable: false
    requires_verification: false
    max_retries: 3
  retrieve:
    tool_name: "retrieve"
    cost_per_call: 0.001
    latency_ms_estimate: 300
    cacheable: true
    requires_verification: false
    max_retries: 3
  code_execution:
    tool_name: "code_execution"
    cost_per_call: 0.005
    latency_ms_estimate: 1000
    cacheable: false
    requires_verification: true
    max_retries: 2
  linter:
    tool_name: "linter"
    cost_per_call: 0.001
    latency_ms_estimate: 200
    cacheable: false
    requires_verification: false
    max_retries: 1
  file_read:
    tool_name: "file_read"
    cost_per_call: 0.0005
    latency_ms_estimate: 100
    cacheable: true
    requires_verification: false
    max_retries: 3
  compliance_check:
    tool_name: "compliance_check"
    cost_per_call: 0.01
    latency_ms_estimate: 1500
    cacheable: false
    requires_verification: true
    max_retries: 2
# Verifier catalog, keyed by verifier name; verifier_model_id repeats the key.
# NOTE(review): confidence_threshold is presumably the minimum confidence a
# verifier result needs to be accepted — confirm with the consumer.
verifiers:
  verifier_medium:
    verifier_model_id: "verifier_medium"
    cost_per_call: 0.005
    latency_ms_estimate: 800
    confidence_threshold: 0.8
# Routing policy
# The policy's settings are children of routing_policy; left flat they would
# parse as unrelated top-level keys and routing_policy itself would be null.
routing_policy:
  name: "default"
  type: "cascade"
  threshold_confidence: 0.7
  max_cascade_depth: 3
  enable_verifier_fallback: true
  enable_escalation: true
# Cost weights
# NOTE(review): presumably coefficients of the optimizer's cost objective; the
# formula that consumes them is not visible in this file.
# NOTE(review): this copy of the file lost its indentation; these keys are
# shown at top level — confirm they are not meant to sit under another key.
model_cost_weight: 1.0
tool_cost_weight: 1.0
verifier_cost_weight: 1.0
latency_weight: 0.1
retry_penalty_weight: 2.0
false_done_penalty: 10.0
unsafe_cheap_model_penalty: 20.0
missed_escalation_penalty: 15.0
# Module toggles
# One boolean per optimizer module; all are enabled in this configuration.
# NOTE(review): presumably false disables the named module — confirm how the
# consumer treats a missing key.
enable_telemetry: true
enable_classifier: true
enable_router: true
enable_context_budgeter: true
enable_cache_layout: true
enable_tool_gate: true
enable_verifier_budgeter: true
enable_retry_optimizer: true
enable_meta_tool_miner: true
enable_early_termination: true
# Cache-aware layout
# Two ordered lists of prompt section names: a "stable" prefix group and a
# "dynamic" suffix group.
# NOTE(review): presumably the stable sections are placed first so a prompt
# cache can reuse them — confirm with the cache-layout module.
cache_prefix_stable:
  - system_rules
  - tool_descriptions
  - user_preferences
cache_suffix_dynamic:
  - user_message
  - retrieved_docs
  - recent_trace
  - artifacts
# Early termination
# NOTE(review): the doom_* keys are presumably limits past which a run is
# abandoned; exact semantics (e.g. what doom_max_cost_ratio is relative to)
# are not visible in this file — confirm with the early-termination module.
doom_max_cost_ratio: 3.0
doom_max_retries: 3
doom_no_progress_steps: 5
doom_verifier_disagreement_threshold: 2
# Meta-tool mining
# NOTE(review): presumably the minimum occurrence count and minimum success
# rate a pattern needs before it is considered — confirm with the
# meta-tool miner.
meta_tool_min_frequency: 5
meta_tool_min_success_rate: 0.8