# Hosting-page header captured with this file (not configuration data):
# agent-cost-optimizer / config.yaml
# narcolepticchicken's picture
# Upload config.yaml
# 9a3f54b verified
# Agent Cost Optimizer Configuration

# Project identifier.
project_name: 'agent-cost-optimizer'
# Trace storage location, given relative to the current directory.
trace_storage_path: './traces'
# Model catalogue, keyed by model name; each entry repeats its key as model_id.
# Model tiers: 1=tiny, 2=cheap, 3=medium, 4=frontier, 5=specialist, 6=verifier
# (no entry in this section is assigned tier 6).
# Only `frontier` and `specialist` set supports_reasoning and carry a non-zero
# cost_per_1k_reasoning; every entry uses cache_discount_rate 0.5.
# NOTE(review): cost_per_1k_* look like prices per 1,000 tokens and
# latency_ms_estimate like milliseconds; the currency is not stated - confirm.
models:
  tiny_local:
    model_id: 'tiny_local'
    provider: 'local'
    cost_per_1k_input: 0.0001
    cost_per_1k_output: 0.0002
    cost_per_1k_reasoning: 0.0
    latency_ms_estimate: 200
    strength_tier: 1
    max_context: 32768
    supports_tools: true
    supports_reasoning: false
    cache_discount_rate: 0.5

  cheap_cloud:
    model_id: 'cheap_cloud'
    provider: 'cloud'
    cost_per_1k_input: 0.0005
    cost_per_1k_output: 0.001
    cost_per_1k_reasoning: 0.0
    latency_ms_estimate: 500
    strength_tier: 2
    max_context: 128000
    supports_tools: true
    supports_reasoning: false
    cache_discount_rate: 0.5

  medium:
    model_id: 'medium'
    provider: 'cloud'
    cost_per_1k_input: 0.003
    cost_per_1k_output: 0.006
    cost_per_1k_reasoning: 0.0
    latency_ms_estimate: 800
    strength_tier: 3
    max_context: 128000
    supports_tools: true
    supports_reasoning: false
    cache_discount_rate: 0.5

  frontier:
    model_id: 'frontier'
    provider: 'cloud'
    cost_per_1k_input: 0.01
    cost_per_1k_output: 0.03
    cost_per_1k_reasoning: 0.01
    latency_ms_estimate: 1500
    strength_tier: 4
    max_context: 200000
    supports_tools: true
    supports_reasoning: true
    cache_discount_rate: 0.5

  specialist:
    model_id: 'specialist'
    provider: 'cloud'
    cost_per_1k_input: 0.015
    cost_per_1k_output: 0.045
    cost_per_1k_reasoning: 0.015
    latency_ms_estimate: 2000
    strength_tier: 5
    max_context: 128000
    supports_tools: true
    supports_reasoning: true
    cache_discount_rate: 0.5
# Tool catalogue, keyed by tool name; each entry repeats its key as tool_name.
# Only `retrieve` and `file_read` are marked cacheable; only `code_execution`
# and `compliance_check` set requires_verification.
# NOTE(review): cost_per_call shares no stated currency with the rest of the
# file, and latency_ms_estimate is presumably milliseconds - confirm.
tools:
  search:
    tool_name: 'search'
    cost_per_call: 0.002
    latency_ms_estimate: 500
    cacheable: false
    requires_verification: false
    max_retries: 3

  retrieve:
    tool_name: 'retrieve'
    cost_per_call: 0.001
    latency_ms_estimate: 300
    cacheable: true
    requires_verification: false
    max_retries: 3

  code_execution:
    tool_name: 'code_execution'
    cost_per_call: 0.005
    latency_ms_estimate: 1000
    cacheable: false
    requires_verification: true
    max_retries: 2

  linter:
    tool_name: 'linter'
    cost_per_call: 0.001
    latency_ms_estimate: 200
    cacheable: false
    requires_verification: false
    max_retries: 1

  file_read:
    tool_name: 'file_read'
    cost_per_call: 0.0005
    latency_ms_estimate: 100
    cacheable: true
    requires_verification: false
    max_retries: 3

  compliance_check:
    tool_name: 'compliance_check'
    cost_per_call: 0.01
    latency_ms_estimate: 1500
    cacheable: false
    requires_verification: true
    max_retries: 2
# Verifier catalogue, keyed by verifier name; the single entry repeats its key
# as verifier_model_id.
# NOTE(review): confidence_threshold is presumably the minimum confidence the
# verifier must report for a result to be accepted - confirm with the consumer.
verifiers:
  verifier_medium:
    verifier_model_id: 'verifier_medium'
    cost_per_call: 0.005
    latency_ms_estimate: 800
    confidence_threshold: 0.8
# Routing policy
# A single policy named "default" of type "cascade".
# NOTE(review): presumably a cascade tries models in order and escalates when
# confidence falls below threshold_confidence, at most max_cascade_depth
# times - confirm against the router implementation.
routing_policy:
  name: 'default'
  type: 'cascade'
  threshold_confidence: 0.7
  max_cascade_depth: 3
  enable_verifier_fallback: true
  enable_escalation: true
# Cost weights
# Scalar weights and penalties. The three *_cost_weight values are all 1.0,
# latency_weight is the smallest (0.1), and the three penalties are the
# largest values in the group.
# NOTE(review): how these are combined is not visible in this file; presumably
# they are coefficients of the optimizer's cost objective - confirm.
model_cost_weight: 1.0
tool_cost_weight: 1.0
verifier_cost_weight: 1.0
latency_weight: 0.1
retry_penalty_weight: 2.0
false_done_penalty: 10.0
unsafe_cheap_model_penalty: 20.0
missed_escalation_penalty: 15.0
# Module toggles
# One boolean per module; every module is switched on in this configuration.
# NOTE(review): presumably setting a flag to false disables the named
# component - confirm against the code that reads these flags.
enable_telemetry: true
enable_classifier: true
enable_router: true
enable_context_budgeter: true
enable_cache_layout: true
enable_tool_gate: true
enable_verifier_budgeter: true
enable_retry_optimizer: true
enable_meta_tool_miner: true
enable_early_termination: true
# Cache-aware layout
# Two ordered lists of prompt-section names: a "stable" prefix group and a
# "dynamic" suffix group.
# NOTE(review): presumably the stable sections are emitted first so that a
# cached prompt prefix can be reused across calls - confirm.
cache_prefix_stable:
  - system_rules
  - tool_descriptions
  - user_preferences
cache_suffix_dynamic:
  - user_message
  - retrieved_docs
  - recent_trace
  - artifacts
# Early termination
# Limits sharing the doom_ prefix.
# NOTE(review): presumably a run is stopped early once one of these limits is
# exceeded; the baseline that doom_max_cost_ratio is measured against is not
# shown in this file - confirm.
doom_max_cost_ratio: 3.0
doom_max_retries: 3
doom_no_progress_steps: 5
doom_verifier_disagreement_threshold: 2
# Meta-tool mining
# NOTE(review): presumably a recurring tool sequence must be observed at least
# meta_tool_min_frequency times with a success rate of at least
# meta_tool_min_success_rate before it is proposed as a meta-tool - confirm.
meta_tool_min_frequency: 5
meta_tool_min_success_rate: 0.8