---
# Agent Cost Optimizer Configuration
#
# NOTE(review): this file was restored to block style from a flattened form.
# Nesting under `models`, `tools`, `verifiers` and `routing_policy` follows the
# key names; every setting after `routing_policy` is assumed to be top-level.
# Confirm against the config loader's schema.

project_name: "agent-cost-optimizer"
trace_storage_path: "./traces"

# Model tiers: 1=tiny, 2=cheap, 3=medium, 4=frontier, 5=specialist, 6=verifier
# NOTE(review): no entry with strength_tier 6 appears in this section - confirm
# whether verifier models are meant to be declared here as well.
models:
  tiny_local:
    model_id: "tiny_local"
    provider: "local"
    cost_per_1k_input: 0.0001
    cost_per_1k_output: 0.0002
    cost_per_1k_reasoning: 0.0
    latency_ms_estimate: 200
    strength_tier: 1
    max_context: 32768
    supports_tools: true
    supports_reasoning: false
    cache_discount_rate: 0.5

  cheap_cloud:
    model_id: "cheap_cloud"
    provider: "cloud"
    cost_per_1k_input: 0.0005
    cost_per_1k_output: 0.001
    cost_per_1k_reasoning: 0.0
    latency_ms_estimate: 500
    strength_tier: 2
    max_context: 128000
    supports_tools: true
    supports_reasoning: false
    cache_discount_rate: 0.5

  medium:
    model_id: "medium"
    provider: "cloud"
    cost_per_1k_input: 0.003
    cost_per_1k_output: 0.006
    cost_per_1k_reasoning: 0.0
    latency_ms_estimate: 800
    strength_tier: 3
    max_context: 128000
    supports_tools: true
    supports_reasoning: false
    cache_discount_rate: 0.5

  frontier:
    model_id: "frontier"
    provider: "cloud"
    cost_per_1k_input: 0.01
    cost_per_1k_output: 0.03
    cost_per_1k_reasoning: 0.01
    latency_ms_estimate: 1500
    strength_tier: 4
    max_context: 200000
    supports_tools: true
    supports_reasoning: true
    cache_discount_rate: 0.5

  specialist:
    model_id: "specialist"
    provider: "cloud"
    cost_per_1k_input: 0.015
    cost_per_1k_output: 0.045
    cost_per_1k_reasoning: 0.015
    latency_ms_estimate: 2000
    strength_tier: 5
    max_context: 128000
    supports_tools: true
    supports_reasoning: true
    cache_discount_rate: 0.5

# Tools available to the agent, keyed by tool name.
tools:
  search:
    tool_name: "search"
    cost_per_call: 0.002
    latency_ms_estimate: 500
    cacheable: false
    requires_verification: false
    max_retries: 3

  retrieve:
    tool_name: "retrieve"
    cost_per_call: 0.001
    latency_ms_estimate: 300
    cacheable: true
    requires_verification: false
    max_retries: 3

  code_execution:
    tool_name: "code_execution"
    cost_per_call: 0.005
    latency_ms_estimate: 1000
    cacheable: false
    requires_verification: true
    max_retries: 2

  linter:
    tool_name: "linter"
    cost_per_call: 0.001
    latency_ms_estimate: 200
    cacheable: false
    requires_verification: false
    max_retries: 1

  file_read:
    tool_name: "file_read"
    cost_per_call: 0.0005
    latency_ms_estimate: 100
    cacheable: true
    requires_verification: false
    max_retries: 3

  compliance_check:
    tool_name: "compliance_check"
    cost_per_call: 0.01
    latency_ms_estimate: 1500
    cacheable: false
    requires_verification: true
    max_retries: 2

# Verifiers, keyed by verifier name.
# NOTE(review): "verifier_medium" has no matching entry under `models` in this
# section - confirm where that model id is resolved.
verifiers:
  verifier_medium:
    verifier_model_id: "verifier_medium"
    cost_per_call: 0.005
    latency_ms_estimate: 800
    confidence_threshold: 0.8

# Routing policy
routing_policy:
  name: "default"
  type: "cascade"
  threshold_confidence: 0.7
  max_cascade_depth: 3
  enable_verifier_fallback: true
  enable_escalation: true

# Cost weights
model_cost_weight: 1.0
tool_cost_weight: 1.0
verifier_cost_weight: 1.0
latency_weight: 0.1
retry_penalty_weight: 2.0
false_done_penalty: 10.0
unsafe_cheap_model_penalty: 20.0
missed_escalation_penalty: 15.0

# Module toggles
enable_telemetry: true
enable_classifier: true
enable_router: true
enable_context_budgeter: true
enable_cache_layout: true
enable_tool_gate: true
enable_verifier_budgeter: true
enable_retry_optimizer: true
enable_meta_tool_miner: true
enable_early_termination: true

# Cache-aware layout
cache_prefix_stable:
  - system_rules
  - tool_descriptions
  - user_preferences
cache_suffix_dynamic:
  - user_message
  - retrieved_docs
  - recent_trace
  - artifacts

# Early termination
doom_max_cost_ratio: 3.0
doom_max_retries: 3
doom_no_progress_steps: 5
doom_verifier_disagreement_threshold: 2

# Meta-tool mining
meta_tool_min_frequency: 5
meta_tool_min_success_rate: 0.8