# File size: 4,033 Bytes

# 9a3f54b

# Agent Cost Optimizer Configuration

# Project identifier string.
project_name: "agent-cost-optimizer"
# Relative path for trace storage.
# NOTE(review): presumably resolved against the process working directory;
# confirm against the loader.
trace_storage_path: "./traces"

# Model catalogue, keyed by model name. Each entry repeats its key as model_id.
# Model tiers: 1=tiny, 2=cheap, 3=medium, 4=frontier, 5=specialist, 6=verifier
# Only tiers 1-5 are assigned to the models listed in this section.
# NOTE(review): cost_per_1k_* values are presumably prices per 1,000 tokens in
# a single currency; the unit is not stated in this file - confirm.
# Every entry here sets supports_tools: true and cache_discount_rate: 0.5.
models:
  # Tier 1: lowest costs and latency estimate, smallest max_context.
  tiny_local:
    model_id: "tiny_local"
    provider: "local"
    cost_per_1k_input: 0.0001
    cost_per_1k_output: 0.0002
    cost_per_1k_reasoning: 0.0
    latency_ms_estimate: 200
    strength_tier: 1
    max_context: 32768
    supports_tools: true
    supports_reasoning: false
    cache_discount_rate: 0.5

  # Tier 2: no reasoning support, reasoning cost set to 0.0.
  cheap_cloud:
    model_id: "cheap_cloud"
    provider: "cloud"
    cost_per_1k_input: 0.0005
    cost_per_1k_output: 0.001
    cost_per_1k_reasoning: 0.0
    latency_ms_estimate: 500
    strength_tier: 2
    max_context: 128000
    supports_tools: true
    supports_reasoning: false
    cache_discount_rate: 0.5

  # Tier 3: no reasoning support, reasoning cost set to 0.0.
  medium:
    model_id: "medium"
    provider: "cloud"
    cost_per_1k_input: 0.003
    cost_per_1k_output: 0.006
    cost_per_1k_reasoning: 0.0
    latency_ms_estimate: 800
    strength_tier: 3
    max_context: 128000
    supports_tools: true
    supports_reasoning: false
    cache_discount_rate: 0.5

  # Tier 4: first tier with supports_reasoning: true and a non-zero
  # reasoning cost; largest max_context in the catalogue.
  frontier:
    model_id: "frontier"
    provider: "cloud"
    cost_per_1k_input: 0.01
    cost_per_1k_output: 0.03
    cost_per_1k_reasoning: 0.01
    latency_ms_estimate: 1500
    strength_tier: 4
    max_context: 200000
    supports_tools: true
    supports_reasoning: true
    cache_discount_rate: 0.5

  # Tier 5: highest costs and latency estimate; supports reasoning.
  specialist:
    model_id: "specialist"
    provider: "cloud"
    cost_per_1k_input: 0.015
    cost_per_1k_output: 0.045
    cost_per_1k_reasoning: 0.015
    latency_ms_estimate: 2000
    strength_tier: 5
    max_context: 128000
    supports_tools: true
    supports_reasoning: true
    cache_discount_rate: 0.5

# Tool catalogue, keyed by tool name. Each entry repeats its key as tool_name.
# NOTE(review): cost_per_call is presumably in the same currency unit as the
# model costs; the unit is not stated in this file - confirm.
tools:
  search:
    tool_name: "search"
    cost_per_call: 0.002
    latency_ms_estimate: 500
    cacheable: false
    requires_verification: false
    max_retries: 3

  # One of the two tools marked cacheable (with file_read).
  retrieve:
    tool_name: "retrieve"
    cost_per_call: 0.001
    latency_ms_estimate: 300
    cacheable: true
    requires_verification: false
    max_retries: 3

  # Marked requires_verification: true, with a lower retry cap (2).
  code_execution:
    tool_name: "code_execution"
    cost_per_call: 0.005
    latency_ms_estimate: 1000
    cacheable: false
    requires_verification: true
    max_retries: 2

  # Lowest retry cap in the catalogue (1).
  linter:
    tool_name: "linter"
    cost_per_call: 0.001
    latency_ms_estimate: 200
    cacheable: false
    requires_verification: false
    max_retries: 1

  # Cheapest and fastest tool by these estimates; marked cacheable.
  file_read:
    tool_name: "file_read"
    cost_per_call: 0.0005
    latency_ms_estimate: 100
    cacheable: true
    requires_verification: false
    max_retries: 3

  # Most expensive and slowest tool by these estimates; marked
  # requires_verification: true.
  compliance_check:
    tool_name: "compliance_check"
    cost_per_call: 0.01
    latency_ms_estimate: 1500
    cacheable: false
    requires_verification: true
    max_retries: 2

# Verifier catalogue, keyed by verifier name.
verifiers:
  verifier_medium:
    # NOTE(review): "verifier_medium" is not a key under `models` in this
    # file; confirm how the consumer resolves this id.
    verifier_model_id: "verifier_medium"
    cost_per_call: 0.005
    latency_ms_estimate: 800
    # NOTE(review): differs from routing_policy.threshold_confidence (0.7);
    # presumably the two thresholds are independent - confirm.
    confidence_threshold: 0.8

# Routing policy
# NOTE(review): the semantics of each field are defined by the consumer, which
# is not visible here; the notes below are assumptions to confirm.
routing_policy:
  name: "default"
  type: "cascade"
  # Presumably the confidence below which a request moves to the next model.
  threshold_confidence: 0.7
  # Presumably caps how many models a single request may cascade through.
  max_cascade_depth: 3
  enable_verifier_fallback: true
  enable_escalation: true

# Cost weights
# NOTE(review): presumably multipliers applied to each component when scoring
# a run; the scoring formula is not visible in this file - confirm.
model_cost_weight: 1.0
tool_cost_weight: 1.0
verifier_cost_weight: 1.0
# Smallest weight in this group.
latency_weight: 0.1
retry_penalty_weight: 2.0
# The three penalties below are the largest values in this group;
# unsafe_cheap_model_penalty is the highest.
false_done_penalty: 10.0
unsafe_cheap_model_penalty: 20.0
missed_escalation_penalty: 15.0

# Module toggles
# One boolean flag per module; every flag is set to true in this file.
enable_telemetry: true
enable_classifier: true
enable_router: true
enable_context_budgeter: true
enable_cache_layout: true
enable_tool_gate: true
enable_verifier_budgeter: true
enable_retry_optimizer: true
enable_meta_tool_miner: true
enable_early_termination: true

# Cache-aware layout
# Two lists of prompt section names.
# NOTE(review): presumably the "stable" sections are placed first so a cached
# prefix can be reused, and list order is significant - confirm with consumer.
cache_prefix_stable:
  - system_rules
  - tool_descriptions
  - user_preferences
# Sections named as dynamic, listed separately from the stable prefix.
cache_suffix_dynamic:
  - user_message
  - retrieved_docs
  - recent_trace
  - artifacts

# Early termination
# NOTE(review): presumably each doom_* value is a limit that, once reached,
# lets the run be stopped early; exact comparison rules are not visible here.
# Ratio; the baseline it is measured against is not stated in this file.
doom_max_cost_ratio: 3.0
doom_max_retries: 3
doom_no_progress_steps: 5
# Integer count, despite the "threshold" name.
doom_verifier_disagreement_threshold: 2

# Meta-tool mining
# NOTE(review): presumably minimum bars a recurring tool sequence must meet
# before it is proposed as a meta-tool - confirm with the miner module.
meta_tool_min_frequency: 5
# Expressed as a fraction (0.8), not a percentage.
meta_tool_min_success_rate: 0.8