narcolepticchicken
/

agent-cost-optimizer

Model card Files Files and versions

agent-cost-optimizer / config.yaml

narcolepticchicken's picture

narcolepticchicken

Upload config.yaml

9a3f54b verified 1 day ago

history blame contribute delete

4.03 kB

	# Agent Cost Optimizer: configuration file

	# Project identifier, followed by the relative path given for trace storage.
	project_name: 'agent-cost-optimizer'
	trace_storage_path: './traces'

	# Model catalogue. Each entry is keyed by the same string as its model_id.
	# strength_tier scale: 1=tiny, 2=cheap, 3=medium, 4=frontier, 5=specialist, 6=verifier
	# NOTE(review): no entry in this section uses tier 6.
	# cost_per_1k_* values are presumably prices per 1,000 tokens; the currency
	# is not stated in this file. TODO confirm.
	# Entries are nested under `models` so that the per-model fields
	# (model_id, provider, ...) are not duplicate keys at a single level.
	models:
	  tiny_local:
	    model_id: 'tiny_local'
	    provider: 'local'
	    cost_per_1k_input: 0.0001
	    cost_per_1k_output: 0.0002
	    cost_per_1k_reasoning: 0.0
	    latency_ms_estimate: 200
	    strength_tier: 1
	    max_context: 32768
	    supports_tools: true
	    supports_reasoning: false
	    cache_discount_rate: 0.5

	  cheap_cloud:
	    model_id: 'cheap_cloud'
	    provider: 'cloud'
	    cost_per_1k_input: 0.0005
	    cost_per_1k_output: 0.001
	    cost_per_1k_reasoning: 0.0
	    latency_ms_estimate: 500
	    strength_tier: 2
	    max_context: 128000
	    supports_tools: true
	    supports_reasoning: false
	    cache_discount_rate: 0.5

	  medium:
	    model_id: 'medium'
	    provider: 'cloud'
	    cost_per_1k_input: 0.003
	    cost_per_1k_output: 0.006
	    cost_per_1k_reasoning: 0.0
	    latency_ms_estimate: 800
	    strength_tier: 3
	    max_context: 128000
	    supports_tools: true
	    supports_reasoning: false
	    cache_discount_rate: 0.5

	  # The two entries below are the only ones with supports_reasoning: true
	  # and a non-zero cost_per_1k_reasoning.
	  frontier:
	    model_id: 'frontier'
	    provider: 'cloud'
	    cost_per_1k_input: 0.01
	    cost_per_1k_output: 0.03
	    cost_per_1k_reasoning: 0.01
	    latency_ms_estimate: 1500
	    strength_tier: 4
	    max_context: 200000
	    supports_tools: true
	    supports_reasoning: true
	    cache_discount_rate: 0.5

	  specialist:
	    model_id: 'specialist'
	    provider: 'cloud'
	    cost_per_1k_input: 0.015
	    cost_per_1k_output: 0.045
	    cost_per_1k_reasoning: 0.015
	    latency_ms_estimate: 2000
	    strength_tier: 5
	    max_context: 128000
	    supports_tools: true
	    supports_reasoning: true
	    cache_discount_rate: 0.5

	# Tool catalogue. Each entry is keyed by the same string as its tool_name.
	# cost_per_call has no stated currency; latency_ms_estimate is in
	# milliseconds per the key name.
	# Entries are nested under `tools` so that the per-tool fields
	# (tool_name, cost_per_call, ...) are not duplicate keys at a single level.
	tools:
	  search:
	    tool_name: 'search'
	    cost_per_call: 0.002
	    latency_ms_estimate: 500
	    cacheable: false
	    requires_verification: false
	    max_retries: 3

	  retrieve:
	    tool_name: 'retrieve'
	    cost_per_call: 0.001
	    latency_ms_estimate: 300
	    cacheable: true
	    requires_verification: false
	    max_retries: 3

	  # code_execution and compliance_check are the only tools with
	  # requires_verification: true; both cap max_retries at 2.
	  code_execution:
	    tool_name: 'code_execution'
	    cost_per_call: 0.005
	    latency_ms_estimate: 1000
	    cacheable: false
	    requires_verification: true
	    max_retries: 2

	  linter:
	    tool_name: 'linter'
	    cost_per_call: 0.001
	    latency_ms_estimate: 200
	    cacheable: false
	    requires_verification: false
	    max_retries: 1

	  file_read:
	    tool_name: 'file_read'
	    cost_per_call: 0.0005
	    latency_ms_estimate: 100
	    cacheable: true
	    requires_verification: false
	    max_retries: 3

	  compliance_check:
	    tool_name: 'compliance_check'
	    cost_per_call: 0.01
	    latency_ms_estimate: 1500
	    cacheable: false
	    requires_verification: true
	    max_retries: 2

	# Verifier catalogue. The single entry is keyed by the same string as its
	# verifier_model_id and is nested under `verifiers`.
	# NOTE(review): 'verifier_medium' has no entry of that name under `models`
	# in this file; confirm where the id is resolved.
	verifiers:
	  verifier_medium:
	    verifier_model_id: 'verifier_medium'
	    cost_per_call: 0.005
	    latency_ms_estimate: 800
	    confidence_threshold: 0.8

	# Routing policy
	# The settings from name through enable_escalation are nested under
	# routing_policy; left at the same depth as the key, routing_policy itself
	# would parse as null and generic names such as `name` and `type` would
	# become top-level keys.
	routing_policy:
	  name: 'default'
	  type: 'cascade'
	  # Presumably the confidence below which a cascade moves to the next
	  # model, bounded by max_cascade_depth. TODO confirm against the router.
	  threshold_confidence: 0.7
	  max_cascade_depth: 3
	  enable_verifier_fallback: true
	  enable_escalation: true

	# Cost weights
	# NOTE(review): every line in this copy of the file sits at the same depth,
	# so it cannot be determined here whether these keys are top-level or belong
	# under routing_policy. Confirm against the config loader.
	# The *_weight keys are presumably multipliers in a combined cost objective
	# and the *_penalty keys fixed penalties for bad outcomes. TODO confirm.
	model_cost_weight: 1.0
	tool_cost_weight: 1.0
	verifier_cost_weight: 1.0
	latency_weight: 0.1
	retry_penalty_weight: 2.0
	false_done_penalty: 10.0
	unsafe_cheap_model_penalty: 20.0
	missed_escalation_penalty: 15.0

	# Module toggles
	# One boolean switch per enable_* key; every switch is set to true in this
	# configuration. Presumably each key turns the like-named optimizer module
	# on or off. TODO confirm against the consumer.
	enable_telemetry: true
	enable_classifier: true
	enable_router: true
	enable_context_budgeter: true
	enable_cache_layout: true
	enable_tool_gate: true
	enable_verifier_budgeter: true
	enable_retry_optimizer: true
	enable_meta_tool_miner: true
	enable_early_termination: true

	# Cache-aware layout: two lists of segment names, one for the stable
	# prefix and one for the dynamic suffix.
	# Presumably list order is significant. TODO confirm.
	cache_prefix_stable:
	  - system_rules
	  - tool_descriptions
	  - user_preferences
	cache_suffix_dynamic:
	  - user_message
	  - retrieved_docs
	  - recent_trace
	  - artifacts

	# Early termination
	# Limits sharing the doom_ prefix. Presumably a run is abandoned once any
	# one of them is exceeded. TODO confirm against the consumer.
	# NOTE(review): the baseline for doom_max_cost_ratio (ratio of what to
	# what) is not stated in this file.
	doom_max_cost_ratio: 3.0
	doom_max_retries: 3
	doom_no_progress_steps: 5
	doom_verifier_disagreement_threshold: 2

	# Meta-tool mining
	# Minimum frequency and minimum success rate. Presumably a candidate must
	# meet both to be considered. TODO confirm against the miner.
	meta_tool_min_frequency: 5
	meta_tool_min_success_rate: 0.8