narcolepticchicken
/

agent-cost-optimizer

Safetensors

Model card Files Files and versions

xet

Community

narcolepticchicken committed 1 day ago

Commit

9a3f54b

verified ·

1 Parent(s): 7d1a411

Upload config.yaml

Browse files

Files changed (1) hide show

config.yaml +179 -0

config.yaml ADDED Viewed

	@@ -0,0 +1,179 @@

+# Agent Cost Optimizer Configuration: top-level project settings; all keys in this file use snake_case
+project_name: "agent-cost-optimizer"  # same string as the repository name shown for this commit
+trace_storage_path: "./traces"  # relative path; presumably resolved against the working directory - TODO confirm
+# Model tiers: 1=tiny, 2=cheap, 3=medium, 4=frontier, 5=specialist, 6=verifier (no model below declares tier 6)
+models:  # keyed by model name; every entry carries the same eleven fields
+  tiny_local:  # tier 1; the only entry with provider "local"
+    model_id: "tiny_local"  # repeats the mapping key
+    provider: "local"
+    cost_per_1k_input: 0.0001  # presumably per 1,000 input tokens; currency is not stated here - TODO confirm
+    cost_per_1k_output: 0.0002  # twice the input rate for this entry
+    cost_per_1k_reasoning: 0.0  # 0.0 on every entry whose supports_reasoning is false
+    latency_ms_estimate: 200  # milliseconds per the key name; lowest estimate among the models
+    strength_tier: 1  # see the tier legend above
+    max_context: 32768  # presumably a token count - TODO confirm; smallest limit among the models
+    supports_tools: true  # true for every model listed
+    supports_reasoning: false
+    cache_discount_rate: 0.5  # identical for every model; how the rate is applied is not shown here
+  cheap_cloud:  # tier 2
+    model_id: "cheap_cloud"
+    provider: "cloud"
+    cost_per_1k_input: 0.0005
+    cost_per_1k_output: 0.001  # twice the input rate
+    cost_per_1k_reasoning: 0.0
+    latency_ms_estimate: 500
+    strength_tier: 2
+    max_context: 128000
+    supports_tools: true
+    supports_reasoning: false
+    cache_discount_rate: 0.5
+  medium:  # tier 3
+    model_id: "medium"
+    provider: "cloud"
+    cost_per_1k_input: 0.003
+    cost_per_1k_output: 0.006  # twice the input rate
+    cost_per_1k_reasoning: 0.0
+    latency_ms_estimate: 800
+    strength_tier: 3
+    max_context: 128000
+    supports_tools: true
+    supports_reasoning: false
+    cache_discount_rate: 0.5
+  frontier:  # tier 4; lowest tier with supports_reasoning: true
+    model_id: "frontier"
+    provider: "cloud"
+    cost_per_1k_input: 0.01
+    cost_per_1k_output: 0.03  # three times the input rate
+    cost_per_1k_reasoning: 0.01  # same value as this entry's input rate
+    latency_ms_estimate: 1500
+    strength_tier: 4
+    max_context: 200000  # largest context limit among the models listed
+    supports_tools: true
+    supports_reasoning: true
+    cache_discount_rate: 0.5
+  specialist:  # tier 5; highest rates and latency estimate among the models listed
+    model_id: "specialist"
+    provider: "cloud"
+    cost_per_1k_input: 0.015
+    cost_per_1k_output: 0.045  # three times the input rate
+    cost_per_1k_reasoning: 0.015  # same value as this entry's input rate
+    latency_ms_estimate: 2000
+    strength_tier: 5
+    max_context: 128000  # smaller than frontier's 200000
+    supports_tools: true
+    supports_reasoning: true
+    cache_discount_rate: 0.5
+tools:  # keyed by tool name; every entry carries the same six fields
+  search:
+    tool_name: "search"  # repeats the mapping key
+    cost_per_call: 0.002  # flat per-invocation cost; currency is not stated here - TODO confirm
+    latency_ms_estimate: 500  # milliseconds per the key name
+    cacheable: false
+    requires_verification: false
+    max_retries: 3
+  retrieve:  # one of the two tools marked cacheable
+    tool_name: "retrieve"
+    cost_per_call: 0.001
+    latency_ms_estimate: 300
+    cacheable: true
+    requires_verification: false
+    max_retries: 3
+  code_execution:  # one of the two tools with requires_verification: true
+    tool_name: "code_execution"
+    cost_per_call: 0.005
+    latency_ms_estimate: 1000
+    cacheable: false
+    requires_verification: true
+    max_retries: 2
+  linter:
+    tool_name: "linter"
+    cost_per_call: 0.001
+    latency_ms_estimate: 200
+    cacheable: false
+    requires_verification: false
+    max_retries: 1  # lowest retry limit among the tools
+  file_read:  # cheapest and fastest tool listed; also cacheable
+    tool_name: "file_read"
+    cost_per_call: 0.0005
+    latency_ms_estimate: 100
+    cacheable: true
+    requires_verification: false
+    max_retries: 3
+  compliance_check:  # most expensive and slowest tool listed; requires verification
+    tool_name: "compliance_check"
+    cost_per_call: 0.01
+    latency_ms_estimate: 1500
+    cacheable: false
+    requires_verification: true
+    max_retries: 2
+verifiers:  # keyed by verifier name; a single verifier is defined
+  verifier_medium:
+    verifier_model_id: "verifier_medium"  # NOTE(review): no entry with this id under models - confirm how it resolves
+    cost_per_call: 0.005  # flat per-call cost, unlike the per-1k rates used for models; currency not stated
+    latency_ms_estimate: 800  # milliseconds per the key name
+    confidence_threshold: 0.8  # a separate setting from routing_policy.threshold_confidence, which is 0.7
+# Routing policy: one named policy; the code that interprets these fields is not part of this file
+routing_policy:
+  name: "default"
+  type: "cascade"  # the only policy type that appears in this file
+  threshold_confidence: 0.7  # presumably the confidence cut-off used when cascading - TODO confirm direction
+  max_cascade_depth: 3  # presumably caps the number of cascade steps - TODO confirm
+  enable_verifier_fallback: true
+  enable_escalation: true
+# Cost weights: top-level numeric weights and penalties; presumably terms of one combined cost score - TODO confirm
+model_cost_weight: 1.0  # the three *_cost_weight values are all 1.0
+tool_cost_weight: 1.0
+verifier_cost_weight: 1.0
+latency_weight: 0.1  # smallest weight in this group
+retry_penalty_weight: 2.0
+false_done_penalty: 10.0
+unsafe_cheap_model_penalty: 20.0  # largest value in this group
+missed_escalation_penalty: 15.0
+# Module toggles: one boolean per module; all ten are set to true in this config
+enable_telemetry: true
+enable_classifier: true
+enable_router: true  # presumably gates use of routing_policy - TODO confirm
+enable_context_budgeter: true
+enable_cache_layout: true  # presumably gates the cache_prefix_stable / cache_suffix_dynamic lists - TODO confirm
+enable_tool_gate: true
+enable_verifier_budgeter: true
+enable_retry_optimizer: true
+enable_meta_tool_miner: true  # presumably gates the meta_tool_* thresholds - TODO confirm
+enable_early_termination: true  # presumably gates the doom_* thresholds - TODO confirm
+# Cache-aware layout: two lists of section names; presumably prompt sections in order - TODO confirm
+cache_prefix_stable:  # three entries; key name suggests content that stays stable at the front - TODO confirm
+  - system_rules
+  - tool_descriptions
+  - user_preferences
+cache_suffix_dynamic:  # four entries; key name suggests content that changes and is placed last - TODO confirm
+  - user_message
+  - retrieved_docs
+  - recent_trace
+  - artifacts
+# Early termination: "doom_*" thresholds; the logic that evaluates them is not part of this file
+doom_max_cost_ratio: 3.0  # a ratio; the baseline it is measured against is not shown here - TODO confirm
+doom_max_retries: 3  # equals the highest per-tool max_retries in this config
+doom_no_progress_steps: 5  # presumably a count of consecutive steps without progress - TODO confirm
+doom_verifier_disagreement_threshold: 2  # integer, unlike the fractional confidence thresholds elsewhere
+# Meta-tool mining: minimum thresholds; presumably applied to candidate tool sequences - TODO confirm
+meta_tool_min_frequency: 5  # presumably a minimum occurrence count - TODO confirm
+meta_tool_min_success_rate: 0.8  # fraction between 0 and 1