""" Concrete Agent Strategies for the CGAE Economy Testbed. Five agent archetypes designed to test different aspects of the CGAE theorems: 1. Conservative: High robustness, low capability -> tests Theorem 1 (bounded exposure) 2. Aggressive: High capability, low robustness -> tests incentive structure (stuck at low tiers) 3. Balanced: Moderate both -> baseline reference 4. Adaptive: Invests in weakest dimension -> tests Theorem 2 (incentive compatibility) 5. Cheater: Attempts tier-laundering -> tests Proposition 2 (collusion resistance) """ from __future__ import annotations import random from typing import Any, Optional from cgae_engine.gate import RobustnessVector, Tier from cgae_engine.contracts import CGAEContract from agents.base import BaseAgent, AgentStrategy, AgentDecision class ConservativeAgent(BaseAgent): """ High robustness, low capability. Plays it safe. Strategy: - Only bids on contracts well within its tier - Prefers low-risk, low-reward contracts - Maintains high balance by avoiding risky contracts - Never invests in capability, focuses on maintaining robustness Tests: Theorem 1 (bounded exposure) - should have low, stable exposure Expected: Survives long but earns less than optimal """ def __init__(self, name: str = "conservative", **kwargs): robustness = kwargs.pop("true_robustness", RobustnessVector( cc=0.85, er=0.80, as_=0.75, ih=0.90 )) super().__init__( name=name, strategy=AgentStrategy.CONSERVATIVE, true_robustness=robustness, capability=kwargs.pop("capability", 0.65), **kwargs, ) def decide(self, available_contracts, current_tier, balance, current_exposure, budget_ceiling): # Only bid if we have plenty of budget headroom safe_exposure_limit = budget_ceiling * 0.5 eligible = [ c for c in available_contracts if c.penalty + current_exposure <= safe_exposure_limit and c.difficulty < 0.5 # Only easy tasks and c.reward > 0 # Must have positive reward ] if not eligible: return AgentDecision(action="idle") # Pick the safest (lowest penalty) contract best = min(eligible, key=lambda c: c.penalty) return AgentDecision(action="bid", contract_id=best.contract_id) def execute_task(self, contract): # Conservative agents are careful but slow. Success depends on capability + low difficulty. success_prob = self.task_success_probability(contract) return random.random() < success_prob class AggressiveAgent(BaseAgent): """ High capability, low robustness. Pushes boundaries. Strategy: - Bids on the highest-reward contracts available - Accepts more risk (higher exposure ratio) - Doesn't invest in robustness (capability-focused) - Gets stuck at low tiers due to robustness gating Tests: Theorem 2 (incentive compatibility) - demonstrates that capability alone doesn't unlock higher tiers. Should underperform Adaptive agent. Expected: High short-term earnings at T1, but can't access T2+ contracts """ def __init__(self, name: str = "aggressive", **kwargs): robustness = kwargs.pop("true_robustness", RobustnessVector( cc=0.35, er=0.40, as_=0.30, ih=0.70 )) super().__init__( name=name, strategy=AgentStrategy.AGGRESSIVE, true_robustness=robustness, capability=kwargs.pop("capability", 0.85), **kwargs, ) def decide(self, available_contracts, current_tier, balance, current_exposure, budget_ceiling): eligible = [ c for c in available_contracts if c.penalty + current_exposure <= budget_ceiling ] if not eligible: return AgentDecision(action="idle") # Pick the highest-reward contract best = max(eligible, key=lambda c: c.reward) return AgentDecision(action="bid", contract_id=best.contract_id) def execute_task(self, contract): # Aggressive agents have high capability, so they succeed more often success_prob = self.task_success_probability(contract) # Bonus for high capability success_prob = min(0.95, success_prob * 1.3) return random.random() < success_prob class BalancedAgent(BaseAgent): """ Moderate robustness and capability. The baseline. Strategy: - Evaluates contracts by expected value (reward * success_prob - penalty * fail_prob) - Maintains moderate exposure - Occasionally invests in robustness when near a tier threshold Tests: Provides baseline for comparing other strategies Expected: Moderate performance across all metrics """ def __init__(self, name: str = "balanced", **kwargs): robustness = kwargs.pop("true_robustness", RobustnessVector( cc=0.60, er=0.55, as_=0.50, ih=0.80 )) super().__init__( name=name, strategy=AgentStrategy.BALANCED, true_robustness=robustness, capability=kwargs.pop("capability", 0.6), **kwargs, ) def decide(self, available_contracts, current_tier, balance, current_exposure, budget_ceiling): eligible = [ c for c in available_contracts if c.penalty + current_exposure <= budget_ceiling * 0.8 ] if not eligible: return AgentDecision(action="idle") # Pick by expected value def ev(c): p = self.task_success_probability(c) return c.reward * p - c.penalty * (1 - p) best = max(eligible, key=ev) if ev(best) > 0: return AgentDecision(action="bid", contract_id=best.contract_id) return AgentDecision(action="idle") def execute_task(self, contract): success_prob = self.task_success_probability(contract) return random.random() < success_prob class AdaptiveAgent(BaseAgent): """ Strategically invests in its weakest robustness dimension. Strategy: - Identifies binding dimension (what's keeping it at current tier) - Allocates a fraction of earnings to robustness investment - Targets the weakest dimension specifically (Theorem 2 behavior) - Gradually unlocks higher tiers over time Tests: Theorem 2 (incentive compatibility) - this agent should demonstrate the predicted behavior where rational agents invest in robustness. Expected: Starts slow, accelerates as it unlocks higher tiers. This is the agent that should win long-run. """ def __init__(self, name: str = "adaptive", **kwargs): robustness = kwargs.pop("true_robustness", RobustnessVector( cc=0.55, er=0.50, as_=0.45, ih=0.80 )) super().__init__( name=name, strategy=AgentStrategy.ADAPTIVE, true_robustness=robustness, capability=kwargs.pop("capability", 0.6), **kwargs, ) self.investment_fraction = 0.15 # Spend 15% of earnings on robustness self._accumulated_investment = 0.0 def decide(self, available_contracts, current_tier, balance, current_exposure, budget_ceiling): # Should we invest in robustness this step? # Only invest when we have sufficient capital buffer if self._accumulated_investment >= 0.03 and balance > 0.15: weakest_dim = self._weakest_dimension() investment = min(self._accumulated_investment, 0.03) self._accumulated_investment -= investment return AgentDecision( action="invest_robustness", investment_dimension=weakest_dim, investment_amount=investment * 0.5, # Diminishing returns ) # Otherwise, bid on contracts eligible = [ c for c in available_contracts if c.penalty + current_exposure <= budget_ceiling * 0.7 ] if not eligible: return AgentDecision(action="idle") # Pick by expected value, with preference for higher-tier contracts def ev(c): p = self.task_success_probability(c) return (c.reward * p - c.penalty * (1 - p)) * (1 + c.min_tier.value * 0.1) best = max(eligible, key=ev) if ev(best) > 0: return AgentDecision(action="bid", contract_id=best.contract_id) return AgentDecision(action="idle") def execute_task(self, contract): success_prob = self.task_success_probability(contract) result = random.random() < success_prob if result: # Accumulate investment fund from earnings self._accumulated_investment += contract.reward * self.investment_fraction return result def _weakest_dimension(self) -> str: r = self.true_robustness dims = {"cc": r.cc, "er": r.er, "as": r.as_} return min(dims, key=dims.get) class CheaterAgent(BaseAgent): """ Attempts to game the system via delegation chain exploitation. Strategy: - Tries to accept contracts above its actual tier - Attempts to form delegation chains to bypass gate - May misrepresent capabilities Tests: Proposition 2 (collusion resistance) - the CGAE system should prevent this agent from accessing higher tiers than its robustness warrants. Expected: Should be caught and penalized. Demonstrates that the weakest-link gate + chain robustness constraint works. """ def __init__(self, name: str = "cheater", **kwargs): robustness = kwargs.pop("true_robustness", RobustnessVector( cc=0.70, er=0.25, as_=0.65, ih=0.60 )) super().__init__( name=name, strategy=AgentStrategy.CHEATER, true_robustness=robustness, capability=kwargs.pop("capability", 0.7), **kwargs, ) self._collusion_partners: list[str] = [] def decide(self, available_contracts, current_tier, balance, current_exposure, budget_ceiling): # Greedily bid on highest-reward contracts, even risky ones eligible = [ c for c in available_contracts if c.penalty + current_exposure <= budget_ceiling ] if not eligible: return AgentDecision(action="idle") # Always go for max reward, regardless of risk best = max(eligible, key=lambda c: c.reward) return AgentDecision( action="bid", contract_id=best.contract_id, details={"attempted_tier_bypass": best.min_tier.value > current_tier.value}, ) def execute_task(self, contract): # Cheater has moderate capability but weak epistemic robustness (ER=0.25) # This manifests as higher failure rate on tasks requiring epistemic integrity success_prob = self.task_success_probability(contract) # Penalty for epistemic weakness: more failures on complex tasks if contract.difficulty > 0.5: success_prob *= 0.6 # Epistemic weakness hurts on hard tasks return random.random() < success_prob # --------------------------------------------------------------------------- # Agent factory # --------------------------------------------------------------------------- AGENT_PRESETS: dict[str, type[BaseAgent]] = { "conservative": ConservativeAgent, "aggressive": AggressiveAgent, "balanced": BalancedAgent, "adaptive": AdaptiveAgent, "cheater": CheaterAgent, } def create_agent_cohort( strategies: Optional[list[str]] = None, custom_robustness: Optional[dict[str, RobustnessVector]] = None, ) -> list[BaseAgent]: """ Create a cohort of agents with diverse strategies. Default: one of each strategy type. """ if strategies is None: strategies = list(AGENT_PRESETS.keys()) agents = [] for i, strategy_name in enumerate(strategies): cls = AGENT_PRESETS.get(strategy_name) if cls is None: raise ValueError(f"Unknown strategy: {strategy_name}") kwargs = {} if custom_robustness and strategy_name in custom_robustness: kwargs["true_robustness"] = custom_robustness[strategy_name] agent = cls(name=f"{strategy_name}_{i}", **kwargs) agents.append(agent) return agents