# cgae-server/agents/strategies.py
# rb125
# Added strategy archetypes and two-layer verification.
# d74aa65
"""
Concrete Agent Strategies for the CGAE Economy Testbed.
Five agent archetypes designed to test different aspects of the CGAE theorems:
1. Conservative: High robustness, low capability -> tests Theorem 1 (bounded exposure)
2. Aggressive: High capability, low robustness -> tests incentive structure (stuck at low tiers)
3. Balanced: Moderate both -> baseline reference
4. Adaptive: Invests in weakest dimension -> tests Theorem 2 (incentive compatibility)
5. Cheater: Attempts tier-laundering -> tests Proposition 2 (collusion resistance)
"""
from __future__ import annotations
import random
from typing import Any, Optional
from cgae_engine.gate import RobustnessVector, Tier
from cgae_engine.contracts import CGAEContract
from agents.base import BaseAgent, AgentStrategy, AgentDecision
class ConservativeAgent(BaseAgent):
    """
    Risk-averse archetype: strong robustness profile, modest capability.

    Strategy:
    - Commits at most half of its budget ceiling to exposure
    - Considers only easy contracts (difficulty < 0.5) that pay a positive reward
    - Among those, bids on the contract with the smallest penalty
    - Never issues an investment decision; it only bids or idles

    Tests: Theorem 1 (bounded exposure) - exposure should stay low and stable
    Expected: Long survival, but lower earnings than the optimum
    """

    def __init__(self, name: str = "conservative", **kwargs):
        """
        Create the agent.

        ``true_robustness`` and ``capability`` may be supplied through
        ``kwargs`` to override the archetype defaults; all remaining keyword
        arguments are forwarded to ``BaseAgent.__init__`` unchanged.
        """
        default_profile = RobustnessVector(cc=0.85, er=0.80, as_=0.75, ih=0.90)
        profile = kwargs.pop("true_robustness", default_profile)
        skill = kwargs.pop("capability", 0.65)
        super().__init__(
            name=name,
            strategy=AgentStrategy.CONSERVATIVE,
            true_robustness=profile,
            capability=skill,
            **kwargs,
        )

    def decide(
        self,
        available_contracts,
        current_tier,
        balance,
        current_exposure,
        budget_ceiling,
    ):
        """
        Choose between bidding on the safest acceptable contract and idling.

        ``current_tier`` and ``balance`` are accepted for interface
        compatibility but are not consulted by this strategy.
        """
        # Leave half of the budget ceiling unused as headroom.
        exposure_cap = budget_ceiling * 0.5

        def acceptable(contract):
            # Fits under the cap, is an easy task, and pays something.
            return (
                contract.penalty + current_exposure <= exposure_cap
                and contract.difficulty < 0.5
                and contract.reward > 0
            )

        candidates = list(filter(acceptable, available_contracts))
        if not candidates:
            return AgentDecision(action="idle")

        # The smallest penalty wins; ties go to the earliest contract listed.
        safest = min(candidates, key=lambda contract: contract.penalty)
        return AgentDecision(action="bid", contract_id=safest.contract_id)

    def execute_task(self, contract):
        """Return True when a uniform random draw falls below the base success probability."""
        threshold = self.task_success_probability(contract)
        roll = random.random()
        return roll < threshold
class AggressiveAgent(BaseAgent):
    """
    Reward-chasing archetype: high capability, weak robustness profile.

    Strategy:
    - Uses the whole budget ceiling when checking whether a contract fits
    - Bids on whichever fitting contract offers the largest reward
    - Never issues a robustness investment decision

    Tests: Theorem 2 (incentive compatibility) - intended to show that
    capability alone does not unlock higher tiers, so this agent should
    underperform the Adaptive agent.
    Expected: Strong short-term earnings at T1 without access to T2+ contracts
    """

    def __init__(self, name: str = "aggressive", **kwargs):
        """
        Create the agent.

        ``true_robustness`` and ``capability`` may be supplied through
        ``kwargs`` to override the archetype defaults; all remaining keyword
        arguments are forwarded to ``BaseAgent.__init__`` unchanged.
        """
        default_profile = RobustnessVector(cc=0.35, er=0.40, as_=0.30, ih=0.70)
        profile = kwargs.pop("true_robustness", default_profile)
        skill = kwargs.pop("capability", 0.85)
        super().__init__(
            name=name,
            strategy=AgentStrategy.AGGRESSIVE,
            true_robustness=profile,
            capability=skill,
            **kwargs,
        )

    def decide(
        self,
        available_contracts,
        current_tier,
        balance,
        current_exposure,
        budget_ceiling,
    ):
        """
        Bid on the highest-reward contract whose penalty still fits the budget.

        ``current_tier`` and ``balance`` are accepted for interface
        compatibility but are not consulted by this strategy.
        """
        affordable = []
        for contract in available_contracts:
            if contract.penalty + current_exposure <= budget_ceiling:
                affordable.append(contract)

        if not affordable:
            return AgentDecision(action="idle")

        # The largest reward wins; ties go to the earliest contract listed.
        richest = max(affordable, key=lambda contract: contract.reward)
        return AgentDecision(action="bid", contract_id=richest.contract_id)

    def execute_task(self, contract):
        """Return True when a random draw falls below the boosted, capped success probability."""
        base_probability = self.task_success_probability(contract)
        # 30% capability bonus on top of the base probability, never above 0.95.
        boosted = base_probability * 1.3
        capped = boosted if boosted < 0.95 else 0.95
        roll = random.random()
        return roll < capped
class BalancedAgent(BaseAgent):
    """
    Baseline archetype: middling robustness profile and capability.

    Strategy:
    - Keeps exposure within 80% of the budget ceiling
    - Ranks contracts by expected value
      (reward * success_prob - penalty * fail_prob)
    - Bids on the best contract only when its expected value is positive

    Tests: Serves as the reference point for the other strategies
    Expected: Moderate performance across all metrics
    """

    def __init__(self, name: str = "balanced", **kwargs):
        """
        Create the agent.

        ``true_robustness`` and ``capability`` may be supplied through
        ``kwargs`` to override the archetype defaults; all remaining keyword
        arguments are forwarded to ``BaseAgent.__init__`` unchanged.
        """
        default_profile = RobustnessVector(cc=0.60, er=0.55, as_=0.50, ih=0.80)
        profile = kwargs.pop("true_robustness", default_profile)
        skill = kwargs.pop("capability", 0.6)
        super().__init__(
            name=name,
            strategy=AgentStrategy.BALANCED,
            true_robustness=profile,
            capability=skill,
            **kwargs,
        )

    def decide(
        self,
        available_contracts,
        current_tier,
        balance,
        current_exposure,
        budget_ceiling,
    ):
        """
        Bid on the contract with the best positive expected value, else idle.

        ``current_tier`` and ``balance`` are accepted for interface
        compatibility but are not consulted by this strategy.
        """

        def within_budget(contract):
            # Exposure after taking the contract must stay under 80% of the ceiling.
            return contract.penalty + current_exposure <= budget_ceiling * 0.8

        def expected_value(contract):
            win_chance = self.task_success_probability(contract)
            gain = contract.reward * win_chance
            loss = contract.penalty * (1 - win_chance)
            return gain - loss

        shortlist = list(filter(within_budget, available_contracts))
        if not shortlist:
            return AgentDecision(action="idle")

        top_pick = max(shortlist, key=expected_value)
        if not (expected_value(top_pick) > 0):
            # Even the best option is not expected to pay off.
            return AgentDecision(action="idle")
        return AgentDecision(action="bid", contract_id=top_pick.contract_id)

    def execute_task(self, contract):
        """Return True when a uniform random draw falls below the base success probability."""
        threshold = self.task_success_probability(contract)
        roll = random.random()
        return roll < threshold
class AdaptiveAgent(BaseAgent):
    """
    Archetype that reinvests earnings into its weakest robustness dimension.

    Strategy:
    - Sets aside 15% of the reward of every successfully executed contract
    - Once the fund holds at least 0.03 and the balance exceeds 0.15, spends
      a 0.03 chunk on the lowest of the cc / er / as dimensions
    - Otherwise bids by expected value, weighted towards higher-tier
      contracts, keeping exposure within 70% of the budget ceiling

    Tests: Theorem 2 (incentive compatibility) - this agent is meant to
    exhibit the predicted behaviour of a rational agent investing in
    robustness.
    Expected: Slow start, then acceleration as higher tiers unlock; the
    intended long-run winner.
    """

    def __init__(self, name: str = "adaptive", **kwargs):
        """
        Create the agent and its (initially empty) investment fund.

        ``true_robustness`` and ``capability`` may be supplied through
        ``kwargs`` to override the archetype defaults; all remaining keyword
        arguments are forwarded to ``BaseAgent.__init__`` unchanged.
        """
        default_profile = RobustnessVector(cc=0.55, er=0.50, as_=0.45, ih=0.80)
        profile = kwargs.pop("true_robustness", default_profile)
        skill = kwargs.pop("capability", 0.6)
        super().__init__(
            name=name,
            strategy=AgentStrategy.ADAPTIVE,
            true_robustness=profile,
            capability=skill,
            **kwargs,
        )
        # Share of each successful contract's reward set aside for robustness.
        self.investment_fraction = 0.15
        # Funds set aside so far and not yet spent.
        self._accumulated_investment = 0.0

    def _next_investment(self, balance):
        """Return an investment decision if the fund and balance allow one, else None."""
        fund_ready = self._accumulated_investment >= 0.03
        if not (fund_ready and balance > 0.15):
            return None

        target = self._weakest_dimension()
        chunk = min(self._accumulated_investment, 0.03)
        self._accumulated_investment -= chunk
        return AgentDecision(
            action="invest_robustness",
            investment_dimension=target,
            # Only half of the chunk is reported as invested (diminishing returns).
            investment_amount=chunk * 0.5,
        )

    def decide(
        self,
        available_contracts,
        current_tier,
        balance,
        current_exposure,
        budget_ceiling,
    ):
        """
        Invest in robustness when possible; otherwise bid by weighted expected value.

        ``current_tier`` is accepted for interface compatibility but is not
        consulted by this strategy.
        """
        investment_decision = self._next_investment(balance)
        if investment_decision is not None:
            return investment_decision

        def within_budget(contract):
            # Exposure after taking the contract must stay under 70% of the ceiling.
            return contract.penalty + current_exposure <= budget_ceiling * 0.7

        def weighted_value(contract):
            win_chance = self.task_success_probability(contract)
            raw_value = contract.reward * win_chance - contract.penalty * (1 - win_chance)
            # Each tier level adds a 10% preference bonus.
            return raw_value * (1 + contract.min_tier.value * 0.1)

        shortlist = list(filter(within_budget, available_contracts))
        if not shortlist:
            return AgentDecision(action="idle")

        top_pick = max(shortlist, key=weighted_value)
        if not (weighted_value(top_pick) > 0):
            return AgentDecision(action="idle")
        return AgentDecision(action="bid", contract_id=top_pick.contract_id)

    def execute_task(self, contract):
        """Attempt the task; on success, add a share of the reward to the investment fund."""
        threshold = self.task_success_probability(contract)
        succeeded = random.random() < threshold
        if not succeeded:
            return succeeded
        self._accumulated_investment += contract.reward * self.investment_fraction
        return succeeded

    def _weakest_dimension(self) -> str:
        """
        Return the key ("cc", "er" or "as") of the lowest robustness score.

        Only those three dimensions are compared; ``ih`` is not considered.
        Ties resolve to the first key in that order.
        """
        profile = self.true_robustness
        scored = (("cc", profile.cc), ("er", profile.er), ("as", profile.as_))
        label, _ = min(scored, key=lambda pair: pair[1])
        return label
class CheaterAgent(BaseAgent):
    """
    Archetype that tries to game the system by reaching above its tier.

    Strategy:
    - Bids on the highest-reward contract that fits the budget ceiling,
      whatever its risk
    - Flags in the decision details whether the chosen contract requires a
      tier above the agent's current one
    - Keeps a list of collusion partners (initially empty)

    Tests: Proposition 2 (collusion resistance) - the CGAE system should stop
    this agent from reaching tiers its robustness does not warrant.
    Expected: Gets caught and penalized, demonstrating that the weakest-link
    gate + chain robustness constraint works.
    """

    def __init__(self, name: str = "cheater", **kwargs):
        """
        Create the agent.

        ``true_robustness`` and ``capability`` may be supplied through
        ``kwargs`` to override the archetype defaults; all remaining keyword
        arguments are forwarded to ``BaseAgent.__init__`` unchanged.
        """
        default_profile = RobustnessVector(cc=0.70, er=0.25, as_=0.65, ih=0.60)
        profile = kwargs.pop("true_robustness", default_profile)
        skill = kwargs.pop("capability", 0.7)
        super().__init__(
            name=name,
            strategy=AgentStrategy.CHEATER,
            true_robustness=profile,
            capability=skill,
            **kwargs,
        )
        self._collusion_partners: list[str] = []

    def decide(
        self,
        available_contracts,
        current_tier,
        balance,
        current_exposure,
        budget_ceiling,
    ):
        """
        Greedily bid on the highest-reward contract that fits the budget.

        ``balance`` is accepted for interface compatibility but is not
        consulted by this strategy.
        """
        affordable = []
        for contract in available_contracts:
            if contract.penalty + current_exposure <= budget_ceiling:
                affordable.append(contract)

        if not affordable:
            return AgentDecision(action="idle")

        # Reward is the only ranking criterion; risk is ignored.
        target = max(affordable, key=lambda contract: contract.reward)
        reaches_above_tier = target.min_tier.value > current_tier.value
        return AgentDecision(
            action="bid",
            contract_id=target.contract_id,
            details={"attempted_tier_bypass": reaches_above_tier},
        )

    def execute_task(self, contract):
        """Attempt the task, with a reduced success probability on hard contracts."""
        chance = self.task_success_probability(contract)
        # Models the weak epistemic robustness (default ER=0.25): contracts
        # with difficulty above 0.5 keep only 60% of the base success chance.
        is_hard = contract.difficulty > 0.5
        if is_hard:
            chance *= 0.6
        roll = random.random()
        return roll < chance
# ---------------------------------------------------------------------------
# Agent factory
# ---------------------------------------------------------------------------
# Registry mapping each strategy name to the agent class that implements it.
# Insertion order is the order of the default cohort.
AGENT_PRESETS: dict[str, type[BaseAgent]] = dict(
    conservative=ConservativeAgent,
    aggressive=AggressiveAgent,
    balanced=BalancedAgent,
    adaptive=AdaptiveAgent,
    cheater=CheaterAgent,
)
def create_agent_cohort(
    strategies: Optional[list[str]] = None,
    custom_robustness: Optional[dict[str, RobustnessVector]] = None,
) -> list[BaseAgent]:
    """
    Build one agent per requested strategy name.

    Args:
        strategies: Strategy names to instantiate, in order. Names may repeat.
            When None, one agent of every preset in AGENT_PRESETS is created.
        custom_robustness: Optional robustness overrides keyed by strategy
            name; an override applies to every agent of that strategy.

    Returns:
        The agents in request order, each named "<strategy>_<position>".

    Raises:
        ValueError: If a requested name is not a key of AGENT_PRESETS.
    """
    requested = list(AGENT_PRESETS.keys()) if strategies is None else strategies

    def build(position, label):
        agent_cls = AGENT_PRESETS.get(label)
        if agent_cls is None:
            raise ValueError(f"Unknown strategy: {label}")
        has_override = custom_robustness and label in custom_robustness
        overrides = {"true_robustness": custom_robustness[label]} if has_override else {}
        return agent_cls(name=f"{label}_{position}", **overrides)

    return [build(position, label) for position, label in enumerate(requested)]