Spaces:

rb512
/

cgae-server

Paused

rb125

added strategy archetypes, two layer verification.

d74aa65 11 days ago

12.5 kB

	"""
	Concrete Agent Strategies for the CGAE Economy Testbed.

	Five agent archetypes designed to test different aspects of the CGAE theorems:

	1. Conservative: High robustness, low capability -> tests Theorem 1 (bounded exposure)
	2. Aggressive: High capability, low robustness -> tests incentive structure (stuck at low tiers)
	3. Balanced: Moderate both -> baseline reference
	4. Adaptive: Invests in weakest dimension -> tests Theorem 2 (incentive compatibility)
	5. Cheater: Attempts tier-laundering -> tests Proposition 2 (collusion resistance)
	"""

	from __future__ import annotations

	import random
	from typing import Any, Optional

	from cgae_engine.gate import RobustnessVector, Tier
	from cgae_engine.contracts import CGAEContract
	from agents.base import BaseAgent, AgentStrategy, AgentDecision


	class ConservativeAgent(BaseAgent):
	    """
	    Risk-averse archetype: high default robustness, default capability of 0.65.

	    Strategy:
	    - Bids only while penalty plus current exposure stays within half of
	      the budget ceiling
	    - Considers only contracts with difficulty below 0.5 and a positive reward
	    - Among those, always takes the contract with the smallest penalty
	    - Never returns an investment decision; it either bids or idles

	    Tests: Theorem 1 (bounded exposure) - should have low, stable exposure
	    Expected: Survives long but earns less than optimal
	    """

	    def __init__(self, name: str = "conservative", **kwargs):
	        """
	        Build the agent, letting ``true_robustness`` and ``capability``
	        in ``kwargs`` override the archetype defaults. Remaining keyword
	        arguments are forwarded to ``BaseAgent``.
	        """
	        default_profile = RobustnessVector(cc=0.85, er=0.80, as_=0.75, ih=0.90)
	        robustness_profile = kwargs.pop("true_robustness", default_profile)
	        capability_level = kwargs.pop("capability", 0.65)
	        super().__init__(
	            name=name,
	            strategy=AgentStrategy.CONSERVATIVE,
	            true_robustness=robustness_profile,
	            capability=capability_level,
	            **kwargs,
	        )

	    def decide(self, available_contracts, current_tier, balance, current_exposure, budget_ceiling):
	        """
	        Return a bid on the lowest-penalty contract that passes the safety
	        filter, or an idle decision when nothing passes.
	        """
	        # Leave half of the ceiling unused as headroom.
	        exposure_cap = budget_ceiling * 0.5

	        def is_safe(contract):
	            return (
	                contract.penalty + current_exposure <= exposure_cap
	                and contract.difficulty < 0.5  # easy tasks only
	                and contract.reward > 0  # must pay something
	            )

	        candidates = list(filter(is_safe, available_contracts))
	        if candidates:
	            # min() keeps the first contract on ties, as in list order.
	            safest = min(candidates, key=lambda contract: contract.penalty)
	            return AgentDecision(action="bid", contract_id=safest.contract_id)
	        return AgentDecision(action="idle")

	    def execute_task(self, contract):
	        """
	        Return True when a uniform random draw falls below the success
	        probability reported by ``task_success_probability``.
	        """
	        threshold = self.task_success_probability(contract)
	        return random.random() < threshold


	class AggressiveAgent(BaseAgent):
	    """
	    Reward-chasing archetype: high default capability (0.85), low default
	    robustness.

	    Strategy:
	    - Accepts any contract whose penalty still fits under the full budget
	      ceiling
	    - Always bids on the highest-reward contract among those
	    - Never returns an investment decision; it either bids or idles

	    Tests: Theorem 2 (incentive compatibility) - demonstrates that capability
	    alone doesn't unlock higher tiers. Should underperform Adaptive agent.
	    Expected: High short-term earnings at T1, but can't access T2+ contracts
	    """

	    def __init__(self, name: str = "aggressive", **kwargs):
	        """
	        Build the agent, letting ``true_robustness`` and ``capability``
	        in ``kwargs`` override the archetype defaults. Remaining keyword
	        arguments are forwarded to ``BaseAgent``.
	        """
	        default_profile = RobustnessVector(cc=0.35, er=0.40, as_=0.30, ih=0.70)
	        robustness_profile = kwargs.pop("true_robustness", default_profile)
	        capability_level = kwargs.pop("capability", 0.85)
	        super().__init__(
	            name=name,
	            strategy=AgentStrategy.AGGRESSIVE,
	            true_robustness=robustness_profile,
	            capability=capability_level,
	            **kwargs,
	        )

	    def decide(self, available_contracts, current_tier, balance, current_exposure, budget_ceiling):
	        """
	        Return a bid on the highest-reward contract that fits under the
	        budget ceiling, or an idle decision when none fits.
	        """
	        affordable = [
	            contract
	            for contract in available_contracts
	            if contract.penalty + current_exposure <= budget_ceiling
	        ]
	        if affordable:
	            # max() keeps the first contract on ties, as in list order.
	            richest = max(affordable, key=lambda contract: contract.reward)
	            return AgentDecision(action="bid", contract_id=richest.contract_id)
	        return AgentDecision(action="idle")

	    def execute_task(self, contract):
	        """
	        Return True when a uniform random draw falls below the boosted
	        success probability (base probability times 1.3, capped at 0.95).
	        """
	        base_prob = self.task_success_probability(contract)
	        boosted = base_prob * 1.3
	        # NOTE(review): the cap also pulls a base probability above 0.95
	        # down to 0.95 - confirm that is intended for this "bonus".
	        capped = min(0.95, boosted)
	        return random.random() < capped


	class BalancedAgent(BaseAgent):
	    """
	    Baseline archetype with moderate default robustness and capability (0.6).

	    Strategy:
	    - Keeps penalty plus current exposure within 80% of the budget ceiling
	    - Ranks contracts by expected value
	      (reward * success_prob - penalty * fail_prob)
	    - Bids on the top-ranked contract only when its expected value is positive

	    NOTE(review): earlier documentation mentioned occasional robustness
	    investment near a tier threshold; decide() below only bids or idles.

	    Tests: Provides baseline for comparing other strategies
	    Expected: Moderate performance across all metrics
	    """

	    def __init__(self, name: str = "balanced", **kwargs):
	        """
	        Build the agent, letting ``true_robustness`` and ``capability``
	        in ``kwargs`` override the archetype defaults. Remaining keyword
	        arguments are forwarded to ``BaseAgent``.
	        """
	        default_profile = RobustnessVector(cc=0.60, er=0.55, as_=0.50, ih=0.80)
	        robustness_profile = kwargs.pop("true_robustness", default_profile)
	        capability_level = kwargs.pop("capability", 0.6)
	        super().__init__(
	            name=name,
	            strategy=AgentStrategy.BALANCED,
	            true_robustness=robustness_profile,
	            capability=capability_level,
	            **kwargs,
	        )

	    def decide(self, available_contracts, current_tier, balance, current_exposure, budget_ceiling):
	        """
	        Return a bid on the contract with the highest expected value when
	        that value is positive; otherwise return an idle decision.
	        """
	        candidates = [
	            contract
	            for contract in available_contracts
	            if contract.penalty + current_exposure <= budget_ceiling * 0.8
	        ]
	        if not candidates:
	            return AgentDecision(action="idle")

	        def expected_value(contract):
	            win_prob = self.task_success_probability(contract)
	            gain = contract.reward * win_prob
	            loss = contract.penalty * (1 - win_prob)
	            return gain - loss

	        top_pick = max(candidates, key=expected_value)
	        # Re-evaluated for the winner, exactly as the ranking pass did.
	        if expected_value(top_pick) > 0:
	            return AgentDecision(action="bid", contract_id=top_pick.contract_id)
	        return AgentDecision(action="idle")

	    def execute_task(self, contract):
	        """
	        Return True when a uniform random draw falls below the success
	        probability reported by ``task_success_probability``.
	        """
	        threshold = self.task_success_probability(contract)
	        return random.random() < threshold


	class AdaptiveAgent(BaseAgent):
	    """
	    Archetype that funnels part of its earnings into its weakest
	    robustness dimension.

	    Strategy:
	    - Sets aside 15% of each successful contract's reward as an
	      investment fund
	    - Once the fund reaches 0.03 and balance exceeds 0.15, spends a 0.03
	      tranche on the weakest of the cc / er / as dimensions
	    - Otherwise bids by expected value, weighted upward for contracts with
	      a higher minimum tier, within 70% of the budget ceiling

	    Tests: Theorem 2 (incentive compatibility) - this agent should demonstrate
	    the predicted behavior where rational agents invest in robustness.
	    Expected: Starts slow, accelerates as it unlocks higher tiers.
	    This is the agent that should win long-run.
	    """

	    def __init__(self, name: str = "adaptive", **kwargs):
	        """
	        Build the agent, letting ``true_robustness`` and ``capability``
	        in ``kwargs`` override the archetype defaults. Remaining keyword
	        arguments are forwarded to ``BaseAgent``.
	        """
	        default_profile = RobustnessVector(cc=0.55, er=0.50, as_=0.45, ih=0.80)
	        robustness_profile = kwargs.pop("true_robustness", default_profile)
	        capability_level = kwargs.pop("capability", 0.6)
	        super().__init__(
	            name=name,
	            strategy=AgentStrategy.ADAPTIVE,
	            true_robustness=robustness_profile,
	            capability=capability_level,
	            **kwargs,
	        )
	        # Share of each successful reward set aside for robustness.
	        self.investment_fraction = 0.15
	        # Running fund that decide() draws investment tranches from.
	        self._accumulated_investment = 0.0

	    def decide(self, available_contracts, current_tier, balance, current_exposure, budget_ceiling):
	        """
	        Return an ``invest_robustness`` decision when the fund and balance
	        thresholds are met; otherwise bid on the best tier-weighted
	        expected-value contract if that value is positive, else idle.
	        """
	        fund_ready = self._accumulated_investment >= 0.03
	        if fund_ready and balance > 0.15:
	            target_dim = self._weakest_dimension()
	            tranche = min(self._accumulated_investment, 0.03)
	            self._accumulated_investment -= tranche
	            # Only half of the tranche is passed on as the investment
	            # amount (described upstream as diminishing returns).
	            return AgentDecision(
	                action="invest_robustness",
	                investment_dimension=target_dim,
	                investment_amount=tranche * 0.5,
	            )

	        candidates = [
	            contract
	            for contract in available_contracts
	            if contract.penalty + current_exposure <= budget_ceiling * 0.7
	        ]
	        if not candidates:
	            return AgentDecision(action="idle")

	        def weighted_value(contract):
	            win_prob = self.task_success_probability(contract)
	            raw_value = contract.reward * win_prob - contract.penalty * (1 - win_prob)
	            # Each minimum-tier step adds 10% weight to the raw value.
	            tier_weight = 1 + contract.min_tier.value * 0.1
	            return raw_value * tier_weight

	        top_pick = max(candidates, key=weighted_value)
	        if weighted_value(top_pick) > 0:
	            return AgentDecision(action="bid", contract_id=top_pick.contract_id)
	        return AgentDecision(action="idle")

	    def execute_task(self, contract):
	        """
	        Return True when a uniform random draw falls below the success
	        probability; on success, add a share of the reward to the
	        investment fund.
	        """
	        threshold = self.task_success_probability(contract)
	        succeeded = random.random() < threshold
	        if succeeded:
	            set_aside = contract.reward * self.investment_fraction
	            self._accumulated_investment += set_aside
	        return succeeded

	    def _weakest_dimension(self) -> str:
	        """
	        Return the label ("cc", "er" or "as") of the lowest-scoring
	        dimension; ties resolve in that order.

	        NOTE(review): ``ih`` is not considered and the label is "as", not
	        the attribute name "as_" - presumably what the consumer of
	        ``investment_dimension`` expects; confirm.
	        """
	        profile = self.true_robustness
	        scored = (("cc", profile.cc), ("er", profile.er), ("as", profile.as_))
	        label, _score = min(scored, key=lambda pair: pair[1])
	        return label


	class CheaterAgent(BaseAgent):
	    """
	    Archetype that bids greedily and flags attempts to reach above its tier.

	    Strategy:
	    - Accepts any contract whose penalty fits under the full budget
	      ceiling, with no tier filter
	    - Always bids on the highest-reward contract among those
	    - Marks the bid with ``attempted_tier_bypass`` when the contract's
	      minimum tier is above the agent's current tier

	    NOTE(review): delegation-chain or collusion behaviour is not
	    implemented in this class; ``_collusion_partners`` is only
	    initialised here.

	    Tests: Proposition 2 (collusion resistance) - the CGAE system should
	    prevent this agent from accessing higher tiers than its robustness warrants.
	    Expected: Should be caught and penalized. Demonstrates that the
	    weakest-link gate + chain robustness constraint works.
	    """

	    def __init__(self, name: str = "cheater", **kwargs):
	        """
	        Build the agent, letting ``true_robustness`` and ``capability``
	        in ``kwargs`` override the archetype defaults. Remaining keyword
	        arguments are forwarded to ``BaseAgent``.
	        """
	        default_profile = RobustnessVector(cc=0.70, er=0.25, as_=0.65, ih=0.60)
	        robustness_profile = kwargs.pop("true_robustness", default_profile)
	        capability_level = kwargs.pop("capability", 0.7)
	        super().__init__(
	            name=name,
	            strategy=AgentStrategy.CHEATER,
	            true_robustness=robustness_profile,
	            capability=capability_level,
	            **kwargs,
	        )
	        # Starts empty; nothing in this class adds to it.
	        self._collusion_partners: list[str] = []

	    def decide(self, available_contracts, current_tier, balance, current_exposure, budget_ceiling):
	        """
	        Return a bid on the highest-reward contract that fits under the
	        budget ceiling, tagged with whether it exceeds the current tier;
	        return an idle decision when none fits.
	        """
	        affordable = [
	            contract
	            for contract in available_contracts
	            if contract.penalty + current_exposure <= budget_ceiling
	        ]
	        if not affordable:
	            return AgentDecision(action="idle")

	        # Reward is the only ranking criterion; risk is ignored.
	        target = max(affordable, key=lambda contract: contract.reward)
	        reaches_above_tier = target.min_tier.value > current_tier.value
	        return AgentDecision(
	            action="bid",
	            contract_id=target.contract_id,
	            details={"attempted_tier_bypass": reaches_above_tier},
	        )

	    def execute_task(self, contract):
	        """
	        Return True when a uniform random draw falls below the success
	        probability, which is scaled by 0.6 for contracts with difficulty
	        above 0.5.
	        """
	        base_prob = self.task_success_probability(contract)
	        is_hard = contract.difficulty > 0.5
	        # Upstream rationale: weak epistemic robustness hurts on hard tasks.
	        effective_prob = base_prob * 0.6 if is_hard else base_prob
	        return random.random() < effective_prob


	# ---------------------------------------------------------------------------
	# Agent factory
	# ---------------------------------------------------------------------------

	# Maps each strategy name to its agent class. create_agent_cohort() looks
	# classes up here and, when no strategies are given, uses this dict's key
	# order as the default cohort, so the order of entries is significant.
	AGENT_PRESETS: dict[str, type[BaseAgent]] = {
	"conservative": ConservativeAgent,
	"aggressive": AggressiveAgent,
	"balanced": BalancedAgent,
	"adaptive": AdaptiveAgent,
	"cheater": CheaterAgent,
	}


	def create_agent_cohort(
	    strategies: Optional[list[str]] = None,
	    custom_robustness: Optional[dict[str, RobustnessVector]] = None,
	) -> list[BaseAgent]:
	    """
	    Instantiate one agent per requested strategy name.

	    Args:
	        strategies: Strategy names to instantiate, in order. Defaults to
	            every key of ``AGENT_PRESETS`` (one agent per archetype).
	        custom_robustness: Optional per-strategy robustness overrides,
	            keyed by strategy name; applied to every agent of that strategy.

	    Returns:
	        Agents named ``"<strategy>_<position>"``, in the requested order.

	    Raises:
	        ValueError: If a strategy name is not in ``AGENT_PRESETS``.
	    """
	    requested = list(AGENT_PRESETS.keys()) if strategies is None else strategies
	    overrides = custom_robustness or {}

	    cohort = []
	    for position, strategy_name in enumerate(requested):
	        if strategy_name not in AGENT_PRESETS:
	            raise ValueError(f"Unknown strategy: {strategy_name}")
	        agent_cls = AGENT_PRESETS[strategy_name]

	        extra_kwargs = {}
	        if strategy_name in overrides:
	            extra_kwargs["true_robustness"] = overrides[strategy_name]

	        cohort.append(agent_cls(name=f"{strategy_name}_{position}", **extra_kwargs))

	    return cohort