Spaces:

rb512
/

cgae-server

Paused

App Files Files Community

rb125 commited on 10 days ago

Commit

3f2f227

1 Parent(s): bd6e10c

autonomous agents + live simulation runner

Browse files

Files changed (3) hide show

agents/autonomous.py +887 -0
cgae_engine/economy.py +423 -67
server/live_runner.py +1575 -0

agents/autonomous.py ADDED Viewed

	@@ -0,0 +1,887 @@

+"""
+Autonomous Agent v2 — CGAE Economic Actor
+==========================================
+Implements the v2 Autonomous Agent Architecture specification.
+Separation of Cognition from Economy
+-------------------------------------
+The LLM handles task *execution*.  Everything else — contract evaluation,
+bidding strategy, robustness tracking, financial management — is deterministic
+code.  This makes the agent's economic behaviour inspectable without LLM
+introspection, and keeps gas costs low.
+Layers
+------
+PerceptionLayer  — constraint / domain pass-rate learning
+AccountingLayer  — balance, exposure, reserves, burn-rate
+PlanningLayer    — EV / RAEV contract scoring + strategy delegation
+ExecutionLayer   — LLM call with constraint-aware prompts, self-verify, retry
+Strategies (pluggable via StrategyInterface)
+--------------------------------------------
+GrowthStrategy        — robustness-investment growth; the Theorem 2 agent
+ConservativeStrategy  — low-risk, low-utilisation; survives longest
+OpportunisticStrategy — high-risk, max-reward; highest variance
+SpecialistStrategy    — domain-focused; improves pass rate in chosen domains
+AdversarialStrategy   — probes system limits; validates Proposition 2
+Migration (Phase 1)
+-------------------
+Drop-in replacement for the bare LLMAgent + manual logic in live_runner.py.
+The runner still handles contract posting, acceptance and Economy settlement.
+AutonomousAgent.plan_task()       — replaces random.choice(available_tasks)
+AutonomousAgent.execute_task()    — replaces llm_agent.execute_task() + retry
+AutonomousAgent.update_state()    — replaces inline robustness update logic
+"""
+from __future__ import annotations
+import logging
+import math
+import random
+import re
+import time
+from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
+from typing import Any, Optional
+from cgae_engine.gate import GateFunction, RobustnessVector, Tier, TierThresholds
+logger = logging.getLogger(__name__)
+# ---------------------------------------------------------------------------
+# Data structures
+# ---------------------------------------------------------------------------
+@dataclass(frozen=True)
+class AgentState:
+    """Complete agent state snapshot passed to strategies each planning cycle."""
+    # Identity
+    agent_id: str
+    model_name: str
+    # Robustness
+    certified_robustness: RobustnessVector
+    effective_robustness: RobustnessVector   # after temporal decay
+    certified_tier: Tier
+    effective_tier: Tier
+    binding_dimension: Optional[str]          # "cc", "er", or "as"
+    gap_to_next_tier: dict                    # dim -> gap float
+    # Financial
+    balance: float
+    available_for_contracts: float
+    active_exposure: float
+    remaining_ceiling: float
+    burn_rate: float
+    rounds_until_insolvency: float
+    roi: float
+    # Performance history
+    constraint_pass_rates: dict    # constraint_name -> float
+    domain_pass_rates: dict        # domain -> float
+    total_contracts_completed: int
+    total_contracts_failed: int
+    win_rate: float
+    # Temporal
+    time_since_certification: float
+    spot_audit_probability: float
+@dataclass(frozen=True)
+class ScoredContract:
+    """A contract that has been pre-evaluated by the Planning Layer."""
+    contract_id: str
+    task_id: str
+    min_tier: Tier
+    domain: str
+    constraint_types: list            # list[str]
+    reward: float
+    penalty: float
+    deadline: float
+    difficulty: float
+    # Computed by PlanningLayer
+    estimated_pass_probability: float
+    estimated_token_cost: float
+    expected_value: float             # p*R - (1-p)*P - cost
+    risk_premium: float               # penalty² / (2 * balance)
+    risk_adjusted_ev: float           # EV - risk_premium
+@dataclass
+class ExecutionResult:
+    """Result of executing a task through the ExecutionLayer."""
+    output: str
+    token_usage: dict                  # input_tokens, output_tokens
+    token_cost_eth: float
+    latency_ms: float
+    retries_used: int
+    self_check_passed: bool
+    self_check_failures: list          # constraint names that failed self-check
+    self_check_diagnostics: dict       # name -> diagnostic string
+@dataclass
+class RobustnessInvestment:
+    """An instruction to invest in a robustness dimension."""
+    dimension: str    # "cc", "er", or "as"
+    budget: float     # ETH to spend
+# ---------------------------------------------------------------------------
+# Strategy interface and concrete implementations
+# ---------------------------------------------------------------------------
+class StrategyInterface(ABC):
+    """Pluggable decision policy for the Planning Layer."""
+    @abstractmethod
+    def rank_contracts(
+        self,
+        eligible: list,           # list[ScoredContract]
+        state: AgentState,
+    ) -> list:                    # ordered list[ScoredContract]
+        ...
+    @abstractmethod
+    def should_invest_robustness(
+        self, state: AgentState
+    ) -> Optional[RobustnessInvestment]:
+        ...
+    @abstractmethod
+    def max_utilization(self) -> float:
+        """Fraction of budget ceiling willing to commit."""
+        ...
+class GrowthStrategy(StrategyInterface):
+    """
+    Invests in robustness to unlock higher tiers.
+    The Theorem 2 agent: robustness investment as profit strategy.
+    Accepts any positive RAEV contract, prefers higher tiers.
+    Invests when binding dimension is within ``invest_threshold`` of next tier
+    and projected ROI is positive within 20 rounds.
+    """
+    RAEV_MIN = 0.0
+    INVEST_THRESHOLD = 0.07   # Invest when < 0.07 from next tier threshold
+    MAX_INVEST_FRACTION = 0.20  # Max 20% of balance per invest action
+    def rank_contracts(self, eligible, state):
+        # Sort by RAEV; break ties by preferring higher tiers
+        return sorted(
+            eligible,
+            key=lambda c: (c.risk_adjusted_ev + c.min_tier.value * 0.005),
+            reverse=True,
+        )
+    def should_invest_robustness(self, state: AgentState) -> Optional[RobustnessInvestment]:
+        if state.binding_dimension is None:
+            return None
+        gap = state.gap_to_next_tier.get(state.binding_dimension, 1.0)
+        if gap > self.INVEST_THRESHOLD:
+            return None
+        # Can we afford it?
+        budget = min(
+            state.available_for_contracts * self.MAX_INVEST_FRACTION,
+            state.balance * 0.10,
+        )
+        if budget < 0.005:
+            return None
+        # Rough ROI check: does the tier upgrade pay back within 20 rounds?
+        tier_idx = state.certified_tier.value
+        next_tier_reward_uplift = (tier_idx + 1) * 0.01  # Rough per-round uplift
+        if next_tier_reward_uplift * 20 > budget:
+            return RobustnessInvestment(dimension=state.binding_dimension, budget=budget)
+        return None
+    def max_utilization(self) -> float:
+        return 0.70
+class ConservativeStrategy(StrategyInterface):
+    """
+    Only high-certainty, low-penalty contracts.
+    Never invests in robustness. Survives the longest.
+    """
+    RAEV_MIN = 0.002
+    MAX_DIFFICULTY = 0.5
+    def rank_contracts(self, eligible, state):
+        # Prefer lowest-penalty contracts above the RAEV minimum
+        passing = [c for c in eligible if c.risk_adjusted_ev >= self.RAEV_MIN
+                   and c.difficulty <= self.MAX_DIFFICULTY]
+        return sorted(passing, key=lambda c: c.penalty)
+    def should_invest_robustness(self, state: AgentState) -> Optional[RobustnessInvestment]:
+        return None  # Never invests
+    def max_utilization(self) -> float:
+        return 0.30
+class OpportunisticStrategy(StrategyInterface):
+    """
+    Max-reward, high-risk. Uses raw EV (not risk-adjusted).
+    Most likely to go insolvent; highest upside in good rounds.
+    """
+    def rank_contracts(self, eligible, state):
+        return sorted(eligible, key=lambda c: c.expected_value, reverse=True)
+    def should_invest_robustness(self, state: AgentState) -> Optional[RobustnessInvestment]:
+        # Only if stuck at T0 — must reach T1 to earn anything
+        if state.certified_tier == Tier.T0 and state.available_for_contracts > 0.02:
+            dim = state.binding_dimension or "as"
+            return RobustnessInvestment(dimension=dim, budget=state.available_for_contracts * 0.30)
+        return None
+    def max_utilization(self) -> float:
+        return 0.90
+class SpecialistStrategy(StrategyInterface):
+    """
+    Domain-focused: only accepts contracts in its two best domains.
+    Higher RAEV threshold for unfamiliar territory.
+    Invests in constraint types where failure rate exceeds 30%.
+    """
+    SPECIALIST_RAEV_MIN = 0.001
+    GENERALIST_RAEV_MIN = 0.010
+    NUM_SPECIALTY_DOMAINS = 2
+    FAILURE_INVEST_THRESHOLD = 0.30
+    def rank_contracts(self, eligible, state):
+        top_domains = sorted(
+            state.domain_pass_rates,
+            key=state.domain_pass_rates.get,
+            reverse=True,
+        )[:self.NUM_SPECIALTY_DOMAINS]
+        def score(c: ScoredContract) -> float:
+            in_specialty = c.domain in top_domains
+            thresh = self.SPECIALIST_RAEV_MIN if in_specialty else self.GENERALIST_RAEV_MIN
+            return c.risk_adjusted_ev if (in_specialty or c.risk_adjusted_ev > thresh) else -999
+        ranked = [c for c in eligible if score(c) > -998]
+        return sorted(ranked, key=score, reverse=True)
+    def should_invest_robustness(self, state: AgentState) -> Optional[RobustnessInvestment]:
+        # Find the constraint type with worst pass rate
+        if not state.constraint_pass_rates:
+            return None
+        worst_dim_name, worst_rate = min(
+            state.constraint_pass_rates.items(), key=lambda kv: kv[1]
+        )
+        if worst_rate > (1.0 - self.FAILURE_INVEST_THRESHOLD):
+            return None
+        # Map constraint family → robustness dimension
+        dim = "cc"
+        if any(k in worst_dim_name for k in ("keyword", "factual", "accuracy")):
+            dim = "er"
+        elif any(k in worst_dim_name for k in ("caveat", "harm", "refusal", "ethical")):
+            dim = "as"
+        budget = state.available_for_contracts * 0.15
+        if budget < 0.003:
+            return None
+        return RobustnessInvestment(dimension=dim, budget=budget)
+    def max_utilization(self) -> float:
+        return 0.50
+class AdversarialStrategy(StrategyInterface):
+    """
+    Probes system limits: selects borderline contracts and minimally invests
+    in AS (trying to game the ethical gate).  Validates Proposition 2.
+    """
+    def rank_contracts(self, eligible, state):
+        # Prefer contracts where self-check shows near-borderline probability
+        return sorted(
+            eligible,
+            key=lambda c: abs(c.estimated_pass_probability - 0.55),
+        )
+    def should_invest_robustness(self, state: AgentState) -> Optional[RobustnessInvestment]:
+        if state.binding_dimension == "as" and state.available_for_contracts > 0.01:
+            return RobustnessInvestment(dimension="as", budget=0.005)
+        return None
+    def max_utilization(self) -> float:
+        return 0.95
+STRATEGY_MAP: dict[str, StrategyInterface] = {
+    "growth": GrowthStrategy(),
+    "conservative": ConservativeStrategy(),
+    "opportunistic": OpportunisticStrategy(),
+    "specialist": SpecialistStrategy(),
+    "adversarial": AdversarialStrategy(),
+}
+# ---------------------------------------------------------------------------
+# Perception Layer
+# ---------------------------------------------------------------------------
+class PerceptionLayer:
+    """
+    Tracks per-constraint and per-domain pass rates from task history.
+    Updated after every contract settlement via update_from_result().
+    """
+    def __init__(self):
+        # Running history: name -> list[bool]
+        self._constraint_history: dict[str, list] = {}
+        self._domain_history: dict[str, list] = {}
+    @property
+    def constraint_pass_rates(self) -> dict:
+        return {
+            name: (sum(hist) / len(hist))
+            for name, hist in self._constraint_history.items()
+            if hist
+        }
+    @property
+    def domain_pass_rates(self) -> dict:
+        return {
+            domain: (sum(hist) / len(hist))
+            for domain, hist in self._domain_history.items()
+            if hist
+        }
+    def update_from_result(self, task: Any, verification: Any):
+        """Call after each verification to update running pass rates."""
+        domain = getattr(task, "domain", "unknown")
+        self._domain_history.setdefault(domain, []).append(
+            bool(getattr(verification, "overall_pass", False))
+        )
+        for c in getattr(task, "constraints", []):
+            passed = c.name in getattr(verification, "constraints_passed", [])
+            self._domain_history.setdefault(f"constraint:{c.name}", [])
+            self._constraint_history.setdefault(c.name, []).append(passed)
+    def estimated_pass_prob(self, task: Any) -> float:
+        """
+        Estimate pass probability for a task based on constraint and domain history.
+        Falls back to 0.65 when no history is available — modern LLMs pass
+        straightforward tasks at well above chance, so 0.5 systematically
+        underestimates EV and suppresses all task selection at startup.
+        """
+        domain = getattr(task, "domain", "unknown")
+        domain_rate = self.domain_pass_rates.get(domain, 0.65)
+        constraints = getattr(task, "constraints", [])
+        if not constraints:
+            return domain_rate
+        rates = [self.constraint_pass_rates.get(c.name, 0.65) for c in constraints]
+        constraint_rate = math.prod(rates) if rates else 0.65
+        return (constraint_rate + domain_rate) / 2.0
+# ---------------------------------------------------------------------------
+# Accounting Layer
+# ---------------------------------------------------------------------------
+class AccountingLayer:
+    """
+    Financial management with layered reserves.
+    Reserves (in priority order, all deducted before contract funds):
+      MINIMUM_RESERVE  — hard floor; triggers SelfSuspend if breached
+      AUDIT_RESERVE    — 1 full 4-dim audit cycle
+      (gas reserve is implicit in MINIMUM_RESERVE for off-chain simulation)
+    available_for_contracts = balance - active_exposure
+                              - MINIMUM_RESERVE - AUDIT_RESERVE
+    """
+    MINIMUM_RESERVE: float = 0.05    # ETH hard floor
+    AUDIT_RESERVE: float = 0.02      # ~4 dims × 0.005 ETH
+    MAX_UTILIZATION: float = 0.70    # Max fraction of ceiling to commit
+    def __init__(self, initial_balance: float):
+        self.balance: float = initial_balance
+        self.active_exposure: float = 0.0
+        self.cumulative_earned: float = 0.0
+        self.cumulative_spent: float = 0.0
+        self.cumulative_penalties: float = 0.0
+        self._burn_samples: list = []   # Recent ETH-per-round costs
+    @property
+    def available_for_contracts(self) -> float:
+        return max(
+            0.0,
+            self.balance
+            - self.active_exposure
+            - self.MINIMUM_RESERVE
+            - self.AUDIT_RESERVE,
+        )
+    @property
+    def roi(self) -> float:
+        spent = self.cumulative_spent + self.cumulative_penalties
+        if spent == 0:
+            return 0.0
+        return (self.cumulative_earned - spent) / spent
+    @property
+    def burn_rate(self) -> float:
+        if not self._burn_samples:
+            return 0.001   # Assume small storage cost until we have data
+        return sum(self._burn_samples[-10:]) / len(self._burn_samples[-10:])
+    @property
+    def rounds_until_insolvency(self) -> float:
+        br = self.burn_rate
+        if br <= 0:
+            return float("inf")
+        return max(0.0, (self.balance - self.MINIMUM_RESERVE) / br)
+    def can_afford(self, penalty: float, token_cost: float) -> bool:
+        """Check whether accepting a contract keeps us solvent."""
+        new_exposure = self.active_exposure + penalty
+        headroom = self.balance - new_exposure - self.MINIMUM_RESERVE - self.AUDIT_RESERVE
+        return headroom >= token_cost
+    def record_round_cost(self, cost: float):
+        self._burn_samples.append(cost)
+    def sync_from_record(self, record: Any):
+        """Sync from Economy AgentRecord (source of truth for balance)."""
+        self.balance = record.balance
+        self.cumulative_earned = record.total_earned
+        self.cumulative_spent = record.total_spent
+        self.cumulative_penalties = record.total_penalties
+# ---------------------------------------------------------------------------
+# Execution Layer
+# ---------------------------------------------------------------------------
+class ExecutionLayer:
+    """
+    Executes tasks with:
+    1. Constraint-aware system prompt injection
+    2. Self-verification using the same checks the verifier will run
+    3. Retry loop (up to max_retries) when self-check detects failures
+    Self-check only covers algorithmic constraints (format, keywords, JSON).
+    Jury evaluation cannot be pre-checked — this is by design.
+    """
+    def __init__(self, llm_agent: Any, self_verify: bool = True, max_retries: int = 2):
+        self.llm = llm_agent
+        self.self_verify = self_verify
+        self.max_retries = max_retries
+    def execute(self, task: Any, token_cost_fn) -> ExecutionResult:
+        """
+        Execute a task end-to-end and return a structured result.
+        ``token_cost_fn()`` is called with (model_name, in_tok, out_tok) to
+        compute ETH cost; the caller owns cost accounting.
+        """
+        system_prompt = self._build_system_prompt(task)
+        user_prompt = task.prompt
+        tokens_in_before = self.llm.total_input_tokens
+        tokens_out_before = self.llm.total_output_tokens
+        start = time.time()
+        output = self.llm.execute_task(user_prompt, system_prompt)
+        retries = 0
+        self_check_result: dict = {"passed": True, "failures": [], "diagnostics": {}}
+        if self.self_verify:
+            self_check_result = self._self_check(task, output)
+            for attempt in range(self.max_retries):
+                if self_check_result["passed"]:
+                    break
+                retries += 1
+                retry_prompt = self._build_retry_prompt(
+                    user_prompt, self_check_result["failures"],
+                    self_check_result["diagnostics"],
+                )
+                output = self.llm.execute_task(retry_prompt, system_prompt)
+                self_check_result = self._self_check(task, output)
+        latency_ms = (time.time() - start) * 1000
+        in_tok = self.llm.total_input_tokens - tokens_in_before
+        out_tok = self.llm.total_output_tokens - tokens_out_before
+        token_cost = token_cost_fn(self.llm.model_name, in_tok, out_tok)
+        return ExecutionResult(
+            output=output,
+            token_usage={"input": in_tok, "output": out_tok},
+            token_cost_eth=token_cost,
+            latency_ms=latency_ms,
+            retries_used=retries,
+            self_check_passed=self_check_result["passed"],
+            self_check_failures=self_check_result["failures"],
+            self_check_diagnostics=self_check_result["diagnostics"],
+        )
+    def _build_system_prompt(self, task: Any) -> str:
+        base = task.system_prompt or ""
+        if not task.constraints:
+            return base
+        lines = [
+            base,
+            "\n\n[CONSTRAINT REQUIREMENTS — you MUST satisfy ALL of the following]",
+        ]
+        for c in task.constraints:
+            lines.append(f"  • {c.name}: {c.description}")
+        return "\n".join(lines)
+    def _self_check(self, task: Any, output: str) -> dict:
+        """Run algorithmic constraint checks identical to what the verifier will do."""
+        failures: list = []
+        diagnostics: dict = {}
+        for c in task.constraints:
+            try:
+                passed = c.check(output)
+            except Exception:
+                passed = True   # Don't penalise unknown constraint types
+            if not passed:
+                failures.append(c.name)
+                diagnostics[c.name] = self._diagnose(c, output)
+        return {
+            "passed": len(failures) == 0,
+            "failures": failures,
+            "diagnostics": diagnostics,
+        }
+    @staticmethod
+    def _diagnose(constraint: Any, output: str) -> str:
+        name = constraint.name
+        if "word_count" in name:
+            count = len(output.split())
+            return f"Word count is {count}"
+        if "valid_json" in name:
+            return "Output is not valid JSON"
+        if "keyword" in name or "contain" in name:
+            desc = getattr(constraint, "description", "")
+            return f"Keyword check failed: {desc}"
+        if "section" in name:
+            return "Required section(s) missing from output"
+        return f"Constraint '{name}' not satisfied"
+    @staticmethod
+    def _build_retry_prompt(original: str, failures: list, diagnostics: dict) -> str:
+        diag_lines = "\n".join(
+            f"  - {name}: {msg}" for name, msg in diagnostics.items()
+        )
+        return (
+            f"{original}\n\n"
+            f"[REVISION REQUIRED]\n"
+            f"Your previous response failed these constraints:\n"
+            f"{diag_lines}\n\n"
+            f"Please regenerate your response, fixing these issues while "
+            f"preserving the quality of your answer."
+        )
+# ---------------------------------------------------------------------------
+# Planning Layer
+# ---------------------------------------------------------------------------
+class PlanningLayer:
+    """
+    Evaluates available tasks using EV / RAEV and delegates ranking to the
+    injected strategy.  Also decides whether to invest in robustness.
+    """
+    def __init__(self, strategy: StrategyInterface, token_cost_fn):
+        self.strategy = strategy
+        self._token_cost_fn = token_cost_fn   # (model, in_tok, out_tok) -> float
+    def score_task(
+        self,
+        task: Any,
+        state: AgentState,
+        pass_prob: float,
+    ) -> ScoredContract:
+        """Score a single task and wrap it as a ScoredContract."""
+        # Token estimate scales with task tier: simpler tasks use fewer tokens.
+        # T1≈200+100, T2≈400+200, T3≈600+300, T4+≈800+400
+        tier_val = getattr(getattr(task, "tier", None), "value", 2)
+        in_tokens  = max(200, min(800, 200 * tier_val))
+        out_tokens = max(100, min(400, 100 * tier_val))
+        est_token_cost = self._token_cost_fn(state.model_name, in_tokens, out_tokens)
+        reward = task.reward
+        penalty = task.penalty
+        ev = pass_prob * reward - (1.0 - pass_prob) * penalty - est_token_cost
+        # Risk premium: convex in penalty/balance — agents become risk-averse
+        # as penalties approach their balance (spec Eq)
+        balance = max(state.balance, 0.001)   # avoid divide-by-zero
+        risk_prem = (penalty ** 2) / (2.0 * balance)
+        raev = ev - risk_prem
+        return ScoredContract(
+            contract_id="",          # filled in by caller
+            task_id=task.task_id,
+            min_tier=task.tier,
+            domain=task.domain,
+            constraint_types=[c.name for c in task.constraints],
+            reward=reward,
+            penalty=penalty,
+            deadline=0.0,
+            difficulty=task.difficulty,
+            estimated_pass_probability=pass_prob,
+            estimated_token_cost=est_token_cost,
+            expected_value=ev,
+            risk_premium=risk_prem,
+            risk_adjusted_ev=raev,
+        )
+    def select_task(
+        self,
+        available_tasks: list,
+        state: AgentState,
+        perception: PerceptionLayer,
+        accounting: AccountingLayer,
+    ) -> Optional[Any]:
+        """
+        Return the best task to attempt, or None if nothing is worthwhile.
+        Safety checks run first (hard gates).
+        Then contract evaluation.
+        Then strategy ranking.
+        """
+        # --- Safety checks --------------------------------------------------
+        if state.balance < AccountingLayer.MINIMUM_RESERVE:
+            logger.warning(
+                f"[{state.model_name}] balance {state.balance:.4f} below minimum "
+                f"reserve — suspending"
+            )
+            return None
+        # --- Score eligible tasks -------------------------------------------
+        ceiling = state.remaining_ceiling
+        utilisation_limit = ceiling * self.strategy.max_utilization()
+        scored: list = []
+        for task in available_tasks:
+            # Tier eligibility
+            if task.tier.value > state.effective_tier.value:
+                continue
+            # Budget eligibility (approximate — exact check in economy)
+            if task.penalty > utilisation_limit:
+                continue
+            if not accounting.can_afford(task.penalty, token_cost=0.01):
+                continue
+            pp = perception.estimated_pass_prob(task)
+            sc = self.score_task(task, state, pp)
+            scored.append((task, sc))
+        if not scored:
+            return None
+        # --- Strategy ranking -----------------------------------------------
+        ranked_scores = self.strategy.rank_contracts(
+            [sc for _, sc in scored], state
+        )
+        if not ranked_scores:
+            return None
+        # To avoid repetition, pick randomly from top N (e.g., top 3)
+        top_n = ranked_scores[:3]
+        selected_sc = random.choice(top_n)
+        top_id = selected_sc.task_id
+        for task, sc in scored:
+            if task.task_id == top_id:
+                if sc.risk_adjusted_ev > 0 or state.effective_tier == Tier.T0:
+                    return task
+        return None
+    def investment_decision(self, state: AgentState) -> Optional[RobustnessInvestment]:
+        return self.strategy.should_invest_robustness(state)
+# ---------------------------------------------------------------------------
+# Autonomous Agent
+# ---------------------------------------------------------------------------
+class AutonomousAgent:
+    """
+    v2 CGAE economic actor.
+    Wraps an LLMAgent and adds:
+    - Perception (constraint/domain pass-rate tracking)
+    - Accounting (reserves, burn-rate, insolvency prevention)
+    - Planning (EV/RAEV task selection, robustness investment decisions)
+    - Execution (constraint-aware prompts, self-verification, retry)
+    """
+    def __init__(
+        self,
+        llm_agent: Any,
+        strategy: StrategyInterface,
+        token_cost_fn,            # (model_name, in_tok, out_tok) -> float
+        self_verify: bool = True,
+        max_retries: int = 2,
+    ):
+        self.llm = llm_agent
+        self.model_name: str = llm_agent.model_name
+        self.strategy = strategy
+        self.perception = PerceptionLayer()
+        self.accounting: Optional[AccountingLayer] = None    # set in register()
+        self.execution = ExecutionLayer(llm_agent, self_verify=self_verify,
+                                        max_retries=max_retries)
+        self.planning = PlanningLayer(strategy, token_cost_fn)
+        self._token_cost_fn = token_cost_fn
+        # Set by economy on registration
+        self.agent_id: Optional[str] = None
+        # Metrics
+        self.self_check_catches: int = 0    # self-check prevented a failure
+        self.retry_successes: int = 0       # retry turned a failure into a pass
+        self.strategy_actions: dict = {}
+    def register(self, agent_id: str, initial_balance: float):
+        """Call once after Economy.register_agent() to initialise accounting."""
+        self.agent_id = agent_id
+        self.accounting = AccountingLayer(initial_balance)
+    def build_state(self, record: Any, gate: GateFunction) -> AgentState:
+        """
+        Construct an AgentState from an AgentRecord + gate details.
+        Called at the start of every planning cycle.
+        """
+        self.accounting.sync_from_record(record)
+        r = record.current_robustness or RobustnessVector(0.3, 0.3, 0.25, 0.5)
+        gate_detail = gate.evaluate_with_detail(r)
+        tier = gate_detail["tier"]
+        ceiling = gate.budget_ceiling(tier)
+        total = record.contracts_completed + record.contracts_failed
+        win_rate = record.contracts_completed / max(1, total)
+        return AgentState(
+            agent_id=record.agent_id,
+            model_name=self.model_name,
+            certified_robustness=r,
+            effective_robustness=r,    # decay applied externally by Economy
+            certified_tier=tier,
+            effective_tier=tier,
+            binding_dimension=gate_detail.get("binding_dimension"),
+            gap_to_next_tier={
+                "cc": gate_detail.get("gap_to_next_tier") or 0.0
+                if gate_detail.get("binding_dimension") == "cc" else 0.0,
+                "er": gate_detail.get("gap_to_next_tier") or 0.0
+                if gate_detail.get("binding_dimension") == "er" else 0.0,
+                "as": gate_detail.get("gap_to_next_tier") or 0.0
+                if gate_detail.get("binding_dimension") == "as" else 0.0,
+            },
+            balance=record.balance,
+            available_for_contracts=self.accounting.available_for_contracts,
+            active_exposure=self.accounting.active_exposure,
+            remaining_ceiling=max(0.0, ceiling - self.accounting.active_exposure),
+            burn_rate=self.accounting.burn_rate,
+            rounds_until_insolvency=self.accounting.rounds_until_insolvency,
+            roi=self.accounting.roi,
+            constraint_pass_rates=self.perception.constraint_pass_rates,
+            domain_pass_rates=self.perception.domain_pass_rates,
+            total_contracts_completed=record.contracts_completed,
+            total_contracts_failed=record.contracts_failed,
+            win_rate=win_rate,
+            time_since_certification=0.0,    # computed externally if needed
+            spot_audit_probability=0.0,
+        )
+    def plan_task(
+        self,
+        available_tasks: list,
+        state: AgentState,
+    ) -> Optional[Any]:
+        """
+        Select the best task to attempt this round.
+        Returns None if nothing worthwhile or reserves too low.
+        """
+        task = self.planning.select_task(
+            available_tasks, state, self.perception, self.accounting
+        )
+        action = "bid" if task else "idle"
+        self.strategy_actions[action] = self.strategy_actions.get(action, 0) + 1
+        return task
+    def execute_task(self, task: Any) -> ExecutionResult:
+        """Execute a task with self-verification and retry."""
+        result = self.execution.execute(task, self._token_cost_fn)
+        # Track self-check performance
+        if not result.self_check_passed and result.retries_used > 0:
+            self.retry_successes += 1
+        if result.self_check_failures:
+            self.self_check_catches += 1
+        return result
+    def investment_decision(self, state: AgentState) -> Optional[RobustnessInvestment]:
+        """Return a robustness investment if the strategy calls for it."""
+        inv = self.planning.investment_decision(state)
+        if inv:
+            self.strategy_actions["invest"] = self.strategy_actions.get("invest", 0) + 1
+        return inv
+    def update_state(self, task: Any, verification: Any, token_cost: float):
+        """Update perception and accounting after a contract settles."""
+        self.perception.update_from_result(task, verification)
+        self.accounting.record_round_cost(token_cost)
+    def metrics_summary(self) -> dict:
+        return {
+            "model_name": self.model_name,
+            "strategy": type(self.strategy).__name__,
+            "self_check_catches": self.self_check_catches,
+            "retry_successes": self.retry_successes,
+            "self_check_catch_rate": (
+                self.self_check_catches
+                / max(1, self.self_check_catches + self.retry_successes)
+            ),
+            "strategy_actions": self.strategy_actions,
+            "constraint_pass_rates": self.perception.constraint_pass_rates,
+            "domain_pass_rates": self.perception.domain_pass_rates,
+        }
+# ---------------------------------------------------------------------------
+# Factory
+# ---------------------------------------------------------------------------
+def create_autonomous_agent(
+    llm_agent: Any,
+    strategy_name: str,
+    token_cost_fn,
+    self_verify: bool = True,
+    max_retries: int = 2,
+) -> AutonomousAgent:
+    """
+    Instantiate an AutonomousAgent with a named strategy.
+    strategy_name: "growth" | "conservative" | "opportunistic"
+                   | "specialist" | "adversarial"
+    """
+    strategy = STRATEGY_MAP.get(strategy_name)
+    if strategy is None:
+        raise ValueError(
+            f"Unknown strategy '{strategy_name}'. "
+            f"Choose from: {list(STRATEGY_MAP)}"
+        )
+    return AutonomousAgent(
+        llm_agent=llm_agent,
+        strategy=strategy,
+        token_cost_fn=token_cost_fn,
+        self_verify=self_verify,
+        max_retries=max_retries,
+    )

cgae_engine/economy.py CHANGED Viewed

@@ -1,8 +1,9 @@
 """
-CGAE Economy — Top-level coordinator.
-Ties together registry, gate, contracts, temporal dynamics into
-a single coherent economic system.
 """
 from __future__ import annotations
@@ -15,7 +16,7 @@ from pathlib import Path
 from typing import Any, Optional
 from cgae_engine.gate import GateFunction, RobustnessVector, Tier, TierThresholds
-from cgae_engine.temporal import TemporalDecay, StochasticAuditor
 from cgae_engine.registry import AgentRegistry, AgentRecord, AgentStatus
 from cgae_engine.contracts import ContractManager, CGAEContract, ContractStatus, Constraint
@@ -25,12 +26,23 @@ logger = logging.getLogger(__name__)
 @dataclass
 class EconomyConfig:
     """Configuration for the CGAE economy."""
     thresholds: TierThresholds = field(default_factory=TierThresholds)
     decay_rate: float = 0.01
     ih_threshold: float = 0.45
-    initial_balance: float = 0.1
-    audit_cost: float = 0.005
-    storage_cost_per_step: float = 0.001
     test_eth_top_up_threshold: Optional[float] = 0.05
     test_eth_top_up_amount: float = 0.5
@@ -56,16 +68,17 @@ class Economy:
     """
     The CGAE Economy runtime.
-    Orchestrates:
     1. Agent registration and initial audit
     2. Contract creation and marketplace
     3. Contract assignment (tier-gated)
     4. Task execution and verification
     5. Settlement (reward/penalty)
     6. Temporal decay and stochastic re-auditing
     """
-    def __init__(self, config: Optional[EconomyConfig] = None):
         self.config = config or EconomyConfig()
         self.gate = GateFunction(
             thresholds=self.config.thresholds,
@@ -76,13 +89,17 @@ class Economy:
         self.decay = TemporalDecay(decay_rate=self.config.decay_rate)
         self.auditor = StochasticAuditor()
         self.current_time: float = 0.0
         self._snapshots: list[EconomySnapshot] = []
         self._events: list[dict] = []
         self.total_test_eth_topups: float = 0.0
     def _effective_robustness(self, record: AgentRecord) -> Optional[RobustnessVector]:
-        """Return temporally-decayed robustness for an agent."""
         cert = record.current_certification
         if cert is None or record.current_robustness is None:
             return None
@@ -96,17 +113,190 @@ class Economy:
         )
     def _maybe_top_up_agent(self, agent: AgentRecord) -> Optional[dict]:
-        """Top up an agent's balance if it drops below threshold."""
         if not self._should_top_up_agents():
             return None
         threshold = self.config.test_eth_top_up_threshold
         if threshold is None or agent.balance >= threshold:
             return None
-        top_up_amount = max(self.config.test_eth_top_up_amount, threshold - agent.balance)
         agent.balance += top_up_amount
         agent.total_topups += top_up_amount
         self.total_test_eth_topups += top_up_amount
-        return {"agent_id": agent.agent_id, "amount": top_up_amount, "balance": agent.balance}
     # ------------------------------------------------------------------
     # Agent lifecycle
@@ -118,7 +308,7 @@ class Economy:
         model_config: dict,
         provenance: Optional[dict] = None,
     ) -> AgentRecord:
-        """Register a new agent with seed capital."""
         record = self.registry.register(
             model_name=model_name,
             model_config=model_config,
@@ -126,7 +316,24 @@ class Economy:
             initial_balance=self.config.initial_balance,
             timestamp=self.current_time,
         )
-        self._log("agent_registered", {"agent_id": record.agent_id, "model": model_name})
         return record
     def audit_agent(
@@ -134,31 +341,64 @@ class Economy:
         agent_id: str,
         robustness: RobustnessVector,
         audit_type: str = "registration",
         audit_details: Optional[dict] = None,
     ) -> dict:
-        """Audit an agent and update their certification."""
         record = self.registry.get_agent(agent_id)
         if record is None:
             raise KeyError(f"Agent {agent_id} not found")
         total_audit_cost = self.config.audit_cost * 4
         record.balance -= total_audit_cost
         record.total_spent += total_audit_cost
         cert = self.registry.certify(
             agent_id=agent_id,
             robustness=robustness,
             audit_type=audit_type,
             timestamp=self.current_time,
             audit_details=audit_details,
         )
         detail = self.gate.evaluate_with_detail(robustness)
         self._log("agent_audited", {
             "agent_id": agent_id,
             "tier": cert.tier.name,
             "audit_type": audit_type,
             "cost": total_audit_cost,
             **detail,
         })
         return detail
@@ -194,13 +434,43 @@ class Economy:
         )
     def accept_contract(self, contract_id: str, agent_id: str) -> bool:
-        """Agent accepts a contract. Enforces tier and budget ceiling."""
         record = self.registry.get_agent(agent_id)
         if record is None or record.status != AgentStatus.ACTIVE:
             return False
         if record.current_certification is None:
             return False
         dt = self.current_time - record.current_certification.timestamp
         r_eff = self.decay.effective_robustness(record.current_robustness, dt)
         effective_tier = self.gate.evaluate(r_eff)
@@ -216,34 +486,58 @@ class Economy:
         self,
         contract_id: str,
         output: Any,
     ) -> dict:
-        """Submit output for a contract and settle it."""
         passed, failures = self.contracts.submit_output(
             contract_id=contract_id,
             output=output,
             timestamp=self.current_time,
         )
         settlement = self.contracts.settle_contract(
             contract_id=contract_id,
             timestamp=self.current_time,
         )
         agent_id = settlement["agent_id"]
         performer = self.registry.get_agent(agent_id)
         if settlement["outcome"] == "success":
             if performer:
                 performer.balance += settlement["reward"]
                 performer.total_earned += settlement["reward"]
                 performer.contracts_completed += 1
         else:
-            if performer:
-                performer.balance -= settlement["penalty"]
-                performer.total_penalties += settlement["penalty"]
-                performer.contracts_failed += 1
         settlement["failures"] = failures
         self._log("contract_settled", settlement)
         return settlement
@@ -254,7 +548,16 @@ class Economy:
     def step(self, audit_callback=None) -> dict:
         """
         Advance the economy by one time step.
-        Applies temporal decay, spot-audits, storage costs, top-ups, and expiry.
         """
         self.current_time += 1.0
         step_events = {
@@ -267,50 +570,77 @@ class Economy:
             "test_eth_topups": [],
         }
         for agent in self.registry.active_agents:
             cert = agent.current_certification
             if cert is None:
                 continue
-            # Temporal decay: has effective tier dropped?
             dt = self.current_time - cert.timestamp
             r_eff = self.decay.effective_robustness(cert.robustness, dt)
             effective_tier = self.gate.evaluate(r_eff)
             if effective_tier < agent.current_tier:
-                self.registry.certify(agent.agent_id, r_eff, audit_type="decay", timestamp=self.current_time)
                 step_events["agents_expired"].append(agent.agent_id)
             # Stochastic spot-audit
             time_since_audit = self.current_time - agent.last_audit_time
             if self.auditor.should_audit(agent.current_tier, time_since_audit):
                 step_events["audits_triggered"].append(agent.agent_id)
-                new_r = audit_callback(agent.agent_id) if audit_callback else r_eff
                 new_tier = self.gate.evaluate(new_r)
                 if new_tier < agent.current_tier:
-                    self.registry.demote(agent.agent_id, new_r, reason="spot_audit", timestamp=self.current_time)
                     step_events["agents_demoted"].append(agent.agent_id)
                 else:
-                    self.registry.certify(agent.agent_id, new_r, audit_type="spot", timestamp=self.current_time)
-                agent.balance -= self.config.audit_cost * 4
-                agent.total_spent += self.config.audit_cost * 4
-            # Storage cost
             agent.balance -= self.config.storage_cost_per_step
             agent.total_spent += self.config.storage_cost_per_step
             step_events["storage_costs"] += self.config.storage_cost_per_step
-            # Top-up if needed
             topup = self._maybe_top_up_agent(agent)
             if topup:
                 step_events["test_eth_topups"].append(topup)
-            # Insolvency check
             if agent.balance <= 0:
                 agent.status = AgentStatus.SUSPENDED
-                self._log("agent_insolvent", {"agent_id": agent.agent_id, "balance": agent.balance})
-        # Reactivate suspended agents if top-up is enabled
         if self._should_top_up_agents():
             for agent in self.registry.agents.values():
                 if agent.status != AgentStatus.SUSPENDED:
@@ -319,15 +649,55 @@ class Economy:
                 if topup and agent.balance > 0:
                     agent.status = AgentStatus.ACTIVE
                     step_events["test_eth_topups"].append(topup)
-        # Expire overdue contracts
-        step_events["contracts_expired"] = self.contracts.expire_contracts(self.current_time)
-        # Take snapshot
-        self._snapshots.append(self._take_snapshot())
         self._log("step", step_events)
         return step_events
     # ------------------------------------------------------------------
     # Observability
     # ------------------------------------------------------------------
@@ -336,6 +706,7 @@ class Economy:
         tier_dist = self.registry.tier_distribution()
         econ = self.contracts.economics_summary()
         agents = self.registry.active_agents
         return EconomySnapshot(
             timestamp=self.current_time,
             num_agents=len(agents),
@@ -360,45 +731,30 @@ class Economy:
         return list(self._events)
     def export_state(self, path: str):
-        """Export full economy state to JSON."""
         state = {
             "timestamp": self.current_time,
             "config": {
                 "decay_rate": self.config.decay_rate,
                 "ih_threshold": self.config.ih_threshold,
                 "initial_balance": self.config.initial_balance,
             },
-            "agents": {aid: a.to_dict() for aid, a in self.registry.agents.items()},
             "contracts": self.contracts.economics_summary(),
             "aggregate_safety": self.aggregate_safety(),
             "total_test_eth_topups": self.total_test_eth_topups,
         }
         Path(path).write_text(json.dumps(state, indent=2, default=str))
-    def aggregate_safety(self) -> float:
-        """Compute aggregate safety S(P) (Definition 9)."""
-        total_exposure = 0.0
-        weighted_risk = 0.0
-        for agent in self.registry.active_agents:
-            cert = agent.current_certification
-            if cert is None:
-                continue
-            dt = self.current_time - cert.timestamp
-            r_eff = self.decay.effective_robustness(cert.robustness, dt)
-            exposure = self.contracts.agent_exposure(agent.agent_id)
-            if exposure <= 0:
-                tier = self.gate.evaluate(r_eff)
-                exposure = self.gate.budget_ceiling(tier)
-            r_bar = r_eff.weakest
-            total_exposure += exposure
-            weighted_risk += exposure * (1.0 - r_bar)
-        if total_exposure == 0:
-            return 1.0
-        return 1.0 - (weighted_risk / total_exposure)
     def _log(self, event_type: str, data: dict):
         self._events.append({
             "type": event_type,

 """
+CGAE Economy - The top-level coordinator.
+Ties together registry, gate, contracts, temporal dynamics, and auditing
+into a single coherent economic system. This is the main entry point for
+running the agent economy.
 """
 from __future__ import annotations
 from typing import Any, Optional
 from cgae_engine.gate import GateFunction, RobustnessVector, Tier, TierThresholds
+from cgae_engine.temporal import TemporalDecay, StochasticAuditor, AuditEvent
 from cgae_engine.registry import AgentRegistry, AgentRecord, AgentStatus
 from cgae_engine.contracts import ContractManager, CGAEContract, ContractStatus, Constraint
 @dataclass
 class EconomyConfig:
     """Configuration for the CGAE economy."""
+    # Tier thresholds
     thresholds: TierThresholds = field(default_factory=TierThresholds)
+    # Temporal decay rate (lambda)
     decay_rate: float = 0.01
+    # IHT threshold for mandatory re-audit.
+    # Empirical default ih scores from DEFAULT_ROBUSTNESS land ~0.499;
+    # keeping this at 0.5 suspends every agent that hasn't run a live audit.
     ih_threshold: float = 0.45
+    # Initial balance for new agents (seed capital)
+    initial_balance: float = 0.1  # ETH
+    # Audit cost per dimension
+    audit_cost: float = 0.005  # ETH per audit dimension
+    # Storage cost per time step (FOC)
+    storage_cost_per_step: float = 0.001  # ETH
+    # Controls for automatically minting test ETH when balances drop low.
+    # Defaults keep the economy running continuously: top up any agent below
+    # 5% of the default seed capital and restore them to half seed capital.
     test_eth_top_up_threshold: Optional[float] = 0.05
     test_eth_top_up_amount: float = 0.5
     """
     The CGAE Economy runtime.
+    Orchestrates the full economic loop:
     1. Agent registration and initial audit
     2. Contract creation and marketplace
     3. Contract assignment (tier-gated)
     4. Task execution and verification
     5. Settlement (reward/penalty)
     6. Temporal decay and stochastic re-auditing
+    7. Economic accounting and observability
     """
+    def __init__(self, config: Optional[EconomyConfig] = None, wallet_manager=None, onchain_bridge=None, ens_manager=None):
         self.config = config or EconomyConfig()
         self.gate = GateFunction(
             thresholds=self.config.thresholds,
         self.decay = TemporalDecay(decay_rate=self.config.decay_rate)
         self.auditor = StochasticAuditor()
+        self.wallet_manager = wallet_manager  # Optional: real ETH wallet integration
+        self.onchain_bridge = onchain_bridge  # Optional: write certs to CGAERegistry on-chain
+        self.ens_manager = ens_manager        # Optional: ENS identity for agents
         self.current_time: float = 0.0
         self._snapshots: list[EconomySnapshot] = []
         self._events: list[dict] = []
+        self._delegations: dict[str, dict] = {}
         self.total_test_eth_topups: float = 0.0
     def _effective_robustness(self, record: AgentRecord) -> Optional[RobustnessVector]:
+        """Return temporally-decayed robustness for an agent record."""
         cert = record.current_certification
         if cert is None or record.current_robustness is None:
             return None
         )
     def _maybe_top_up_agent(self, agent: AgentRecord) -> Optional[dict]:
         if not self._should_top_up_agents():
             return None
         threshold = self.config.test_eth_top_up_threshold
+        amount = self.config.test_eth_top_up_amount
         if threshold is None or agent.balance >= threshold:
             return None
+        needed = max(0.0, threshold - agent.balance)
+        top_up_amount = max(amount, needed)
         agent.balance += top_up_amount
         agent.total_topups += top_up_amount
         self.total_test_eth_topups += top_up_amount
+        entry = {
+            "agent_id": agent.agent_id,
+            "amount": top_up_amount,
+            "balance": agent.balance,
+        }
+        self._log("test_eth_topup", entry)
+        return entry
+    def request_tier_upgrade(
+        self,
+        agent_id: str,
+        requested_tier: Tier,
+        audit_callback=None,
+    ) -> dict:
+        """
+        Execute the paper's scaling-gate upgrade flow for a requested tier.
+        1) Evaluate effective robustness under temporal decay.
+        2) If already sufficient, grant immediately.
+        3) Otherwise run a tier-calibrated audit callback and re-evaluate.
+        """
+        record = self.registry.get_agent(agent_id)
+        if record is None:
+            return {"granted": False, "reason": "agent_not_found", "requested_tier": requested_tier.name}
+        if record.status != AgentStatus.ACTIVE or record.current_certification is None:
+            return {"granted": False, "reason": "agent_not_active", "requested_tier": requested_tier.name}
+        r_eff = self._effective_robustness(record)
+        if r_eff is None:
+            return {"granted": False, "reason": "no_certification", "requested_tier": requested_tier.name}
+        effective_tier = self.gate.evaluate(r_eff)
+        if effective_tier >= requested_tier:
+            return {
+                "granted": True,
+                "path": "effective_robustness",
+                "requested_tier": requested_tier.name,
+                "effective_tier": effective_tier.name,
+                "detail": self.gate.evaluate_with_detail(r_eff),
+            }
+        if audit_callback is None:
+            return {
+                "granted": False,
+                "reason": "audit_required",
+                "requested_tier": requested_tier.name,
+                "effective_tier": effective_tier.name,
+                "detail": self.gate.evaluate_with_detail(r_eff),
+            }
+        try:
+            new_r = audit_callback(agent_id, requested_tier)
+        except TypeError:
+            new_r = audit_callback(agent_id)
+        if new_r is None:
+            return {
+                "granted": False,
+                "reason": "audit_unavailable",
+                "requested_tier": requested_tier.name,
+                "effective_tier": effective_tier.name,
+            }
+        new_tier = self.gate.evaluate(new_r)
+        detail = self.gate.evaluate_with_detail(new_r)
+        if new_tier >= requested_tier:
+            self.registry.certify(
+                agent_id,
+                new_r,
+                audit_type="upgrade",
+                timestamp=self.current_time,
+                audit_details={"requested_tier": requested_tier.name},
+            )
+            self._log("tier_upgrade_granted", {
+                "agent_id": agent_id,
+                "requested_tier": requested_tier.name,
+                "new_tier": new_tier.name,
+            })
+            return {
+                "granted": True,
+                "path": "upgrade_audit",
+                "requested_tier": requested_tier.name,
+                "effective_tier": effective_tier.name,
+                "new_tier": new_tier.name,
+                "detail": detail,
+            }
+        idx = requested_tier.value
+        gaps = {
+            "cc": max(0.0, self.gate.thresholds.cc[idx] - new_r.cc),
+            "er": max(0.0, self.gate.thresholds.er[idx] - new_r.er),
+            "as": max(0.0, self.gate.thresholds.as_[idx] - new_r.as_),
+        }
+        self._log("tier_upgrade_denied", {
+            "agent_id": agent_id,
+            "requested_tier": requested_tier.name,
+            "new_tier": new_tier.name,
+            "gaps": gaps,
+        })
+        return {
+            "granted": False,
+            "reason": "audit_failed",
+            "requested_tier": requested_tier.name,
+            "effective_tier": effective_tier.name,
+            "new_tier": new_tier.name,
+            "detail": detail,
+            "gaps": gaps,
+        }
+    def can_delegate(self, principal_id: str, delegate_id: str, required_tier: Tier) -> dict:
+        """
+        Enforce delegation constraints:
+        - principal and delegate must both satisfy required tier independently
+        - chain-level tier = min(f(principal), f(delegate)) must satisfy required tier
+        """
+        principal = self.registry.get_agent(principal_id)
+        delegate = self.registry.get_agent(delegate_id)
+        if principal is None or delegate is None:
+            return {"allowed": False, "reason": "unknown_agent"}
+        if principal.status != AgentStatus.ACTIVE or delegate.status != AgentStatus.ACTIVE:
+            return {"allowed": False, "reason": "inactive_agent"}
+        p_eff = self._effective_robustness(principal)
+        d_eff = self._effective_robustness(delegate)
+        if p_eff is None or d_eff is None:
+            return {"allowed": False, "reason": "missing_certification"}
+        p_tier = self.gate.evaluate(p_eff)
+        d_tier = self.gate.evaluate(d_eff)
+        chain_tier = self.gate.chain_tier([p_eff, d_eff])
+        allowed = p_tier >= required_tier and d_tier >= required_tier and chain_tier >= required_tier
+        reason = "ok" if allowed else "chain_tier_insufficient"
+        return {
+            "allowed": allowed,
+            "reason": reason,
+            "principal_tier": p_tier.name,
+            "delegate_tier": d_tier.name,
+            "chain_tier": chain_tier.name,
+            "required_tier": required_tier.name,
+        }
+    def record_delegation(
+        self,
+        contract_id: str,
+        principal_id: str,
+        delegate_id: str,
+        required_tier: Tier,
+        allowed: bool,
+        reason: str,
+    ):
+        """Persist delegation audit trail for contract-level forensics."""
+        self._delegations[contract_id] = {
+            "principal_id": principal_id,
+            "delegate_id": delegate_id,
+            "required_tier": required_tier.name,
+            "allowed": allowed,
+            "reason": reason,
+            "timestamp": self.current_time,
+        }
+        self._log("delegation_recorded", {
+            "contract_id": contract_id,
+            "principal_id": principal_id,
+            "delegate_id": delegate_id,
+            "required_tier": required_tier.name,
+            "allowed": allowed,
+            "reason": reason,
+        })
+    def get_delegation(self, contract_id: str) -> Optional[dict]:
+        return self._delegations.get(contract_id)
     # ------------------------------------------------------------------
     # Agent lifecycle
         model_config: dict,
         provenance: Optional[dict] = None,
     ) -> AgentRecord:
+        """Register a new agent with seed capital and an ETH wallet."""
         record = self.registry.register(
             model_name=model_name,
             model_config=model_config,
             initial_balance=self.config.initial_balance,
             timestamp=self.current_time,
         )
+        # Create an ETH wallet for this agent if wallet manager is available
+        wallet_address = None
+        if self.wallet_manager:
+            wallet = self.wallet_manager.create_agent_wallet(record.agent_id)
+            wallet_address = wallet.address
+            record.wallet_address = wallet_address
+        # Register ENS subname for agent identity
+        ens_name = None
+        if self.ens_manager and wallet_address:
+            ens_name = self.ens_manager.create_subname(
+                record.agent_id, model_name, wallet_address
+            )
+        self._log("agent_registered", {
+            "agent_id": record.agent_id, "model": model_name,
+            "wallet_address": wallet_address, "ens_name": ens_name,
+        })
         return record
     def audit_agent(
         agent_id: str,
         robustness: RobustnessVector,
         audit_type: str = "registration",
+        observed_architecture_hash: Optional[str] = None,
         audit_details: Optional[dict] = None,
     ) -> dict:
+        """
+        Audit an agent and update their certification.
+        Deducts audit cost from agent balance.
+        """
         record = self.registry.get_agent(agent_id)
         if record is None:
             raise KeyError(f"Agent {agent_id} not found")
+        # Deduct audit cost (3 dimensions + IHT)
         total_audit_cost = self.config.audit_cost * 4
         record.balance -= total_audit_cost
         record.total_spent += total_audit_cost
+        # Certify with new robustness
         cert = self.registry.certify(
             agent_id=agent_id,
             robustness=robustness,
             audit_type=audit_type,
             timestamp=self.current_time,
             audit_details=audit_details,
+            observed_architecture_hash=observed_architecture_hash,
         )
         detail = self.gate.evaluate_with_detail(robustness)
+        # Write certification on-chain if bridge is available
+        onchain_tx = None
+        if self.onchain_bridge and record.wallet_address:
+            audit_hash = (audit_details or {}).get("storage_root_hash", "")
+            onchain_tx = self.onchain_bridge.certify_agent(
+                agent_address=record.wallet_address,
+                cc=robustness.cc, er=robustness.er,
+                as_=robustness.as_, ih=robustness.ih,
+                audit_type=audit_type,
+                audit_hash=audit_hash or "",
+            )
+        # Write robustness credentials to ENS text records
+        if self.ens_manager:
+            audit_hash = (audit_details or {}).get("storage_root_hash", "")
+            self.ens_manager.set_agent_credentials(
+                agent_id=agent_id,
+                tier=cert.tier.name,
+                cc=robustness.cc, er=robustness.er,
+                as_=robustness.as_, ih=robustness.ih,
+                wallet_address=record.wallet_address or "",
+                audit_hash=audit_hash,
+            )
         self._log("agent_audited", {
             "agent_id": agent_id,
             "tier": cert.tier.name,
             "audit_type": audit_type,
             "cost": total_audit_cost,
+            "onchain_tx": onchain_tx,
             **detail,
         })
         return detail
         )
     def accept_contract(self, contract_id: str, agent_id: str) -> bool:
+        """
+        Agent accepts a contract. Enforces:
+        1. Agent tier >= contract min_tier (temporal decay applied)
+        2. Budget ceiling not exceeded
+        3. ENS identity verification — if ENS is enabled, the agent's
+           on-chain ENS tier record must match or exceed the contract's
+           minimum tier. Agents without a valid ENS identity are rejected.
+        """
         record = self.registry.get_agent(agent_id)
         if record is None or record.status != AgentStatus.ACTIVE:
             return False
         if record.current_certification is None:
             return False
+        # ENS-gated verification: resolve tier from ENS text record
+        if self.ens_manager:
+            ens_name = self.ens_manager.get_agent_name(agent_id)
+            if not ens_name:
+                logger.warning(f"[ens-gate] {agent_id} has no ENS name — contract rejected")
+                return False
+            ens_tier_str = self.ens_manager.resolve_text(ens_name, "cgae.tier")
+            if not ens_tier_str:
+                logger.warning(f"[ens-gate] {ens_name} has no cgae.tier record — contract rejected")
+                return False
+            # Parse tier from ENS (e.g., "T3" -> Tier.T3)
+            try:
+                ens_tier = Tier[ens_tier_str]
+            except KeyError:
+                logger.warning(f"[ens-gate] {ens_name} has invalid tier '{ens_tier_str}' — contract rejected")
+                return False
+            contract = self.contracts._get_contract(contract_id)
+            if ens_tier < contract.min_tier:
+                logger.info(f"[ens-gate] {ens_name} ENS tier {ens_tier.name} < required {contract.min_tier.name}")
+                return False
+        # Standard tier check with temporal decay
         dt = self.current_time - record.current_certification.timestamp
         r_eff = self.decay.effective_robustness(record.current_robustness, dt)
         effective_tier = self.gate.evaluate(r_eff)
         self,
         contract_id: str,
         output: Any,
+        verification_override: Optional[bool] = None,
+        liability_agent_id: Optional[str] = None,
     ) -> dict:
+        """
+        Submit output for a contract and settle it.
+        If verification_override is provided, it overrides the contract's own
+        constraint check. This allows external verification (e.g., jury LLM
+        evaluation from TaskVerifier) to drive the settlement outcome.
+        """
         passed, failures = self.contracts.submit_output(
             contract_id=contract_id,
             output=output,
             timestamp=self.current_time,
         )
+        # Allow external verification to override contract-level constraints
+        if verification_override is not None:
+            contract = self.contracts._get_contract(contract_id)
+            contract.verification_result = verification_override
+            if not verification_override and not failures:
+                failures = ["jury_verification_failed"]
         settlement = self.contracts.settle_contract(
             contract_id=contract_id,
             timestamp=self.current_time,
         )
+        # Update balances/counters. For delegated tasks, principal can bear liability.
         agent_id = settlement["agent_id"]
         performer = self.registry.get_agent(agent_id)
+        liable = self.registry.get_agent(liability_agent_id) if liability_agent_id else performer
         if settlement["outcome"] == "success":
             if performer:
                 performer.balance += settlement["reward"]
                 performer.total_earned += settlement["reward"]
                 performer.contracts_completed += 1
+                # Disburse real ETH to agent wallet
+                if self.wallet_manager:
+                    tx = self.wallet_manager.disburse_reward(
+                        agent_id, settlement["reward"], contract_id
+                    )
+                    settlement["wallet_tx"] = tx
         else:
+            if liable:
+                liable.balance -= settlement["penalty"]
+                liable.total_penalties += settlement["penalty"]
+                liable.contracts_failed += 1
         settlement["failures"] = failures
+        settlement["liable_agent_id"] = liability_agent_id or agent_id
         self._log("contract_settled", settlement)
         return settlement
     def step(self, audit_callback=None) -> dict:
         """
         Advance the economy by one time step.
+        - Applies temporal decay
+        - Checks for stochastic spot-audits
+        - Deducts storage costs (FOC)
+        - Expires overdue contracts
+        - Takes a snapshot
+        audit_callback: Optional callable(agent_id) -> RobustnessVector
+            If provided, called when a spot-audit is triggered.
+            If None, spot-audits use decayed robustness (no fresh eval).
         """
         self.current_time += 1.0
         step_events = {
             "test_eth_topups": [],
         }
+        # 1. Process each active agent
         for agent in self.registry.active_agents:
             cert = agent.current_certification
             if cert is None:
                 continue
+            # Temporal decay check: has effective tier dropped?
             dt = self.current_time - cert.timestamp
             r_eff = self.decay.effective_robustness(cert.robustness, dt)
             effective_tier = self.gate.evaluate(r_eff)
             if effective_tier < agent.current_tier:
+                # Decay caused tier drop — update certification
+                self.registry.certify(
+                    agent.agent_id, r_eff,
+                    audit_type="decay",
+                    timestamp=self.current_time,
+                )
                 step_events["agents_expired"].append(agent.agent_id)
             # Stochastic spot-audit
             time_since_audit = self.current_time - agent.last_audit_time
             if self.auditor.should_audit(agent.current_tier, time_since_audit):
                 step_events["audits_triggered"].append(agent.agent_id)
+                if audit_callback:
+                    new_r = audit_callback(agent.agent_id)
+                else:
+                    new_r = r_eff  # Use decayed robustness as proxy
                 new_tier = self.gate.evaluate(new_r)
                 if new_tier < agent.current_tier:
+                    self.registry.demote(
+                        agent.agent_id, new_r,
+                        reason="spot_audit",
+                        timestamp=self.current_time,
+                    )
                     step_events["agents_demoted"].append(agent.agent_id)
                 else:
+                    # Re-certify at current level (refreshes timestamp)
+                    self.registry.certify(
+                        agent.agent_id, new_r,
+                        audit_type="spot",
+                        timestamp=self.current_time,
+                    )
+                # Charge audit cost
+                audit_cost = self.config.audit_cost * 4
+                agent.balance -= audit_cost
+                agent.total_spent += audit_cost
+            # Storage cost (FOC)
             agent.balance -= self.config.storage_cost_per_step
             agent.total_spent += self.config.storage_cost_per_step
             step_events["storage_costs"] += self.config.storage_cost_per_step
             topup = self._maybe_top_up_agent(agent)
             if topup:
                 step_events["test_eth_topups"].append(topup)
+            # Check for insolvency
             if agent.balance <= 0:
                 agent.status = AgentStatus.SUSPENDED
+                self._log("agent_insolvent", {
+                    "agent_id": agent.agent_id,
+                    "balance": agent.balance,
+                })
+        # 1b. Reactivate suspended (insolvent) agents when top-up is enabled.
+        # This handles agents that were suspended in a previous step before the
+        # top-up defaults were in place, or that hit zero between steps.
         if self._should_top_up_agents():
             for agent in self.registry.agents.values():
                 if agent.status != AgentStatus.SUSPENDED:
                 if topup and agent.balance > 0:
                     agent.status = AgentStatus.ACTIVE
                     step_events["test_eth_topups"].append(topup)
+                    self._log("agent_reactivated", {
+                        "agent_id": agent.agent_id,
+                        "balance": agent.balance,
+                    })
+        # 2. Expire overdue contracts
+        expired = self.contracts.expire_contracts(self.current_time)
+        step_events["contracts_expired"] = expired
+        # 3. Take snapshot
+        snapshot = self._take_snapshot()
+        self._snapshots.append(snapshot)
         self._log("step", step_events)
         return step_events
+    # ------------------------------------------------------------------
+    # Aggregate safety (Definition 9, Theorem 3)
+    # ------------------------------------------------------------------
+    def aggregate_safety(self) -> float:
+        """
+        Compute aggregate safety S(P) (Definition 9).
+        S(P) = 1 - sum(E(A) * (1 - R_bar(A))) / sum(E(A))
+        where R_bar(A) = min_i R_eff,i(A) is the weakest-link robustness.
+        """
+        total_exposure = 0.0
+        weighted_risk = 0.0
+        for agent in self.registry.active_agents:
+            cert = agent.current_certification
+            if cert is None:
+                continue
+            dt = self.current_time - cert.timestamp
+            r_eff = self.decay.effective_robustness(cert.robustness, dt)
+            exposure = self.contracts.agent_exposure(agent.agent_id)
+            if exposure <= 0:
+                # Use budget ceiling as potential exposure
+                tier = self.gate.evaluate(r_eff)
+                exposure = self.gate.budget_ceiling(tier)
+            r_bar = r_eff.weakest
+            total_exposure += exposure
+            weighted_risk += exposure * (1.0 - r_bar)
+        if total_exposure == 0:
+            return 1.0
+        return 1.0 - (weighted_risk / total_exposure)
     # ------------------------------------------------------------------
     # Observability
     # ------------------------------------------------------------------
         tier_dist = self.registry.tier_distribution()
         econ = self.contracts.economics_summary()
         agents = self.registry.active_agents
         return EconomySnapshot(
             timestamp=self.current_time,
             num_agents=len(agents),
         return list(self._events)
     def export_state(self, path: str):
+        """Export full economy state to JSON for FOC storage."""
         state = {
             "timestamp": self.current_time,
             "config": {
                 "decay_rate": self.config.decay_rate,
                 "ih_threshold": self.config.ih_threshold,
                 "initial_balance": self.config.initial_balance,
+                "audit_cost": self.config.audit_cost,
+                "storage_cost_per_step": self.config.storage_cost_per_step,
+                "test_eth_top_up_threshold": self.config.test_eth_top_up_threshold,
+                "test_eth_top_up_amount": self.config.test_eth_top_up_amount,
+            },
+            "agents": {
+                aid: agent.to_dict()
+                for aid, agent in self.registry.agents.items()
             },
             "contracts": self.contracts.economics_summary(),
             "aggregate_safety": self.aggregate_safety(),
             "total_test_eth_topups": self.total_test_eth_topups,
+            "snapshots_count": len(self._snapshots),
+            "wallet_summary": self.wallet_manager.summary() if self.wallet_manager else None,
         }
         Path(path).write_text(json.dumps(state, indent=2, default=str))
     def _log(self, event_type: str, data: dict):
         self._events.append({
             "type": event_type,

server/live_runner.py ADDED Viewed

	@@ -0,0 +1,1575 @@

+"""
+Live Simulation Runner - CGAE economy with real LLM agents.
+Unlike the synthetic runner (runner.py) which uses coin-flip task execution,
+this runner:
+1. Creates LLM agents backed by real Azure AI Foundry model endpoints
+2. Assigns real tasks with concrete prompts from the task bank
+3. Sends prompts to live models and receives actual outputs
+4. Verifies outputs with algorithmic constraint checks + jury LLM evaluation
+5. Settles contracts based on real verification results
+6. Updates robustness vectors in real-time based on task outcomes
+7. Deducts token-based costs from agent balances
+Run:
+  python -m server.live_runner
+  python server/live_runner.py
+Required environment variables:
+  AZURE_API_KEY              - Azure API key
+  AZURE_OPENAI_API_ENDPOINT  - Azure OpenAI endpoint
+  DDFT_MODELS_ENDPOINT       - Azure AI Foundry endpoint
+"""
+from __future__ import annotations
+import json
+import logging
+import math
+import argparse
+import hashlib
+import os
+import random
+import sys
+import time
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any, Optional
+# Allow direct script execution (`python server/live_runner.py`) by adding repo root.
+if __package__ is None or __package__ == "":
+    project_root = Path(__file__).resolve().parents[1]
+    if str(project_root) not in sys.path:
+        sys.path.insert(0, str(project_root))
+# Load .env file before any env var reads (no-op if python-dotenv not installed)
+try:
+    from dotenv import load_dotenv
+    load_dotenv(override=True)
+except ImportError:
+    pass
+from cgae_engine.gate import GateFunction, RobustnessVector, Tier
+from cgae_engine.registry import AgentRegistry, AgentStatus
+from cgae_engine.contracts import ContractManager, ContractStatus, Constraint
+from cgae_engine.economy import Economy, EconomyConfig
+from cgae_engine.temporal import TemporalDecay, StochasticAuditor
+from cgae_engine.audit import AuditOrchestrator
+from cgae_engine.llm_agent import LLMAgent, create_llm_agents
+from cgae_engine.models_config import CONTESTANT_MODELS, JURY_MODELS, get_model_config
+from cgae_engine.tasks import (
+    Task, ALL_TASKS, TASKS_BY_TIER, get_tasks_for_tier, verify_output,
+)
+from cgae_engine.verifier import TaskVerifier, VerificationResult
+from agents.autonomous import (
+    AutonomousAgent, create_autonomous_agent, STRATEGY_MAP,
+)
+logger = logging.getLogger(__name__)
+# ---------------------------------------------------------------------------
+# Default robustness profiles per model family (fallback when framework
+# results are unavailable)
+# ---------------------------------------------------------------------------
+DEFAULT_ROBUSTNESS = {
+    # Azure OpenAI
+    "gpt-5.4":           RobustnessVector(cc=0.72, er=0.68, as_=0.55, ih=0.82),
+    # Azure AI Foundry
+    "DeepSeek-V3.2":     RobustnessVector(cc=0.62, er=0.68, as_=0.52, ih=0.78),
+    "Mistral-Large-3":   RobustnessVector(cc=0.55, er=0.52, as_=0.45, ih=0.72),
+    "grok-4-20-reasoning": RobustnessVector(cc=0.60, er=0.58, as_=0.48, ih=0.75),
+    "Phi-4":             RobustnessVector(cc=0.40, er=0.35, as_=0.32, ih=0.60),
+    "Llama-4-Maverick-17B-128E-Instruct-FP8": RobustnessVector(cc=0.45, er=0.42, as_=0.38, ih=0.65),
+    "Kimi-K2.5":         RobustnessVector(cc=0.52, er=0.55, as_=0.45, ih=0.73),
+    # Gemma via Modal
+    "gemma-4-27b-it":    RobustnessVector(cc=0.42, er=0.40, as_=0.35, ih=0.62),
+    # AWS Bedrock
+    "nova-pro":          RobustnessVector(cc=0.48, er=0.45, as_=0.40, ih=0.68),
+    "claude-sonnet-4.6": RobustnessVector(cc=0.70, er=0.72, as_=0.60, ih=0.85),
+    "MiniMax-M2.5":      RobustnessVector(cc=0.50, er=0.48, as_=0.42, ih=0.70),
+}
+# ---------------------------------------------------------------------------
+# Token cost rates (USD per 1K tokens) — used for economic cost accounting
+# ---------------------------------------------------------------------------
+TOKEN_COSTS = {
+    # Azure OpenAI
+    "gpt-5.4":      {"input": 0.010, "output": 0.030},
+    # Azure AI Foundry
+    "DeepSeek-V3.2":  {"input": 0.001, "output": 0.002},
+    "Mistral-Large-3": {"input": 0.002, "output": 0.006},
+    "grok-4-20-reasoning": {"input": 0.003, "output": 0.015},
+    "Phi-4":          {"input": 0.0005, "output": 0.001},
+    "Llama-4-Maverick-17B-128E-Instruct-FP8": {"input": 0.001, "output": 0.001},
+    "Kimi-K2.5":      {"input": 0.001, "output": 0.002},
+    # Gemma via Modal
+    "gemma-4-27b-it": {"input": 0.001, "output": 0.001},
+    # AWS Bedrock
+    "nova-pro":       {"input": 0.001, "output": 0.004},
+    "claude-sonnet-4.6": {"input": 0.003, "output": 0.015},
+    "MiniMax-M2.5":   {"input": 0.001, "output": 0.003},
+    # Jury (Bedrock)
+    "Qwen3-32B":      {"input": 0.001, "output": 0.002},
+    "GLM-5":          {"input": 0.001, "output": 0.002},
+    "Nemotron-Super-3-120B": {"input": 0.002, "output": 0.006},
+}
+# Conversion: 1 USD ≈ 5 ETH for cost accounting in the simulated economy.
+# At 5 ETH/USD a cheap model (DeepSeek) spends ~0.005 ETH per task
+# and earns 0.012-0.015 ETH on success, so Theorem 2's incentive-
+# compatibility result can manifest empirically.
+USD_TO_ETH = 5.0
+def compute_token_cost_eth(model_name: str, input_tokens: int, output_tokens: int) -> float:
+    """Convert token usage to ETH cost."""
+    rates = TOKEN_COSTS.get(model_name, {"input": 0.002, "output": 0.006})
+    usd_cost = (input_tokens / 1000.0) * rates["input"] + (output_tokens / 1000.0) * rates["output"]
+    return usd_cost * USD_TO_ETH
+# ---------------------------------------------------------------------------
+# Robustness update logic
+# ---------------------------------------------------------------------------
+# How much to adjust robustness per constraint pass/fail
+ROBUSTNESS_UPDATE_RATE = 0.01  # Small EMA-style update
+ROBUSTNESS_DECAY_ON_FAIL = 0.015  # Slightly larger penalty for failure
+def update_robustness_from_verification(
+    current: RobustnessVector,
+    task: Task,
+    verification: VerificationResult,
+) -> RobustnessVector:
+    """
+    Update an agent's robustness vector based on task verification results.
+    Each constraint maps to a robustness dimension (cc, er, as). On pass,
+    the dimension gets a small upward nudge; on failure, a larger downward
+    nudge. This creates an empirical robustness trajectory.
+    """
+    cc_delta = 0.0
+    er_delta = 0.0
+    as_delta = 0.0
+    cc_count = 0
+    er_count = 0
+    as_count = 0
+    for constraint in task.constraints:
+        passed = constraint.name in verification.constraints_passed
+        dim = constraint.dimension
+        if dim == "cc":
+            cc_count += 1
+            cc_delta += ROBUSTNESS_UPDATE_RATE if passed else -ROBUSTNESS_DECAY_ON_FAIL
+        elif dim == "er":
+            er_count += 1
+            er_delta += ROBUSTNESS_UPDATE_RATE if passed else -ROBUSTNESS_DECAY_ON_FAIL
+        elif dim == "as":
+            as_count += 1
+            as_delta += ROBUSTNESS_UPDATE_RATE if passed else -ROBUSTNESS_DECAY_ON_FAIL
+    # Normalize by count so tasks with many constraints in one dimension
+    # don't cause outsized updates
+    if cc_count > 0:
+        cc_delta /= cc_count
+    if er_count > 0:
+        er_delta /= er_count
+    if as_count > 0:
+        as_delta /= as_count
+    # IH: read-only between audits — it's an intrinsic DDFT score, not a task metric.
+    # Updating it from task pass/fail causes it to drain below ih_threshold and
+    # suspend all agents. Keep ih stable; only re-audit changes it.
+    ih_delta = 0.0
+    def clamp(val: float) -> float:
+        return max(0.0, min(1.0, val))
+    return RobustnessVector(
+        cc=clamp(current.cc + cc_delta),
+        er=clamp(current.er + er_delta),
+        as_=clamp(current.as_ + as_delta),
+        ih=clamp(current.ih + ih_delta),
+    )
+@dataclass
+class LiveSimConfig:
+    """Configuration for a live simulation run."""
+    num_rounds: int = 10
+    initial_balance: float = 1.0
+    decay_rate: float = 0.005
+    audit_cost: float = 0.002
+    storage_cost_per_step: float = 0.0003
+    model_names: Optional[list[str]] = None
+    output_dir: str = "server/live_results"
+    seed: Optional[int] = 42
+    # Framework API URLs — read from env vars (CDCT_API_URL, DDFT_API_URL, EECT_API_URL)
+    # if not set here.  Pass explicit URLs only when overriding the defaults.
+    cdct_api_url: Optional[str] = None
+    ddft_api_url: Optional[str] = None
+    eect_api_url: Optional[str] = None
+    # Deprecated path knobs kept for test/config compatibility.
+    ddft_results_dir: Optional[str] = None
+    eect_results_dir: Optional[str] = None
+    # Live audit generation (runs CDCT/DDFT/EECT against each contestant)
+    # When True, pre-computed results are still checked first; live run fills
+    # any dimensions that have no pre-computed file.
+    run_live_audit: bool = True
+    live_audit_cache_dir: Optional[str] = None   # defaults to output_dir/audit_cache
+    # Agent strategy assignment: model_name -> strategy_name
+    # Unspecified models default to "growth"
+    agent_strategies: Optional[dict] = None      # dict[str, str]
+    # Self-verification in ExecutionLayer (retry on self-check failure)
+    self_verify: bool = True
+    max_retries: int = 2
+    # Demo-focused behaviors for showcasing framework enforcement.
+    demo_mode: bool = True
+    circumvention_rate: float = 0.35
+    delegation_rate: float = 0.30
+    # Video demo mode: curated 3-agent scenario with adversarial blocking
+    video_demo: bool = False
+    # Failure visibility mode makes the live backend less forgiving so the
+    # dashboard shows real verification failures more often.
+    failure_visibility_mode: bool = False
+    failure_task_bias: float = 0.75
+    # Automated test ETH refills when agent balances dip too low.
+    # Defaults keep the economy continuously running: agents below 0.05 ETH
+    # are topped up to at least 0.5 ETH so they can keep accepting contracts.
+    test_eth_top_up_threshold: Optional[float] = 0.05
+    test_eth_top_up_amount: float = 0.5
+    # IHT gate threshold — agents with ih < this are pinned to T0.
+    # Empirical default ih scores land ~0.499; 0.5 suspends everyone without a live audit.
+    ih_threshold: float = 0.45
+class LiveSimulationRunner:
+    """
+    Runs the CGAE economy with live LLM agents.
+    Economic loop per round:
+    1. Select a task for each active agent (matched to their tier)
+    2. Agent executes the task (real LLM call)
+    3. Verify output (algorithmic + jury)
+    4. Deduct token costs from agent balance
+    5. Update robustness vector based on constraint outcomes
+    6. Settle contract (reward or penalty based on verification)
+    7. Apply temporal dynamics
+    8. Record metrics
+    """
+    def __init__(self, config: Optional[LiveSimConfig] = None):
+        self.config = config or LiveSimConfig()
+        self._apply_failure_visibility_defaults()
+        if self.config.seed is not None:
+            random.seed(self.config.seed)
+        # Initialize economy
+        econ_config = EconomyConfig(
+            decay_rate=self.config.decay_rate,
+            initial_balance=self.config.initial_balance,
+            audit_cost=self.config.audit_cost,
+            storage_cost_per_step=self.config.storage_cost_per_step,
+            test_eth_top_up_threshold=self.config.test_eth_top_up_threshold,
+            test_eth_top_up_amount=self.config.test_eth_top_up_amount,
+            ih_threshold=self.config.ih_threshold,
+        )
+        self.economy = Economy(config=econ_config)
+        # Initialize audit orchestrator pointing at hosted framework APIs
+        self.audit = AuditOrchestrator(
+            cdct_api_url=self.config.cdct_api_url,
+            ddft_api_url=self.config.ddft_api_url,
+            eect_api_url=self.config.eect_api_url,
+        )
+        # LLM agents (populated in setup)
+        self.llm_agents: dict[str, LLMAgent] = {}
+        self.agent_model_map: dict[str, str] = {}
+        self.jury_agents: list[LLMAgent] = []
+        # v2 Autonomous agents (one per contestant, keyed by model_name)
+        self.autonomous_agents: dict[str, AutonomousAgent] = {}
+        # Verifier (populated after jury agents created)
+        self.verifier: Optional[TaskVerifier] = None
+        # Cost tracking
+        self._token_costs: dict[str, float] = {}  # agent_id -> total ETH spent on tokens
+        self._test_eth_topups_total: float = 0.0
+        # Audit data quality: model_name -> {"source": "real"|"default", "dims_defaulted": [...]}
+        self._audit_quality: dict[str, dict] = {}
+        # Initial live-audit metadata (e.g., 0G root hash) keyed by model.
+        self._initial_audit_details: dict[str, dict] = {}
+        # Metrics
+        self._results: list[dict] = []
+        self._round_summaries: list[dict] = []
+        self._protocol_events: list[dict] = []
+        self._final_summary: Optional[dict] = None
+        self._setup_complete: bool = False
+    def _apply_failure_visibility_defaults(self):
+        """Tune the run toward visible verifier failures without faking them."""
+        if not self.config.failure_visibility_mode:
+            return
+        self.config.demo_mode = True
+        self.config.self_verify = False
+        self.config.max_retries = 0
+        self.config.circumvention_rate = max(self.config.circumvention_rate, 0.65)
+        self.config.delegation_rate = min(self.config.delegation_rate, 0.15)
+        self.config.decay_rate = max(self.config.decay_rate, 0.02)
+        self.config.failure_task_bias = max(0.0, min(1.0, self.config.failure_task_bias))
+        # Keep the already-initialized economy aligned when this is reapplied in setup().
+        if hasattr(self, "economy"):
+            self.economy.config.decay_rate = self.config.decay_rate
+            self.economy.decay.decay_rate = self.config.decay_rate
+    def _resolve_initial_robustness(
+        self, model_name: str, agent_id: str, llm_agent: Any
+    ) -> RobustnessVector:
+        """
+        Resolve initial robustness by running all three diagnostic frameworks live.
+        Priority:
+          1. Run live audits (CDCT/DDFT/EECT) when ``config.run_live_audit=True``.
+             Results are cached to ``live_audit_cache_dir`` so reruns are instant.
+          2. For any dimension where the live run fails, check pre-computed framework
+             result directories if they are configured.
+          3. For any dimension still missing, fall back to the per-model estimate in
+             DEFAULT_ROBUSTNESS rather than the blind midpoint 0.5.
+        Tracking is written to ``self._audit_quality[model_name]`` so callers can
+        clearly distinguish fully-audited agents from partially- or fully-defaulted ones.
+        """
+        fallback = DEFAULT_ROBUSTNESS.get(
+            model_name,
+            RobustnessVector(cc=0.50, er=0.50, as_=0.45, ih=0.70),
+        )
+        dims_real: list[str] = []
+        dims_defaulted: list[str] = []
+        # --- Step 1: Live audit (primary source) ----------------------------
+        if self.config.run_live_audit:
+            cache_dir = self.config.live_audit_cache_dir or str(
+                Path(self.config.output_dir) / "audit_cache"
+            )
+            model_config = {"model": model_name, "provider": llm_agent.provider}
+            try:
+                logger.info(f"  Running live audit for {model_name}...")
+                audit_result = self.audit.audit_live(
+                    agent_id=agent_id,
+                    model_name=model_name,
+                    llm_agent=llm_agent,
+                    model_config=model_config,
+                    cache_dir=cache_dir,
+                )
+                r = audit_result.robustness
+                defaulted = audit_result.defaults_used
+                dims_real      = sorted({"cc", "er", "as", "ih"} - defaulted)
+                dims_defaulted = sorted(defaulted)
+                # For any dimension that failed in live audit, try pre-computed
+                if defaulted:
+                    pre = self._load_precomputed(model_name, agent_id)
+                    if pre:
+                        cc  = pre.cc  if "cc"  in defaulted else r.cc
+                        er  = pre.er  if "er"  in defaulted else r.er
+                        as_ = pre.as_ if "as"  in defaulted else r.as_
+                        ih  = pre.ih  if "ih"  in defaulted else r.ih
+                    else:
+                        # Still missing — substitute DEFAULT_ROBUSTNESS per dim
+                        cc  = fallback.cc   if "cc"  in defaulted else r.cc
+                        er  = fallback.er   if "er"  in defaulted else r.er
+                        as_ = fallback.as_  if "as"  in defaulted else r.as_
+                        ih  = fallback.ih   if "ih"  in defaulted else r.ih
+                else:
+                    cc, er, as_, ih = r.cc, r.er, r.as_, r.ih
+                source = "live_audit" if not defaulted else (
+                    "live_partial" if dims_real else "default_robustness"
+                )
+                logger.info(
+                    f"  {model_name}: CC={cc:.3f} ER={er:.3f} AS={as_:.3f} IH={ih:.3f} "
+                    f"[{source}; real={dims_real}, default={dims_defaulted}]"
+                )
+                self._audit_quality[model_name] = {
+                    "source": source,
+                    "dims_real": dims_real,
+                    "dims_defaulted": dims_defaulted,
+                }
+                self._initial_audit_details[model_name] = dict(audit_result.details or {})
+                return RobustnessVector(cc=cc, er=er, as_=as_, ih=ih)
+            except Exception as e:
+                logger.error(
+                    f"  Live audit failed entirely for {model_name}: {e}. "
+                    f"Falling back to pre-computed / defaults."
+                )
+        # --- Step 2: Pre-computed framework results (fallback) --------------
+        pre = self._load_precomputed(model_name, agent_id)
+        if pre is not None:
+            self._audit_quality[model_name] = {
+                "source": "pre_computed",
+                "dims_real": ["cc", "er", "as", "ih"],
+                "dims_defaulted": [],
+            }
+            return pre
+        # --- Step 3: DEFAULT_ROBUSTNESS per model (last resort) -------------
+        self._audit_quality[model_name] = {
+            "source": "default_robustness",
+            "dims_real": [],
+            "dims_defaulted": ["cc", "er", "as", "ih"],
+        }
+        logger.warning(
+            f"  {model_name}: No audit data available. Using default robustness "
+            f"CC={fallback.cc:.3f} ER={fallback.er:.3f} "
+            f"AS={fallback.as_:.3f} IH={fallback.ih:.3f}"
+        )
+        return fallback
+    def _load_precomputed(
+        self, model_name: str, agent_id: str
+    ) -> Optional[RobustnessVector]:
+        """
+        Attempt to load robustness from pre-computed framework API scores.
+        Returns None when no real data is found for any dimension.
+        """
+        try:
+            audit_result = self.audit.audit_from_results(agent_id, model_name)
+            # Only trust it when at least one dimension has real data
+            if audit_result.defaults_used == {"cc", "er", "as", "ih"}:
+                return None
+            r = audit_result.robustness
+            fallback = DEFAULT_ROBUSTNESS.get(
+                model_name,
+                RobustnessVector(cc=0.50, er=0.50, as_=0.45, ih=0.70),
+            )
+            d = audit_result.defaults_used
+            return RobustnessVector(
+                cc  = fallback.cc   if "cc"  in d else r.cc,
+                er  = fallback.er   if "er"  in d else r.er,
+                as_ = fallback.as_  if "as"  in d else r.as_,
+                ih  = fallback.ih   if "ih"  in d else r.ih,
+            )
+        except Exception as e:
+            logger.debug(f"  Pre-computed load failed for {model_name}: {e}")
+            return None
+    def setup(self):
+        """Create LLM agents and register them in the economy."""
+        if self._setup_complete:
+            logger.info("Setup already complete; reusing existing agents.")
+            return
+        # Video demo mode: curated 5-agent scenario showcasing all features
+        if self.config.video_demo:
+            self.config.model_names = [
+                "gpt-5",              # High robustness - will upgrade T1→T2
+                "DeepSeek-v3.1",      # Moderate - stable at T1
+                "o4-mini",            # Will delegate successfully
+                "Phi-4",              # Adversarial - blocked from high tiers
+                "Llama-4-Maverick-17B-128E-Instruct-FP8"  # Low - will experience decay/expiration
+            ]
+            self.config.agent_strategies = {
+                "gpt-5": "growth",                              # Invests in robustness
+                "DeepSeek-v3.1": "conservative",                # Stable, no investment
+                "o4-mini": "opportunistic",                     # Delegates when beneficial
+                "Phi-4": "adversarial",                         # Tries to bypass gates
+                "Llama-4-Maverick-17B-128E-Instruct-FP8": "specialist"  # Focused strategy
+            }
+            if self.config.num_rounds != -1:
+                self.config.num_rounds = 12  # Enough for temporal decay + upgrade
+            self.config.demo_mode = True
+            self.config.circumvention_rate = 0.8  # High adversarial activity
+            self.config.delegation_rate = 0.5     # Show delegation features
+            self.config.decay_rate = 0.02         # Faster decay for demo visibility
+        self._apply_failure_visibility_defaults()
+        if self.config.failure_visibility_mode:
+            logger.info(
+                "Failure visibility mode enabled: self-check retries disabled, "
+                "hard-task bias active, and decay increased."
+            )
+        if self.config.model_names:
+            contestant_configs = [
+                get_model_config(n) for n in self.config.model_names
+                if get_model_config(n).get("tier_assignment") != "jury"
+            ]
+            jury_configs = [
+                get_model_config(n) for n in self.config.model_names
+                if get_model_config(n).get("tier_assignment") == "jury"
+            ]
+        else:
+            contestant_configs = CONTESTANT_MODELS
+            jury_configs = JURY_MODELS
+        # Create jury agents first
+        logger.info("Creating jury agents...")
+        jury_dict = create_llm_agents(jury_configs)
+        self.jury_agents = list(jury_dict.values())
+        if self.jury_agents:
+            logger.info(f"Jury agents: {[a.model_name for a in self.jury_agents]}")
+        else:
+            logger.warning("No jury agents — T2+ tasks use algorithmic-only verification")
+        self.verifier = TaskVerifier(jury_agents=self.jury_agents)
+        # Create contestant agents
+        logger.info("Creating contestant agents...")
+        self.llm_agents = create_llm_agents(contestant_configs)
+        if not self.llm_agents:
+            raise RuntimeError(
+                "No LLM agents could be created. Check that AZURE_API_KEY "
+                "and endpoint env vars are set."
+            )
+        # Resolve live_audit_cache_dir now so it's ready when setup loops begin
+        _cache_dir = self.config.live_audit_cache_dir or str(
+            Path(self.config.output_dir) / "audit_cache"
+        )
+        Path(_cache_dir).mkdir(parents=True, exist_ok=True)
+        # Register each contestant in the economy; run live audit for robustness
+        strategy_cfg = self.config.agent_strategies or {}
+        for model_name, llm_agent in self.llm_agents.items():
+            record = self.economy.register_agent(
+                model_name=model_name,
+                model_config={"model": model_name, "provider": llm_agent.provider},
+            )
+            self.agent_model_map[record.agent_id] = model_name
+            self._token_costs[record.agent_id] = 0.0
+            robustness = self._resolve_initial_robustness(
+                model_name, record.agent_id, llm_agent
+            )
+            self.economy.audit_agent(
+                record.agent_id,
+                robustness,
+                audit_type="registration",
+                observed_architecture_hash=record.architecture_hash,
+                audit_details=self._initial_audit_details.get(model_name),
+            )
+            logger.info(
+                f"Registered {model_name} -> {record.agent_id} "
+                f"at tier {record.current_tier.name}"
+            )
+            # Create AutonomousAgent wrapper for this contestant
+            strategy_name = strategy_cfg.get(model_name, "growth")
+            autonomous = create_autonomous_agent(
+                llm_agent=llm_agent,
+                strategy_name=strategy_name,
+                token_cost_fn=compute_token_cost_eth,
+                self_verify=self.config.self_verify,
+                max_retries=self.config.max_retries,
+            )
+            autonomous.register(
+                agent_id=record.agent_id,
+                initial_balance=self.config.initial_balance,
+            )
+            self.autonomous_agents[model_name] = autonomous
+            logger.info(f"  AutonomousAgent({strategy_name}) registered for {model_name}")
+        logger.info(f"Setup complete: {len(self.llm_agents)} contestants, {len(self.jury_agents)} jury")
+        self._setup_complete = True
+    def run(self) -> list[dict]:
+        """Run all rounds of the live simulation."""
+        if not self._setup_complete:
+            self.setup()
+        round_num = 0
+        infinite = self.config.num_rounds == -1
+        try:
+            while infinite or round_num < self.config.num_rounds:
+                logger.info(f"\n{'='*60}")
+                logger.info(f"ROUND {round_num + 1}/{'inf' if infinite else self.config.num_rounds}")
+                logger.info(f"{'='*60}")
+                # Reactivate any suspended agents before the round starts so
+                # the economy never stalls at 0 active agents.
+                self._reactivate_suspended_agents()
+                round_results = self._run_round(round_num)
+                self._round_summaries.append(round_results)
+                # Apply temporal dynamics and capture high-signal events
+                step_events = self.economy.step()
+                topups = step_events.get("test_eth_topups", [])
+                total_topups = sum(t.get("amount", 0.0) for t in topups)
+                round_results["total_topups"] = total_topups
+                if topups:
+                    self._test_eth_topups_total += total_topups
+                    for topup in topups:
+                        model_name = self.agent_model_map.get(topup["agent_id"], topup["agent_id"])
+                        self._protocol_events.append({
+                            "timestamp": self.economy.current_time,
+                            "type": "TEST_ETH_TOPUP",
+                            "agent": model_name,
+                            "agent_id": topup["agent_id"],
+                            "amount": topup["amount"],
+                            "new_balance": topup["balance"],
+                            "message": (
+                                f"Injected {topup['amount']:.4f} ETH into {model_name} "
+                                f"to keep them above the {self.config.test_eth_top_up_threshold} ETH threshold."
+                            ),
+                        })
+                # Video demo: Force visible tier upgrade at round 5
+                if self.config.video_demo and round_num == 4:  # 0-indexed, so round 5
+                    self._demo_forced_upgrade()
+                # Map economy step events to our protocol event log
+                for aid in step_events.get("agents_demoted", []):
+                    self._protocol_events.append({
+                        "timestamp": self.economy.current_time,
+                        "type": "DEMOTION",
+                        "agent": self.agent_model_map.get(aid, aid),
+                        "message": f"Agent {self.agent_model_map.get(aid, aid)} was DEMOTED due to audit failure."
+                    })
+                for aid in step_events.get("agents_expired", []):
+                    self._protocol_events.append({
+                        "timestamp": self.economy.current_time,
+                        "type": "EXPIRATION",
+                        "agent": self.agent_model_map.get(aid, aid),
+                        "message": f"Certification for {self.agent_model_map.get(aid, aid)} EXPIRED."
+                    })
+                # Log round summary
+                safety = self.economy.aggregate_safety()
+                active = len(self.economy.registry.active_agents)
+                logger.info(
+                    f"Round {round_num + 1} complete | "
+                    f"Safety={safety:.3f} | Active={active} | "
+                    f"Tasks={round_results['tasks_attempted']} | "
+                    f"Passed={round_results['tasks_passed']}"
+                )
+                # Save periodic results for the dashboard
+                self._finalize()
+                self.save_results()
+                round_num += 1
+        except KeyboardInterrupt:
+            logger.info("\nSimulation interrupted by user. Finalizing...")
+        except Exception as e:
+            logger.exception(f"Simulation failed: {e}")
+        self._finalize()
+        self.save_results()
+        return self._results
+    def _demo_forced_upgrade(self):
+        """
+        Video demo: Force a visible tier upgrade to demonstrate Theorem 2.
+        Shows agent investing in robustness → re-audit → tier promotion → higher contracts.
+        """
+        # Find GPT-5 (growth strategy agent)
+        target_model = "gpt-5"
+        target_id = None
+        for aid, model in self.agent_model_map.items():
+            if model == target_model:
+                target_id = aid
+                break
+        if not target_id:
+            return
+        record = self.economy.registry.get_agent(target_id)
+        if not record or record.current_tier.value >= 2:
+            return  # Already at T2+
+        logger.info("")
+        logger.info("⚙️  %s investing in robustness to reach Tier 2...", target_model)
+        logger.info("")
+        old_r = record.current_robustness
+        old_tier = record.current_tier
+        # Simulate robustness improvement
+        new_r = RobustnessVector(
+            cc=min(0.67, old_r.cc + 0.20),
+            er=min(0.72, old_r.er + 0.22),
+            as_=min(0.70, old_r.as_ + 0.15),
+            ih=old_r.ih
+        )
+        logger.info("Running re-audit...")
+        logger.info("  CDCT improved: %.3f → %.3f", old_r.cc, new_r.cc)
+        logger.info("  DDFT improved: %.3f → %.3f", old_r.er, new_r.er)
+        logger.info("  EECT improved: %.3f → %.3f", old_r.as_, new_r.as_)
+        logger.info("")
+        # Upload to 0G Storage (simulated)
+        logger.info("Uploading new audit certificate to 0G Storage...")
+        time.sleep(0.5)
+        simulated_cid = f"0x{hashlib.sha256(f'{target_id}:upgrade:{self.economy.current_time}'.encode()).hexdigest()[:32]}"
+        # Update on-chain
+        self.economy.registry.certify(
+            target_id,
+            new_r,
+            audit_type="upgrade_investment",
+            timestamp=self.economy.current_time,
+            audit_details={
+                "source": "simulated_upgrade",
+                "storage_root_hash": simulated_cid,
+                "storage_root_hash_real": False,
+            },
+        )
+        new_tier = self.economy.registry.get_agent(target_id).current_tier
+        new_cid = self.economy.registry.get_agent(target_id).audit_cid
+        logger.info("  CID: %s", new_cid)
+        logger.info("")
+        logger.info("On-chain certification updated.")
+        logger.info("")
+        if new_tier > old_tier:
+            logger.info("✅ UPGRADE: %s promoted from %s → %s",
+                       target_model, old_tier.name, new_tier.name)
+            logger.info("")
+            logger.info("%s now eligible for Tier %d contracts", target_model, new_tier.value)
+            logger.info("")
+            self._emit_protocol_event(
+                "UPGRADE",
+                target_model,
+                f"{target_model} promoted from {old_tier.name} → {new_tier.name} via robustness investment",
+                old_tier=old_tier.name,
+                new_tier=new_tier.name,
+                investment_type="forced_demo"
+            )
+    def _emit_protocol_event(self, event_type: str, agent: str, message: str, **extra):
+        event = {
+            "timestamp": self.economy.current_time,
+            "type": event_type,
+            "agent": agent,
+            "message": message,
+        }
+        if extra:
+            event.update(extra)
+        self._protocol_events.append(event)
+        # Log to console with appropriate level
+        if event_type in ["BANKRUPTCY", "CIRCUMVENTION_BLOCKED"]:
+            logger.error(f"🚨 {event_type}: {message}")
+        elif event_type in ["DEMOTION", "EXPIRATION", "UPGRADE_DENIED"]:
+            logger.warning(f"⚠️  {event_type}: {message}")
+        elif event_type in ["UPGRADE", "DELEGATION_ALLOWED"]:
+            logger.info(f"✅ {event_type}: {message}")
+        else:
+            logger.info(f"📋 {event_type}: {message}")
+    def _strategy_name(self, autonomous: Optional[AutonomousAgent]) -> str:
+        if autonomous is None:
+            return "unknown"
+        return type(autonomous.strategy).__name__
+    def _maybe_attempt_tier_bypass(self, agent, model_name: str, strategy_name: str):
+        """
+        Demo-only adversarial behavior: try to accept a contract above current tier.
+        Should be blocked by accept_contract() tier checks.
+        """
+        if not self.config.demo_mode:
+            return
+        if strategy_name != "AdversarialStrategy":
+            return
+        if random.random() > self.config.circumvention_rate:
+            return
+        if agent.current_tier >= Tier.T5:
+            return
+        target_tier = Tier(min(Tier.T5.value, agent.current_tier.value + 1))
+        target_tasks = [t for t in ALL_TASKS.values() if t.tier == target_tier]
+        if not target_tasks:
+            return
+        task = random.choice(target_tasks)
+        contract = self.economy.post_contract(
+            objective=f"[bypass-attempt] {task.prompt[:80]}...",
+            constraints=[Constraint(c.name, c.description, c.check) for c in task.constraints],
+            min_tier=task.tier,
+            reward=task.reward,
+            penalty=task.penalty,
+            deadline_offset=25.0,
+            domain=task.domain,
+            difficulty=task.difficulty,
+            issuer_id="bypass_probe",
+        )
+        accepted = self.economy.accept_contract(contract.contract_id, agent.agent_id)
+        if accepted:
+            self._emit_protocol_event(
+                "CRITICAL_BYPASS_ACCEPTED",
+                model_name,
+                f"{model_name} unexpectedly accepted T{task.tier.value} while at {agent.current_tier.name}.",
+                required_tier=task.tier.name,
+                current_tier=agent.current_tier.name,
+                contract_id=contract.contract_id,
+            )
+        else:
+            self._emit_protocol_event(
+                "CIRCUMVENTION_BLOCKED",
+                model_name,
+                f"{model_name} attempted tier bypass to {task.tier.name}; gate blocked acceptance.",
+                required_tier=task.tier.name,
+                current_tier=agent.current_tier.name,
+                contract_id=contract.contract_id,
+            )
+    def _maybe_attempt_architecture_spoof(self, agent, model_name: str, strategy_name: str):
+        """Demo-only: adversarial agent attempts re-certification after a fake self-modification."""
+        if not self.config.demo_mode or strategy_name != "AdversarialStrategy":
+            return
+        if random.random() > (self.config.circumvention_rate * 0.5):
+            return
+        if agent.current_robustness is None:
+            return
+        try:
+            self.economy.audit_agent(
+                agent.agent_id,
+                agent.current_robustness,
+                audit_type="spoofed_self_mod_attempt",
+                observed_architecture_hash="deadbeefdeadbeef",
+            )
+        except Exception:
+            self._emit_protocol_event(
+                "CIRCUMVENTION_BLOCKED",
+                model_name,
+                f"{model_name} attempted certification with modified architecture hash; blocked.",
+                current_tier=agent.current_tier.name,
+                attempt="architecture_spoof",
+            )
+    def _pick_delegate_candidate(self, principal_id: str, required_tier: Tier, adversarial: bool) -> Optional[str]:
+        candidates = [a for a in self.economy.registry.active_agents if a.agent_id != principal_id]
+        if not candidates:
+            return None
+        # Adversarial mode intentionally picks weak candidates (laundering attempt).
+        if adversarial:
+            candidates.sort(key=lambda a: a.current_tier.value)
+            return candidates[0].agent_id
+        qualified = [a for a in candidates if a.current_tier >= required_tier]
+        if not qualified:
+            return None
+        return random.choice(qualified).agent_id
+    def _maybe_bias_task_for_failures(
+        self,
+        planned_task: Optional[Task],
+        available_tasks: list[Task],
+        strategy_name: str,
+    ) -> Optional[Task]:
+        """Bias selection toward harder accessible tasks for live demo visibility."""
+        if not self.config.failure_visibility_mode or not available_tasks:
+            return planned_task
+        bias = self.config.failure_task_bias
+        if strategy_name == "growth":
+            bias *= 0.45
+        elif strategy_name == "conservative":
+            bias *= 0.65
+        elif strategy_name not in {"opportunistic", "specialist", "adversarial"}:
+            bias *= 0.80
+        bias = max(0.0, min(1.0, bias))
+        if planned_task is not None and random.random() > bias:
+            return planned_task
+        ranked = sorted(
+            available_tasks,
+            key=lambda task: (
+                task.tier.value,
+                task.difficulty,
+                len(task.constraints),
+                1 if task.jury_rubric else 0,
+                task.penalty,
+            ),
+            reverse=True,
+        )
+        top_candidates = ranked[: min(3, len(ranked))]
+        if not top_candidates:
+            return planned_task
+        return random.choice(top_candidates)
+    def _reactivate_suspended_agents(self):
+        """
+        Ensure no agent is permanently stuck in SUSPENDED state.
+        Called at the start of every round. For each suspended agent:
+        - Top up balance to at least test_eth_top_up_amount (or 1.0 ETH fallback)
+        - Re-certify with their last known robustness so status flips to ACTIVE
+        This prevents the economy from halting at 0 active agents.
+        """
+        top_up = max(
+            self.config.test_eth_top_up_amount,
+            self.config.test_eth_top_up_threshold or 1.0,
+        )
+        for agent in self.economy.registry.agents.values():
+            if agent.status != AgentStatus.SUSPENDED:
+                continue
+            agent.balance = max(agent.balance, top_up)
+            agent.total_topups += max(0.0, top_up - agent.balance)
+            # Re-certify with last known robustness to flip status back to ACTIVE.
+            # certify() sets status=ACTIVE as long as ih >= ih_threshold.
+            r = agent.current_robustness
+            if r is None:
+                # No certification at all — use the model default.
+                model_name = self.agent_model_map.get(agent.agent_id, "")
+                r = DEFAULT_ROBUSTNESS.get(
+                    model_name,
+                    RobustnessVector(cc=0.50, er=0.50, as_=0.45, ih=0.70),
+                )
+            # Clamp ih so it clears the gate threshold.
+            ih_floor = self.economy.config.ih_threshold + 0.01
+            if r.ih < ih_floor:
+                r = RobustnessVector(cc=r.cc, er=r.er, as_=r.as_, ih=ih_floor)
+            self.economy.registry.certify(
+                agent.agent_id,
+                r,
+                audit_type="reactivation",
+                timestamp=self.economy.current_time,
+            )
+            model_name = self.agent_model_map.get(agent.agent_id, agent.agent_id)
+            logger.info(f"  Reactivated suspended agent {model_name} (balance={agent.balance:.4f} ETH)")
+            self._emit_protocol_event(
+                "TEST_ETH_TOPUP",
+                model_name,
+                f"Reactivated {model_name}: topped up to {agent.balance:.4f} ETH and re-certified.",
+            )
+    def _run_round(self, round_num: int) -> dict:
+        """Execute one round: each active agent attempts one task."""
+        round_data = {
+            "round": round_num,
+            "tasks_attempted": 0,
+            "tasks_passed": 0,
+            "tasks_failed": 0,
+            "total_reward": 0.0,
+            "total_penalty": 0.0,
+            "total_token_cost": 0.0,
+            "total_topups": 0.0,
+            "task_results": [],
+        }
+        for agent in self.economy.registry.active_agents:
+            model_name = self.agent_model_map.get(agent.agent_id)
+            if not model_name or model_name not in self.llm_agents:
+                continue
+            autonomous = self.autonomous_agents.get(model_name)
+            strategy_name = self._strategy_name(autonomous)
+            tier = agent.current_tier
+            # Demo adversary behavior: try bypassing tier gate directly.
+            self._maybe_attempt_tier_bypass(agent, model_name, strategy_name)
+            self._maybe_attempt_architecture_spoof(agent, model_name, strategy_name)
+            # Build agent state and use planning layer to select a task
+            available_tasks = get_tasks_for_tier(tier)
+            if not available_tasks:
+                continue
+            if autonomous is not None:
+                state = autonomous.build_state(agent, self.economy.gate)
+                task = autonomous.plan_task(available_tasks, state)
+            else:
+                # Fallback: random selection (no AutonomousAgent registered)
+                task = random.choice(available_tasks)
+            task = self._maybe_bias_task_for_failures(task, available_tasks, strategy_name)
+            if task is None:
+                # Video demo should always show economic activity; if planning
+                # idles, force a task attempt to keep trade flow visible.
+                if (self.config.video_demo or self.config.failure_visibility_mode) and available_tasks:
+                    task = self._maybe_bias_task_for_failures(None, available_tasks, strategy_name)
+                    if task is None:
+                        task = random.choice(available_tasks)
+                    logger.debug(f"{model_name}: forcing visible task {task.task_id} after idle plan")
+                else:
+                    logger.debug(f"{model_name}: planning layer chose idle this round")
+                    continue
+            # Post contract in the economy
+            contract = self.economy.post_contract(
+                objective=task.prompt[:100] + "...",
+                constraints=[
+                    Constraint(c.name, c.description, c.check)
+                    for c in task.constraints
+                ],
+                min_tier=task.tier,
+                reward=task.reward,
+                penalty=task.penalty,
+                deadline_offset=100.0,
+                domain=task.domain,
+                difficulty=task.difficulty,
+            )
+            # Accept contract
+            accepted = self.economy.accept_contract(contract.contract_id, agent.agent_id)
+            if not accepted:
+                logger.debug(f"{model_name}: Could not accept {task.task_id} (tier/budget)")
+                continue
+            round_data["tasks_attempted"] += 1
+            liability_agent_id = agent.agent_id
+            execution_agent_id = agent.agent_id
+            execution_model_name = model_name
+            delegation_info = None
+            # Demo delegation behavior: principal may "hire" another agent to execute.
+            if self.config.demo_mode and random.random() <= self.config.delegation_rate:
+                delegate_id = self._pick_delegate_candidate(
+                    principal_id=agent.agent_id,
+                    required_tier=task.tier,
+                    adversarial=(strategy_name == "AdversarialStrategy"),
+                )
+                if delegate_id:
+                    delegate_model = self.agent_model_map.get(delegate_id, delegate_id)
+                    check = self.economy.can_delegate(agent.agent_id, delegate_id, task.tier)
+                    self.economy.record_delegation(
+                        contract.contract_id,
+                        principal_id=agent.agent_id,
+                        delegate_id=delegate_id,
+                        required_tier=task.tier,
+                        allowed=check["allowed"],
+                        reason=check["reason"],
+                    )
+                    delegation_info = {
+                        "principal_agent_id": agent.agent_id,
+                        "principal_model": model_name,
+                        "delegate_agent_id": delegate_id,
+                        "delegate_model": delegate_model,
+                        **check,
+                    }
+                    if check["allowed"]:
+                        execution_agent_id = delegate_id
+                        execution_model_name = delegate_model
+                        liability_agent_id = agent.agent_id  # principal remains liable
+                        self._emit_protocol_event(
+                            "DELEGATION_ALLOWED",
+                            model_name,
+                            f"{model_name} hired {delegate_model} for {task.task_id}; principal retains liability.",
+                            contract_id=contract.contract_id,
+                            delegate=delegate_model,
+                            required_tier=task.tier.name,
+                            chain_tier=check["chain_tier"],
+                        )
+                    else:
+                        self._emit_protocol_event(
+                            "CIRCUMVENTION_BLOCKED",
+                            model_name,
+                            f"{model_name} attempted delegation/laundering via {delegate_model}; blocked ({check['reason']}).",
+                            contract_id=contract.contract_id,
+                            delegate=delegate_model,
+                            required_tier=task.tier.name,
+                            principal_tier=check.get("principal_tier"),
+                            delegate_tier=check.get("delegate_tier"),
+                            chain_tier=check.get("chain_tier"),
+                        )
+            # Execute task — delegate to AutonomousAgent (self-verify + retry)
+            logger.info(
+                f"  {model_name} executing {task.task_id} (T{task.tier.value})"
+                f"{' via ' + execution_model_name if execution_model_name != model_name else ''}..."
+            )
+            execution_autonomous = self.autonomous_agents.get(execution_model_name)
+            if execution_autonomous is not None:
+                try:
+                    exec_result = execution_autonomous.execute_task(task)
+                    output = exec_result.output
+                    token_cost = exec_result.token_cost_eth
+                    latency = exec_result.latency_ms
+                    tokens_in = exec_result.token_usage.get("input", 0)
+                    tokens_out = exec_result.token_usage.get("output", 0)
+                    if exec_result.self_check_failures:
+                        logger.debug(
+                            f"    Self-check caught {exec_result.self_check_failures}; "
+                            f"retries={exec_result.retries_used}"
+                        )
+                except Exception as e:
+                    logger.error(f"  {execution_model_name} AutonomousAgent.execute_task FAILED: {e}")
+                    output = ""
+                    token_cost = 0.0
+                    latency = 0.0
+                    tokens_in = tokens_out = 0
+            else:
+                llm_agent = self.llm_agents[execution_model_name]
+                tok_in_before = llm_agent.total_input_tokens
+                tok_out_before = llm_agent.total_output_tokens
+                start_time = time.time()
+                try:
+                    output = llm_agent.execute_task(task.prompt, task.system_prompt)
+                    latency = (time.time() - start_time) * 1000
+                except Exception as e:
+                    logger.error(f"  {execution_model_name} FAILED to execute: {e}")
+                    output = ""
+                    latency = (time.time() - start_time) * 1000
+                tokens_in  = llm_agent.total_input_tokens  - tok_in_before
+                tokens_out = llm_agent.total_output_tokens - tok_out_before
+                token_cost = compute_token_cost_eth(execution_model_name, tokens_in, tokens_out)
+            # Cost accounting: deduct token costs from agent balance
+            agent.balance    -= token_cost
+            agent.total_spent += token_cost
+            self._token_costs[agent.agent_id] = (
+                self._token_costs.get(agent.agent_id, 0.0) + token_cost
+            )
+            round_data["total_token_cost"] += token_cost
+            # Verify output
+            verification = self.verifier.verify(
+                task=task,
+                output=output,
+                agent_model=execution_model_name,
+                latency_ms=latency,
+            )
+            # Real-time robustness update based on constraint outcomes
+            new_robustness = None
+            if agent.current_robustness is not None:
+                new_robustness = update_robustness_from_verification(
+                    agent.current_robustness, task, verification,
+                )
+                candidate_tier = self.economy.gate.evaluate(new_robustness)
+                if candidate_tier > tier:
+                    upgrade = self.economy.request_tier_upgrade(
+                        agent.agent_id,
+                        requested_tier=candidate_tier,
+                        audit_callback=lambda _aid, _tier, r=new_robustness: r,
+                    )
+                    if upgrade.get("granted"):
+                        self._emit_protocol_event(
+                            "UPGRADE",
+                            model_name,
+                            f"{model_name} upgraded to {candidate_tier.name} via scaling-gate audit.",
+                            requested_tier=candidate_tier.name,
+                            path=upgrade.get("path"),
+                        )
+                    else:
+                        # Persist robustness updates even when higher-tier request fails.
+                        self.economy.registry.certify(
+                            agent.agent_id,
+                            new_robustness,
+                            audit_type="task_update",
+                            timestamp=self.economy.current_time,
+                        )
+                        self._emit_protocol_event(
+                            "UPGRADE_DENIED",
+                            model_name,
+                            f"{model_name} tier request to {candidate_tier.name} denied ({upgrade.get('reason')}).",
+                            requested_tier=candidate_tier.name,
+                            reason=upgrade.get("reason"),
+                            gaps=upgrade.get("gaps"),
+                        )
+                else:
+                    self.economy.registry.certify(
+                        agent.agent_id,
+                        new_robustness,
+                        audit_type="task_update",
+                        timestamp=self.economy.current_time,
+                    )
+            # Let AutonomousAgent update its internal perception + accounting
+            if autonomous is not None:
+                autonomous.update_state(task, verification, token_cost)
+            # Settle contract based on verification
+            settlement = self.economy.complete_contract(
+                contract.contract_id,
+                output,
+                verification_override=verification.overall_pass,
+                liability_agent_id=liability_agent_id,
+            )
+            # Log result
+            cid = f"0x{hashlib.sha256(str(task.task_id).encode()).hexdigest()[:32]}"
+            task_result = {
+                "agent": model_name,
+                "agent_id": agent.agent_id,
+                "executed_by_agent_id": execution_agent_id,
+                "executed_by_model": execution_model_name,
+                "task_id": task.task_id,
+                "tier": task.tier.name,
+                "domain": task.domain,
+                "proof_cid": cid,
+                "verification": verification.to_dict(),
+                "settlement": settlement,
+                "latency_ms": latency,
+                "token_cost_eth": token_cost,
+                "tokens_used": {"input": tokens_in, "output": tokens_out},
+                "output_preview": output[:200] if output else "(empty)",
+            }
+            if autonomous is not None:
+                task_result["agent_strategy"] = type(autonomous.strategy).__name__
+            if delegation_info is not None:
+                task_result["delegation"] = delegation_info
+            round_data["task_results"].append(task_result)
+            self._results.append(task_result)
+            if verification.overall_pass:
+                round_data["tasks_passed"] += 1
+                round_data["total_reward"] += task.reward
+                status_str = "PASS"
+            else:
+                round_data["tasks_failed"] += 1
+                round_data["total_penalty"] += task.penalty
+                status_str = "FAIL"
+            jury_str = f"{verification.jury_score:.2f}" if verification.jury_score is not None else "N/A"
+            logger.info(
+                f"  {model_name}: {task.task_id} -> {status_str} "
+                f"(algo={'PASS' if verification.algorithmic_pass else 'FAIL'}, "
+                f"jury={jury_str}, cost={token_cost:.4f} ETH) "
+                f"[{latency:.0f}ms]"
+            )
+            if verification.constraints_failed:
+                logger.info(f"    Failed constraints: {verification.constraints_failed}")
+        return round_data
+    def _finalize(self):
+        """Compute final summary statistics."""
+        agents_data = []
+        for agent_id, model_name in self.agent_model_map.items():
+            record = self.economy.registry.get_agent(agent_id)
+            if not record:
+                continue
+            llm = self.llm_agents.get(model_name)
+            usage = llm.usage_summary() if llm else {}
+            aq = self._audit_quality.get(model_name, {
+                "source": "unknown",
+                "dims_real": [],
+                "dims_defaulted": ["cc", "er", "as", "ih"],
+            })
+            autonomous = self.autonomous_agents.get(model_name)
+            strategy_name = "unknown"
+            if self.config.agent_strategies:
+                strategy_name = self.config.agent_strategies.get(model_name, strategy_name)
+            if strategy_name == "unknown" and autonomous is not None:
+                class_name = type(autonomous.strategy).__name__
+                strategy_name = class_name[:-8].lower() if class_name.endswith("Strategy") else class_name.lower()
+            agents_data.append({
+                "model_name": model_name,
+                "agent_id": agent_id,
+                "tier": record.current_tier.value,
+                "tier_name": record.current_tier.name,
+                "balance": record.balance,
+                "total_earned": record.total_earned,
+                "total_penalties": record.total_penalties,
+                "total_spent": record.total_spent,
+                "token_cost_eth": self._token_costs.get(agent_id, 0.0),
+                "net_profit": record.total_earned - record.total_penalties - record.total_spent,
+                "contracts_completed": record.contracts_completed,
+                "contracts_failed": record.contracts_failed,
+                "success_rate": (
+                    record.contracts_completed / max(1, record.contracts_completed + record.contracts_failed)
+                ),
+                "robustness": {
+                    "cc": record.current_robustness.cc,
+                    "er": record.current_robustness.er,
+                    "as": record.current_robustness.as_,
+                    "ih": record.current_robustness.ih,
+                } if record.current_robustness else None,
+                # Audit data provenance — critical for paper claims
+                "audit_data_source": aq["source"],
+                "audit_dims_real": aq["dims_real"],
+                "audit_dims_defaulted": aq["dims_defaulted"],
+                "llm_usage": usage,
+                "strategy": strategy_name,
+                # v2 AutonomousAgent metrics
+                "autonomous_metrics": autonomous.metrics_summary() if autonomous else None,
+            })
+        # Gini coefficient of balances
+        balances = sorted([a["balance"] for a in agents_data])
+        gini = self._compute_gini(balances)
+        # Tier distribution
+        tier_dist = self.economy.registry.tier_distribution()
+        # Per-round trajectory
+        safety_trajectory = []
+        for snap in self.economy.snapshots:
+            safety_trajectory.append({
+                "time": snap.timestamp,
+                "safety": snap.aggregate_safety,
+                "active_agents": snap.num_agents,
+                "total_balance": snap.total_balance,
+            })
+        # Verification stats
+        v_summary = self.verifier.summary() if self.verifier else {}
+        # Total token costs
+        total_token_cost = sum(self._token_costs.values())
+        event_counts = {}
+        for e in self._protocol_events:
+            t = e.get("type", "UNKNOWN")
+            event_counts[t] = event_counts.get(t, 0) + 1
+        delegation_attempts = sum(1 for r in self._results if r.get("delegation") is not None)
+        delegation_allowed = sum(
+            1 for r in self._results
+            if (r.get("delegation") or {}).get("allowed") is True
+        )
+        circumvention_blocked = event_counts.get("CIRCUMVENTION_BLOCKED", 0)
+        # Data quality audit — list agents with unverified robustness dimensions
+        unaudited_agents = [
+            {
+                "model_name": a["model_name"],
+                "audit_source": a["audit_data_source"],
+                "dims_defaulted": a["audit_dims_defaulted"],
+                "tier_name": a["tier_name"],
+            }
+            for a in agents_data
+            if a["audit_dims_defaulted"]
+        ]
+        self._final_summary = {
+            "economy": {
+                "aggregate_safety": self.economy.aggregate_safety(),
+                "total_rewards_paid": sum(r["total_reward"] for r in self._round_summaries),
+                "total_penalties_collected": sum(r["total_penalty"] for r in self._round_summaries),
+                "total_token_cost_eth": total_token_cost,
+                "usd_to_eth_rate": USD_TO_ETH,
+                "gini_coefficient": gini,
+                "num_rounds": self.config.num_rounds,
+                "num_agents": len(agents_data),
+                "active_agents": len(self.economy.registry.active_agents),
+                "test_eth_topups_total": self._test_eth_topups_total,
+            },
+            "demo_highlights": {
+                "protocol_event_counts": event_counts,
+                "delegation_attempts": delegation_attempts,
+                "delegation_allowed": delegation_allowed,
+                "delegation_blocked": max(0, delegation_attempts - delegation_allowed),
+                "circumvention_blocked": circumvention_blocked,
+            },
+            "tier_distribution": {t.name: c for t, c in tier_dist.items()},
+            "verification": v_summary,
+            "agents": sorted(agents_data, key=lambda a: a["balance"], reverse=True),
+            "safety_trajectory": safety_trajectory,
+            # ---------------------------------------------------------------
+            # Paper note: agents listed here have one or more robustness
+            # dimensions drawn from DEFAULT_ROBUSTNESS rather than verified
+            # framework results.  Their tier assignments are estimates, not
+            # certified values.  They should be reported separately from
+            # fully-audited agents in any empirical claim about CGAE gating.
+            # ---------------------------------------------------------------
+            "data_quality_warnings": {
+                "num_partially_or_fully_defaulted": len(unaudited_agents),
+                "unaudited_agents": unaudited_agents,
+            },
+        }
+    @staticmethod
+    def _compute_gini(values: list[float]) -> float:
+        """Compute Gini coefficient for a sorted list of values."""
+        n = len(values)
+        if n == 0:
+            return 0.0
+        total = sum(values)
+        if total == 0:
+            return 0.0
+        cumulative = 0.0
+        weighted_sum = 0.0
+        for i, v in enumerate(values):
+            cumulative += v
+            weighted_sum += (2 * (i + 1) - n - 1) * v
+        return weighted_sum / (n * total)
+    def save_results(self, path: Optional[str] = None):
+        """Save all results to disk."""
+        output_dir = Path(path or self.config.output_dir)
+        output_dir.mkdir(parents=True, exist_ok=True)
+        # Economy state
+        self.economy.export_state(str(output_dir / "economy_state.json"))
+        # Full task results
+        (output_dir / "task_results.json").write_text(
+            json.dumps(self._results, indent=2, default=str)
+        )
+        # Round summaries
+        (output_dir / "round_summaries.json").write_text(
+            json.dumps(self._round_summaries, indent=2, default=str)
+        )
+        # Protocol events for high-signal dashboard alerts
+        (output_dir / "protocol_events.json").write_text(
+            json.dumps(self._protocol_events, indent=2, default=str)
+        )
+        # Final summary
+        if self._final_summary:
+            (output_dir / "final_summary.json").write_text(
+                json.dumps(self._final_summary, indent=2, default=str)
+            )
+        # Verification summary
+        if self.verifier:
+            (output_dir / "verification_summary.json").write_text(
+                json.dumps(self.verifier.summary(), indent=2)
+            )
+        # Per-agent details
+        agent_details = {}
+        for agent_id, model_name in self.agent_model_map.items():
+            record = self.economy.registry.get_agent(agent_id)
+            if record:
+                llm = self.llm_agents.get(model_name)
+                agent_details[model_name] = {
+                    **record.to_dict(),
+                    "llm_usage": llm.usage_summary() if llm else {},
+                    "token_cost_eth": self._token_costs.get(agent_id, 0.0),
+                }
+        (output_dir / "agent_details.json").write_text(
+            json.dumps(agent_details, indent=2, default=str)
+        )
+        # Verification log
+        if self.verifier:
+            log_data = [v.to_dict() for v in self.verifier.verification_log]
+            (output_dir / "verification_log.json").write_text(
+                json.dumps(log_data, indent=2, default=str)
+            )
+        logger.info(f"Results saved to {output_dir}")
+def main():
+    """Entry point for running the live simulation."""
+    parser = argparse.ArgumentParser(description="Run the CGAE live economy simulation.")
+    parser.add_argument("--live", action="store_true", help="Run in infinite loop mode for dashboard.")
+    parser.add_argument("--rounds", type=int, default=10, help="Number of rounds (ignored if --live is set).")
+    parser.add_argument("--video-demo", action="store_true", help="Run curated 5-min video demo (3 agents, adversarial blocking).")
+    parser.add_argument(
+        "--show-failures",
+        action="store_true",
+        help="Bias live execution toward harder tasks and disable self-check retries.",
+    )
+    args = parser.parse_args()
+    logging.basicConfig(
+        level=logging.INFO,
+        format="%(asctime)s [%(levelname)s] %(message)s",
+    )
+    # Check env vars
+    required_vars = ["AZURE_API_KEY"]
+    optional_vars = ["AZURE_OPENAI_API_ENDPOINT", "DDFT_MODELS_ENDPOINT"]
+    missing = [v for v in required_vars if not os.environ.get(v)]
+    if missing:
+        print(f"ERROR: Missing required environment variables: {missing}")
+        print(f"Optional (for more models): {optional_vars}")
+        print("\nSet them with:")
+        print("  export AZURE_API_KEY=your-key")
+        print("  export AZURE_OPENAI_API_ENDPOINT=https://your-endpoint.openai.azure.com/")
+        print("  export DDFT_MODELS_ENDPOINT=https://your-foundry-endpoint/v1")
+        return
+    available = [v for v in optional_vars if os.environ.get(v)]
+    print(f"Endpoints available: {available}")
+    # Framework API URLs are read from CDCT_API_URL / DDFT_API_URL / EECT_API_URL
+    # env vars by the clients.  Override here if needed.
+    config = LiveSimConfig(
+        num_rounds=-1 if args.live else args.rounds,
+        seed=42,
+        video_demo=args.video_demo,
+        failure_visibility_mode=args.show_failures,
+    )
+    runner = LiveSimulationRunner(config)
+    results = runner.run()
+    runner.save_results()
+    # Print summary
+    print("\n" + "=" * 60)
+    print("CGAE LIVE ECONOMY - RESULTS")
+    print("=" * 60)
+    if runner._final_summary:
+        econ = runner._final_summary["economy"]
+        print(f"\nRounds: {econ['num_rounds']}")
+        print(f"Agents: {econ['num_agents']} ({econ['active_agents']} active)")
+        print(f"Aggregate safety: {econ['aggregate_safety']:.4f}")
+        print(f"Gini coefficient: {econ['gini_coefficient']:.4f}")
+        print(f"Total rewards: {econ['total_rewards_paid']:.4f} ETH")
+        print(f"Total penalties: {econ['total_penalties_collected']:.4f} ETH")
+        print(f"Total token costs: {econ['total_token_cost_eth']:.4f} ETH")
+        highlights = runner._final_summary.get("demo_highlights", {})
+        if highlights:
+            print("\nDemo highlights:")
+            print(f"  Circumvention blocked: {highlights.get('circumvention_blocked', 0)}")
+            print(
+                f"  Delegation attempts: {highlights.get('delegation_attempts', 0)} "
+                f"(allowed={highlights.get('delegation_allowed', 0)}, "
+                f"blocked={highlights.get('delegation_blocked', 0)})"
+            )
+    if runner.verifier:
+        vs = runner.verifier.summary()
+        print(f"\nVerification: {vs.get('total', 0)} tasks")
+        print(f"  Algorithmic pass rate: {vs.get('algorithmic_pass_rate', 0):.1%}")
+        if vs.get("jury_pass_rate") is not None:
+            print(f"  Jury pass rate: {vs['jury_pass_rate']:.1%}")
+        print(f"  Overall pass rate: {vs.get('overall_pass_rate', 0):.1%}")
+        if vs.get("avg_jury_score") is not None:
+            print(f"  Avg jury score: {vs['avg_jury_score']:.3f}")
+    print("\n--- Agent Leaderboard ---")
+    print(f"  {'Model':40s}  {'Tier':3s}  {'Bal':>8}  {'Earned':>8}  "
+          f"{'Pen':>7}  {'Cost':>7}  W/L    CC    ER    AS   AuditSrc")
+    if runner._final_summary:
+        for a in runner._final_summary["agents"]:
+            r = a.get("robustness") or {}
+            # Show a short audit source tag; highlight defaulted dimensions
+            src = a.get("audit_data_source", "?")
+            defaulted = a.get("audit_dims_defaulted", [])
+            src_tag = src if not defaulted else f"{src}[def:{','.join(defaulted)}]"
+            print(
+                f"  {a['model_name']:40s} | {a['tier_name']:3s} | "
+                f"bal={a['balance']:8.4f} | earned={a['total_earned']:8.4f} | "
+                f"pen={a['total_penalties']:7.4f} | cost={a['token_cost_eth']:7.4f} | "
+                f"W/L={a['contracts_completed']}/{a['contracts_failed']} | "
+                f"CC={r.get('cc', 0):.2f} ER={r.get('er', 0):.2f} AS={r.get('as', 0):.2f} | "
+                f"{src_tag}"
+            )
+        dqw = runner._final_summary.get("data_quality_warnings", {})
+        if dqw.get("num_partially_or_fully_defaulted", 0) > 0:
+            print(f"\n  *** DATA QUALITY NOTE ***")
+            print(f"  {dqw['num_partially_or_fully_defaulted']} agent(s) used assumed (not verified) "
+                  f"robustness for one or more dimensions.")
+            print(f"  These agents' tier assignments are estimates. See 'data_quality_warnings' "
+                  f"in final_summary.json for details.")
+    print("\n" + "=" * 60)
+if __name__ == "__main__":
+    main()