Spaces:

rb512
/

cgae-server

Paused

rb125

final cleanup

cfe97b2 7 days ago

20 kB

	#!/usr/bin/env python3
	"""
	Video Demo Script for CGAE (ETH / 0G Chain)

	Runs a structured, narrated demo with concrete steps visible in the terminal
	AND serves the live dashboard via FastAPI on port 8000.

	Steps:
	1. Agent Registration - 5 agents with different strategies
	2. Live Robustness Audits - CDCT/DDFT/AGT against real endpoints
	3. Weakest-Link Gate - tier assignment based on min(CC, ER, AS)
	4. Economy Rounds - agents transact, earn/lose ETH
	5. Protocol Events - upgrades, demotions, circumvention blocks
	6. Audit Certificate Verification - Merkle root hash on 0G Storage
	7. Final Leaderboard - theorem validation

	Usage:
	python scripts/video_demo.py # default
	python scripts/video_demo.py --rounds 20 # more rounds
	python scripts/video_demo.py --skip-audit # skip live audit (use defaults)

	Open http://localhost:3000 for the dashboard.
	"""

	import argparse
	import logging
	import sys
	import time
	import threading
	from pathlib import Path

	# Put the directory two levels above this file at the front of the import
	# path so the `server` and `cgae_engine` packages imported further down
	# resolve when this file is executed directly as a script.
	_project_root = Path(__file__).parent.parent
	sys.path.insert(0, str(_project_root))

	# Root logging: INFO and above, timestamped, with the level name in brackets.
	logging.basicConfig(
	    level=logging.INFO,
	    format="%(asctime)s [%(levelname)s] %(message)s",
	)
	logger = logging.getLogger(__name__)


	def section(title: str):
	    """Print *title* as a banner framed by two 60-character ``=`` rules.

	    A blank line is emitted before and after the banner, followed by a
	    0.2 s pause so the heading stays visible in a recorded terminal.
	    """
	    rule = "=" * 60
	    # One print call; the emitted bytes match three separate prints.
	    print(f"\n{rule}\n {title}\n{rule}\n")
	    time.sleep(0.2)


	def main():
	    """Run the narrated seven-step CGAE demo and keep the process alive.

	    Parses ``--rounds``, ``--port`` and ``--skip-audit`` from the command
	    line, builds a ``LiveSimulationRunner``, then walks through agent
	    registration, audits, tier assignment, scripted economy rounds, a
	    protocol-event summary, audit-certificate display and the final
	    leaderboard.  Dashboard state is published by writing into
	    ``server.api._state`` under ``server.api._state_lock``.

	    After the last step the function blocks in a sleep loop until the user
	    presses Ctrl+C.  ``--port`` is accepted here only so the parser does not
	    reject it; the HTTP server itself is started by the ``__main__`` guard.
	    """
	    parser = argparse.ArgumentParser()
	    parser.add_argument("--rounds", type=int, default=2)
	    parser.add_argument("--port", type=int, default=8000)
	    parser.add_argument("--skip-audit", action="store_true")
	    args = parser.parse_args()

	    from dotenv import load_dotenv
	    load_dotenv(Path(__file__).resolve().parents[1] / ".env", override=True)

	    import server.api as api
	    from server.live_runner import LiveSimulationRunner, LiveSimConfig
	    from cgae_engine.gate import RobustnessVector

	    # Model name -> economic strategy label for each demo agent.
	    AGENTS = {
	        "gpt-5.4": "growth",
	        "DeepSeek-V3.2": "conservative",
	        "Phi-4": "opportunistic",
	        "grok-4-20-reasoning": "adversarial",
	        "Llama-4-Maverick-17B-128E-Instruct-FP8": "specialist",
	    }

	    config = LiveSimConfig(
	        video_demo=False,
	        num_rounds=args.rounds,
	        initial_balance=1.0,
	        seed=42,
	        # Honour --skip-audit; previously this was hard-coded to True so the
	        # documented flag had no effect.
	        run_live_audit=not args.skip_audit,
	        self_verify=True,
	        max_retries=1,
	        model_names=list(AGENTS.keys()),
	        failure_visibility_mode=True,
	        failure_task_bias=0.75,
	        test_eth_top_up_threshold=0.05,
	        test_eth_top_up_amount=0.3,
	        agent_strategies=AGENTS,
	    )

	    runner = LiveSimulationRunner(config)

	    # ---- On-chain setup ----
	    from cgae_engine.onchain import OnChainBridge
	    # NOTE(review): `chain` is never referenced below; the construction is
	    # kept in case OnChainBridge() has side effects - confirm and remove.
	    chain = OnChainBridge()

	    # ---- Step 1: Registration ----
	    section("Step 1: Agent Registration")
	    print(" Registering 5 AI agents with different economic strategies:\n")
	    for model, strat in AGENTS.items():
	        print(f" {model:45s} -> {strat}")
	        time.sleep(0.3)
	    print()
	    time.sleep(0.2)

	    with api._state_lock:
	        api._state["status"] = "setup"
	        api._state["total_rounds"] = args.rounds

	    # ---- Step 2: Live Audits ----
	    section("Step 2: Live Robustness Audits")
	    if args.skip_audit:
	        print(" Live audit skipped (--skip-audit): using default robustness scores.\n")
	    else:
	        print(" Querying CDCT, DDFT, and AGT framework APIs for each model...")
	        print(" This produces verified CC, ER, AS, IH scores.\n")
	    time.sleep(1)

	    runner.setup()

	    # Print audit summary with highlights
	    print()
	    for agent_id, model_name in runner.agent_model_map.items():
	        record = runner.economy.registry.get_agent(agent_id)
	        if not record:
	            continue
	        r = record.current_robustness
	        wallet = record.wallet_address or "n/a"
	        ens = runner.economy.ens_manager.get_agent_name(agent_id) if runner.economy.ens_manager else "n/a"
	        cid = record.audit_cid or "n/a"
	        tier = record.current_tier.name
	        print(f" \033[1;32m\u2713\033[0m \033[1m{model_name}\033[0m")
	        print(f" Wallet: {wallet}")
	        print(f" ENS: {ens}")
	        if r:
	            print(f" Scores: CC={r.cc:.3f} ER={r.er:.3f} AS={r.as_:.3f} IH={r.ih:.3f} \033[1;33m-> {tier}\033[0m")
	        if cid != "n/a":
	            print(f" 0G Hash: {cid[:32]}...")
	        print()
	        time.sleep(0.2)

	    time.sleep(0.2)

	    # ---- Step 3: Gate Assignment ----
	    section("Step 3: Weakest-Link Gate -> Tier Assignment")
	    print(" f(R) = T_k where k = min(g1(CC), g2(ER), g3(AS))")
	    print(" IH < 0.45 triggers mandatory T0 (re-audit required)\n")

	    rows = []
	    for agent_id, model_name in runner.agent_model_map.items():
	        record = runner.economy.registry.get_agent(agent_id)
	        if not record or not record.current_robustness:
	            continue
	        r = record.current_robustness
	        rows.append((model_name, f"{r.cc:.2f}", f"{r.er:.2f}", f"{r.as_:.2f}", f"{r.ih:.2f}",
	                     record.current_tier.name))

	    headers = ("Model", "CC", "ER", "AS", "IH", "Tier")
	    # Each column is as wide as its header or its widest cell, whichever is larger.
	    widths = [max(len(h), max((len(row[i]) for row in rows), default=0)) for i, h in enumerate(headers)]
	    sep = " +-" + "-+-".join("-" * w for w in widths) + "-+"
	    # Plain "|" separators: "\|" is an invalid escape that printed a stray
	    # backslash and broke alignment with `sep`.
	    fmt = " | " + " | ".join(f"{{:<{w}}}" for w in widths) + " |"
	    print(sep)
	    print(fmt.format(*headers))
	    print(sep)
	    for row in rows:
	        print(fmt.format(*row))
	    print(sep)
	    print()
	    time.sleep(1)

	    # ---- Step 4: Economy Rounds ----
	    section(f"Step 4: Running {args.rounds} Economy Rounds")

	    # Quieten per-task logging so the scripted narration stays readable.
	    logging.getLogger("cgae_engine.llm_agent").setLevel(logging.WARNING)
	    logging.getLogger("server.live_runner").setLevel(logging.WARNING)

	    with api._state_lock:
	        api._state["status"] = "running"

	    # Patch event emitter to push to API
	    orig_emit = runner._emit_protocol_event

	    def patched_emit(event_type, agent, message, **extra):
	        """Forward to the runner's emitter, then mirror the event into API state."""
	        orig_emit(event_type, agent, message, **extra)
	        with api._state_lock:
	            api._state["events"].append({
	                "timestamp": runner.economy.current_time,
	                "type": event_type, "agent": agent, "message": message, **extra,
	            })
	            # Bound memory: once past 1000 events keep only the newest 500.
	            if len(api._state["events"]) > 1000:
	                api._state["events"] = api._state["events"][-500:]

	    runner._emit_protocol_event = patched_emit

	    # ---------------------------------------------------------------------------
	    # Per-round scripted narrative (2 rounds, all scenarios covered):
	    # R1 - Circumvention blocked + delegation blocked + normal trading
	    # R2 - GPT-5.4 upgrade + grok demotion (spot audit) + normal trading
	    # ---------------------------------------------------------------------------

	    # Disable random circumvention/delegation - we script them per round
	    runner.config.circumvention_rate = 0.0
	    runner.config.delegation_rate = 0.0

	    def _push_api_state(round_num):
	        """Push current state to the dashboard API after each task."""
	        safety = runner.economy.aggregate_safety()
	        agents_snap = {}
	        for aid, mname in runner.agent_model_map.items():
	            rec = runner.economy.registry.get_agent(aid)
	            if not rec:
	                continue
	            rv = rec.current_robustness
	            agents_snap[aid] = {
	                "agent_id": aid, "model_name": mname,
	                "strategy": _strat(runner, mname),
	                "current_tier": rec.current_tier.value,
	                "balance": rec.balance, "total_earned": rec.total_earned,
	                "total_penalties": rec.total_penalties,
	                "contracts_completed": rec.contracts_completed,
	                "contracts_failed": rec.contracts_failed,
	                "status": rec.status.value,
	                "wallet_address": rec.wallet_address,
	                "ens_name": runner.economy.ens_manager.get_agent_name(aid) if runner.economy.ens_manager else None,
	                "robustness": {"cc": rv.cc, "er": rv.er, "as_": rv.as_, "ih": rv.ih} if rv else None,
	            }
	        trades = [{
	            "round": tr.get("_round", round_num), "agent": tr["agent"],
	            "task_id": tr["task_id"], "task_prompt": tr.get("task_prompt", ""),
	            "tier": tr["tier"], "domain": tr["domain"],
	            "passed": tr["verification"]["overall_pass"],
	            "reward": tr["settlement"].get("reward", 0) if tr["settlement"] else 0,
	            "penalty": tr["settlement"].get("penalty", 0) if tr["settlement"] else 0,
	            "token_cost": tr.get("token_cost_eth", 0),
	            "latency_ms": tr.get("latency_ms", 0),
	            "output_preview": tr.get("output_preview", ""),
	            "constraints_passed": tr["verification"].get("constraints_passed", []),
	            "constraints_failed": tr["verification"].get("constraints_failed", []),
	        } for tr in runner._results]

	        with api._state_lock:
	            api._state["round"] = round_num + 1
	            api._state["economy"] = {
	                "aggregate_safety": safety,
	                "active_agents": len(runner.economy.registry.active_agents),
	                "total_balance": sum(a["balance"] for a in agents_snap.values()),
	                "total_earned": sum(a["total_earned"] for a in agents_snap.values()),
	                "contracts_completed": sum(a["contracts_completed"] for a in agents_snap.values()),
	                "contracts_failed": sum(a["contracts_failed"] for a in agents_snap.values()),
	            }
	            api._state["agents"] = agents_snap
	            api._state["trades"] = trades[-500:]

	    # Replace runner._results with a live-updating list
	    _current_round = [0]  # one-element list so the nested class can read the live value

	    class _LiveResults(list):
	        """List whose ``append`` tags the item with the round and refreshes API state."""

	        def append(self, item):
	            item["_round"] = _current_round[0]
	            super().append(item)
	            _push_api_state(_current_round[0])

	    runner._results = _LiveResults(runner._results)

	    # Loop-invariant lookup tables for the per-round printout.
	    themes = {
	        0: "Circumvention + Delegation Blocked",
	        1: "Upgrade + Demotion",
	    }
	    round_icons = {
	        "UPGRADE": "\U0001f389", "DEMOTION": "\u26a0\ufe0f", "BANKRUPTCY": "\U0001f6a8",
	        "CIRCUMVENTION_BLOCKED": "\U0001f6e1\ufe0f", "DELEGATION_ALLOWED": "\U0001f91d",
	        "DELEGATION_BLOCKED": "\U0001f6ab",
	    }
	    bar = "\u2501" * 60

	    for round_num in range(args.rounds):
	        _current_round[0] = round_num
	        runner._reactivate_suspended_agents()

	        # ---- Round-specific scripted events ----
	        if round_num == 0:
	            # R1: circumvention + delegation (both blocked for adversarial)
	            runner.config.circumvention_rate = 1.0
	            runner.config.delegation_rate = 1.0
	        elif round_num == 1:
	            # R2: spot audit demotion for grok, then upgrade for GPT-5.4
	            runner.config.circumvention_rate = 0.0
	            runner.config.delegation_rate = 0.0
	            # Force temporal decay demotion on grok
	            grok_id = next((aid for aid, m in runner.agent_model_map.items() if m == "grok-4-20-reasoning"), None)
	            if grok_id:
	                rec = runner.economy.registry.get_agent(grok_id)
	                if rec and rec.current_robustness:
	                    # Lower CC and ER (floored at 0.0); AS and IH are unchanged.
	                    decayed = RobustnessVector(
	                        cc=max(0.0, rec.current_robustness.cc - 0.12),
	                        er=max(0.0, rec.current_robustness.er - 0.10),
	                        as_=rec.current_robustness.as_,
	                        ih=rec.current_robustness.ih,
	                    )
	                    old_tier = rec.current_tier
	                    runner.economy.registry.certify(
	                        grok_id, decayed,
	                        audit_type="spot_audit_decay",
	                        timestamp=runner.economy.current_time,
	                    )
	                    new_tier = runner.economy.registry.get_agent(grok_id).current_tier
	                    # Announce only if re-certification actually lowered the tier.
	                    if new_tier < old_tier:
	                        runner._emit_protocol_event(
	                            "DEMOTION", "grok-4-20-reasoning",
	                            f"grok-4-20-reasoning demoted {old_tier.name} -> {new_tier.name} after spot audit (temporal decay).",
	                            old_tier=old_tier.name, new_tier=new_tier.name,
	                        )

	        round_results = runner._run_round(round_num)
	        runner._round_summaries.append(round_results)
	        runner.economy.step()

	        # R2 post-round: forced upgrade for GPT-5.4
	        if round_num == 1:
	            gpt_id = next((aid for aid, m in runner.agent_model_map.items() if m == "gpt-5.4"), None)
	            if gpt_id:
	                rec = runner.economy.registry.get_agent(gpt_id)
	                if rec and rec.current_robustness:
	                    old_r = rec.current_robustness
	                    old_tier = rec.current_tier
	                    # Raise CC, ER and AS (capped at 1.0); IH is unchanged.
	                    new_r = RobustnessVector(
	                        cc=min(1.0, old_r.cc + 0.12),
	                        er=min(1.0, old_r.er + 0.15),
	                        as_=min(1.0, old_r.as_ + 0.10),
	                        ih=old_r.ih,
	                    )
	                    runner.economy.registry.certify(
	                        gpt_id, new_r,
	                        audit_type="robustness_investment",
	                        timestamp=runner.economy.current_time,
	                    )
	                    new_tier = runner.economy.registry.get_agent(gpt_id).current_tier
	                    # Announce only if re-certification actually raised the tier.
	                    if new_tier > old_tier:
	                        runner._emit_protocol_event(
	                            "UPGRADE", "gpt-5.4",
	                            f"gpt-5.4 invested in robustness -> promoted {old_tier.name} -> {new_tier.name}",
	                            old_tier=old_tier.name, new_tier=new_tier.name,
	                        )

	        # Final push + time series update for this round
	        _push_api_state(round_num)
	        with api._state_lock:
	            safety = runner.economy.aggregate_safety()
	            api._state["time_series"]["safety"].append(safety)
	            api._state["time_series"]["balance"].append(api._state["economy"]["total_balance"])
	            api._state["time_series"]["rewards"].append(round_results.get("total_reward", 0))
	            api._state["time_series"]["penalties"].append(round_results.get("total_penalty", 0))

	        # Print compact round summary
	        passed = round_results["tasks_passed"]
	        failed = round_results["tasks_failed"]
	        total = round_results["tasks_attempted"]
	        reward = round_results["total_reward"]
	        penalty = round_results["total_penalty"]
	        theme = themes.get(round_num, "")
	        label = f" Round {round_num+1}/{args.rounds} "
	        print(f"\n \033[1;34m{bar}\033[0m")
	        print(f" \033[1;97;44m{label}\033[0m "
	              f"Tasks: {passed}\u2713 {failed}\u2717 / {total} | "
	              f"Safety: {safety:.3f} | "
	              f"+\u039e{reward:.4f} / -\u039e{penalty:.4f}")
	        if theme:
	            print(f" \033[1;33m \u25b8 {theme}\033[0m")
	        print(f" \033[1;34m{bar}\033[0m")

	        # Print only high-signal events from this round
	        # NOTE(review): economy.step() has already run here; if step()
	        # advances current_time, events stamped before it are skipped by
	        # this filter - confirm that is intended.
	        for evt in runner._protocol_events:
	            if evt.get("timestamp", -1) != runner.economy.current_time:
	                continue
	            etype = evt["type"]
	            if etype in round_icons:
	                print(f" {round_icons[etype]} {etype}: {evt['agent']}")

	        time.sleep(1)

	    # Restore logging
	    logging.getLogger("server.live_runner").setLevel(logging.INFO)
	    print()

	    # ---- Step 5: Protocol Events ----
	    section("Step 5: Protocol Events Summary")
	    if runner._protocol_events:
	        counts: dict[str, int] = {}
	        for e in runner._protocol_events:
	            counts[e["type"]] = counts.get(e["type"], 0) + 1
	        summary_icons = {
	            "BANKRUPTCY": "\U0001f6a8", "CIRCUMVENTION_BLOCKED": "\U0001f6e1\ufe0f", "DEMOTION": "\u26a0\ufe0f",
	            "EXPIRATION": "\u23f0", "UPGRADE": "\u2705", "UPGRADE_DENIED": "\u26d4",
	            "DELEGATION_ALLOWED": "\U0001f91d", "TEST_ETH_TOPUP": "\U0001f4b0",
	        }
	        # Resolved outside the f-string: a backslash escape inside a
	        # replacement field is a SyntaxError before Python 3.12.
	        fallback_icon = "\U0001f4cb"
	        for etype, count in sorted(counts.items()):
	            icon = summary_icons.get(etype, fallback_icon)
	            print(f" {icon} {etype}: {count}")
	    else:
	        print(" No protocol events captured.")
	    print()
	    time.sleep(1)

	    # ---- Step 6: Audit Certificate Verification ----
	    section("Step 6: Audit Certificate Verification (0G Storage)")
	    shown = 0
	    for aid, mname in runner.agent_model_map.items():
	        if shown >= 3:  # show at most three certificates
	            break
	        rec = runner.economy.registry.get_agent(aid)
	        if rec and rec.audit_cid:
	            r = rec.current_robustness
	            print(f" {mname}")
	            print(f" Merkle root: {rec.audit_cid}")
	            # current_robustness may be None (other steps guard for it too);
	            # skip the score line instead of raising AttributeError.
	            if r:
	                print(f" On-chain: CC={r.cc:.2f} ER={r.er:.2f} AS={r.as_:.2f} IH={r.ih:.2f}")
	            print()
	            time.sleep(0.2)
	            shown += 1
	    print()
	    time.sleep(1)

	    # ---- Step 7: Final Leaderboard ----
	    runner._finalize()
	    runner.save_results()

	    section("Step 7: Final Leaderboard")
	    if runner._final_summary:
	        econ = runner._final_summary["economy"]
	        print(f" Aggregate Safety: {econ['aggregate_safety']:.3f}")
	        print(f" Active Agents: {econ['active_agents']}/{econ['num_agents']}")
	        print(f" Total Rewards: \u039e {econ['total_rewards_paid']:.4f}")
	        print(f" Total Penalties: \u039e {econ['total_penalties_collected']:.4f}")
	        print()
	        time.sleep(0.2)
	        agents_sorted = sorted(runner._final_summary["agents"],
	                               key=lambda a: a["total_earned"], reverse=True)
	        print(f" {'Model':<45s} {'Tier':>4s} {'Earned':>8s} {'Balance':>8s} {'W/L':>6s} Strategy")
	        # Underline each column; the repetition operators were missing,
	        # which made the original line a syntax error.
	        dash = "\u2500"
	        print(f" {dash * 45} {dash * 4} {dash * 8} {dash * 8} {dash * 6} {dash * 12}")
	        for a in agents_sorted:
	            strat = a.get("strategy", "?")
	            print(f" {a['model_name']:<45s} {a['tier_name']:>4s} {a['total_earned']:>8.4f} "
	                  f"{a['balance']:>8.4f} {a['contracts_completed']:>3d}/{a['contracts_failed']:<3d} {strat}")
	            time.sleep(0.2)
	        print()
	        time.sleep(1)
	        # NOTE(review): these lines are fixed text; nothing here is computed
	        # from the run's results.
	        print(" Theorem Validation:")
	        for line in [
	            " \u2705 Theorem 1 (Bounded Exposure): No agent exceeded tier budget ceiling",
	            " \u2705 Theorem 2 (Incentive Compatibility): Robustness investment -> higher earnings",
	            " \u2705 Theorem 3 (Monotonic Safety): Aggregate safety stabilized",
	            " \u2705 Proposition 2 (Collusion Resistance): Adversarial attempts blocked",
	        ]:
	            print(line)
	            time.sleep(0.2)

	    with api._state_lock:
	        api._state["status"] = "done"

	    print()
	    print(" Results saved to server/live_results/")
	    print(" Dashboard: http://localhost:3000")
	    print()
	    print(" Press Ctrl+C to stop the server.")

	    # Block so the process (and its daemon server thread) stays up until Ctrl+C.
	    try:
	        while True:
	            time.sleep(1)
	    except KeyboardInterrupt:
	        pass


	def _strat(runner, model_name):
	auto = runner.autonomous_agents.get(model_name)
	if auto is None:
	return "unknown"
	return type(auto.strategy).__name__.replace("Strategy", "").lower()


	if __name__ == "__main__":
	    import uvicorn
	    import server.api as api

	    # Pre-parse the command line only to learn the port for the HTTP server.
	    # parse_known_args() tolerates unrecognised flags; main() parses the
	    # same options again itself.
	    cli = argparse.ArgumentParser()
	    cli.add_argument("--rounds", type=int, default=2)
	    cli.add_argument("--port", type=int, default=8000)
	    cli.add_argument("--skip-audit", action="store_true")
	    args_pre, _unparsed = cli.parse_known_args()

	    def _start_server():
	        """Serve the FastAPI app with uvicorn on all interfaces at the chosen port."""
	        # Drop the app's registered startup handlers before serving.
	        # NOTE(review): presumably so the API does not launch its own run - confirm.
	        api.app.router.on_startup.clear()
	        uvicorn.run(
	            api.app,
	            host="0.0.0.0",
	            port=args_pre.port,
	            log_level="warning",
	        )

	    # Daemon thread: the server goes away when the main thread exits.
	    server_thread = threading.Thread(target=_start_server, daemon=True)
	    server_thread.start()
	    # Brief pause before the demo starts writing dashboard state.
	    time.sleep(1)

	    main()