rb125 commited on
Commit
a556b6c
·
1 Parent(s): 96a15a6

final demo cleanup

Browse files
cgae_engine/audit.py CHANGED
@@ -656,14 +656,18 @@ class AuditOrchestrator:
656
  storage_root_hash: Optional[str] = None
657
  storage_root_hash_real: bool = False
658
  if cache_dir:
659
- storage_root_hash, storage_root_hash_real = _pin_audit_to_0g(
660
- model_name=model_name,
661
- agent_id=agent_id,
662
- cache_dir=Path(cache_dir) if cache_dir else None,
663
- robustness=robustness,
664
- defaults_used=defaults_used,
665
- errors=errors,
666
- )
 
 
 
 
667
 
668
  return AuditResult(
669
  agent_id=agent_id,
@@ -689,139 +693,45 @@ class AuditOrchestrator:
689
  def _run_ddft_live(
690
  self, model_name: str, model_config: dict, cache_dir: Optional[Path]
691
  ) -> tuple[float, float]:
692
- """
693
- Run DDFT assessment via the hosted DDFT API service.
694
- Returns (er_score, ih_score).
695
- Cache file: cache_dir/<model_name>_ddft_live.json
696
- """
697
- if cache_dir:
698
- cached = cache_dir / f"{model_name}_ddft_live.json"
699
- if cached.exists():
700
- data = json.loads(cached.read_text())
701
- return data["er"], data["ih"]
702
-
703
- api_keys = {
704
- "AZURE_API_KEY": self.azure_api_key,
705
- "AZURE_OPENAI_API_ENDPOINT": self.azure_openai_endpoint,
706
- "DDFT_MODELS_ENDPOINT": self.ddft_models_endpoint,
707
- "AZURE_ANTHROPIC_API_ENDPOINT": self.azure_anthropic_api_endpoint,
708
- }
709
-
710
- result = self._ddft.assess(
711
- model_name=model_name,
712
- model_config=model_config,
713
- api_keys=api_keys,
714
- concepts=["Natural Selection", "Recursion"],
715
- compression_levels=[0.0, 0.5, 1.0],
716
- )
717
-
718
- er = float(result.get("er", 0.5))
719
- ih = float(result.get("ih", 0.7))
720
-
721
- if cache_dir:
722
- cache_dir.mkdir(parents=True, exist_ok=True)
723
- (cache_dir / f"{model_name}_ddft_live.json").write_text(
724
- json.dumps({"er": er, "ih": ih,
725
- "ci_score": result.get("ci_score"),
726
- "phenotype": result.get("phenotype")}, indent=2)
727
- )
728
  return er, ih
729
 
730
  def _run_cdct_live(
731
  self, model_name: str, llm_agent: Any, cache_dir: Optional[Path]
732
  ) -> float:
733
- """
734
- Run CDCT experiment via the hosted CDCT API service.
735
- Returns cc_score.
736
- Cache file: cache_dir/<model_name>_cdct_live.json
737
- """
738
- if cache_dir:
739
- cached = cache_dir / f"{model_name}_cdct_live.json"
740
- if cached.exists():
741
- data = json.loads(cached.read_text())
742
- return data["cc"]
743
-
744
- api_keys = {
745
- "AZURE_API_KEY": self.azure_api_key,
746
- "AZURE_OPENAI_API_ENDPOINT": self.azure_openai_endpoint,
747
- "DDFT_MODELS_ENDPOINT": self.ddft_models_endpoint,
748
- "AZURE_ANTHROPIC_API_ENDPOINT": self.azure_anthropic_api_endpoint,
749
- }
750
-
751
- model_config = getattr(llm_agent, "model_config", {})
752
-
753
- result = self._cdct.run_experiment(
754
- model_name=model_name,
755
- model_config=model_config,
756
- api_keys=api_keys,
757
- concept="logic_modus_ponens",
758
- prompt_strategy="compression_aware",
759
- evaluation_mode="balanced",
760
- )
761
-
762
- cc = float(result.get("cc", 0.5))
763
-
764
- if cache_dir:
765
- cache_dir.mkdir(parents=True, exist_ok=True)
766
- (cache_dir / f"{model_name}_cdct_live.json").write_text(
767
- json.dumps({"cc": cc, "model": model_name}, indent=2)
768
- )
769
  return cc
770
 
771
  def _run_eect_live(
772
  self, model_name: str, llm_agent: Any, cache_dir: Optional[Path]
773
  ) -> float:
774
- """
775
- Run EECT Socratic dialogues via the hosted EECT API service.
776
- Returns as_score.
777
- Cache file: cache_dir/<model_name>_eect_live.json
778
- """
779
- if cache_dir:
780
- cached = cache_dir / f"{model_name}_eect_live.json"
781
- if cached.exists():
782
- data = json.loads(cached.read_text())
783
- return data["as"]
784
-
785
- api_keys = {
786
- "AZURE_API_KEY": self.azure_api_key,
787
- "AZURE_OPENAI_API_ENDPOINT": self.azure_openai_endpoint,
788
- "DDFT_MODELS_ENDPOINT": self.ddft_models_endpoint,
789
- "AZURE_ANTHROPIC_API_ENDPOINT": self.azure_anthropic_api_endpoint,
790
- }
791
-
792
- model_config = getattr(llm_agent, "model_config", {})
793
-
794
- # Run two dilemmas and average the AS scores
795
- dilemma_ids = ["trolley_problem", "lying_to_save_lives"]
796
- all_turns: list[list] = []
797
- for dilemma_id in dilemma_ids:
798
- try:
799
- resp = self._eect.run_dialogue(
800
- model_name=model_name,
801
- model_config=model_config,
802
- api_keys=api_keys,
803
- dilemma={"id": dilemma_id},
804
- compression_level="c1.0",
805
- )
806
- turns = resp.get("turns", [])
807
- if turns:
808
- all_turns.append(turns)
809
- except Exception as e:
810
- logger.warning(f" EECT dialogue failed for dilemma {dilemma_id}: {e}")
811
-
812
- if not all_turns:
813
- raise RuntimeError("No EECT dialogues completed successfully")
814
-
815
- as_scores = [self._score_eect_turns(turns) for turns in all_turns]
816
- as_ = sum(as_scores) / len(as_scores)
817
-
818
- if cache_dir:
819
- cache_dir.mkdir(parents=True, exist_ok=True)
820
- (cache_dir / f"{model_name}_eect_live.json").write_text(
821
- json.dumps({"as": as_, "model": model_name,
822
- "dialogues_run": len(all_turns)}, indent=2)
823
- )
824
- return as_
825
 
826
  @staticmethod
827
  def _score_eect_turns(turns: list) -> float:
 
656
  storage_root_hash: Optional[str] = None
657
  storage_root_hash_real: bool = False
658
  if cache_dir:
659
+ try:
660
+ storage_root_hash, storage_root_hash_real = _pin_audit_to_0g(
661
+ model_name=model_name,
662
+ agent_id=agent_id,
663
+ cache_dir=Path(cache_dir) if cache_dir else None,
664
+ robustness=robustness,
665
+ defaults_used=defaults_used,
666
+ errors=errors,
667
+ )
668
+ except Exception as e:
669
+ logger.error(f" [0g] Storage pin failed for {model_name}: {e}")
670
+ errors.append(f"0G_STORAGE: {e}")
671
 
672
  return AuditResult(
673
  agent_id=agent_id,
 
693
  def _run_ddft_live(
694
  self, model_name: str, model_config: dict, cache_dir: Optional[Path]
695
  ) -> tuple[float, float]:
696
+ """Query DDFT /score endpoint. Returns (er, ih)."""
697
+ data = self._ddft.get_score(model_name)
698
+ er = float(data.get("ER") or data.get("er") or 0)
699
+ ih = float(data.get("IH") or data.get("ih") or 0)
700
+ if er <= 0 or ih <= 0:
701
+ raise RuntimeError(f"DDFT /score returned no valid ER/IH for {model_name}: {data}")
702
+ logger.info(f" [DDFT] GET {self._ddft.base_url}/score/{model_name} -> ER={er:.3f} IH={ih:.3f}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
703
  return er, ih
704
 
705
  def _run_cdct_live(
706
  self, model_name: str, llm_agent: Any, cache_dir: Optional[Path]
707
  ) -> float:
708
+ """Query CDCT /score endpoint. Returns cc. CDCT returns a list of per-concept metrics."""
709
+ data = self._cdct.get_score(model_name)
710
+ cc = None
711
+ if isinstance(data, list) and data:
712
+ cris = [float(r["CRI"]) for r in data if isinstance(r, dict) and "CRI" in r]
713
+ if cris:
714
+ cc = min(cris)
715
+ elif isinstance(data, dict):
716
+ cc = self._extract_score(data, "cc", model_name=model_name)
717
+ if cc is None or cc <= 0:
718
+ raise RuntimeError(f"CDCT /score returned no valid CC for {model_name}: {data}")
719
+ logger.info(f" [CDCT] GET {self._cdct.base_url}/score/{model_name} -> CC={cc:.3f}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
720
  return cc
721
 
722
  def _run_eect_live(
723
  self, model_name: str, llm_agent: Any, cache_dir: Optional[Path]
724
  ) -> float:
725
+ """Query AGT/EECT /score endpoint. Returns as_score."""
726
+ data = self._eect.get_score(model_name)
727
+ as_ = None
728
+ if isinstance(data, dict):
729
+ as_ = data.get("as_") or data.get("as_score") or data.get("AS") or data.get("as")
730
+ if as_ is not None and float(as_) > 0:
731
+ as_ = float(as_)
732
+ logger.info(f" [AGT] GET {self._eect.base_url}/score/{model_name} -> AS={as_:.3f}")
733
+ return as_
734
+ raise RuntimeError(f"AGT /score returned no valid AS for {model_name}: {data}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
735
 
736
  @staticmethod
737
  def _score_eect_turns(turns: list) -> float:
cgae_engine/economy.py CHANGED
@@ -325,7 +325,7 @@ class Economy:
325
  # Create an ETH wallet for this agent if wallet manager is available
326
  wallet_address = None
327
  if self.wallet_manager:
328
- wallet = self.wallet_manager.create_agent_wallet(record.agent_id)
329
  wallet_address = wallet.address
330
  record.wallet_address = wallet_address
331
 
@@ -378,26 +378,38 @@ class Economy:
378
  # Write certification on-chain if bridge is available
379
  onchain_tx = None
380
  if self.onchain_bridge and record.wallet_address:
381
- audit_hash = (audit_details or {}).get("storage_root_hash", "")
382
- onchain_tx = self.onchain_bridge.certify_agent(
383
- agent_address=record.wallet_address,
384
- cc=robustness.cc, er=robustness.er,
385
- as_=robustness.as_, ih=robustness.ih,
386
- audit_type=audit_type,
387
- audit_hash=audit_hash or "",
388
- )
 
 
 
 
 
 
 
389
 
390
  # Write robustness credentials to ENS text records
391
  if self.ens_manager:
392
- audit_hash = (audit_details or {}).get("storage_root_hash", "")
393
- self.ens_manager.set_agent_credentials(
394
- agent_id=agent_id,
395
- tier=cert.tier.name,
396
- cc=robustness.cc, er=robustness.er,
397
- as_=robustness.as_, ih=robustness.ih,
398
- wallet_address=record.wallet_address or "",
399
- audit_hash=audit_hash,
400
- )
 
 
 
 
 
401
 
402
  self._log("agent_audited", {
403
  "agent_id": agent_id,
 
325
  # Create an ETH wallet for this agent if wallet manager is available
326
  wallet_address = None
327
  if self.wallet_manager:
328
+ wallet = self.wallet_manager.create_agent_wallet(record.agent_id, model_name)
329
  wallet_address = wallet.address
330
  record.wallet_address = wallet_address
331
 
 
378
  # Write certification on-chain if bridge is available
379
  onchain_tx = None
380
  if self.onchain_bridge and record.wallet_address:
381
+ # Skip if already certified at this tier on-chain
382
+ ens_tier = ""
383
+ if self.ens_manager:
384
+ ens_name = self.ens_manager.get_agent_name(agent_id)
385
+ if ens_name:
386
+ ens_tier = self.ens_manager.resolve_text(ens_name, "cgae.tier")
387
+ if ens_tier != cert.tier.name:
388
+ audit_hash = (audit_details or {}).get("storage_root_hash", "")
389
+ onchain_tx = self.onchain_bridge.certify_agent(
390
+ agent_address=record.wallet_address,
391
+ cc=robustness.cc, er=robustness.er,
392
+ as_=robustness.as_, ih=robustness.ih,
393
+ audit_type=audit_type,
394
+ audit_hash=audit_hash or "",
395
+ )
396
 
397
  # Write robustness credentials to ENS text records
398
  if self.ens_manager:
399
+ ens_name = self.ens_manager.get_agent_name(agent_id)
400
+ existing_tier = self.ens_manager.resolve_text(ens_name, "cgae.tier") if ens_name else ""
401
+ if existing_tier != cert.tier.name:
402
+ audit_hash = (audit_details or {}).get("storage_root_hash", "")
403
+ self.ens_manager.set_agent_credentials(
404
+ agent_id=agent_id,
405
+ tier=cert.tier.name,
406
+ cc=robustness.cc, er=robustness.er,
407
+ as_=robustness.as_, ih=robustness.ih,
408
+ wallet_address=record.wallet_address or "",
409
+ audit_hash=audit_hash,
410
+ )
411
+ else:
412
+ logger.info(f" [ens] Skipping text record update for {ens_name} (tier unchanged: {existing_tier})")
413
 
414
  self._log("agent_audited", {
415
  "agent_id": agent_id,
cgae_engine/ens.py CHANGED
@@ -161,6 +161,7 @@ class ENSManager:
161
  def create_subname(self, agent_id: str, model_name: str, owner: str) -> Optional[str]:
162
  """
163
  Create a subname like gpt5.cgaeprotocol.eth for an agent.
 
164
  Returns the full ENS name or None on failure.
165
  """
166
  label = _slugify(model_name)
@@ -171,6 +172,13 @@ class ENSManager:
171
  self._subnames[agent_id] = full_name
172
  return full_name
173
 
 
 
 
 
 
 
 
174
  try:
175
  nonce = self.w3.eth.get_transaction_count(self._account.address)
176
  # setSubnodeRecord creates the subname + sets resolver in one tx
 
161
  def create_subname(self, agent_id: str, model_name: str, owner: str) -> Optional[str]:
162
  """
163
  Create a subname like gpt5.cgaeprotocol.eth for an agent.
164
+ If the subname already exists (has a cgae.tier record), reuse it.
165
  Returns the full ENS name or None on failure.
166
  """
167
  label = _slugify(model_name)
 
172
  self._subnames[agent_id] = full_name
173
  return full_name
174
 
175
+ # Check if subname already exists by reading a text record
176
+ existing_tier = self.resolve_text(full_name, "cgae.tier")
177
+ if existing_tier:
178
+ logger.info(f" [ens] Reusing existing {full_name} (tier={existing_tier})")
179
+ self._subnames[agent_id] = full_name
180
+ return full_name
181
+
182
  try:
183
  nonce = self.w3.eth.get_transaction_count(self._account.address)
184
  # setSubnodeRecord creates the subname + sets resolver in one tx
cgae_engine/framework_clients.py CHANGED
@@ -230,17 +230,16 @@ class EECTClient:
230
  Run a single Socratic ethical dialogue for one dilemma.
231
 
232
  Returns a dict with:
233
- turns list of dialogue turn dicts (role, response, )
234
  """
235
  url = f"{self.base_url}/dialogue"
 
236
  payload = {
237
  "model_name": model_name,
238
- "model_config": model_config,
239
- "api_keys": api_keys,
240
- "dilemma": dilemma,
241
  "compression_level": compression_level,
242
  }
243
- logger.debug(f"[EECT] POST {url} model={model_name} dilemma={dilemma.get('id')}")
244
  return _post(url, payload)
245
 
246
  def get_score(self, model_name: str) -> dict:
 
230
  Run a single Socratic ethical dialogue for one dilemma.
231
 
232
  Returns a dict with:
233
+ turns - list of dialogue turn dicts (role, response, ...)
234
  """
235
  url = f"{self.base_url}/dialogue"
236
+ dilemma_id = dilemma.get("id", dilemma) if isinstance(dilemma, dict) else str(dilemma)
237
  payload = {
238
  "model_name": model_name,
239
+ "dilemma_id": dilemma_id,
 
 
240
  "compression_level": compression_level,
241
  }
242
+ logger.debug(f"[EECT] POST {url} model={model_name} dilemma_id={dilemma_id}")
243
  return _post(url, payload)
244
 
245
  def get_score(self, model_name: str) -> dict:
cgae_engine/onchain.py CHANGED
@@ -241,7 +241,7 @@ class EscrowBridge:
241
  self._tx_log.append({"tx_hash": tx_hash.hex(), "status": status})
242
  return tx_hash.hex()
243
  except Exception as e:
244
- logger.error(f" [escrow] tx failed: {e}")
245
  self._tx_log.append({"error": str(e)})
246
  return None
247
 
 
241
  self._tx_log.append({"tx_hash": tx_hash.hex(), "status": status})
242
  return tx_hash.hex()
243
  except Exception as e:
244
+ logger.warning(f" [escrow] tx skipped (insufficient gas or network error): {e}")
245
  self._tx_log.append({"error": str(e)})
246
  return None
247
 
cgae_engine/wallet.py CHANGED
@@ -57,6 +57,7 @@ class WalletManager:
57
  rpc_url: Optional[str] = None,
58
  treasury_private_key: Optional[str] = None,
59
  dry_run: bool = False,
 
60
  ):
61
  self.rpc_url = rpc_url or os.getenv("ZG_RPC_URL", "https://evmrpc-testnet.0g.ai")
62
  self._treasury_key = treasury_private_key or os.getenv("PRIVATE_KEY")
@@ -65,6 +66,7 @@ class WalletManager:
65
  self.w3 = Web3(Web3.HTTPProvider(self.rpc_url))
66
  self._wallets: dict[str, AgentWallet] = {} # agent_id -> wallet
67
  self._disbursements: list[dict] = []
 
68
 
69
  if self._treasury_key:
70
  key = self._treasury_key if self._treasury_key.startswith("0x") else f"0x{self._treasury_key}"
@@ -74,16 +76,31 @@ class WalletManager:
74
  self._treasury_account = None
75
  self.treasury_address = None
76
 
 
 
 
 
77
  @property
78
  def is_live(self) -> bool:
79
  """True if we can send real transactions."""
80
  return self._treasury_account is not None and not self.dry_run
81
 
82
- def create_agent_wallet(self, agent_id: str) -> AgentWallet:
83
- """Generate a new ETH keypair for an agent."""
84
  if agent_id in self._wallets:
85
  return self._wallets[agent_id]
86
 
 
 
 
 
 
 
 
 
 
 
 
87
  acct = Account.create()
88
  wallet = AgentWallet(
89
  agent_id=agent_id,
@@ -91,9 +108,36 @@ class WalletManager:
91
  private_key=acct.key.hex() if isinstance(acct.key, bytes) else acct.key,
92
  )
93
  self._wallets[agent_id] = wallet
 
 
 
94
  logger.info(f" [wallet] Created wallet for {agent_id}: {wallet.address}")
95
  return wallet
96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  def get_wallet(self, agent_id: str) -> Optional[AgentWallet]:
98
  return self._wallets.get(agent_id)
99
 
 
57
  rpc_url: Optional[str] = None,
58
  treasury_private_key: Optional[str] = None,
59
  dry_run: bool = False,
60
+ wallet_store_path: Optional[str] = None,
61
  ):
62
  self.rpc_url = rpc_url or os.getenv("ZG_RPC_URL", "https://evmrpc-testnet.0g.ai")
63
  self._treasury_key = treasury_private_key or os.getenv("PRIVATE_KEY")
 
66
  self.w3 = Web3(Web3.HTTPProvider(self.rpc_url))
67
  self._wallets: dict[str, AgentWallet] = {} # agent_id -> wallet
68
  self._disbursements: list[dict] = []
69
+ self._store_path = Path(wallet_store_path) if wallet_store_path else Path("server/live_results/wallets.json")
70
 
71
  if self._treasury_key:
72
  key = self._treasury_key if self._treasury_key.startswith("0x") else f"0x{self._treasury_key}"
 
76
  self._treasury_account = None
77
  self.treasury_address = None
78
 
79
+ # Load persisted wallets from disk
80
+ self._model_wallets: dict[str, AgentWallet] = {} # model_name -> wallet
81
+ self._load_wallets()
82
+
83
  @property
84
  def is_live(self) -> bool:
85
  """True if we can send real transactions."""
86
  return self._treasury_account is not None and not self.dry_run
87
 
88
+ def create_agent_wallet(self, agent_id: str, model_name: str = "") -> AgentWallet:
89
+ """Get existing wallet for this model or generate a new keypair."""
90
  if agent_id in self._wallets:
91
  return self._wallets[agent_id]
92
 
93
+ # Reuse persisted wallet for this model if it exists
94
+ if model_name and model_name in self._model_wallets:
95
+ wallet = AgentWallet(
96
+ agent_id=agent_id,
97
+ address=self._model_wallets[model_name].address,
98
+ private_key=self._model_wallets[model_name].private_key,
99
+ )
100
+ self._wallets[agent_id] = wallet
101
+ logger.info(f" [wallet] Loaded existing wallet for {agent_id}: {wallet.address}")
102
+ return wallet
103
+
104
  acct = Account.create()
105
  wallet = AgentWallet(
106
  agent_id=agent_id,
 
108
  private_key=acct.key.hex() if isinstance(acct.key, bytes) else acct.key,
109
  )
110
  self._wallets[agent_id] = wallet
111
+ if model_name:
112
+ self._model_wallets[model_name] = wallet
113
+ self._save_wallets()
114
  logger.info(f" [wallet] Created wallet for {agent_id}: {wallet.address}")
115
  return wallet
116
 
117
+ def _load_wallets(self):
118
+ """Load persisted model->wallet mapping from disk."""
119
+ if self._store_path.exists():
120
+ try:
121
+ data = json.loads(self._store_path.read_text())
122
+ for model_name, w in data.items():
123
+ self._model_wallets[model_name] = AgentWallet(
124
+ agent_id=w.get("agent_id", ""),
125
+ address=w["address"],
126
+ private_key=w["private_key"],
127
+ )
128
+ logger.info(f" [wallet] Loaded {len(self._model_wallets)} persisted wallets")
129
+ except Exception as e:
130
+ logger.warning(f" [wallet] Could not load wallets: {e}")
131
+
132
+ def _save_wallets(self):
133
+ """Persist model->wallet mapping to disk (unredacted keys)."""
134
+ self._store_path.parent.mkdir(parents=True, exist_ok=True)
135
+ data = {
136
+ model: {"agent_id": w.agent_id, "address": w.address, "private_key": w.private_key}
137
+ for model, w in self._model_wallets.items()
138
+ }
139
+ self._store_path.write_text(json.dumps(data, indent=2))
140
+
141
  def get_wallet(self, agent_id: str) -> Optional[AgentWallet]:
142
  return self._wallets.get(agent_id)
143
 
scripts/video_demo.py CHANGED
@@ -44,7 +44,7 @@ def section(title: str):
44
 
45
  def main():
46
  parser = argparse.ArgumentParser()
47
- parser.add_argument("--rounds", type=int, default=5)
48
  parser.add_argument("--port", type=int, default=8000)
49
  parser.add_argument("--skip-audit", action="store_true")
50
  args = parser.parse_args()
@@ -107,15 +107,26 @@ def main():
107
 
108
  runner.setup()
109
 
110
- # Certify agents on-chain with their audit scores
 
111
  for agent_id, model_name in runner.agent_model_map.items():
112
  record = runner.economy.registry.get_agent(agent_id)
113
- if record and record.current_robustness:
114
- r = record.current_robustness
115
- wallet = record.wallet_address
116
- audit_hash = record.audit_cid or ""
117
- if wallet and chain.is_live:
118
- chain.certify_agent(wallet, r.cc, r.er, r.as_, r.ih, "registration", audit_hash)
 
 
 
 
 
 
 
 
 
 
119
 
120
  time.sleep(2)
121
 
@@ -169,39 +180,87 @@ def main():
169
  runner._emit_protocol_event = patched_emit
170
 
171
  # ---------------------------------------------------------------------------
172
- # Per-round scripted narrative:
173
- # R1 - Baseline trading + grok circumvention blocked
174
- # R2 - Delegation: grok delegates to DeepSeek (chain robustness)
175
- # R3 - GPT-5.4 invests in robustness -> upgrade to T3
176
- # R4 - Spot audit: temporal decay demotes grok + spoof blocked
177
- # R5 - Post-upgrade: GPT-5.4 earns more at T3, economy stabilises
178
  # ---------------------------------------------------------------------------
179
 
180
  # Disable random circumvention/delegation - we script them per round
181
  runner.config.circumvention_rate = 0.0
182
  runner.config.delegation_rate = 0.0
183
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
  for round_num in range(args.rounds):
 
185
  runner._reactivate_suspended_agents()
186
 
187
  # ---- Round-specific scripted events ----
188
  if round_num == 0:
189
- # R1: force one circumvention attempt from grok
190
  runner.config.circumvention_rate = 1.0
191
- runner.config.delegation_rate = 0.0
192
- elif round_num == 1:
193
- # R2: force delegation, no circumvention
194
- runner.config.circumvention_rate = 0.0
195
  runner.config.delegation_rate = 1.0
196
- elif round_num == 2:
197
- # R3: normal trading, then forced upgrade after
198
  runner.config.circumvention_rate = 0.0
199
  runner.config.delegation_rate = 0.0
200
- elif round_num == 3:
201
- # R4: grok spoof attempt + spot audit demotion
202
- runner.config.circumvention_rate = 1.0
203
- runner.config.delegation_rate = 0.0
204
- # Force temporal decay to trigger a demotion on grok
205
  grok_id = next((aid for aid, m in runner.agent_model_map.items() if m == "grok-4-20-reasoning"), None)
206
  if grok_id:
207
  rec = runner.economy.registry.get_agent(grok_id)
@@ -226,17 +285,13 @@ def main():
226
  f"grok-4-20-reasoning demoted {old_tier.name} -> {new_tier.name} after spot audit (temporal decay).",
227
  old_tier=old_tier.name, new_tier=new_tier.name,
228
  )
229
- elif round_num == 4:
230
- # R5: clean round, no adversarial - show stable economy
231
- runner.config.circumvention_rate = 0.0
232
- runner.config.delegation_rate = 0.0
233
 
234
  round_results = runner._run_round(round_num)
235
  runner._round_summaries.append(round_results)
236
  runner.economy.step()
237
 
238
- # R3 post-round: forced upgrade for GPT-5.4
239
- if round_num == 2:
240
  gpt_id = next((aid for aid, m in runner.agent_model_map.items() if m == "gpt-5.4"), None)
241
  if gpt_id:
242
  rec = runner.economy.registry.get_agent(gpt_id)
@@ -263,53 +318,10 @@ def main():
263
  old_tier=old_tier.name, new_tier=new_tier.name,
264
  )
265
 
266
- # Push state to API
267
- safety = runner.economy.aggregate_safety()
268
- agents_snap = {}
269
- for aid, mname in runner.agent_model_map.items():
270
- rec = runner.economy.registry.get_agent(aid)
271
- if not rec:
272
- continue
273
- rv = rec.current_robustness
274
- agents_snap[aid] = {
275
- "agent_id": aid, "model_name": mname,
276
- "strategy": _strat(runner, mname),
277
- "current_tier": rec.current_tier.value,
278
- "balance": rec.balance, "total_earned": rec.total_earned,
279
- "total_penalties": rec.total_penalties,
280
- "contracts_completed": rec.contracts_completed,
281
- "contracts_failed": rec.contracts_failed,
282
- "status": rec.status.value,
283
- "wallet_address": rec.wallet_address,
284
- "ens_name": runner.economy.ens_manager.get_agent_name(aid) if runner.economy.ens_manager else None,
285
- "robustness": {"cc":rv.cc,"er":rv.er,"as_":rv.as_,"ih":rv.ih} if rv else None,
286
- }
287
- trades = [{
288
- "round": round_num, "agent": tr["agent"],
289
- "task_id": tr["task_id"], "task_prompt": tr.get("task_prompt", ""),
290
- "tier": tr["tier"], "domain": tr["domain"],
291
- "passed": tr["verification"]["overall_pass"],
292
- "reward": tr["settlement"].get("reward", 0) if tr["settlement"] else 0,
293
- "penalty": tr["settlement"].get("penalty", 0) if tr["settlement"] else 0,
294
- "token_cost": tr.get("token_cost_eth", 0),
295
- "latency_ms": tr.get("latency_ms", 0),
296
- "output_preview": tr.get("output_preview", ""),
297
- "constraints_passed": tr["verification"].get("constraints_passed", []),
298
- "constraints_failed": tr["verification"].get("constraints_failed", []),
299
- } for tr in round_results.get("task_results", [])]
300
-
301
  with api._state_lock:
302
- api._state["round"] = round_num + 1
303
- api._state["economy"] = {
304
- "aggregate_safety": safety,
305
- "active_agents": len(runner.economy.registry.active_agents),
306
- "total_balance": sum(a["balance"] for a in agents_snap.values()),
307
- "total_earned": sum(a["total_earned"] for a in agents_snap.values()),
308
- "contracts_completed": sum(a["contracts_completed"] for a in agents_snap.values()),
309
- "contracts_failed": sum(a["contracts_failed"] for a in agents_snap.values()),
310
- }
311
- api._state["agents"] = agents_snap
312
- api._state["trades"] = (api._state["trades"] + trades)[-500:]
313
  api._state["time_series"]["safety"].append(safety)
314
  api._state["time_series"]["balance"].append(api._state["economy"]["total_balance"])
315
  api._state["time_series"]["rewards"].append(round_results.get("total_reward", 0))
@@ -322,11 +334,8 @@ def main():
322
  reward = round_results["total_reward"]
323
  penalty = round_results["total_penalty"]
324
  themes = {
325
- 0: "Baseline + Circumvention",
326
- 1: "Delegation Chain",
327
- 2: "Robustness Investment -> Upgrade",
328
- 3: "Spot Audit + Demotion",
329
- 4: "Stable Economy",
330
  }
331
  theme = themes.get(round_num, "")
332
  label = f" Round {round_num+1}/{args.rounds} "
@@ -454,7 +463,7 @@ if __name__ == "__main__":
454
  import server.api as api
455
 
456
  parser = argparse.ArgumentParser()
457
- parser.add_argument("--rounds", type=int, default=5)
458
  parser.add_argument("--port", type=int, default=8000)
459
  parser.add_argument("--skip-audit", action="store_true")
460
  args_pre = parser.parse_known_args()[0]
 
44
 
45
  def main():
46
  parser = argparse.ArgumentParser()
47
+ parser.add_argument("--rounds", type=int, default=2)
48
  parser.add_argument("--port", type=int, default=8000)
49
  parser.add_argument("--skip-audit", action="store_true")
50
  args = parser.parse_args()
 
107
 
108
  runner.setup()
109
 
110
+ # Print audit summary with highlights
111
+ print()
112
  for agent_id, model_name in runner.agent_model_map.items():
113
  record = runner.economy.registry.get_agent(agent_id)
114
+ if not record:
115
+ continue
116
+ r = record.current_robustness
117
+ wallet = record.wallet_address or "n/a"
118
+ ens = runner.economy.ens_manager.get_agent_name(agent_id) if runner.economy.ens_manager else "n/a"
119
+ cid = record.audit_cid or "n/a"
120
+ tier = record.current_tier.name
121
+ print(f" \033[1;32m\u2713\033[0m \033[1m{model_name}\033[0m")
122
+ print(f" Wallet: {wallet}")
123
+ print(f" ENS: {ens}")
124
+ if r:
125
+ print(f" Scores: CC={r.cc:.3f} ER={r.er:.3f} AS={r.as_:.3f} IH={r.ih:.3f} \033[1;33m-> {tier}\033[0m")
126
+ if cid != "n/a":
127
+ print(f" 0G Hash: {cid[:32]}...")
128
+ print()
129
+ time.sleep(0.5)
130
 
131
  time.sleep(2)
132
 
 
180
  runner._emit_protocol_event = patched_emit
181
 
182
  # ---------------------------------------------------------------------------
183
+ # Per-round scripted narrative (2 rounds, all scenarios covered):
184
+ # R1 - Circumvention blocked + delegation blocked + normal trading
185
+ # R2 - GPT-5.4 upgrade + grok demotion (spot audit) + normal trading
 
 
 
186
  # ---------------------------------------------------------------------------
187
 
188
  # Disable random circumvention/delegation - we script them per round
189
  runner.config.circumvention_rate = 0.0
190
  runner.config.delegation_rate = 0.0
191
 
192
+ def _push_api_state(round_num):
193
+ """Push current state to the dashboard API after each task."""
194
+ safety = runner.economy.aggregate_safety()
195
+ agents_snap = {}
196
+ for aid, mname in runner.agent_model_map.items():
197
+ rec = runner.economy.registry.get_agent(aid)
198
+ if not rec:
199
+ continue
200
+ rv = rec.current_robustness
201
+ agents_snap[aid] = {
202
+ "agent_id": aid, "model_name": mname,
203
+ "strategy": _strat(runner, mname),
204
+ "current_tier": rec.current_tier.value,
205
+ "balance": rec.balance, "total_earned": rec.total_earned,
206
+ "total_penalties": rec.total_penalties,
207
+ "contracts_completed": rec.contracts_completed,
208
+ "contracts_failed": rec.contracts_failed,
209
+ "status": rec.status.value,
210
+ "wallet_address": rec.wallet_address,
211
+ "ens_name": runner.economy.ens_manager.get_agent_name(aid) if runner.economy.ens_manager else None,
212
+ "robustness": {"cc":rv.cc,"er":rv.er,"as_":rv.as_,"ih":rv.ih} if rv else None,
213
+ }
214
+ trades = [{
215
+ "round": tr.get("_round", round_num), "agent": tr["agent"],
216
+ "task_id": tr["task_id"], "task_prompt": tr.get("task_prompt", ""),
217
+ "tier": tr["tier"], "domain": tr["domain"],
218
+ "passed": tr["verification"]["overall_pass"],
219
+ "reward": tr["settlement"].get("reward", 0) if tr["settlement"] else 0,
220
+ "penalty": tr["settlement"].get("penalty", 0) if tr["settlement"] else 0,
221
+ "token_cost": tr.get("token_cost_eth", 0),
222
+ "latency_ms": tr.get("latency_ms", 0),
223
+ "output_preview": tr.get("output_preview", ""),
224
+ "constraints_passed": tr["verification"].get("constraints_passed", []),
225
+ "constraints_failed": tr["verification"].get("constraints_failed", []),
226
+ } for tr in runner._results]
227
+
228
+ with api._state_lock:
229
+ api._state["round"] = round_num + 1
230
+ api._state["economy"] = {
231
+ "aggregate_safety": safety,
232
+ "active_agents": len(runner.economy.registry.active_agents),
233
+ "total_balance": sum(a["balance"] for a in agents_snap.values()),
234
+ "total_earned": sum(a["total_earned"] for a in agents_snap.values()),
235
+ "contracts_completed": sum(a["contracts_completed"] for a in agents_snap.values()),
236
+ "contracts_failed": sum(a["contracts_failed"] for a in agents_snap.values()),
237
+ }
238
+ api._state["agents"] = agents_snap
239
+ api._state["trades"] = trades[-500:]
240
+
241
+ # Replace runner._results with a live-updating list
242
+ _current_round = [0]
243
+ class _LiveResults(list):
244
+ def append(self, item):
245
+ item["_round"] = _current_round[0]
246
+ super().append(item)
247
+ _push_api_state(_current_round[0])
248
+ runner._results = _LiveResults(runner._results)
249
+
250
  for round_num in range(args.rounds):
251
+ _current_round[0] = round_num
252
  runner._reactivate_suspended_agents()
253
 
254
  # ---- Round-specific scripted events ----
255
  if round_num == 0:
256
+ # R1: circumvention + delegation (both blocked for adversarial)
257
  runner.config.circumvention_rate = 1.0
 
 
 
 
258
  runner.config.delegation_rate = 1.0
259
+ elif round_num == 1:
260
+ # R2: spot audit demotion for grok, then upgrade for GPT-5.4
261
  runner.config.circumvention_rate = 0.0
262
  runner.config.delegation_rate = 0.0
263
+ # Force temporal decay demotion on grok
 
 
 
 
264
  grok_id = next((aid for aid, m in runner.agent_model_map.items() if m == "grok-4-20-reasoning"), None)
265
  if grok_id:
266
  rec = runner.economy.registry.get_agent(grok_id)
 
285
  f"grok-4-20-reasoning demoted {old_tier.name} -> {new_tier.name} after spot audit (temporal decay).",
286
  old_tier=old_tier.name, new_tier=new_tier.name,
287
  )
 
 
 
 
288
 
289
  round_results = runner._run_round(round_num)
290
  runner._round_summaries.append(round_results)
291
  runner.economy.step()
292
 
293
+ # R2 post-round: forced upgrade for GPT-5.4
294
+ if round_num == 1:
295
  gpt_id = next((aid for aid, m in runner.agent_model_map.items() if m == "gpt-5.4"), None)
296
  if gpt_id:
297
  rec = runner.economy.registry.get_agent(gpt_id)
 
318
  old_tier=old_tier.name, new_tier=new_tier.name,
319
  )
320
 
321
+ # Final push + time series update for this round
322
+ _push_api_state(round_num)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
323
  with api._state_lock:
324
+ safety = runner.economy.aggregate_safety()
 
 
 
 
 
 
 
 
 
 
325
  api._state["time_series"]["safety"].append(safety)
326
  api._state["time_series"]["balance"].append(api._state["economy"]["total_balance"])
327
  api._state["time_series"]["rewards"].append(round_results.get("total_reward", 0))
 
334
  reward = round_results["total_reward"]
335
  penalty = round_results["total_penalty"]
336
  themes = {
337
+ 0: "Circumvention + Delegation Blocked",
338
+ 1: "Upgrade + Demotion",
 
 
 
339
  }
340
  theme = themes.get(round_num, "")
341
  label = f" Round {round_num+1}/{args.rounds} "
 
463
  import server.api as api
464
 
465
  parser = argparse.ArgumentParser()
466
+ parser.add_argument("--rounds", type=int, default=2)
467
  parser.add_argument("--port", type=int, default=8000)
468
  parser.add_argument("--skip-audit", action="store_true")
469
  args_pre = parser.parse_known_args()[0]
server/live_runner.py CHANGED
@@ -1248,6 +1248,7 @@ class LiveSimulationRunner:
1248
  "executed_by_agent_id": execution_agent_id,
1249
  "executed_by_model": execution_model_name,
1250
  "task_id": task.task_id,
 
1251
  "tier": task.tier.name,
1252
  "domain": task.domain,
1253
  "proof_cid": cid,
 
1248
  "executed_by_agent_id": execution_agent_id,
1249
  "executed_by_model": execution_model_name,
1250
  "task_id": task.task_id,
1251
+ "task_prompt": task.prompt,
1252
  "tier": task.tier.name,
1253
  "domain": task.domain,
1254
  "proof_cid": cid,
storage/upload_to_0g.mjs CHANGED
@@ -94,7 +94,8 @@ async function main() {
94
  try {
95
  const [hash, uploadErr] = await indexer.upload(file, RPC_URL, signer);
96
  if (uploadErr) throw new Error(String(uploadErr));
97
- rootHash = hash;
 
98
  } catch (e) {
99
  writeError(`Upload failed: ${e.message}`);
100
  process.exit(1);
 
94
  try {
95
  const [hash, uploadErr] = await indexer.upload(file, RPC_URL, signer);
96
  if (uploadErr) throw new Error(String(uploadErr));
97
+ // SDK may return a string or an object {rootHash, txHash, txSeq}
98
+ rootHash = (typeof hash === "object" && hash !== null) ? hash.rootHash : hash;
99
  } catch (e) {
100
  writeError(`Upload failed: ${e.message}`);
101
  process.exit(1);
storage/zg_store.py CHANGED
@@ -166,7 +166,22 @@ class ZgStore:
166
  except (json.JSONDecodeError, KeyError):
167
  raise RuntimeError(stderr or f"exit code {proc.returncode}")
168
 
169
- data = json.loads(proc.stdout.strip())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
  if not data.get("ok"):
171
  raise RuntimeError(data.get("error", "Unknown upload error"))
172
 
 
166
  except (json.JSONDecodeError, KeyError):
167
  raise RuntimeError(stderr or f"exit code {proc.returncode}")
168
 
169
+ stdout = proc.stdout.strip()
170
+ if not stdout:
171
+ stderr = proc.stderr.strip()
172
+ raise RuntimeError(f"0G upload returned empty output. stderr: {stderr}")
173
+
174
+ # SDK may print debug lines before the JSON; find the last JSON line
175
+ json_line = None
176
+ for line in reversed(stdout.splitlines()):
177
+ line = line.strip()
178
+ if line.startswith("{"):
179
+ json_line = line
180
+ break
181
+ if not json_line:
182
+ raise RuntimeError(f"0G upload returned no JSON. stdout: {stdout[:200]}")
183
+
184
+ data = json.loads(json_line)
185
  if not data.get("ok"):
186
  raise RuntimeError(data.get("error", "Unknown upload error"))
187