Spaces:
Paused
Paused
rb125 commited on
Commit ·
a556b6c
1
Parent(s): 96a15a6
final demo cleanup
Browse files- cgae_engine/audit.py +41 -131
- cgae_engine/economy.py +30 -18
- cgae_engine/ens.py +8 -0
- cgae_engine/framework_clients.py +4 -5
- cgae_engine/onchain.py +1 -1
- cgae_engine/wallet.py +46 -2
- scripts/video_demo.py +93 -84
- server/live_runner.py +1 -0
- storage/upload_to_0g.mjs +2 -1
- storage/zg_store.py +16 -1
cgae_engine/audit.py
CHANGED
|
@@ -656,14 +656,18 @@ class AuditOrchestrator:
|
|
| 656 |
storage_root_hash: Optional[str] = None
|
| 657 |
storage_root_hash_real: bool = False
|
| 658 |
if cache_dir:
|
| 659 |
-
|
| 660 |
-
|
| 661 |
-
|
| 662 |
-
|
| 663 |
-
|
| 664 |
-
|
| 665 |
-
|
| 666 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 667 |
|
| 668 |
return AuditResult(
|
| 669 |
agent_id=agent_id,
|
|
@@ -689,139 +693,45 @@ class AuditOrchestrator:
|
|
| 689 |
def _run_ddft_live(
|
| 690 |
self, model_name: str, model_config: dict, cache_dir: Optional[Path]
|
| 691 |
) -> tuple[float, float]:
|
| 692 |
-
"""
|
| 693 |
-
|
| 694 |
-
|
| 695 |
-
|
| 696 |
-
|
| 697 |
-
|
| 698 |
-
|
| 699 |
-
if cached.exists():
|
| 700 |
-
data = json.loads(cached.read_text())
|
| 701 |
-
return data["er"], data["ih"]
|
| 702 |
-
|
| 703 |
-
api_keys = {
|
| 704 |
-
"AZURE_API_KEY": self.azure_api_key,
|
| 705 |
-
"AZURE_OPENAI_API_ENDPOINT": self.azure_openai_endpoint,
|
| 706 |
-
"DDFT_MODELS_ENDPOINT": self.ddft_models_endpoint,
|
| 707 |
-
"AZURE_ANTHROPIC_API_ENDPOINT": self.azure_anthropic_api_endpoint,
|
| 708 |
-
}
|
| 709 |
-
|
| 710 |
-
result = self._ddft.assess(
|
| 711 |
-
model_name=model_name,
|
| 712 |
-
model_config=model_config,
|
| 713 |
-
api_keys=api_keys,
|
| 714 |
-
concepts=["Natural Selection", "Recursion"],
|
| 715 |
-
compression_levels=[0.0, 0.5, 1.0],
|
| 716 |
-
)
|
| 717 |
-
|
| 718 |
-
er = float(result.get("er", 0.5))
|
| 719 |
-
ih = float(result.get("ih", 0.7))
|
| 720 |
-
|
| 721 |
-
if cache_dir:
|
| 722 |
-
cache_dir.mkdir(parents=True, exist_ok=True)
|
| 723 |
-
(cache_dir / f"{model_name}_ddft_live.json").write_text(
|
| 724 |
-
json.dumps({"er": er, "ih": ih,
|
| 725 |
-
"ci_score": result.get("ci_score"),
|
| 726 |
-
"phenotype": result.get("phenotype")}, indent=2)
|
| 727 |
-
)
|
| 728 |
return er, ih
|
| 729 |
|
| 730 |
def _run_cdct_live(
|
| 731 |
self, model_name: str, llm_agent: Any, cache_dir: Optional[Path]
|
| 732 |
) -> float:
|
| 733 |
-
"""
|
| 734 |
-
|
| 735 |
-
|
| 736 |
-
|
| 737 |
-
|
| 738 |
-
|
| 739 |
-
|
| 740 |
-
|
| 741 |
-
|
| 742 |
-
|
| 743 |
-
|
| 744 |
-
|
| 745 |
-
"AZURE_API_KEY": self.azure_api_key,
|
| 746 |
-
"AZURE_OPENAI_API_ENDPOINT": self.azure_openai_endpoint,
|
| 747 |
-
"DDFT_MODELS_ENDPOINT": self.ddft_models_endpoint,
|
| 748 |
-
"AZURE_ANTHROPIC_API_ENDPOINT": self.azure_anthropic_api_endpoint,
|
| 749 |
-
}
|
| 750 |
-
|
| 751 |
-
model_config = getattr(llm_agent, "model_config", {})
|
| 752 |
-
|
| 753 |
-
result = self._cdct.run_experiment(
|
| 754 |
-
model_name=model_name,
|
| 755 |
-
model_config=model_config,
|
| 756 |
-
api_keys=api_keys,
|
| 757 |
-
concept="logic_modus_ponens",
|
| 758 |
-
prompt_strategy="compression_aware",
|
| 759 |
-
evaluation_mode="balanced",
|
| 760 |
-
)
|
| 761 |
-
|
| 762 |
-
cc = float(result.get("cc", 0.5))
|
| 763 |
-
|
| 764 |
-
if cache_dir:
|
| 765 |
-
cache_dir.mkdir(parents=True, exist_ok=True)
|
| 766 |
-
(cache_dir / f"{model_name}_cdct_live.json").write_text(
|
| 767 |
-
json.dumps({"cc": cc, "model": model_name}, indent=2)
|
| 768 |
-
)
|
| 769 |
return cc
|
| 770 |
|
| 771 |
def _run_eect_live(
|
| 772 |
self, model_name: str, llm_agent: Any, cache_dir: Optional[Path]
|
| 773 |
) -> float:
|
| 774 |
-
"""
|
| 775 |
-
|
| 776 |
-
|
| 777 |
-
|
| 778 |
-
|
| 779 |
-
if
|
| 780 |
-
|
| 781 |
-
|
| 782 |
-
|
| 783 |
-
|
| 784 |
-
|
| 785 |
-
api_keys = {
|
| 786 |
-
"AZURE_API_KEY": self.azure_api_key,
|
| 787 |
-
"AZURE_OPENAI_API_ENDPOINT": self.azure_openai_endpoint,
|
| 788 |
-
"DDFT_MODELS_ENDPOINT": self.ddft_models_endpoint,
|
| 789 |
-
"AZURE_ANTHROPIC_API_ENDPOINT": self.azure_anthropic_api_endpoint,
|
| 790 |
-
}
|
| 791 |
-
|
| 792 |
-
model_config = getattr(llm_agent, "model_config", {})
|
| 793 |
-
|
| 794 |
-
# Run two dilemmas and average the AS scores
|
| 795 |
-
dilemma_ids = ["trolley_problem", "lying_to_save_lives"]
|
| 796 |
-
all_turns: list[list] = []
|
| 797 |
-
for dilemma_id in dilemma_ids:
|
| 798 |
-
try:
|
| 799 |
-
resp = self._eect.run_dialogue(
|
| 800 |
-
model_name=model_name,
|
| 801 |
-
model_config=model_config,
|
| 802 |
-
api_keys=api_keys,
|
| 803 |
-
dilemma={"id": dilemma_id},
|
| 804 |
-
compression_level="c1.0",
|
| 805 |
-
)
|
| 806 |
-
turns = resp.get("turns", [])
|
| 807 |
-
if turns:
|
| 808 |
-
all_turns.append(turns)
|
| 809 |
-
except Exception as e:
|
| 810 |
-
logger.warning(f" EECT dialogue failed for dilemma {dilemma_id}: {e}")
|
| 811 |
-
|
| 812 |
-
if not all_turns:
|
| 813 |
-
raise RuntimeError("No EECT dialogues completed successfully")
|
| 814 |
-
|
| 815 |
-
as_scores = [self._score_eect_turns(turns) for turns in all_turns]
|
| 816 |
-
as_ = sum(as_scores) / len(as_scores)
|
| 817 |
-
|
| 818 |
-
if cache_dir:
|
| 819 |
-
cache_dir.mkdir(parents=True, exist_ok=True)
|
| 820 |
-
(cache_dir / f"{model_name}_eect_live.json").write_text(
|
| 821 |
-
json.dumps({"as": as_, "model": model_name,
|
| 822 |
-
"dialogues_run": len(all_turns)}, indent=2)
|
| 823 |
-
)
|
| 824 |
-
return as_
|
| 825 |
|
| 826 |
@staticmethod
|
| 827 |
def _score_eect_turns(turns: list) -> float:
|
|
|
|
| 656 |
storage_root_hash: Optional[str] = None
|
| 657 |
storage_root_hash_real: bool = False
|
| 658 |
if cache_dir:
|
| 659 |
+
try:
|
| 660 |
+
storage_root_hash, storage_root_hash_real = _pin_audit_to_0g(
|
| 661 |
+
model_name=model_name,
|
| 662 |
+
agent_id=agent_id,
|
| 663 |
+
cache_dir=Path(cache_dir) if cache_dir else None,
|
| 664 |
+
robustness=robustness,
|
| 665 |
+
defaults_used=defaults_used,
|
| 666 |
+
errors=errors,
|
| 667 |
+
)
|
| 668 |
+
except Exception as e:
|
| 669 |
+
logger.error(f" [0g] Storage pin failed for {model_name}: {e}")
|
| 670 |
+
errors.append(f"0G_STORAGE: {e}")
|
| 671 |
|
| 672 |
return AuditResult(
|
| 673 |
agent_id=agent_id,
|
|
|
|
| 693 |
def _run_ddft_live(
|
| 694 |
self, model_name: str, model_config: dict, cache_dir: Optional[Path]
|
| 695 |
) -> tuple[float, float]:
|
| 696 |
+
"""Query DDFT /score endpoint. Returns (er, ih)."""
|
| 697 |
+
data = self._ddft.get_score(model_name)
|
| 698 |
+
er = float(data.get("ER") or data.get("er") or 0)
|
| 699 |
+
ih = float(data.get("IH") or data.get("ih") or 0)
|
| 700 |
+
if er <= 0 or ih <= 0:
|
| 701 |
+
raise RuntimeError(f"DDFT /score returned no valid ER/IH for {model_name}: {data}")
|
| 702 |
+
logger.info(f" [DDFT] GET {self._ddft.base_url}/score/{model_name} -> ER={er:.3f} IH={ih:.3f}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 703 |
return er, ih
|
| 704 |
|
| 705 |
def _run_cdct_live(
|
| 706 |
self, model_name: str, llm_agent: Any, cache_dir: Optional[Path]
|
| 707 |
) -> float:
|
| 708 |
+
"""Query CDCT /score endpoint. Returns cc. CDCT returns a list of per-concept metrics."""
|
| 709 |
+
data = self._cdct.get_score(model_name)
|
| 710 |
+
cc = None
|
| 711 |
+
if isinstance(data, list) and data:
|
| 712 |
+
cris = [float(r["CRI"]) for r in data if isinstance(r, dict) and "CRI" in r]
|
| 713 |
+
if cris:
|
| 714 |
+
cc = min(cris)
|
| 715 |
+
elif isinstance(data, dict):
|
| 716 |
+
cc = self._extract_score(data, "cc", model_name=model_name)
|
| 717 |
+
if cc is None or cc <= 0:
|
| 718 |
+
raise RuntimeError(f"CDCT /score returned no valid CC for {model_name}: {data}")
|
| 719 |
+
logger.info(f" [CDCT] GET {self._cdct.base_url}/score/{model_name} -> CC={cc:.3f}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 720 |
return cc
|
| 721 |
|
| 722 |
def _run_eect_live(
|
| 723 |
self, model_name: str, llm_agent: Any, cache_dir: Optional[Path]
|
| 724 |
) -> float:
|
| 725 |
+
"""Query AGT/EECT /score endpoint. Returns as_score."""
|
| 726 |
+
data = self._eect.get_score(model_name)
|
| 727 |
+
as_ = None
|
| 728 |
+
if isinstance(data, dict):
|
| 729 |
+
as_ = data.get("as_") or data.get("as_score") or data.get("AS") or data.get("as")
|
| 730 |
+
if as_ is not None and float(as_) > 0:
|
| 731 |
+
as_ = float(as_)
|
| 732 |
+
logger.info(f" [AGT] GET {self._eect.base_url}/score/{model_name} -> AS={as_:.3f}")
|
| 733 |
+
return as_
|
| 734 |
+
raise RuntimeError(f"AGT /score returned no valid AS for {model_name}: {data}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 735 |
|
| 736 |
@staticmethod
|
| 737 |
def _score_eect_turns(turns: list) -> float:
|
cgae_engine/economy.py
CHANGED
|
@@ -325,7 +325,7 @@ class Economy:
|
|
| 325 |
# Create an ETH wallet for this agent if wallet manager is available
|
| 326 |
wallet_address = None
|
| 327 |
if self.wallet_manager:
|
| 328 |
-
wallet = self.wallet_manager.create_agent_wallet(record.agent_id)
|
| 329 |
wallet_address = wallet.address
|
| 330 |
record.wallet_address = wallet_address
|
| 331 |
|
|
@@ -378,26 +378,38 @@ class Economy:
|
|
| 378 |
# Write certification on-chain if bridge is available
|
| 379 |
onchain_tx = None
|
| 380 |
if self.onchain_bridge and record.wallet_address:
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 389 |
|
| 390 |
# Write robustness credentials to ENS text records
|
| 391 |
if self.ens_manager:
|
| 392 |
-
|
| 393 |
-
self.ens_manager.
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 401 |
|
| 402 |
self._log("agent_audited", {
|
| 403 |
"agent_id": agent_id,
|
|
|
|
| 325 |
# Create an ETH wallet for this agent if wallet manager is available
|
| 326 |
wallet_address = None
|
| 327 |
if self.wallet_manager:
|
| 328 |
+
wallet = self.wallet_manager.create_agent_wallet(record.agent_id, model_name)
|
| 329 |
wallet_address = wallet.address
|
| 330 |
record.wallet_address = wallet_address
|
| 331 |
|
|
|
|
| 378 |
# Write certification on-chain if bridge is available
|
| 379 |
onchain_tx = None
|
| 380 |
if self.onchain_bridge and record.wallet_address:
|
| 381 |
+
# Skip if already certified at this tier on-chain
|
| 382 |
+
ens_tier = ""
|
| 383 |
+
if self.ens_manager:
|
| 384 |
+
ens_name = self.ens_manager.get_agent_name(agent_id)
|
| 385 |
+
if ens_name:
|
| 386 |
+
ens_tier = self.ens_manager.resolve_text(ens_name, "cgae.tier")
|
| 387 |
+
if ens_tier != cert.tier.name:
|
| 388 |
+
audit_hash = (audit_details or {}).get("storage_root_hash", "")
|
| 389 |
+
onchain_tx = self.onchain_bridge.certify_agent(
|
| 390 |
+
agent_address=record.wallet_address,
|
| 391 |
+
cc=robustness.cc, er=robustness.er,
|
| 392 |
+
as_=robustness.as_, ih=robustness.ih,
|
| 393 |
+
audit_type=audit_type,
|
| 394 |
+
audit_hash=audit_hash or "",
|
| 395 |
+
)
|
| 396 |
|
| 397 |
# Write robustness credentials to ENS text records
|
| 398 |
if self.ens_manager:
|
| 399 |
+
ens_name = self.ens_manager.get_agent_name(agent_id)
|
| 400 |
+
existing_tier = self.ens_manager.resolve_text(ens_name, "cgae.tier") if ens_name else ""
|
| 401 |
+
if existing_tier != cert.tier.name:
|
| 402 |
+
audit_hash = (audit_details or {}).get("storage_root_hash", "")
|
| 403 |
+
self.ens_manager.set_agent_credentials(
|
| 404 |
+
agent_id=agent_id,
|
| 405 |
+
tier=cert.tier.name,
|
| 406 |
+
cc=robustness.cc, er=robustness.er,
|
| 407 |
+
as_=robustness.as_, ih=robustness.ih,
|
| 408 |
+
wallet_address=record.wallet_address or "",
|
| 409 |
+
audit_hash=audit_hash,
|
| 410 |
+
)
|
| 411 |
+
else:
|
| 412 |
+
logger.info(f" [ens] Skipping text record update for {ens_name} (tier unchanged: {existing_tier})")
|
| 413 |
|
| 414 |
self._log("agent_audited", {
|
| 415 |
"agent_id": agent_id,
|
cgae_engine/ens.py
CHANGED
|
@@ -161,6 +161,7 @@ class ENSManager:
|
|
| 161 |
def create_subname(self, agent_id: str, model_name: str, owner: str) -> Optional[str]:
|
| 162 |
"""
|
| 163 |
Create a subname like gpt5.cgaeprotocol.eth for an agent.
|
|
|
|
| 164 |
Returns the full ENS name or None on failure.
|
| 165 |
"""
|
| 166 |
label = _slugify(model_name)
|
|
@@ -171,6 +172,13 @@ class ENSManager:
|
|
| 171 |
self._subnames[agent_id] = full_name
|
| 172 |
return full_name
|
| 173 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
try:
|
| 175 |
nonce = self.w3.eth.get_transaction_count(self._account.address)
|
| 176 |
# setSubnodeRecord creates the subname + sets resolver in one tx
|
|
|
|
| 161 |
def create_subname(self, agent_id: str, model_name: str, owner: str) -> Optional[str]:
|
| 162 |
"""
|
| 163 |
Create a subname like gpt5.cgaeprotocol.eth for an agent.
|
| 164 |
+
If the subname already exists (has a cgae.tier record), reuse it.
|
| 165 |
Returns the full ENS name or None on failure.
|
| 166 |
"""
|
| 167 |
label = _slugify(model_name)
|
|
|
|
| 172 |
self._subnames[agent_id] = full_name
|
| 173 |
return full_name
|
| 174 |
|
| 175 |
+
# Check if subname already exists by reading a text record
|
| 176 |
+
existing_tier = self.resolve_text(full_name, "cgae.tier")
|
| 177 |
+
if existing_tier:
|
| 178 |
+
logger.info(f" [ens] Reusing existing {full_name} (tier={existing_tier})")
|
| 179 |
+
self._subnames[agent_id] = full_name
|
| 180 |
+
return full_name
|
| 181 |
+
|
| 182 |
try:
|
| 183 |
nonce = self.w3.eth.get_transaction_count(self._account.address)
|
| 184 |
# setSubnodeRecord creates the subname + sets resolver in one tx
|
cgae_engine/framework_clients.py
CHANGED
|
@@ -230,17 +230,16 @@ class EECTClient:
|
|
| 230 |
Run a single Socratic ethical dialogue for one dilemma.
|
| 231 |
|
| 232 |
Returns a dict with:
|
| 233 |
-
turns
|
| 234 |
"""
|
| 235 |
url = f"{self.base_url}/dialogue"
|
|
|
|
| 236 |
payload = {
|
| 237 |
"model_name": model_name,
|
| 238 |
-
"
|
| 239 |
-
"api_keys": api_keys,
|
| 240 |
-
"dilemma": dilemma,
|
| 241 |
"compression_level": compression_level,
|
| 242 |
}
|
| 243 |
-
logger.debug(f"[EECT] POST {url} model={model_name}
|
| 244 |
return _post(url, payload)
|
| 245 |
|
| 246 |
def get_score(self, model_name: str) -> dict:
|
|
|
|
| 230 |
Run a single Socratic ethical dialogue for one dilemma.
|
| 231 |
|
| 232 |
Returns a dict with:
|
| 233 |
+
turns - list of dialogue turn dicts (role, response, ...)
|
| 234 |
"""
|
| 235 |
url = f"{self.base_url}/dialogue"
|
| 236 |
+
dilemma_id = dilemma.get("id", dilemma) if isinstance(dilemma, dict) else str(dilemma)
|
| 237 |
payload = {
|
| 238 |
"model_name": model_name,
|
| 239 |
+
"dilemma_id": dilemma_id,
|
|
|
|
|
|
|
| 240 |
"compression_level": compression_level,
|
| 241 |
}
|
| 242 |
+
logger.debug(f"[EECT] POST {url} model={model_name} dilemma_id={dilemma_id}")
|
| 243 |
return _post(url, payload)
|
| 244 |
|
| 245 |
def get_score(self, model_name: str) -> dict:
|
cgae_engine/onchain.py
CHANGED
|
@@ -241,7 +241,7 @@ class EscrowBridge:
|
|
| 241 |
self._tx_log.append({"tx_hash": tx_hash.hex(), "status": status})
|
| 242 |
return tx_hash.hex()
|
| 243 |
except Exception as e:
|
| 244 |
-
logger.
|
| 245 |
self._tx_log.append({"error": str(e)})
|
| 246 |
return None
|
| 247 |
|
|
|
|
| 241 |
self._tx_log.append({"tx_hash": tx_hash.hex(), "status": status})
|
| 242 |
return tx_hash.hex()
|
| 243 |
except Exception as e:
|
| 244 |
+
logger.warning(f" [escrow] tx skipped (insufficient gas or network error): {e}")
|
| 245 |
self._tx_log.append({"error": str(e)})
|
| 246 |
return None
|
| 247 |
|
cgae_engine/wallet.py
CHANGED
|
@@ -57,6 +57,7 @@ class WalletManager:
|
|
| 57 |
rpc_url: Optional[str] = None,
|
| 58 |
treasury_private_key: Optional[str] = None,
|
| 59 |
dry_run: bool = False,
|
|
|
|
| 60 |
):
|
| 61 |
self.rpc_url = rpc_url or os.getenv("ZG_RPC_URL", "https://evmrpc-testnet.0g.ai")
|
| 62 |
self._treasury_key = treasury_private_key or os.getenv("PRIVATE_KEY")
|
|
@@ -65,6 +66,7 @@ class WalletManager:
|
|
| 65 |
self.w3 = Web3(Web3.HTTPProvider(self.rpc_url))
|
| 66 |
self._wallets: dict[str, AgentWallet] = {} # agent_id -> wallet
|
| 67 |
self._disbursements: list[dict] = []
|
|
|
|
| 68 |
|
| 69 |
if self._treasury_key:
|
| 70 |
key = self._treasury_key if self._treasury_key.startswith("0x") else f"0x{self._treasury_key}"
|
|
@@ -74,16 +76,31 @@ class WalletManager:
|
|
| 74 |
self._treasury_account = None
|
| 75 |
self.treasury_address = None
|
| 76 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
@property
|
| 78 |
def is_live(self) -> bool:
|
| 79 |
"""True if we can send real transactions."""
|
| 80 |
return self._treasury_account is not None and not self.dry_run
|
| 81 |
|
| 82 |
-
def create_agent_wallet(self, agent_id: str) -> AgentWallet:
|
| 83 |
-
"""
|
| 84 |
if agent_id in self._wallets:
|
| 85 |
return self._wallets[agent_id]
|
| 86 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
acct = Account.create()
|
| 88 |
wallet = AgentWallet(
|
| 89 |
agent_id=agent_id,
|
|
@@ -91,9 +108,36 @@ class WalletManager:
|
|
| 91 |
private_key=acct.key.hex() if isinstance(acct.key, bytes) else acct.key,
|
| 92 |
)
|
| 93 |
self._wallets[agent_id] = wallet
|
|
|
|
|
|
|
|
|
|
| 94 |
logger.info(f" [wallet] Created wallet for {agent_id}: {wallet.address}")
|
| 95 |
return wallet
|
| 96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
def get_wallet(self, agent_id: str) -> Optional[AgentWallet]:
|
| 98 |
return self._wallets.get(agent_id)
|
| 99 |
|
|
|
|
| 57 |
rpc_url: Optional[str] = None,
|
| 58 |
treasury_private_key: Optional[str] = None,
|
| 59 |
dry_run: bool = False,
|
| 60 |
+
wallet_store_path: Optional[str] = None,
|
| 61 |
):
|
| 62 |
self.rpc_url = rpc_url or os.getenv("ZG_RPC_URL", "https://evmrpc-testnet.0g.ai")
|
| 63 |
self._treasury_key = treasury_private_key or os.getenv("PRIVATE_KEY")
|
|
|
|
| 66 |
self.w3 = Web3(Web3.HTTPProvider(self.rpc_url))
|
| 67 |
self._wallets: dict[str, AgentWallet] = {} # agent_id -> wallet
|
| 68 |
self._disbursements: list[dict] = []
|
| 69 |
+
self._store_path = Path(wallet_store_path) if wallet_store_path else Path("server/live_results/wallets.json")
|
| 70 |
|
| 71 |
if self._treasury_key:
|
| 72 |
key = self._treasury_key if self._treasury_key.startswith("0x") else f"0x{self._treasury_key}"
|
|
|
|
| 76 |
self._treasury_account = None
|
| 77 |
self.treasury_address = None
|
| 78 |
|
| 79 |
+
# Load persisted wallets from disk
|
| 80 |
+
self._model_wallets: dict[str, AgentWallet] = {} # model_name -> wallet
|
| 81 |
+
self._load_wallets()
|
| 82 |
+
|
| 83 |
@property
|
| 84 |
def is_live(self) -> bool:
|
| 85 |
"""True if we can send real transactions."""
|
| 86 |
return self._treasury_account is not None and not self.dry_run
|
| 87 |
|
| 88 |
+
def create_agent_wallet(self, agent_id: str, model_name: str = "") -> AgentWallet:
|
| 89 |
+
"""Get existing wallet for this model or generate a new keypair."""
|
| 90 |
if agent_id in self._wallets:
|
| 91 |
return self._wallets[agent_id]
|
| 92 |
|
| 93 |
+
# Reuse persisted wallet for this model if it exists
|
| 94 |
+
if model_name and model_name in self._model_wallets:
|
| 95 |
+
wallet = AgentWallet(
|
| 96 |
+
agent_id=agent_id,
|
| 97 |
+
address=self._model_wallets[model_name].address,
|
| 98 |
+
private_key=self._model_wallets[model_name].private_key,
|
| 99 |
+
)
|
| 100 |
+
self._wallets[agent_id] = wallet
|
| 101 |
+
logger.info(f" [wallet] Loaded existing wallet for {agent_id}: {wallet.address}")
|
| 102 |
+
return wallet
|
| 103 |
+
|
| 104 |
acct = Account.create()
|
| 105 |
wallet = AgentWallet(
|
| 106 |
agent_id=agent_id,
|
|
|
|
| 108 |
private_key=acct.key.hex() if isinstance(acct.key, bytes) else acct.key,
|
| 109 |
)
|
| 110 |
self._wallets[agent_id] = wallet
|
| 111 |
+
if model_name:
|
| 112 |
+
self._model_wallets[model_name] = wallet
|
| 113 |
+
self._save_wallets()
|
| 114 |
logger.info(f" [wallet] Created wallet for {agent_id}: {wallet.address}")
|
| 115 |
return wallet
|
| 116 |
|
| 117 |
+
def _load_wallets(self):
|
| 118 |
+
"""Load persisted model->wallet mapping from disk."""
|
| 119 |
+
if self._store_path.exists():
|
| 120 |
+
try:
|
| 121 |
+
data = json.loads(self._store_path.read_text())
|
| 122 |
+
for model_name, w in data.items():
|
| 123 |
+
self._model_wallets[model_name] = AgentWallet(
|
| 124 |
+
agent_id=w.get("agent_id", ""),
|
| 125 |
+
address=w["address"],
|
| 126 |
+
private_key=w["private_key"],
|
| 127 |
+
)
|
| 128 |
+
logger.info(f" [wallet] Loaded {len(self._model_wallets)} persisted wallets")
|
| 129 |
+
except Exception as e:
|
| 130 |
+
logger.warning(f" [wallet] Could not load wallets: {e}")
|
| 131 |
+
|
| 132 |
+
def _save_wallets(self):
|
| 133 |
+
"""Persist model->wallet mapping to disk (unredacted keys)."""
|
| 134 |
+
self._store_path.parent.mkdir(parents=True, exist_ok=True)
|
| 135 |
+
data = {
|
| 136 |
+
model: {"agent_id": w.agent_id, "address": w.address, "private_key": w.private_key}
|
| 137 |
+
for model, w in self._model_wallets.items()
|
| 138 |
+
}
|
| 139 |
+
self._store_path.write_text(json.dumps(data, indent=2))
|
| 140 |
+
|
| 141 |
def get_wallet(self, agent_id: str) -> Optional[AgentWallet]:
|
| 142 |
return self._wallets.get(agent_id)
|
| 143 |
|
scripts/video_demo.py
CHANGED
|
@@ -44,7 +44,7 @@ def section(title: str):
|
|
| 44 |
|
| 45 |
def main():
|
| 46 |
parser = argparse.ArgumentParser()
|
| 47 |
-
parser.add_argument("--rounds", type=int, default=
|
| 48 |
parser.add_argument("--port", type=int, default=8000)
|
| 49 |
parser.add_argument("--skip-audit", action="store_true")
|
| 50 |
args = parser.parse_args()
|
|
@@ -107,15 +107,26 @@ def main():
|
|
| 107 |
|
| 108 |
runner.setup()
|
| 109 |
|
| 110 |
-
#
|
|
|
|
| 111 |
for agent_id, model_name in runner.agent_model_map.items():
|
| 112 |
record = runner.economy.registry.get_agent(agent_id)
|
| 113 |
-
if
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
|
| 120 |
time.sleep(2)
|
| 121 |
|
|
@@ -169,39 +180,87 @@ def main():
|
|
| 169 |
runner._emit_protocol_event = patched_emit
|
| 170 |
|
| 171 |
# ---------------------------------------------------------------------------
|
| 172 |
-
# Per-round scripted narrative:
|
| 173 |
-
# R1 -
|
| 174 |
-
# R2 -
|
| 175 |
-
# R3 - GPT-5.4 invests in robustness -> upgrade to T3
|
| 176 |
-
# R4 - Spot audit: temporal decay demotes grok + spoof blocked
|
| 177 |
-
# R5 - Post-upgrade: GPT-5.4 earns more at T3, economy stabilises
|
| 178 |
# ---------------------------------------------------------------------------
|
| 179 |
|
| 180 |
# Disable random circumvention/delegation - we script them per round
|
| 181 |
runner.config.circumvention_rate = 0.0
|
| 182 |
runner.config.delegation_rate = 0.0
|
| 183 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
for round_num in range(args.rounds):
|
|
|
|
| 185 |
runner._reactivate_suspended_agents()
|
| 186 |
|
| 187 |
# ---- Round-specific scripted events ----
|
| 188 |
if round_num == 0:
|
| 189 |
-
# R1:
|
| 190 |
runner.config.circumvention_rate = 1.0
|
| 191 |
-
runner.config.delegation_rate = 0.0
|
| 192 |
-
elif round_num == 1:
|
| 193 |
-
# R2: force delegation, no circumvention
|
| 194 |
-
runner.config.circumvention_rate = 0.0
|
| 195 |
runner.config.delegation_rate = 1.0
|
| 196 |
-
elif round_num ==
|
| 197 |
-
#
|
| 198 |
runner.config.circumvention_rate = 0.0
|
| 199 |
runner.config.delegation_rate = 0.0
|
| 200 |
-
|
| 201 |
-
# R4: grok spoof attempt + spot audit demotion
|
| 202 |
-
runner.config.circumvention_rate = 1.0
|
| 203 |
-
runner.config.delegation_rate = 0.0
|
| 204 |
-
# Force temporal decay to trigger a demotion on grok
|
| 205 |
grok_id = next((aid for aid, m in runner.agent_model_map.items() if m == "grok-4-20-reasoning"), None)
|
| 206 |
if grok_id:
|
| 207 |
rec = runner.economy.registry.get_agent(grok_id)
|
|
@@ -226,17 +285,13 @@ def main():
|
|
| 226 |
f"grok-4-20-reasoning demoted {old_tier.name} -> {new_tier.name} after spot audit (temporal decay).",
|
| 227 |
old_tier=old_tier.name, new_tier=new_tier.name,
|
| 228 |
)
|
| 229 |
-
elif round_num == 4:
|
| 230 |
-
# R5: clean round, no adversarial - show stable economy
|
| 231 |
-
runner.config.circumvention_rate = 0.0
|
| 232 |
-
runner.config.delegation_rate = 0.0
|
| 233 |
|
| 234 |
round_results = runner._run_round(round_num)
|
| 235 |
runner._round_summaries.append(round_results)
|
| 236 |
runner.economy.step()
|
| 237 |
|
| 238 |
-
#
|
| 239 |
-
if round_num ==
|
| 240 |
gpt_id = next((aid for aid, m in runner.agent_model_map.items() if m == "gpt-5.4"), None)
|
| 241 |
if gpt_id:
|
| 242 |
rec = runner.economy.registry.get_agent(gpt_id)
|
|
@@ -263,53 +318,10 @@ def main():
|
|
| 263 |
old_tier=old_tier.name, new_tier=new_tier.name,
|
| 264 |
)
|
| 265 |
|
| 266 |
-
#
|
| 267 |
-
|
| 268 |
-
agents_snap = {}
|
| 269 |
-
for aid, mname in runner.agent_model_map.items():
|
| 270 |
-
rec = runner.economy.registry.get_agent(aid)
|
| 271 |
-
if not rec:
|
| 272 |
-
continue
|
| 273 |
-
rv = rec.current_robustness
|
| 274 |
-
agents_snap[aid] = {
|
| 275 |
-
"agent_id": aid, "model_name": mname,
|
| 276 |
-
"strategy": _strat(runner, mname),
|
| 277 |
-
"current_tier": rec.current_tier.value,
|
| 278 |
-
"balance": rec.balance, "total_earned": rec.total_earned,
|
| 279 |
-
"total_penalties": rec.total_penalties,
|
| 280 |
-
"contracts_completed": rec.contracts_completed,
|
| 281 |
-
"contracts_failed": rec.contracts_failed,
|
| 282 |
-
"status": rec.status.value,
|
| 283 |
-
"wallet_address": rec.wallet_address,
|
| 284 |
-
"ens_name": runner.economy.ens_manager.get_agent_name(aid) if runner.economy.ens_manager else None,
|
| 285 |
-
"robustness": {"cc":rv.cc,"er":rv.er,"as_":rv.as_,"ih":rv.ih} if rv else None,
|
| 286 |
-
}
|
| 287 |
-
trades = [{
|
| 288 |
-
"round": round_num, "agent": tr["agent"],
|
| 289 |
-
"task_id": tr["task_id"], "task_prompt": tr.get("task_prompt", ""),
|
| 290 |
-
"tier": tr["tier"], "domain": tr["domain"],
|
| 291 |
-
"passed": tr["verification"]["overall_pass"],
|
| 292 |
-
"reward": tr["settlement"].get("reward", 0) if tr["settlement"] else 0,
|
| 293 |
-
"penalty": tr["settlement"].get("penalty", 0) if tr["settlement"] else 0,
|
| 294 |
-
"token_cost": tr.get("token_cost_eth", 0),
|
| 295 |
-
"latency_ms": tr.get("latency_ms", 0),
|
| 296 |
-
"output_preview": tr.get("output_preview", ""),
|
| 297 |
-
"constraints_passed": tr["verification"].get("constraints_passed", []),
|
| 298 |
-
"constraints_failed": tr["verification"].get("constraints_failed", []),
|
| 299 |
-
} for tr in round_results.get("task_results", [])]
|
| 300 |
-
|
| 301 |
with api._state_lock:
|
| 302 |
-
|
| 303 |
-
api._state["economy"] = {
|
| 304 |
-
"aggregate_safety": safety,
|
| 305 |
-
"active_agents": len(runner.economy.registry.active_agents),
|
| 306 |
-
"total_balance": sum(a["balance"] for a in agents_snap.values()),
|
| 307 |
-
"total_earned": sum(a["total_earned"] for a in agents_snap.values()),
|
| 308 |
-
"contracts_completed": sum(a["contracts_completed"] for a in agents_snap.values()),
|
| 309 |
-
"contracts_failed": sum(a["contracts_failed"] for a in agents_snap.values()),
|
| 310 |
-
}
|
| 311 |
-
api._state["agents"] = agents_snap
|
| 312 |
-
api._state["trades"] = (api._state["trades"] + trades)[-500:]
|
| 313 |
api._state["time_series"]["safety"].append(safety)
|
| 314 |
api._state["time_series"]["balance"].append(api._state["economy"]["total_balance"])
|
| 315 |
api._state["time_series"]["rewards"].append(round_results.get("total_reward", 0))
|
|
@@ -322,11 +334,8 @@ def main():
|
|
| 322 |
reward = round_results["total_reward"]
|
| 323 |
penalty = round_results["total_penalty"]
|
| 324 |
themes = {
|
| 325 |
-
0: "
|
| 326 |
-
1: "
|
| 327 |
-
2: "Robustness Investment -> Upgrade",
|
| 328 |
-
3: "Spot Audit + Demotion",
|
| 329 |
-
4: "Stable Economy",
|
| 330 |
}
|
| 331 |
theme = themes.get(round_num, "")
|
| 332 |
label = f" Round {round_num+1}/{args.rounds} "
|
|
@@ -454,7 +463,7 @@ if __name__ == "__main__":
|
|
| 454 |
import server.api as api
|
| 455 |
|
| 456 |
parser = argparse.ArgumentParser()
|
| 457 |
-
parser.add_argument("--rounds", type=int, default=
|
| 458 |
parser.add_argument("--port", type=int, default=8000)
|
| 459 |
parser.add_argument("--skip-audit", action="store_true")
|
| 460 |
args_pre = parser.parse_known_args()[0]
|
|
|
|
| 44 |
|
| 45 |
def main():
|
| 46 |
parser = argparse.ArgumentParser()
|
| 47 |
+
parser.add_argument("--rounds", type=int, default=2)
|
| 48 |
parser.add_argument("--port", type=int, default=8000)
|
| 49 |
parser.add_argument("--skip-audit", action="store_true")
|
| 50 |
args = parser.parse_args()
|
|
|
|
| 107 |
|
| 108 |
runner.setup()
|
| 109 |
|
| 110 |
+
# Print audit summary with highlights
|
| 111 |
+
print()
|
| 112 |
for agent_id, model_name in runner.agent_model_map.items():
|
| 113 |
record = runner.economy.registry.get_agent(agent_id)
|
| 114 |
+
if not record:
|
| 115 |
+
continue
|
| 116 |
+
r = record.current_robustness
|
| 117 |
+
wallet = record.wallet_address or "n/a"
|
| 118 |
+
ens = runner.economy.ens_manager.get_agent_name(agent_id) if runner.economy.ens_manager else "n/a"
|
| 119 |
+
cid = record.audit_cid or "n/a"
|
| 120 |
+
tier = record.current_tier.name
|
| 121 |
+
print(f" \033[1;32m\u2713\033[0m \033[1m{model_name}\033[0m")
|
| 122 |
+
print(f" Wallet: {wallet}")
|
| 123 |
+
print(f" ENS: {ens}")
|
| 124 |
+
if r:
|
| 125 |
+
print(f" Scores: CC={r.cc:.3f} ER={r.er:.3f} AS={r.as_:.3f} IH={r.ih:.3f} \033[1;33m-> {tier}\033[0m")
|
| 126 |
+
if cid != "n/a":
|
| 127 |
+
print(f" 0G Hash: {cid[:32]}...")
|
| 128 |
+
print()
|
| 129 |
+
time.sleep(0.5)
|
| 130 |
|
| 131 |
time.sleep(2)
|
| 132 |
|
|
|
|
| 180 |
runner._emit_protocol_event = patched_emit
|
| 181 |
|
| 182 |
# ---------------------------------------------------------------------------
|
| 183 |
+
# Per-round scripted narrative (2 rounds, all scenarios covered):
|
| 184 |
+
# R1 - Circumvention blocked + delegation blocked + normal trading
|
| 185 |
+
# R2 - GPT-5.4 upgrade + grok demotion (spot audit) + normal trading
|
|
|
|
|
|
|
|
|
|
| 186 |
# ---------------------------------------------------------------------------
|
| 187 |
|
| 188 |
# Disable random circumvention/delegation - we script them per round
|
| 189 |
runner.config.circumvention_rate = 0.0
|
| 190 |
runner.config.delegation_rate = 0.0
|
| 191 |
|
| 192 |
+
def _push_api_state(round_num):
|
| 193 |
+
"""Push current state to the dashboard API after each task."""
|
| 194 |
+
safety = runner.economy.aggregate_safety()
|
| 195 |
+
agents_snap = {}
|
| 196 |
+
for aid, mname in runner.agent_model_map.items():
|
| 197 |
+
rec = runner.economy.registry.get_agent(aid)
|
| 198 |
+
if not rec:
|
| 199 |
+
continue
|
| 200 |
+
rv = rec.current_robustness
|
| 201 |
+
agents_snap[aid] = {
|
| 202 |
+
"agent_id": aid, "model_name": mname,
|
| 203 |
+
"strategy": _strat(runner, mname),
|
| 204 |
+
"current_tier": rec.current_tier.value,
|
| 205 |
+
"balance": rec.balance, "total_earned": rec.total_earned,
|
| 206 |
+
"total_penalties": rec.total_penalties,
|
| 207 |
+
"contracts_completed": rec.contracts_completed,
|
| 208 |
+
"contracts_failed": rec.contracts_failed,
|
| 209 |
+
"status": rec.status.value,
|
| 210 |
+
"wallet_address": rec.wallet_address,
|
| 211 |
+
"ens_name": runner.economy.ens_manager.get_agent_name(aid) if runner.economy.ens_manager else None,
|
| 212 |
+
"robustness": {"cc":rv.cc,"er":rv.er,"as_":rv.as_,"ih":rv.ih} if rv else None,
|
| 213 |
+
}
|
| 214 |
+
trades = [{
|
| 215 |
+
"round": tr.get("_round", round_num), "agent": tr["agent"],
|
| 216 |
+
"task_id": tr["task_id"], "task_prompt": tr.get("task_prompt", ""),
|
| 217 |
+
"tier": tr["tier"], "domain": tr["domain"],
|
| 218 |
+
"passed": tr["verification"]["overall_pass"],
|
| 219 |
+
"reward": tr["settlement"].get("reward", 0) if tr["settlement"] else 0,
|
| 220 |
+
"penalty": tr["settlement"].get("penalty", 0) if tr["settlement"] else 0,
|
| 221 |
+
"token_cost": tr.get("token_cost_eth", 0),
|
| 222 |
+
"latency_ms": tr.get("latency_ms", 0),
|
| 223 |
+
"output_preview": tr.get("output_preview", ""),
|
| 224 |
+
"constraints_passed": tr["verification"].get("constraints_passed", []),
|
| 225 |
+
"constraints_failed": tr["verification"].get("constraints_failed", []),
|
| 226 |
+
} for tr in runner._results]
|
| 227 |
+
|
| 228 |
+
with api._state_lock:
|
| 229 |
+
api._state["round"] = round_num + 1
|
| 230 |
+
api._state["economy"] = {
|
| 231 |
+
"aggregate_safety": safety,
|
| 232 |
+
"active_agents": len(runner.economy.registry.active_agents),
|
| 233 |
+
"total_balance": sum(a["balance"] for a in agents_snap.values()),
|
| 234 |
+
"total_earned": sum(a["total_earned"] for a in agents_snap.values()),
|
| 235 |
+
"contracts_completed": sum(a["contracts_completed"] for a in agents_snap.values()),
|
| 236 |
+
"contracts_failed": sum(a["contracts_failed"] for a in agents_snap.values()),
|
| 237 |
+
}
|
| 238 |
+
api._state["agents"] = agents_snap
|
| 239 |
+
api._state["trades"] = trades[-500:]
|
| 240 |
+
|
| 241 |
+
# Replace runner._results with a live-updating list
|
| 242 |
+
_current_round = [0]
|
| 243 |
+
class _LiveResults(list):
|
| 244 |
+
def append(self, item):
|
| 245 |
+
item["_round"] = _current_round[0]
|
| 246 |
+
super().append(item)
|
| 247 |
+
_push_api_state(_current_round[0])
|
| 248 |
+
runner._results = _LiveResults(runner._results)
|
| 249 |
+
|
| 250 |
for round_num in range(args.rounds):
|
| 251 |
+
_current_round[0] = round_num
|
| 252 |
runner._reactivate_suspended_agents()
|
| 253 |
|
| 254 |
# ---- Round-specific scripted events ----
|
| 255 |
if round_num == 0:
|
| 256 |
+
# R1: circumvention + delegation (both blocked for adversarial)
|
| 257 |
runner.config.circumvention_rate = 1.0
|
|
|
|
|
|
|
|
|
|
|
|
|
| 258 |
runner.config.delegation_rate = 1.0
|
| 259 |
+
elif round_num == 1:
|
| 260 |
+
# R2: spot audit demotion for grok, then upgrade for GPT-5.4
|
| 261 |
runner.config.circumvention_rate = 0.0
|
| 262 |
runner.config.delegation_rate = 0.0
|
| 263 |
+
# Force temporal decay demotion on grok
|
|
|
|
|
|
|
|
|
|
|
|
|
| 264 |
grok_id = next((aid for aid, m in runner.agent_model_map.items() if m == "grok-4-20-reasoning"), None)
|
| 265 |
if grok_id:
|
| 266 |
rec = runner.economy.registry.get_agent(grok_id)
|
|
|
|
| 285 |
f"grok-4-20-reasoning demoted {old_tier.name} -> {new_tier.name} after spot audit (temporal decay).",
|
| 286 |
old_tier=old_tier.name, new_tier=new_tier.name,
|
| 287 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 288 |
|
| 289 |
round_results = runner._run_round(round_num)
|
| 290 |
runner._round_summaries.append(round_results)
|
| 291 |
runner.economy.step()
|
| 292 |
|
| 293 |
+
# R2 post-round: forced upgrade for GPT-5.4
|
| 294 |
+
if round_num == 1:
|
| 295 |
gpt_id = next((aid for aid, m in runner.agent_model_map.items() if m == "gpt-5.4"), None)
|
| 296 |
if gpt_id:
|
| 297 |
rec = runner.economy.registry.get_agent(gpt_id)
|
|
|
|
| 318 |
old_tier=old_tier.name, new_tier=new_tier.name,
|
| 319 |
)
|
| 320 |
|
| 321 |
+
# Final push + time series update for this round
|
| 322 |
+
_push_api_state(round_num)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 323 |
with api._state_lock:
|
| 324 |
+
safety = runner.economy.aggregate_safety()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 325 |
api._state["time_series"]["safety"].append(safety)
|
| 326 |
api._state["time_series"]["balance"].append(api._state["economy"]["total_balance"])
|
| 327 |
api._state["time_series"]["rewards"].append(round_results.get("total_reward", 0))
|
|
|
|
| 334 |
reward = round_results["total_reward"]
|
| 335 |
penalty = round_results["total_penalty"]
|
| 336 |
themes = {
|
| 337 |
+
0: "Circumvention + Delegation Blocked",
|
| 338 |
+
1: "Upgrade + Demotion",
|
|
|
|
|
|
|
|
|
|
| 339 |
}
|
| 340 |
theme = themes.get(round_num, "")
|
| 341 |
label = f" Round {round_num+1}/{args.rounds} "
|
|
|
|
| 463 |
import server.api as api
|
| 464 |
|
| 465 |
parser = argparse.ArgumentParser()
|
| 466 |
+
parser.add_argument("--rounds", type=int, default=2)
|
| 467 |
parser.add_argument("--port", type=int, default=8000)
|
| 468 |
parser.add_argument("--skip-audit", action="store_true")
|
| 469 |
args_pre = parser.parse_known_args()[0]
|
server/live_runner.py
CHANGED
|
@@ -1248,6 +1248,7 @@ class LiveSimulationRunner:
|
|
| 1248 |
"executed_by_agent_id": execution_agent_id,
|
| 1249 |
"executed_by_model": execution_model_name,
|
| 1250 |
"task_id": task.task_id,
|
|
|
|
| 1251 |
"tier": task.tier.name,
|
| 1252 |
"domain": task.domain,
|
| 1253 |
"proof_cid": cid,
|
|
|
|
| 1248 |
"executed_by_agent_id": execution_agent_id,
|
| 1249 |
"executed_by_model": execution_model_name,
|
| 1250 |
"task_id": task.task_id,
|
| 1251 |
+
"task_prompt": task.prompt,
|
| 1252 |
"tier": task.tier.name,
|
| 1253 |
"domain": task.domain,
|
| 1254 |
"proof_cid": cid,
|
storage/upload_to_0g.mjs
CHANGED
|
@@ -94,7 +94,8 @@ async function main() {
|
|
| 94 |
try {
|
| 95 |
const [hash, uploadErr] = await indexer.upload(file, RPC_URL, signer);
|
| 96 |
if (uploadErr) throw new Error(String(uploadErr));
|
| 97 |
-
rootHash
|
|
|
|
| 98 |
} catch (e) {
|
| 99 |
writeError(`Upload failed: ${e.message}`);
|
| 100 |
process.exit(1);
|
|
|
|
| 94 |
try {
|
| 95 |
const [hash, uploadErr] = await indexer.upload(file, RPC_URL, signer);
|
| 96 |
if (uploadErr) throw new Error(String(uploadErr));
|
| 97 |
+
// SDK may return a string or an object {rootHash, txHash, txSeq}
|
| 98 |
+
rootHash = (typeof hash === "object" && hash !== null) ? hash.rootHash : hash;
|
| 99 |
} catch (e) {
|
| 100 |
writeError(`Upload failed: ${e.message}`);
|
| 101 |
process.exit(1);
|
storage/zg_store.py
CHANGED
|
@@ -166,7 +166,22 @@ class ZgStore:
|
|
| 166 |
except (json.JSONDecodeError, KeyError):
|
| 167 |
raise RuntimeError(stderr or f"exit code {proc.returncode}")
|
| 168 |
|
| 169 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
if not data.get("ok"):
|
| 171 |
raise RuntimeError(data.get("error", "Unknown upload error"))
|
| 172 |
|
|
|
|
| 166 |
except (json.JSONDecodeError, KeyError):
|
| 167 |
raise RuntimeError(stderr or f"exit code {proc.returncode}")
|
| 168 |
|
| 169 |
+
stdout = proc.stdout.strip()
|
| 170 |
+
if not stdout:
|
| 171 |
+
stderr = proc.stderr.strip()
|
| 172 |
+
raise RuntimeError(f"0G upload returned empty output. stderr: {stderr}")
|
| 173 |
+
|
| 174 |
+
# SDK may print debug lines before the JSON; find the last JSON line
|
| 175 |
+
json_line = None
|
| 176 |
+
for line in reversed(stdout.splitlines()):
|
| 177 |
+
line = line.strip()
|
| 178 |
+
if line.startswith("{"):
|
| 179 |
+
json_line = line
|
| 180 |
+
break
|
| 181 |
+
if not json_line:
|
| 182 |
+
raise RuntimeError(f"0G upload returned no JSON. stdout: {stdout[:200]}")
|
| 183 |
+
|
| 184 |
+
data = json.loads(json_line)
|
| 185 |
if not data.get("ok"):
|
| 186 |
raise RuntimeError(data.get("error", "Unknown upload error"))
|
| 187 |
|