# O96a's picture
# Upload folder using huggingface_hub
# c619928 verified
"""
Agentic World Model Explorer
Interactive demo of world model capability levels and law regimes
Based on: Agentic World Modeling: Foundations, Capabilities, Laws, and Beyond (2604.22748)
"""
import gradio as gr
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from io import BytesIO
import base64
# Placeholder for a lazily loaded model; nothing visible in this file assigns or reads it.
_MODEL = None
def get_demo_data():
    """Return the hard-coded demo series for the capability-level chart.

    The numbers are simulated accuracies, not measurements.

    Returns:
        A tuple ``(levels, in_dist, out_dist)`` of three equally long lists:
        the tick labels (each with an embedded newline), the in-distribution
        accuracies, and the out-of-distribution (compositional) accuracies.
    """
    # One row per capability level: (label, in-distribution, out-of-distribution).
    rows = (
        ('L1\nPredictor', 0.92, 0.45),
        ('L2\nSimulator', 0.89, 0.72),
        ('L3\nEvolver', 0.87, 0.78),
    )
    # Transpose the rows into the three parallel lists callers expect.
    labels, in_scores, out_scores = (list(column) for column in zip(*rows))
    return labels, in_scores, out_scores
def visualize_capability_levels():
    """Render the capability-level generalization chart as a PNG data URI.

    Draws a grouped bar chart (in-distribution vs. out-of-distribution
    accuracy per capability level) from ``get_demo_data()`` and labels every
    bar with its value.

    Returns:
        A string of the form ``data:image/png;base64,<payload>`` holding the
        rendered chart.
    """
    levels, in_dist, out_dist = get_demo_data()
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        x = np.arange(len(levels))
        width = 0.35
        bars1 = ax.bar(x - width/2, in_dist, width, label='In-Distribution', color='#6366f1')
        bars2 = ax.bar(x + width/2, out_dist, width, label='Out-of-Distribution', color='#a855f7')
        ax.set_ylabel('Accuracy')
        ax.set_title('World Model Capability Levels: Generalization Gap')
        ax.set_xticks(x)
        ax.set_xticklabels(levels)
        ax.legend()
        ax.set_ylim(0, 1.0)
        # Add value labels (one pass over both bar groups).
        for bar in (*bars1, *bars2):
            height = bar.get_height()
            ax.annotate(f'{height:.2f}',
                        xy=(bar.get_x() + bar.get_width() / 2, height),
                        xytext=(0, 3), textcoords="offset points",
                        ha='center', va='bottom', fontsize=9)
        # Operate on this figure explicitly rather than pyplot's "current
        # figure", which is shared process-wide state.
        fig.tight_layout()
        buf = BytesIO()
        fig.savefig(buf, format='png', dpi=100)
    finally:
        # Always release the figure, even if drawing or encoding failed.
        plt.close(fig)
    img_base64 = base64.b64encode(buf.getvalue()).decode()
    return f"data:image/png;base64,{img_base64}"
def get_law_regime_info(regime):
    """Look up the descriptive profile of a law regime.

    Args:
        regime: Regime name; "Physical", "Digital", "Social" and "Scientific"
            are known.

    Returns:
        A dict with the keys ``description`` (str) and ``constraints``,
        ``failure_modes``, ``examples`` (lists of str). An unknown name
        yields the "Physical" profile.
    """
    def profile(description, constraints, failure_modes, examples):
        # Keep the key order stable: description, constraints, failure_modes, examples.
        return {
            "description": description,
            "constraints": constraints,
            "failure_modes": failure_modes,
            "examples": examples,
        }

    catalog = {
        "Physical": profile(
            "Object manipulation, physics simulation, robotics",
            ["Newtonian mechanics", "Object permanence", "Collision dynamics"],
            ["Impossible object states", "Gravity violations", "Penetration errors"],
            ["Minecraft agents", "Robotic manipulation", "Physics simulators"],
        ),
        "Digital": profile(
            "Web agents, GUI automation, software interaction",
            ["API contracts", "UI state consistency", "Action preconditions"],
            ["Invalid actions", "State desync", "Missing element refs"],
            ["WebArena", "OSWorld", "Computer-Using Agents"],
        ),
        "Social": profile(
            "Multi-agent coordination, negotiation, social dynamics",
            ["Theory of mind", "Commitment consistency", "Communication protocols"],
            ["Broken commitments", "Misaligned incentives", "Deadlocks"],
            ["Social simulations", "Negotiation agents", "Game theory models"],
        ),
        "Scientific": profile(
            "Experimental design, hypothesis testing, discovery",
            ["Reproducibility", "Causal validity", "Measurement precision"],
            ["Confounded variables", "Measurement error", "Overfitting"],
            ["AI scientists", "Drug discovery", "Materials design"],
        ),
    }

    try:
        return catalog[regime]
    except KeyError:
        # Unknown regime names fall back to the Physical profile.
        return catalog["Physical"]
def explore_regime(regime):
    """Format a law regime's profile as a Markdown summary.

    Args:
        regime: Regime name; it is used verbatim in the heading and passed to
            ``get_law_regime_info`` for the details.

    Returns:
        Markdown text with a heading, the description, and bulleted lists of
        constraints, failure modes and example systems.
    """
    details = get_law_regime_info(regime)

    def bullets(items):
        # One "- item" line per entry, each terminated by a newline.
        return "".join(f"- {item}\n" for item in items)

    sections = [
        f"## {regime} Law Regime\n\n",
        f"**Description:** {details['description']}\n\n",
        "**Key Constraints:**\n",
        bullets(details['constraints']),
        "\n**Common Failure Modes:**\n",
        bullets(details['failure_modes']),
        "\n**Example Systems:**\n",
        bullets(details['examples']),
    ]
    return "".join(sections)
def simulate_rollout(level, regime, steps):
    """Simulate a toy world-model rollout and summarize it.

    A scalar state starts at 0.5 and is updated ``steps`` times with Gaussian
    noise, then clipped to [0, 1]. The update rule depends on ``level``:

    * "L1 Predictor": noise only (std 0.15).
    * "L2 Simulator": 10% drift toward the target plus noise (std 0.08).
    * anything else (the "L3 Evolver" rule): an error-scaled correction
      toward the target plus noise (std 0.05).

    The random stream is re-created from seed 42 on every call, so identical
    arguments produce an identical trajectory.

    Args:
        level: Capability level name selecting the update rule.
        regime: Law regime name; "Physical" and "Digital" use a target of
            0.7, every other value uses 0.5.
        steps: Number of updates. Coerced with ``int()`` so a float such as
            ``50.0`` is accepted; must be at least 1.

    Returns:
        A tuple ``(image, analysis)``: ``image`` is the trajectory plot as a
        ``data:image/png;base64,...`` string and ``analysis`` is a Markdown
        summary (final state, target error, terminal stability, convergence).

    Raises:
        ValueError: If ``steps`` is smaller than 1.
    """
    steps = int(steps)
    if steps < 1:
        raise ValueError("steps must be at least 1")

    # A private generator seeded like the legacy global one: same numbers as
    # np.random.seed(42), without reseeding the process-wide RNG.
    rng = np.random.RandomState(42)
    target = 0.7 if regime in ("Physical", "Digital") else 0.5

    # Simulate state evolution
    states = []
    current = 0.5
    for _ in range(steps):
        # Different levels have different update dynamics
        if level == "L1 Predictor":
            # Simple next-step, no long-term coherence
            noise = rng.normal(0, 0.15)
            current = np.clip(current + noise, 0, 1)
        elif level == "L2 Simulator":
            # Multi-step coherence with action conditioning
            drift = (target - current) * 0.1
            noise = rng.normal(0, 0.08)
            current = np.clip(current + drift + noise, 0, 1)
        else:  # L3 Evolver
            # Self-correcting based on prediction errors
            error = abs(target - current)
            correction = (target - current) * 0.15 * (1 + error)
            noise = rng.normal(0, 0.05)
            current = np.clip(current + correction + noise, 0, 1)
        states.append(current)

    # Create trajectory plot
    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        ax.plot(range(len(states)), states, 'o-', linewidth=2, markersize=6, color='#6366f1')
        ax.axhline(y=target, color='red', linestyle='--', alpha=0.5, label='Target')
        ax.set_xlabel('Step')
        ax.set_ylabel('State Value')
        ax.set_title(f'{level} Rollout in {regime} Regime ({steps} steps)')
        ax.set_ylim(0, 1)
        ax.legend()
        ax.grid(True, alpha=0.3)
        # Operate on this figure explicitly rather than pyplot's "current
        # figure", which is shared process-wide state.
        fig.tight_layout()
        buf = BytesIO()
        fig.savefig(buf, format='png', dpi=100)
    finally:
        # Always release the figure, even if drawing or encoding failed.
        plt.close(fig)
    img_base64 = base64.b64encode(buf.getvalue()).decode()

    # Analysis
    final_error = abs(target - states[-1])
    # Slicing already yields the whole list when fewer than five states exist.
    stability = np.std(states[-5:])
    converged = final_error < 0.15 and stability < 0.1
    analysis = "**Rollout Analysis**\n"
    analysis += f"- Final state: {states[-1]:.3f}\n"
    analysis += f"- Target error: {final_error:.3f}\n"
    analysis += f"- Terminal stability (std): {stability:.3f}\n"
    analysis += f"- Convergence: {'Yes' if converged else 'No'}\n"
    return f"data:image/png;base64,{img_base64}", analysis
# Gradio Interface
# Markdown shown by the interface, kept at module level so the literal text
# does not pick up indentation from the layout code.
_INTRO_MD = """
# 🤖 Agentic World Model Explorer
Interactive exploration of the "levels x laws" taxonomy from
[Agentic World Modeling: Foundations, Capabilities, Laws, and Beyond](https://huggingface.co/papers/2604.22748)
"""

_LEVELS_MD = """
### The Three Levels of World Models
- **L1 Predictor**: Learns one-step local transition operators
- **L2 Simulator**: Composes predictions into multi-step, action-conditioned rollouts
- **L3 Evolver**: Autonomously revises its model when predictions fail
"""

_ABOUT_MD = """
### About This Demo
This Space provides an interactive exploration of key concepts from the Agentic World Modeling paper.
**Paper:** [2604.22748](https://huggingface.co/papers/2604.22748)
**Key Insight:** World models should be evaluated not just on next-step prediction accuracy,
but on their ability to maintain coherent multi-step rollouts that respect domain laws.
**Citation:**
```
@article{chu2026agentic,
title={Agentic World Modeling: Foundations, Capabilities, Laws, and Beyond},
author={Chu, Meng and Zhang, Xuan and Lin, Kevin and Kong, Lingdong and others},
journal={arXiv preprint arXiv:2604.22748},
year={2026}
}
```
"""


def _png_data_uri_to_array(data_uri):
    """Decode a ``data:image/png;base64,...`` string into a uint8 pixel array.

    Values that are not data-URI strings are returned unchanged.
    """
    if not isinstance(data_uri, str) or not data_uri.startswith("data:"):
        return data_uri
    _, _, payload = data_uri.partition(",")
    pixels = plt.imread(BytesIO(base64.b64decode(payload)), format="png")
    if np.issubdtype(pixels.dtype, np.floating):
        # Matplotlib returns PNG data as floats in [0, 1]; scale to 8-bit.
        pixels = np.rint(pixels * 255).astype(np.uint8)
    return pixels


def create_interface():
    """Build the Gradio Blocks app for the demo.

    The app has four tabs: a capability-level chart, a law-regime browser, a
    rollout simulation, and an "About" page.

    Returns:
        The assembled ``gr.Blocks`` instance (not yet launched).
    """
    # The chart functions return PNG data URIs, but gr.Image documents arrays,
    # PIL images, file paths and URLs as output values, so the handlers decode
    # the data URI into pixels before handing it to the component.
    def render_capability_chart():
        return _png_data_uri_to_array(visualize_capability_levels())

    def run_rollout(level, regime, steps):
        data_uri, analysis = simulate_rollout(level, regime, steps)
        return _png_data_uri_to_array(data_uri), analysis

    with gr.Blocks(title="Agentic World Model Explorer") as demo:
        gr.Markdown(_INTRO_MD)

        with gr.Tab("Capability Levels"):
            gr.Markdown(_LEVELS_MD)
            viz_btn = gr.Button("Generate Visualization")
            viz_output = gr.Image(label="Generalization Comparison")
            viz_btn.click(
                fn=render_capability_chart,
                outputs=viz_output,
                # Keep the endpoint name the unwrapped handler had.
                api_name="visualize_capability_levels",
            )

        with gr.Tab("Law Regimes"):
            gr.Markdown("Explore the four governing-law regimes that determine world model constraints.")
            regime_dropdown = gr.Dropdown(
                choices=["Physical", "Digital", "Social", "Scientific"],
                value="Physical",
                label="Select Law Regime"
            )
            regime_output = gr.Markdown()
            regime_dropdown.change(fn=explore_regime, inputs=regime_dropdown, outputs=regime_output)
            # Initialize
            demo.load(fn=lambda: explore_regime("Physical"), outputs=regime_output)

        with gr.Tab("Rollout Simulation"):
            gr.Markdown("Simulate world model rollouts at different capability levels.")
            with gr.Row():
                level_select = gr.Dropdown(
                    choices=["L1 Predictor", "L2 Simulator", "L3 Evolver"],
                    value="L2 Simulator",
                    label="Capability Level"
                )
                regime_select = gr.Dropdown(
                    choices=["Physical", "Digital", "Social", "Scientific"],
                    value="Physical",
                    label="Law Regime"
                )
                steps_slider = gr.Slider(10, 100, value=50, step=10, label="Steps")
            simulate_btn = gr.Button("Run Simulation")
            with gr.Row():
                trajectory_plot = gr.Image(label="State Trajectory")
                analysis_text = gr.Markdown()
            simulate_btn.click(
                fn=run_rollout,
                inputs=[level_select, regime_select, steps_slider],
                outputs=[trajectory_plot, analysis_text],
                # Keep the endpoint name the unwrapped handler had.
                api_name="simulate_rollout",
            )

        with gr.Tab("About"):
            gr.Markdown(_ABOUT_MD)

    return demo
# Create and launch
# Only build and serve the UI when this file is executed as a script, so
# importing the module does not start a server.
if __name__ == "__main__":
    demo = create_interface()
    demo.launch()