# Hugging Face Space status when this file was captured: Sleeping
"""
Agentic World Model Explorer

Interactive demo of world model capability levels and law regimes.
Based on: "Agentic World Modeling: Foundations, Capabilities, Laws, and Beyond" (arXiv:2604.22748)
"""
| import gradio as gr | |
| import numpy as np | |
| import matplotlib | |
| matplotlib.use('Agg') | |
| import matplotlib.pyplot as plt | |
| from io import BytesIO | |
| import base64 | |
# Lazy loading - no heavy imports at module level
# NOTE(review): nothing in the visible code reads or assigns _MODEL, and
# gradio/numpy/matplotlib are imported eagerly above -- presumably a leftover
# placeholder for a lazily loaded model; confirm before relying on it.
_MODEL = None
def get_demo_data():
    """Return the hard-coded demo series behind the capability-level chart.

    The numbers are simulated placeholders, not measured results.

    Returns:
        A ``(levels, in_dist, out_dist)`` tuple of three parallel lists:
        tick labels for the capability levels, in-distribution accuracy,
        and out-of-distribution (compositional) accuracy.
    """
    # One row per capability level: (tick label, in-dist, out-of-dist).
    rows = [
        ('L1\nPredictor', 0.92, 0.45),
        ('L2\nSimulator', 0.89, 0.72),
        ('L3\nEvolver', 0.87, 0.78),
    ]
    # Transpose the rows into the three column lists callers unpack.
    levels, in_dist, out_dist = (list(column) for column in zip(*rows))
    return levels, in_dist, out_dist
def _annotate_bar_heights(ax, bars):
    """Write each bar's height, to two decimals, just above the bar."""
    for bar in bars:
        height = bar.get_height()
        ax.annotate(
            f'{height:.2f}',
            xy=(bar.get_x() + bar.get_width() / 2, height),
            xytext=(0, 3), textcoords="offset points",
            ha='center', va='bottom', fontsize=9,
        )


def visualize_capability_levels():
    """Render the capability-level bar chart and return it as a PNG data URI.

    Draws grouped bars (in-distribution vs out-of-distribution accuracy) for
    the series returned by ``get_demo_data()``, labels every bar with its
    value, and encodes the figure as a base64 PNG.

    Returns:
        A string of the form ``"data:image/png;base64,<payload>"``.
    """
    # NOTE(review): create_interface() wires this return value to a gr.Image
    # output; confirm the installed Gradio version accepts data-URI strings
    # there (its docs list file paths, URLs, arrays and PIL images).
    levels, in_dist, out_dist = get_demo_data()
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        x = np.arange(len(levels))
        width = 0.35
        bars1 = ax.bar(x - width/2, in_dist, width, label='In-Distribution', color='#6366f1')
        bars2 = ax.bar(x + width/2, out_dist, width, label='Out-of-Distribution', color='#a855f7')
        ax.set_ylabel('Accuracy')
        ax.set_title('World Model Capability Levels: Generalization Gap')
        ax.set_xticks(x)
        ax.set_xticklabels(levels)
        ax.legend()
        ax.set_ylim(0, 1.0)
        # Add value labels
        for bars in (bars1, bars2):
            _annotate_bar_heights(ax, bars)
        # Operate on this figure explicitly rather than pyplot's implicit
        # "current figure", which is shared process-wide state.
        fig.tight_layout()
        buf = BytesIO()
        fig.savefig(buf, format='png', dpi=100)
    finally:
        # Always release the figure, even if drawing or encoding failed.
        plt.close(fig)
    img_base64 = base64.b64encode(buf.getvalue()).decode()
    return f"data:image/png;base64,{img_base64}"
def get_law_regime_info(regime):
    """Look up the descriptive record for one law regime.

    Args:
        regime: Regime name; the table holds "Physical", "Digital",
            "Social" and "Scientific".

    Returns:
        A freshly built dict with keys ``description`` (str) and
        ``constraints``, ``failure_modes``, ``examples`` (lists of str).
        Any name not in the table yields the "Physical" record.
    """
    fields = ("description", "constraints", "failure_modes", "examples")
    # Each row lists its values in the same order as ``fields``.
    table = {
        "Physical": (
            "Object manipulation, physics simulation, robotics",
            ["Newtonian mechanics", "Object permanence", "Collision dynamics"],
            ["Impossible object states", "Gravity violations", "Penetration errors"],
            ["Minecraft agents", "Robotic manipulation", "Physics simulators"],
        ),
        "Digital": (
            "Web agents, GUI automation, software interaction",
            ["API contracts", "UI state consistency", "Action preconditions"],
            ["Invalid actions", "State desync", "Missing element refs"],
            ["WebArena", "OSWorld", "Computer-Using Agents"],
        ),
        "Social": (
            "Multi-agent coordination, negotiation, social dynamics",
            ["Theory of mind", "Commitment consistency", "Communication protocols"],
            ["Broken commitments", "Misaligned incentives", "Deadlocks"],
            ["Social simulations", "Negotiation agents", "Game theory models"],
        ),
        "Scientific": (
            "Experimental design, hypothesis testing, discovery",
            ["Reproducibility", "Causal validity", "Measurement precision"],
            ["Confounded variables", "Measurement error", "Overfitting"],
            ["AI scientists", "Drug discovery", "Materials design"],
        ),
    }
    # Unknown names fall back to the Physical regime.
    row = table[regime] if regime in table else table["Physical"]
    return dict(zip(fields, row))
def explore_regime(regime):
    """Format one law regime's record as a Markdown summary.

    Args:
        regime: Regime name, passed to ``get_law_regime_info`` and echoed
            verbatim in the heading.

    Returns:
        Markdown text: a heading, the description, then bulleted lists of
        key constraints, common failure modes and example systems.
    """
    details = get_law_regime_info(regime)

    def bullets(items):
        # One "- item" line per entry, each terminated by a newline.
        return "".join(f"- {item}\n" for item in items)

    sections = [
        f"## {regime} Law Regime\n\n",
        f"**Description:** {details['description']}\n\n",
        "**Key Constraints:**\n",
        bullets(details['constraints']),
        "\n**Common Failure Modes:**\n",
        bullets(details['failure_modes']),
        "\n**Example Systems:**\n",
        bullets(details['examples']),
    ]
    return "".join(sections)
def _regime_target(regime):
    """Return the state value rollouts are pulled toward in *regime*."""
    return 0.7 if regime in ("Physical", "Digital") else 0.5


def _advance_state(level, current, target, rng):
    """Return the next clipped state for one rollout step at *level*."""
    if level == "L1 Predictor":
        # Simple next-step, no long-term coherence: an unbiased random walk.
        noise = rng.normal(0, 0.15)
        return np.clip(current + noise, 0, 1)
    if level == "L2 Simulator":
        # Multi-step coherence: constant-gain drift toward the target.
        drift = (target - current) * 0.1
        noise = rng.normal(0, 0.08)
        return np.clip(current + drift + noise, 0, 1)
    # L3 Evolver (also any unrecognised level): the correction gain grows
    # with the current error, so larger misses are corrected harder.
    error = abs(target - current)
    correction = (target - current) * 0.15 * (1 + error)
    noise = rng.normal(0, 0.05)
    return np.clip(current + correction + noise, 0, 1)


def simulate_rollout(level, regime, steps):
    """Simulate a world model rollout at given level in given regime.

    Starting from 0.5, a scalar state is advanced ``steps`` times using the
    update rule for ``level``; the trajectory is plotted against the regime's
    target line and summarised.

    Args:
        level: "L1 Predictor", "L2 Simulator" or "L3 Evolver"; any other
            value uses the L3 update rule.
        regime: Law regime name. "Physical" and "Digital" use target 0.7,
            everything else 0.5.
        steps: Number of rollout steps. Coerced with ``int()`` (fractions
            are truncated) so a float-valued slider reading is accepted.

    Returns:
        ``(image, analysis)`` where ``image`` is a
        ``"data:image/png;base64,..."`` string of the trajectory plot and
        ``analysis`` is a Markdown summary (final state, target error,
        std of the last five states, convergence verdict).

    Raises:
        ValueError: If ``steps`` is less than 1 (no state to report).
    """
    # NOTE(review): create_interface() wires the image string to a gr.Image
    # output; confirm the installed Gradio version accepts data-URI strings.
    steps = int(steps)
    if steps < 1:
        # Fail before any figure is created; the summary needs >= 1 state.
        raise ValueError("steps must be at least 1")

    # A private generator seeded with 42 yields the same draws as seeding the
    # global NumPy RNG, without resetting that shared state for other code.
    rng = np.random.RandomState(42)
    target = _regime_target(regime)

    # Simulate state evolution
    states = []
    current = 0.5
    for _ in range(steps):
        current = _advance_state(level, current, target, rng)
        states.append(current)

    # Create trajectory plot
    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        ax.plot(range(len(states)), states, 'o-', linewidth=2, markersize=6, color='#6366f1')
        ax.axhline(y=target, color='red', linestyle='--', alpha=0.5, label='Target')
        ax.set_xlabel('Step')
        ax.set_ylabel('State Value')
        ax.set_title(f'{level} Rollout in {regime} Regime ({steps} steps)')
        ax.set_ylim(0, 1)
        ax.legend()
        ax.grid(True, alpha=0.3)
        # Operate on this figure explicitly rather than pyplot's implicit
        # "current figure", which is shared process-wide state.
        fig.tight_layout()
        buf = BytesIO()
        fig.savefig(buf, format='png', dpi=100)
    finally:
        # Always release the figure, even if drawing or encoding failed.
        plt.close(fig)
    img_base64 = base64.b64encode(buf.getvalue()).decode()

    # Analysis
    final_error = abs(target - states[-1])
    # Slicing already yields the whole list when fewer than five states exist.
    stability = np.std(states[-5:])
    converged = final_error < 0.15 and stability < 0.1
    analysis = (
        "**Rollout Analysis**\n"
        f"- Final state: {states[-1]:.3f}\n"
        f"- Target error: {final_error:.3f}\n"
        f"- Terminal stability (std): {stability:.3f}\n"
        f"- Convergence: {'Yes' if converged else 'No'}\n"
    )
    return f"data:image/png;base64,{img_base64}", analysis
# Gradio Interface
def create_interface():
    """Assemble the four-tab Gradio Blocks app and return it without launching.

    Tabs, in order: "Capability Levels" (button -> bar chart), "Law Regimes"
    (dropdown -> Markdown summary, pre-filled on page load), "Rollout
    Simulation" (level/regime/steps -> trajectory image + analysis) and
    "About" (static text).

    Returns:
        The configured ``gr.Blocks`` instance.
    """
    regime_names = ("Physical", "Digital", "Social", "Scientific")
    level_names = ("L1 Predictor", "L2 Simulator", "L3 Evolver")

    # Static copy, kept together so the layout code below stays readable.
    header_md = """
# 🤖 Agentic World Model Explorer
Interactive exploration of the "levels x laws" taxonomy from
[Agentic World Modeling: Foundations, Capabilities, Laws, and Beyond](https://huggingface.co/papers/2604.22748)
"""
    levels_md = """
### The Three Levels of World Models
- **L1 Predictor**: Learns one-step local transition operators
- **L2 Simulator**: Composes predictions into multi-step, action-conditioned rollouts
- **L3 Evolver**: Autonomously revises its model when predictions fail
"""
    about_md = """
### About This Demo
This Space provides an interactive exploration of key concepts from the Agentic World Modeling paper.
**Paper:** [2604.22748](https://huggingface.co/papers/2604.22748)
**Key Insight:** World models should be evaluated not just on next-step prediction accuracy,
but on their ability to maintain coherent multi-step rollouts that respect domain laws.
**Citation:**
```
@article{chu2026agentic,
title={Agentic World Modeling: Foundations, Capabilities, Laws, and Beyond},
author={Chu, Meng and Zhang, Xuan and Lin, Kevin and Kong, Lingdong and others},
journal={arXiv preprint arXiv:2604.22748},
year={2026}
}
```
"""

    # Components render in creation order, so the statements below keep the
    # original sequence.
    with gr.Blocks(title="Agentic World Model Explorer") as demo:
        gr.Markdown(header_md)

        with gr.Tab("Capability Levels"):
            gr.Markdown(levels_md)
            chart_button = gr.Button("Generate Visualization")
            chart_image = gr.Image(label="Generalization Comparison")
            chart_button.click(fn=visualize_capability_levels, outputs=chart_image)

        with gr.Tab("Law Regimes"):
            gr.Markdown("Explore the four governing-law regimes that determine world model constraints.")
            regime_picker = gr.Dropdown(
                choices=list(regime_names),
                value="Physical",
                label="Select Law Regime",
            )
            regime_summary = gr.Markdown()
            regime_picker.change(fn=explore_regime, inputs=regime_picker, outputs=regime_summary)
            # Initialize: show the default regime's summary on page load.
            demo.load(fn=lambda: explore_regime("Physical"), outputs=regime_summary)

        with gr.Tab("Rollout Simulation"):
            gr.Markdown("Simulate world model rollouts at different capability levels.")
            # NOTE(review): row membership is assumed (three inputs in the
            # first row, image + analysis in the second) -- confirm against
            # the deployed layout.
            with gr.Row():
                level_picker = gr.Dropdown(
                    choices=list(level_names),
                    value="L2 Simulator",
                    label="Capability Level",
                )
                rollout_regime_picker = gr.Dropdown(
                    choices=list(regime_names),
                    value="Physical",
                    label="Law Regime",
                )
                step_count = gr.Slider(10, 100, value=50, step=10, label="Steps")
            run_button = gr.Button("Run Simulation")
            with gr.Row():
                trajectory_image = gr.Image(label="State Trajectory")
                rollout_report = gr.Markdown()
            run_button.click(
                fn=simulate_rollout,
                inputs=[level_picker, rollout_regime_picker, step_count],
                outputs=[trajectory_image, rollout_report],
            )

        with gr.Tab("About"):
            gr.Markdown(about_md)

    return demo
# Create and launch
# Build the Blocks app and start it only when this file is executed as a
# script; importing the module does not launch anything.
if __name__ == "__main__":
    demo = create_interface()
    demo.launch()