""" Agentic World Model Explorer Interactive demo of world model capability levels and law regimes Based on: Agentic World Modeling: Foundations, Capabilities, Laws, and Beyond (2604.22748) """ import gradio as gr import numpy as np import matplotlib matplotlib.use('Agg') import matplotlib.pyplot as plt from io import BytesIO import base64 # Lazy loading - no heavy imports at module level _MODEL = None def get_demo_data(): """Returns demo data for world model visualization.""" # Simulated: prediction accuracy across capability levels levels = ['L1\nPredictor', 'L2\nSimulator', 'L3\nEvolver'] in_dist = [0.92, 0.89, 0.87] # In-distribution performance out_dist = [0.45, 0.72, 0.78] # Out-of-distribution (compositional) return levels, in_dist, out_dist def visualize_capability_levels(): """Create visualization of capability levels vs generalization.""" levels, in_dist, out_dist = get_demo_data() fig, ax = plt.subplots(figsize=(10, 6)) x = np.arange(len(levels)) width = 0.35 bars1 = ax.bar(x - width/2, in_dist, width, label='In-Distribution', color='#6366f1') bars2 = ax.bar(x + width/2, out_dist, width, label='Out-of-Distribution', color='#a855f7') ax.set_ylabel('Accuracy') ax.set_title('World Model Capability Levels: Generalization Gap') ax.set_xticks(x) ax.set_xticklabels(levels) ax.legend() ax.set_ylim(0, 1.0) # Add value labels for bar in bars1: height = bar.get_height() ax.annotate(f'{height:.2f}', xy=(bar.get_x() + bar.get_width() / 2, height), xytext=(0, 3), textcoords="offset points", ha='center', va='bottom', fontsize=9) for bar in bars2: height = bar.get_height() ax.annotate(f'{height:.2f}', xy=(bar.get_x() + bar.get_width() / 2, height), xytext=(0, 3), textcoords="offset points", ha='center', va='bottom', fontsize=9) plt.tight_layout() buf = BytesIO() plt.savefig(buf, format='png', dpi=100) buf.seek(0) img_base64 = base64.b64encode(buf.read()).decode() plt.close() return f"data:image/png;base64,{img_base64}" def get_law_regime_info(regime): """Get information about a specific law regime.""" regimes = { "Physical": { "description": "Object manipulation, physics simulation, robotics", "constraints": ["Newtonian mechanics", "Object permanence", "Collision dynamics"], "failure_modes": ["Impossible object states", "Gravity violations", "Penetration errors"], "examples": ["Minecraft agents", "Robotic manipulation", "Physics simulators"] }, "Digital": { "description": "Web agents, GUI automation, software interaction", "constraints": ["API contracts", "UI state consistency", "Action preconditions"], "failure_modes": ["Invalid actions", "State desync", "Missing element refs"], "examples": ["WebArena", "OSWorld", "Computer-Using Agents"] }, "Social": { "description": "Multi-agent coordination, negotiation, social dynamics", "constraints": ["Theory of mind", "Commitment consistency", "Communication protocols"], "failure_modes": ["Broken commitments", "Misaligned incentives", "Deadlocks"], "examples": ["Social simulations", "Negotiation agents", "Game theory models"] }, "Scientific": { "description": "Experimental design, hypothesis testing, discovery", "constraints": ["Reproducibility", "Causal validity", "Measurement precision"], "failure_modes": ["Confounded variables", "Measurement error", "Overfitting"], "examples": ["AI scientists", "Drug discovery", "Materials design"] } } return regimes.get(regime, regimes["Physical"]) def explore_regime(regime): """Explore a specific law regime.""" info = get_law_regime_info(regime) output = f"## {regime} Law Regime\n\n" output += f"**Description:** {info['description']}\n\n" output += "**Key Constraints:**\n" for c in info['constraints']: output += f"- {c}\n" output += f"\n**Common Failure Modes:**\n" for f in info['failure_modes']: output += f"- {f}\n" output += f"\n**Example Systems:**\n" for e in info['examples']: output += f"- {e}\n" return output def simulate_rollout(level, regime, steps): """Simulate a world model rollout at given level in given regime.""" np.random.seed(42) # Simulate state evolution states = [] current = 0.5 for i in range(steps): # Different levels have different update dynamics if level == "L1 Predictor": # Simple next-step, no long-term coherence noise = np.random.normal(0, 0.15) current = np.clip(current + noise, 0, 1) elif level == "L2 Simulator": # Multi-step coherence with action conditioning target = 0.7 if regime in ["Physical", "Digital"] else 0.5 drift = (target - current) * 0.1 noise = np.random.normal(0, 0.08) current = np.clip(current + drift + noise, 0, 1) else: # L3 Evolver # Self-correcting based on prediction errors target = 0.7 if regime in ["Physical", "Digital"] else 0.5 error = abs(target - current) correction = (target - current) * 0.15 * (1 + error) noise = np.random.normal(0, 0.05) current = np.clip(current + correction + noise, 0, 1) states.append(current) # Create trajectory plot fig, ax = plt.subplots(figsize=(10, 4)) ax.plot(range(len(states)), states, 'o-', linewidth=2, markersize=6, color='#6366f1') ax.axhline(y=0.7 if regime in ["Physical", "Digital"] else 0.5, color='red', linestyle='--', alpha=0.5, label='Target') ax.set_xlabel('Step') ax.set_ylabel('State Value') ax.set_title(f'{level} Rollout in {regime} Regime ({steps} steps)') ax.set_ylim(0, 1) ax.legend() ax.grid(True, alpha=0.3) plt.tight_layout() buf = BytesIO() plt.savefig(buf, format='png', dpi=100) buf.seek(0) img_base64 = base64.b64encode(buf.read()).decode() plt.close() # Analysis final_error = abs((0.7 if regime in ["Physical", "Digital"] else 0.5) - states[-1]) stability = np.std(states[-5:]) if len(states) >= 5 else np.std(states) analysis = f"**Rollout Analysis**\n" analysis += f"- Final state: {states[-1]:.3f}\n" analysis += f"- Target error: {final_error:.3f}\n" analysis += f"- Terminal stability (std): {stability:.3f}\n" analysis += f"- Convergence: {'Yes' if final_error < 0.15 and stability < 0.1 else 'No'}\n" return f"data:image/png;base64,{img_base64}", analysis # Gradio Interface def create_interface(): with gr.Blocks(title="Agentic World Model Explorer") as demo: gr.Markdown(""" # 🤖 Agentic World Model Explorer Interactive exploration of the "levels x laws" taxonomy from [Agentic World Modeling: Foundations, Capabilities, Laws, and Beyond](https://huggingface.co/papers/2604.22748) """) with gr.Tab("Capability Levels"): gr.Markdown(""" ### The Three Levels of World Models - **L1 Predictor**: Learns one-step local transition operators - **L2 Simulator**: Composes predictions into multi-step, action-conditioned rollouts - **L3 Evolver**: Autonomously revises its model when predictions fail """) viz_btn = gr.Button("Generate Visualization") viz_output = gr.Image(label="Generalization Comparison") viz_btn.click(fn=visualize_capability_levels, outputs=viz_output) with gr.Tab("Law Regimes"): gr.Markdown("Explore the four governing-law regimes that determine world model constraints.") regime_dropdown = gr.Dropdown( choices=["Physical", "Digital", "Social", "Scientific"], value="Physical", label="Select Law Regime" ) regime_output = gr.Markdown() regime_dropdown.change(fn=explore_regime, inputs=regime_dropdown, outputs=regime_output) # Initialize demo.load(fn=lambda: explore_regime("Physical"), outputs=regime_output) with gr.Tab("Rollout Simulation"): gr.Markdown("Simulate world model rollouts at different capability levels.") with gr.Row(): level_select = gr.Dropdown( choices=["L1 Predictor", "L2 Simulator", "L3 Evolver"], value="L2 Simulator", label="Capability Level" ) regime_select = gr.Dropdown( choices=["Physical", "Digital", "Social", "Scientific"], value="Physical", label="Law Regime" ) steps_slider = gr.Slider(10, 100, value=50, step=10, label="Steps") simulate_btn = gr.Button("Run Simulation") with gr.Row(): trajectory_plot = gr.Image(label="State Trajectory") analysis_text = gr.Markdown() simulate_btn.click( fn=simulate_rollout, inputs=[level_select, regime_select, steps_slider], outputs=[trajectory_plot, analysis_text] ) with gr.Tab("About"): gr.Markdown(""" ### About This Demo This Space provides an interactive exploration of key concepts from the Agentic World Modeling paper. **Paper:** [2604.22748](https://huggingface.co/papers/2604.22748) **Key Insight:** World models should be evaluated not just on next-step prediction accuracy, but on their ability to maintain coherent multi-step rollouts that respect domain laws. **Citation:** ``` @article{chu2026agentic, title={Agentic World Modeling: Foundations, Capabilities, Laws, and Beyond}, author={Chu, Meng and Zhang, Xuan and Lin, Kevin and Kong, Lingdong and others}, journal={arXiv preprint arXiv:2604.22748}, year={2026} } ``` """) return demo # Create and launch if __name__ == "__main__": demo = create_interface() demo.launch()