O96a committed on
Commit
c619928
·
verified ·
1 Parent(s): 914fcf6

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. README.md +27 -5
  2. app.py +273 -0
  3. requirements.txt +4 -0
README.md CHANGED
@@ -1,12 +1,34 @@
1
  ---
2
  title: Agentic World Model Explorer
3
- emoji: 🏃
4
- colorFrom: yellow
5
- colorTo: green
6
  sdk: gradio
7
- sdk_version: 6.13.0
8
  app_file: app.py
9
  pinned: false
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  title: Agentic World Model Explorer
3
+ colorFrom: blue
4
+ colorTo: purple
 
5
  sdk: gradio
6
+ sdk_version: 4.36.0
7
  app_file: app.py
8
  pinned: false
9
  ---
10
 
11
+ # Agentic World Model Explorer
12
+
13
+ An interactive exploration of the "levels x laws" taxonomy from the Agentic World Modeling paper (2604.22748).
14
+
15
+ ## What This Does
16
+
17
+ Demonstrates the three capability levels of world models:
18
+ - **L1 Predictor**: One-step local transitions
19
+ - **L2 Simulator**: Multi-step action-conditioned rollouts
20
+ - **L3 Evolver**: Self-revising models that update from prediction failures
21
+
22
+ Across four law regimes:
23
+ - Physical (object manipulation, physics)
24
+ - Digital (web/GUI agents, software)
25
+ - Social (multi-agent coordination)
26
+ - Scientific (experimental design)
27
+
28
+ ## Hypothesis
29
+
30
+ World models with explicit structured state representations (L2+) demonstrate better compositional generalization than pure next-token predictors when evaluated on out-of-distribution scenarios within the same law regime.
31
+
32
+ ## Findings
33
+
34
+ See the live demo for interactive examples of state representation strategies and their impact on generalization.
app.py ADDED
@@ -0,0 +1,273 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Agentic World Model Explorer
3
+ Interactive demo of world model capability levels and law regimes
4
+ Based on: Agentic World Modeling: Foundations, Capabilities, Laws, and Beyond (2604.22748)
5
+ """
6
+
7
+ import gradio as gr
8
+ import numpy as np
9
+ import matplotlib
10
+ matplotlib.use('Agg')
11
+ import matplotlib.pyplot as plt
12
+ from io import BytesIO
13
+ import base64
14
+
15
+ # Lazy loading - no heavy imports at module level
16
+ _MODEL = None
17
+
18
def get_demo_data():
    """Return demo accuracies for the capability-level chart.

    Returns:
        (labels, in_distribution, out_of_distribution) — three parallel
        lists. The numbers are hard-coded illustrative values, not
        measured results.
    """
    labels = ['L1\nPredictor', 'L2\nSimulator', 'L3\nEvolver']
    in_distribution = [0.92, 0.89, 0.87]
    out_of_distribution = [0.45, 0.72, 0.78]
    return labels, in_distribution, out_of_distribution
25
+
26
def visualize_capability_levels():
    """Render the capability-level generalization chart as a data URI.

    Builds a grouped bar chart (in- vs out-of-distribution accuracy for
    each capability level) from ``get_demo_data`` and returns it as a
    ``data:image/png;base64,...`` string for display in ``gr.Image``.

    Returns:
        str: PNG image encoded as a base64 data URI.
    """
    levels, in_dist, out_dist = get_demo_data()

    fig, ax = plt.subplots(figsize=(10, 6))
    x = np.arange(len(levels))
    width = 0.35

    bars1 = ax.bar(x - width/2, in_dist, width, label='In-Distribution', color='#6366f1')
    bars2 = ax.bar(x + width/2, out_dist, width, label='Out-of-Distribution', color='#a855f7')

    ax.set_ylabel('Accuracy')
    ax.set_title('World Model Capability Levels: Generalization Gap')
    ax.set_xticks(x)
    ax.set_xticklabels(levels)
    ax.legend()
    ax.set_ylim(0, 1.0)

    # Add value labels. A single loop over both bar containers replaces
    # the two byte-for-byte duplicated annotation loops.
    for bar in [*bars1, *bars2]:
        height = bar.get_height()
        ax.annotate(f'{height:.2f}',
                    xy=(bar.get_x() + bar.get_width() / 2, height),
                    xytext=(0, 3), textcoords="offset points",
                    ha='center', va='bottom', fontsize=9)

    plt.tight_layout()

    # Encode the figure in-memory; close it so Agg doesn't leak figures.
    buf = BytesIO()
    plt.savefig(buf, format='png', dpi=100)
    buf.seek(0)
    img_base64 = base64.b64encode(buf.read()).decode()
    plt.close()

    return f"data:image/png;base64,{img_base64}"
67
+
68
def get_law_regime_info(regime):
    """Look up the descriptor for one of the four law regimes.

    Args:
        regime: regime name ("Physical", "Digital", "Social",
            "Scientific"); any unknown name falls back to "Physical".

    Returns:
        dict with keys 'description', 'constraints', 'failure_modes'
        and 'examples'.
    """
    def _entry(description, constraints, failure_modes, examples):
        # Small builder so every regime record has an identical shape.
        return {
            "description": description,
            "constraints": constraints,
            "failure_modes": failure_modes,
            "examples": examples,
        }

    catalog = {
        "Physical": _entry(
            "Object manipulation, physics simulation, robotics",
            ["Newtonian mechanics", "Object permanence", "Collision dynamics"],
            ["Impossible object states", "Gravity violations", "Penetration errors"],
            ["Minecraft agents", "Robotic manipulation", "Physics simulators"],
        ),
        "Digital": _entry(
            "Web agents, GUI automation, software interaction",
            ["API contracts", "UI state consistency", "Action preconditions"],
            ["Invalid actions", "State desync", "Missing element refs"],
            ["WebArena", "OSWorld", "Computer-Using Agents"],
        ),
        "Social": _entry(
            "Multi-agent coordination, negotiation, social dynamics",
            ["Theory of mind", "Commitment consistency", "Communication protocols"],
            ["Broken commitments", "Misaligned incentives", "Deadlocks"],
            ["Social simulations", "Negotiation agents", "Game theory models"],
        ),
        "Scientific": _entry(
            "Experimental design, hypothesis testing, discovery",
            ["Reproducibility", "Causal validity", "Measurement precision"],
            ["Confounded variables", "Measurement error", "Overfitting"],
            ["AI scientists", "Drug discovery", "Materials design"],
        ),
    }
    return catalog.get(regime, catalog["Physical"])
97
+
98
def explore_regime(regime):
    """Format a markdown summary of the selected law regime.

    Args:
        regime: regime name passed through to ``get_law_regime_info``
            (unknown names fall back to "Physical" there).

    Returns:
        str: markdown with the regime's description, constraints,
        failure modes, and example systems.
    """
    info = get_law_regime_info(regime)

    # Build the lines and join once: the original used quadratic `+=`
    # concatenation and f-string prefixes on strings with no placeholders.
    parts = [
        f"## {regime} Law Regime",
        "",
        f"**Description:** {info['description']}",
        "",
        "**Key Constraints:**",
        *[f"- {item}" for item in info['constraints']],
        "",
        "**Common Failure Modes:**",
        *[f"- {item}" for item in info['failure_modes']],
        "",
        "**Example Systems:**",
        *[f"- {item}" for item in info['examples']],
    ]
    # Trailing newline matches the original's per-line "\n" appends.
    return "\n".join(parts) + "\n"
118
+
119
def simulate_rollout(level, regime, steps):
    """Simulate a toy world-model rollout and plot its state trajectory.

    Args:
        level: "L1 Predictor", "L2 Simulator", or "L3 Evolver" — selects
            the state-update dynamics.
        regime: law-regime name; Physical/Digital regimes target 0.7,
            all others 0.5.
        steps: number of rollout steps (UI slider supplies 10-100; a
            value of 0 would fail on states[-1] below).

    Returns:
        (data_uri, analysis) — a base64 PNG data URI of the trajectory
        plot and a markdown analysis string.
    """
    np.random.seed(42)  # fixed seed so repeated runs are reproducible

    # Hoisted: the original recomputed this same conditional in four
    # places (both branches, the axhline, and the error calculation).
    target = 0.7 if regime in ["Physical", "Digital"] else 0.5

    # Simulate state evolution. Exactly one np.random.normal draw per
    # step in every branch, preserving the original RNG stream.
    states = []
    current = 0.5
    for _ in range(steps):
        if level == "L1 Predictor":
            # Simple next-step noise: no pull toward the target.
            noise = np.random.normal(0, 0.15)
            current = np.clip(current + noise, 0, 1)
        elif level == "L2 Simulator":
            # Multi-step coherence: proportional drift toward the target.
            drift = (target - current) * 0.1
            noise = np.random.normal(0, 0.08)
            current = np.clip(current + drift + noise, 0, 1)
        else:  # L3 Evolver
            # Self-correcting: correction strength grows with the error.
            error = abs(target - current)
            correction = (target - current) * 0.15 * (1 + error)
            noise = np.random.normal(0, 0.05)
            current = np.clip(current + correction + noise, 0, 1)
        states.append(current)

    # Create trajectory plot.
    fig, ax = plt.subplots(figsize=(10, 4))
    ax.plot(range(len(states)), states, 'o-', linewidth=2, markersize=6, color='#6366f1')
    ax.axhline(y=target, color='red', linestyle='--', alpha=0.5, label='Target')
    ax.set_xlabel('Step')
    ax.set_ylabel('State Value')
    ax.set_title(f'{level} Rollout in {regime} Regime ({steps} steps)')
    ax.set_ylim(0, 1)
    ax.legend()
    ax.grid(True, alpha=0.3)

    plt.tight_layout()
    buf = BytesIO()
    plt.savefig(buf, format='png', dpi=100)
    buf.seek(0)
    img_base64 = base64.b64encode(buf.read()).decode()
    plt.close()

    # Analysis: distance from target plus stability over the last 5 steps.
    final_error = abs(target - states[-1])
    stability = np.std(states[-5:]) if len(states) >= 5 else np.std(states)

    analysis = "**Rollout Analysis**\n"
    analysis += f"- Final state: {states[-1]:.3f}\n"
    analysis += f"- Target error: {final_error:.3f}\n"
    analysis += f"- Terminal stability (std): {stability:.3f}\n"
    analysis += f"- Convergence: {'Yes' if final_error < 0.15 and stability < 0.1 else 'No'}\n"

    return f"data:image/png;base64,{img_base64}", analysis
179
+
180
# Gradio Interface
def create_interface():
    """Build the Gradio Blocks UI for the explorer.

    Four tabs: a capability-level comparison chart, a law-regime
    browser, a rollout simulation, and an About page. Returns the
    constructed (unlaunched) Blocks app.
    """
    with gr.Blocks(title="Agentic World Model Explorer") as demo:
        gr.Markdown("""
        # 🤖 Agentic World Model Explorer

        Interactive exploration of the "levels x laws" taxonomy from
        [Agentic World Modeling: Foundations, Capabilities, Laws, and Beyond](https://huggingface.co/papers/2604.22748)
        """)

        with gr.Tab("Capability Levels"):
            gr.Markdown("""
            ### The Three Levels of World Models

            - **L1 Predictor**: Learns one-step local transition operators
            - **L2 Simulator**: Composes predictions into multi-step, action-conditioned rollouts
            - **L3 Evolver**: Autonomously revises its model when predictions fail
            """)

            viz_btn = gr.Button("Generate Visualization")
            viz_output = gr.Image(label="Generalization Comparison")

            # Chart is rendered on demand; the handler returns a base64 data URI.
            viz_btn.click(fn=visualize_capability_levels, outputs=viz_output)

        with gr.Tab("Law Regimes"):
            gr.Markdown("Explore the four governing-law regimes that determine world model constraints.")

            regime_dropdown = gr.Dropdown(
                choices=["Physical", "Digital", "Social", "Scientific"],
                value="Physical",
                label="Select Law Regime"
            )
            regime_output = gr.Markdown()

            # Re-render the markdown summary whenever the selection changes.
            regime_dropdown.change(fn=explore_regime, inputs=regime_dropdown, outputs=regime_output)
            # Initialize: populate the panel on page load so it isn't empty
            # before the first dropdown interaction.
            demo.load(fn=lambda: explore_regime("Physical"), outputs=regime_output)

        with gr.Tab("Rollout Simulation"):
            gr.Markdown("Simulate world model rollouts at different capability levels.")

            with gr.Row():
                level_select = gr.Dropdown(
                    choices=["L1 Predictor", "L2 Simulator", "L3 Evolver"],
                    value="L2 Simulator",
                    label="Capability Level"
                )
                regime_select = gr.Dropdown(
                    choices=["Physical", "Digital", "Social", "Scientific"],
                    value="Physical",
                    label="Law Regime"
                )
                steps_slider = gr.Slider(10, 100, value=50, step=10, label="Steps")

            simulate_btn = gr.Button("Run Simulation")

            with gr.Row():
                trajectory_plot = gr.Image(label="State Trajectory")
                analysis_text = gr.Markdown()

            # simulate_rollout returns (image data URI, markdown analysis),
            # mapped positionally onto the two outputs.
            simulate_btn.click(
                fn=simulate_rollout,
                inputs=[level_select, regime_select, steps_slider],
                outputs=[trajectory_plot, analysis_text]
            )

        with gr.Tab("About"):
            gr.Markdown("""
            ### About This Demo

            This Space provides an interactive exploration of key concepts from the Agentic World Modeling paper.

            **Paper:** [2604.22748](https://huggingface.co/papers/2604.22748)

            **Key Insight:** World models should be evaluated not just on next-step prediction accuracy,
            but on their ability to maintain coherent multi-step rollouts that respect domain laws.

            **Citation:**
            ```
            @article{chu2026agentic,
              title={Agentic World Modeling: Foundations, Capabilities, Laws, and Beyond},
              author={Chu, Meng and Zhang, Xuan and Lin, Kevin and Kong, Lingdong and others},
              journal={arXiv preprint arXiv:2604.22748},
              year={2026}
            }
            ```
            """)

    return demo
269
+
270
# Script entry point: build the Blocks app and serve it.
if __name__ == "__main__":
    demo = create_interface()
    # Default launch settings; Spaces supplies host/port via environment.
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio==4.36.0
2
+ huggingface_hub==0.25.2
3
+ numpy
4
+ matplotlib