O96a committed on
Commit
c619928
·
verified ·
1 Parent(s): 914fcf6

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. README.md +27 -5
  2. app.py +273 -0
  3. requirements.txt +4 -0
README.md CHANGED
@@ -1,12 +1,34 @@
1
  ---
2
  title: Agentic World Model Explorer
3
- emoji: 🏃
4
- colorFrom: yellow
5
- colorTo: green
6
  sdk: gradio
7
- sdk_version: 6.13.0
8
  app_file: app.py
9
  pinned: false
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  title: Agentic World Model Explorer
3
+ colorFrom: blue
4
+ colorTo: purple
 
5
  sdk: gradio
6
+ sdk_version: 4.36.0
7
  app_file: app.py
8
  pinned: false
9
  ---
10
 
11
+ # Agentic World Model Explorer
12
+
13
+ An interactive exploration of the "levels x laws" taxonomy from the Agentic World Modeling paper (2604.22748).
14
+
15
+ ## What This Does
16
+
17
+ Demonstrates the three capability levels of world models:
18
+ - **L1 Predictor**: One-step local transitions
19
+ - **L2 Simulator**: Multi-step action-conditioned rollouts
20
+ - **L3 Evolver**: Self-revising models that update from prediction failures
21
+
22
+ Across four law regimes:
23
+ - Physical (object manipulation, physics)
24
+ - Digital (web/GUI agents, software)
25
+ - Social (multi-agent coordination)
26
+ - Scientific (experimental design)
27
+
28
+ ## Hypothesis
29
+
30
+ World models with explicit structured state representations (L2+) demonstrate better compositional generalization than pure next-token predictors when evaluated on out-of-distribution scenarios within the same law regime.
31
+
32
+ ## Findings
33
+
34
+ See the live demo for interactive examples of state representation strategies and their impact on generalization.
app.py ADDED
@@ -0,0 +1,273 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Agentic World Model Explorer
3
+ Interactive demo of world model capability levels and law regimes
4
+ Based on: Agentic World Modeling: Foundations, Capabilities, Laws, and Beyond (2604.22748)
5
+ """
6
+
7
+ import gradio as gr
8
+ import numpy as np
9
+ import matplotlib
10
+ matplotlib.use('Agg')
11
+ import matplotlib.pyplot as plt
12
+ from io import BytesIO
13
+ import base64
14
+
15
+ # Lazy loading - no heavy imports at module level
16
+ _MODEL = None
17
+
18
def get_demo_data():
    """Return demo accuracies for the capability-level chart.

    Returns:
        (labels, in_distribution, out_of_distribution) — three parallel
        lists. The numbers are hard-coded illustrative values, not
        measured results.
    """
    labels = ['L1\nPredictor', 'L2\nSimulator', 'L3\nEvolver']
    in_distribution = [0.92, 0.89, 0.87]
    out_of_distribution = [0.45, 0.72, 0.78]
    return labels, in_distribution, out_of_distribution
25
+
26
def visualize_capability_levels():
    """Render the capability-level generalization chart as a data URI.

    Builds a grouped bar chart (in- vs out-of-distribution accuracy for
    each capability level) from ``get_demo_data`` and returns it as a
    ``data:image/png;base64,...`` string for display in ``gr.Image``.

    Returns:
        str: PNG image encoded as a base64 data URI.
    """
    levels, in_dist, out_dist = get_demo_data()

    fig, ax = plt.subplots(figsize=(10, 6))
    x = np.arange(len(levels))
    width = 0.35

    bars1 = ax.bar(x - width/2, in_dist, width, label='In-Distribution', color='#6366f1')
    bars2 = ax.bar(x + width/2, out_dist, width, label='Out-of-Distribution', color='#a855f7')

    ax.set_ylabel('Accuracy')
    ax.set_title('World Model Capability Levels: Generalization Gap')
    ax.set_xticks(x)
    ax.set_xticklabels(levels)
    ax.legend()
    ax.set_ylim(0, 1.0)

    # Add value labels. A single loop over both bar containers replaces
    # the two byte-for-byte duplicated annotation loops.
    for bar in [*bars1, *bars2]:
        height = bar.get_height()
        ax.annotate(f'{height:.2f}',
                    xy=(bar.get_x() + bar.get_width() / 2, height),
                    xytext=(0, 3), textcoords="offset points",
                    ha='center', va='bottom', fontsize=9)

    plt.tight_layout()

    # Encode the figure in-memory; close it so Agg doesn't leak figures.
    buf = BytesIO()
    plt.savefig(buf, format='png', dpi=100)
    buf.seek(0)
    img_base64 = base64.b64encode(buf.read()).decode()
    plt.close()

    return f"data:image/png;base64,{img_base64}"
67
+
68
def get_law_regime_info(regime):
    """Look up the descriptor for one of the four law regimes.

    Args:
        regime: regime name ("Physical", "Digital", "Social",
            "Scientific"); any unknown name falls back to "Physical".

    Returns:
        dict with keys 'description', 'constraints', 'failure_modes'
        and 'examples'.
    """
    def _entry(description, constraints, failure_modes, examples):
        # Small builder so every regime record has an identical shape.
        return {
            "description": description,
            "constraints": constraints,
            "failure_modes": failure_modes,
            "examples": examples,
        }

    catalog = {
        "Physical": _entry(
            "Object manipulation, physics simulation, robotics",
            ["Newtonian mechanics", "Object permanence", "Collision dynamics"],
            ["Impossible object states", "Gravity violations", "Penetration errors"],
            ["Minecraft agents", "Robotic manipulation", "Physics simulators"],
        ),
        "Digital": _entry(
            "Web agents, GUI automation, software interaction",
            ["API contracts", "UI state consistency", "Action preconditions"],
            ["Invalid actions", "State desync", "Missing element refs"],
            ["WebArena", "OSWorld", "Computer-Using Agents"],
        ),
        "Social": _entry(
            "Multi-agent coordination, negotiation, social dynamics",
            ["Theory of mind", "Commitment consistency", "Communication protocols"],
            ["Broken commitments", "Misaligned incentives", "Deadlocks"],
            ["Social simulations", "Negotiation agents", "Game theory models"],
        ),
        "Scientific": _entry(
            "Experimental design, hypothesis testing, discovery",
            ["Reproducibility", "Causal validity", "Measurement precision"],
            ["Confounded variables", "Measurement error", "Overfitting"],
            ["AI scientists", "Drug discovery", "Materials design"],
        ),
    }
    return catalog.get(regime, catalog["Physical"])
97
+
98
def explore_regime(regime):
    """Format a markdown summary of the selected law regime.

    Args:
        regime: regime name passed through to ``get_law_regime_info``
            (unknown names fall back to "Physical" there).

    Returns:
        str: markdown with the regime's description, constraints,
        failure modes, and example systems.
    """
    info = get_law_regime_info(regime)

    # Build the lines and join once: the original used quadratic `+=`
    # concatenation and f-string prefixes on strings with no placeholders.
    parts = [
        f"## {regime} Law Regime",
        "",
        f"**Description:** {info['description']}",
        "",
        "**Key Constraints:**",
        *[f"- {item}" for item in info['constraints']],
        "",
        "**Common Failure Modes:**",
        *[f"- {item}" for item in info['failure_modes']],
        "",
        "**Example Systems:**",
        *[f"- {item}" for item in info['examples']],
    ]
    # Trailing newline matches the original's per-line "\n" appends.
    return "\n".join(parts) + "\n"
118
+
119
def simulate_rollout(level, regime, steps):
    """Simulate a toy world-model rollout and plot its state trajectory.

    Args:
        level: "L1 Predictor", "L2 Simulator", or "L3 Evolver" — selects
            the state-update dynamics.
        regime: law-regime name; Physical/Digital regimes target 0.7,
            all others 0.5.
        steps: number of rollout steps (UI slider supplies 10-100; a
            value of 0 would fail on states[-1] below).

    Returns:
        (data_uri, analysis) — a base64 PNG data URI of the trajectory
        plot and a markdown analysis string.
    """
    np.random.seed(42)  # fixed seed so repeated runs are reproducible

    # Hoisted: the original recomputed this same conditional in four
    # places (both branches, the axhline, and the error calculation).
    target = 0.7 if regime in ["Physical", "Digital"] else 0.5

    # Simulate state evolution. Exactly one np.random.normal draw per
    # step in every branch, preserving the original RNG stream.
    states = []
    current = 0.5
    for _ in range(steps):
        if level == "L1 Predictor":
            # Simple next-step noise: no pull toward the target.
            noise = np.random.normal(0, 0.15)
            current = np.clip(current + noise, 0, 1)
        elif level == "L2 Simulator":
            # Multi-step coherence: proportional drift toward the target.
            drift = (target - current) * 0.1
            noise = np.random.normal(0, 0.08)
            current = np.clip(current + drift + noise, 0, 1)
        else:  # L3 Evolver
            # Self-correcting: correction strength grows with the error.
            error = abs(target - current)
            correction = (target - current) * 0.15 * (1 + error)
            noise = np.random.normal(0, 0.05)
            current = np.clip(current + correction + noise, 0, 1)
        states.append(current)

    # Create trajectory plot.
    fig, ax = plt.subplots(figsize=(10, 4))
    ax.plot(range(len(states)), states, 'o-', linewidth=2, markersize=6, color='#6366f1')
    ax.axhline(y=target, color='red', linestyle='--', alpha=0.5, label='Target')
    ax.set_xlabel('Step')
    ax.set_ylabel('State Value')
    ax.set_title(f'{level} Rollout in {regime} Regime ({steps} steps)')
    ax.set_ylim(0, 1)
    ax.legend()
    ax.grid(True, alpha=0.3)

    plt.tight_layout()
    buf = BytesIO()
    plt.savefig(buf, format='png', dpi=100)
    buf.seek(0)
    img_base64 = base64.b64encode(buf.read()).decode()
    plt.close()

    # Analysis: distance from target plus stability over the last 5 steps.
    final_error = abs(target - states[-1])
    stability = np.std(states[-5:]) if len(states) >= 5 else np.std(states)

    analysis = "**Rollout Analysis**\n"
    analysis += f"- Final state: {states[-1]:.3f}\n"
    analysis += f"- Target error: {final_error:.3f}\n"
    analysis += f"- Terminal stability (std): {stability:.3f}\n"
    analysis += f"- Convergence: {'Yes' if final_error < 0.15 and stability < 0.1 else 'No'}\n"

    return f"data:image/png;base64,{img_base64}", analysis
179
+
180
# Gradio Interface
def create_interface():
    """Build the Gradio Blocks UI for the explorer.

    Four tabs: a capability-level comparison chart, a law-regime
    browser, a rollout simulation, and an About page. Returns the
    constructed (unlaunched) Blocks app.
    """
    with gr.Blocks(title="Agentic World Model Explorer") as demo:
        gr.Markdown("""
        # 🤖 Agentic World Model Explorer

        Interactive exploration of the "levels x laws" taxonomy from
        [Agentic World Modeling: Foundations, Capabilities, Laws, and Beyond](https://huggingface.co/papers/2604.22748)
        """)

        with gr.Tab("Capability Levels"):
            gr.Markdown("""
            ### The Three Levels of World Models

            - **L1 Predictor**: Learns one-step local transition operators
            - **L2 Simulator**: Composes predictions into multi-step, action-conditioned rollouts
            - **L3 Evolver**: Autonomously revises its model when predictions fail
            """)

            viz_btn = gr.Button("Generate Visualization")
            viz_output = gr.Image(label="Generalization Comparison")

            # Chart is rendered on demand; the handler returns a base64 data URI.
            viz_btn.click(fn=visualize_capability_levels, outputs=viz_output)

        with gr.Tab("Law Regimes"):
            gr.Markdown("Explore the four governing-law regimes that determine world model constraints.")

            regime_dropdown = gr.Dropdown(
                choices=["Physical", "Digital", "Social", "Scientific"],
                value="Physical",
                label="Select Law Regime"
            )
            regime_output = gr.Markdown()

            # Re-render the markdown summary whenever the selection changes.
            regime_dropdown.change(fn=explore_regime, inputs=regime_dropdown, outputs=regime_output)
            # Initialize: populate the panel on page load so it isn't empty
            # before the first dropdown interaction.
            demo.load(fn=lambda: explore_regime("Physical"), outputs=regime_output)

        with gr.Tab("Rollout Simulation"):
            gr.Markdown("Simulate world model rollouts at different capability levels.")

            with gr.Row():
                level_select = gr.Dropdown(
                    choices=["L1 Predictor", "L2 Simulator", "L3 Evolver"],
                    value="L2 Simulator",
                    label="Capability Level"
                )
                regime_select = gr.Dropdown(
                    choices=["Physical", "Digital", "Social", "Scientific"],
                    value="Physical",
                    label="Law Regime"
                )
                steps_slider = gr.Slider(10, 100, value=50, step=10, label="Steps")

            simulate_btn = gr.Button("Run Simulation")

            with gr.Row():
                trajectory_plot = gr.Image(label="State Trajectory")
                analysis_text = gr.Markdown()

            # simulate_rollout returns (image data URI, markdown analysis),
            # mapped positionally onto the two outputs.
            simulate_btn.click(
                fn=simulate_rollout,
                inputs=[level_select, regime_select, steps_slider],
                outputs=[trajectory_plot, analysis_text]
            )

        with gr.Tab("About"):
            gr.Markdown("""
            ### About This Demo

            This Space provides an interactive exploration of key concepts from the Agentic World Modeling paper.

            **Paper:** [2604.22748](https://huggingface.co/papers/2604.22748)

            **Key Insight:** World models should be evaluated not just on next-step prediction accuracy,
            but on their ability to maintain coherent multi-step rollouts that respect domain laws.

            **Citation:**
            ```
            @article{chu2026agentic,
              title={Agentic World Modeling: Foundations, Capabilities, Laws, and Beyond},
              author={Chu, Meng and Zhang, Xuan and Lin, Kevin and Kong, Lingdong and others},
              journal={arXiv preprint arXiv:2604.22748},
              year={2026}
            }
            ```
            """)

    return demo
269
+
270
# Script entry point: build the Blocks app and serve it.
if __name__ == "__main__":
    demo = create_interface()
    # Default launch settings; Spaces supplies host/port via environment.
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio==4.36.0
2
+ huggingface_hub==0.25.2
3
+ numpy
4
+ matplotlib