Spaces:

O96a
/

agentic-world-model-explorer

Sleeping

App Files Files Community

agentic-world-model-explorer / app.py

O96a

Upload folder using huggingface_hub

c619928 verified 10 days ago

raw

history blame contribute delete

10.9 kB

	"""
	Agentic World Model Explorer
	Interactive demo of world model capability levels and law regimes
	Based on: Agentic World Modeling: Foundations, Capabilities, Laws, and Beyond (2604.22748)
	"""

	import gradio as gr
	import numpy as np
	import matplotlib
	matplotlib.use('Agg')
	import matplotlib.pyplot as plt
	from io import BytesIO
	import base64

	# Placeholder slot for a lazily created model object.
	# NOTE(review): nothing in the visible file reads or assigns _MODEL, and
	# gradio/numpy/matplotlib are imported eagerly above - confirm whether this
	# placeholder is still needed.
	_MODEL = None

	def get_demo_data():
	    """Return the hard-coded demo series for the capability-levels chart.

	    Returns:
	        A 3-tuple ``(levels, in_dist, out_dist)`` of equal-length lists:
	        the tick label of each capability level, its simulated
	        in-distribution accuracy, and its simulated out-of-distribution
	        (compositional) accuracy.
	    """
	    # One row per capability level: (tick label, in-dist, out-of-dist).
	    # The values are simulated, not measured.
	    rows = (
	        ('L1\nPredictor', 0.92, 0.45),
	        ('L2\nSimulator', 0.89, 0.72),
	        ('L3\nEvolver', 0.87, 0.78),
	    )
	    # Transpose the table into three parallel lists.
	    levels, in_dist, out_dist = (list(column) for column in zip(*rows))
	    return levels, in_dist, out_dist

	def visualize_capability_levels():
	    """Render the grouped bar chart of accuracy per capability level.

	    Draws in-distribution and out-of-distribution accuracy side by side for
	    each level returned by ``get_demo_data()`` and labels every bar with its
	    value.

	    Returns:
	        A ``data:image/png;base64,...`` URI string holding the rendered PNG.
	        NOTE(review): this string is wired to a ``gr.Image`` output; verify
	        that the installed Gradio version renders data URIs.
	    """
	    levels, in_dist, out_dist = get_demo_data()

	    fig, ax = plt.subplots(figsize=(10, 6))
	    try:
	        x = np.arange(len(levels))
	        width = 0.35

	        bars_in = ax.bar(x - width / 2, in_dist, width,
	                         label='In-Distribution', color='#6366f1')
	        bars_out = ax.bar(x + width / 2, out_dist, width,
	                          label='Out-of-Distribution', color='#a855f7')

	        ax.set_ylabel('Accuracy')
	        ax.set_title('World Model Capability Levels: Generalization Gap')
	        ax.set_xticks(x)
	        ax.set_xticklabels(levels)
	        ax.legend()
	        ax.set_ylim(0, 1.0)

	        # Print each bar's value just above its top edge.
	        for bar in (*bars_in, *bars_out):
	            height = bar.get_height()
	            ax.annotate(f'{height:.2f}',
	                        xy=(bar.get_x() + bar.get_width() / 2, height),
	                        xytext=(0, 3), textcoords="offset points",
	                        ha='center', va='bottom', fontsize=9)

	        # Work on this figure explicitly: pyplot's implicit "current figure"
	        # is global state that concurrent requests would share.
	        fig.tight_layout()
	        buf = BytesIO()
	        fig.savefig(buf, format='png', dpi=100)
	    finally:
	        # Always release the figure, even if drawing or saving failed.
	        plt.close(fig)

	    img_base64 = base64.b64encode(buf.getvalue()).decode()
	    return f"data:image/png;base64,{img_base64}"

	def get_law_regime_info(regime):
	    """Look up the descriptive record for one law regime.

	    Args:
	        regime: Regime name; the known names are "Physical", "Digital",
	            "Social" and "Scientific".

	    Returns:
	        A freshly built dict with the keys ``description`` (str),
	        ``constraints``, ``failure_modes`` and ``examples`` (lists of str).
	        Any name that is not a known regime yields the "Physical" record.
	    """
	    def entry(description, constraints, failure_modes, examples):
	        # Keep the key order stable: description, constraints,
	        # failure_modes, examples.
	        return {
	            "description": description,
	            "constraints": constraints,
	            "failure_modes": failure_modes,
	            "examples": examples,
	        }

	    catalog = {
	        "Physical": entry(
	            "Object manipulation, physics simulation, robotics",
	            ["Newtonian mechanics", "Object permanence", "Collision dynamics"],
	            ["Impossible object states", "Gravity violations", "Penetration errors"],
	            ["Minecraft agents", "Robotic manipulation", "Physics simulators"],
	        ),
	        "Digital": entry(
	            "Web agents, GUI automation, software interaction",
	            ["API contracts", "UI state consistency", "Action preconditions"],
	            ["Invalid actions", "State desync", "Missing element refs"],
	            ["WebArena", "OSWorld", "Computer-Using Agents"],
	        ),
	        "Social": entry(
	            "Multi-agent coordination, negotiation, social dynamics",
	            ["Theory of mind", "Commitment consistency", "Communication protocols"],
	            ["Broken commitments", "Misaligned incentives", "Deadlocks"],
	            ["Social simulations", "Negotiation agents", "Game theory models"],
	        ),
	        "Scientific": entry(
	            "Experimental design, hypothesis testing, discovery",
	            ["Reproducibility", "Causal validity", "Measurement precision"],
	            ["Confounded variables", "Measurement error", "Overfitting"],
	            ["AI scientists", "Drug discovery", "Materials design"],
	        ),
	    }

	    try:
	        return catalog[regime]
	    except KeyError:
	        # Unknown names fall back to the Physical regime.
	        return catalog["Physical"]

	def explore_regime(regime):
	    """Format one law regime's details as Markdown text.

	    Args:
	        regime: Regime name. It is echoed verbatim in the heading, while the
	            body comes from ``get_law_regime_info(regime)``.

	    Returns:
	        A string with a ``##`` heading, the description line, and three
	        bulleted sections: key constraints, common failure modes and
	        example systems.
	    """
	    details = get_law_regime_info(regime)

	    def bullets(items):
	        # One "- item" line per entry, each terminated by a newline.
	        return "".join(f"- {item}\n" for item in items)

	    sections = (
	        f"## {regime} Law Regime\n\n",
	        f"Description: {details['description']}\n\n",
	        "Key Constraints:\n",
	        bullets(details['constraints']),
	        "\nCommon Failure Modes:\n",
	        bullets(details['failure_modes']),
	        "\nExample Systems:\n",
	        bullets(details['examples']),
	    )
	    return "".join(sections)

	def _rollout_states(level, target, steps):
	    """Return ``steps`` simulated scalar states for one capability level."""
	    # A private generator seeded with 42 yields the same stream that
	    # np.random.seed(42) would, without reseeding the process-wide RNG.
	    rng = np.random.RandomState(42)

	    states = []
	    current = 0.5
	    for _ in range(steps):
	        if level == "L1 Predictor":
	            # Simple next-step update: pure noise, no pull toward the target.
	            noise = rng.normal(0, 0.15)
	            current = np.clip(current + noise, 0, 1)
	        elif level == "L2 Simulator":
	            # Multi-step coherence: constant-rate drift toward the target.
	            drift = (target - current) * 0.1
	            noise = rng.normal(0, 0.08)
	            current = np.clip(current + drift + noise, 0, 1)
	        else:
	            # L3 Evolver (also used for any other level value): the
	            # correction grows with the size of the current error.
	            error = abs(target - current)
	            correction = (target - current) * 0.15 * (1 + error)
	            noise = rng.normal(0, 0.05)
	            current = np.clip(current + correction + noise, 0, 1)
	        states.append(current)
	    return states


	def simulate_rollout(level, regime, steps):
	    """Simulate a rollout and return its trajectory plot plus a text summary.

	    Args:
	        level: "L1 Predictor", "L2 Simulator" or "L3 Evolver"; any other
	            value uses the L3 Evolver dynamics.
	        regime: Law regime name. "Physical" and "Digital" use a target of
	            0.7; every other value uses 0.5.
	        steps: Number of steps to simulate. Converted with ``int()`` so a
	            float coming from a slider is accepted.

	    Returns:
	        A tuple ``(image_uri, analysis)``: a ``data:image/png;base64,...``
	        URI string of the trajectory plot and a multi-line text summary.
	        NOTE(review): the URI is wired to a ``gr.Image`` output; verify that
	        the installed Gradio version renders data URIs.

	    Raises:
	        ValueError: If ``steps`` is smaller than 1.
	    """
	    steps = int(steps)
	    if steps < 1:
	        # Fail before any figure is created; an empty rollout has no final
	        # state to report.
	        raise ValueError("steps must be a positive integer")

	    target = 0.7 if regime in ("Physical", "Digital") else 0.5
	    states = _rollout_states(level, target, steps)

	    # Create trajectory plot
	    fig, ax = plt.subplots(figsize=(10, 4))
	    try:
	        ax.plot(range(len(states)), states, 'o-', linewidth=2, markersize=6,
	                color='#6366f1')
	        ax.axhline(y=target, color='red', linestyle='--', alpha=0.5,
	                   label='Target')
	        ax.set_xlabel('Step')
	        ax.set_ylabel('State Value')
	        ax.set_title(f'{level} Rollout in {regime} Regime ({steps} steps)')
	        ax.set_ylim(0, 1)
	        ax.legend()
	        ax.grid(True, alpha=0.3)

	        # Work on this figure explicitly: pyplot's implicit "current figure"
	        # is global state that concurrent requests would share.
	        fig.tight_layout()
	        buf = BytesIO()
	        fig.savefig(buf, format='png', dpi=100)
	    finally:
	        # Always release the figure, even if drawing or saving failed.
	        plt.close(fig)
	    img_base64 = base64.b64encode(buf.getvalue()).decode()

	    # Analysis
	    final_error = abs(target - states[-1])
	    # Spread of the last five states (or of all states when fewer exist).
	    stability = np.std(states[-5:])
	    converged = final_error < 0.15 and stability < 0.1

	    analysis = (
	        "Rollout Analysis\n"
	        f"- Final state: {states[-1]:.3f}\n"
	        f"- Target error: {final_error:.3f}\n"
	        f"- Terminal stability (std): {stability:.3f}\n"
	        f"- Convergence: {'Yes' if converged else 'No'}\n"
	    )

	    return f"data:image/png;base64,{img_base64}", analysis

	# Gradio Interface
	def create_interface():
	    """Assemble the Gradio Blocks app and return it without launching.

	    The app has four tabs: "Capability Levels" (button-triggered bar chart),
	    "Law Regimes" (dropdown-driven Markdown summary, pre-filled on page
	    load), "Rollout Simulation" (level/regime/steps controls feeding
	    ``simulate_rollout``) and "About" (static text).

	    Returns:
	        The ``gr.Blocks`` instance; the caller is responsible for
	        ``launch()``.
	    """
	    regime_choices = ["Physical", "Digital", "Social", "Scientific"]
	    level_choices = ["L1 Predictor", "L2 Simulator", "L3 Evolver"]

	    with gr.Blocks(title="Agentic World Model Explorer") as demo:
	        gr.Markdown("""
	# 🤖 Agentic World Model Explorer

	Interactive exploration of the "levels x laws" taxonomy from
	[Agentic World Modeling: Foundations, Capabilities, Laws, and Beyond](https://huggingface.co/papers/2604.22748)
	""")

	        # --- Tab 1: static explanation plus an on-demand chart ---
	        with gr.Tab("Capability Levels"):
	            gr.Markdown("""
	### The Three Levels of World Models

	- L1 Predictor: Learns one-step local transition operators
	- L2 Simulator: Composes predictions into multi-step, action-conditioned rollouts
	- L3 Evolver: Autonomously revises its model when predictions fail
	""")

	            chart_button = gr.Button("Generate Visualization")
	            chart_image = gr.Image(label="Generalization Comparison")

	            chart_button.click(fn=visualize_capability_levels, outputs=chart_image)

	        # --- Tab 2: regime summary driven by a dropdown ---
	        with gr.Tab("Law Regimes"):
	            gr.Markdown("Explore the four governing-law regimes that determine world model constraints.")

	            regime_picker = gr.Dropdown(
	                choices=list(regime_choices),
	                value="Physical",
	                label="Select Law Regime",
	            )
	            regime_summary = gr.Markdown()

	            regime_picker.change(
	                fn=explore_regime,
	                inputs=regime_picker,
	                outputs=regime_summary,
	            )
	            # Fill the summary on page load so it is not empty before the
	            # first dropdown change.
	            demo.load(fn=lambda: explore_regime("Physical"), outputs=regime_summary)

	        # --- Tab 3: rollout controls, run button, then results ---
	        with gr.Tab("Rollout Simulation"):
	            gr.Markdown("Simulate world model rollouts at different capability levels.")

	            with gr.Row():
	                level_picker = gr.Dropdown(
	                    choices=list(level_choices),
	                    value="L2 Simulator",
	                    label="Capability Level",
	                )
	                rollout_regime_picker = gr.Dropdown(
	                    choices=list(regime_choices),
	                    value="Physical",
	                    label="Law Regime",
	                )
	                step_count = gr.Slider(minimum=10, maximum=100, value=50, step=10, label="Steps")

	            run_button = gr.Button("Run Simulation")

	            with gr.Row():
	                trajectory_image = gr.Image(label="State Trajectory")
	                rollout_summary = gr.Markdown()

	            run_button.click(
	                fn=simulate_rollout,
	                inputs=[level_picker, rollout_regime_picker, step_count],
	                outputs=[trajectory_image, rollout_summary],
	            )

	        # --- Tab 4: static background and citation ---
	        with gr.Tab("About"):
	            gr.Markdown("""
	### About This Demo

	This Space provides an interactive exploration of key concepts from the Agentic World Modeling paper.

	Paper: [2604.22748](https://huggingface.co/papers/2604.22748)

	Key Insight: World models should be evaluated not just on next-step prediction accuracy,
	but on their ability to maintain coherent multi-step rollouts that respect domain laws.

	Citation:
	```
	@article{chu2026agentic,
	title={Agentic World Modeling: Foundations, Capabilities, Laws, and Beyond},
	author={Chu, Meng and Zhang, Xuan and Lin, Kevin and Kong, Lingdong and others},
	journal={arXiv preprint arXiv:2604.22748},
	year={2026}
	}
	```
	""")

	    return demo

	# Create and launch
	# Build and serve the app only when this file is executed as a script;
	# importing the module defines the functions without starting a server.
	if __name__ == "__main__":
	    # Keep the Blocks object bound to the module-level name `demo`.
	    demo = create_interface()
	    demo.launch()