"""
Agentic World Model Explorer
Interactive demo of world model capability levels and law regimes
Based on: Agentic World Modeling: Foundations, Capabilities, Laws, and Beyond (2604.22748)
"""

import gradio as gr
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from io import BytesIO
import base64

# NOTE(review): gradio, numpy and matplotlib are imported eagerly above, so
# nothing is lazily loaded here. _MODEL is a placeholder that no function in
# the visible part of this file reads or assigns.
_MODEL = None

def get_demo_data():
    """Return the hard-coded figures plotted in the capability-level chart.

    Returns:
        A ``(levels, in_dist, out_dist)`` tuple of three parallel lists:
        the tick label of each capability level, its simulated
        in-distribution accuracy, and its simulated out-of-distribution
        (compositional) accuracy.
    """
    # One row per capability level: (tick label, in-dist, out-of-dist).
    # The numbers are simulated for the demo, not measured.
    rows = (
        ('L1\nPredictor', 0.92, 0.45),
        ('L2\nSimulator', 0.89, 0.72),
        ('L3\nEvolver', 0.87, 0.78),
    )
    # Transpose the rows into three fresh lists so each call hands back
    # independent, mutable containers.
    levels, in_dist, out_dist = (list(column) for column in zip(*rows))
    return levels, in_dist, out_dist

def visualize_capability_levels():
    """Render the capability-level accuracy chart as a PNG data URI.

    Draws a grouped bar chart (in-distribution vs. out-of-distribution
    accuracy per capability level) from ``get_demo_data()`` and labels each
    bar with its value.

    Returns:
        A ``data:image/png;base64,...`` string containing the rendered chart.
    """
    levels, in_dist, out_dist = get_demo_data()

    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        x = np.arange(len(levels))
        width = 0.35

        # (bar positions, heights, legend label, colour) for each series;
        # the two series sit side by side around every tick.
        series = (
            (x - width / 2, in_dist, 'In-Distribution', '#6366f1'),
            (x + width / 2, out_dist, 'Out-of-Distribution', '#a855f7'),
        )
        for positions, values, label, color in series:
            bars = ax.bar(positions, values, width, label=label, color=color)
            # Print each bar's value 3 points above its top edge.
            for bar in bars:
                height = bar.get_height()
                ax.annotate(f'{height:.2f}',
                            xy=(bar.get_x() + bar.get_width() / 2, height),
                            xytext=(0, 3), textcoords="offset points",
                            ha='center', va='bottom', fontsize=9)

        ax.set_ylabel('Accuracy')
        ax.set_title('World Model Capability Levels: Generalization Gap')
        ax.set_xticks(x)
        ax.set_xticklabels(levels)
        ax.legend()
        ax.set_ylim(0, 1.0)

        # Work on this figure explicitly instead of pyplot's implicit
        # "current figure", which is shared global state.
        fig.tight_layout()
        buf = BytesIO()
        fig.savefig(buf, format='png', dpi=100)
    finally:
        # Always release the figure, even if drawing or saving failed,
        # so repeated calls do not accumulate open figures.
        plt.close(fig)

    img_base64 = base64.b64encode(buf.getvalue()).decode()

    # NOTE(review): the result is wired to a gr.Image output; confirm the
    # installed Gradio version accepts a data-URI string there.
    return f"data:image/png;base64,{img_base64}"

def get_law_regime_info(regime):
    """Look up the catalog entry for a law regime.

    Args:
        regime: Regime name; the known names are "Physical", "Digital",
            "Social" and "Scientific".

    Returns:
        A dict with the keys "description" (str), "constraints",
        "failure_modes" and "examples" (each a list of str). Any name that
        is not in the catalog yields the "Physical" entry.
    """
    fields = ("description", "constraints", "failure_modes", "examples")

    # Per regime, the values line up positionally with ``fields``.
    rows = {
        "Physical": (
            "Object manipulation, physics simulation, robotics",
            ["Newtonian mechanics", "Object permanence", "Collision dynamics"],
            ["Impossible object states", "Gravity violations", "Penetration errors"],
            ["Minecraft agents", "Robotic manipulation", "Physics simulators"],
        ),
        "Digital": (
            "Web agents, GUI automation, software interaction",
            ["API contracts", "UI state consistency", "Action preconditions"],
            ["Invalid actions", "State desync", "Missing element refs"],
            ["WebArena", "OSWorld", "Computer-Using Agents"],
        ),
        "Social": (
            "Multi-agent coordination, negotiation, social dynamics",
            ["Theory of mind", "Commitment consistency", "Communication protocols"],
            ["Broken commitments", "Misaligned incentives", "Deadlocks"],
            ["Social simulations", "Negotiation agents", "Game theory models"],
        ),
        "Scientific": (
            "Experimental design, hypothesis testing, discovery",
            ["Reproducibility", "Causal validity", "Measurement precision"],
            ["Confounded variables", "Measurement error", "Overfitting"],
            ["AI scientists", "Drug discovery", "Materials design"],
        ),
    }
    catalog = {name: dict(zip(fields, values)) for name, values in rows.items()}

    if regime in catalog:
        return catalog[regime]
    # Unknown names fall back to the Physical regime.
    return catalog["Physical"]

def explore_regime(regime):
    """Format a law regime's catalog entry as Markdown.

    Args:
        regime: Regime name, passed to ``get_law_regime_info`` and echoed
            verbatim in the heading.

    Returns:
        A Markdown string with a heading, the description, and three
        bulleted sections (constraints, failure modes, example systems),
        ending with a newline.
    """
    details = get_law_regime_info(regime)

    def bullet_section(heading, entries):
        # A heading line followed by one "- item" line per entry.
        return [heading, *(f"- {entry}" for entry in entries)]

    # Empty strings become the blank lines that separate the sections.
    lines = [
        f"## {regime} Law Regime",
        "",
        f"**Description:** {details['description']}",
        "",
        *bullet_section("**Key Constraints:**", details['constraints']),
        "",
        *bullet_section("**Common Failure Modes:**", details['failure_modes']),
        "",
        *bullet_section("**Example Systems:**", details['examples']),
    ]
    return "\n".join(lines) + "\n"

def _regime_target(regime):
    """Return the state value that rollouts in ``regime`` are pulled toward."""
    return 0.7 if regime in ("Physical", "Digital") else 0.5


def _rollout_states(level, regime, steps):
    """Generate the list of simulated state values for one rollout."""
    # A private, fixed-seed generator gives the same sequence that seeding
    # the global NumPy RNG with 42 would, without touching global state
    # that other code or concurrent requests may depend on.
    rng = np.random.RandomState(42)
    target = _regime_target(regime)

    states = []
    current = 0.5
    for _ in range(steps):
        if level == "L1 Predictor":
            # Pure random walk: no pull toward the target.
            noise = rng.normal(0, 0.15)
            current = np.clip(current + noise, 0, 1)
        elif level == "L2 Simulator":
            # Constant-rate drift toward the target plus moderate noise.
            drift = (target - current) * 0.1
            noise = rng.normal(0, 0.08)
            current = np.clip(current + drift + noise, 0, 1)
        else:
            # Any other level is treated as "L3 Evolver": the pull toward
            # the target grows with the current error, and noise is lowest.
            error = abs(target - current)
            correction = (target - current) * 0.15 * (1 + error)
            noise = rng.normal(0, 0.05)
            current = np.clip(current + correction + noise, 0, 1)
        states.append(current)
    return states


def simulate_rollout(level, regime, steps):
    """Simulate a rollout and return its trajectory plot and a text summary.

    The trajectory starts at 0.5 and is deterministic for a given
    ``(level, regime, steps)`` because the noise comes from a fixed-seed
    generator.

    Args:
        level: "L1 Predictor", "L2 Simulator", or anything else, which is
            handled as "L3 Evolver".
        regime: Law regime name; "Physical" and "Digital" use a target of
            0.7, every other value uses 0.5.
        steps: Number of simulated steps. Integral floats such as ``50.0``
            are accepted and truncated with ``int()``.

    Returns:
        A ``(image, analysis)`` tuple: a ``data:image/png;base64,...``
        string with the trajectory plot, and a Markdown summary of the
        final state, target error, terminal stability and convergence.

    Raises:
        ValueError: If ``steps`` is less than 1, since the summary needs at
            least one state.
    """
    # UI sliders may deliver the value as a float, which range() rejects.
    steps = int(steps)
    if steps < 1:
        raise ValueError("steps must be at least 1")

    target = _regime_target(regime)
    states = _rollout_states(level, regime, steps)

    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        ax.plot(range(len(states)), states, 'o-', linewidth=2, markersize=6, color='#6366f1')
        ax.axhline(y=target, color='red', linestyle='--', alpha=0.5, label='Target')
        ax.set_xlabel('Step')
        ax.set_ylabel('State Value')
        ax.set_title(f'{level} Rollout in {regime} Regime ({steps} steps)')
        ax.set_ylim(0, 1)
        ax.legend()
        ax.grid(True, alpha=0.3)

        # Operate on this figure explicitly rather than pyplot's implicit
        # "current figure", which is shared global state.
        fig.tight_layout()
        buf = BytesIO()
        fig.savefig(buf, format='png', dpi=100)
    finally:
        # Release the figure even when plotting or saving fails.
        plt.close(fig)
    img_base64 = base64.b64encode(buf.getvalue()).decode()

    final_error = abs(target - states[-1])
    # Spread of the last (up to) five states; the slice already copes with
    # trajectories shorter than five.
    stability = np.std(states[-5:])
    converged = final_error < 0.15 and stability < 0.1

    analysis = "**Rollout Analysis**\n"
    analysis += f"- Final state: {states[-1]:.3f}\n"
    analysis += f"- Target error: {final_error:.3f}\n"
    analysis += f"- Terminal stability (std): {stability:.3f}\n"
    analysis += f"- Convergence: {'Yes' if converged else 'No'}\n"

    return f"data:image/png;base64,{img_base64}", analysis

# Gradio Interface
def create_interface():
    """Build the Gradio Blocks app for the explorer.

    The app has four tabs: "Capability Levels" (a button that renders the
    bar chart), "Law Regimes" (a dropdown whose selection is rendered as
    Markdown), "Rollout Simulation" (level/regime/steps inputs feeding
    ``simulate_rollout``) and "About" (static text).

    Returns:
        The constructed ``gr.Blocks`` object; the caller is responsible for
        launching it.
    """
    with gr.Blocks(title="Agentic World Model Explorer") as demo:
        gr.Markdown("""
        # 🤖 Agentic World Model Explorer
        
        Interactive exploration of the "levels x laws" taxonomy from 
        [Agentic World Modeling: Foundations, Capabilities, Laws, and Beyond](https://huggingface.co/papers/2604.22748)
        """)
        
        # Tab 1: static explanation plus an on-demand chart.
        with gr.Tab("Capability Levels"):
            gr.Markdown("""
            ### The Three Levels of World Models
            
            - **L1 Predictor**: Learns one-step local transition operators
            - **L2 Simulator**: Composes predictions into multi-step, action-conditioned rollouts
            - **L3 Evolver**: Autonomously revises its model when predictions fail
            """)
            
            viz_btn = gr.Button("Generate Visualization")
            viz_output = gr.Image(label="Generalization Comparison")
            
            # NOTE(review): visualize_capability_levels returns a base64
            # data-URI string; confirm the installed Gradio version's
            # gr.Image accepts that as an output value.
            viz_btn.click(fn=visualize_capability_levels, outputs=viz_output)
        
        # Tab 2: regime details re-rendered whenever the dropdown changes.
        with gr.Tab("Law Regimes"):
            gr.Markdown("Explore the four governing-law regimes that determine world model constraints.")
            
            regime_dropdown = gr.Dropdown(
                choices=["Physical", "Digital", "Social", "Scientific"],
                value="Physical",
                label="Select Law Regime"
            )
            regime_output = gr.Markdown()
            
            regime_dropdown.change(fn=explore_regime, inputs=regime_dropdown, outputs=regime_output)
            # Fill the panel on page load so it matches the dropdown's
            # default ("Physical") before the user changes anything.
            demo.load(fn=lambda: explore_regime("Physical"), outputs=regime_output)
        
        # Tab 3: run simulate_rollout with the chosen level, regime and steps.
        with gr.Tab("Rollout Simulation"):
            gr.Markdown("Simulate world model rollouts at different capability levels.")
            
            with gr.Row():
                level_select = gr.Dropdown(
                    choices=["L1 Predictor", "L2 Simulator", "L3 Evolver"],
                    value="L2 Simulator",
                    label="Capability Level"
                )
                regime_select = gr.Dropdown(
                    choices=["Physical", "Digital", "Social", "Scientific"],
                    value="Physical",
                    label="Law Regime"
                )
                # 10 to 100 steps in increments of 10, defaulting to 50.
                steps_slider = gr.Slider(10, 100, value=50, step=10, label="Steps")
            
            simulate_btn = gr.Button("Run Simulation")
            
            with gr.Row():
                trajectory_plot = gr.Image(label="State Trajectory")
                analysis_text = gr.Markdown()
            
            # simulate_rollout returns (image, analysis), matching the
            # order of the two outputs below.
            simulate_btn.click(
                fn=simulate_rollout,
                inputs=[level_select, regime_select, steps_slider],
                outputs=[trajectory_plot, analysis_text]
            )
        
        # Tab 4: static background text and citation.
        with gr.Tab("About"):
            gr.Markdown("""
            ### About This Demo
            
            This Space provides an interactive exploration of key concepts from the Agentic World Modeling paper.
            
            **Paper:** [2604.22748](https://huggingface.co/papers/2604.22748)
            
            **Key Insight:** World models should be evaluated not just on next-step prediction accuracy, 
            but on their ability to maintain coherent multi-step rollouts that respect domain laws.
            
            **Citation:**
            ```
            @article{chu2026agentic,
              title={Agentic World Modeling: Foundations, Capabilities, Laws, and Beyond},
              author={Chu, Meng and Zhang, Xuan and Lin, Kevin and Kong, Lingdong and others},
              journal={arXiv preprint arXiv:2604.22748},
              year={2026}
            }
            ```
            """)
    
    return demo

# Build the UI and start the Gradio server only when this file is executed as
# a script; merely importing the module does not launch anything.
if __name__ == "__main__":
    demo = create_interface()
    demo.launch()