# Source: Hugging Face Space "AniketAsla/debatefloor" (status: Running)
# File size: 30,355 bytes | revision b4ac377

"""

Using Environments

==================



**Part 2 of 5** in the OpenEnv Getting Started Series



This notebook covers how to use OpenEnv environments: connecting to them,

creating AI policies, running evaluations, and working with different games.



.. note::

    **Time**: ~15 minutes | **Difficulty**: Beginner-Intermediate | **GPU Required**: No



What You'll Learn

-----------------



- **Connection Methods**: Hub, Docker, and direct URL connections

- **Available Environments**: OpenSpiel games, coding, browsing, and more

- **Creating Policies**: Random, heuristic, and learning-based strategies

- **Running Evaluations**: Measuring and comparing policy performance

"""

# %%
# Part 1: Setup
# -------------
#
# Let's set up our environment and imports.

import random
import subprocess
import sys
from pathlib import Path

import nest_asyncio
nest_asyncio.apply()

# Detect environment: a successful import of ``google.colab`` is treated as
# "running inside Google Colab".
try:
    import google.colab  # noqa: F401 - imported only to probe for Colab

    IN_COLAB = True
except ImportError:
    IN_COLAB = False

if IN_COLAB:
    print("=" * 70)
    print("   GOOGLE COLAB DETECTED - Installing OpenEnv...")
    print("=" * 70)

    # pip output is captured to keep the notebook quiet, so the exit code has
    # to be checked explicitly; otherwise a failed install would be announced
    # as a success.
    install = subprocess.run(
        [sys.executable, "-m", "pip", "install", "-q", "openenv-core"],
        capture_output=True,
        text=True,
    )
    if install.returncode == 0:
        print("   OpenEnv installed!")
    else:
        print(f"   OpenEnv installation FAILED (pip exit code {install.returncode})")
        if install.stderr:
            print(install.stderr.strip())
    print("=" * 70)
else:
    print("=" * 70)
    print("   RUNNING LOCALLY")
    print("=" * 70)

    # Add src and envs to path for local development. Both directories are
    # looked up three levels above the current working directory.
    src_path = Path.cwd().parent.parent.parent / "src"
    if src_path.exists():
        sys.path.insert(0, str(src_path))
    envs_path = Path.cwd().parent.parent.parent / "envs"
    if envs_path.exists():
        # NOTE(review): the *parent* of envs/ is what gets added to sys.path,
        # not envs/ itself - confirm this matches how openspiel_env is imported.
        sys.path.insert(0, str(envs_path.parent))

    print("=" * 70)

print()

# %%
# Part 2: Available Environments
# ------------------------------
#
# OpenEnv includes a growing collection of environments for different RL tasks.
#
# OpenSpiel Games
# ~~~~~~~~~~~~~~~
#
# OpenSpiel (from DeepMind) provides 70+ game environments. OpenEnv wraps
# several of these:
#
# +------------------+-------------+------------------------------------------+
# | Game             | Players     | Description                              |
# +==================+=============+==========================================+
# | **Catch**        | 1           | Catch falling ball with paddle           |
# +------------------+-------------+------------------------------------------+
# | **2048**         | 1           | Slide tiles to combine numbers           |
# +------------------+-------------+------------------------------------------+
# | **Blackjack**    | 1           | Classic card game vs dealer              |
# +------------------+-------------+------------------------------------------+
# | **Cliff Walking**| 1           | Navigate grid, avoid cliffs              |
# +------------------+-------------+------------------------------------------+
# | **Tic-Tac-Toe**  | 2           | Classic 3x3 grid game                    |
# +------------------+-------------+------------------------------------------+
# | **Kuhn Poker**   | 2           | Simplified poker with 3 cards            |
# +------------------+-------------+------------------------------------------+
#
# Other Environment Types
# ~~~~~~~~~~~~~~~~~~~~~~~
#
# +------------------+--------------------------------------------------+
# | Environment      | Description                                      |
# +==================+==================================================+
# | **Coding Env**   | Execute and evaluate code solutions              |
# +------------------+--------------------------------------------------+
# | **BrowserGym**   | Web browsing and interaction                     |
# +------------------+--------------------------------------------------+
# | **TextArena**    | Text-based game environments                     |
# +------------------+--------------------------------------------------+
# | **Atari**        | Classic Atari 2600 games                         |
# +------------------+--------------------------------------------------+
# | **Snake**        | Classic snake game                               |
# +------------------+--------------------------------------------------+

# %%
# Part 3: Connecting to Environments
# ----------------------------------
#
# OpenEnv provides three ways to connect to environments.

print("=" * 70, "   CONNECTION METHODS", "=" * 70, sep="\n")

# Probe for the OpenSpiel client package. IMPORTS_OK records the outcome so
# the following cells can fall back to static text when it is missing.
try:
    from openspiel_env.client import OpenSpielEnv
    from openspiel_env.models import OpenSpielAction, OpenSpielObservation, OpenSpielState
except ImportError as e:
    IMPORTS_OK = False
    print(f"✗ Import error: {e}")
else:
    IMPORTS_OK = True
    print("✓ Imports successful")

# %%
# Method 1: From Hugging Face Hub
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# The easiest way to get started - automatically downloads and runs the container.
# Let's examine the actual method signature:

print("\n" + "-" * 70, "METHOD 1: FROM HUGGING FACE HUB", "-" * 70, sep="\n")

if not IMPORTS_OK:
    # Client package missing: show a static description instead.
    print("\n(OpenEnv not installed - showing expected signature)")
    print("\nSignature: OpenSpielEnv.from_hub(repo_id, *, use_docker=True, ...)")
else:
    import inspect  # the Method 2 and Method 3 cells reuse this import

    if not hasattr(OpenSpielEnv, "from_hub"):
        print("\nfrom_hub method not available in this version")
    else:
        sig = inspect.signature(OpenSpielEnv.from_hub)
        print(f"\nSignature: OpenSpielEnv.from_hub{sig}")

        # Show the first docstring line as a one-line summary, if there is one
        hub_doc = OpenSpielEnv.from_hub.__doc__
        if hub_doc:
            doc_lines = hub_doc.strip().split("\n")[:3]
            print(f"Purpose: {doc_lines[0].strip()}")

    print(
        "\nUsage:",
        "    env = OpenSpielEnv.from_hub('openenv/openspiel-env')",
        "\nWhat happens:",
        "    1. Pulls Docker image from HF registry",
        "    2. Starts container on available port",
        "    3. Connects via WebSocket",
        "    4. Cleans up on close()",
        sep="\n",
    )

# %%
# Method 2: From Docker Image
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# Use a locally built or pulled Docker image:

print("\n" + "-" * 70, "METHOD 2: FROM DOCKER IMAGE", "-" * 70, sep="\n")

if not IMPORTS_OK:
    # Client package missing: show a static description instead.
    print("\n(OpenEnv not installed - showing expected signature)")
    print("\nSignature: OpenSpielEnv.from_docker_image(image, provider=None, ...)")
else:
    if not hasattr(OpenSpielEnv, "from_docker_image"):
        print("\nfrom_docker_image method not available in this version")
    else:
        sig = inspect.signature(OpenSpielEnv.from_docker_image)
        print(f"\nSignature: OpenSpielEnv.from_docker_image{sig}")

        # Show the first docstring line as a one-line summary, if there is one
        docker_doc = OpenSpielEnv.from_docker_image.__doc__
        if docker_doc:
            doc_lines = docker_doc.strip().split("\n")[:3]
            print(f"Purpose: {doc_lines[0].strip()}")

    print(
        "\nUsage:",
        "    # Build image first:",
        "    # docker build -t openspiel-env:latest ./envs/openspiel_env/server",
        "    env = OpenSpielEnv.from_docker_image('openspiel-env:latest')",
        sep="\n",
    )

# %%
# Method 3: Direct URL Connection
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# Connect to an already-running server:

print("\n" + "-" * 70, "METHOD 3: DIRECT URL CONNECTION", "-" * 70, sep="\n")

if not IMPORTS_OK:
    # Client package missing: show a static description instead.
    print("\n(OpenEnv not installed - showing expected signature)")
    print("\nSignature: OpenSpielEnv(base_url, connect_timeout_s=10.0, ...)")
else:
    # The constructor signature is read from the class at runtime
    sig = inspect.signature(OpenSpielEnv.__init__)
    print(
        f"\nSignature: OpenSpielEnv{sig}",
        "\nUsage:",
        "    # Start server first:",
        "    # docker run -p 8000:8000 openenv/openspiel-env:latest",
        "    env = OpenSpielEnv(base_url='http://localhost:8000')",
        "\nNote: Does NOT manage container lifecycle - you control the server",
        sep="\n",
    )

# %%
# Using Context Managers
# ~~~~~~~~~~~~~~~~~~~~~~
#
# Always use context managers to ensure proper cleanup. Let's verify the
# client supports the context manager protocol:

print("\n" + "-" * 70, "CONTEXT MANAGER SUPPORT", "-" * 70, sep="\n")

if not IMPORTS_OK:
    print("\n(OpenEnv not installed)")
    print("Context managers are supported for automatic cleanup")
else:
    # A class works with ``with`` when it defines both protocol methods
    has_enter = hasattr(OpenSpielEnv, "__enter__")
    has_exit = hasattr(OpenSpielEnv, "__exit__")
    status_text = {True: "✓ Present", False: "✗ Missing"}
    print(f"\n__enter__ method: {status_text[has_enter]}")
    print(f"__exit__ method:  {status_text[has_exit]}")

    if has_enter and has_exit:
        print(
            "\n✓ Context manager supported! Use with 'with' statement:",
            "    with OpenSpielEnv(base_url='...') as env:",
            "        result = env.reset()",
            "        # ... use env ...",
            "    # Automatically cleaned up",
            sep="\n",
        )

# %%
# Part 4: The Environment Loop
# ----------------------------
#
# Every OpenEnv interaction follows the same pattern:
#
# 1. ``reset()`` - Start a new episode
# 2. ``step(action)`` - Take action, get observation/reward
# 3. Repeat until ``done``
# 4. ``state()`` - Get episode metadata (optional)
#
# Let's demonstrate this with an actual episode:

print("=" * 70, "   THE ENVIRONMENT LOOP - LIVE DEMO", "=" * 70, "", sep="\n")

# Dimensions of the locally simulated Catch grid
GRID_HEIGHT = 10
GRID_WIDTH = 5


class DemoObservation:
    """Minimal stand-in for an observation: grid encoding, legal actions, done flag."""

    def __init__(self, info_state, legal_actions, done=False):
        self.info_state, self.legal_actions, self.done = info_state, legal_actions, done


class DemoResult:
    """Minimal stand-in for a step result: observation, reward, done flag."""

    def __init__(self, observation, reward=0.0, done=False):
        self.observation, self.reward, self.done = observation, reward, done


# Episode start: the ball gets a random column, the paddle starts in the middle
ball_col = random.randint(0, GRID_WIDTH - 1)
paddle_col = GRID_WIDTH // 2

print("Episode Starting:")
print(f"  Ball column: {ball_col}")
print(f"  Paddle column: {paddle_col}")
print()

step_count = 0
total_reward = 0.0
action_names = {0: "LEFT", 1: "STAY", 2: "RIGHT"}

print("Step | Ball Row | Paddle | Action | Info State (first 10)")
print("-" * 65)

# The ball drops one row per step; the paddle lives in the bottom row.
for ball_row in range(GRID_HEIGHT):
    # Flat row-major grid with a 1.0 at the ball cell and at the paddle cell
    info_state = [0.0] * (GRID_HEIGHT * GRID_WIDTH)
    for hot_index in (
        ball_row * GRID_WIDTH + ball_col,
        (GRID_HEIGHT - 1) * GRID_WIDTH + paddle_col,
    ):
        info_state[hot_index] = 1.0

    obs = DemoObservation(info_state=info_state, legal_actions=[0, 1, 2])

    # direction is -1 / 0 / +1 toward the ball; action ids are LEFT=0, STAY=1, RIGHT=2
    direction = (ball_col > paddle_col) - (ball_col < paddle_col)
    action_id = direction + 1

    # Report the state as it was before the action is applied
    info_preview = [format(cell, ".0f") for cell in info_state[:10]]
    print(f"  {step_count:2d}  |    {ball_row:2d}    |   {paddle_col}    | {action_names[action_id]:<5}  | {info_preview}")

    # Apply the move, keeping the paddle inside the grid
    paddle_col = min(GRID_WIDTH - 1, max(0, paddle_col + direction))

    step_count += 1

# The episode is won when the paddle ends up in the ball's column
caught = paddle_col == ball_col
reward = float(caught)

print("-" * 65)
print()
print("Episode Complete:")
print(f"  Steps: {step_count}")
print(f"  Ball landed at: column {ball_col}")
print(f"  Paddle position: column {paddle_col}")
print(f"  Reward: {reward}")
print(f"  Result: {'CAUGHT! ✓' if caught else 'MISSED! ✗'}")
print()
print("This is the exact same loop you'd run with a live server,")
print("just using local simulation for the game logic.")

# %%
# Part 5: Creating AI Policies
# ----------------------------
#
# A policy is a function that chooses actions based on observations.
# Let's create several policies of increasing sophistication.

import random
from typing import List
from dataclasses import dataclass


@dataclass
class PolicyResult:
    """Aggregate metrics from evaluating one policy over a batch of episodes."""

    name: str  # policy display name
    episodes: int  # number of episodes that were run
    wins: int  # episodes counted as wins
    total_reward: float  # reward summed over all episodes
    avg_steps: float  # mean number of steps per episode

    @property
    def win_rate(self) -> float:
        """Return wins divided by episodes, or 0.0 when episodes is not positive."""
        if self.episodes > 0:
            return self.wins / self.episodes
        return 0.0


# %%
# Policy 1: Random Policy
# ~~~~~~~~~~~~~~~~~~~~~~~
#
# The simplest policy - randomly choose from legal actions:


class RandomPolicy:
    """
    Random policy - baseline for comparison.

    Picks uniformly among the observation's legal actions and ignores the
    rest of the observation.
    Expected win rate for Catch: ~20% (1 in 5 columns)
    """

    name = "Random"

    def choose_action(self, observation) -> int:
        """Return one of ``observation.legal_actions`` chosen at random."""
        candidates = observation.legal_actions
        return random.choice(candidates)


# %%
# Policy 2: Heuristic Policy
# ~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# A hand-coded policy that uses domain knowledge:


class SmartCatchPolicy:
    """
    Smart heuristic policy for the Catch game.

    Reads the ball and paddle columns from the flat grid encoding and moves
    the paddle toward the ball.
    Expected win rate: ~100% (optimal for Catch)
    """

    name = "Smart (Heuristic)"

    def __init__(self, grid_width: int = 5):
        """Store the number of columns in the grid encoded by ``info_state``."""
        self.grid_width = grid_width

    def choose_action(self, observation) -> int:
        """
        Return the action that moves the paddle toward the ball.

        Returns 0 (LEFT), 1 (STAY) or 2 (RIGHT). STAY is also returned when
        either position cannot be found, which includes the case where the
        ball is already in the last row.
        """
        width = self.grid_width
        cells = observation.info_state

        def is_marked(value) -> bool:
            # Occupied cells hold 1.0; compare with a small tolerance
            return abs(value - 1.0) < 0.01

        # Ball: first marked cell above the last row, reduced to its column
        ball_col = next(
            (pos % width for pos, value in enumerate(cells[:-width]) if is_marked(value)),
            None,
        )
        # Paddle: first marked cell within the last row
        paddle_col = next(
            (pos for pos, value in enumerate(cells[-width:]) if is_marked(value)),
            None,
        )

        if ball_col is None or paddle_col is None:
            return 1  # STAY if can't determine positions
        if paddle_col == ball_col:
            return 1  # STAY
        return 2 if paddle_col < ball_col else 0  # RIGHT / LEFT


# %%
# Policy 3: Epsilon-Greedy Policy
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# Combines exploration (random) with exploitation (smart):


class EpsilonGreedyPolicy:
    """
    Epsilon-greedy policy - balances exploration and exploitation.

    Each call explores (random legal action) with probability
    ``epsilon * decay ** steps`` and otherwise exploits the smart policy.
    ``steps`` counts every call made on this instance, so the exploration
    probability shrinks as the policy keeps being used.
    """

    name = "Epsilon-Greedy"

    def __init__(self, epsilon: float = 0.3, decay: float = 0.99):
        """Store the initial exploration rate and its per-step decay factor."""
        self.epsilon = epsilon
        self.decay = decay
        self.smart_policy = SmartCatchPolicy()  # policy used when exploiting
        self.steps = 0  # number of choose_action calls so far

    def choose_action(self, observation) -> int:
        """Return a random legal action or the smart policy's action."""
        self.steps += 1

        # Exploration probability after decay for this call
        explore_threshold = self.epsilon * self.decay ** self.steps
        explore = random.random() < explore_threshold

        if not explore:
            # Exploit: use smart policy
            return self.smart_policy.choose_action(observation)
        # Explore: random action
        return random.choice(observation.legal_actions)


# %%
# Policy 4: Always Stay Policy
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# A deliberately bad policy for comparison:


class AlwaysStayPolicy:
    """
    Always stay policy - deliberately bad baseline.

    Ignores the observation and never moves the paddle, so it only wins
    when the ball lands on the paddle's starting column.
    Expected win rate: ~20% (same as random)
    """

    name = "Always Stay"

    def choose_action(self, observation) -> int:
        """Return the STAY action (id 1) regardless of the observation."""
        stay_action = 1
        return stay_action


# %%
# Part 6: Running Evaluations
# ---------------------------
#
# Let's evaluate our policies! First, we'll create an evaluation function.


def evaluate_policy_live(
    policy,
    env,
    num_episodes: int = 50,
    game_name: str = "catch",
) -> PolicyResult:
    """
    Evaluate a policy against a live environment.

    Each episode starts with ``env.reset()`` and repeatedly steps the
    environment with the policy's chosen action until the result reports
    ``done``. An episode counts as a win when its final reward is positive.

    Args:
        policy: Policy object with ``name`` and a ``choose_action`` method
        env: Connected OpenSpielEnv client
        num_episodes: Number of episodes to run
        game_name: Name of the game to play

    Returns:
        PolicyResult with evaluation metrics. ``avg_steps`` is 0.0 when
        ``num_episodes`` is not positive (no episodes are run in that case).
    """
    wins = 0
    total_reward = 0.0
    total_steps = 0

    for _ in range(num_episodes):
        result = env.reset()

        while not result.done:
            action_id = policy.choose_action(result.observation)
            result = env.step(OpenSpielAction(action_id=action_id, game_name=game_name))
            total_steps += 1

        # A missing (None) final reward is treated as 0
        final_reward = result.reward or 0
        total_reward += final_reward
        if final_reward > 0:
            wins += 1

    # Guard the average so an empty evaluation does not divide by zero,
    # matching the guard in PolicyResult.win_rate.
    avg_steps = total_steps / num_episodes if num_episodes > 0 else 0.0

    return PolicyResult(
        name=policy.name,
        episodes=num_episodes,
        wins=wins,
        total_reward=total_reward,
        avg_steps=avg_steps,
    )


def evaluate_policy_simulated(
    policy,
    num_episodes: int = 50,
    grid_height: int = 10,
    grid_width: int = 5,
) -> PolicyResult:
    """
    Evaluate a policy using local simulation (no server needed).

    This simulates the Catch game locally: the ball starts in a random
    column and drops one row per step, the paddle starts in the centre of
    the bottom row, and the episode is won when the paddle is in the ball's
    column after ``grid_height`` steps.

    Args:
        policy: Policy object with ``name`` and a ``choose_action`` method
        num_episodes: Number of episodes to run
        grid_height: Height of the game grid
        grid_width: Width of the game grid

    Returns:
        PolicyResult with evaluation metrics. ``avg_steps`` is 0.0 when
        ``num_episodes`` is not positive (no episodes are run in that case).
    """
    wins = 0
    total_reward = 0.0
    total_steps = 0

    class MockObservation:
        """Carries the two attributes the policies read from an observation."""

        def __init__(self, info_state, legal_actions):
            self.info_state = info_state
            self.legal_actions = legal_actions

    for _ in range(num_episodes):
        # Initialize game
        ball_col = random.randint(0, grid_width - 1)
        paddle_col = grid_width // 2  # Start in center

        for step in range(grid_height):
            # Flat row-major grid: 1.0 marks the ball (row ``step``) and the
            # paddle (last row)
            info_state = [0.0] * (grid_height * grid_width)
            info_state[step * grid_width + ball_col] = 1.0
            info_state[(grid_height - 1) * grid_width + paddle_col] = 1.0

            observation = MockObservation(
                info_state=info_state, legal_actions=[0, 1, 2]
            )

            action = policy.choose_action(observation)

            # Apply the move, keeping the paddle inside the grid
            if action == 0:  # LEFT
                paddle_col = max(0, paddle_col - 1)
            elif action == 2:  # RIGHT
                paddle_col = min(grid_width - 1, paddle_col + 1)
            # action == 1 is STAY, no movement

            total_steps += 1

        # Check if caught
        if paddle_col == ball_col:
            wins += 1
            total_reward += 1.0

    # Guard the average so an empty evaluation does not divide by zero,
    # matching the guard in PolicyResult.win_rate.
    avg_steps = total_steps / num_episodes if num_episodes > 0 else 0.0

    return PolicyResult(
        name=policy.name,
        episodes=num_episodes,
        wins=wins,
        total_reward=total_reward,
        avg_steps=avg_steps,
    )


# %%
# Part 7: Policy Competition
# --------------------------
#
# Let's run a competition between all our policies!

# Create policy instances
policies = [
    RandomPolicy(),
    AlwaysStayPolicy(),
    SmartCatchPolicy(),
    EpsilonGreedyPolicy(epsilon=0.3),
]

# Live evaluation is used only if a server answers at SERVER_URL;
# otherwise the local simulation is used.
SERVER_URL = "http://localhost:8000"
USE_LIVE = False

if IMPORTS_OK:
    try:
        test_env = OpenSpielEnv(base_url=SERVER_URL)
        # Entering and leaving the sync client is the whole connectivity probe
        with test_env.sync() as client:
            pass
        USE_LIVE = True
        print(f"✓ Connected to server at {SERVER_URL}")
    except Exception as e:
        # Any failure here just means "no usable server": fall back to simulation
        USE_LIVE = False
        print(f"✗ No server running at {SERVER_URL}: {e}")

print("=" * 70)
print("   POLICY COMPETITION - LIVE SERVER" if USE_LIVE else "   POLICY COMPETITION - SIMULATION MODE")
print("=" * 70)
print()

NUM_EPISODES = 50
print(f"Running {NUM_EPISODES} episodes per policy...\n")

results = []

for policy in policies:
    # Keep the cursor on this line so the win rate is appended to it
    print(f"  Evaluating {policy.name}...", end=" ", flush=True)

    if not USE_LIVE:
        result = evaluate_policy_simulated(policy, NUM_EPISODES)
    else:
        # A fresh client connection is opened for every policy
        env = OpenSpielEnv(base_url=SERVER_URL)
        with env.sync() as client:
            result = evaluate_policy_live(policy, client, NUM_EPISODES)

    results.append(result)
    print(f"Win rate: {result.win_rate * 100:.1f}%")

# %%
# Display Results
# ~~~~~~~~~~~~~~~

print()
print("=" * 70)
print("   FINAL RESULTS")
print("=" * 70)
print()

# Sort by win rate (descending); this reorders ``results`` in place
results.sort(key=lambda r: r.win_rate, reverse=True)

# Leaderboard. The header and the rows use the same column widths
# (6, 20, 12, 12) so every column starts under its heading.
print(f"{'Rank':<6}{'Policy':<20}{'Win Rate':<12}{'Avg Steps':<12}{'Wins'}")
print("-" * 60)

for position, result in enumerate(results, start=1):
    rank = f"#{position}"
    win_rate_cell = f"{result.win_rate * 100:>5.1f}%"
    avg_steps_cell = f"{result.avg_steps:>6.1f}"
    print(
        f"{rank:<6}{result.name:<20}{win_rate_cell:<12}{avg_steps_cell:<12}"
        f"{result.wins}/{result.episodes}"
    )

print()
print("-" * 70)
print()
print("Key Insights:")
print("  • Random/AlwaysStay: ~20% (baseline - relies on luck)")
print("  • Smart Heuristic:   ~100% (optimal for Catch)")
print("  • Epsilon-Greedy:    ~85%+ (balances exploration/exploitation)")
print()

# %%
# Part 8: Working with Different Games
# ------------------------------------
#
# OpenSpiel supports multiple games. Let's create actual action instances
# for different games and examine their structure:

print("=" * 70, "   SWITCHING GAMES - ACTUAL ACTION INSTANCES", "=" * 70, "", sep="\n")

if IMPORTS_OK:
    from openspiel_env.models import OpenSpielAction as ActionModel

    def _show_actions(title, game_name, labels):
        """Print a heading, then one ActionModel instance per labelled action id."""
        print(title)
        print("-" * 40)
        for move_id, label in labels.items():
            print(f"  {ActionModel(action_id=move_id, game_name=game_name)}  # {label}")

    # Catch actions
    catch_actions = {0: "Move LEFT", 1: "STAY in place", 2: "Move RIGHT"}
    _show_actions("CATCH GAME ACTIONS:", "catch", catch_actions)
    print()

    # 2048 actions
    game_2048_actions = {
        0: "Slide UP",
        1: "Slide RIGHT",
        2: "Slide DOWN",
        3: "Slide LEFT",
    }
    _show_actions("2048 GAME ACTIONS:", "2048", game_2048_actions)
    print()

    # Tic-Tac-Toe actions: show the board numbering, then a few sample moves
    print(
        "TIC-TAC-TOE ACTIONS:",
        "-" * 40,
        "  Grid positions 0-8 (left-to-right, top-to-bottom):",
        "    0 | 1 | 2",
        "   ---|---|---",
        "    3 | 4 | 5",
        "   ---|---|---",
        "    6 | 7 | 8",
        "",
        sep="\n",
    )
    for pos, corner in {0: "top-left", 4: "center", 8: "bottom-right"}.items():
        action = ActionModel(action_id=pos, game_name="tic_tac_toe")
        print(f"  {action}  # {corner}")
    print()

    # Blackjack actions
    blackjack_actions = {
        0: "STAND (keep current hand)",
        1: "HIT (request another card)",
    }
    _show_actions("BLACKJACK ACTIONS:", "blackjack", blackjack_actions)

else:
    # Fallback: a local dataclass stands in for the action model; its
    # generated repr supplies the printed text.
    from dataclasses import dataclass

    @dataclass
    class ActionDemo:
        action_id: int
        game_name: str

    print("CATCH GAME ACTIONS:")
    print("-" * 40)
    for action_id, desc in [(0, "LEFT"), (1, "STAY"), (2, "RIGHT")]:
        print(f"  {ActionDemo(action_id=action_id, game_name='catch')!r}  # {desc}")

    print()
    print("2048 GAME ACTIONS:")
    print("-" * 40)
    for action_id, desc in [(0, "UP"), (1, "RIGHT"), (2, "DOWN"), (3, "LEFT")]:
        print(f"  {ActionDemo(action_id=action_id, game_name='2048')!r}  # {desc}")

print("", "-" * 70, "Each game has its own action space - check legal_actions in observation!", sep="\n")

# %%
# Part 9: Multi-Player Games
# --------------------------
#
# Some games like Tic-Tac-Toe and Kuhn Poker support multiple players.
# Let's create actual observation instances to understand the structure:

print("=" * 70, "   MULTI-PLAYER GAMES - OBSERVATION STRUCTURE", "=" * 70, "", sep="\n")

if IMPORTS_OK:
    from openspiel_env.models import OpenSpielObservation as ObsModel

    def _heading(title):
        """Print a section title followed by a 50-character rule."""
        print(title)
        print("-" * 50)

    # Single-player observation (like Catch)
    _heading("SINGLE-PLAYER OBSERVATION (Catch):")
    single_player_obs = ObsModel(
        info_state=[0.0, 0.0, 1.0, 0.0, 0.0] + [0.0] * 45,
        legal_actions=[0, 1, 2],
        game_phase="playing",
        current_player_id=0,
        opponent_last_action=None,
    )
    print(
        f"  current_player_id:   {single_player_obs.current_player_id}  # Always 0 (you)",
        f"  opponent_last_action: {single_player_obs.opponent_last_action}  # None (no opponent)",
        f"  legal_actions:       {single_player_obs.legal_actions}",
        f"  game_phase:          {single_player_obs.game_phase!r}",
        "",
        sep="\n",
    )

    # Multi-player observation - your turn (like Tic-Tac-Toe)
    _heading("MULTI-PLAYER OBSERVATION (Tic-Tac-Toe, YOUR turn):")
    your_turn_obs = ObsModel(
        info_state=[1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, 0.0],  # X at 0, O at 4
        legal_actions=[1, 2, 3, 5, 6, 7, 8],  # Available positions
        game_phase="playing",
        current_player_id=0,  # Your turn!
        opponent_last_action=4,  # Opponent played center
    )
    print(
        f"  current_player_id:   {your_turn_obs.current_player_id}  # 0 = YOUR turn",
        f"  opponent_last_action: {your_turn_obs.opponent_last_action}  # Opponent played position 4 (center)",
        f"  legal_actions:       {your_turn_obs.legal_actions}",
        f"  game_phase:          {your_turn_obs.game_phase!r}",
        "",
        sep="\n",
    )

    # Multi-player observation - opponent's turn
    _heading("MULTI-PLAYER OBSERVATION (Tic-Tac-Toe, OPPONENT's turn):")
    opponent_turn_obs = ObsModel(
        info_state=[1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, 1.0],  # X at 0,8; O at 4
        legal_actions=[],  # No actions available when it's opponent's turn
        game_phase="playing",
        current_player_id=1,  # Opponent's turn
        opponent_last_action=None,  # Will be set after they move
    )
    print(
        f"  current_player_id:   {opponent_turn_obs.current_player_id}  # 1 = OPPONENT's turn",
        f"  legal_actions:       {opponent_turn_obs.legal_actions}  # Empty - wait for opponent",
        f"  game_phase:          {opponent_turn_obs.game_phase!r}",
        "",
        sep="\n",
    )

    # Terminal state observation
    _heading("TERMINAL OBSERVATION (Game Over):")
    terminal_obs = ObsModel(
        info_state=[1.0, 1.0, 1.0, -1.0, -1.0, 0.0, 0.0, 0.0, 0.0],  # X wins top row
        legal_actions=[],  # No more moves
        game_phase="terminal",
        current_player_id=-1,  # No current player
        opponent_last_action=4,
    )
    print(
        f"  current_player_id:   {terminal_obs.current_player_id}  # -1 = Game over",
        f"  game_phase:          {terminal_obs.game_phase!r}",
        f"  legal_actions:       {terminal_obs.legal_actions}  # Empty - game ended",
        sep="\n",
    )

else:
    # Fallback demonstration: the field layout is shown with a local
    # dataclass and the example values are printed as static text.
    from dataclasses import dataclass
    from typing import List, Optional

    @dataclass
    class ObsDemo:
        current_player_id: int
        opponent_last_action: Optional[int]
        legal_actions: List[int]
        game_phase: str

    print(
        "SINGLE-PLAYER (Catch):",
        "  current_player_id: 0  # Always your turn",
        "  opponent_last_action: None",
        "",
        "MULTI-PLAYER - YOUR TURN (Tic-Tac-Toe):",
        "  current_player_id: 0  # 0 = your turn",
        "  opponent_last_action: 4  # What opponent just played",
        "  legal_actions: [1, 2, 3, 5, 6, 7, 8]  # Available moves",
        "",
        "MULTI-PLAYER - OPPONENT'S TURN:",
        "  current_player_id: 1  # Wait for opponent",
        "  legal_actions: []  # Can't move during opponent's turn",
        sep="\n",
    )

print(
    "",
    "-" * 70,
    "KEY INSIGHT: Only act when current_player_id == 0 (your turn)!",
    "The environment automatically handles opponent moves.",
    sep="\n",
)

# %%
# Summary
# -------
#
# In this notebook, you learned:
#
# **Connection Methods:**
#
# - ``from_hub()`` - Auto-download from Hugging Face
# - ``from_docker_image()`` - Use local Docker image
# - Direct URL - Connect to running server
#
# **Creating Policies:**
#
# - Random: Baseline comparison
# - Heuristic: Domain knowledge encoded
# - Epsilon-Greedy: Balance exploration/exploitation
#
# **Running Evaluations:**
#
# - Measure win rates and rewards
# - Compare policy performance
# - Run competitions
#
# **Multi-Game Support:**
#
# - Switch games via ``game_name`` parameter
# - Handle multi-player games
# - Work with different action spaces
#
# Next Steps
# ----------
#
# **Continue to Notebook 3: Building & Sharing Environments**
#
# In the next notebook, you'll:
#
# - Create your own custom environment
# - Package it with Docker
# - Deploy to Hugging Face Hub
# - Share with the community