# Source: debatefloor/docs/source/getting_started/plot_02_using_environments.py
# Author (per repository page): AniketAsla
# sync: mirror git d05fcb5 to Space
# Revision b4ac377 (verified)
"""
Using Environments
==================
**Part 2 of 5** in the OpenEnv Getting Started Series
This notebook covers how to use OpenEnv environments: connecting to them,
creating AI policies, running evaluations, and working with different games.

.. note::
    **Time**: ~15 minutes | **Difficulty**: Beginner-Intermediate | **GPU Required**: No

What You'll Learn
-----------------
- **Connection Methods**: Hub, Docker, and direct URL connections
- **Available Environments**: OpenSpiel games, coding, browsing, and more
- **Creating Policies**: Random, heuristic, and learning-based strategies
- **Running Evaluations**: Measuring and comparing policy performance
"""
# %%
# Part 1: Setup
# -------------
#
# Let's set up our environment and imports.
import random
import subprocess
import sys
from pathlib import Path
import nest_asyncio
nest_asyncio.apply()
# Detect environment: the google.colab package is only importable inside Colab.
try:
    import google.colab  # noqa: F401

    IN_COLAB = True
except ImportError:
    IN_COLAB = False

if IN_COLAB:
    print("=" * 70)
    print(" GOOGLE COLAB DETECTED - Installing OpenEnv...")
    print("=" * 70)
    install = subprocess.run(
        [sys.executable, "-m", "pip", "install", "-q", "openenv-core"],
        capture_output=True,
        text=True,
    )
    # Only claim success when pip actually succeeded; otherwise surface the
    # captured error output instead of silently continuing.
    if install.returncode == 0:
        print(" OpenEnv installed!")
    else:
        print(f" OpenEnv installation failed (exit code {install.returncode}):")
        print(install.stderr.strip())
    print("=" * 70)
else:
    print("=" * 70)
    print(" RUNNING LOCALLY")
    print("=" * 70)
    # Add src and envs to path for local development
    src_path = Path.cwd().parent.parent.parent / "src"
    if src_path.exists():
        sys.path.insert(0, str(src_path))
    envs_path = Path.cwd().parent.parent.parent / "envs"
    if envs_path.exists():
        # NOTE(review): this inserts the *parent* of envs/, not envs/ itself —
        # kept as in the original; confirm against the package layout.
        sys.path.insert(0, str(envs_path.parent))
print("=" * 70)
print()
# %%
# Part 2: Available Environments
# ------------------------------
#
# OpenEnv includes a growing collection of environments for different RL tasks.
#
# OpenSpiel Games
# ~~~~~~~~~~~~~~~
#
# OpenSpiel (from DeepMind) provides 70+ game environments. OpenEnv wraps
# several of these:
#
# +------------------+-------------+------------------------------------------+
# | Game | Players | Description |
# +==================+=============+==========================================+
# | **Catch** | 1 | Catch falling ball with paddle |
# +------------------+-------------+------------------------------------------+
# | **2048** | 1 | Slide tiles to combine numbers |
# +------------------+-------------+------------------------------------------+
# | **Blackjack** | 1 | Classic card game vs dealer |
# +------------------+-------------+------------------------------------------+
# | **Cliff Walking**| 1 | Navigate grid, avoid cliffs |
# +------------------+-------------+------------------------------------------+
# | **Tic-Tac-Toe** | 2 | Classic 3x3 grid game |
# +------------------+-------------+------------------------------------------+
# | **Kuhn Poker** | 2 | Simplified poker with 3 cards |
# +------------------+-------------+------------------------------------------+
#
# Other Environment Types
# ~~~~~~~~~~~~~~~~~~~~~~~
#
# +------------------+--------------------------------------------------+
# | Environment | Description |
# +==================+==================================================+
# | **Coding Env** | Execute and evaluate code solutions |
# +------------------+--------------------------------------------------+
# | **BrowserGym** | Web browsing and interaction |
# +------------------+--------------------------------------------------+
# | **TextArena** | Text-based game environments |
# +------------------+--------------------------------------------------+
# | **Atari** | Classic Atari 2600 games |
# +------------------+--------------------------------------------------+
# | **Snake** | Classic snake game |
# +------------------+--------------------------------------------------+
# %%
# Part 3: Connecting to Environments
# ----------------------------------
#
# OpenEnv provides three ways to connect to environments.
print("=" * 70)
print(" CONNECTION METHODS")
print("=" * 70)

# Try to import the environment client and record the outcome in IMPORTS_OK
# so that later cells can fall back to static text.
try:
    from openspiel_env.client import OpenSpielEnv
    from openspiel_env.models import OpenSpielAction, OpenSpielObservation, OpenSpielState
except ImportError as import_error:
    IMPORTS_OK = False
    print(f"✗ Import error: {import_error}")
else:
    IMPORTS_OK = True
    print("✓ Imports successful")
# %%
# Method 1: From Hugging Face Hub
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# The easiest way to get started - automatically downloads and runs the container.
# Let's examine the actual method signature:
print("\n" + "-" * 70)
print("METHOD 1: FROM HUGGING FACE HUB")
print("-" * 70)

if not IMPORTS_OK:
    # Client package unavailable: show a static signature instead.
    print("\n(OpenEnv not installed - showing expected signature)")
    print("\nSignature: OpenSpielEnv.from_hub(repo_id, *, use_docker=True, ...)")
else:
    import inspect

    if not hasattr(OpenSpielEnv, "from_hub"):
        print("\nfrom_hub method not available in this version")
    else:
        sig = inspect.signature(OpenSpielEnv.from_hub)
        print(f"\nSignature: OpenSpielEnv.from_hub{sig}")
        # Show the first docstring line if a docstring is available
        hub_doc = OpenSpielEnv.from_hub.__doc__
        if hub_doc:
            summary = hub_doc.strip().split("\n")[0]
            print(f"Purpose: {summary.strip()}")

    for line in (
        "\nUsage:",
        " env = OpenSpielEnv.from_hub('openenv/openspiel-env')",
        "\nWhat happens:",
        " 1. Pulls Docker image from HF registry",
        " 2. Starts container on available port",
        " 3. Connects via WebSocket",
        " 4. Cleans up on close()",
    ):
        print(line)
# %%
# Method 2: From Docker Image
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# Use a locally built or pulled Docker image:
print("\n" + "-" * 70)
print("METHOD 2: FROM DOCKER IMAGE")
print("-" * 70)
if IMPORTS_OK:
    # Import here so this cell does not depend on an earlier cell having
    # imported inspect (notebook cells may be run out of order).
    import inspect

    if hasattr(OpenSpielEnv, "from_docker_image"):
        sig = inspect.signature(OpenSpielEnv.from_docker_image)
        print(f"\nSignature: OpenSpielEnv.from_docker_image{sig}")
        # Show only the first docstring line as a one-line purpose.
        if OpenSpielEnv.from_docker_image.__doc__:
            doc_lines = OpenSpielEnv.from_docker_image.__doc__.strip().split("\n")[:3]
            print(f"Purpose: {doc_lines[0].strip()}")
    else:
        print("\nfrom_docker_image method not available in this version")
    print("\nUsage:")
    print(" # Build image first:")
    print(" # docker build -t openspiel-env:latest ./envs/openspiel_env/server")
    print(" env = OpenSpielEnv.from_docker_image('openspiel-env:latest')")
else:
    # Client package unavailable: show a static signature instead.
    print("\n(OpenEnv not installed - showing expected signature)")
    print("\nSignature: OpenSpielEnv.from_docker_image(image, provider=None, ...)")
# %%
# Method 3: Direct URL Connection
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# Connect to an already-running server:
print("\n" + "-" * 70)
print("METHOD 3: DIRECT URL CONNECTION")
print("-" * 70)
if IMPORTS_OK:
    # Import here so this cell does not depend on an earlier cell having
    # imported inspect (notebook cells may be run out of order).
    import inspect

    # Signature of the constructor as declared on __init__ (includes self).
    sig = inspect.signature(OpenSpielEnv.__init__)
    print(f"\nSignature: OpenSpielEnv{sig}")
    print("\nUsage:")
    print(" # Start server first:")
    print(" # docker run -p 8000:8000 openenv/openspiel-env:latest")
    print(" env = OpenSpielEnv(base_url='http://localhost:8000')")
    print("\nNote: Does NOT manage container lifecycle - you control the server")
else:
    # Client package unavailable: show a static signature instead.
    print("\n(OpenEnv not installed - showing expected signature)")
    print("\nSignature: OpenSpielEnv(base_url, connect_timeout_s=10.0, ...)")
# %%
# Using Context Managers
# ~~~~~~~~~~~~~~~~~~~~~~
#
# Always use context managers to ensure proper cleanup. Let's verify the
# client supports the context manager protocol:
print("\n" + "-" * 70)
print("CONTEXT MANAGER SUPPORT")
print("-" * 70)

if not IMPORTS_OK:
    print("\n(OpenEnv not installed)")
    print("Context managers are supported for automatic cleanup")
else:
    # A class supports the 'with' statement when it defines both dunders.
    has_enter = hasattr(OpenSpielEnv, "__enter__")
    has_exit = hasattr(OpenSpielEnv, "__exit__")
    enter_status = "✓ Present" if has_enter else "✗ Missing"
    exit_status = "✓ Present" if has_exit else "✗ Missing"
    print(f"\n__enter__ method: {enter_status}")
    print(f"__exit__ method: {exit_status}")
    if has_enter and has_exit:
        for line in (
            "\n✓ Context manager supported! Use with 'with' statement:",
            " with OpenSpielEnv(base_url='...') as env:",
            " result = env.reset()",
            " # ... use env ...",
            " # Automatically cleaned up",
        ):
            print(line)
# %%
# Part 4: The Environment Loop
# ----------------------------
#
# Every OpenEnv interaction follows the same pattern:
#
# 1. ``reset()`` - Start a new episode
# 2. ``step(action)`` - Take action, get observation/reward
# 3. Repeat until ``done``
# 4. ``state()`` - Get episode metadata (optional)
#
# Let's demonstrate this with an actual episode:
print("=" * 70)
print(" THE ENVIRONMENT LOOP - LIVE DEMO")
print("=" * 70)
print()

# Run an actual demo episode on a locally simulated Catch board.
GRID_HEIGHT = 10
GRID_WIDTH = 5


# Create mock observation for demonstration
class DemoObservation:
    """Minimal observation: flattened grid, legal action ids, and a done flag."""

    def __init__(self, info_state, legal_actions, done=False):
        self.info_state = info_state
        self.legal_actions = legal_actions
        self.done = done


class DemoResult:
    """Minimal step result: observation, reward, and a done flag."""

    def __init__(self, observation, reward=0.0, done=False):
        self.observation = observation
        self.reward = reward
        self.done = done


# Initialize episode: ball in a random column, paddle in the centre column.
ball_col = random.randint(0, GRID_WIDTH - 1)
paddle_col = GRID_WIDTH // 2
print("Episode Starting:")
print(f" Ball column: {ball_col}")
print(f" Paddle column: {paddle_col}")
print()

# Simulate the environment loop
step_count = 0
total_reward = 0.0
# Display names for the three action ids; built once instead of per step.
action_names = {0: "LEFT", 1: "STAY", 2: "RIGHT"}
print("Step | Ball Row | Paddle | Action | Info State (first 10)")
print("-" * 65)
for ball_row in range(GRID_HEIGHT):
    # Build observation: a flattened GRID_HEIGHT x GRID_WIDTH grid with 1.0
    # at the ball cell and at the paddle cell in the last row.
    info_state = [0.0] * (GRID_HEIGHT * GRID_WIDTH)
    info_state[ball_row * GRID_WIDTH + ball_col] = 1.0  # Ball
    info_state[(GRID_HEIGHT - 1) * GRID_WIDTH + paddle_col] = 1.0  # Paddle
    obs = DemoObservation(info_state=info_state, legal_actions=[0, 1, 2])

    # Choose action (smart policy - move toward ball)
    if paddle_col < ball_col:
        action_id = 2  # RIGHT
    elif paddle_col > ball_col:
        action_id = 0  # LEFT
    else:
        action_id = 1  # STAY

    # Show state before action
    info_preview = [f"{v:.0f}" for v in info_state[:10]]
    print(f" {step_count:2d} | {ball_row:2d} | {paddle_col} | {action_names[action_id]:<5} | {info_preview}")

    # Execute action, clamping the paddle to the grid edges.
    if action_id == 0:
        paddle_col = max(0, paddle_col - 1)
    elif action_id == 2:
        paddle_col = min(GRID_WIDTH - 1, paddle_col + 1)
    step_count += 1

# Calculate final reward: 1.0 if the paddle ended under the ball, else 0.0.
caught = (paddle_col == ball_col)
reward = 1.0 if caught else 0.0
# Record the episode reward (previously total_reward stayed at 0.0).
total_reward += reward
print("-" * 65)
print()
print("Episode Complete:")
print(f" Steps: {step_count}")
print(f" Ball landed at: column {ball_col}")
print(f" Paddle position: column {paddle_col}")
print(f" Reward: {reward}")
print(f" Result: {'CAUGHT! ✓' if caught else 'MISSED! ✗'}")
print()
print("This is the exact same loop you'd run with a live server,")
print("just using local simulation for the game logic.")
# %%
# Part 5: Creating AI Policies
# ----------------------------
#
# A policy is a function that chooses actions based on observations.
# Let's create several policies of increasing sophistication.
import random
from typing import List
from dataclasses import dataclass
@dataclass
class PolicyResult:
    """Aggregate metrics from evaluating one policy over several episodes."""

    # Display name of the evaluated policy.
    name: str
    # Number of episodes that were run.
    episodes: int
    # Number of episodes counted as wins.
    wins: int
    # Sum of rewards accumulated over all episodes.
    total_reward: float
    # Mean number of steps per episode.
    avg_steps: float

    @property
    def win_rate(self) -> float:
        """Return wins divided by episodes, or 0.0 when no episodes were run."""
        if self.episodes > 0:
            return self.wins / self.episodes
        return 0.0
# %%
# Policy 1: Random Policy
# ~~~~~~~~~~~~~~~~~~~~~~~
#
# The simplest policy - randomly choose from legal actions:
class RandomPolicy:
    """
    Baseline policy that ignores the game state.

    Each call draws uniformly from the observation's legal actions.
    The tutorial quotes an expected Catch win rate of ~20% (1 in 5 columns).
    """

    name = "Random"

    def choose_action(self, observation) -> int:
        """Return one of ``observation.legal_actions`` chosen at random."""
        candidates = observation.legal_actions
        return random.choice(candidates)
# %%
# Policy 2: Heuristic Policy
# ~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# A hand-coded policy that uses domain knowledge:
class SmartCatchPolicy:
    """
    Hand-written heuristic for the Catch game.

    Reads the ball and paddle columns out of the flattened grid and steers
    the paddle toward the ball.
    Expected win rate: ~100% (optimal for Catch)
    """

    name = "Smart (Heuristic)"

    def __init__(self, grid_width: int = 5):
        # Number of columns per row in the flattened info_state.
        self.grid_width = grid_width

    def choose_action(self, observation) -> int:
        """Return 0 (LEFT), 1 (STAY) or 2 (RIGHT) to move toward the ball."""
        cells = observation.info_state
        width = self.grid_width

        def is_marked(value) -> bool:
            # A cell counts as occupied when its value is within 0.01 of 1.0.
            return abs(value - 1.0) < 0.01

        # Ball: first marked cell anywhere except the last row.
        ball_col = next(
            (pos % width for pos, value in enumerate(cells[:-width]) if is_marked(value)),
            None,
        )
        # Paddle: first marked cell in the last row.
        paddle_col = next(
            (col for col, value in enumerate(cells[-width:]) if is_marked(value)),
            None,
        )

        # STAY if either position cannot be determined, or already aligned.
        if ball_col is None or paddle_col is None:
            return 1
        if paddle_col == ball_col:
            return 1
        return 2 if paddle_col < ball_col else 0
# %%
# Policy 3: Epsilon-Greedy Policy
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# Combines exploration (random) with exploitation (smart):
class EpsilonGreedyPolicy:
    """
    Epsilon-greedy policy - balances exploration and exploitation.

    With probability epsilon, takes random action (explore).
    Otherwise, uses smart policy (exploit).
    Epsilon decays over time to favor exploitation.
    """

    name = "Epsilon-Greedy"

    def __init__(self, epsilon: float = 0.3, decay: float = 0.99, grid_width: int = 5):
        """
        Args:
            epsilon: Initial exploration probability.
            decay: Multiplicative decay applied to epsilon once per call.
            grid_width: Grid width forwarded to the inner SmartCatchPolicy,
                so the policy can be used on grids other than the default.
        """
        self.epsilon = epsilon
        self.decay = decay
        self.smart_policy = SmartCatchPolicy(grid_width=grid_width)
        # Number of choose_action calls so far; drives the epsilon decay.
        self.steps = 0

    def choose_action(self, observation) -> int:
        """Choose action with epsilon-greedy strategy."""
        self.steps += 1
        # Decay epsilon: epsilon * decay ** steps
        current_epsilon = self.epsilon * (self.decay**self.steps)
        if random.random() < current_epsilon:
            # Explore: random action
            return random.choice(observation.legal_actions)
        # Exploit: use smart policy
        return self.smart_policy.choose_action(observation)
# %%
# Policy 4: Always Stay Policy
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# A deliberately bad policy for comparison:
class AlwaysStayPolicy:
    """
    Deliberately weak baseline that never moves the paddle.

    It can only win when the ball lands on the paddle's starting column.
    Expected win rate: ~20% (same as random)
    """

    name = "Always Stay"

    def choose_action(self, observation) -> int:
        """Ignore the observation and return the STAY action id (1)."""
        stay_action = 1
        return stay_action
# %%
# Part 6: Running Evaluations
# ---------------------------
#
# Let's evaluate our policies! First, we'll create an evaluation function.
def evaluate_policy_live(
    policy,
    env,
    num_episodes: int = 50,
    game_name: str = "catch",
) -> PolicyResult:
    """
    Evaluate a policy against a live environment.

    Args:
        policy: Policy object with choose_action method
        env: Connected OpenSpielEnv client
        num_episodes: Number of episodes to run
        game_name: Name of the game to play

    Returns:
        PolicyResult with evaluation metrics. ``avg_steps`` is 0.0 when
        ``num_episodes`` is not positive (no episodes were run).
    """
    wins = 0
    total_reward = 0.0
    total_steps = 0
    for _ in range(num_episodes):
        result = env.reset()
        episode_steps = 0
        while not result.done:
            action_id = policy.choose_action(result.observation)
            action = OpenSpielAction(action_id=action_id, game_name=game_name)
            result = env.step(action)
            episode_steps += 1

        # Only the final step's reward is counted; a missing/zero reward
        # contributes nothing.
        total_reward += result.reward if result.reward else 0
        total_steps += episode_steps
        # An episode counts as a win when its final reward is positive.
        if result.reward and result.reward > 0:
            wins += 1

    # Guard the average so evaluating zero episodes does not divide by zero.
    avg_steps = total_steps / num_episodes if num_episodes > 0 else 0.0
    return PolicyResult(
        name=policy.name,
        episodes=num_episodes,
        wins=wins,
        total_reward=total_reward,
        avg_steps=avg_steps,
    )
def evaluate_policy_simulated(
    policy,
    num_episodes: int = 50,
    grid_height: int = 10,
    grid_width: int = 5,
) -> PolicyResult:
    """
    Evaluate a policy using local simulation (no server needed).

    This simulates the Catch game locally for testing without a server.

    Args:
        policy: Policy object with choose_action method
        num_episodes: Number of episodes to run
        grid_height: Height of the game grid
        grid_width: Width of the game grid

    Returns:
        PolicyResult with evaluation metrics. ``avg_steps`` is 0.0 when
        ``num_episodes`` is not positive (no episodes were run).
    """
    wins = 0
    total_reward = 0.0
    total_steps = 0

    # Create a mock observation class exposing the two attributes the
    # policies in this file read.
    class MockObservation:
        def __init__(self, info_state, legal_actions):
            self.info_state = info_state
            self.legal_actions = legal_actions

    for _ in range(num_episodes):
        # Initialize game
        ball_col = random.randint(0, grid_width - 1)
        paddle_col = grid_width // 2  # Start in center
        # The ball occupies one row per step, so an episode lasts
        # grid_height steps.
        for step in range(grid_height):
            # Create observation: flattened grid with 1.0 at ball and paddle
            info_state = [0.0] * (grid_height * grid_width)
            info_state[step * grid_width + ball_col] = 1.0  # Ball position
            info_state[(grid_height - 1) * grid_width + paddle_col] = 1.0  # Paddle
            observation = MockObservation(
                info_state=info_state, legal_actions=[0, 1, 2]
            )
            # Get action from policy
            action = policy.choose_action(observation)
            # Execute action, clamping the paddle to the grid edges
            if action == 0:  # LEFT
                paddle_col = max(0, paddle_col - 1)
            elif action == 2:  # RIGHT
                paddle_col = min(grid_width - 1, paddle_col + 1)
            # action == 1 is STAY, no movement
            total_steps += 1

        # Check if caught
        if paddle_col == ball_col:
            wins += 1
            total_reward += 1.0

    # Guard the average so evaluating zero episodes does not divide by zero.
    avg_steps = total_steps / num_episodes if num_episodes > 0 else 0.0
    return PolicyResult(
        name=policy.name,
        episodes=num_episodes,
        wins=wins,
        total_reward=total_reward,
        avg_steps=avg_steps,
    )
# %%
# Part 7: Policy Competition
# --------------------------
#
# Let's run a competition between all our policies!
# Create policy instances
# One instance of every policy taking part in the competition.
policies = [
    RandomPolicy(),
    AlwaysStayPolicy(),
    SmartCatchPolicy(),
    EpsilonGreedyPolicy(epsilon=0.3),
]

# Probe for a live server; any failure falls back to simulation mode.
SERVER_URL = "http://localhost:8000"
USE_LIVE = False
if IMPORTS_OK:
    try:
        test_env = OpenSpielEnv(base_url=SERVER_URL)
        # Entering and leaving the sync context is the whole connection test.
        with test_env.sync() as client:
            pass
    except Exception as e:
        USE_LIVE = False
        print(f"✗ No server running at {SERVER_URL}: {e}")
    else:
        USE_LIVE = True
        print(f"✓ Connected to server at {SERVER_URL}")
print("=" * 70)
if not USE_LIVE:
    print(" POLICY COMPETITION - SIMULATION MODE")
else:
    print(" POLICY COMPETITION - LIVE SERVER")
print("=" * 70)
print()

NUM_EPISODES = 50
print(f"Running {NUM_EPISODES} episodes per policy...\n")

# Evaluate each policy in turn and collect its PolicyResult.
results = []
for policy in policies:
    print(f" Evaluating {policy.name}...", end=" ", flush=True)
    if not USE_LIVE:
        result = evaluate_policy_simulated(policy, NUM_EPISODES)
    else:
        # A fresh client per policy, closed by the sync context manager.
        env = OpenSpielEnv(base_url=SERVER_URL)
        with env.sync() as client:
            result = evaluate_policy_live(policy, client, NUM_EPISODES)
    results.append(result)
    print(f"Win rate: {result.win_rate * 100:.1f}%")
# %%
# Display Results
# ~~~~~~~~~~~~~~~
print()
print("=" * 70)
print(" FINAL RESULTS")
print("=" * 70)
print()

# Sort by win rate (descending)
results.sort(key=lambda r: r.win_rate, reverse=True)

# Display leaderboard
print(f"{'Rank':<6}{'Policy':<20}{'Win Rate':<12}{'Avg Steps':<12}{'Wins'}")
print("-" * 60)
for position, result in enumerate(results, start=1):
    rank = f"#{position}"
    # Text bar chart: up to 20 blocks, proportional to the win rate.
    # It was previously computed but never shown; it is now appended to the row.
    bar = "█" * int(result.win_rate * 20)
    print(
        f"{rank:<6}{result.name:<20}{result.win_rate * 100:>5.1f}%{'':<5}"
        f"{result.avg_steps:>6.1f}{'':<6}{result.wins}/{result.episodes}"
        f"  {bar}"
    )
print()
print("-" * 70)
print()
print("Key Insights:")
print(" • Random/AlwaysStay: ~20% (baseline - relies on luck)")
print(" • Smart Heuristic: ~100% (optimal for Catch)")
print(" • Epsilon-Greedy: ~85%+ (balances exploration/exploitation)")
print()
# %%
# Part 8: Working with Different Games
# ------------------------------------
#
# OpenSpiel supports multiple games. Let's create actual action instances
# for different games and examine their structure:
print("=" * 70)
print(" SWITCHING GAMES - ACTUAL ACTION INSTANCES")
print("=" * 70)
print()

# Create actual action instances for different games
if IMPORTS_OK:
    from openspiel_env.models import OpenSpielAction as ActionModel

    # Catch actions
    print("CATCH GAME ACTIONS:")
    print("-" * 40)
    catch_actions = {
        0: "Move LEFT",
        1: "STAY in place",
        2: "Move RIGHT",
    }
    for action_id, description in catch_actions.items():
        action = ActionModel(action_id=action_id, game_name="catch")
        print(f" {action} # {description}")
    print()

    # 2048 actions
    print("2048 GAME ACTIONS:")
    print("-" * 40)
    game_2048_actions = {
        0: "Slide UP",
        1: "Slide RIGHT",
        2: "Slide DOWN",
        3: "Slide LEFT",
    }
    for action_id, description in game_2048_actions.items():
        action = ActionModel(action_id=action_id, game_name="2048")
        print(f" {action} # {description}")
    print()

    # Tic-Tac-Toe actions
    print("TIC-TAC-TOE ACTIONS:")
    print("-" * 40)
    print(" Grid positions 0-8 (left-to-right, top-to-bottom):")
    print(" 0 | 1 | 2")
    print(" ---|---|---")
    print(" 3 | 4 | 5")
    print(" ---|---|---")
    print(" 6 | 7 | 8")
    print()
    # Show a few examples
    for pos in [0, 4, 8]:
        action = ActionModel(action_id=pos, game_name="tic_tac_toe")
        corner = {0: "top-left", 4: "center", 8: "bottom-right"}[pos]
        print(f" {action} # {corner}")
    print()

    # Blackjack actions
    print("BLACKJACK ACTIONS:")
    print("-" * 40)
    blackjack_actions = {
        0: "STAND (keep current hand)",
        1: "HIT (request another card)",
    }
    for action_id, description in blackjack_actions.items():
        action = ActionModel(action_id=action_id, game_name="blackjack")
        print(f" {action} # {description}")
else:
    # Fallback using dataclass: a local stand-in with the same two fields.
    from dataclasses import dataclass

    @dataclass
    class ActionDemo:
        action_id: int
        game_name: str

    # The stand-in is now actually instantiated; its generated repr yields
    # the same text that was previously hand-written into the strings.
    print("CATCH GAME ACTIONS:")
    print("-" * 40)
    for action_id, desc in [(0, "LEFT"), (1, "STAY"), (2, "RIGHT")]:
        action = ActionDemo(action_id=action_id, game_name="catch")
        print(f" {action} # {desc}")
    print()
    print("2048 GAME ACTIONS:")
    print("-" * 40)
    for action_id, desc in [(0, "UP"), (1, "RIGHT"), (2, "DOWN"), (3, "LEFT")]:
        action = ActionDemo(action_id=action_id, game_name="2048")
        print(f" {action} # {desc}")
print()
print("-" * 70)
print("Each game has its own action space - check legal_actions in observation!")
# %%
# Part 9: Multi-Player Games
# --------------------------
#
# Some games like Tic-Tac-Toe and Kuhn Poker support multiple players.
# Let's create actual observation instances to understand the structure:
print("=" * 70)
print(" MULTI-PLAYER GAMES - OBSERVATION STRUCTURE")
print("=" * 70)
print()

# Create observation instances for multi-player games
if IMPORTS_OK:
    from openspiel_env.models import OpenSpielObservation as ObsModel

    # Single-player observation (like Catch)
    print("SINGLE-PLAYER OBSERVATION (Catch):")
    print("-" * 50)
    single_player_obs = ObsModel(
        info_state=[0.0, 0.0, 1.0, 0.0, 0.0] + [0.0] * 45,
        legal_actions=[0, 1, 2],
        game_phase="playing",
        current_player_id=0,
        opponent_last_action=None,
    )
    print(f" current_player_id: {single_player_obs.current_player_id} # Always 0 (you)")
    print(f" opponent_last_action: {single_player_obs.opponent_last_action} # None (no opponent)")
    print(f" legal_actions: {single_player_obs.legal_actions}")
    print(f" game_phase: {single_player_obs.game_phase!r}")
    print()

    # Multi-player observation - your turn (like Tic-Tac-Toe)
    print("MULTI-PLAYER OBSERVATION (Tic-Tac-Toe, YOUR turn):")
    print("-" * 50)
    your_turn_obs = ObsModel(
        info_state=[1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, 0.0],  # X at 0, O at 4
        legal_actions=[1, 2, 3, 5, 6, 7, 8],  # Available positions
        game_phase="playing",
        current_player_id=0,  # Your turn!
        opponent_last_action=4,  # Opponent played center
    )
    print(f" current_player_id: {your_turn_obs.current_player_id} # 0 = YOUR turn")
    print(f" opponent_last_action: {your_turn_obs.opponent_last_action} # Opponent played position 4 (center)")
    print(f" legal_actions: {your_turn_obs.legal_actions}")
    print(f" game_phase: {your_turn_obs.game_phase!r}")
    print()

    # Multi-player observation - opponent's turn
    print("MULTI-PLAYER OBSERVATION (Tic-Tac-Toe, OPPONENT's turn):")
    print("-" * 50)
    opponent_turn_obs = ObsModel(
        info_state=[1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, 1.0],  # X at 0,8; O at 4
        legal_actions=[],  # No actions available when it's opponent's turn
        game_phase="playing",
        current_player_id=1,  # Opponent's turn
        opponent_last_action=None,  # Will be set after they move
    )
    print(f" current_player_id: {opponent_turn_obs.current_player_id} # 1 = OPPONENT's turn")
    print(f" legal_actions: {opponent_turn_obs.legal_actions} # Empty - wait for opponent")
    print(f" game_phase: {opponent_turn_obs.game_phase!r}")
    print()

    # Terminal state observation
    print("TERMINAL OBSERVATION (Game Over):")
    print("-" * 50)
    terminal_obs = ObsModel(
        info_state=[1.0, 1.0, 1.0, -1.0, -1.0, 0.0, 0.0, 0.0, 0.0],  # X wins top row
        legal_actions=[],  # No more moves
        game_phase="terminal",
        current_player_id=-1,  # No current player
        opponent_last_action=4,
    )
    print(f" current_player_id: {terminal_obs.current_player_id} # -1 = Game over")
    print(f" game_phase: {terminal_obs.game_phase!r}")
    print(f" legal_actions: {terminal_obs.legal_actions} # Empty - game ended")
else:
    # Fallback demonstration: a local stand-in carrying the fields shown below.
    from dataclasses import dataclass
    from typing import List, Optional

    @dataclass
    class ObsDemo:
        current_player_id: int
        opponent_last_action: Optional[int]
        legal_actions: List[int]
        game_phase: str

    # The stand-in is now actually instantiated so the printed values come
    # from real objects; the emitted text is unchanged.
    single_player_demo = ObsDemo(
        current_player_id=0,
        opponent_last_action=None,
        legal_actions=[0, 1, 2],
        game_phase="playing",
    )
    print("SINGLE-PLAYER (Catch):")
    print(f" current_player_id: {single_player_demo.current_player_id} # Always your turn")
    print(f" opponent_last_action: {single_player_demo.opponent_last_action}")
    print()

    your_turn_demo = ObsDemo(
        current_player_id=0,
        opponent_last_action=4,
        legal_actions=[1, 2, 3, 5, 6, 7, 8],
        game_phase="playing",
    )
    print("MULTI-PLAYER - YOUR TURN (Tic-Tac-Toe):")
    print(f" current_player_id: {your_turn_demo.current_player_id} # 0 = your turn")
    print(f" opponent_last_action: {your_turn_demo.opponent_last_action} # What opponent just played")
    print(f" legal_actions: {your_turn_demo.legal_actions} # Available moves")
    print()

    opponent_turn_demo = ObsDemo(
        current_player_id=1,
        opponent_last_action=None,
        legal_actions=[],
        game_phase="playing",
    )
    print("MULTI-PLAYER - OPPONENT'S TURN:")
    print(f" current_player_id: {opponent_turn_demo.current_player_id} # Wait for opponent")
    print(f" legal_actions: {opponent_turn_demo.legal_actions} # Can't move during opponent's turn")
print()
print("-" * 70)
print("KEY INSIGHT: Only act when current_player_id == 0 (your turn)!")
print("The environment automatically handles opponent moves.")
# %%
# Summary
# -------
#
# In this notebook, you learned:
#
# **Connection Methods:**
#
# - ``from_hub()`` - Auto-download from Hugging Face
# - ``from_docker_image()`` - Use local Docker image
# - Direct URL - Connect to running server
#
# **Creating Policies:**
#
# - Random: Baseline comparison
# - Heuristic: Domain knowledge encoded
# - Epsilon-Greedy: Balance exploration/exploitation
#
# **Running Evaluations:**
#
# - Measure win rates and rewards
# - Compare policy performance
# - Run competitions
#
# **Multi-Game Support:**
#
# - Switch games via ``game_name`` parameter
# - Handle multi-player games
# - Work with different action spaces
#
# Next Steps
# ----------
#
# **Continue to Notebook 3: Building & Sharing Environments**
#
# In the next notebook, you'll:
#
# - Create your own custom environment
# - Package it with Docker
# - Deploy to Hugging Face Hub
# - Share with the community