# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""
Data models for the SUMO-RL environment.

Declares the Action, Observation, and State types used for traffic signal
control with SUMO (Simulation of Urban MObility).
"""

from typing import Dict, List, Optional

from openenv.core.env_server import Action, Observation, State
from pydantic import Field


class SumoAction(Action):
    """
    Action sent to the SUMO traffic signal control environment.

    An action selects which traffic light phase should be activated next.

    Attributes:
        phase_id: Index of the green phase to activate
            (0 to num_phases-1). Required; has no default.
        ts_id: Traffic signal ID, kept for multi-agent support.
            Defaults to "0".
    """

    phase_id: int
    ts_id: str = "0"


class SumoObservation(Observation):
    """
    Observation returned by the SUMO traffic signal environment.

    Bundles the traffic metrics an agent uses for decision-making.

    Attributes:
        observation: Flattened observation vector containing:
            - One-hot encoded current phase
            - Min green flag (binary)
            - Lane densities (normalized)
            - Lane queues (normalized)
            Defaults to an empty list.
        observation_shape: Shape of the observation, for reshaping.
            Defaults to an empty list.
        action_mask: List of valid action indices. Defaults to an empty list.
        sim_time: Current simulation time in seconds. Defaults to 0.0.
        done: Whether the episode is complete. Defaults to False.
        reward: Reward from the last action (None on reset).
        metadata: Additional info (system metrics, etc.). Defaults to an
            empty dict.
    """

    # Observation payload; each list-valued field gets its own fresh list
    # per instance via default_factory.
    observation: List[float] = Field(default_factory=list)
    observation_shape: List[int] = Field(default_factory=list)
    action_mask: List[int] = Field(default_factory=list)

    # Simulation clock and episode status.
    sim_time: float = 0.0
    done: bool = False
    reward: Optional[float] = None

    # Free-form extra information.
    metadata: Dict = Field(default_factory=dict)


class SumoState(State):
    """
    State of the SUMO traffic signal environment.

    Holds both the configuration of the simulation and its runtime state.

    Configuration attributes:
        net_file: Path to the SUMO network file (.net.xml). Defaults to "".
        route_file: Path to the SUMO route file (.rou.xml). Defaults to "".
        num_seconds: Total simulation duration in seconds. Defaults to 20000.
        delta_time: Seconds between agent actions. Defaults to 5.
        yellow_time: Duration of the yellow phase in seconds. Defaults to 2.
        min_green: Minimum green time per phase in seconds. Defaults to 5.
        max_green: Maximum green time per phase in seconds. Defaults to 50.
        reward_fn: Name of the reward function used.
            Defaults to "diff-waiting-time".

    Runtime attributes:
        episode_id: Unique episode identifier. Defaults to "".
        step_count: Number of steps taken in the episode. Defaults to 0.
        sim_time: Current simulation time in seconds. Defaults to 0.0.
        total_vehicles: Total number of vehicles in the simulation.
            Defaults to 0.
        total_waiting_time: Cumulative waiting time across all vehicles.
            Defaults to 0.0.
        mean_waiting_time: Mean waiting time metric. Defaults to 0.0.
            NOTE(review): presumably seconds averaged over vehicles --
            confirm against the environment implementation.
        mean_speed: Mean speed metric. Defaults to 0.0.
            NOTE(review): units and averaging scope are not visible in this
            module -- confirm against the environment implementation.
    """

    # --- Episode tracking ---
    episode_id: str = ""
    step_count: int = 0

    # --- SUMO configuration ---
    net_file: str = ""
    route_file: str = ""
    num_seconds: int = 20000
    delta_time: int = 5
    yellow_time: int = 2
    min_green: int = 5
    max_green: int = 50
    reward_fn: str = "diff-waiting-time"

    # --- Runtime metrics ---
    sim_time: float = 0.0
    total_vehicles: int = 0
    total_waiting_time: float = 0.0
    mean_waiting_time: float = 0.0
    mean_speed: float = 0.0