Spaces:

openenv
/

sumo_rl_env

Running

App Files Files Community

sumo_rl_env / models.py

burtenshaw HF Staff

Upload folder using huggingface_hub

6fac95b verified about 1 month ago

raw

history blame contribute delete

3.37 kB

	# Copyright (c) Meta Platforms, Inc. and affiliates.
	# All rights reserved.
	#
	# This source code is licensed under the BSD-style license found in the
	# LICENSE file in the root directory of this source tree.

	"""
	Data models for SUMO-RL Environment.

	This module defines the Action, Observation, and State types for traffic
	signal control using SUMO (Simulation of Urban MObility).
	"""

	from typing import Dict, List, Optional

	from openenv.core.env_server import Action, Observation, State
	from pydantic import Field


	class SumoAction(Action):
	    """
	    Phase-selection command for the SUMO traffic signal control environment.

	    An action names the traffic light phase that should be activated next.

	    Attributes:
	        phase_id: Index of the green phase to activate, expected to lie in
	            0 to num_phases-1. Required (no default). No range constraint
	            is declared on the field in this class.
	        ts_id: ID of the traffic signal the action applies to. Exists for
	            multi-agent support and defaults to "0".
	    """

	    # Required field: the caller must always state the desired phase.
	    phase_id: int
	    # Optional field: falls back to traffic signal "0" when omitted.
	    ts_id: str = "0"


	class SumoObservation(Observation):
	    """
	    Observation returned by the SUMO traffic signal environment.

	    Bundles the traffic metrics an agent uses to choose its next action,
	    together with episode bookkeeping values.

	    Attributes:
	        observation: Flattened observation vector, made up of:
	            - One-hot encoded current phase
	            - Min green flag (binary)
	            - Lane densities (normalized)
	            - Lane queues (normalized)
	            Defaults to an empty list.
	        observation_shape: Shape to use when reshaping ``observation``.
	            Defaults to an empty list.
	        action_mask: Indices of the actions that are currently valid.
	            Defaults to an empty list.
	        sim_time: Current simulation time in seconds. Defaults to 0.0.
	        done: True once the episode is complete. Defaults to False.
	        reward: Reward produced by the last action; None on reset.
	        metadata: Additional info (system metrics, etc.). Defaults to an
	            empty dict.
	    """

	    # Observation payload. Each instance gets its own fresh list through
	    # default_factory rather than sharing one mutable default.
	    observation: List[float] = Field(default_factory=list)
	    observation_shape: List[int] = Field(default_factory=list)
	    action_mask: List[int] = Field(default_factory=list)

	    # Simulation clock and episode bookkeeping.
	    sim_time: float = 0.0
	    done: bool = False
	    reward: Optional[float] = None

	    # Free-form extras; key and value types are not constrained here.
	    metadata: Dict = Field(default_factory=dict)


	class SumoState(State):
	    """
	    State of the SUMO traffic signal environment.

	    Holds the simulation configuration alongside values that change while
	    an episode runs. Every field has a default, so the class itself does
	    not require any argument for these fields.

	    Configuration attributes:
	        net_file: Path to the SUMO network file (.net.xml). Default "".
	        route_file: Path to the SUMO route file (.rou.xml). Default "".
	        num_seconds: Total simulation duration in seconds. Default 20000.
	        delta_time: Seconds between agent actions. Default 5.
	        yellow_time: Duration of the yellow phase in seconds. Default 2.
	        min_green: Minimum green time per phase in seconds. Default 5.
	        max_green: Maximum green time per phase in seconds. Default 50.
	        reward_fn: Name of the reward function used.
	            Default "diff-waiting-time".

	    Runtime attributes:
	        episode_id: Unique episode identifier. Default "".
	        step_count: Number of steps taken in the episode. Default 0.
	        sim_time: Current simulation time in seconds. Default 0.0.
	        total_vehicles: Total number of vehicles in the simulation.
	            Default 0.
	        total_waiting_time: Cumulative waiting time across all vehicles.
	            Default 0.0.
	        mean_waiting_time: Mean waiting time metric. Default 0.0.
	            NOTE(review): averaging basis and units are not defined in
	            this module - confirm against the code that fills it in.
	        mean_speed: Mean speed metric. Default 0.0.
	            NOTE(review): averaging basis and units are not defined in
	            this module - confirm against the code that fills it in.
	    """

	    # --- Episode tracking ---
	    episode_id: str = ""
	    step_count: int = 0

	    # --- SUMO configuration ---
	    # Input files for the simulation.
	    net_file: str = ""
	    route_file: str = ""
	    # Timing parameters, all in whole seconds.
	    num_seconds: int = 20000
	    delta_time: int = 5
	    yellow_time: int = 2
	    min_green: int = 5
	    max_green: int = 50
	    # The reward function is referred to by name.
	    reward_fn: str = "diff-waiting-time"

	    # --- Runtime metrics ---
	    sim_time: float = 0.0
	    total_vehicles: int = 0
	    total_waiting_time: float = 0.0
	    mean_waiting_time: float = 0.0
	    mean_speed: float = 0.0