# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""
Data models for SUMO-RL Environment.

This module defines the Action, Observation, and State types for traffic
signal control using SUMO (Simulation of Urban MObility).
"""

from typing import Dict, List, Optional

from openenv.core.env_server import Action, Observation, State
from pydantic import Field


class SumoAction(Action):
    """
    Action for SUMO traffic signal control environment.

    Represents selecting which traffic light phase to activate next.

    Attributes:
        phase_id: Index of the green phase to activate (0 to num_phases-1).
            Required: the field has no default. No range constraint is
            declared on this class, so the documented bounds are not
            checked here.
        ts_id: Traffic signal ID (for multi-agent support, default "0").
            Declared as a string, not an integer index.
    """

    # Required field: callers must always supply the phase index.
    phase_id: int
    # Optional field: falls back to the signal ID "0" when omitted.
    ts_id: str = "0"


class SumoObservation(Observation):
    """
    Observation from SUMO traffic signal environment.

    Contains traffic metrics for decision-making. Every field has a default,
    so an instance can be constructed with no arguments.

    Attributes:
        observation: Flattened observation vector containing:
                    - One-hot encoded current phase
                    - Min green flag (binary)
                    - Lane densities (normalized)
                    - Lane queues (normalized)
                    Defaults to an empty list.
        observation_shape: Shape of observation for reshaping.
                    Defaults to an empty list.
        action_mask: List of valid action indices. Defaults to an empty list.
        sim_time: Current simulation time in seconds (default 0.0)
        done: Whether episode is complete (default False)
        reward: Reward from last action (None on reset; default None)
        metadata: Additional info (system metrics, etc.).
                    Defaults to an empty dict.
    """

    # Container fields use default_factory so each instance gets its own
    # fresh list/dict rather than a shared default object.
    observation: List[float] = Field(default_factory=list)
    observation_shape: List[int] = Field(default_factory=list)
    action_mask: List[int] = Field(default_factory=list)
    sim_time: float = 0.0
    # NOTE(review): done, reward and metadata may already be declared on the
    # base Observation class; confirm that redeclaring them here (with these
    # types and defaults) is intentional.
    done: bool = False
    reward: Optional[float] = None
    # Unparameterized Dict: key and value types are not constrained here.
    metadata: Dict = Field(default_factory=dict)


class SumoState(State):
    """
    State of SUMO traffic signal environment.

    Tracks both configuration and runtime state. Every field has a default,
    so an instance can be constructed with no arguments.

    Configuration attributes:
        net_file: Path to SUMO network file (.net.xml); default ""
        route_file: Path to SUMO route file (.rou.xml); default ""
        num_seconds: Total simulation duration in seconds (default 20000)
        delta_time: Seconds between agent actions (default 5)
        yellow_time: Duration of yellow phase in seconds (default 2)
        min_green: Minimum green time per phase in seconds (default 5)
        max_green: Maximum green time per phase in seconds (default 50)
        reward_fn: Name of reward function used
            (default "diff-waiting-time")

    Runtime attributes:
        episode_id: Unique episode identifier (default "")
        step_count: Number of steps taken in episode (default 0)
        sim_time: Current simulation time in seconds (default 0.0)
        total_vehicles: Total number of vehicles in simulation (default 0)
        total_waiting_time: Cumulative waiting time across all vehicles
            (default 0.0)
        mean_waiting_time: Mean waiting time (default 0.0). Presumably
            averaged over vehicles and expressed in seconds; confirm
            against the code that populates it.
        mean_speed: Mean speed (default 0.0). Units and the set of vehicles
            averaged over are not established in this module; confirm
            against the code that populates it.
    """

    # Episode tracking
    # NOTE(review): episode_id and step_count may already be declared on the
    # base State class; confirm the redeclaration here is intentional.
    episode_id: str = ""
    step_count: int = 0

    # SUMO configuration
    # Empty-string defaults for the file paths mean "not set"; nothing in
    # this class checks that they point to real files.
    net_file: str = ""
    route_file: str = ""
    num_seconds: int = 20000
    delta_time: int = 5
    yellow_time: int = 2
    min_green: int = 5
    max_green: int = 50
    reward_fn: str = "diff-waiting-time"

    # Runtime metrics
    sim_time: float = 0.0
    total_vehicles: int = 0
    total_waiting_time: float = 0.0
    mean_waiting_time: float = 0.0
    mean_speed: float = 0.0