# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""
Data models for RhythmEnv Life Simulator.

A holistic life resource management RL environment where an agent balances
5 life meters (Vitality, Cognition, Progress, Serenity, Connection) across
a 7-day week with hidden personality profiles.
"""

from __future__ import annotations

from enum import Enum
from typing import Dict, List, Optional

from openenv.core.env_server import Action, Observation, State
from pydantic import BaseModel, Field


class ActionType(str, Enum):
    """
    Available actions for the life simulator agent.

    Defines ten activities, grouped below as productivity, recovery,
    social, and leisure. Because of the ``str`` mixin every member is also
    a string that compares equal to its value
    (``ActionType.SLEEP == "sleep"``), and ``ActionType("sleep")`` looks a
    member up from its raw value.
    """

    # Productivity
    DEEP_WORK = "deep_work"
    ADMIN_WORK = "admin_work"
    LEARN = "learn"
    # Recovery
    SLEEP = "sleep"
    EXERCISE = "exercise"
    MEDITATE = "meditate"
    # Social
    FAMILY_TIME = "family_time"
    SOCIALIZE = "socialize"
    # Leisure
    ME_TIME = "me_time"
    BINGE_WATCH = "binge_watch"


class RhythmAction(Action):
    """
    Action for the Life Simulator. Agent chooses one of 10 activity types.

    This class declares a single field of its own; anything else on the
    action comes from the ``Action`` base class imported from
    ``openenv.core.env_server``.
    """

    # The one activity selected for this step (required, no default).
    action_type: ActionType


class StepRecord(BaseModel):
    """
    Record of one completed step included in step_history.

    Contains the action taken, the reward received, per-meter deltas, AND
    per-meter ANOMALIES (actual_delta - expected_delta_under_neutral_profile).
    The anomalies are the cleanest profile-inference signal — they tell the
    agent how much THIS person's response deviates from the average person.
    Without them, the agent has to back out the profile from raw deltas
    without a baseline to compare against (much harder).

    ``step``, ``action`` and ``reward`` are required; every delta and
    anomaly field defaults to 0.0.
    """

    # Index of the step this record describes.
    # NOTE(review): whether this is 0- or 1-based is not visible in this file.
    step: int
    # Action taken, stored as a plain string rather than an ActionType.
    # Presumably holds an ActionType value such as "deep_work" — confirm
    # against the code that builds these records.
    action: str
    # Reward received for this step.
    reward: float
    # Per-meter deltas for this step, one per life meter.
    vitality_delta: float = 0.0
    cognition_delta: float = 0.0
    progress_delta: float = 0.0
    serenity_delta: float = 0.0
    connection_delta: float = 0.0
    # Per-meter anomalies: actual_delta minus expected_delta_under_neutral_profile.
    # Surfaced to the agent in the prompt — the cleanest profile-inference signal.
    vitality_anomaly: float = 0.0
    cognition_anomaly: float = 0.0
    progress_anomaly: float = 0.0
    serenity_anomaly: float = 0.0
    connection_anomaly: float = 0.0


class RhythmObservation(Observation):
    """
    Observation returned to the agent each step.

    The agent sees all 5 meters, temporal context, last-step deltas,
    anomaly signals (actual vs expected meter changes), and a rolling
    history of the last 7 steps. The hidden personality profile and
    reward weight decomposition are NOT included.

    The step_history and *_anomaly fields in reward_breakdown together
    give the agent everything it needs to infer the hidden profile:
    - step_history: raw action/reward/delta trajectory for pattern matching
    - *_anomaly: how much each meter deviated from neutral-profile expectation

    Every field declared here has a default, so the model can be built
    without arguments as far as this class is concerned (the ``Observation``
    base class is defined elsewhere and may add its own requirements).
    """

    # Temporal context: overall step counter plus position within the week.
    # NOTE(review): index base and slots-per-day are not visible in this file.
    timestep: int = 0
    day: int = 0
    slot: int = 0
    # The five life meters. Defaults match the ones declared on RhythmState.
    vitality: float = 0.8
    cognition: float = 0.7
    progress: float = 0.0
    serenity: float = 0.7
    connection: float = 0.5
    # Name of the currently active event; None when there is none.
    active_event: Optional[str] = None
    # Steps left in the episode. Presumably 28 = 7 days x 4 slots per day —
    # confirm against the environment's episode-length constant.
    remaining_steps: int = 28
    # Reward for the last step and the episode-finished flag.
    reward: float = 0.0
    done: bool = False
    # Named float components of the reward; default_factory gives each
    # instance its own empty dict. The class docstring says the *_anomaly
    # values live here — the exact key set is produced outside this file.
    reward_breakdown: Dict[str, float] = Field(default_factory=dict)

    # Last step's per-meter deltas as first-class fields (not just buried in reward_breakdown)
    vitality_delta: float = 0.0
    cognition_delta: float = 0.0
    progress_delta: float = 0.0
    serenity_delta: float = 0.0
    connection_delta: float = 0.0
    # Previous action as a plain string; None when no action has been recorded.
    last_action: Optional[str] = None

    # Rolling history of the last HISTORY_LENGTH steps
    # NOTE(review): HISTORY_LENGTH is defined outside this file, while the
    # class docstring hard-codes 7 — keep the two in sync.
    step_history: List[StepRecord] = Field(default_factory=list)


class RhythmState(State):
    """
    Internal state for the Life Simulator.

    Includes profile_name which is hidden from the observation.

    The remaining fields (timestep, day, slot, the five meters and
    active_event) mirror the same-named fields on RhythmObservation and
    use identical defaults.
    """

    # Temporal position: overall step counter, day, and slot within the day.
    timestep: int = 0
    day: int = 0
    slot: int = 0
    # Name of the hidden personality profile; empty string by default.
    # This is the only field declared here that RhythmObservation lacks.
    profile_name: str = ""
    # The five life meters.
    vitality: float = 0.8
    cognition: float = 0.7
    progress: float = 0.0
    serenity: float = 0.7
    connection: float = 0.5
    # Name of the currently active event; None when there is none.
    active_event: Optional[str] = None