# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""
Data models for the RhythmEnv Life Simulator.

RhythmEnv is a holistic life resource management RL environment: an agent
balances 5 life meters (Vitality, Cognition, Progress, Serenity, Connection)
across a 7-day week, while the personality profile that shapes the meter
responses stays hidden from it.

This module declares the action, per-step record, observation and internal
state schemas exchanged with the environment server.
"""

from __future__ import annotations

from enum import Enum
from typing import Dict, List, Optional

from openenv.core.env_server import Action, Observation, State
from pydantic import BaseModel, Field


class ActionType(str, Enum):
    """Closed set of the 10 activities the agent can pick from each step."""

    # --- Productivity ---
    DEEP_WORK = "deep_work"
    ADMIN_WORK = "admin_work"
    LEARN = "learn"

    # --- Recovery ---
    SLEEP = "sleep"
    EXERCISE = "exercise"
    MEDITATE = "meditate"

    # --- Social ---
    FAMILY_TIME = "family_time"
    SOCIALIZE = "socialize"

    # --- Leisure ---
    ME_TIME = "me_time"
    BINGE_WATCH = "binge_watch"


class RhythmAction(Action):
    """Action submitted to the Life Simulator: exactly one activity type."""

    # The activity chosen for this step.
    action_type: ActionType


class StepRecord(BaseModel):
    """
    One completed step, as stored in ``step_history``.

    A record carries the action that was taken, the reward that came back,
    the change in each of the five meters, and a per-meter *anomaly*:

        anomaly = actual_delta - expected_delta_under_neutral_profile

    Anomalies are the cleanest profile-inference signal: they say how far
    THIS person's response is from the average person's. Without them the
    agent would have to back the profile out of raw deltas with no baseline
    to compare against, which is much harder.
    """

    # Identification of the step and its outcome (all three are required).
    step: int
    action: str
    reward: float

    # Change in each meter caused by this step.
    vitality_delta: float = 0.0
    cognition_delta: float = 0.0
    progress_delta: float = 0.0
    serenity_delta: float = 0.0
    connection_delta: float = 0.0

    # Deviation of each meter's change from the neutral-profile expectation
    # (actual minus expected). These values are surfaced to the agent in the
    # prompt.
    vitality_anomaly: float = 0.0
    cognition_anomaly: float = 0.0
    progress_anomaly: float = 0.0
    serenity_anomaly: float = 0.0
    connection_anomaly: float = 0.0


class RhythmObservation(Observation):
    """
    What the agent is shown after every step.

    Visible to the agent: all 5 meters, temporal context, the deltas of the
    last step, anomaly signals (actual vs expected meter changes) and a
    rolling history of the last 7 steps.

    NOT visible: the hidden personality profile and the reward weight
    decomposition.

    ``step_history`` together with the ``*_anomaly`` entries of
    ``reward_breakdown`` is everything the agent needs to infer the hidden
    profile:
      - step_history: raw action/reward/delta trajectory for pattern matching
      - *_anomaly: how far each meter deviated from the neutral-profile
        expectation
    """

    # Temporal context.
    timestep: int = 0
    day: int = 0
    slot: int = 0

    # Current level of each life meter.
    vitality: float = 0.8
    cognition: float = 0.7
    progress: float = 0.0
    serenity: float = 0.7
    connection: float = 0.5

    # Episode context and outcome of the most recent step.
    active_event: Optional[str] = None
    remaining_steps: int = 28
    reward: float = 0.0
    done: bool = False
    reward_breakdown: Dict[str, float] = Field(default_factory=dict)

    # Per-meter deltas of the last step, promoted to first-class fields so
    # consumers do not have to dig them out of reward_breakdown.
    vitality_delta: float = 0.0
    cognition_delta: float = 0.0
    progress_delta: float = 0.0
    serenity_delta: float = 0.0
    connection_delta: float = 0.0
    last_action: Optional[str] = None

    # Rolling window holding the last HISTORY_LENGTH steps.
    step_history: List[StepRecord] = Field(default_factory=list)


class RhythmState(State):
    """
    Internal (server-side) state of the Life Simulator.

    Unlike the observation, this carries ``profile_name``, which is hidden
    from the agent.
    """

    # Temporal context.
    timestep: int = 0
    day: int = 0
    slot: int = 0

    # Name of the hidden personality profile; never placed in the observation.
    profile_name: str = ""

    # Current level of each life meter.
    vitality: float = 0.8
    cognition: float = 0.7
    progress: float = 0.0
    serenity: float = 0.7
    connection: float = 0.5

    active_event: Optional[str] = None