Spaces:
Sleeping
Sleeping
File size: 4,173 Bytes
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
"""
Data models for RhythmEnv Life Simulator.
A holistic life resource management RL environment where an agent balances
5 life meters (Vitality, Cognition, Progress, Serenity, Connection) across
a 7-day week with hidden personality profiles.
"""
from __future__ import annotations
from enum import Enum
from typing import Dict, List, Optional
from openenv.core.env_server import Action, Observation, State
from pydantic import BaseModel, Field
class ActionType(str, Enum):
    """Available actions for the life simulator agent.

    Members also subclass ``str``, so each one compares equal to its
    snake_case value and can be looked up from it, e.g.
    ``ActionType("sleep") is ActionType.SLEEP``.
    """

    # Productivity
    DEEP_WORK = "deep_work"
    ADMIN_WORK = "admin_work"
    LEARN = "learn"

    # Recovery
    SLEEP = "sleep"
    EXERCISE = "exercise"
    MEDITATE = "meditate"

    # Social
    FAMILY_TIME = "family_time"
    SOCIALIZE = "socialize"

    # Leisure
    ME_TIME = "me_time"
    BINGE_WATCH = "binge_watch"
class RhythmAction(Action):
    """Action for the Life Simulator. Agent chooses one of 10 activity types.

    Attributes:
        action_type: The chosen activity; required (no default), and must be
            one of the ``ActionType`` members.
    """

    action_type: ActionType
class StepRecord(BaseModel):
    """
    Record of one completed step included in step_history.

    Contains the action taken, the reward received, per-meter deltas, AND
    per-meter ANOMALIES (actual_delta - expected_delta_under_neutral_profile).

    The anomalies are the cleanest profile-inference signal — they tell the
    agent how much THIS person's response deviates from the average person.
    Without them, the agent has to back out the profile from raw deltas
    without a baseline to compare against (much harder).

    ``step``, ``action`` and ``reward`` are required; every delta and anomaly
    field defaults to 0.0.
    """

    # Identity of the step and its outcome.
    step: int
    action: str
    reward: float

    # Per-meter deltas for this step.
    vitality_delta: float = 0.0
    cognition_delta: float = 0.0
    progress_delta: float = 0.0
    serenity_delta: float = 0.0
    connection_delta: float = 0.0

    # Per-meter anomalies: actual_delta minus expected_delta_under_neutral_profile.
    # Surfaced to the agent in the prompt — the cleanest profile-inference signal.
    vitality_anomaly: float = 0.0
    cognition_anomaly: float = 0.0
    progress_anomaly: float = 0.0
    serenity_anomaly: float = 0.0
    connection_anomaly: float = 0.0
class RhythmObservation(Observation):
    """
    Observation returned to the agent each step.

    The agent sees all 5 meters, temporal context, last-step deltas,
    anomaly signals (actual vs expected meter changes), and a rolling
    history of the last 7 steps. The hidden personality profile and
    reward weight decomposition are NOT included.

    The step_history and *_anomaly fields in reward_breakdown together
    give the agent everything it needs to infer the hidden profile:
      - step_history: raw action/reward/delta trajectory for pattern matching
      - *_anomaly: how much each meter deviated from neutral-profile expectation
    """

    # Temporal context.
    timestep: int = 0
    day: int = 0
    slot: int = 0

    # The five life meters.
    vitality: float = 0.8
    cognition: float = 0.7
    progress: float = 0.0
    serenity: float = 0.7
    connection: float = 0.5

    # Episode status.
    active_event: Optional[str] = None
    remaining_steps: int = 28
    reward: float = 0.0
    done: bool = False

    # default_factory gives every observation its own dict rather than a
    # shared mutable default.
    reward_breakdown: Dict[str, float] = Field(default_factory=dict)

    # Last step's per-meter deltas as first-class fields (not just buried in reward_breakdown)
    vitality_delta: float = 0.0
    cognition_delta: float = 0.0
    progress_delta: float = 0.0
    serenity_delta: float = 0.0
    connection_delta: float = 0.0
    last_action: Optional[str] = None

    # Rolling history of the last HISTORY_LENGTH steps
    step_history: List[StepRecord] = Field(default_factory=list)
class RhythmState(State):
    """
    Internal state for the Life Simulator.

    Includes profile_name which is hidden from the observation.
    """

    # Temporal context.
    timestep: int = 0
    day: int = 0
    slot: int = 0

    # Hidden from the observation; defaults to an empty string.
    profile_name: str = ""

    # The five life meters.
    vitality: float = 0.8
    cognition: float = 0.7
    progress: float = 0.0
    serenity: float = 0.7
    connection: float = 0.5

    active_event: Optional[str] = None
|