# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""
Data models for RhythmEnv Life Simulator.

A holistic life resource management RL environment where an agent balances
5 life meters (Vitality, Cognition, Progress, Serenity, Connection) across
a 7-day week with hidden personality profiles.
"""

from __future__ import annotations

from enum import Enum
from typing import Dict, List, Optional

from openenv.core.env_server import Action, Observation, State
from pydantic import BaseModel, Field


class ActionType(str, Enum):
    """Available actions for the life simulator agent."""

    # Productivity
    DEEP_WORK = "deep_work"
    ADMIN_WORK = "admin_work"
    LEARN = "learn"
    # Recovery
    SLEEP = "sleep"
    EXERCISE = "exercise"
    MEDITATE = "meditate"
    # Social
    FAMILY_TIME = "family_time"
    SOCIALIZE = "socialize"
    # Leisure
    ME_TIME = "me_time"
    BINGE_WATCH = "binge_watch"


class RhythmAction(Action):
    """Action for the Life Simulator. Agent chooses one of 10 activity types."""

    action_type: ActionType
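
# Illustrative usage (a sketch; assumes, like the rest of this module, that the
# openenv Action base class is a pydantic model with no extra required fields):
#
#     action = RhythmAction(action_type=ActionType.MEDITATE)
#     # ActionType is a str-valued enum, so pydantic accepts raw strings too:
#     assert RhythmAction(action_type="meditate").action_type is ActionType.MEDITATE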


class StepRecord(BaseModel):
    """
    Record of one completed step included in step_history.

    Contains the action taken, the reward received, per-meter deltas, AND
    per-meter ANOMALIES (actual_delta - expected_delta_under_neutral_profile).
    The anomalies are the cleanest profile-inference signal — they tell the
    agent how much THIS person's response deviates from the average person.
    Without them, the agent has to back out the profile from raw deltas
    without a baseline to compare against (much harder).
    """

    step: int
    action: str
    reward: float
    vitality_delta: float = 0.0
    cognition_delta: float = 0.0
    progress_delta: float = 0.0
    serenity_delta: float = 0.0
    connection_delta: float = 0.0
    # Per-meter anomalies: actual_delta minus expected_delta_under_neutral_profile.
    # Surfaced to the agent in the prompt — the cleanest profile-inference signal.
    vitality_anomaly: float = 0.0
    cognition_anomaly: float = 0.0
    progress_anomaly: float = 0.0
    serenity_anomaly: float = 0.0
    connection_anomaly: float = 0.0
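
# A minimal sketch of how a policy might exploit these anomalies (illustrative;
# `history` stands in for a RhythmObservation.step_history list):
#
#     def mean_serenity_anomaly(history: List[StepRecord], action: str) -> float:
#         """Average serenity anomaly observed for one action. A persistently
#         nonzero value suggests this profile deviates from baseline there."""
#         vals = [r.serenity_anomaly for r in history if r.action == action]
#         return sum(vals) / len(vals) if vals else 0.0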


class RhythmObservation(Observation):
    """
    Observation returned to the agent each step.

    The agent sees all 5 meters, temporal context, last-step deltas,
    anomaly signals (actual vs expected meter changes), and a rolling
    history of the last 7 steps. The hidden personality profile and
    reward weight decomposition are NOT included.

    The step_history and *_anomaly fields in reward_breakdown together
    give the agent everything it needs to infer the hidden profile:
    - step_history: raw action/reward/delta trajectory for pattern matching
    - *_anomaly: how much each meter deviated from neutral-profile expectation
    """

    timestep: int = 0
    day: int = 0
    slot: int = 0
    vitality: float = 0.8
    cognition: float = 0.7
    progress: float = 0.0
    serenity: float = 0.7
    connection: float = 0.5
    active_event: Optional[str] = None
    remaining_steps: int = 28
    reward: float = 0.0
    done: bool = False
    reward_breakdown: Dict[str, float] = Field(default_factory=dict)

    # Last step's per-meter deltas as first-class fields (not just buried in reward_breakdown)
    vitality_delta: float = 0.0
    cognition_delta: float = 0.0
    progress_delta: float = 0.0
    serenity_delta: float = 0.0
    connection_delta: float = 0.0
    last_action: Optional[str] = None

    # Rolling history of the last HISTORY_LENGTH steps
    step_history: List[StepRecord] = Field(default_factory=list)
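
# Sketch of reading an observation in an agent loop (illustrative; how the
# observation is obtained from the env server is outside this module):
#
#     obs: RhythmObservation = ...  # returned by the environment each step
#     if obs.step_history:
#         last = obs.step_history[-1]
#         # The delta says what happened; the anomaly says how far it deviated
#         # from the neutral-profile expectation.
#         print(last.action, last.serenity_delta, last.serenity_anomaly)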


class RhythmState(State):
    """
    Internal state for the Life Simulator.

    Includes profile_name, which is hidden from the observation.
    """

    timestep: int = 0
    day: int = 0
    slot: int = 0
    profile_name: str = ""
    vitality: float = 0.8
    cognition: float = 0.7
    progress: float = 0.0
    serenity: float = 0.7
    connection: float = 0.5
    active_event: Optional[str] = None
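
# How state might map to an observation (illustrative; a real env server would
# also fill reward, deltas, and step_history). profile_name is deliberately
# omitted, keeping the personality profile hidden from the agent:
#
#     def observe(state: RhythmState) -> RhythmObservation:
#         return RhythmObservation(
#             timestep=state.timestep,
#             day=state.day,
#             slot=state.slot,
#             vitality=state.vitality,
#             cognition=state.cognition,
#             progress=state.progress,
#             serenity=state.serenity,
#             connection=state.connection,
#             active_event=state.active_event,
#         )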