# rhythm_env/models.py
# Commit ece0bbe ("Algorithm Distillation: grader v2 with belief_accuracy + SFT pipeline")
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
"""
Data models for RhythmEnv Life Simulator.
A holistic life resource management RL environment where an agent balances
5 life meters (Vitality, Cognition, Progress, Serenity, Connection) across
a 7-day week with hidden personality profiles.
"""
from __future__ import annotations
from enum import Enum
from typing import Dict, List, Optional
from openenv.core.env_server import Action, Observation, State
from pydantic import BaseModel, Field
class ActionType(str, Enum):
    """The 10 activities the agent can choose from each step.

    Members are grouped into four life categories (productivity, recovery,
    social, leisure). Each member's value is its lowercase snake_case name,
    and because the enum subclasses ``str`` the members compare equal to
    their string values and serialize transparently.
    """

    # -- Productivity --------------------------------------------------
    DEEP_WORK = "deep_work"
    ADMIN_WORK = "admin_work"
    LEARN = "learn"
    # -- Recovery ------------------------------------------------------
    SLEEP = "sleep"
    EXERCISE = "exercise"
    MEDITATE = "meditate"
    # -- Social --------------------------------------------------------
    FAMILY_TIME = "family_time"
    SOCIALIZE = "socialize"
    # -- Leisure -------------------------------------------------------
    ME_TIME = "me_time"
    BINGE_WATCH = "binge_watch"
class RhythmAction(Action):
    """Action for the Life Simulator. Agent chooses one of 10 activity types."""

    # The single activity to perform this step; see ActionType for the
    # full catalogue (productivity / recovery / social / leisure).
    action_type: ActionType
class StepRecord(BaseModel):
    """
    Record of one completed step included in step_history.
    Contains the action taken, the reward received, per-meter deltas, AND
    per-meter ANOMALIES (actual_delta - expected_delta_under_neutral_profile).
    The anomalies are the cleanest profile-inference signal — they tell the
    agent how much THIS person's response deviates from the average person.
    Without them, the agent has to back out the profile from raw deltas
    without a baseline to compare against (much harder).
    """

    # Timestep at which this step was taken.
    step: int
    # String value of the ActionType chosen (e.g. "deep_work").
    action: str
    # Scalar reward received for this step.
    reward: float
    # Observed change in each of the 5 life meters caused by this step.
    vitality_delta: float = 0.0
    cognition_delta: float = 0.0
    progress_delta: float = 0.0
    serenity_delta: float = 0.0
    connection_delta: float = 0.0
    # Per-meter anomalies: actual_delta minus expected_delta_under_neutral_profile.
    # Surfaced to the agent in the prompt — the cleanest profile-inference signal.
    vitality_anomaly: float = 0.0
    cognition_anomaly: float = 0.0
    progress_anomaly: float = 0.0
    serenity_anomaly: float = 0.0
    connection_anomaly: float = 0.0
class RhythmObservation(Observation):
    """
    Observation returned to the agent each step.
    The agent sees all 5 meters, temporal context, last-step deltas,
    anomaly signals (actual vs expected meter changes), and a rolling
    history of the last 7 steps. The hidden personality profile and
    reward weight decomposition are NOT included.
    The step_history and *_anomaly fields in reward_breakdown together
    give the agent everything it needs to infer the hidden profile:
    - step_history: raw action/reward/delta trajectory for pattern matching
    - *_anomaly: how much each meter deviated from neutral-profile expectation
    """

    # --- Temporal context (defaults describe the start of an episode) ---
    timestep: int = 0
    day: int = 0
    slot: int = 0
    # --- The 5 life meters with their initial values ---
    vitality: float = 0.8
    cognition: float = 0.7
    progress: float = 0.0
    serenity: float = 0.7
    connection: float = 0.5
    # Name of the currently active event, if any (None when no event is active).
    active_event: Optional[str] = None
    # Steps left in the episode; starts at 28 (7 days x 4 slots — TODO confirm slot count).
    remaining_steps: int = 28
    reward: float = 0.0
    done: bool = False
    # Named reward components; also carries the *_anomaly signals (see class docstring).
    reward_breakdown: Dict[str, float] = Field(default_factory=dict)
    # Last step's per-meter deltas as first-class fields (not just buried in reward_breakdown)
    vitality_delta: float = 0.0
    cognition_delta: float = 0.0
    progress_delta: float = 0.0
    serenity_delta: float = 0.0
    connection_delta: float = 0.0
    # String value of the previous ActionType, or None before the first step.
    last_action: Optional[str] = None
    # Rolling history of the last HISTORY_LENGTH steps
    step_history: List[StepRecord] = Field(default_factory=list)
class RhythmState(State):
    """
    Internal state for the Life Simulator.
    Includes profile_name which is hidden from the observation.
    """

    # --- Temporal context ---
    timestep: int = 0
    day: int = 0
    slot: int = 0
    # Hidden personality profile identifier — intentionally NOT exposed
    # in RhythmObservation; the agent must infer it from anomaly signals.
    profile_name: str = ""
    # --- The 5 life meters (same initial values as RhythmObservation) ---
    vitality: float = 0.8
    cognition: float = 0.7
    progress: float = 0.0
    serenity: float = 0.7
    connection: float = 0.5
    # Name of the currently active event, if any.
    active_event: Optional[str] = None