# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""Tests for the RhythmEnv Life Simulator."""

import pytest

from models import ActionType, RhythmAction, RhythmObservation
from server.rhythm_environment import (
    CRITICAL_THRESHOLD,
    MAX_STEPS,
    METERS,
    PROFILES,
    RhythmEnvironment,
)
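

# Each test receives a fresh, unstepped RhythmEnvironment via this fixture.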
@pytest.fixture
def env():
    return RhythmEnvironment()


def make_action(action_type: ActionType) -> RhythmAction:
    return RhythmAction(action_type=action_type)


# ---------------------------------------------------------------------------
# TestReset
# ---------------------------------------------------------------------------


class TestReset:
    def test_returns_valid_observation(self, env):
        obs = env.reset(seed=0)
        assert isinstance(obs, RhythmObservation)
        assert obs.timestep == 0
        assert obs.day == 0
        assert obs.slot == 0
        assert obs.remaining_steps == MAX_STEPS
        assert obs.done is False
        assert obs.reward == 0.0

    def test_meters_initialized(self, env):
        obs = env.reset(seed=0)
        assert 0.0 <= obs.vitality <= 1.0
        assert 0.0 <= obs.cognition <= 1.0
        assert obs.progress == 0.0
        assert 0.0 <= obs.serenity <= 1.0
        assert 0.0 <= obs.connection <= 1.0

    def test_seed_selects_profile(self, env):
        """Different seeds select different profiles."""
        profiles_seen = set()
        for seed in range(3):
            env.reset(seed=seed)
            profiles_seen.add(env.state.profile_name)
        assert len(profiles_seen) == 3

    def test_deterministic_with_same_seed(self, env):
        obs1 = env.reset(seed=42)
        obs2 = env.reset(seed=42)
        assert obs1.vitality == obs2.vitality
        assert obs1.cognition == obs2.cognition
        assert obs1.serenity == obs2.serenity
        assert obs1.connection == obs2.connection

    def test_explicit_profile_selection(self, env):
        env.reset(seed=0, profile="workaholic_stoic")
        assert env.state.profile_name == "workaholic_stoic"

    def test_reset_clears_state(self, env):
        env.reset(seed=0)
        for _ in range(5):
            env.step(make_action(ActionType.DEEP_WORK))
        obs = env.reset(seed=0)
        assert obs.timestep == 0
        assert obs.progress == 0.0


# ---------------------------------------------------------------------------
# TestStep
# ---------------------------------------------------------------------------


class TestStep:
    def test_timestep_advances(self, env):
        env.reset(seed=0)
        obs = env.step(make_action(ActionType.DEEP_WORK))
        assert obs.timestep == 1

    def test_day_and_slot_correct(self, env):
        env.reset(seed=0)
        for _ in range(5):
            obs = env.step(make_action(ActionType.SLEEP))
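        # Assumes 4 slots per day, so after 5 steps: day = 5 // 4 = 1 and
        # slot = 5 % 4 = 1 (inferred from these assertions, not a documented
        # constant).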
        assert obs.day == 1
        assert obs.slot == 1

    def test_deep_work_increases_progress(self, env):
        env.reset(seed=0)
        obs = env.step(make_action(ActionType.DEEP_WORK))
        assert obs.progress > 0.0

    def test_deep_work_drains_vitality(self, env):
        env.reset(seed=0)
        initial_vitality = env.state.vitality
        obs = env.step(make_action(ActionType.DEEP_WORK))
        assert obs.vitality < initial_vitality

    def test_sleep_recovers_vitality(self, env):
        env.reset(seed=0)
        for _ in range(3):
            env.step(make_action(ActionType.DEEP_WORK))
        vitality_before_sleep = env.state.vitality
        obs = env.step(make_action(ActionType.SLEEP))
        assert obs.vitality > vitality_before_sleep

    def test_family_time_builds_connection(self, env):
        env.reset(seed=0)
        initial_connection = env.state.connection
        obs = env.step(make_action(ActionType.FAMILY_TIME))
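        # Small tolerance: passive per-step decay may offset part of the
        # FAMILY_TIME gain (an assumption about the meter dynamics).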
        assert obs.connection > initial_connection - 0.05

    def test_episode_ends_at_max_steps(self, env):
        env.reset(seed=0)
        for _ in range(MAX_STEPS):
            obs = env.step(make_action(ActionType.SLEEP))
        assert obs.done is True
        assert obs.timestep == MAX_STEPS

    def test_not_done_before_max_steps(self, env):
        env.reset(seed=0)
        for _ in range(MAX_STEPS - 1):
            obs = env.step(make_action(ActionType.SLEEP))
        assert obs.done is False

    def test_meters_stay_in_bounds(self, env):
        """No meter exceeds [0.0, 1.0] regardless of actions."""
        env.reset(seed=0)
        for _ in range(MAX_STEPS):
            obs = env.step(make_action(ActionType.DEEP_WORK))
            for meter in METERS:
                val = getattr(obs, meter)
                assert 0.0 <= val <= 1.0, f"{meter}={val} out of bounds"

    def test_low_vitality_reduces_effectiveness(self, env):
        """Progress gain should be lower when vitality is low."""
        env.reset(seed=0, profile="introvert_morning")
        obs_high = env.step(make_action(ActionType.DEEP_WORK))
        progress_high = obs_high.progress
        env.reset(seed=0, profile="introvert_morning")
        for _ in range(6):
            env.step(make_action(ActionType.DEEP_WORK))
        progress_before = env.state.progress
        env.step(make_action(ActionType.DEEP_WORK))
        progress_gained_low = env.state.progress - progress_before
        assert progress_high > progress_gained_low


# ---------------------------------------------------------------------------
# TestProfiles
# ---------------------------------------------------------------------------


class TestProfiles:
    def test_introvert_social_drains_more(self, env):
        """Introvert loses more vitality from socializing than extrovert."""
        env.reset(seed=0, profile="introvert_morning")
        v_before_intro = env.state.vitality
        env.step(make_action(ActionType.SOCIALIZE))
        intro_drain = v_before_intro - env.state.vitality
        env2 = RhythmEnvironment()
        env2.reset(seed=0, profile="extrovert_night_owl")
        v_before_extro = env2.state.vitality
        env2.step(make_action(ActionType.SOCIALIZE))
        extro_drain = v_before_extro - env2.state.vitality
        assert intro_drain > extro_drain

    def test_workaholic_progress_gives_serenity(self, env):
        """Workaholic has better serenity outcome from deep work than introvert."""
        env.reset(seed=0, profile="workaholic_stoic")
        serenity_before = env.state.serenity
        env.step(make_action(ActionType.DEEP_WORK))
        workaholic_change = env.state.serenity - serenity_before
        env2 = RhythmEnvironment()
        env2.reset(seed=0, profile="introvert_morning")
        serenity_before_intro = env2.state.serenity
        env2.step(make_action(ActionType.DEEP_WORK))
        introvert_change = env2.state.serenity - serenity_before_intro
        assert workaholic_change > introvert_change

    def test_binge_shame_introvert(self, env):
        """Introvert suffers extra serenity loss from binge watching."""
        env.reset(seed=0, profile="introvert_morning")
        serenity_before = env.state.serenity
        env.step(make_action(ActionType.BINGE_WATCH))
        intro_change = env.state.serenity - serenity_before
        env2 = RhythmEnvironment()
        env2.reset(seed=0, profile="extrovert_night_owl")
        serenity_before_ext = env2.state.serenity
        env2.step(make_action(ActionType.BINGE_WATCH))
        ext_change = env2.state.serenity - serenity_before_ext
        assert intro_change < ext_change

    def test_different_rewards_same_action(self, env):
        """Same action produces different rewards for different profiles."""
        rewards = {}
        for profile_name in ["introvert_morning", "extrovert_night_owl", "workaholic_stoic"]:
            e = RhythmEnvironment()
            e.reset(seed=0, profile=profile_name)
            obs = e.step(make_action(ActionType.DEEP_WORK))
            rewards[profile_name] = obs.reward
        values = list(rewards.values())
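        # Rounding to 3 decimals groups near-identical floats, so the test
        # only passes if rewards genuinely differ across profiles.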
        assert len(set(round(v, 3) for v in values)) > 1

    def test_extrovert_night_cognition_bonus(self, env):
        """Extrovert gets better cognition gains in evening vs morning."""
        env.reset(seed=0, profile="extrovert_night_owl")
        env.step(make_action(ActionType.SLEEP))  # morning
        env.step(make_action(ActionType.SLEEP))  # afternoon
        cognition_before = env.state.cognition
        env.step(make_action(ActionType.MEDITATE))  # evening
        evening_gain = env.state.cognition - cognition_before
        env.reset(seed=0, profile="extrovert_night_owl")
        cognition_before_m = env.state.cognition
        env.step(make_action(ActionType.MEDITATE))  # morning
        morning_gain = env.state.cognition - cognition_before_m
        assert evening_gain > morning_gain


# ---------------------------------------------------------------------------
# TestEvents
# ---------------------------------------------------------------------------


class TestEvents:
    def test_events_deterministic_with_seed(self, env):
        """Same seed produces same event sequence."""
        events1 = []
        env.reset(seed=99)
        for _ in range(MAX_STEPS):
            obs = env.step(make_action(ActionType.SLEEP))
            events1.append(obs.active_event)
        events2 = []
        env.reset(seed=99)
        for _ in range(MAX_STEPS):
            obs = env.step(make_action(ActionType.SLEEP))
            events2.append(obs.active_event)
        assert events1 == events2

    def test_event_visible_in_observation(self, env):
        """When an event fires, active_event is set in observation."""
        found_event = False
        for seed in range(100):
            env.reset(seed=seed)
            for _ in range(MAX_STEPS):
                obs = env.step(make_action(ActionType.SLEEP))
                if obs.active_event is not None:
                    found_event = True
                    assert obs.active_event in [
                        "prod_crash", "family_emergency", "illness", "good_news"
                    ]
                    break
            if found_event:
                break
        assert found_event, "No events triggered in 100 episodes"

    def test_no_event_when_none(self, env):
        """Most steps should have no event."""
        env.reset(seed=0)
        no_event_count = 0
        for _ in range(MAX_STEPS):
            obs = env.step(make_action(ActionType.SLEEP))
            if obs.active_event is None:
                no_event_count += 1
        assert no_event_count > MAX_STEPS * 0.7


# ---------------------------------------------------------------------------
# TestGrader
# ---------------------------------------------------------------------------


class TestGrader:
    def test_final_score_in_range(self, env):
        env.reset(seed=0)
        for _ in range(MAX_STEPS):
            obs = env.step(make_action(ActionType.SLEEP))
        assert "final_score" in obs.reward_breakdown
        score = obs.reward_breakdown["final_score"]
        assert 0.0 <= score <= 1.0

    def test_balanced_play_beats_all_sleep(self, env):
        """A balanced strategy should score higher than just sleeping."""
        env.reset(seed=0)
        for _ in range(MAX_STEPS):
            obs_sleep = env.step(make_action(ActionType.SLEEP))
        score_sleep = obs_sleep.reward_breakdown["final_score"]
        balanced_actions = [
            ActionType.DEEP_WORK, ActionType.LEARN,
            ActionType.EXERCISE, ActionType.FAMILY_TIME,
        ] * 7
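        # Assumes MAX_STEPS == 28 (4 actions x 7 repeats, i.e. 7 days of
        # 4 slots); if MAX_STEPS differs, adjust the repeat count.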
        env.reset(seed=0)
        for action_type in balanced_actions:
            obs_balanced = env.step(make_action(action_type))
        score_balanced = obs_balanced.reward_breakdown["final_score"]
        assert score_balanced > score_sleep

    def test_deterministic_grading(self, env):
        """Same actions produce same final score."""
        scores = []
        for _ in range(2):
            env.reset(seed=42)
            for _ in range(MAX_STEPS):
                obs = env.step(make_action(ActionType.DEEP_WORK))
            scores.append(obs.reward_breakdown["final_score"])
        assert scores[0] == scores[1]

    def test_all_binge_scores_low(self, env):
        """Binge watching everything should produce a low score."""
        env.reset(seed=0)
        for _ in range(MAX_STEPS):
            obs = env.step(make_action(ActionType.BINGE_WATCH))
        score = obs.reward_breakdown["final_score"]
        assert score < 0.5


# ---------------------------------------------------------------------------
# TestEdgeCases
# ---------------------------------------------------------------------------


class TestEdgeCases:
    def test_observation_hides_profile(self, env):
        """Observation should not expose profile_name."""
        obs = env.reset(seed=0)
        obs_dict = obs.model_dump()
        assert "profile_name" not in obs_dict

    def test_state_exposes_profile(self, env):
        """State should include profile_name for debugging."""
        # Default: continuous profile (name like 'sampled_0')
        env.reset(seed=0)
        assert env.state.profile_name != ""
        assert env.state.profile_name.startswith("sampled_")
        # Explicit profile: name matches the requested reference profile
        env.reset(seed=0, profile="workaholic_stoic")
        assert env.state.profile_name == "workaholic_stoic"
        assert env.state.profile_name in [p["name"] for p in PROFILES]

    def test_all_action_types_valid(self, env):
        """Every ActionType should be processable without error."""
        env.reset(seed=0)
        for action_type in ActionType:
            e = RhythmEnvironment()
            e.reset(seed=0)
            obs = e.step(make_action(action_type))
            assert isinstance(obs, RhythmObservation)


# ---------------------------------------------------------------------------
# Belief-accuracy grader component
# ---------------------------------------------------------------------------


class TestBeliefAccuracyGrader:
    """The grader awards 0.20 weight to belief_accuracy. Agents that don't
    emit beliefs get 0 on this component; agents whose final belief matches
    the true profile vector get up to 0.20 added to final_score.
    """

    def _run_episode_with_belief(self, seed, belief, profile=None):
        env = RhythmEnvironment()
        if profile:
            obs = env.reset(seed=seed, profile=profile)
        else:
            obs = env.reset(seed=seed)
        for _ in range(MAX_STEPS):
            if obs.done:
                break
            env.record_belief(belief)
            obs = env.step(make_action(ActionType.SLEEP))
        return obs.reward_breakdown.get("final_score", 0.0)

    def _run_episode_no_belief(self, seed, profile=None):
        env = RhythmEnvironment()
        if profile:
            obs = env.reset(seed=seed, profile=profile)
        else:
            obs = env.reset(seed=seed)
        for _ in range(MAX_STEPS):
            if obs.done:
                break
            obs = env.step(make_action(ActionType.SLEEP))
        return obs.reward_breakdown.get("final_score", 0.0)

    def test_no_belief_means_zero_belief_component(self, env):
        """Agent that never calls record_belief gets 0 on the belief component."""
        score = self._run_episode_no_belief(seed=42)
        # Without belief, max possible score is 0.80 (all weights ex belief).
        # Realistic ceiling is much lower since SLEEP-only doesn't max meters.
        assert score <= 0.80

    def test_perfect_belief_lifts_score(self, env):
        """An agent that emits the TRUE belief vector should score higher
        than the same actions with no belief, by up to +0.20."""
        # Use a known reference profile so we can hand-pick the perfect belief.
        from server.rhythm_environment import (
            PROFILE_MAP,
            profile_to_belief_vector,
        )

        profile_name = "workaholic_stoic"
        true_belief = profile_to_belief_vector(PROFILE_MAP[profile_name])
        no_belief_score = self._run_episode_no_belief(seed=7, profile=profile_name)
        perfect_score = self._run_episode_with_belief(
            seed=7, belief=true_belief, profile=profile_name
        )
        # Perfect belief contributes 0.20 to final_score.
        assert perfect_score > no_belief_score
        assert (perfect_score - no_belief_score) == pytest.approx(0.20, abs=0.01)

    def test_wrong_belief_scores_less_than_perfect(self, env):
        """A wrong belief still earns a score in [0, 1], just less than a
        perfect one."""
        from server.rhythm_environment import (
            PROFILE_MAP,
            profile_to_belief_vector,
        )

        profile_name = "introvert_morning"
        true_belief = profile_to_belief_vector(PROFILE_MAP[profile_name])
        wrong_belief = [1.0 - b for b in true_belief]  # opposite of the truth
        perfect_score = self._run_episode_with_belief(
            seed=7, belief=true_belief, profile=profile_name
        )
        wrong_score = self._run_episode_with_belief(
            seed=7, belief=wrong_belief, profile=profile_name
        )
        assert perfect_score > wrong_score

    def test_record_belief_validates_length(self, env):
        env.reset(seed=0)
        with pytest.raises(ValueError):
            env.record_belief([0.5, 0.5])  # too short
        with pytest.raises(ValueError):
            env.record_belief([0.5, 0.5, 0.5, 0.5])  # too long

    def test_record_belief_clamps_to_unit_interval(self, env):
        """Beliefs outside [0, 1] should be clamped, not rejected."""
        env.reset(seed=0)
        env.record_belief([-0.5, 1.5, 0.5])
        # Internal state should be clamped
        assert env._final_belief == [0.0, 1.0, 0.5]

    def test_grader_uses_openenv_weighted_sum_rubric(self, env):
        """Grader composes child rubrics via openenv.core.rubrics.WeightedSum."""
        from openenv.core.rubrics import Rubric, WeightedSum
        from server.rubrics import (
            CrashFreeRubric, ProgressRubric, ConnectionRubric,
            AdaptationRubric, EfficiencyRubric, BeliefAccuracyRubric,
            GRADE_WEIGHTS, make_grade_rubric,
        )

        # Trigger a full episode so _grade_episode runs and builds the rubric.
        obs = env.reset(seed=0)
        for _ in range(MAX_STEPS):
            if obs.done:
                break
            obs = env.step(make_action(ActionType.SLEEP))
        rubric = env._grade_rubric
        assert isinstance(rubric, WeightedSum), "grader must use WeightedSum"
        assert isinstance(rubric, Rubric)
        # 6 children, one per scoring component
        children = list(rubric.children())
        assert len(children) == 6
        types = {type(c).__name__ for c in children}
        assert types == {
            "CrashFreeRubric", "ProgressRubric", "ConnectionRubric",
            "AdaptationRubric", "EfficiencyRubric", "BeliefAccuracyRubric",
        }
        # Weights must sum to 1.0 (WeightedSum enforces this; sanity-check
        # the configured values here as well).
        assert abs(sum(GRADE_WEIGHTS.values()) - 1.0) < 1e-6

    def test_make_grade_rubric_is_pure_function(self, env):
        """make_grade_rubric should produce equivalent rubrics across calls."""
        from server.rubrics import make_grade_rubric

        env.reset(seed=42)
        r1 = make_grade_rubric(env)
        r2 = make_grade_rubric(env)
        # Same shape, fresh object
        assert len(list(r1.children())) == len(list(r2.children())) == 6
        assert r1 is not r2
        # Same weights
        assert r1._weights == r2._weights