# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""Tests for the RhythmEnv Life Simulator.

Covers reset/step semantics, per-profile behavior differences, random
events, episode grading, and the belief-accuracy grader component.
"""

import pytest

from models import ActionType, RhythmAction, RhythmObservation
from server.rhythm_environment import (
    CRITICAL_THRESHOLD,
    MAX_STEPS,
    METERS,
    PROFILES,
    RhythmEnvironment,
)


@pytest.fixture
def env():
    """Fresh environment instance for each test."""
    return RhythmEnvironment()


def make_action(action_type: ActionType) -> RhythmAction:
    """Wrap an ActionType in a RhythmAction message."""
    return RhythmAction(action_type=action_type)


# ---------------------------------------------------------------------------
# TestReset
# ---------------------------------------------------------------------------


class TestReset:
    def test_returns_valid_observation(self, env):
        obs = env.reset(seed=0)
        assert isinstance(obs, RhythmObservation)
        assert obs.timestep == 0
        assert obs.day == 0
        assert obs.slot == 0
        assert obs.remaining_steps == MAX_STEPS
        assert obs.done is False
        assert obs.reward == 0.0

    def test_meters_initialized(self, env):
        obs = env.reset(seed=0)
        assert 0.0 <= obs.vitality <= 1.0
        assert 0.0 <= obs.cognition <= 1.0
        assert obs.progress == 0.0
        assert 0.0 <= obs.serenity <= 1.0
        assert 0.0 <= obs.connection <= 1.0

    def test_seed_selects_profile(self, env):
        """Different seeds select different profiles."""
        profiles_seen = set()
        for seed in range(3):
            env.reset(seed=seed)
            profiles_seen.add(env.state.profile_name)
        assert len(profiles_seen) == 3

    def test_deterministic_with_same_seed(self, env):
        obs1 = env.reset(seed=42)
        obs2 = env.reset(seed=42)
        assert obs1.vitality == obs2.vitality
        assert obs1.cognition == obs2.cognition
        assert obs1.serenity == obs2.serenity
        assert obs1.connection == obs2.connection

    def test_explicit_profile_selection(self, env):
        env.reset(seed=0, profile="workaholic_stoic")
        assert env.state.profile_name == "workaholic_stoic"

    def test_reset_clears_state(self, env):
        env.reset(seed=0)
        for _ in range(5):
            env.step(make_action(ActionType.DEEP_WORK))
        obs = env.reset(seed=0)
        assert obs.timestep == 0
        assert obs.progress == 0.0


# ---------------------------------------------------------------------------
# TestStep
# ---------------------------------------------------------------------------


class TestStep:
    def test_timestep_advances(self, env):
        env.reset(seed=0)
        obs = env.step(make_action(ActionType.DEEP_WORK))
        assert obs.timestep == 1

    def test_day_and_slot_correct(self, env):
        env.reset(seed=0)
        for _ in range(5):
            obs = env.step(make_action(ActionType.SLEEP))
        assert obs.day == 1
        assert obs.slot == 1

    def test_deep_work_increases_progress(self, env):
        env.reset(seed=0)
        obs = env.step(make_action(ActionType.DEEP_WORK))
        assert obs.progress > 0.0

    def test_deep_work_drains_vitality(self, env):
        env.reset(seed=0)
        initial_vitality = env.state.vitality
        obs = env.step(make_action(ActionType.DEEP_WORK))
        assert obs.vitality < initial_vitality

    def test_sleep_recovers_vitality(self, env):
        env.reset(seed=0)
        for _ in range(3):
            env.step(make_action(ActionType.DEEP_WORK))
        vitality_before_sleep = env.state.vitality
        obs = env.step(make_action(ActionType.SLEEP))
        assert obs.vitality > vitality_before_sleep

    def test_socialize_builds_connection(self, env):
        env.reset(seed=0)
        initial_connection = env.state.connection
        obs = env.step(make_action(ActionType.FAMILY_TIME))
        assert obs.connection > initial_connection - 0.05

    def test_episode_ends_at_max_steps(self, env):
        env.reset(seed=0)
        for i in range(MAX_STEPS):
            obs = env.step(make_action(ActionType.SLEEP))
        assert obs.done is True
        assert obs.timestep == MAX_STEPS

    def test_not_done_before_max_steps(self, env):
        env.reset(seed=0)
        for i in range(MAX_STEPS - 1):
            obs = env.step(make_action(ActionType.SLEEP))
        assert obs.done is False

    def test_meters_stay_in_bounds(self, env):
        """No meter exceeds [0.0, 1.0] regardless of actions."""
        env.reset(seed=0)
        for _ in range(MAX_STEPS):
            obs = env.step(make_action(ActionType.DEEP_WORK))
            for meter in METERS:
                val = getattr(obs, meter)
                assert 0.0 <= val <= 1.0, f"{meter}={val} out of bounds"

    def test_low_vitality_reduces_effectiveness(self, env):
        """Progress gain should be lower when vitality is low."""
        env.reset(seed=0, profile="introvert_morning")
        obs_high = env.step(make_action(ActionType.DEEP_WORK))
        progress_high = obs_high.progress
        env.reset(seed=0, profile="introvert_morning")
        for _ in range(6):
            env.step(make_action(ActionType.DEEP_WORK))
        progress_before = env.state.progress
        env.step(make_action(ActionType.DEEP_WORK))
        progress_gained_low = env.state.progress - progress_before
        assert progress_high > progress_gained_low


# ---------------------------------------------------------------------------
# TestProfiles
# ---------------------------------------------------------------------------


class TestProfiles:
    def test_introvert_social_drains_more(self, env):
        """Introvert loses more vitality from socializing than extrovert."""
        env.reset(seed=0, profile="introvert_morning")
        v_before_intro = env.state.vitality
        env.step(make_action(ActionType.SOCIALIZE))
        intro_drain = v_before_intro - env.state.vitality
        env2 = RhythmEnvironment()
        env2.reset(seed=0, profile="extrovert_night_owl")
        v_before_extro = env2.state.vitality
        env2.step(make_action(ActionType.SOCIALIZE))
        extro_drain = v_before_extro - env2.state.vitality
        assert intro_drain > extro_drain

    def test_workaholic_progress_gives_serenity(self, env):
        """Workaholic has better serenity outcome from deep work than introvert."""
        env.reset(seed=0, profile="workaholic_stoic")
        serenity_before = env.state.serenity
        env.step(make_action(ActionType.DEEP_WORK))
        workaholic_change = env.state.serenity - serenity_before
        env2 = RhythmEnvironment()
        env2.reset(seed=0, profile="introvert_morning")
        serenity_before_intro = env2.state.serenity
        env2.step(make_action(ActionType.DEEP_WORK))
        introvert_change = env2.state.serenity - serenity_before_intro
        assert workaholic_change > introvert_change

    def test_binge_shame_introvert(self, env):
        """Introvert suffers extra serenity loss from binge watching."""
        env.reset(seed=0, profile="introvert_morning")
        serenity_before = env.state.serenity
        env.step(make_action(ActionType.BINGE_WATCH))
        intro_change = env.state.serenity - serenity_before
        env2 = RhythmEnvironment()
        env2.reset(seed=0, profile="extrovert_night_owl")
        serenity_before_ext = env2.state.serenity
        env2.step(make_action(ActionType.BINGE_WATCH))
        ext_change = env2.state.serenity - serenity_before_ext
        assert intro_change < ext_change

    def test_different_rewards_same_action(self, env):
        """Same action produces different rewards for different profiles."""
        rewards = {}
        for profile_name in ["introvert_morning", "extrovert_night_owl", "workaholic_stoic"]:
            e = RhythmEnvironment()
            e.reset(seed=0, profile=profile_name)
            obs = e.step(make_action(ActionType.DEEP_WORK))
            rewards[profile_name] = obs.reward
        values = list(rewards.values())
        assert len(set(round(v, 3) for v in values)) > 1

    def test_extrovert_night_cognition_bonus(self, env):
        """Extrovert gets better cognition gains in evening vs morning."""
        env.reset(seed=0, profile="extrovert_night_owl")
        env.step(make_action(ActionType.SLEEP))  # morning
        env.step(make_action(ActionType.SLEEP))  # afternoon
        cognition_before = env.state.cognition
        env.step(make_action(ActionType.MEDITATE))  # evening
        evening_gain = env.state.cognition - cognition_before
        env.reset(seed=0, profile="extrovert_night_owl")
        cognition_before_m = env.state.cognition
        env.step(make_action(ActionType.MEDITATE))  # morning
        morning_gain = env.state.cognition - cognition_before_m
        assert evening_gain > morning_gain


# ---------------------------------------------------------------------------
# TestEvents
# ---------------------------------------------------------------------------


class TestEvents:
    def test_events_deterministic_with_seed(self, env):
        """Same seed produces same event sequence."""
        events1 = []
        env.reset(seed=99)
        for _ in range(MAX_STEPS):
            obs = env.step(make_action(ActionType.SLEEP))
            events1.append(obs.active_event)
        events2 = []
        env.reset(seed=99)
        for _ in range(MAX_STEPS):
            obs = env.step(make_action(ActionType.SLEEP))
            events2.append(obs.active_event)
        assert events1 == events2

    def test_event_visible_in_observation(self, env):
        """When an event fires, active_event is set in observation."""
        found_event = False
        for seed in range(100):
            env.reset(seed=seed)
            for _ in range(MAX_STEPS):
                obs = env.step(make_action(ActionType.SLEEP))
                if obs.active_event is not None:
                    found_event = True
                    assert obs.active_event in [
                        "prod_crash", "family_emergency", "illness", "good_news"
                    ]
                    break
            if found_event:
                break
        assert found_event, "No events triggered in 100 episodes"

    def test_no_event_when_none(self, env):
        """Most steps should have no event."""
        env.reset(seed=0)
        no_event_count = 0
        for _ in range(MAX_STEPS):
            obs = env.step(make_action(ActionType.SLEEP))
            if obs.active_event is None:
                no_event_count += 1
        assert no_event_count > MAX_STEPS * 0.7


# ---------------------------------------------------------------------------
# TestGrader
# ---------------------------------------------------------------------------


class TestGrader:
    def test_final_score_in_range(self, env):
        env.reset(seed=0)
        for _ in range(MAX_STEPS):
            obs = env.step(make_action(ActionType.SLEEP))
        assert "final_score" in obs.reward_breakdown
        score = obs.reward_breakdown["final_score"]
        assert 0.0 <= score <= 1.0

    def test_balanced_play_beats_all_sleep(self, env):
        """A balanced strategy should score higher than just sleeping."""
        env.reset(seed=0)
        for _ in range(MAX_STEPS):
            obs_sleep = env.step(make_action(ActionType.SLEEP))
        score_sleep = obs_sleep.reward_breakdown["final_score"]
        balanced_actions = [
            ActionType.DEEP_WORK,
            ActionType.LEARN,
            ActionType.EXERCISE,
            ActionType.FAMILY_TIME,
        ] * 7
        env.reset(seed=0)
        for action_type in balanced_actions:
            obs_balanced = env.step(make_action(action_type))
        score_balanced = obs_balanced.reward_breakdown["final_score"]
        assert score_balanced > score_sleep

    def test_deterministic_grading(self, env):
        """Same actions produce same final score."""
        scores = []
        for _ in range(2):
            env.reset(seed=42)
            for _ in range(MAX_STEPS):
                obs = env.step(make_action(ActionType.DEEP_WORK))
            scores.append(obs.reward_breakdown["final_score"])
        assert scores[0] == scores[1]

    def test_all_binge_scores_low(self, env):
        """Binge watching everything should produce a low score."""
        env.reset(seed=0)
        for _ in range(MAX_STEPS):
            obs = env.step(make_action(ActionType.BINGE_WATCH))
        score = obs.reward_breakdown["final_score"]
        assert score < 0.5


# ---------------------------------------------------------------------------
# TestEdgeCases
# ---------------------------------------------------------------------------


class TestEdgeCases:
    def test_observation_hides_profile(self, env):
        """Observation should not expose profile_name."""
        obs = env.reset(seed=0)
        obs_dict = obs.model_dump()
        assert "profile_name" not in obs_dict

    def test_state_exposes_profile(self, env):
        """State should include profile_name for debugging."""
        # Default: continuous profile (name like 'sampled_0')
        env.reset(seed=0)
        assert env.state.profile_name != ""
        assert env.state.profile_name.startswith("sampled_")
        # Explicit profile: name matches the requested reference profile
        env.reset(seed=0, profile="workaholic_stoic")
        assert env.state.profile_name == "workaholic_stoic"
        assert env.state.profile_name in [p["name"] for p in PROFILES]

    def test_all_action_types_valid(self, env):
        """Every ActionType should be processable without error."""
        env.reset(seed=0)
        for action_type in ActionType:
            e = RhythmEnvironment()
            e.reset(seed=0)
            obs = e.step(make_action(action_type))
            assert isinstance(obs, RhythmObservation)


# ---------------------------------------------------------------------------
# Belief-accuracy grader component
# ---------------------------------------------------------------------------


class TestBeliefAccuracyGrader:
    """The grader awards 0.20 weight to belief_accuracy.

    Agents that don't emit beliefs get 0 on this component; agents whose
    final belief matches the true profile vector get up to 0.20 added to
    final_score.
    """

    def _run_episode_with_belief(self, seed, belief, profile=None):
        # Helper: play a full SLEEP-only episode, recording `belief` each step.
        env = RhythmEnvironment()
        if profile:
            obs = env.reset(seed=seed, profile=profile)
        else:
            obs = env.reset(seed=seed)
        for _ in range(MAX_STEPS):
            if obs.done:
                break
            env.record_belief(belief)
            obs = env.step(make_action(ActionType.SLEEP))
        return obs.reward_breakdown.get("final_score", 0.0)

    def _run_episode_no_belief(self, seed, profile=None):
        # Helper: identical episode to _run_episode_with_belief, but never
        # calls record_belief — isolates the belief component's contribution.
        env = RhythmEnvironment()
        if profile:
            obs = env.reset(seed=seed, profile=profile)
        else:
            obs = env.reset(seed=seed)
        for _ in range(MAX_STEPS):
            if obs.done:
                break
            obs = env.step(make_action(ActionType.SLEEP))
        return obs.reward_breakdown.get("final_score", 0.0)

    def test_no_belief_means_zero_belief_component(self, env):
        """Agent that never calls record_belief gets 0 on the belief component."""
        score = self._run_episode_no_belief(seed=42)
        # Without belief, max possible score is 0.80 (all weights ex belief).
        # Realistic ceiling is much lower since SLEEP-only doesn't max meters.
        assert score <= 0.80

    def test_perfect_belief_lifts_score(self, env):
        """An agent that emits the TRUE belief vector should score higher
        than the same actions with no belief — by up to +0.20."""
        # Use a known reference profile so we can hand-pick the perfect belief.
        from server.rhythm_environment import (
            PROFILE_MAP,
            profile_to_belief_vector,
        )

        profile_name = "workaholic_stoic"
        true_belief = profile_to_belief_vector(PROFILE_MAP[profile_name])
        no_belief_score = self._run_episode_no_belief(seed=7, profile=profile_name)
        perfect_score = self._run_episode_with_belief(
            seed=7, belief=true_belief, profile=profile_name
        )
        # Perfect belief contributes 0.20 to final_score
        assert perfect_score > no_belief_score
        assert (perfect_score - no_belief_score) == pytest.approx(0.20, abs=0.01)

    def test_wrong_belief_scores_less_than_perfect(self, env):
        """Wrong belief still counts (0 ≤ score ≤ 1) but less than perfect."""
        from server.rhythm_environment import (
            PROFILE_MAP,
            profile_to_belief_vector,
        )

        profile_name = "introvert_morning"
        true_belief = profile_to_belief_vector(PROFILE_MAP[profile_name])
        wrong_belief = [1.0 - b for b in true_belief]  # opposite
        perfect_score = self._run_episode_with_belief(
            seed=7, belief=true_belief, profile=profile_name
        )
        wrong_score = self._run_episode_with_belief(
            seed=7, belief=wrong_belief, profile=profile_name
        )
        assert perfect_score > wrong_score

    def test_record_belief_validates_length(self, env):
        env.reset(seed=0)
        with pytest.raises(ValueError):
            env.record_belief([0.5, 0.5])  # wrong length
        with pytest.raises(ValueError):
            env.record_belief([0.5, 0.5, 0.5, 0.5])  # too long

    def test_record_belief_clamps_to_unit_interval(self, env):
        """Beliefs outside [0, 1] should be clamped, not rejected."""
        env.reset(seed=0)
        env.record_belief([-0.5, 1.5, 0.5])
        # Internal state should be clamped
        assert env._final_belief == [0.0, 1.0, 0.5]

    def test_grader_uses_openenv_weighted_sum_rubric(self, env):
        """Grader composes child rubrics via openenv.core.rubrics.WeightedSum."""
        from openenv.core.rubrics import Rubric, WeightedSum
        from server.rubrics import (
            CrashFreeRubric,
            ProgressRubric,
            ConnectionRubric,
            AdaptationRubric,
            EfficiencyRubric,
            BeliefAccuracyRubric,
            GRADE_WEIGHTS,
            make_grade_rubric,
        )

        # Trigger a full episode so _grade_episode runs and builds the rubric
        obs = env.reset(seed=0)
        for _ in range(MAX_STEPS):
            if obs.done:
                break
            obs = env.step(make_action(ActionType.SLEEP))
        rubric = env._grade_rubric
        assert isinstance(rubric, WeightedSum), "grader must use WeightedSum"
        assert isinstance(rubric, Rubric)
        # 6 children, one per scoring component
        children = list(rubric.children())
        assert len(children) == 6
        types = {type(c).__name__ for c in children}
        assert types == {
            "CrashFreeRubric",
            "ProgressRubric",
            "ConnectionRubric",
            "AdaptationRubric",
            "EfficiencyRubric",
            "BeliefAccuracyRubric",
        }
        # Weights must sum to 1.0 (WeightedSum enforces; sanity check the keys)
        assert abs(sum(GRADE_WEIGHTS.values()) - 1.0) < 1e-6

    def test_make_grade_rubric_is_pure_function(self, env):
        """make_grade_rubric should produce equivalent rubrics across calls."""
        from server.rubrics import make_grade_rubric

        env.reset(seed=42)
        r1 = make_grade_rubric(env)
        r2 = make_grade_rubric(env)
        # Same shape, fresh object
        assert len(list(r1.children())) == len(list(r2.children())) == 6
        assert r1 is not r2
        # Same weights
        assert r1._weights == r2._weights