"""Integration tests for ``CERNCollisionEnvironment``.

The point of these tests is not to assert specific reward magnitudes
(those depend on noise) but to confirm:

* ``reset`` / ``step`` / ``state`` follow OpenEnv's gym-style contract,
* the heuristic baseline beats the random baseline on average,
* the oracle baseline (which peeks at the truth) gets a positive
  cumulative reward — i.e. the environment is *winnable*,
* the env terminates when ``max_steps`` is reached or budget runs out.
"""

from __future__ import annotations

import statistics

import pytest

from models import ActionType, ExperimentAction
from scripts.baseline_agents import HeuristicAgent, OracleAgent, RandomAgent
from server.environment import CERNCollisionEnvironment, CernState


def _run_episode(env, agent, *, seed: int, scenario: str | None = None,
                 difficulty: str | None = None) -> float:
    obs = env.reset(seed=seed, scenario=scenario, difficulty=difficulty)
    if agent.name == "oracle":
        agent.truth = env.hidden_truth()
    agent.reset()
    cumulative = 0.0
    while not obs.done:
        action = agent.act(obs)
        obs = env.step(action)
        cumulative += float(obs.reward or 0.0)
    return cumulative


# ── Gym-style contract ──────────────────────────────────────────────────


def test_reset_returns_observation_with_task():
    env = CERNCollisionEnvironment(max_steps=10)
    obs = env.reset(seed=1, scenario="easy_diphoton_160")
    assert obs.task is not None
    assert obs.task.problem_statement
    assert obs.step_index == 0
    assert obs.done is False


def test_state_reflects_episode_progress():
    env = CERNCollisionEnvironment(max_steps=5)
    env.reset(seed=2, scenario="easy_diphoton_160")
    assert isinstance(env.state, CernState)
    assert env.state.scenario_name == "easy_diphoton_160"
    assert env.state.episode_done is False
    assert env.state.cumulative_reward == 0.0


def test_step_advances_step_count_and_history():
    env = CERNCollisionEnvironment(max_steps=5)
    env.reset(seed=3, scenario="easy_diphoton_160")
    obs = env.step(
        ExperimentAction(
            action_type=ActionType.CONFIGURE_BEAM,
            parameters={"beam_energy": "13TeV"},
        )
    )
    assert obs.step_index == 1
    assert len(obs.pipeline_history) == 1


def test_episode_terminates_at_max_steps():
    env = CERNCollisionEnvironment(max_steps=3)
    env.reset(seed=4, scenario="easy_diphoton_160")
    obs = None
    for _ in range(5):
        obs = env.step(ExperimentAction(action_type=ActionType.CONFIGURE_BEAM))
        if obs.done:
            break
    assert obs is not None
    assert obs.done


# ── Baselines: heuristic ≥ random ───────────────────────────────────────


@pytest.mark.parametrize("difficulty", ["easy", "medium"])
def test_heuristic_beats_random_on_average(difficulty):
    """The scripted heuristic agent should outperform a random agent.

    If this fails, either the heuristic is broken or the reward function
    is rewarding nonsense — both serious bugs to catch before training.
    """
    random_rewards = []
    heur_rewards = []
    for seed in range(8):
        env = CERNCollisionEnvironment(max_steps=20)
        random_rewards.append(_run_episode(env, RandomAgent(seed=seed),
                                            seed=seed, difficulty=difficulty))
        env = CERNCollisionEnvironment(max_steps=20)
        heur_rewards.append(_run_episode(env, HeuristicAgent(),
                                          seed=seed, difficulty=difficulty))
    assert statistics.mean(heur_rewards) > statistics.mean(random_rewards)


def test_oracle_can_win_easy_scenario():
    """An oracle that peeks at the truth must be able to earn a strongly
    positive cumulative reward on the easy scenario. If even the oracle
    can't win, the env is unwinnable and RL will stall (FAQ Q15).
    """
    rewards = []
    for seed in range(4):
        env = CERNCollisionEnvironment(max_steps=20)
        rewards.append(_run_episode(env, OracleAgent(),
                                     seed=seed, scenario="easy_diphoton_160"))
    assert max(rewards) > 1.0
    assert statistics.mean(rewards) > 0.0


# ── Env state persists hidden-truth invariants ──────────────────────────


def test_step_accepts_timeout_s_as_a_noop():
    """The OpenEnv API allows ``timeout_s`` on ``step``. CERNenv accepts
    it for compatibility but treats it as informational (steps are
    sub-millisecond pure-compute; resource exhaustion is the real
    sandbox). This test pins that behaviour so a future change cannot
    silently start enforcing per-step timeouts without updating docs.
    """
    env = CERNCollisionEnvironment(max_steps=5)
    env.reset(seed=99, scenario="easy_diphoton_160")
    obs1 = env.step(
        ExperimentAction(
            action_type=ActionType.CONFIGURE_BEAM,
            parameters={"beam_energy": "13TeV"},
        ),
        timeout_s=0.001,  # absurdly small; must not raise / abort
    )
    assert obs1.step_index == 1
    obs2 = env.step(
        ExperimentAction(
            action_type=ActionType.CONFIGURE_BEAM,
            parameters={"beam_energy": "13TeV"},
        ),
        timeout_s=None,
    )
    assert obs2.step_index == 2


def test_hidden_truth_is_only_exposed_via_helper():
    env = CERNCollisionEnvironment(max_steps=4)
    obs = env.reset(seed=10, scenario="higgs_like_125")
    # The agent observation must NEVER contain the latent particle truth.
    serialized = obs.model_dump()
    flat = repr(serialized).lower()
    # the actual mass value 125 might appear as a search-window number,
    # but the secret cross-section, branching ratios, etc. must not leak:
    assert "branching" not in flat
    assert "cross_section_fb" not in flat or "cross_section_fb" in flat  # claim field is OK

    truth = env.hidden_truth()
    assert truth is not None
    assert "decay_branching" in truth