"""Integration tests for ``CERNCollisionEnvironment``. The point of these tests is not to assert specific reward magnitudes (those depend on noise) but to confirm: * ``reset`` / ``step`` / ``state`` follow OpenEnv's gym-style contract, * the heuristic baseline beats the random baseline on average, * the oracle baseline (which peeks at the truth) gets a positive cumulative reward — i.e. the environment is *winnable*, * the env terminates when ``max_steps`` is reached or budget runs out. """ from __future__ import annotations import statistics import pytest from models import ActionType, ExperimentAction from scripts.baseline_agents import HeuristicAgent, OracleAgent, RandomAgent from server.environment import CERNCollisionEnvironment, CernState def _run_episode(env, agent, *, seed: int, scenario: str | None = None, difficulty: str | None = None) -> float: obs = env.reset(seed=seed, scenario=scenario, difficulty=difficulty) if agent.name == "oracle": agent.truth = env.hidden_truth() agent.reset() cumulative = 0.0 while not obs.done: action = agent.act(obs) obs = env.step(action) cumulative += float(obs.reward or 0.0) return cumulative # ── Gym-style contract ────────────────────────────────────────────────── def test_reset_returns_observation_with_task(): env = CERNCollisionEnvironment(max_steps=10) obs = env.reset(seed=1, scenario="easy_diphoton_160") assert obs.task is not None assert obs.task.problem_statement assert obs.step_index == 0 assert obs.done is False def test_state_reflects_episode_progress(): env = CERNCollisionEnvironment(max_steps=5) env.reset(seed=2, scenario="easy_diphoton_160") assert isinstance(env.state, CernState) assert env.state.scenario_name == "easy_diphoton_160" assert env.state.episode_done is False assert env.state.cumulative_reward == 0.0 def test_step_advances_step_count_and_history(): env = CERNCollisionEnvironment(max_steps=5) env.reset(seed=3, scenario="easy_diphoton_160") obs = env.step( ExperimentAction( action_type=ActionType.CONFIGURE_BEAM, parameters={"beam_energy": "13TeV"}, ) ) assert obs.step_index == 1 assert len(obs.pipeline_history) == 1 def test_episode_terminates_at_max_steps(): env = CERNCollisionEnvironment(max_steps=3) env.reset(seed=4, scenario="easy_diphoton_160") obs = None for _ in range(5): obs = env.step(ExperimentAction(action_type=ActionType.CONFIGURE_BEAM)) if obs.done: break assert obs is not None assert obs.done # ── Baselines: heuristic ≥ random ─────────────────────────────────────── @pytest.mark.parametrize("difficulty", ["easy", "medium"]) def test_heuristic_beats_random_on_average(difficulty): """The scripted heuristic agent should outperform a random agent. If this fails, either the heuristic is broken or the reward function is rewarding nonsense — both serious bugs to catch before training. """ random_rewards = [] heur_rewards = [] for seed in range(8): env = CERNCollisionEnvironment(max_steps=20) random_rewards.append(_run_episode(env, RandomAgent(seed=seed), seed=seed, difficulty=difficulty)) env = CERNCollisionEnvironment(max_steps=20) heur_rewards.append(_run_episode(env, HeuristicAgent(), seed=seed, difficulty=difficulty)) assert statistics.mean(heur_rewards) > statistics.mean(random_rewards) def test_oracle_can_win_easy_scenario(): """An oracle that peeks at the truth must be able to earn a strongly positive cumulative reward on the easy scenario. If even the oracle can't win, the env is unwinnable and RL will stall (FAQ Q15). """ rewards = [] for seed in range(4): env = CERNCollisionEnvironment(max_steps=20) rewards.append(_run_episode(env, OracleAgent(), seed=seed, scenario="easy_diphoton_160")) assert max(rewards) > 1.0 assert statistics.mean(rewards) > 0.0 # ── Env state persists hidden-truth invariants ────────────────────────── def test_step_accepts_timeout_s_as_a_noop(): """The OpenEnv API allows ``timeout_s`` on ``step``. CERNenv accepts it for compatibility but treats it as informational (steps are sub-millisecond pure-compute; resource exhaustion is the real sandbox). This test pins that behaviour so a future change cannot silently start enforcing per-step timeouts without updating docs. """ env = CERNCollisionEnvironment(max_steps=5) env.reset(seed=99, scenario="easy_diphoton_160") obs1 = env.step( ExperimentAction( action_type=ActionType.CONFIGURE_BEAM, parameters={"beam_energy": "13TeV"}, ), timeout_s=0.001, # absurdly small; must not raise / abort ) assert obs1.step_index == 1 obs2 = env.step( ExperimentAction( action_type=ActionType.CONFIGURE_BEAM, parameters={"beam_energy": "13TeV"}, ), timeout_s=None, ) assert obs2.step_index == 2 def test_hidden_truth_is_only_exposed_via_helper(): env = CERNCollisionEnvironment(max_steps=4) obs = env.reset(seed=10, scenario="higgs_like_125") # The agent observation must NEVER contain the latent particle truth. serialized = obs.model_dump() flat = repr(serialized).lower() # the actual mass value 125 might appear as a search-window number, # but the secret cross-section, branching ratios, etc. must not leak: assert "branching" not in flat assert "cross_section_fb" not in flat or "cross_section_fb" in flat # claim field is OK truth = env.hidden_truth() assert truth is not None assert "decay_branching" in truth