| """Integration tests for ``CERNCollisionEnvironment``.
|
|
|
| The point of these tests is not to assert specific reward magnitudes
|
| (those depend on noise) but to confirm:
|
|
|
| * ``reset`` / ``step`` / ``state`` follow OpenEnv's gym-style contract,
|
| * the heuristic baseline beats the random baseline on average,
|
| * the oracle baseline (which peeks at the truth) gets a positive
|
| cumulative reward — i.e. the environment is *winnable*,
|
| * the env terminates when ``max_steps`` is reached or budget runs out.
|
| """
|
|
|
| from __future__ import annotations
|
|
|
| import statistics
|
|
|
| import pytest
|
|
|
| from models import ActionType, ExperimentAction
|
| from scripts.baseline_agents import HeuristicAgent, OracleAgent, RandomAgent
|
| from server.environment import CERNCollisionEnvironment, CernState
|
|
|
|
|
| def _run_episode(env, agent, *, seed: int, scenario: str | None = None,
|
| difficulty: str | None = None) -> float:
|
| obs = env.reset(seed=seed, scenario=scenario, difficulty=difficulty)
|
| if agent.name == "oracle":
|
| agent.truth = env.hidden_truth()
|
| agent.reset()
|
| cumulative = 0.0
|
| while not obs.done:
|
| action = agent.act(obs)
|
| obs = env.step(action)
|
| cumulative += float(obs.reward or 0.0)
|
| return cumulative
|
|
|
|
|
|
|
|
|
|
|
def test_reset_returns_observation_with_task():
    """A fresh ``reset`` must yield an initial, not-yet-done observation
    that carries a task with a non-empty problem statement."""
    environment = CERNCollisionEnvironment(max_steps=10)
    first = environment.reset(seed=1, scenario="easy_diphoton_160")
    assert first.task is not None
    assert first.task.problem_statement
    assert first.done is False
    assert first.step_index == 0
|
|
|
|
|
def test_state_reflects_episode_progress():
    """Immediately after ``reset`` the env's ``state`` mirrors a pristine
    episode: correct scenario, not done, zero cumulative reward."""
    environment = CERNCollisionEnvironment(max_steps=5)
    environment.reset(seed=2, scenario="easy_diphoton_160")
    snapshot = environment.state
    assert isinstance(snapshot, CernState)
    assert snapshot.scenario_name == "easy_diphoton_160"
    assert snapshot.episode_done is False
    assert snapshot.cumulative_reward == 0.0
|
|
|
|
|
def test_step_advances_step_count_and_history():
    """A single ``step`` bumps the step index to 1 and records exactly
    one entry in the pipeline history."""
    environment = CERNCollisionEnvironment(max_steps=5)
    environment.reset(seed=3, scenario="easy_diphoton_160")
    action = ExperimentAction(
        action_type=ActionType.CONFIGURE_BEAM,
        parameters={"beam_energy": "13TeV"},
    )
    observation = environment.step(action)
    assert observation.step_index == 1
    assert len(observation.pipeline_history) == 1
|
|
|
|
|
def test_episode_terminates_at_max_steps():
    """With ``max_steps=3``, stepping repeatedly must eventually flag
    ``done`` (well within 5 attempts)."""
    environment = CERNCollisionEnvironment(max_steps=3)
    environment.reset(seed=4, scenario="easy_diphoton_160")
    last = None
    for _ in range(5):
        last = environment.step(
            ExperimentAction(action_type=ActionType.CONFIGURE_BEAM)
        )
        if last.done:
            break
    assert last is not None
    assert last.done
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.parametrize("difficulty", ["easy", "medium"])
def test_heuristic_beats_random_on_average(difficulty):
    """The scripted heuristic agent should outperform a random agent.

    If this fails, either the heuristic is broken or the reward function
    is rewarding nonsense — both serious bugs to catch before training.
    """
    def fresh_env():
        # Each episode gets its own environment; determinism comes from
        # the per-episode seed passed to reset.
        return CERNCollisionEnvironment(max_steps=20)

    random_scores = [
        _run_episode(fresh_env(), RandomAgent(seed=s),
                     seed=s, difficulty=difficulty)
        for s in range(8)
    ]
    heuristic_scores = [
        _run_episode(fresh_env(), HeuristicAgent(),
                     seed=s, difficulty=difficulty)
        for s in range(8)
    ]
    assert statistics.mean(heuristic_scores) > statistics.mean(random_scores)
|
|
|
|
|
def test_oracle_can_win_easy_scenario():
    """An oracle that peeks at the truth must be able to earn a strongly
    positive cumulative reward on the easy scenario. If even the oracle
    can't win, the env is unwinnable and RL will stall (FAQ Q15).
    """
    scores = [
        _run_episode(CERNCollisionEnvironment(max_steps=20), OracleAgent(),
                     seed=s, scenario="easy_diphoton_160")
        for s in range(4)
    ]
    assert max(scores) > 1.0
    assert statistics.mean(scores) > 0.0
|
|
|
|
|
|
|
|
|
|
|
def test_step_accepts_timeout_s_as_a_noop():
    """The OpenEnv API allows ``timeout_s`` on ``step``. CERNenv accepts
    it for compatibility but treats it as informational (steps are
    sub-millisecond pure-compute; resource exhaustion is the real
    sandbox). This test pins that behaviour so a future change cannot
    silently start enforcing per-step timeouts without updating docs.
    """
    environment = CERNCollisionEnvironment(max_steps=5)
    environment.reset(seed=99, scenario="easy_diphoton_160")

    def configure_beam():
        # Fresh action per step, mirroring how a client would call.
        return ExperimentAction(
            action_type=ActionType.CONFIGURE_BEAM,
            parameters={"beam_energy": "13TeV"},
        )

    # A tiny timeout must not abort the step…
    first = environment.step(configure_beam(), timeout_s=0.001)
    assert first.step_index == 1
    # …and an explicit None must be accepted too.
    second = environment.step(configure_beam(), timeout_s=None)
    assert second.step_index == 2
|
|
|
|
|
def test_hidden_truth_is_only_exposed_via_helper():
    """The hidden physics truth must never leak through a serialized
    observation; it is reachable only via ``hidden_truth()``.
    """
    env = CERNCollisionEnvironment(max_steps=4)
    obs = env.reset(seed=10, scenario="higgs_like_125")

    # Flatten the whole serialized observation to scan for leaked keys.
    serialized = obs.model_dump()
    flat = repr(serialized).lower()

    assert "branching" not in flat
    # BUG FIX: this was previously written as
    # ``"cross_section_fb" not in flat or "cross_section_fb" in flat``,
    # a tautology that can never fail. Assert the actual leak condition.
    assert "cross_section_fb" not in flat

    truth = env.hidden_truth()
    assert truth is not None
    assert "decay_branching" in truth
|
|