"""Integration tests for ``CERNCollisionEnvironment``.
The point of these tests is not to assert specific reward magnitudes
(those depend on noise) but to confirm:
* ``reset`` / ``step`` / ``state`` follow OpenEnv's gym-style contract,
* the heuristic baseline beats the random baseline on average,
* the oracle baseline (which peeks at the truth) gets a positive
cumulative reward — i.e. the environment is *winnable*,
* the env terminates when ``max_steps`` is reached or budget runs out.
"""
from __future__ import annotations
import statistics
import pytest
from models import ActionType, ExperimentAction
from scripts.baseline_agents import HeuristicAgent, OracleAgent, RandomAgent
from server.environment import CERNCollisionEnvironment, CernState
def _run_episode(env, agent, *, seed: int, scenario: str | None = None,
difficulty: str | None = None) -> float:
obs = env.reset(seed=seed, scenario=scenario, difficulty=difficulty)
if agent.name == "oracle":
agent.truth = env.hidden_truth()
agent.reset()
cumulative = 0.0
while not obs.done:
action = agent.act(obs)
obs = env.step(action)
cumulative += float(obs.reward or 0.0)
return cumulative
# ── Gym-style contract ──────────────────────────────────────────────────
def test_reset_returns_observation_with_task():
    """A fresh reset yields step 0, an open episode, and a non-empty task."""
    environment = CERNCollisionEnvironment(max_steps=10)
    first_obs = environment.reset(seed=1, scenario="easy_diphoton_160")
    assert first_obs.task is not None
    assert first_obs.task.problem_statement
    assert first_obs.step_index == 0
    assert first_obs.done is False
def test_state_reflects_episode_progress():
    """Right after reset, ``env.state`` mirrors a pristine episode."""
    environment = CERNCollisionEnvironment(max_steps=5)
    environment.reset(seed=2, scenario="easy_diphoton_160")
    snapshot = environment.state
    assert isinstance(snapshot, CernState)
    assert snapshot.scenario_name == "easy_diphoton_160"
    assert snapshot.episode_done is False
    assert snapshot.cumulative_reward == 0.0
def test_step_advances_step_count_and_history():
    """One step bumps the step counter and appends to the pipeline history."""
    environment = CERNCollisionEnvironment(max_steps=5)
    environment.reset(seed=3, scenario="easy_diphoton_160")
    beam_action = ExperimentAction(
        action_type=ActionType.CONFIGURE_BEAM,
        parameters={"beam_energy": "13TeV"},
    )
    observation = environment.step(beam_action)
    assert observation.step_index == 1
    assert len(observation.pipeline_history) == 1
def test_episode_terminates_at_max_steps():
    """Stepping past ``max_steps`` must flip ``done`` within the cap."""
    environment = CERNCollisionEnvironment(max_steps=3)
    environment.reset(seed=4, scenario="easy_diphoton_160")
    observation = None
    # Allow a couple of extra iterations beyond the cap of 3; the env
    # must have signalled termination well before they run out.
    for _attempt in range(5):
        observation = environment.step(
            ExperimentAction(action_type=ActionType.CONFIGURE_BEAM)
        )
        if observation.done:
            break
    assert observation is not None
    assert observation.done
# ── Baselines: heuristic ≥ random ───────────────────────────────────────
@pytest.mark.parametrize("difficulty", ["easy", "medium"])
def test_heuristic_beats_random_on_average(difficulty):
    """The scripted heuristic must out-score a random agent on average.

    A failure here means either the heuristic is broken or the reward
    function rewards nonsense — both serious bugs to catch before training.
    """
    random_scores: list[float] = []
    heuristic_scores: list[float] = []
    for seed in range(8):
        random_scores.append(
            _run_episode(CERNCollisionEnvironment(max_steps=20),
                         RandomAgent(seed=seed),
                         seed=seed, difficulty=difficulty))
        heuristic_scores.append(
            _run_episode(CERNCollisionEnvironment(max_steps=20),
                         HeuristicAgent(),
                         seed=seed, difficulty=difficulty))
    assert statistics.mean(heuristic_scores) > statistics.mean(random_scores)
def test_oracle_can_win_easy_scenario():
    """An oracle that peeks at the hidden truth must earn a strongly
    positive cumulative reward on the easy scenario. If even the oracle
    cannot win, the environment is unwinnable and RL will stall (FAQ Q15).
    """
    scores = [
        _run_episode(CERNCollisionEnvironment(max_steps=20), OracleAgent(),
                     seed=s, scenario="easy_diphoton_160")
        for s in range(4)
    ]
    assert max(scores) > 1.0
    assert statistics.mean(scores) > 0.0
# ── Env state persists hidden-truth invariants ──────────────────────────
def test_step_accepts_timeout_s_as_a_noop():
    """The OpenEnv API allows ``timeout_s`` on ``step``. CERNenv accepts
    it for compatibility but treats it as informational (steps are
    sub-millisecond pure-compute; resource exhaustion is the real
    sandbox). This test pins that behaviour so a future change cannot
    silently start enforcing per-step timeouts without updating docs.
    """
    environment = CERNCollisionEnvironment(max_steps=5)
    environment.reset(seed=99, scenario="easy_diphoton_160")

    def _beam_action():
        # Fresh action each call, identical to what the original built inline.
        return ExperimentAction(
            action_type=ActionType.CONFIGURE_BEAM,
            parameters={"beam_energy": "13TeV"},
        )

    # Absurdly small timeout; must not raise / abort.
    first = environment.step(_beam_action(), timeout_s=0.001)
    assert first.step_index == 1
    second = environment.step(_beam_action(), timeout_s=None)
    assert second.step_index == 2
def test_hidden_truth_is_only_exposed_via_helper():
    """The latent particle truth must never leak into agent observations;
    it is reachable only through ``env.hidden_truth()``.
    """
    env = CERNCollisionEnvironment(max_steps=4)
    obs = env.reset(seed=10, scenario="higgs_like_125")
    # The agent observation must NEVER contain the latent particle truth.
    serialized = obs.model_dump()
    flat = repr(serialized).lower()
    # The actual mass value 125 might appear as a search-window number,
    # but the secret branching ratios must not leak:
    assert "branching" not in flat
    # FIX: the previous check here — ``assert "cross_section_fb" not in
    # flat or "cross_section_fb" in flat`` — was a tautology and verified
    # nothing. Per its own comment, "cross_section_fb" may legitimately
    # appear as a *claim* field the agent fills in, so no leak invariant
    # can be pinned on that name; the secret branching keys are covered
    # by the assertion above.
    truth = env.hidden_truth()
    assert truth is not None
    assert "decay_branching" in truth