File size: 6,347 Bytes
f28409b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
"""Integration tests for ``CERNCollisionEnvironment``.



The point of these tests is not to assert specific reward magnitudes
(those depend on noise) but to confirm:

* ``reset`` / ``step`` / ``state`` follow OpenEnv's gym-style contract,
* the heuristic baseline beats the random baseline on average,
* the oracle baseline (which peeks at the truth) gets a positive
  cumulative reward — i.e. the environment is *winnable*,
* the env terminates when ``max_steps`` is reached or budget runs out.
"""

from __future__ import annotations

import statistics

import pytest

from models import ActionType, ExperimentAction
from scripts.baseline_agents import HeuristicAgent, OracleAgent, RandomAgent
from server.environment import CERNCollisionEnvironment, CernState


def _run_episode(env, agent, *, seed: int, scenario: str | None = None,
                 difficulty: str | None = None) -> float:
    """Play one episode to completion and return the summed step rewards.

    The environment is reset with ``seed`` / ``scenario`` / ``difficulty``.
    An agent whose ``name`` is ``"oracle"`` is handed ``env.hidden_truth()``
    through its ``truth`` attribute before ``agent.reset()`` is called.
    The agent then acts until an observation reports ``done``. A falsy
    reward (``None`` or ``0``) contributes ``0.0`` to the total.
    """
    observation = env.reset(seed=seed, scenario=scenario, difficulty=difficulty)

    # Only the oracle baseline is allowed to peek at the latent truth.
    if agent.name == "oracle":
        agent.truth = env.hidden_truth()
    agent.reset()

    total_reward = 0.0
    while True:
        if observation.done:
            return total_reward
        chosen_action = agent.act(observation)
        observation = env.step(chosen_action)
        step_reward = observation.reward or 0.0
        total_reward = total_reward + float(step_reward)


# ── Gym-style contract ──────────────────────────────────────────────────


def test_reset_returns_observation_with_task():
    """``reset`` must hand back a fresh, non-terminal observation with a task."""
    environment = CERNCollisionEnvironment(max_steps=10)
    initial_obs = environment.reset(seed=1, scenario="easy_diphoton_160")

    # A task with a non-empty problem statement must be attached.
    assert initial_obs.task is not None
    assert initial_obs.task.problem_statement

    # The episode has not started yet: step counter at zero, not done.
    assert initial_obs.step_index == 0
    assert initial_obs.done is False


def test_state_reflects_episode_progress():
    """Right after ``reset`` the ``state`` must describe an untouched episode."""
    environment = CERNCollisionEnvironment(max_steps=5)
    environment.reset(seed=2, scenario="easy_diphoton_160")

    # ``state`` is read afresh for every check.
    assert isinstance(environment.state, CernState)
    assert environment.state.scenario_name == "easy_diphoton_160"
    assert environment.state.episode_done is False
    assert environment.state.cumulative_reward == 0.0


def test_step_advances_step_count_and_history():
    """One ``step`` must bump ``step_index`` and add one history entry."""
    environment = CERNCollisionEnvironment(max_steps=5)
    environment.reset(seed=3, scenario="easy_diphoton_160")

    configure_beam = ExperimentAction(
        action_type=ActionType.CONFIGURE_BEAM,
        parameters={"beam_energy": "13TeV"},
    )
    after_step = environment.step(configure_beam)

    assert after_step.step_index == 1
    assert len(after_step.pipeline_history) == 1


def test_episode_terminates_at_max_steps():
    """With ``max_steps=3`` the episode must report ``done`` within 5 steps."""
    environment = CERNCollisionEnvironment(max_steps=3)
    environment.reset(seed=4, scenario="easy_diphoton_160")

    last_obs = None
    attempts_left = 5  # more than max_steps, so termination must be observed
    while attempts_left > 0:
        attempts_left -= 1
        last_obs = environment.step(
            ExperimentAction(action_type=ActionType.CONFIGURE_BEAM)
        )
        if last_obs.done:
            break

    assert last_obs is not None
    assert last_obs.done


# ── Baselines: heuristic ≥ random ───────────────────────────────────────


@pytest.mark.parametrize("difficulty", ["easy", "medium"])
def test_heuristic_beats_random_on_average(difficulty):
    """The scripted heuristic agent should outperform a random agent.

    If this fails, either the heuristic is broken or the reward function
    is rewarding nonsense — both serious bugs to catch before training.
    """
    paired_scores = []
    for seed in range(8):
        # Each episode gets its own environment; for a given seed the
        # random episode runs first, then the heuristic one.
        random_score = _run_episode(
            CERNCollisionEnvironment(max_steps=20),
            RandomAgent(seed=seed),
            seed=seed,
            difficulty=difficulty,
        )
        heuristic_score = _run_episode(
            CERNCollisionEnvironment(max_steps=20),
            HeuristicAgent(),
            seed=seed,
            difficulty=difficulty,
        )
        paired_scores.append((random_score, heuristic_score))

    random_rewards, heur_rewards = zip(*paired_scores)
    assert statistics.mean(heur_rewards) > statistics.mean(random_rewards)


def test_oracle_can_win_easy_scenario():
    """An oracle that peeks at the truth must be able to earn a strongly
    positive cumulative reward on the easy scenario. If even the oracle
    can't win, the env is unwinnable and RL will stall (FAQ Q15).
    """
    oracle_scores = [
        _run_episode(
            CERNCollisionEnvironment(max_steps=20),
            OracleAgent(),
            seed=seed,
            scenario="easy_diphoton_160",
        )
        for seed in range(4)
    ]

    # At least one clearly winning run, and a positive average overall.
    assert max(oracle_scores) > 1.0
    assert statistics.mean(oracle_scores) > 0.0


# ── Step API compatibility & hidden-truth invariants ────────────────────


def test_step_accepts_timeout_s_as_a_noop():
    """The OpenEnv API allows ``timeout_s`` on ``step``. CERNenv accepts
    it for compatibility but treats it as informational (steps are
    sub-millisecond pure-compute; resource exhaustion is the real
    sandbox). This test pins that behaviour so a future change cannot
    silently start enforcing per-step timeouts without updating docs.
    """
    environment = CERNCollisionEnvironment(max_steps=5)
    environment.reset(seed=99, scenario="easy_diphoton_160")

    # First an absurdly small timeout (must not raise / abort), then None.
    for expected_index, timeout in enumerate((0.001, None), start=1):
        observation = environment.step(
            ExperimentAction(
                action_type=ActionType.CONFIGURE_BEAM,
                parameters={"beam_energy": "13TeV"},
            ),
            timeout_s=timeout,
        )
        assert observation.step_index == expected_index


def test_hidden_truth_is_only_exposed_via_helper():
    """The reset observation must not leak truth; ``hidden_truth()`` exposes it.

    The observation is dumped with ``model_dump()`` and its lower-cased
    ``repr`` is searched for truth-only vocabulary. The helper
    ``env.hidden_truth()`` must return a non-``None`` value that contains
    the ``decay_branching`` key.
    """
    env = CERNCollisionEnvironment(max_steps=4)
    obs = env.reset(seed=10, scenario="higgs_like_125")

    # The agent observation must NEVER contain the latent particle truth.
    flat = repr(obs.model_dump()).lower()

    # The actual mass value 125 might appear as a search-window number, and
    # ``cross_section_fb`` may legitimately appear as a claim field, so
    # neither is asserted on. (The previous check on ``cross_section_fb``
    # was ``X not in flat or X in flat`` — a tautology that could never
    # fail — and has been removed rather than left as a false safeguard.)
    # The secret branching ratios, however, must not leak:
    assert "branching" not in flat

    truth = env.hidden_truth()
    assert truth is not None
    assert "decay_branching" in truth