"""End-to-end smoke tests for :class:`PhysiXEnvironment`.
These tests exercise the full pipeline (parse + simulate + score + record)
without spinning up a FastAPI server. They serve as the first sanity check
that the parser, simulator, metrics, and reward composer interoperate.
"""
from __future__ import annotations
import pytest
from physix.models import CONVERGENCE_THRESHOLD, PhysiXAction
from physix.server.environment import PhysiXEnvironment
from physix.systems import SystemTier
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture
def env() -> PhysiXEnvironment:
    """Provide the environment shared by most tests in this module.

    The instance is built with a fixed ``seed=42`` and with ``train_tiers``
    limited to ``SystemTier.TIER_1`` so the tests stay deterministic and fast.
    """
    tier_one_only = (SystemTier.TIER_1,)
    return PhysiXEnvironment(seed=42, train_tiers=tier_one_only)
# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------
def test_reset_returns_well_formed_observation(env: PhysiXEnvironment) -> None:
    """``reset`` should hand back a fully populated turn-zero observation."""
    initial = env.reset(system_id="free_fall")

    # Identity and turn bookkeeping.
    assert initial.system_id == "free_fall"
    assert initial.turn == 0
    assert initial.turn_remaining > 0

    # Nothing has been proposed yet, so there is no history or mismatch text.
    assert initial.history == []
    assert initial.mismatch_summary == ""

    # Both state variables checked here must be advertised.
    assert all(name in initial.state_variables for name in ("y", "vy"))

    # The observed trajectory is expected to hold exactly 100 entries.
    assert len(initial.trajectory) == 100

    # A non-empty hint must be present and the episode must still be open.
    assert initial.hint
    assert initial.done is False
def test_step_with_ground_truth_rewards_high(env: PhysiXEnvironment) -> None:
    """Submitting the exact ground-truth equation should score ``match`` near 1."""
    env.reset(system_id="free_fall")

    ground_truth = PhysiXAction(equation="d2y/dt2 = -9.81", params={})
    result = env.step(ground_truth)
    scores = result.reward_breakdown

    assert scores["format"] == 1.0
    assert scores["match"] >= 0.95

    # The weighted total only needs to clear half the convergence threshold.
    reward_floor = CONVERGENCE_THRESHOLD * 0.5
    assert result.reward >= reward_floor
def test_step_with_unparseable_equation_short_circuits(
    env: PhysiXEnvironment,
) -> None:
    """An unparseable payload should zero every reward component checked here.

    The ``format``, ``match``, ``progress`` and ``simplicity`` entries of the
    breakdown must all be ``0.0`` and the mismatch summary must mention a
    parse error.
    """
    env.reset(system_id="free_fall")
    result = env.step(PhysiXAction(equation="not a real equation"))
    scores = result.reward_breakdown

    # Checked in the same order as the individual components are listed above;
    # the component name is attached so a failure identifies the culprit.
    for component in ("format", "match", "progress", "simplicity"):
        assert scores[component] == 0.0, component

    assert "Parse error" in result.mismatch_summary
def test_episode_terminates_on_convergence(env: PhysiXEnvironment) -> None:
    """A step with the ground-truth free-fall equation should end the episode."""
    env.reset(system_id="free_fall")
    ground_truth = PhysiXAction(equation="d2y/dt2 = -9.81")
    result = env.step(ground_truth)
    assert result.done is True
def test_history_accumulates_across_turns(env: PhysiXEnvironment) -> None:
    """Every ``step`` call should add exactly one entry to ``history``."""
    gravity_only = "d2y/dt2 = -9.81"
    gravity_with_drag = "d2y/dt2 = -9.81 + 0.05 * vy**2"

    env.reset(system_id="free_fall_drag")

    first = env.step(PhysiXAction(equation=gravity_only))
    assert len(first.history) == 1
    assert first.history[0]["equation"] == gravity_only

    # If the first guess already finished the episode there is no second
    # turn to inspect; the checks above are all that can be verified.
    if first.done:
        return

    second = env.step(PhysiXAction(equation=gravity_with_drag))
    assert len(second.history) == 2
    assert second.history[1]["equation"] == gravity_with_drag
def test_progress_reward_rewards_improvement(env: PhysiXEnvironment) -> None:
    """An improved second-turn guess should earn a positive ``progress`` reward."""
    env.reset(system_id="free_fall_drag")

    # Turn 1: gravity alone, i.e. the drag term is missing.
    first = env.step(PhysiXAction(equation="d2y/dt2 = -9.81"))
    if first.done:
        pytest.skip("episode converged on turn 1")

    # Turn 2: same equation with a quadratic drag term added.
    second = env.step(PhysiXAction(equation="d2y/dt2 = -9.81 + 0.05 * vy**2"))

    before = first.reward_breakdown
    after = second.reward_breakdown

    # The refined equation must match at least as well as the first one.
    assert after["match"] >= before["match"]

    # NOTE(review): the guard compares turn-2 "match" against turn-1 "total",
    # not turn-1 "match" -- confirm this is the baseline the progress term
    # is actually computed from.
    if after["match"] > before["total"]:
        assert after["progress"] > 0.0
def test_max_turns_terminates_episode() -> None:
    """Using up the whole turn budget without converging must set ``done``."""
    turn_budget = 3
    env = PhysiXEnvironment(
        seed=0,
        max_turns=turn_budget,
        train_tiers=(SystemTier.TIER_1,),
    )
    env.reset(system_id="simple_pendulum")

    # Deliberately wrong-but-parseable equation, submitted once per turn.
    observations = [
        env.step(PhysiXAction(equation="d2theta/dt2 = 0"))
        for _ in range(turn_budget)
    ]
    final = observations[-1]

    assert final is not None
    assert final.done is True
    assert final.turn_remaining == 0
def test_state_property_exposes_episode_id(env: PhysiXEnvironment) -> None:
    """After ``reset``, ``env.state`` carries an episode id and the system id."""
    initial = env.reset(system_id="free_fall")
    state = env.state

    # The id must exist and must also be truthy (i.e. not an empty value).
    assert state.episode_id is not None
    assert state.episode_id

    # State and observation must agree on which system is being played.
    assert state.system_id == "free_fall"
    assert initial.system_id == state.system_id
@pytest.mark.parametrize(
    "system_id, equation",
    [
        # sqrt applied to a symbolic sum on a pendulum-like system. This
        # historically raised a TypeError ("loop of ufunc does not support
        # argument 0 of type Add which has no callable sqrt method") that
        # escaped the simulator and 500-ed the route.
        ("simple_pendulum", "d2theta/dt2 = -sqrt(dtheta**2 + theta**2) * sin(theta)"),
        # sqrt of an always-negative quantity → numpy emits NaN.
        ("simple_pendulum", "d2theta/dt2 = -sqrt(-theta**2 - 1)"),
        # Division by zero produced by the RHS itself (y - y).
        ("free_fall", "d2y/dt2 = -9.81 / (y - y)"),
        # Runaway growth that overflows odeint.
        ("free_fall", "d2y/dt2 = exp(exp(exp(y)))"),
        # log of zero (-inf) flowing through the RHS.
        ("free_fall", "d2y/dt2 = log(0 * y)"),
    ],
)
def test_step_swallows_simulator_failures_as_format_zero_match_zero(
    system_id: str, equation: str
) -> None:
    """``step`` must absorb numeric blow-ups instead of raising.

    Each parametrised equation is intended to parse but fail numerically
    (TypeError, overflow, NaN, ...). The intent is that such a hypothesis
    scores ``r_match=0`` cleanly, surfaces a ``Simulation error: ...``
    mismatch and lets the episode continue, rather than propagating out of
    the simulator into the route layer. Without the broadened exception
    catch in :func:`simulate_hypothesis`, several of these would 500 the
    server.

    The assertions below only check that ``match`` is zero, that ``format``
    is either 0 or 1, and that some diagnostic text is present; the exact
    wording of the diagnostic is not asserted.
    """
    env = PhysiXEnvironment(seed=0, train_tiers=(SystemTier.TIER_1,))
    env.reset(system_id=system_id)

    result = env.step(PhysiXAction(equation=equation))
    scores = result.reward_breakdown

    assert scores["match"] == 0.0

    # A parsed equation gives format 1; format 0 means the parser rejected
    # the input outright, which is also acceptable for these payloads.
    assert scores["format"] in (0.0, 1.0)

    # Whichever path was taken, a non-empty diagnostic must be reported.
    assert result.mismatch_summary
|