Spaces:
Sleeping
Sleeping
| """End-to-end smoke tests for :class:`PhysiXEnvironment`. | |
| These tests exercise the full pipeline (parse + simulate + score + record) | |
| without spinning up a FastAPI server. They serve as the first sanity check | |
| that the parser, simulator, metrics, and reward composer interoperate. | |
| """ | |
| from __future__ import annotations | |
| import pytest | |
| from physix.models import CONVERGENCE_THRESHOLD, PhysiXAction | |
| from physix.server.environment import PhysiXEnvironment | |
| from physix.systems import SystemTier | |
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture
def env() -> PhysiXEnvironment:
    """Provide a seeded environment restricted to Tier 1 systems.

    Registered as a pytest fixture so that every test declaring an ``env``
    parameter receives its own instance; without the decorator pytest cannot
    resolve that parameter and errors with ``fixture 'env' not found``.

    Returns:
        A ``PhysiXEnvironment`` built with ``seed=42`` and
        ``train_tiers=(SystemTier.TIER_1,)``. The fixed seed is intended to
        make runs deterministic and the Tier 1 restriction to keep them fast.
    """
    return PhysiXEnvironment(seed=42, train_tiers=(SystemTier.TIER_1,))
# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------
def test_reset_returns_well_formed_observation(env: PhysiXEnvironment) -> None:
    """``reset`` should return a pristine, turn-zero observation.

    Checks the fields of the observation produced by resetting onto the
    ``free_fall`` system: identity, turn counters, empty history and
    mismatch summary, the expected state variables, trajectory length,
    a non-empty hint, and an unfinished episode.
    """
    initial = env.reset(system_id="free_fall")

    # Identity and turn bookkeeping.
    assert initial.system_id == "free_fall"
    assert initial.turn == 0
    assert initial.turn_remaining > 0

    # Nothing has been attempted yet, so there is no feedback to report.
    assert initial.history == []
    assert initial.mismatch_summary == ""

    # Both state variables of the free-fall system must be advertised.
    for variable in ("y", "vy"):
        assert variable in initial.state_variables

    assert len(initial.trajectory) == 100
    assert initial.hint  # must be non-empty
    assert initial.done is False
def test_step_with_ground_truth_rewards_high(env: PhysiXEnvironment) -> None:
    """Submitting the exact ground-truth law should score a near-perfect match."""
    env.reset(system_id="free_fall")

    ground_truth = PhysiXAction(equation="d2y/dt2 = -9.81", params={})
    result = env.step(ground_truth)
    scores = result.reward_breakdown

    assert scores["format"] == 1.0
    assert scores["match"] >= 0.95

    # Floor for the weighted total: half of the convergence threshold.
    total_floor = CONVERGENCE_THRESHOLD * 0.5
    assert result.reward >= total_floor
def test_step_with_unparseable_equation_short_circuits(
    env: PhysiXEnvironment,
) -> None:
    """Junk input must zero the format, match, progress and simplicity scores.

    The mismatch summary is also expected to mention ``Parse error``.
    """
    env.reset(system_id="free_fall")

    junk = PhysiXAction(equation="not a real equation")
    result = env.step(junk)
    scores = result.reward_breakdown

    # Checked in a fixed order so the first failure points at the earliest
    # component that was not zeroed.
    for component in ("format", "match", "progress", "simplicity"):
        assert scores[component] == 0.0

    assert "Parse error" in result.mismatch_summary
def test_episode_terminates_on_convergence(env: PhysiXEnvironment) -> None:
    """A high-quality match on the first turn should end the episode.

    Termination is expected to come from the convergence threshold, so a
    single step with the exact free-fall equation must report ``done``.
    """
    env.reset(system_id="free_fall")

    exact_law = PhysiXAction(equation="d2y/dt2 = -9.81")
    outcome = env.step(exact_law)

    assert outcome.done is True
def test_history_accumulates_across_turns(env: PhysiXEnvironment) -> None:
    """Every step should add exactly one entry to the observation history.

    The second turn is only exercised when the first turn leaves the
    episode open.
    """
    gravity_only = "d2y/dt2 = -9.81"
    gravity_with_drag = "d2y/dt2 = -9.81 + 0.05 * vy**2"

    env.reset(system_id="free_fall_drag")

    first = env.step(PhysiXAction(equation=gravity_only))
    assert len(first.history) == 1
    assert first.history[0]["equation"] == gravity_only

    # A finished episode leaves nothing further to check.
    if first.done:
        return

    second = env.step(PhysiXAction(equation=gravity_with_drag))
    assert len(second.history) == 2
    assert second.history[1]["equation"] == gravity_with_drag
def test_progress_reward_rewards_improvement(env: PhysiXEnvironment) -> None:
    """An improved second-turn hypothesis should earn positive progress.

    The test is skipped when the first turn already ends the episode,
    because no second turn can then be taken.
    """
    env.reset(system_id="free_fall_drag")

    # Turn 1: gravity alone, with the drag term left out.
    baseline = env.step(PhysiXAction(equation="d2y/dt2 = -9.81"))
    if baseline.done:
        pytest.skip("episode converged on turn 1")

    # Turn 2: same law with a quadratic drag term added.
    refined = env.step(
        PhysiXAction(equation="d2y/dt2 = -9.81 + 0.05 * vy**2"),
    )

    before = baseline.reward_breakdown
    after = refined.reward_breakdown

    assert after["match"] >= before["match"]

    # NOTE(review): this guard compares the turn-2 "match" score against the
    # turn-1 "total" score rather than the turn-1 "match" score. Confirm
    # whether mixing the two metrics is intentional.
    if after["match"] > before["total"]:
        assert after["progress"] > 0.0
def test_max_turns_terminates_episode() -> None:
    """Spending the whole turn budget without converging must end the episode."""
    turn_budget = 3
    env = PhysiXEnvironment(
        seed=0,
        max_turns=turn_budget,
        train_tiers=(SystemTier.TIER_1,),
    )
    env.reset(system_id="simple_pendulum")

    final = None
    for _ in range(turn_budget):
        # Parseable but deliberately wrong, so the budget is used up.
        final = env.step(PhysiXAction(equation="d2theta/dt2 = 0"))

    assert final is not None
    assert final.done is True
    assert final.turn_remaining == 0
def test_state_property_exposes_episode_id(env: PhysiXEnvironment) -> None:
    """After ``reset`` the ``state`` property should expose the episode identity.

    The episode id must be set and truthy, and the state's system id must
    agree with both the requested system and the returned observation.
    """
    first = env.reset(system_id="free_fall")
    snapshot = env.state

    assert snapshot.episode_id is not None
    assert snapshot.episode_id  # truthy, i.e. not empty
    assert snapshot.system_id == "free_fall"
    assert first.system_id == snapshot.system_id
def test_step_swallows_simulator_failures_as_format_zero_match_zero(
    system_id: str, equation: str
) -> None:
    """``step`` must absorb simulator failures instead of raising them.

    An equation that parses but blows up numerically (TypeError, overflow,
    NaN) is expected to score ``r_match=0``, surface a
    ``Simulation error: ...`` mismatch, and leave the episode usable. The
    original author notes that, without the broadened exception catch in
    :func:`simulate_hypothesis`, several such inputs would 500 the server.

    This test itself only asserts a zero match score, a format score of
    either 0 or 1, and a non-empty mismatch summary.
    """
    # NOTE(review): no fixture or parametrize decorator supplying
    # ``system_id`` and ``equation`` is visible in this view. Confirm the
    # parametrization exists in the real file.
    sandbox = PhysiXEnvironment(seed=0, train_tiers=(SystemTier.TIER_1,))
    sandbox.reset(system_id=system_id)

    result = sandbox.step(PhysiXAction(equation=equation))
    scores = result.reward_breakdown

    assert scores["match"] == 0.0

    # A parsed equation gives format 1; format 0 means the parser rejected
    # it, which is equally acceptable for these inputs.
    assert scores["format"] in (0.0, 1.0)

    # Whichever path was taken, a diagnostic message must be present.
    assert result.mismatch_summary