Spaces:

Pratyush-01
/

physix

Sleeping

App Files Files Community

physix / tests / test_environment.py

Pratyush-01

Upload folder using huggingface_hub

0e24aff verified 12 days ago

raw

history blame contribute delete

6.79 kB

	"""End-to-end smoke tests for :class:`PhysiXEnvironment`.

	These tests exercise the full pipeline (parse + simulate + score + record)
	without spinning up a FastAPI server. They serve as the first sanity check
	that the parser, simulator, metrics, and reward composer interoperate.
	"""

	from __future__ import annotations

	import pytest

	from physix.models import CONVERGENCE_THRESHOLD, PhysiXAction
	from physix.server.environment import PhysiXEnvironment
	from physix.systems import SystemTier


	# ---------------------------------------------------------------------------
	# Fixtures
	# ---------------------------------------------------------------------------


	@pytest.fixture
	def env() -> PhysiXEnvironment:
	    """Provide a seeded environment limited to Tier 1 systems.

	    The fixed seed (42) keeps the fixture deterministic, and restricting
	    ``train_tiers`` to Tier 1 keeps the tests fast.
	    """
	    tier_one_only = (SystemTier.TIER_1,)
	    return PhysiXEnvironment(seed=42, train_tiers=tier_one_only)


	# ---------------------------------------------------------------------------
	# Tests
	# ---------------------------------------------------------------------------


	def test_reset_returns_well_formed_observation(env: PhysiXEnvironment) -> None:
	    """``reset`` should return a pristine turn-zero observation for ``free_fall``."""
	    initial = env.reset(system_id="free_fall")

	    # Episode bookkeeping starts from a clean slate.
	    assert initial.system_id == "free_fall"
	    assert initial.turn == 0
	    assert initial.turn_remaining > 0
	    assert initial.history == []
	    assert initial.mismatch_summary == ""

	    # Both state variables of the system must be advertised.
	    for variable in ("y", "vy"):
	        assert variable in initial.state_variables

	    assert len(initial.trajectory) == 100
	    assert initial.hint  # must not be empty
	    assert initial.done is False


	def test_step_with_ground_truth_rewards_high(env: PhysiXEnvironment) -> None:
	    """Submitting the true free-fall law should score a near-perfect match."""
	    env.reset(system_id="free_fall")

	    ground_truth = PhysiXAction(equation="d2y/dt2 = -9.81", params={})
	    result = env.step(ground_truth)
	    scores = result.reward_breakdown

	    assert scores["format"] == 1.0
	    assert scores["match"] >= 0.95

	    # Floor on the weighted total reward.
	    reward_floor = CONVERGENCE_THRESHOLD * 0.5
	    assert result.reward >= reward_floor


	def test_step_with_unparseable_equation_short_circuits(
	    env: PhysiXEnvironment,
	) -> None:
	    """An equation that cannot be parsed should zero every reward component."""
	    env.reset(system_id="free_fall")

	    junk = PhysiXAction(equation="not a real equation")
	    result = env.step(junk)
	    scores = result.reward_breakdown

	    # Format is zero, and so is every other component.
	    for component in ("format", "match", "progress", "simplicity"):
	        assert scores[component] == 0.0

	    assert "Parse error" in result.mismatch_summary


	def test_episode_terminates_on_convergence(env: PhysiXEnvironment) -> None:
	    """A high-quality match should end the episode via the convergence threshold."""
	    env.reset(system_id="free_fall")

	    exact_law = PhysiXAction(equation="d2y/dt2 = -9.81")
	    result = env.step(exact_law)

	    assert result.done is True


	def test_history_accumulates_across_turns(env: PhysiXEnvironment) -> None:
	    """Every ``step`` call should add exactly one entry to the history."""
	    gravity_only = "d2y/dt2 = -9.81"
	    gravity_with_drag = "d2y/dt2 = -9.81 + 0.05 * vy**2"

	    env.reset(system_id="free_fall_drag")

	    after_first = env.step(PhysiXAction(equation=gravity_only))
	    assert len(after_first.history) == 1
	    assert after_first.history[0]["equation"] == gravity_only

	    # A second turn is only taken while the episode is still open.
	    if after_first.done:
	        return

	    after_second = env.step(PhysiXAction(equation=gravity_with_drag))
	    assert len(after_second.history) == 2
	    assert after_second.history[1]["equation"] == gravity_with_drag


	def test_progress_reward_rewards_improvement(env: PhysiXEnvironment) -> None:
	    """A second-turn improvement should yield positive r_progress.

	    Turn 1 submits pure gravity for ``free_fall_drag``; turn 2 adds a drag
	    term. The match score must not regress, and whenever it strictly
	    improves the ``progress`` component must be positive. The test is
	    skipped if the episode already converges on turn 1, since no second
	    turn can be taken.
	    """
	    env.reset(system_id="free_fall_drag")

	    # Turn 1: pure gravity (decent fit but missing drag).
	    obs1 = env.step(PhysiXAction(equation="d2y/dt2 = -9.81"))
	    if obs1.done:
	        pytest.skip("episode converged on turn 1")

	    # Turn 2: add drag (closer fit).
	    obs2 = env.step(
	        PhysiXAction(equation="d2y/dt2 = -9.81 + 0.05 * vy**2"),
	    )

	    match_before = obs1.reward_breakdown["match"]
	    match_after = obs2.reward_breakdown["match"]

	    assert match_after >= match_before
	    # Compare like with like: the guard is on the *match* component of both
	    # turns, not on turn 1's total reward, so a genuine match improvement
	    # always exercises the progress assertion.
	    if match_after > match_before:
	        assert obs2.reward_breakdown["progress"] > 0.0


	def test_max_turns_terminates_episode() -> None:
	    """Spending the whole turn budget without converging should set ``done``."""
	    budget = 3
	    env = PhysiXEnvironment(
	        seed=0,
	        max_turns=budget,
	        train_tiers=(SystemTier.TIER_1,),
	    )
	    env.reset(system_id="simple_pendulum")

	    # Parseable but intentionally incorrect dynamics.
	    wrong_guess = "d2theta/dt2 = 0"

	    final_obs = None
	    for _ in range(budget):
	        final_obs = env.step(PhysiXAction(equation=wrong_guess))

	    assert final_obs is not None
	    assert final_obs.done is True
	    assert final_obs.turn_remaining == 0


	def test_state_property_exposes_episode_id(env: PhysiXEnvironment) -> None:
	    """After ``reset``, ``env.state`` should carry an episode id and the system id."""
	    first_obs = env.reset(system_id="free_fall")

	    episode_id = env.state.episode_id
	    assert episode_id is not None
	    assert episode_id  # must not be empty

	    active_system = env.state.system_id
	    assert active_system == "free_fall"
	    assert first_obs.system_id == active_system


	@pytest.mark.parametrize(
	    "system_id, equation",
	    [
	        # sqrt applied to a symbolic sum on a pendulum-like system. This
	        # used to raise a TypeError ("loop of ufunc does not support
	        # argument 0 of type Add which has no callable sqrt method") that
	        # escaped the simulator and turned into a 500 at the route.
	        ("simple_pendulum", "d2theta/dt2 = -sqrt(dtheta2 + theta2) * sin(theta)"),
	        # sqrt of an always-negative value, for which numpy yields NaN.
	        ("simple_pendulum", "d2theta/dt2 = -sqrt(-theta**2 - 1)"),
	        # Constant numerics in the RHS that divide by zero.
	        ("free_fall", "d2y/dt2 = -9.81 / (y - y)"),
	        # Runaway growth that overflows odeint.
	        ("free_fall", "d2y/dt2 = exp(exp(exp(y)))"),
	        # log(0) = -inf leaking through the RHS.
	        ("free_fall", "d2y/dt2 = log(0 * y)"),
	    ],
	)
	def test_step_swallows_simulator_failures_as_format_zero_match_zero(
	    system_id: str, equation: str
	) -> None:
	    """``step`` must contain numerical blow-ups instead of raising.

	    A model-emitted equation that parses but fails numerically (TypeError,
	    overflow, NaN) should never propagate out of the simulator into the
	    route layer. It is expected to score ``r_match=0`` cleanly, surface a
	    ``Simulation error: ...`` mismatch, and leave the episode able to
	    continue. Without the broadened exception catch in
	    :func:`simulate_hypothesis`, several of these inputs would 500 the
	    server.
	    """
	    sim_env = PhysiXEnvironment(seed=0, train_tiers=(SystemTier.TIER_1,))
	    sim_env.reset(system_id=system_id)

	    outcome = sim_env.step(PhysiXAction(equation=equation))
	    scores = outcome.reward_breakdown

	    assert scores["match"] == 0.0
	    # A parsed equation gives format 1; format 0 means the parser rejected
	    # it outright, which is also an acceptable outcome for these inputs.
	    assert scores["format"] in (0.0, 1.0)
	    # Whichever path was taken, a diagnostic message must be present.
	    assert outcome.mismatch_summary