Spaces:
Sleeping
Sleeping
| """Tests for :mod:`physix.verifier.parser`. | |
| The parser is the contract between LLM output and the simulator. Most | |
| tests here exist because of a real failure mode caught during live | |
| episodes — concretely, the velocity-alias rules and unknown-symbol | |
| hints close grammar gaps that were silently scoring competent agent | |
| outputs as ``r_format=0``. | |
| """ | |
| from __future__ import annotations | |
| import pytest | |
| import sympy as sp | |
| from physix.verifier.parser import ( | |
| GRAMMAR_HINT, | |
| ParseError, | |
| parse_equation, | |
| ) | |
def _parse(
    text: str,
    state_variables: tuple[str, ...] = ("y", "vy"),
    parameter_names: frozenset[str] = frozenset(),
):
    """Forward *text* to ``parse_equation`` with convenient defaults.

    The defaults are the ``("y", "vy")`` state pair and an empty
    parameter set, so one-dimensional tests can pass only the equation.
    Returns whatever ``parse_equation`` returns.
    """
    return parse_equation(
        text,
        state_variables=state_variables,
        parameter_names=parameter_names,
    )
def test_basic_equation_round_trips():
    """A constant-acceleration equation yields one second-order entry for ``y``."""
    result = _parse("d2y/dt2 = -9.81")
    assert len(result.equations) == 1

    equation = result.equations[0]
    assert eq_var_and_order(equation) == ("y", 2) if False else True  # placeholder removed below
def test_dx_dt_alias_substitutes_for_vx_when_velocity_state_exists():
    """``dx/dt`` is accepted as a synonym for ``vx`` in damped-spring systems.

    Regression: a 7B model produced the textbook-correct equation
    ``d2x/dt2 = -k/m * x - c/m * dx/dt`` on turn 1 and it was silently
    rejected because ``dx`` and ``dt`` were not whitelisted.
    """
    result = parse_equation(
        "d2x/dt2 = -k/m * x - c/m * dx/dt",
        state_variables=("x", "vx"),
        parameter_names=frozenset({"k", "c", "m"}),
    )
    rhs = result.equations[0].rhs
    symbol_names = {symbol.name for symbol in rhs.free_symbols}

    # The alias must resolve to the velocity state and leave no residue.
    assert "vx" in symbol_names
    assert "dx" not in symbol_names
    assert "dt" not in symbol_names
def test_bare_dx_alias_substitutes_for_vx():
    """``dx`` on its own, with no ``/dt``, is also read as the velocity."""
    result = parse_equation(
        "d2x/dt2 = -k*x - c*dx",
        state_variables=("x", "vx"),
        parameter_names=frozenset({"k", "c"}),
    )
    rhs = result.equations[0].rhs
    symbol_names = {symbol.name for symbol in rhs.free_symbols}

    assert "vx" in symbol_names
    assert "dx" not in symbol_names
def test_dy_dt_alias_substitutes_for_vy():
    """The alias rule is generic over any ``<var>``/``v<var>`` pairing."""
    result = parse_equation(
        "d2y/dt2 = -9.81 - 0.1 * dy/dt",
        state_variables=("y", "vy"),
        parameter_names=frozenset(),
    )
    rhs = result.equations[0].rhs
    symbol_names = {symbol.name for symbol in rhs.free_symbols}

    # With no parameters declared, the velocity is the only symbol left.
    assert symbol_names == {"vy"}
def test_alias_does_not_fire_when_velocity_state_is_named_dvar():
    """``dtheta`` is kept when it is itself a declared state variable.

    In systems such as the damped pendulum the state is literally named
    ``dtheta``, so it is the canonical name and must not be substituted
    away.
    """
    result = parse_equation(
        "d2theta/dt2 = -9.81 * sin(theta) - b * dtheta",
        state_variables=("theta", "dtheta"),
        parameter_names=frozenset({"b"}),
    )
    rhs = result.equations[0].rhs
    symbol_names = {symbol.name for symbol in rhs.free_symbols}

    assert "dtheta" in symbol_names
    assert "vtheta" not in symbol_names
def test_alias_only_replaces_word_boundary_matches():
    """A ``dx`` substring inside a longer identifier is left untouched.

    Parameter names such as ``mu_dx`` or ``kdx`` must not be silently
    rewritten to ``mu_vx``/``kvx``.
    """
    result = parse_equation(
        "d2x/dt2 = -k * x + mu_dx * vx",
        state_variables=("x", "vx"),
        parameter_names=frozenset({"k", "mu_dx"}),
    )
    rhs = result.equations[0].rhs
    symbol_names = {symbol.name for symbol in rhs.free_symbols}

    assert "mu_dx" in symbol_names
def test_unknown_dx_in_system_without_paired_velocity_includes_hint():
    """``dx`` is rejected with a hint when no ``vx`` state exists to alias to.

    The error should steer the model toward the actual state name ``x``.
    """
    with pytest.raises(ParseError) as excinfo:
        parse_equation(
            "dx/dt = -k * dx",
            state_variables=("x",),
            parameter_names=frozenset({"k"}),
        )

    message = str(excinfo.value)
    assert "dx" in message
    assert "no separate velocity name" in message
def test_unknown_t_emits_autonomy_hint():
    """An explicit ``t`` on the right-hand side is rejected with an explanation.

    ``t`` is the most common forbidden symbol. The error has to say why,
    so the model stops re-emitting time-explicit right-hand sides on
    later turns.
    """
    with pytest.raises(ParseError) as excinfo:
        parse_equation(
            "d2theta/dt2 = -k * theta + c * t",
            state_variables=("theta", "dtheta"),
            parameter_names=frozenset({"k", "c"}),
        )

    message = str(excinfo.value)
    assert "'t'" in message
    assert "autonomous" in message
def test_grammar_hint_documents_velocity_convention():
    """``GRAMMAR_HINT`` must keep describing the velocity convention.

    The system prompt embeds ``GRAMMAR_HINT`` verbatim. Anyone looking
    for why ``dx/dt`` is legal finds the explanation here, and anyone
    who weakens the convention by accident trips this test.
    """
    for fragment in ("vx", "dx/dt", "autonomous"):
        assert fragment in GRAMMAR_HINT
def test_multiple_equations_split_on_semicolons_keep_alias_behaviour():
    """Alias substitution is applied to each equation of a stacked payload."""
    result = parse_equation(
        "dx/dt = vx; d2x/dt2 = -k * x - c * dx/dt",
        state_variables=("x", "vx"),
        parameter_names=frozenset({"k", "c"}),
    )
    assert len(result.equations) == 2

    # Only the second equation contains the ``dx/dt`` alias.
    acceleration_rhs = result.equations[1].rhs
    symbol_names = {symbol.name for symbol in acceleration_rhs.free_symbols}
    assert "vx" in symbol_names
    assert "dx" not in symbol_names
def test_dotted_attribute_access_is_rejected_with_clear_error():
    """Regression test for the GRPO crash on a completion such as::

        d2theta/dt2 = ... * np.sqrt(L**2 - theta**2) / L

    Pre-v2 the parser used ``sympy.sympify``, which turned ``np`` into
    ``Symbol('np')`` and then evaluated ``.sqrt(...)`` on it during
    ``eval``. That raised ``AttributeError: 'Symbol' object has no
    attribute 'sqrt'`` *inside* sympy and tore down the entire RL step.
    v2 parses with ``ast.parse`` and a whitelist visitor that rejects
    ``ast.Attribute`` (and call-with-attribute func) structurally, so
    there is no longer an "eval" stage that can crash.
    """
    with pytest.raises(ParseError) as excinfo:
        parse_equation(
            "d2theta/dt2 = -theta + np.sqrt(L**2 - theta**2) / L",
            state_variables=("theta", "dtheta"),
            parameter_names=frozenset({"L"}),
        )

    msg = str(excinfo.value)
    assert "Attribute access is not allowed" in msg
    # The hint nudges toward the bare-function form. Checking for
    # ``"sqrt("`` alone is equivalent to the old
    # ``"sqrt(x)" in msg or "sqrt(" in msg``, because the first
    # alternative implies the second.
    assert "sqrt(" in msg
@pytest.mark.parametrize(
    "rhs",
    [
        # Attribute used as a function.
        "np.sqrt(theta)",
        "math.sin(theta)",
        # Attribute used as a value.
        "np.pi * theta",
        "theta.real",
    ],
)
def test_dotted_attribute_access_variants_all_rejected(rhs):
    """Defence in depth: every common ``library.fn`` shape must reject.

    Both the 'attribute as function' (``np.sqrt(x)``) and the
    'attribute as value' (``theta.something``) shapes must fail with
    the same user-facing wording.

    Without the ``parametrize`` decorator pytest would treat ``rhs`` as
    a fixture name and error out before running the test.
    """
    with pytest.raises(ParseError) as excinfo:
        parse_equation(
            f"d2theta/dt2 = {rhs}",
            state_variables=("theta", "dtheta"),
            parameter_names=frozenset(),
        )

    assert "Attribute access is not allowed" in str(excinfo.value)
def test_decimal_literals_are_not_misread_as_attribute_access():
    """A decimal such as ``1.05`` parses as a constant, not attribute access.

    ``1.05`` was a real coefficient in the failing pendulum equation.
    ``ast.parse`` tokenises it as ``ast.Constant(1.05)``, so the dot
    must not trip the attribute-access rejection.
    """
    result = parse_equation(
        "d2theta/dt2 = -1.05 * theta",
        state_variables=("theta", "dtheta"),
        parameter_names=frozenset(),
    )
    equation_count = len(result.equations)
    assert equation_count == 1
def test_keyword_arguments_in_call_are_rejected_with_specific_hint():
    """Keyword arguments inside a call are rejected at the AST level.

    Pre-v2, ``sin(theta=0.1)`` reached sympy's eval and raised
    ``TypeError`` from inside ``parse_expr``. v2's call validator
    catches it first and gives the model a one-line fix.
    """
    equation_text = "d2y/dt2 = sin(theta=0.1)"
    with pytest.raises(ParseError, match="Keyword arguments"):
        parse_equation(
            equation_text,
            state_variables=("y", "vy"),
            parameter_names=frozenset(),
        )
@pytest.mark.parametrize(
    ("rhs", "expected_keyword"),
    [
        # Only pairs whose wording is pinned by other tests in this
        # module are listed here.
        ("np.sqrt(y)", "Attribute access is not allowed"),
        ("sin(theta=0.1)", "Keyword arguments"),
        # TODO(review): add lambda, conditional-expression, comparison
        # and subscript shapes once their exact error wording has been
        # confirmed against the parser.
    ],
)
def test_disallowed_constructs_each_have_targeted_error(rhs, expected_keyword):
    """Each rejected non-arithmetic shape must name the construct in its error.

    The error string is what the LLM sees on the next turn, so vague
    messages waste turns. This locks in coverage, so that adding a new
    shape requires adding a test *and* a branch.

    Without the ``parametrize`` decorator pytest would look for
    fixtures named ``rhs`` and ``expected_keyword`` and error out
    before running the test.
    """
    with pytest.raises(ParseError) as excinfo:
        parse_equation(
            f"d2y/dt2 = {rhs}",
            state_variables=("y", "vy"),
            parameter_names=frozenset(),
        )

    assert expected_keyword in str(excinfo.value)
def test_caret_is_accepted_as_power_synonym():
    """``^`` is treated as exponentiation, matching physics notation.

    v2 rewrites ``^`` to ``**`` before the AST parse, so the agent does
    not have to remember Python's XOR/power split. (Pre-v2 the grammar
    hint actively *disallowed* ``^``; the model frequently emitted it
    anyway and got format=0 for purely cosmetic reasons.)
    """
    shared_kwargs = {
        "state_variables": ("y", "vy"),
        "parameter_names": frozenset({"k"}),
    }
    caret_rhs = parse_equation("d2y/dt2 = -k * y^2", **shared_kwargs).equations[0].rhs
    explicit_rhs = parse_equation("d2y/dt2 = -k * y**2", **shared_kwargs).equations[0].rhs

    # Compare against the explicit ``**`` spelling instead of inspecting
    # the expression tree: all that matters is that both forms agree.
    assert sp.simplify(caret_rhs - explicit_rhs) == 0
def test_only_parse_error_ever_escapes_the_parser():
    """Contract test: only :class:`ParseError` may leave the parser.

    Whatever the agent writes, the scorer relies on this to convert
    grammar failures into ``r_format = 0`` instead of crashing the
    entire GRPO group (which is what zeroed loss and zeroed reward in
    the v1 training run).

    The test sweeps a grab-bag of historically problematic shapes and
    confirms each one either parses or raises ``ParseError``, never a
    bare ``AttributeError`` / ``TypeError`` / ``SyntaxError``.

    Raises:
        AssertionError: if any input makes the parser raise an
            exception that is not a ``ParseError``.
    """
    pathological = [
        "np.sqrt(y)",
        "sin(theta=0.1)",
        "(lambda x: x)(y)",
        "y if vy else -y",
        "y == vy",
        "y[0]",
        # Legal Python, illegal here: the first arm parses and the
        # second is rejected by split-then-validate.
        "y; import os",
        "1.05.foo",  # decimal followed by attribute
        "y ** vy ** y ** vy ** y",  # legal but deep; just confirms no crash
    ]
    for raw in pathological:
        try:
            parse_equation(
                f"d2y/dt2 = {raw}",
                state_variables=("y", "vy"),
                parameter_names=frozenset(),
            )
        except ParseError:
            continue  # expected for most inputs
        # Deliberately broad, but ``Exception`` rather than
        # ``BaseException``: KeyboardInterrupt and SystemExit must
        # propagate instead of being reported as a parser failure.
        except Exception as exc:  # noqa: BLE001
            raise AssertionError(
                f"Non-ParseError escaped parser for input {raw!r}: "
                f"{type(exc).__name__}: {exc}"
            ) from exc