"""OpenEnv 0.2.x protocol conformance tests for the OpenSleuth env. These tests are *additive* and orthogonal to the existing legacy contract covered in ``test_env.py`` / ``test_open_env.py``. What we verify: * The OpenEnv ``Environment`` adapter (:class:`OpenSleuthEnvironment`) implements all four required methods (``reset`` / ``step`` / ``state`` / ``get_metadata``) and returns instances of OpenEnv's ``Observation`` / ``State`` / ``EnvironmentMetadata`` base classes (so it would pass any ``isinstance`` check by an OpenEnv-aware harness). * The ``/openenv/*`` HTTP sub-app exposes every endpoint OpenEnv 0.2.x promises: ``/health``, ``/metadata``, ``/schema``, ``/state``, ``/reset``, ``/step``. (The ``/ws`` WebSocket is exercised separately via the ``smoke_openenv_client.py`` script run against the live Space.) * ``/openenv/reset`` returns the canonical ``{"observation", "reward", "done"}`` envelope (NOT a bare observation, which is the legacy shape). * ``/openenv/step`` accepts the canonical ``{"action": {...}}`` envelope (NOT ``{"episode_id", "action"}``, which is the legacy shape). * The legacy bare ``/reset`` and ``/step`` routes the trainer uses are untouched. """ from __future__ import annotations import pytest pytest.importorskip( "openenv.core.env_server.types", reason="openenv-core not installed; conformance tests skipped.", ) from fastapi.testclient import TestClient from openenv.core.env_server.types import ( EnvironmentMetadata, Observation as OEObservation, State as OEState, ) from opensleuth_env.openenv_adapter import ( OpenSleuthAction, OpenSleuthEnvironment, OpenSleuthObservation, OpenSleuthState, ) # --------------------------------------------------------------------------- # Adapter-level: exercises the Environment subclass directly (no HTTP). # --------------------------------------------------------------------------- class TestEnvironmentSubclass: def test_observation_inherits_openenv_base(self) -> None: env = OpenSleuthEnvironment() obs = env.reset() assert isinstance(obs, OEObservation), ( "OpenSleuthObservation must subclass openenv.core...types.Observation " "so OpenEnv tooling (rubrics, evals, web UI) can introspect it." ) # Must expose the OpenEnv-required fields. assert obs.done is False assert obs.reward is None assert isinstance(obs.metadata, dict) def test_state_inherits_openenv_base(self) -> None: env = OpenSleuthEnvironment() env.reset() state = env.state assert isinstance(state, OEState) assert state.episode_id is not None assert state.step_count == 0 def test_metadata_is_openenv_environment_metadata(self) -> None: env = OpenSleuthEnvironment() meta = env.get_metadata() assert isinstance(meta, EnvironmentMetadata) assert meta.name == "OpenSleuth" assert meta.description assert meta.version def test_reset_step_full_loop(self) -> None: env = OpenSleuthEnvironment() env.reset(target_name="fibonacci", max_steps=10, seed=0) probe = env.step( OpenSleuthAction(action_type="probe", input_repr="10") ) assert probe.done is False assert probe.reward is not None and probe.reward > 0 assert probe.probe_history[-1]["output_repr"] == "55" assert env.state.step_count == 1 submit = env.step( OpenSleuthAction( action_type="submit", code="def fibonacci(n):\n a,b=0,1\n for _ in range(n-1):\n a,b=b,a+b\n return b\n", ) ) assert submit.done is True assert submit.reward is not None assert env.state.finished is True def test_reset_with_no_args_uses_safe_default(self) -> None: """OpenEnv requires reset() to work with zero arguments. We use 'fibonacci' as the implicit default so a bare reset always produces a valid episode.""" env = OpenSleuthEnvironment() obs = env.reset() assert obs.target_function_name == "fibonacci" def test_supports_concurrent_sessions_flag(self) -> None: """OpenEnv's HTTPEnvServer refuses max_concurrent_envs > 1 unless the env opts in via SUPPORTS_CONCURRENT_SESSIONS.""" assert OpenSleuthEnvironment.SUPPORTS_CONCURRENT_SESSIONS is True def test_action_is_extra_forbid(self) -> None: """OpenEnv Action base sets extra='forbid' to catch typo'd fields early. Our OpenSleuthAction must inherit that behavior.""" from pydantic import ValidationError with pytest.raises(ValidationError): OpenSleuthAction(action_type="probe", input_repr="1", made_up_field=1) # --------------------------------------------------------------------------- # HTTP-level: verifies the /openenv/* sub-app routes that judges will hit. # --------------------------------------------------------------------------- @pytest.fixture(scope="module") def http_client() -> TestClient: from server import app with TestClient(app) as client: yield client class TestOpenEnvHttpSurface: """The endpoints the OpenEnv spec / `openenv validate` look for.""" def test_health(self, http_client: TestClient) -> None: r = http_client.get("/openenv/health") assert r.status_code == 200, r.text assert r.json() == {"status": "healthy"} def test_metadata(self, http_client: TestClient) -> None: r = http_client.get("/openenv/metadata") assert r.status_code == 200, r.text body = r.json() for key in ("name", "description", "version"): assert key in body, f"missing {key} in /openenv/metadata" assert body["name"] == "OpenSleuth" def test_schema(self, http_client: TestClient) -> None: r = http_client.get("/openenv/schema") assert r.status_code == 200, r.text body = r.json() for key in ("action", "observation", "state"): assert key in body, f"missing {key} in /openenv/schema" assert "properties" in body[key], ( f"/openenv/schema {key!r} is not a valid JSON schema" ) # action discriminator should be visible in the schema assert "action_type" in body["action"]["properties"] def test_state(self, http_client: TestClient) -> None: r = http_client.get("/openenv/state") assert r.status_code == 200, r.text body = r.json() assert "episode_id" in body assert "step_count" in body def test_reset_returns_canonical_envelope(self, http_client: TestClient) -> None: r = http_client.post("/openenv/reset", json={"target_name": "fibonacci"}) assert r.status_code == 200, r.text body = r.json() # Canonical OpenEnv shape: {"observation": {...}, "reward": ..., "done": ...} assert set(body.keys()) == {"observation", "reward", "done"}, ( f"Expected OpenEnv envelope, got keys: {sorted(body)}" ) assert body["done"] is False assert body["observation"]["target_function_name"] == "fibonacci" def test_reset_with_no_body_works(self, http_client: TestClient) -> None: """OpenEnv ResetRequest defaults to an empty body. Must still work.""" r = http_client.post("/openenv/reset") assert r.status_code == 200, r.text body = r.json() assert "observation" in body def test_step_canonical_envelope_with_probe(self, http_client: TestClient) -> None: r = http_client.post( "/openenv/step", json={"action": {"action_type": "probe", "input_repr": "10"}}, ) assert r.status_code == 200, r.text body = r.json() assert set(body.keys()) == {"observation", "reward", "done"} # Note: under HTTP (stateless), each /openenv/step gets a fresh env; # we auto-reset so a probe still produces a valid history. assert body["observation"]["probe_history"], "probe should produce history" def test_step_rejects_unknown_action_field(self, http_client: TestClient) -> None: r = http_client.post( "/openenv/step", json={"action": {"action_type": "probe", "input_repr": "1", "wat": True}}, ) # OpenEnv's deserialize_action raises ValidationError -> 422. assert r.status_code == 422 # --------------------------------------------------------------------------- # Regression: the legacy trainer-facing routes must still work unchanged. # --------------------------------------------------------------------------- class TestLegacyContractPreserved: def test_legacy_health(self, http_client: TestClient) -> None: r = http_client.get("/health") assert r.status_code == 200 assert r.json()["status"] == "ok" def test_legacy_reset_returns_bare_observation(self, http_client: TestClient) -> None: """Trainer expects {episode_id, target_function_name, ...} at the top level (NOT wrapped in {observation: ...}). Must NOT regress.""" r = http_client.post( "/reset", json={"target_name": "fibonacci", "seed": 0, "max_steps": 5}, ) assert r.status_code == 200, r.text body = r.json() assert "episode_id" in body, ( "Legacy /reset must return a bare observation, not the OpenEnv envelope. " "If this fails the trainer will break." ) assert "observation" not in body # don't accidentally double-wrap def test_legacy_step_returns_step_response(self, http_client: TestClient) -> None: reset = http_client.post( "/reset", json={"target_name": "fibonacci", "seed": 0, "max_steps": 5}, ).json() eid = reset["episode_id"] r = http_client.post( "/step", json={ "episode_id": eid, "action": {"action_type": "probe", "input_repr": "5"}, }, ) assert r.status_code == 200, r.text body = r.json() # Legacy shape: {observation, reward, done, info} assert {"observation", "reward", "done", "info"} <= set(body.keys()) assert "execution_reward" not in body # only present on submit info