"""OpenEnv 0.2.x protocol conformance tests for the OpenSleuth env.

These tests are *additive* and orthogonal to the existing legacy contract
covered in ``test_env.py`` / ``test_open_env.py``.

What we verify:

* The OpenEnv ``Environment`` adapter (:class:`OpenSleuthEnvironment`) implements
  all four required members (the ``reset`` / ``step`` / ``get_metadata`` methods
  plus ``state``, which is read as an attribute) and returns instances of
  OpenEnv's ``Observation`` / ``State`` / ``EnvironmentMetadata`` base classes
  (so it would pass any ``isinstance`` check by an OpenEnv-aware harness).
* The ``/openenv/*`` HTTP sub-app exposes every endpoint OpenEnv 0.2.x
  promises: ``/health``, ``/metadata``, ``/schema``, ``/state``, ``/reset``,
  ``/step``. (The ``/ws`` WebSocket is exercised separately via the
  ``smoke_openenv_client.py`` script run against the live Space.)
* ``/openenv/reset`` returns the canonical ``{"observation", "reward", "done"}``
  envelope (NOT a bare observation, which is the legacy shape).
* ``/openenv/step`` accepts the canonical ``{"action": {...}}`` envelope (NOT
  ``{"episode_id", "action"}``, which is the legacy shape).
* The legacy bare ``/health``, ``/reset`` and ``/step`` routes the trainer
  uses are untouched.
"""
|
|
from __future__ import annotations

from collections.abc import Iterator

import pytest

# Must run before the openenv imports below so the whole module is skipped
# (rather than erroring at import time) when openenv-core is absent.
pytest.importorskip(
    "openenv.core.env_server.types",
    reason="openenv-core not installed; conformance tests skipped.",
)

from fastapi.testclient import TestClient
from openenv.core.env_server.types import (
    EnvironmentMetadata,
    Observation as OEObservation,
    State as OEState,
)

from opensleuth_env.openenv_adapter import (
    OpenSleuthAction,
    OpenSleuthEnvironment,
    OpenSleuthObservation,
    OpenSleuthState,
)
|
|
|
|
| |
| |
| |
|
|
|
|
class TestEnvironmentSubclass:
    """In-process checks on the ``OpenSleuthEnvironment`` adapter (no HTTP involved)."""

    def test_observation_inherits_openenv_base(self) -> None:
        """A bare ``reset()`` yields an OpenEnv ``Observation`` for an unfinished episode."""
        first_obs = OpenSleuthEnvironment().reset()
        assert isinstance(first_obs, OEObservation), (
            "OpenSleuthObservation must subclass openenv.core...types.Observation "
            "so OpenEnv tooling (rubrics, evals, web UI) can introspect it."
        )

        # Base-class fields expected right after a reset: not done, no reward yet.
        assert first_obs.done is False
        assert first_obs.reward is None
        assert isinstance(first_obs.metadata, dict)

    def test_state_inherits_openenv_base(self) -> None:
        """After ``reset()`` the ``state`` is an OpenEnv ``State`` with zero steps taken."""
        environment = OpenSleuthEnvironment()
        environment.reset()

        snapshot = environment.state
        assert isinstance(snapshot, OEState)
        assert snapshot.episode_id is not None
        assert snapshot.step_count == 0

    def test_metadata_is_openenv_environment_metadata(self) -> None:
        """``get_metadata()`` returns a populated ``EnvironmentMetadata`` named "OpenSleuth"."""
        info = OpenSleuthEnvironment().get_metadata()
        assert isinstance(info, EnvironmentMetadata)
        assert info.name == "OpenSleuth"
        assert info.description
        assert info.version

    def test_reset_step_full_loop(self) -> None:
        """Drive one episode end to end: reset, one probe, then a submit that finishes it."""
        environment = OpenSleuthEnvironment()
        environment.reset(target_name="fibonacci", max_steps=10, seed=0)

        # Probe step: keeps the episode open, earns a positive reward and is
        # recorded as the latest probe-history entry.
        probe_action = OpenSleuthAction(action_type="probe", input_repr="10")
        after_probe = environment.step(probe_action)
        assert after_probe.done is False
        assert after_probe.reward is not None and after_probe.reward > 0
        assert after_probe.probe_history[-1]["output_repr"] == "55"
        assert environment.state.step_count == 1

        # Submit step: ends the episode.
        # NOTE(review): the indentation inside this source string looks collapsed
        # to single spaces as it appears here — confirm against the canonical file.
        candidate_source = "def fibonacci(n):\n a,b=0,1\n for _ in range(n-1):\n a,b=b,a+b\n return b\n"
        submit_action = OpenSleuthAction(action_type="submit", code=candidate_source)
        after_submit = environment.step(submit_action)
        assert after_submit.done is True
        assert after_submit.reward is not None
        assert environment.state.finished is True

    def test_reset_with_no_args_uses_safe_default(self) -> None:
        """OpenEnv requires reset() to work with zero arguments. We use
        'fibonacci' as the implicit default so a bare reset always produces
        a valid episode."""
        default_obs = OpenSleuthEnvironment().reset()
        assert default_obs.target_function_name == "fibonacci"

    def test_supports_concurrent_sessions_flag(self) -> None:
        """OpenEnv's HTTPEnvServer refuses max_concurrent_envs > 1 unless
        the env opts in via SUPPORTS_CONCURRENT_SESSIONS."""
        opted_in = OpenSleuthEnvironment.SUPPORTS_CONCURRENT_SESSIONS
        assert opted_in is True

    def test_action_is_extra_forbid(self) -> None:
        """OpenEnv Action base sets extra='forbid' to catch typo'd fields
        early. Our OpenSleuthAction must inherit that behavior."""
        from pydantic import ValidationError

        fields_with_typo = {"action_type": "probe", "input_repr": "1", "made_up_field": 1}
        with pytest.raises(ValidationError):
            OpenSleuthAction(**fields_with_typo)
|
|
|
|
| |
| |
| |
|
|
|
|
@pytest.fixture(scope="module")
def http_client() -> Iterator[TestClient]:
    """Yield one ``TestClient`` wrapping the server app, shared by the whole module.

    The fixture is a generator, so its return annotation is an iterator of
    ``TestClient`` rather than ``TestClient`` itself. The client is entered as
    a context manager and therefore exited once the module's tests are done.

    Yields:
        The ``TestClient`` bound to ``server.app``.
    """
    # Imported here rather than at module level, so ``server`` is only loaded
    # when a test actually requests this fixture.
    from server import app

    with TestClient(app) as client:
        yield client
|
|
|
|
class TestOpenEnvHttpSurface:
    """The endpoints the OpenEnv spec / `openenv validate` look for."""

    def test_health(self, http_client: TestClient) -> None:
        """``GET /openenv/health`` answers 200 with exactly ``{"status": "healthy"}``."""
        response = http_client.get("/openenv/health")
        assert response.status_code == 200, response.text
        assert response.json() == {"status": "healthy"}

    def test_metadata(self, http_client: TestClient) -> None:
        """``GET /openenv/metadata`` carries name, description and version."""
        response = http_client.get("/openenv/metadata")
        assert response.status_code == 200, response.text

        payload = response.json()
        for field in ("name", "description", "version"):
            assert field in payload, f"missing {field} in /openenv/metadata"
        assert payload["name"] == "OpenSleuth"

    def test_schema(self, http_client: TestClient) -> None:
        """``GET /openenv/schema`` exposes action / observation / state schemas."""
        response = http_client.get("/openenv/schema")
        assert response.status_code == 200, response.text

        schemas = response.json()
        for section in ("action", "observation", "state"):
            assert section in schemas, f"missing {section} in /openenv/schema"
            assert "properties" in schemas[section], (
                f"/openenv/schema {section!r} is not a valid JSON schema"
            )

        # The action schema must describe the ``action_type`` field.
        assert "action_type" in schemas["action"]["properties"]

    def test_state(self, http_client: TestClient) -> None:
        """``GET /openenv/state`` reports an episode id and a step count."""
        response = http_client.get("/openenv/state")
        assert response.status_code == 200, response.text

        state_payload = response.json()
        assert "episode_id" in state_payload
        assert "step_count" in state_payload

    def test_reset_returns_canonical_envelope(self, http_client: TestClient) -> None:
        """``POST /openenv/reset`` wraps the observation in the three-key envelope."""
        response = http_client.post("/openenv/reset", json={"target_name": "fibonacci"})
        assert response.status_code == 200, response.text

        envelope = response.json()
        expected_keys = {"observation", "reward", "done"}
        assert set(envelope.keys()) == expected_keys, (
            f"Expected OpenEnv envelope, got keys: {sorted(envelope)}"
        )
        assert envelope["done"] is False
        assert envelope["observation"]["target_function_name"] == "fibonacci"

    def test_reset_with_no_body_works(self, http_client: TestClient) -> None:
        """OpenEnv ResetRequest defaults to an empty body. Must still work."""
        response = http_client.post("/openenv/reset")
        assert response.status_code == 200, response.text
        assert "observation" in response.json()

    def test_step_canonical_envelope_with_probe(self, http_client: TestClient) -> None:
        """``POST /openenv/step`` takes ``{"action": ...}`` and returns the envelope."""
        probe_request = {"action": {"action_type": "probe", "input_repr": "10"}}
        response = http_client.post("/openenv/step", json=probe_request)
        assert response.status_code == 200, response.text

        envelope = response.json()
        assert set(envelope.keys()) == {"observation", "reward", "done"}

        assert envelope["observation"]["probe_history"], "probe should produce history"

    def test_step_rejects_unknown_action_field(self, http_client: TestClient) -> None:
        """An action with an unrecognised field is rejected with HTTP 422."""
        bad_request = {"action": {"action_type": "probe", "input_repr": "1", "wat": True}}
        response = http_client.post("/openenv/step", json=bad_request)

        assert response.status_code == 422
|
|
|
|
| |
| |
| |
|
|
|
|
class TestLegacyContractPreserved:
    """Regression guards for the legacy bare ``/health``, ``/reset`` and ``/step`` routes."""

    def test_legacy_health(self, http_client: TestClient) -> None:
        """Legacy ``GET /health`` answers 200 with ``status == "ok"``."""
        r = http_client.get("/health")
        # Include the body so a failing status is diagnosable from the report.
        assert r.status_code == 200, r.text
        assert r.json()["status"] == "ok"

    def test_legacy_reset_returns_bare_observation(self, http_client: TestClient) -> None:
        """Trainer expects {episode_id, target_function_name, ...} at the top
        level (NOT wrapped in {observation: ...}). Must NOT regress."""
        r = http_client.post(
            "/reset",
            json={"target_name": "fibonacci", "seed": 0, "max_steps": 5},
        )
        assert r.status_code == 200, r.text
        body = r.json()
        assert "episode_id" in body, (
            "Legacy /reset must return a bare observation, not the OpenEnv envelope. "
            "If this fails the trainer will break."
        )
        assert "observation" not in body

    def test_legacy_step_returns_step_response(self, http_client: TestClient) -> None:
        """Legacy ``POST /step`` takes ``{episode_id, action}`` and returns a step response.

        The response must contain at least ``observation``, ``reward``, ``done``
        and ``info``, and must not expose ``execution_reward`` at the top level.
        """
        reset = http_client.post(
            "/reset",
            json={"target_name": "fibonacci", "seed": 0, "max_steps": 5},
        )
        # Fail here with the server's message if the setup reset did not
        # succeed, instead of with a KeyError on "episode_id" below.
        assert reset.status_code == 200, reset.text
        eid = reset.json()["episode_id"]

        r = http_client.post(
            "/step",
            json={
                "episode_id": eid,
                "action": {"action_type": "probe", "input_repr": "5"},
            },
        )
        assert r.status_code == 200, r.text
        body = r.json()

        assert {"observation", "reward", "done", "info"} <= set(body.keys())
        assert "execution_reward" not in body
|
|