"""OpenEnv 0.2.x protocol conformance tests for the OpenSleuth env.
These tests are *additive* and orthogonal to the existing legacy contract
covered in ``test_env.py`` / ``test_open_env.py``.
What we verify:
* The OpenEnv ``Environment`` adapter (:class:`OpenSleuthEnvironment`) implements
all four required methods (``reset`` / ``step`` / ``state`` / ``get_metadata``)
and returns instances of OpenEnv's ``Observation`` / ``State`` /
``EnvironmentMetadata`` base classes (so it would pass any ``isinstance``
check by an OpenEnv-aware harness).
* The ``/openenv/*`` HTTP sub-app exposes every endpoint OpenEnv 0.2.x
promises: ``/health``, ``/metadata``, ``/schema``, ``/state``, ``/reset``,
``/step``. (The ``/ws`` WebSocket is exercised separately via the
``smoke_openenv_client.py`` script run against the live Space.)
* ``/openenv/reset`` returns the canonical ``{"observation", "reward", "done"}``
envelope (NOT a bare observation, which is the legacy shape).
* ``/openenv/step`` accepts the canonical ``{"action": {...}}`` envelope (NOT
``{"episode_id", "action"}``, which is the legacy shape).
* The legacy bare ``/reset`` and ``/step`` routes the trainer uses are
untouched.
"""
from __future__ import annotations
import pytest
pytest.importorskip(
"openenv.core.env_server.types",
reason="openenv-core not installed; conformance tests skipped.",
)
from fastapi.testclient import TestClient
from openenv.core.env_server.types import (
EnvironmentMetadata,
Observation as OEObservation,
State as OEState,
)
from opensleuth_env.openenv_adapter import (
OpenSleuthAction,
OpenSleuthEnvironment,
OpenSleuthObservation,
OpenSleuthState,
)
# ---------------------------------------------------------------------------
# Adapter-level: exercises the Environment subclass directly (no HTTP).
# ---------------------------------------------------------------------------
class TestEnvironmentSubclass:
    """Adapter-level checks that drive ``OpenSleuthEnvironment`` directly (no HTTP)."""

    def test_observation_inherits_openenv_base(self) -> None:
        """``reset()`` must return an OpenEnv ``Observation`` with its base fields."""
        env = OpenSleuthEnvironment()
        obs = env.reset()
        assert isinstance(obs, OEObservation), (
            "OpenSleuthObservation must subclass openenv.core...types.Observation "
            "so OpenEnv tooling (rubrics, evals, web UI) can introspect it."
        )
        # Must expose the OpenEnv-required fields.
        assert obs.done is False
        assert obs.reward is None
        assert isinstance(obs.metadata, dict)

    def test_state_inherits_openenv_base(self) -> None:
        """After a reset, ``env.state`` is an OpenEnv ``State`` at step zero."""
        env = OpenSleuthEnvironment()
        env.reset()
        state = env.state
        assert isinstance(state, OEState)
        assert state.episode_id is not None
        assert state.step_count == 0

    def test_metadata_is_openenv_environment_metadata(self) -> None:
        """``get_metadata()`` returns a populated ``EnvironmentMetadata``."""
        env = OpenSleuthEnvironment()
        meta = env.get_metadata()
        assert isinstance(meta, EnvironmentMetadata)
        assert meta.name == "OpenSleuth"
        assert meta.description
        assert meta.version

    def test_reset_step_full_loop(self) -> None:
        """Run one probe followed by one submit and check the episode finishes."""
        env = OpenSleuthEnvironment()
        env.reset(target_name="fibonacci", max_steps=10, seed=0)

        probe = env.step(
            OpenSleuthAction(action_type="probe", input_repr="10")
        )
        assert probe.done is False
        assert probe.reward is not None and probe.reward > 0
        assert probe.probe_history[-1]["output_repr"] == "55"
        assert env.state.step_count == 1

        # The submitted source must itself be valid Python: the loop body has
        # to be indented deeper than the ``for`` header, otherwise the
        # submission cannot even be compiled and this test would only ever
        # exercise the failure path.
        submit = env.step(
            OpenSleuthAction(
                action_type="submit",
                code=(
                    "def fibonacci(n):\n"
                    "    a,b=0,1\n"
                    "    for _ in range(n-1):\n"
                    "        a,b=b,a+b\n"
                    "    return b\n"
                ),
            )
        )
        assert submit.done is True
        assert submit.reward is not None
        assert env.state.finished is True

    def test_reset_with_no_args_uses_safe_default(self) -> None:
        """OpenEnv requires reset() to work with zero arguments. We use
        'fibonacci' as the implicit default so a bare reset always produces
        a valid episode."""
        env = OpenSleuthEnvironment()
        obs = env.reset()
        assert obs.target_function_name == "fibonacci"

    def test_supports_concurrent_sessions_flag(self) -> None:
        """OpenEnv's HTTPEnvServer refuses max_concurrent_envs > 1 unless
        the env opts in via SUPPORTS_CONCURRENT_SESSIONS."""
        assert OpenSleuthEnvironment.SUPPORTS_CONCURRENT_SESSIONS is True

    def test_action_is_extra_forbid(self) -> None:
        """OpenEnv Action base sets extra='forbid' to catch typo'd fields
        early. Our OpenSleuthAction must inherit that behavior."""
        from pydantic import ValidationError

        with pytest.raises(ValidationError):
            OpenSleuthAction(action_type="probe", input_repr="1", made_up_field=1)
# ---------------------------------------------------------------------------
# HTTP-level: verifies the /openenv/* sub-app routes that judges will hit.
# ---------------------------------------------------------------------------
@pytest.fixture(scope="module")
def http_client() -> TestClient:
    """Yield one ``TestClient`` for the server app, shared across this module.

    The app is imported inside the fixture rather than at module import time,
    and the client is entered as a context manager so it is exited again when
    the fixture is torn down.
    """
    from server import app

    managed_client = TestClient(app)
    with managed_client as active_client:
        yield active_client
class TestOpenEnvHttpSurface:
    """HTTP-level checks for the ``/openenv/*`` routes.

    These are the endpoints the OpenEnv spec / `openenv validate` look for.
    """

    def test_health(self, http_client: TestClient) -> None:
        """``GET /openenv/health`` answers 200 with the healthy status body."""
        expected = {"status": "healthy"}
        resp = http_client.get("/openenv/health")
        assert resp.status_code == 200, resp.text
        assert resp.json() == expected

    def test_metadata(self, http_client: TestClient) -> None:
        """``GET /openenv/metadata`` carries name, description and version."""
        resp = http_client.get("/openenv/metadata")
        assert resp.status_code == 200, resp.text
        payload = resp.json()
        required_fields = ("name", "description", "version")
        for field in required_fields:
            assert field in payload, f"missing {field} in /openenv/metadata"
        assert payload["name"] == "OpenSleuth"

    def test_schema(self, http_client: TestClient) -> None:
        """``GET /openenv/schema`` exposes action, observation and state schemas."""
        resp = http_client.get("/openenv/schema")
        assert resp.status_code == 200, resp.text
        payload = resp.json()
        for section in ("action", "observation", "state"):
            assert section in payload, f"missing {section} in /openenv/schema"
            section_schema = payload[section]
            assert "properties" in section_schema, (
                f"/openenv/schema {section!r} is not a valid JSON schema"
            )
        # The action discriminator should be visible in the schema.
        action_properties = payload["action"]["properties"]
        assert "action_type" in action_properties

    def test_state(self, http_client: TestClient) -> None:
        """``GET /openenv/state`` reports an episode id and a step counter."""
        resp = http_client.get("/openenv/state")
        assert resp.status_code == 200, resp.text
        payload = resp.json()
        assert "episode_id" in payload
        assert "step_count" in payload

    def test_reset_returns_canonical_envelope(self, http_client: TestClient) -> None:
        """``POST /openenv/reset`` wraps the observation in the OpenEnv envelope."""
        request_body = {"target_name": "fibonacci"}
        resp = http_client.post("/openenv/reset", json=request_body)
        assert resp.status_code == 200, resp.text
        body = resp.json()
        # Canonical OpenEnv shape: {"observation": {...}, "reward": ..., "done": ...}
        envelope_keys = {"observation", "reward", "done"}
        assert set(body.keys()) == envelope_keys, (
            f"Expected OpenEnv envelope, got keys: {sorted(body)}"
        )
        assert body["done"] is False
        observation = body["observation"]
        assert observation["target_function_name"] == "fibonacci"

    def test_reset_with_no_body_works(self, http_client: TestClient) -> None:
        """OpenEnv ResetRequest defaults to an empty body. Must still work."""
        resp = http_client.post("/openenv/reset")
        assert resp.status_code == 200, resp.text
        payload = resp.json()
        assert "observation" in payload

    def test_step_canonical_envelope_with_probe(self, http_client: TestClient) -> None:
        """``POST /openenv/step`` accepts ``{"action": ...}`` and returns the envelope."""
        probe_action = {"action_type": "probe", "input_repr": "10"}
        resp = http_client.post("/openenv/step", json={"action": probe_action})
        assert resp.status_code == 200, resp.text
        payload = resp.json()
        envelope_keys = {"observation", "reward", "done"}
        assert set(payload.keys()) == envelope_keys
        # Note: under HTTP (stateless), each /openenv/step gets a fresh env;
        # we auto-reset so a probe still produces a valid history.
        history = payload["observation"]["probe_history"]
        assert history, "probe should produce history"

    def test_step_rejects_unknown_action_field(self, http_client: TestClient) -> None:
        """An action carrying an undeclared field is rejected with HTTP 422."""
        bad_action = {"action_type": "probe", "input_repr": "1", "wat": True}
        resp = http_client.post("/openenv/step", json={"action": bad_action})
        # OpenEnv's deserialize_action raises ValidationError -> 422.
        assert resp.status_code == 422
# ---------------------------------------------------------------------------
# Regression: the legacy trainer-facing routes must still work unchanged.
# ---------------------------------------------------------------------------
class TestLegacyContractPreserved:
    """Regression checks for the bare ``/health``, ``/reset`` and ``/step`` routes."""

    def test_legacy_health(self, http_client: TestClient) -> None:
        """``GET /health`` answers 200 with ``status == "ok"``."""
        resp = http_client.get("/health")
        assert resp.status_code == 200
        status = resp.json()["status"]
        assert status == "ok"

    def test_legacy_reset_returns_bare_observation(self, http_client: TestClient) -> None:
        """Trainer expects {episode_id, target_function_name, ...} at the top
        level (NOT wrapped in {observation: ...}). Must NOT regress."""
        reset_request = {"target_name": "fibonacci", "seed": 0, "max_steps": 5}
        resp = http_client.post("/reset", json=reset_request)
        assert resp.status_code == 200, resp.text
        payload = resp.json()
        assert "episode_id" in payload, (
            "Legacy /reset must return a bare observation, not the OpenEnv envelope. "
            "If this fails the trainer will break."
        )
        # Guard against accidentally double-wrapping the observation.
        assert "observation" not in payload

    def test_legacy_step_returns_step_response(self, http_client: TestClient) -> None:
        """A legacy probe step returns ``{observation, reward, done, info}``."""
        reset_request = {"target_name": "fibonacci", "seed": 0, "max_steps": 5}
        reset_payload = http_client.post("/reset", json=reset_request).json()
        episode_id = reset_payload["episode_id"]

        step_request = {
            "episode_id": episode_id,
            "action": {"action_type": "probe", "input_repr": "5"},
        }
        resp = http_client.post("/step", json=step_request)
        assert resp.status_code == 200, resp.text
        payload = resp.json()
        # Legacy shape: {observation, reward, done, info}
        required_keys = {"observation", "reward", "done", "info"}
        assert set(payload.keys()) >= required_keys
        # only present on submit info
        # NOTE(review): this checks the top-level response keys, not
        # payload["info"] -- confirm which of the two was intended.
        assert "execution_reward" not in payload
|