opensleuth-env-gemini-cli / tests /test_openenv_conformance.py
anugrah55's picture
OpenEnv 0.2.3 conformance: mount /openenv sub-app, add adapter + tests + example client
31715b5 verified
"""OpenEnv 0.2.x protocol conformance tests for the OpenSleuth env.
These tests are *additive* and orthogonal to the existing legacy contract
covered in ``test_env.py`` / ``test_open_env.py``.
What we verify:
* The OpenEnv ``Environment`` adapter (:class:`OpenSleuthEnvironment`) implements
all four required methods (``reset`` / ``step`` / ``state`` / ``get_metadata``)
and returns instances of OpenEnv's ``Observation`` / ``State`` /
``EnvironmentMetadata`` base classes (so it would pass any ``isinstance``
check by an OpenEnv-aware harness).
* The ``/openenv/*`` HTTP sub-app exposes every endpoint OpenEnv 0.2.x
promises: ``/health``, ``/metadata``, ``/schema``, ``/state``, ``/reset``,
``/step``. (The ``/ws`` WebSocket is exercised separately via the
``smoke_openenv_client.py`` script run against the live Space.)
* ``/openenv/reset`` returns the canonical ``{"observation", "reward", "done"}``
envelope (NOT a bare observation, which is the legacy shape).
* ``/openenv/step`` accepts the canonical ``{"action": {...}}`` envelope (NOT
``{"episode_id", "action"}``, which is the legacy shape).
* The legacy bare ``/reset`` and ``/step`` routes the trainer uses are
untouched.
"""
from __future__ import annotations

from collections.abc import Iterator

import pytest

pytest.importorskip(
    "openenv.core.env_server.types",
    reason="openenv-core not installed; conformance tests skipped.",
)

from fastapi.testclient import TestClient
from openenv.core.env_server.types import (
    EnvironmentMetadata,
    Observation as OEObservation,
    State as OEState,
)

from opensleuth_env.openenv_adapter import (
    OpenSleuthAction,
    OpenSleuthEnvironment,
    OpenSleuthObservation,
    OpenSleuthState,
)
# ---------------------------------------------------------------------------
# Adapter-level: exercises the Environment subclass directly (no HTTP).
# ---------------------------------------------------------------------------
class TestEnvironmentSubclass:
    """Adapter-level checks that drive ``OpenSleuthEnvironment`` directly (no HTTP)."""

    def test_observation_inherits_openenv_base(self) -> None:
        """A bare ``reset()`` returns an OpenEnv ``Observation`` with base fields set."""
        env = OpenSleuthEnvironment()
        obs = env.reset()
        assert isinstance(obs, OEObservation), (
            "OpenSleuthObservation must subclass openenv.core...types.Observation "
            "so OpenEnv tooling (rubrics, evals, web UI) can introspect it."
        )
        # Must expose the OpenEnv-required fields.
        assert obs.done is False
        assert obs.reward is None
        assert isinstance(obs.metadata, dict)

    def test_state_inherits_openenv_base(self) -> None:
        """After ``reset()``, ``env.state`` is an OpenEnv ``State`` at step 0."""
        env = OpenSleuthEnvironment()
        env.reset()
        state = env.state
        assert isinstance(state, OEState)
        assert state.episode_id is not None
        assert state.step_count == 0

    def test_metadata_is_openenv_environment_metadata(self) -> None:
        """``get_metadata()`` returns a populated ``EnvironmentMetadata``."""
        env = OpenSleuthEnvironment()
        meta = env.get_metadata()
        assert isinstance(meta, EnvironmentMetadata)
        assert meta.name == "OpenSleuth"
        assert meta.description
        assert meta.version

    def test_reset_step_full_loop(self) -> None:
        """Run one probe followed by one submit and check the episode finishes."""
        env = OpenSleuthEnvironment()
        env.reset(target_name="fibonacci", max_steps=10, seed=0)

        probe = env.step(
            OpenSleuthAction(action_type="probe", input_repr="10")
        )
        assert probe.done is False
        assert probe.reward is not None and probe.reward > 0
        assert probe.probe_history[-1]["output_repr"] == "55"
        assert env.state.step_count == 1

        submit = env.step(
            OpenSleuthAction(
                action_type="submit",
                # The submitted source must itself be valid Python: the loop
                # body has to be indented deeper than the ``for`` line,
                # otherwise compiling it raises IndentationError.
                code=(
                    "def fibonacci(n):\n"
                    "    a,b=0,1\n"
                    "    for _ in range(n-1):\n"
                    "        a,b=b,a+b\n"
                    "    return b\n"
                ),
            )
        )
        assert submit.done is True
        assert submit.reward is not None
        assert env.state.finished is True

    def test_reset_with_no_args_uses_safe_default(self) -> None:
        """OpenEnv requires reset() to work with zero arguments. We use
        'fibonacci' as the implicit default so a bare reset always produces
        a valid episode."""
        env = OpenSleuthEnvironment()
        obs = env.reset()
        assert obs.target_function_name == "fibonacci"

    def test_supports_concurrent_sessions_flag(self) -> None:
        """OpenEnv's HTTPEnvServer refuses max_concurrent_envs > 1 unless
        the env opts in via SUPPORTS_CONCURRENT_SESSIONS."""
        assert OpenSleuthEnvironment.SUPPORTS_CONCURRENT_SESSIONS is True

    def test_action_is_extra_forbid(self) -> None:
        """OpenEnv Action base sets extra='forbid' to catch typo'd fields
        early. Our OpenSleuthAction must inherit that behavior."""
        from pydantic import ValidationError

        with pytest.raises(ValidationError):
            OpenSleuthAction(action_type="probe", input_repr="1", made_up_field=1)
# ---------------------------------------------------------------------------
# HTTP-level: verifies the /openenv/* sub-app routes that judges will hit.
# ---------------------------------------------------------------------------
@pytest.fixture(scope="module")
def http_client() -> Iterator[TestClient]:
    """Yield a single ``TestClient`` for the server app, shared by this module.

    The fixture is a generator, so its return annotation is
    ``Iterator[TestClient]`` rather than ``TestClient``. The client is used
    as a context manager so it is closed once the module's tests are done.
    """
    # Imported here rather than at module top, so ``server`` is only loaded
    # when a test actually requests the fixture.
    from server import app

    with TestClient(app) as client:
        yield client
class TestOpenEnvHttpSurface:
    """HTTP checks for the ``/openenv/*`` routes that the OpenEnv spec and
    ``openenv validate`` look for."""

    def test_health(self, http_client: TestClient) -> None:
        """``GET /openenv/health`` answers 200 with the healthy status body."""
        resp = http_client.get("/openenv/health")
        assert resp.status_code == 200, resp.text
        assert resp.json() == {"status": "healthy"}

    def test_metadata(self, http_client: TestClient) -> None:
        """``GET /openenv/metadata`` carries name, description and version."""
        resp = http_client.get("/openenv/metadata")
        assert resp.status_code == 200, resp.text
        payload = resp.json()
        required_fields = ("name", "description", "version")
        for field_name in required_fields:
            assert field_name in payload, f"missing {field_name} in /openenv/metadata"
        assert payload["name"] == "OpenSleuth"

    def test_schema(self, http_client: TestClient) -> None:
        """``GET /openenv/schema`` exposes action, observation and state schemas."""
        resp = http_client.get("/openenv/schema")
        assert resp.status_code == 200, resp.text
        payload = resp.json()
        for section in ("action", "observation", "state"):
            assert section in payload, f"missing {section} in /openenv/schema"
            section_schema = payload[section]
            assert "properties" in section_schema, (
                f"/openenv/schema {section!r} is not a valid JSON schema"
            )
        # The action discriminator should be visible in the schema.
        action_properties = payload["action"]["properties"]
        assert "action_type" in action_properties

    def test_state(self, http_client: TestClient) -> None:
        """``GET /openenv/state`` reports ``episode_id`` and ``step_count``."""
        resp = http_client.get("/openenv/state")
        assert resp.status_code == 200, resp.text
        payload = resp.json()
        assert "episode_id" in payload
        assert "step_count" in payload

    def test_reset_returns_canonical_envelope(self, http_client: TestClient) -> None:
        """``POST /openenv/reset`` replies with the OpenEnv envelope, not a bare observation."""
        request_body = {"target_name": "fibonacci"}
        resp = http_client.post("/openenv/reset", json=request_body)
        assert resp.status_code == 200, resp.text
        payload = resp.json()
        # Canonical OpenEnv shape: {"observation": {...}, "reward": ..., "done": ...}
        assert payload.keys() == {"observation", "reward", "done"}, (
            f"Expected OpenEnv envelope, got keys: {sorted(payload)}"
        )
        assert payload["done"] is False
        observation = payload["observation"]
        assert observation["target_function_name"] == "fibonacci"

    def test_reset_with_no_body_works(self, http_client: TestClient) -> None:
        """OpenEnv ResetRequest defaults to an empty body. Must still work."""
        resp = http_client.post("/openenv/reset")
        assert resp.status_code == 200, resp.text
        assert "observation" in resp.json()

    def test_step_canonical_envelope_with_probe(self, http_client: TestClient) -> None:
        """``POST /openenv/step`` accepts ``{"action": {...}}`` and returns the envelope."""
        probe_action = {"action_type": "probe", "input_repr": "10"}
        resp = http_client.post("/openenv/step", json={"action": probe_action})
        assert resp.status_code == 200, resp.text
        payload = resp.json()
        assert payload.keys() == {"observation", "reward", "done"}
        # Note: under HTTP (stateless), each /openenv/step gets a fresh env;
        # we auto-reset so a probe still produces a valid history.
        history = payload["observation"]["probe_history"]
        assert history, "probe should produce history"

    def test_step_rejects_unknown_action_field(self, http_client: TestClient) -> None:
        """An action carrying an undeclared field is rejected with HTTP 422."""
        bad_action = {"action_type": "probe", "input_repr": "1", "wat": True}
        resp = http_client.post("/openenv/step", json={"action": bad_action})
        # OpenEnv's deserialize_action raises ValidationError -> 422.
        assert resp.status_code == 422
# ---------------------------------------------------------------------------
# Regression: the legacy trainer-facing routes must still work unchanged.
# ---------------------------------------------------------------------------
class TestLegacyContractPreserved:
    """Regression checks for the legacy bare ``/health``, ``/reset`` and ``/step`` routes."""

    def test_legacy_health(self, http_client: TestClient) -> None:
        """Legacy ``GET /health`` still answers 200 with ``status == "ok"``."""
        r = http_client.get("/health")
        assert r.status_code == 200
        assert r.json()["status"] == "ok"

    def test_legacy_reset_returns_bare_observation(self, http_client: TestClient) -> None:
        """Trainer expects {episode_id, target_function_name, ...} at the top
        level (NOT wrapped in {observation: ...}). Must NOT regress."""
        r = http_client.post(
            "/reset",
            json={"target_name": "fibonacci", "seed": 0, "max_steps": 5},
        )
        assert r.status_code == 200, r.text
        body = r.json()
        assert "episode_id" in body, (
            "Legacy /reset must return a bare observation, not the OpenEnv envelope. "
            "If this fails the trainer will break."
        )
        assert "observation" not in body  # don't accidentally double-wrap

    def test_legacy_step_returns_step_response(self, http_client: TestClient) -> None:
        """Legacy ``POST /step`` takes ``{episode_id, action}`` and returns a step response."""
        reset_response = http_client.post(
            "/reset",
            json={"target_name": "fibonacci", "seed": 0, "max_steps": 5},
        )
        # Fail here with the server's own message if the reset did not succeed,
        # instead of a KeyError on "episode_id" further down.
        assert reset_response.status_code == 200, reset_response.text
        eid = reset_response.json()["episode_id"]

        r = http_client.post(
            "/step",
            json={
                "episode_id": eid,
                "action": {"action_type": "probe", "input_repr": "5"},
            },
        )
        assert r.status_code == 200, r.text
        body = r.json()
        # Legacy shape: {observation, reward, done, info}
        assert {"observation", "reward", "done", "info"} <= set(body.keys())
        # NOTE(review): this inspects the top-level envelope, while the original
        # remark ("only present on submit info") refers to ``body["info"]``.
        # Confirm whether the check was meant to target ``body["info"]``.
        assert "execution_reward" not in body  # only present on submit info