Spaces:

anugrah55
/

opensleuth-env-gemini-cli

Paused

App Files Files Community

opensleuth-env-gemini-cli / tests /test_openenv_conformance.py

anugrah55

OpenEnv 0.2.3 conformance: mount /openenv sub-app, add adapter + tests + example client

31715b5 verified 12 days ago

raw

history blame contribute delete

10.4 kB

	"""OpenEnv 0.2.x protocol conformance tests for the OpenSleuth env.

	These tests are additive and orthogonal to the existing legacy contract
	covered in ``test_env.py`` / ``test_open_env.py``.

	What we verify:

	* The OpenEnv ``Environment`` adapter (:class:`OpenSleuthEnvironment`) implements
	  all four required members (``reset`` / ``step`` / the ``state`` attribute /
	  ``get_metadata``) and returns instances of OpenEnv's ``Observation`` /
	  ``State`` / ``EnvironmentMetadata`` base classes (so it would pass any
	  ``isinstance`` check by an OpenEnv-aware harness).
	* The ``/openenv/*`` HTTP sub-app exposes every endpoint OpenEnv 0.2.x
	  promises: ``/health``, ``/metadata``, ``/schema``, ``/state``, ``/reset``,
	  ``/step``. (The ``/ws`` WebSocket is exercised separately via the
	  ``smoke_openenv_client.py`` script run against the live Space.)
	* ``/openenv/reset`` returns the canonical ``{"observation", "reward", "done"}``
	  envelope (NOT a bare observation, which is the legacy shape).
	* ``/openenv/step`` accepts the canonical ``{"action": {...}}`` envelope (NOT
	  ``{"episode_id", "action"}``, which is the legacy shape).
	* The legacy bare ``/reset`` and ``/step`` routes the trainer uses are
	  untouched.
	"""

	from __future__ import annotations

	from collections.abc import Iterator

	import pytest

	pytest.importorskip(
	    "openenv.core.env_server.types",
	    reason="openenv-core not installed; conformance tests skipped.",
	)

	from fastapi.testclient import TestClient
	from openenv.core.env_server.types import (
	    EnvironmentMetadata,
	    Observation as OEObservation,
	    State as OEState,
	)

	from opensleuth_env.openenv_adapter import (
	    OpenSleuthAction,
	    OpenSleuthEnvironment,
	    OpenSleuthObservation,
	    OpenSleuthState,
	)


	# ---------------------------------------------------------------------------
	# Adapter-level: exercises the Environment subclass directly (no HTTP).
	# ---------------------------------------------------------------------------


	class TestEnvironmentSubclass:
	    """Adapter-level checks that call ``OpenSleuthEnvironment`` directly.

	    No HTTP is involved: each test builds its own environment instance and
	    inspects the objects returned by ``reset`` / ``step`` / ``state`` /
	    ``get_metadata``.
	    """

	    def test_observation_inherits_openenv_base(self) -> None:
	        """A fresh ``reset()`` yields an OpenEnv ``Observation`` instance."""
	        env = OpenSleuthEnvironment()
	        obs = env.reset()
	        assert isinstance(obs, OEObservation), (
	            "OpenSleuthObservation must subclass openenv.core...types.Observation "
	            "so OpenEnv tooling (rubrics, evals, web UI) can introspect it."
	        )
	        # Must expose the OpenEnv-required fields.
	        assert obs.done is False
	        assert obs.reward is None
	        assert isinstance(obs.metadata, dict)

	    def test_state_inherits_openenv_base(self) -> None:
	        """``env.state`` is an OpenEnv ``State`` with an id and a zero step count."""
	        env = OpenSleuthEnvironment()
	        env.reset()
	        state = env.state
	        assert isinstance(state, OEState)
	        assert state.episode_id is not None
	        assert state.step_count == 0

	    def test_metadata_is_openenv_environment_metadata(self) -> None:
	        """``get_metadata()`` returns a populated ``EnvironmentMetadata``."""
	        env = OpenSleuthEnvironment()
	        meta = env.get_metadata()
	        assert isinstance(meta, EnvironmentMetadata)
	        assert meta.name == "OpenSleuth"
	        assert meta.description
	        assert meta.version

	    def test_reset_step_full_loop(self) -> None:
	        """Drive one episode end to end: reset, one probe, then a submission."""
	        env = OpenSleuthEnvironment()
	        env.reset(target_name="fibonacci", max_steps=10, seed=0)

	        probe = env.step(OpenSleuthAction(action_type="probe", input_repr="10"))
	        assert probe.done is False
	        assert probe.reward is not None and probe.reward > 0
	        assert probe.probe_history[-1]["output_repr"] == "55"
	        assert env.state.step_count == 1

	        # The submitted source must be syntactically valid Python: the loop
	        # body has to be indented deeper than the ``for`` line, otherwise the
	        # submission is an IndentationError rather than a real solution.
	        solution = (
	            "def fibonacci(n):\n"
	            "    a,b=0,1\n"
	            "    for _ in range(n-1):\n"
	            "        a,b=b,a+b\n"
	            "    return b\n"
	        )
	        submit = env.step(OpenSleuthAction(action_type="submit", code=solution))
	        assert submit.done is True
	        assert submit.reward is not None
	        assert env.state.finished is True

	    def test_reset_with_no_args_uses_safe_default(self) -> None:
	        """OpenEnv requires reset() to work with zero arguments. We use
	        'fibonacci' as the implicit default so a bare reset always produces
	        a valid episode."""
	        env = OpenSleuthEnvironment()
	        obs = env.reset()
	        assert obs.target_function_name == "fibonacci"

	    def test_supports_concurrent_sessions_flag(self) -> None:
	        """OpenEnv's HTTPEnvServer refuses max_concurrent_envs > 1 unless
	        the env opts in via SUPPORTS_CONCURRENT_SESSIONS."""
	        assert OpenSleuthEnvironment.SUPPORTS_CONCURRENT_SESSIONS is True

	    def test_action_is_extra_forbid(self) -> None:
	        """OpenEnv Action base sets extra='forbid' to catch typo'd fields
	        early. Our OpenSleuthAction must inherit that behavior."""
	        from pydantic import ValidationError

	        with pytest.raises(ValidationError):
	            OpenSleuthAction(action_type="probe", input_repr="1", made_up_field=1)


	# ---------------------------------------------------------------------------
	# HTTP-level: verifies the /openenv/* sub-app routes that judges will hit.
	# ---------------------------------------------------------------------------


	@pytest.fixture(scope="module")
	def http_client() -> Iterator[TestClient]:
	    """Yield one ``TestClient`` for the server app, shared by the whole module.

	    ``server`` is imported inside the fixture, so the app is only loaded when
	    a test actually requests the client. The client is used as a context
	    manager: it stays open while the module's tests run and is closed when
	    the fixture is torn down.

	    Yields:
	        The open ``TestClient`` wrapping ``server.app``.
	    """
	    from server import app

	    with TestClient(app) as client:
	        yield client


	class TestOpenEnvHttpSurface:
	    """HTTP checks for the routes mounted under ``/openenv``.

	    Covers ``/health``, ``/metadata``, ``/schema``, ``/state``, ``/reset`` and
	    ``/step`` through the shared ``http_client`` fixture.
	    """

	    def test_health(self, http_client: TestClient) -> None:
	        """``GET /openenv/health`` answers 200 with the healthy status body."""
	        response = http_client.get("/openenv/health")
	        assert response.status_code == 200, response.text
	        assert response.json() == {"status": "healthy"}

	    def test_metadata(self, http_client: TestClient) -> None:
	        """``GET /openenv/metadata`` carries name, description and version."""
	        response = http_client.get("/openenv/metadata")
	        assert response.status_code == 200, response.text
	        body = response.json()
	        required = ("name", "description", "version")
	        for key in required:
	            assert key in body, f"missing {key} in /openenv/metadata"
	        assert body["name"] == "OpenSleuth"

	    def test_schema(self, http_client: TestClient) -> None:
	        """``GET /openenv/schema`` has a JSON schema per action/observation/state."""
	        response = http_client.get("/openenv/schema")
	        assert response.status_code == 200, response.text
	        body = response.json()
	        sections = ("action", "observation", "state")
	        for key in sections:
	            assert key in body, f"missing {key} in /openenv/schema"
	            section = body[key]
	            assert "properties" in section, (
	                f"/openenv/schema {key!r} is not a valid JSON schema"
	            )
	        # action discriminator should be visible in the schema
	        action_properties = body["action"]["properties"]
	        assert "action_type" in action_properties

	    def test_state(self, http_client: TestClient) -> None:
	        """``GET /openenv/state`` reports an episode id and a step count."""
	        response = http_client.get("/openenv/state")
	        assert response.status_code == 200, response.text
	        body = response.json()
	        assert "episode_id" in body
	        assert "step_count" in body

	    def test_reset_returns_canonical_envelope(self, http_client: TestClient) -> None:
	        """``POST /openenv/reset`` wraps the observation in the OpenEnv envelope."""
	        request_json = {"target_name": "fibonacci"}
	        response = http_client.post("/openenv/reset", json=request_json)
	        assert response.status_code == 200, response.text
	        body = response.json()
	        # Canonical OpenEnv shape: {"observation": {...}, "reward": ..., "done": ...}
	        envelope_keys = {"observation", "reward", "done"}
	        assert set(body.keys()) == envelope_keys, (
	            f"Expected OpenEnv envelope, got keys: {sorted(body)}"
	        )
	        assert body["done"] is False
	        observation = body["observation"]
	        assert observation["target_function_name"] == "fibonacci"

	    def test_reset_with_no_body_works(self, http_client: TestClient) -> None:
	        """A reset posted without any request body must still succeed."""
	        response = http_client.post("/openenv/reset")
	        assert response.status_code == 200, response.text
	        body = response.json()
	        assert "observation" in body

	    def test_step_canonical_envelope_with_probe(self, http_client: TestClient) -> None:
	        """``POST /openenv/step`` accepts ``{"action": ...}`` and returns the envelope."""
	        probe_action = {"action_type": "probe", "input_repr": "10"}
	        response = http_client.post("/openenv/step", json={"action": probe_action})
	        assert response.status_code == 200, response.text
	        body = response.json()
	        assert set(body.keys()) == {"observation", "reward", "done"}
	        # Note: under HTTP (stateless), each /openenv/step gets a fresh env;
	        # we auto-reset so a probe still produces a valid history.
	        history = body["observation"]["probe_history"]
	        assert history, "probe should produce history"

	    def test_step_rejects_unknown_action_field(self, http_client: TestClient) -> None:
	        """An action carrying an undeclared field is rejected with HTTP 422."""
	        bad_action = {"action_type": "probe", "input_repr": "1", "wat": True}
	        response = http_client.post("/openenv/step", json={"action": bad_action})
	        # OpenEnv's deserialize_action raises ValidationError -> 422.
	        assert response.status_code == 422


	# ---------------------------------------------------------------------------
	# Regression: the legacy trainer-facing routes must still work unchanged.
	# ---------------------------------------------------------------------------


	class TestLegacyContractPreserved:
	    """Regression checks for the bare ``/health``, ``/reset`` and ``/step`` routes.

	    These routes use the legacy shapes (bare observation on reset,
	    ``{"episode_id", "action"}`` on step), not the OpenEnv envelope.
	    """

	    def test_legacy_health(self, http_client: TestClient) -> None:
	        """``GET /health`` answers 200 with ``status == "ok"``."""
	        r = http_client.get("/health")
	        assert r.status_code == 200, r.text
	        assert r.json()["status"] == "ok"

	    def test_legacy_reset_returns_bare_observation(self, http_client: TestClient) -> None:
	        """Trainer expects {episode_id, target_function_name, ...} at the top
	        level (NOT wrapped in {observation: ...}). Must NOT regress."""
	        r = http_client.post(
	            "/reset",
	            json={"target_name": "fibonacci", "seed": 0, "max_steps": 5},
	        )
	        assert r.status_code == 200, r.text
	        body = r.json()
	        assert "episode_id" in body, (
	            "Legacy /reset must return a bare observation, not the OpenEnv envelope. "
	            "If this fails the trainer will break."
	        )
	        assert "observation" not in body  # don't accidentally double-wrap

	    def test_legacy_step_returns_step_response(self, http_client: TestClient) -> None:
	        """Legacy ``/step`` takes an episode id plus action and returns a step response."""
	        reset = http_client.post(
	            "/reset",
	            json={"target_name": "fibonacci", "seed": 0, "max_steps": 5},
	        )
	        # Fail here, with the server's message, rather than with a KeyError on
	        # "episode_id" below if the reset itself was rejected.
	        assert reset.status_code == 200, reset.text
	        eid = reset.json()["episode_id"]
	        r = http_client.post(
	            "/step",
	            json={
	                "episode_id": eid,
	                "action": {"action_type": "probe", "input_repr": "5"},
	            },
	        )
	        assert r.status_code == 200, r.text
	        body = r.json()
	        # Legacy shape: {observation, reward, done, info}
	        assert {"observation", "reward", "done", "info"} <= set(body.keys())
	        # execution_reward is only present on submit info, so a probe must not
	        # carry it. Check ``info`` itself: the top-level check alone can never
	        # fail for a key that lives inside ``info``.
	        # NOTE(review): assumes ``info`` is a dict (or null) for probe steps.
	        assert "execution_reward" not in body
	        info = body["info"] or {}
	        assert "execution_reward" not in info