File size: 10,442 Bytes
31715b5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
"""OpenEnv 0.2.x protocol conformance tests for the OpenSleuth env.

These tests are *additive* and orthogonal to the existing legacy contract
covered in ``test_env.py`` / ``test_open_env.py``.

What we verify:

* The OpenEnv ``Environment`` adapter (:class:`OpenSleuthEnvironment`) implements
  all four required methods (``reset`` / ``step`` / ``state`` / ``get_metadata``)
  and returns instances of OpenEnv's ``Observation`` / ``State`` /
  ``EnvironmentMetadata`` base classes (so it would pass any ``isinstance``
  check by an OpenEnv-aware harness).
* The ``/openenv/*`` HTTP sub-app exposes every endpoint OpenEnv 0.2.x
  promises: ``/health``, ``/metadata``, ``/schema``, ``/state``, ``/reset``,
  ``/step``. (The ``/ws`` WebSocket is exercised separately via the
  ``smoke_openenv_client.py`` script run against the live Space.)
* ``/openenv/reset`` returns the canonical ``{"observation", "reward", "done"}``
  envelope (NOT a bare observation, which is the legacy shape).
* ``/openenv/step`` accepts the canonical ``{"action": {...}}`` envelope (NOT
  ``{"episode_id", "action"}``, which is the legacy shape).
* The legacy bare ``/reset`` and ``/step`` routes the trainer uses are
  untouched.
"""

from __future__ import annotations

from collections.abc import Iterator

import pytest

pytest.importorskip(
    "openenv.core.env_server.types",
    reason="openenv-core not installed; conformance tests skipped.",
)

from fastapi.testclient import TestClient
from openenv.core.env_server.types import (
    EnvironmentMetadata,
    Observation as OEObservation,
    State as OEState,
)

from opensleuth_env.openenv_adapter import (
    OpenSleuthAction,
    OpenSleuthEnvironment,
    OpenSleuthObservation,
    OpenSleuthState,
)


# ---------------------------------------------------------------------------
# Adapter-level: exercises the Environment subclass directly (no HTTP).
# ---------------------------------------------------------------------------


class TestEnvironmentSubclass:
    """In-process checks of ``OpenSleuthEnvironment`` against the OpenEnv base types."""

    def test_observation_inherits_openenv_base(self) -> None:
        """A bare ``reset()`` returns an OpenEnv ``Observation`` in its initial state."""
        sleuth = OpenSleuthEnvironment()
        first_obs = sleuth.reset()
        assert isinstance(first_obs, OEObservation), (
            "OpenSleuthObservation must subclass openenv.core...types.Observation "
            "so OpenEnv tooling (rubrics, evals, web UI) can introspect it."
        )
        # The OpenEnv-required fields must be present and in their
        # pre-step state: not done, no reward yet, dict metadata.
        assert first_obs.done is False
        assert first_obs.reward is None
        assert isinstance(first_obs.metadata, dict)

    def test_state_inherits_openenv_base(self) -> None:
        """After ``reset()`` the ``state`` property is an OpenEnv ``State`` at step 0."""
        sleuth = OpenSleuthEnvironment()
        sleuth.reset()
        snapshot = sleuth.state
        assert isinstance(snapshot, OEState)
        assert snapshot.episode_id is not None
        assert snapshot.step_count == 0

    def test_metadata_is_openenv_environment_metadata(self) -> None:
        """``get_metadata()`` returns a populated ``EnvironmentMetadata``."""
        sleuth = OpenSleuthEnvironment()
        descriptor = sleuth.get_metadata()
        assert isinstance(descriptor, EnvironmentMetadata)
        assert descriptor.name == "OpenSleuth"
        # Description and version only need to be non-empty.
        assert descriptor.description
        assert descriptor.version

    def test_reset_step_full_loop(self) -> None:
        """Run one probe followed by one submit and check the episode finishes."""
        sleuth = OpenSleuthEnvironment()
        sleuth.reset(target_name="fibonacci", max_steps=10, seed=0)

        # Probe phase: query the target with input 10.
        probe_action = OpenSleuthAction(action_type="probe", input_repr="10")
        probe_obs = sleuth.step(probe_action)
        assert probe_obs.done is False
        assert probe_obs.reward is not None
        assert probe_obs.reward > 0
        latest_probe = probe_obs.probe_history[-1]
        assert latest_probe["output_repr"] == "55"
        assert sleuth.state.step_count == 1

        # Submit phase: hand in a candidate implementation, which must
        # terminate the episode.
        candidate_src = "def fibonacci(n):\n    a,b=0,1\n    for _ in range(n-1):\n        a,b=b,a+b\n    return b\n"
        submit_action = OpenSleuthAction(action_type="submit", code=candidate_src)
        submit_obs = sleuth.step(submit_action)
        assert submit_obs.done is True
        assert submit_obs.reward is not None
        assert sleuth.state.finished is True

    def test_reset_with_no_args_uses_safe_default(self) -> None:
        """OpenEnv requires ``reset()`` to work with zero arguments.

        'fibonacci' is the implicit default target, so a bare reset always
        produces a valid episode.
        """
        sleuth = OpenSleuthEnvironment()
        default_obs = sleuth.reset()
        assert default_obs.target_function_name == "fibonacci"

    def test_supports_concurrent_sessions_flag(self) -> None:
        """The env must opt in to concurrency via ``SUPPORTS_CONCURRENT_SESSIONS``.

        OpenEnv's HTTPEnvServer refuses ``max_concurrent_envs > 1`` otherwise.
        """
        opted_in = OpenSleuthEnvironment.SUPPORTS_CONCURRENT_SESSIONS
        assert opted_in is True

    def test_action_is_extra_forbid(self) -> None:
        """Unknown fields on ``OpenSleuthAction`` must raise ``ValidationError``.

        OpenEnv's Action base sets ``extra='forbid'`` to catch typo'd fields
        early; ``OpenSleuthAction`` must inherit that behavior.
        """
        from pydantic import ValidationError

        bogus_fields = {
            "action_type": "probe",
            "input_repr": "1",
            "made_up_field": 1,
        }
        with pytest.raises(ValidationError):
            OpenSleuthAction(**bogus_fields)


# ---------------------------------------------------------------------------
# HTTP-level: verifies the /openenv/* sub-app routes that judges will hit.
# ---------------------------------------------------------------------------


@pytest.fixture(scope="module")
def http_client() -> Iterator[TestClient]:
    """Yield one ``TestClient`` wrapping the server app for the whole module.

    The fixture is module-scoped, so every HTTP-level test in this file
    shares the same client (and therefore the same app instance).

    Yields:
        A ``TestClient`` bound to ``server.app``. The ``with`` block closes
        the client once the last test in the module has finished.
    """
    # NOTE(review): the app is imported inside the fixture rather than at
    # module top -- presumably so the adapter-level tests can be collected
    # without importing the server; confirm before moving it.
    from server import app

    with TestClient(app) as client:
        yield client


class TestOpenEnvHttpSurface:
    """HTTP checks for the ``/openenv/*`` routes the OpenEnv spec / `openenv validate` look for."""

    def test_health(self, http_client: TestClient) -> None:
        """``GET /openenv/health`` reports exactly ``{"status": "healthy"}``."""
        response = http_client.get("/openenv/health")
        assert response.status_code == 200, response.text
        expected_payload = {"status": "healthy"}
        assert response.json() == expected_payload

    def test_metadata(self, http_client: TestClient) -> None:
        """``GET /openenv/metadata`` carries name, description and version."""
        response = http_client.get("/openenv/metadata")
        assert response.status_code == 200, response.text
        body = response.json()
        required_keys = ("name", "description", "version")
        for key in required_keys:
            assert key in body, f"missing {key} in /openenv/metadata"
        assert body["name"] == "OpenSleuth"

    def test_schema(self, http_client: TestClient) -> None:
        """``GET /openenv/schema`` exposes action/observation/state JSON schemas."""
        response = http_client.get("/openenv/schema")
        assert response.status_code == 200, response.text
        body = response.json()
        schema_sections = ("action", "observation", "state")
        for key in schema_sections:
            assert key in body, f"missing {key} in /openenv/schema"
            assert "properties" in body[key], (
                f"/openenv/schema {key!r} is not a valid JSON schema"
            )
        # The action discriminator should be visible in the schema.
        action_properties = body["action"]["properties"]
        assert "action_type" in action_properties

    def test_state(self, http_client: TestClient) -> None:
        """``GET /openenv/state`` includes ``episode_id`` and ``step_count``."""
        response = http_client.get("/openenv/state")
        assert response.status_code == 200, response.text
        body = response.json()
        assert "episode_id" in body
        assert "step_count" in body

    def test_reset_returns_canonical_envelope(self, http_client: TestClient) -> None:
        """``POST /openenv/reset`` answers with the OpenEnv envelope, not a bare observation."""
        reset_payload = {"target_name": "fibonacci"}
        response = http_client.post("/openenv/reset", json=reset_payload)
        assert response.status_code == 200, response.text
        body = response.json()
        # Canonical OpenEnv shape: {"observation": {...}, "reward": ..., "done": ...}
        envelope_keys = {"observation", "reward", "done"}
        assert set(body) == envelope_keys, (
            f"Expected OpenEnv envelope, got keys: {sorted(body)}"
        )
        assert body["done"] is False
        observation = body["observation"]
        assert observation["target_function_name"] == "fibonacci"

    def test_reset_with_no_body_works(self, http_client: TestClient) -> None:
        """OpenEnv ResetRequest defaults to an empty body; the route must still work."""
        response = http_client.post("/openenv/reset")
        assert response.status_code == 200, response.text
        body = response.json()
        assert "observation" in body

    def test_step_canonical_envelope_with_probe(self, http_client: TestClient) -> None:
        """``POST /openenv/step`` accepts ``{"action": {...}}`` and returns the envelope."""
        step_payload = {"action": {"action_type": "probe", "input_repr": "10"}}
        response = http_client.post("/openenv/step", json=step_payload)
        assert response.status_code == 200, response.text
        body = response.json()
        envelope_keys = {"observation", "reward", "done"}
        assert set(body) == envelope_keys
        # Note: under HTTP (stateless), each /openenv/step gets a fresh env;
        # we auto-reset so a probe still produces a valid history.
        history = body["observation"]["probe_history"]
        assert history, "probe should produce history"

    def test_step_rejects_unknown_action_field(self, http_client: TestClient) -> None:
        """An action with an unexpected field is rejected with HTTP 422."""
        bad_action = {"action_type": "probe", "input_repr": "1", "wat": True}
        response = http_client.post("/openenv/step", json={"action": bad_action})
        # OpenEnv's deserialize_action raises ValidationError -> 422.
        assert response.status_code == 422


# ---------------------------------------------------------------------------
# Regression: the legacy trainer-facing routes must still work unchanged.
# ---------------------------------------------------------------------------


class TestLegacyContractPreserved:
    """Regression checks for the bare ``/health``, ``/reset`` and ``/step`` routes.

    These are the trainer-facing routes; they must keep their legacy shapes
    alongside the ``/openenv/*`` sub-app.
    """

    def test_legacy_health(self, http_client: TestClient) -> None:
        """``GET /health`` answers 200 with ``status == "ok"``."""
        r = http_client.get("/health")
        assert r.status_code == 200
        assert r.json()["status"] == "ok"

    def test_legacy_reset_returns_bare_observation(self, http_client: TestClient) -> None:
        """Trainer expects {episode_id, target_function_name, ...} at the top
        level (NOT wrapped in {observation: ...}). Must NOT regress."""
        r = http_client.post(
            "/reset",
            json={"target_name": "fibonacci", "seed": 0, "max_steps": 5},
        )
        assert r.status_code == 200, r.text
        body = r.json()
        assert "episode_id" in body, (
            "Legacy /reset must return a bare observation, not the OpenEnv envelope. "
            "If this fails the trainer will break."
        )
        assert "observation" not in body  # don't accidentally double-wrap

    def test_legacy_step_returns_step_response(self, http_client: TestClient) -> None:
        """A probe on ``/step`` returns the legacy ``{observation, reward, done, info}``.

        The episode is created through the legacy ``/reset`` route and its
        ``episode_id`` is passed back in the ``/step`` request body.
        """
        reset_resp = http_client.post(
            "/reset",
            json={"target_name": "fibonacci", "seed": 0, "max_steps": 5},
        )
        # Surface a failed reset with the server's own message instead of an
        # opaque KeyError on "episode_id" below.
        assert reset_resp.status_code == 200, reset_resp.text
        eid = reset_resp.json()["episode_id"]
        r = http_client.post(
            "/step",
            json={
                "episode_id": eid,
                "action": {"action_type": "probe", "input_repr": "5"},
            },
        )
        assert r.status_code == 200, r.text
        body = r.json()
        # Legacy shape: {observation, reward, done, info}
        assert {"observation", "reward", "done", "info"} <= set(body.keys())
        # execution_reward is only present on submit info, so a probe must not
        # carry it. Checking the top level alone can never fail for the legacy
        # shape, so the nested info payload is checked as well (a null info
        # is tolerated).
        assert "execution_reward" not in body
        assert "execution_reward" not in (body["info"] or {})