Spaces:
Sleeping
Sleeping
| """Hard API dead-test: 500+ calls with reward-consistency checks.""" | |
| from __future__ import annotations | |
| from typing import Any | |
| import pytest | |
| from fastapi.testclient import TestClient | |
| from ghostexec.server.app import app | |
| # Weights the reward test applies to the "conflict", "relationship" and "task" entries of the reward breakdown when it recomputes "weighted_base"; the three weights sum to 1.0. | |
| W_CONFLICT = 0.35 | |
| W_REL = 0.35 | |
| W_TASK = 0.30 | |
| # Factor the reward test multiplies the weighted sum by to obtain the expected "weighted_base". | |
| OUTPUT_SCALE = 0.48 | |
| def _step_payload_for(i: int) -> dict[str, Any]: | |
| templates: list[dict[str, Any]] = [ | |
| {"action": {"action_type": "do_nothing"}}, | |
| {"action": {"action_type": "reply_email", "email_id": "e01", "message_body": "On it now."}}, | |
| {"action": {"action_type": "reply_email", "email_id": "e14", "message_body": "Acknowledged."}}, | |
| {"action": {"action_type": "reply_email", "email_id": "nope_999", "message_body": "x"}}, | |
| {"action": {"action_type": "archive_email", "email_id": "e09"}}, | |
| {"action": {"action_type": "archive_email", "email_id": "bad_id"}}, | |
| { | |
| "action": { | |
| "action_type": "reschedule_meeting", | |
| "meeting_id": "m02", | |
| "new_time": "2026-04-21T18:00:00", | |
| } | |
| }, | |
| { | |
| "action": { | |
| "action_type": "reschedule_meeting", | |
| "meeting_id": "m03", | |
| "new_time": "2026-04-21T09:30:00", # overlap -> invalid semantic | |
| } | |
| }, | |
| {"action": {"action_type": "cancel_meeting", "meeting_id": "m10", "reason": "dead test"}}, | |
| {"action": {"action_type": "cancel_meeting", "meeting_id": "m99", "reason": "dead test"}}, | |
| {"action": {"action_type": "complete_task", "task_id": "t07"}}, | |
| {"action": {"action_type": "complete_task", "task_id": "t09"}}, # already done | |
| {"action": {"action_type": "delegate_task", "task_id": "t08", "contact_name": "Jordan Lee"}}, | |
| {"action": {"action_type": "delegate_task", "task_id": "t08", "contact_name": "Nobody"}}, | |
| { | |
| "action": { | |
| "action_type": "send_message", | |
| "contact_name": "Jamie Liu", | |
| "message_body": "Quick sync please.", | |
| } | |
| }, | |
| { | |
| "action": { | |
| "action_type": "send_message", | |
| "contact_name": "Nobody", | |
| "message_body": "hello", | |
| } | |
| }, | |
| ] | |
| return templates[i % len(templates)] | |
@pytest.fixture
def client() -> TestClient:
    """Pytest fixture providing a ``TestClient`` bound to the ghostexec app.

    The tests in this module request ``client`` as an argument, so the
    function must be registered as a fixture for pytest to inject it.

    Returns:
        A ``TestClient`` wrapping ``app``, created with
        ``raise_server_exceptions=True`` so that an unhandled server-side
        exception fails the test instead of being turned into a response.
    """
    return TestClient(app, raise_server_exceptions=True)
def test_api_surface_all_endpoints(client: TestClient) -> None:
    """Smoke-test the HTTP surface: GET routes, verb contracts, reset and MCP.

    Checks, in order: every read-only route answers 200; control routes
    reject the wrong HTTP method and an unknown path is 404; ``POST /reset``
    accepts several body shapes (including an unrecognised extra field) and
    returns ``observation`` and ``done``; ``POST /mcp`` answers 200 for both a
    well-formed JSON-RPC request and a malformed JSON body.
    """
    # Core GET endpoints.
    readable_paths = (
        "/health",
        "/metadata",
        "/state",
        "/schema",
        "/openapi.json",
        "/docs",
        "/redoc",
    )
    for path in readable_paths:
        status = client.get(path).status_code
        assert status == 200, f"{path} -> {status}"

    # Control routes: method contracts.
    reset_via_get = client.get("/reset")
    assert reset_via_get.status_code == 405
    step_via_get = client.get("/step")
    assert step_via_get.status_code == 405
    reset_via_put = client.put("/reset", json={})
    assert reset_via_put.status_code in (405, 422)
    unknown_route = client.get("/this-path-should-not-exist-ghostexec")
    assert unknown_route.status_code == 404

    # Reset variants.
    reset_bodies = (
        {},
        {"seed": 42},
        {"episode_id": "dead-api-001"},
        {"seed": 1, "future_field": True},
    )
    for reset_body in reset_bodies:
        reset_response = client.post("/reset", json=reset_body)
        assert reset_response.status_code == 200
        reset_json = reset_response.json()
        assert all(key in reset_json for key in ("observation", "done"))

    # MCP endpoint variants.
    list_tools_request = {"jsonrpc": "2.0", "id": 1, "method": "tools/list", "params": {}}
    mcp_ok = client.post("/mcp", json=list_tools_request)
    assert mcp_ok.status_code == 200

    # A truncated JSON body is still expected to be answered with HTTP 200.
    json_headers = {"Content-Type": "application/json"}
    mcp_bad_json = client.post("/mcp", content="{", headers=json_headers)
    assert mcp_bad_json.status_code == 200
@pytest.mark.parametrize("idx", range(520))
def test_api_reward_dead_520_cases(client: TestClient, idx: int) -> None:
    """Check reward-consistency invariants for one reset + step round trip.

    Parametrized over 520 case indices; each index selects a step payload via
    ``_step_payload_for(idx)``. For every case the test verifies that:

    * ``/reset`` and ``/step`` both answer 200 and the step body carries
      ``observation``, ``reward`` and ``done``;
    * the observation metadata exposes ``step_ok``, ``step_detail`` and a
      ``reward_breakdown`` with ``final`` and ``weighted_base``;
    * the top-level reward equals ``reward_breakdown["final"]``;
    * ``weighted_base`` equals ``OUTPUT_SCALE`` times the weighted sum of the
      conflict / relationship / task components;
    * ``final`` equals ``weighted_base`` plus the additive adjustments;
    * ``do_nothing`` carries a -0.15 floor and a negative reward, and any
      step reported as not ok carries a -0.25 invalid-step adjustment.

    Args:
        client: HTTP test client for the app (pytest fixture).
        idx: Case index supplied by the parametrization.
    """
    # Keep each case independent and deterministic.
    rr = client.post("/reset", json={"episode_id": f"dead-{idx:04d}", "seed": 42})
    assert rr.status_code == 200

    payload = _step_payload_for(idx)
    rs = client.post("/step", json=payload)
    assert rs.status_code == 200, f"idx={idx} payload={payload} status={rs.status_code}"

    body = rs.json()
    assert "observation" in body and "reward" in body and "done" in body
    obs = body["observation"]
    # Missing or null metadata / breakdown degrade to empty dicts so the
    # structural assertions below fail with a clear message.
    meta = obs.get("metadata") or {}
    bd = meta.get("reward_breakdown") or {}

    # Structural contracts.
    assert isinstance(obs.get("echoed_message", ""), str) and obs.get("echoed_message")
    assert "step_ok" in meta
    assert "step_detail" in meta
    assert "final" in bd
    assert "weighted_base" in bd

    # Reward identity: top-level reward must equal breakdown.final.
    reward = float(body["reward"])
    final = float(bd["final"])
    assert reward == pytest.approx(final, abs=1e-9)

    # Aggregation formula must hold exactly (within floating tolerance).
    conflict = float(bd.get("conflict", 0.0))
    relationship = float(bd.get("relationship", 0.0))
    task = float(bd.get("task", 0.0))
    weighted_inner = W_CONFLICT * conflict + W_REL * relationship + W_TASK * task
    expected_weighted = OUTPUT_SCALE * weighted_inner
    assert float(bd["weighted_base"]) == pytest.approx(expected_weighted, abs=1e-9)

    # Final reward is the weighted base plus every additive adjustment; an
    # adjustment absent from the breakdown counts as zero.
    expected_final = (
        float(bd.get("weighted_base", 0.0))
        + float(bd.get("invalid_step_adjustment", 0.0))
        + float(bd.get("episode_completion_bonus", 0.0))
        + float(bd.get("catastrophic_penalty", 0.0))
        + float(bd.get("do_nothing_floor", 0.0))
    )
    assert final == pytest.approx(expected_final, abs=1e-9)

    action_type = payload["action"]["action_type"]
    if action_type == "do_nothing":
        assert float(bd.get("do_nothing_floor", 0.0)) == pytest.approx(-0.15, abs=1e-12)
        assert reward < 0

    # Only an explicit False counts as an invalid step; a missing or None
    # value is not treated as invalid here.
    if meta.get("step_ok") is False:
        assert float(bd.get("invalid_step_adjustment", 0.0)) == pytest.approx(-0.25, abs=1e-12)