Spaces:
Sleeping
Sleeping
File size: 5,834 Bytes
ff293b1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 | """Hard API dead-test: 500+ calls with reward-consistency checks."""
from __future__ import annotations
from typing import Any
import pytest
from fastapi.testclient import TestClient
from ghostexec.server.app import app
# Weights this module uses to recompute the ``weighted_base`` entry of the
# reward breakdown: conflict / relationship / task. They sum to 1.0.
# NOTE(review): presumably these mirror the server's reward configuration;
# confirm they are kept in sync with it.
W_CONFLICT: float = 0.35
W_REL: float = 0.35
W_TASK: float = 0.30
# Multiplier applied to the weighted sum to obtain the expected ``weighted_base``.
OUTPUT_SCALE: float = 0.48
def _step_payload_for(i: int) -> dict[str, Any]:
    """Return the ``/step`` request body for case index ``i``.

    Sixteen fixed payloads are cycled through (``i`` modulo the catalogue
    size), so any integer index maps to one of them. Judging by the ids and
    the inline notes, plausible actions are interleaved with ones aimed at
    unknown ids/contacts, an overlapping slot, or already-finished work.
    NOTE(review): which of them the server actually rejects depends on
    scenario data that is not visible here.

    A fresh payload dict is built on every call, so callers may mutate it.
    """

    def wrap(action_type: str, **fields: Any) -> dict[str, Any]:
        # Every body has the shape {"action": {...}} with "action_type" first,
        # followed by the action-specific fields in the order given.
        return {"action": {"action_type": action_type, **fields}}

    catalogue: tuple[dict[str, Any], ...] = (
        wrap("do_nothing"),
        wrap("reply_email", email_id="e01", message_body="On it now."),
        wrap("reply_email", email_id="e14", message_body="Acknowledged."),
        wrap("reply_email", email_id="nope_999", message_body="x"),
        wrap("archive_email", email_id="e09"),
        wrap("archive_email", email_id="bad_id"),
        wrap("reschedule_meeting", meeting_id="m02", new_time="2026-04-21T18:00:00"),
        # overlap -> invalid semantic
        wrap("reschedule_meeting", meeting_id="m03", new_time="2026-04-21T09:30:00"),
        wrap("cancel_meeting", meeting_id="m10", reason="dead test"),
        wrap("cancel_meeting", meeting_id="m99", reason="dead test"),
        wrap("complete_task", task_id="t07"),
        # already done
        wrap("complete_task", task_id="t09"),
        wrap("delegate_task", task_id="t08", contact_name="Jordan Lee"),
        wrap("delegate_task", task_id="t08", contact_name="Nobody"),
        wrap("send_message", contact_name="Jamie Liu", message_body="Quick sync please."),
        wrap("send_message", contact_name="Nobody", message_body="hello"),
    )
    return catalogue[i % len(catalogue)]
@pytest.fixture(scope="module")
def client() -> TestClient:
    """Provide one ``TestClient`` bound to ``app``, shared by the whole module.

    ``raise_server_exceptions=True`` is passed explicitly so that an
    unhandled error inside the app surfaces in the calling test.

    NOTE(review): the client is returned without being entered as a context
    manager; if ``app`` defines startup/lifespan handlers they presumably do
    not run here. Confirm that this is intended.
    """
    shared_client = TestClient(app, raise_server_exceptions=True)
    return shared_client
def test_api_surface_all_endpoints(client: TestClient) -> None:
    """Smoke-test the HTTP surface: GET routes, method contracts, reset, MCP.

    Requests are issued in a fixed order against the shared ``client``.
    """
    # Core GET endpoints must all answer 200.
    readable_paths = (
        "/health",
        "/metadata",
        "/state",
        "/schema",
        "/openapi.json",
        "/docs",
        "/redoc",
    )
    for path in readable_paths:
        response = client.get(path)
        assert response.status_code == 200, f"{path} -> {response.status_code}"

    # Control routes: wrong HTTP methods are refused, unknown paths are 404.
    get_reset_status = client.get("/reset").status_code
    assert get_reset_status == 405
    get_step_status = client.get("/step").status_code
    assert get_step_status == 405
    put_reset_status = client.put("/reset", json={}).status_code
    assert put_reset_status in (405, 422)
    unknown_status = client.get("/this-path-should-not-exist-ghostexec").status_code
    assert unknown_status == 404

    # Reset variants: empty body, seed only, episode id only, and an extra
    # field alongside a seed are all expected to succeed.
    reset_bodies = (
        {},
        {"seed": 42},
        {"episode_id": "dead-api-001"},
        {"seed": 1, "future_field": True},
    )
    for reset_body in reset_bodies:
        reset_response = client.post("/reset", json=reset_body)
        assert reset_response.status_code == 200
        reset_json = reset_response.json()
        assert all(key in reset_json for key in ("observation", "done"))

    # MCP endpoint: a well-formed JSON-RPC call and a truncated JSON body are
    # both expected to come back with HTTP 200.
    tools_list_request = {"jsonrpc": "2.0", "id": 1, "method": "tools/list", "params": {}}
    mcp_listing = client.post("/mcp", json=tools_list_request)
    assert mcp_listing.status_code == 200
    mcp_malformed = client.post(
        "/mcp",
        content="{",
        headers={"Content-Type": "application/json"},
    )
    assert mcp_malformed.status_code == 200
@pytest.mark.parametrize("idx", range(520))
def test_api_reward_dead_520_cases(client: TestClient, idx: int) -> None:
    """Reset, take one step, and check the reward breakdown is self-consistent.

    For each of the 520 indices the payload from ``_step_payload_for(idx)`` is
    posted to ``/step`` after a seeded ``/reset``. The test then verifies:

    * the response and its metadata carry the expected keys;
    * the top-level ``reward`` equals ``reward_breakdown["final"]``;
    * ``weighted_base`` equals ``OUTPUT_SCALE`` times the weighted sum of the
      ``conflict`` / ``relationship`` / ``task`` components;
    * ``final`` equals ``weighted_base`` plus the listed adjustments;
    * ``do_nothing`` carries a -0.15 floor and a negative reward, and a step
      reported as not ok carries a -0.25 adjustment.
    """
    # Fresh episode id plus a fixed seed keeps every case independent and
    # deterministic.
    reset_response = client.post(
        "/reset",
        json={"episode_id": f"dead-{idx:04d}", "seed": 42},
    )
    assert reset_response.status_code == 200

    payload = _step_payload_for(idx)
    step_response = client.post("/step", json=payload)
    assert (
        step_response.status_code == 200
    ), f"idx={idx} payload={payload} status={step_response.status_code}"

    body = step_response.json()
    assert all(key in body for key in ("observation", "reward", "done"))
    observation = body["observation"]
    metadata = observation.get("metadata") or {}
    breakdown = metadata.get("reward_breakdown") or {}

    def component(name: str) -> float:
        # Breakdown entries that are absent count as 0.0.
        return float(breakdown.get(name, 0.0))

    # Structural contracts.
    echoed = observation.get("echoed_message", "")
    assert isinstance(echoed, str) and echoed
    for required_meta_key in ("step_ok", "step_detail"):
        assert required_meta_key in metadata
    for required_breakdown_key in ("final", "weighted_base"):
        assert required_breakdown_key in breakdown

    # Reward identity: top-level reward must equal breakdown.final.
    reward = float(body["reward"])
    final = float(breakdown["final"])
    assert reward == pytest.approx(final, abs=1e-9)

    # Aggregation formula must hold (within floating tolerance).
    weighted_inner = (
        W_CONFLICT * component("conflict")
        + W_REL * component("relationship")
        + W_TASK * component("task")
    )
    expected_weighted = OUTPUT_SCALE * weighted_inner
    assert float(breakdown["weighted_base"]) == pytest.approx(expected_weighted, abs=1e-9)

    # final = weighted_base + every adjustment term. Explicit left-to-right
    # addition keeps the float arithmetic in a fixed order.
    expected_final = (
        component("weighted_base")
        + component("invalid_step_adjustment")
        + component("episode_completion_bonus")
        + component("catastrophic_penalty")
        + component("do_nothing_floor")
    )
    assert final == pytest.approx(expected_final, abs=1e-9)

    # Action-specific expectations.
    if payload["action"]["action_type"] == "do_nothing":
        assert component("do_nothing_floor") == pytest.approx(-0.15, abs=1e-12)
        assert reward < 0

    if metadata.get("step_ok") is False:
        assert component("invalid_step_adjustment") == pytest.approx(-0.25, abs=1e-12)
|