import json import os from fastapi.testclient import TestClient import server from server import app client = TestClient(app) def test_server_health(): response = client.get("/healthz") assert response.status_code == 200 assert response.json()["status"] == "ok" def test_server_health_alias(): response = client.get("/health") assert response.status_code == 200 assert response.json()["status"] == "ok" def test_server_environment_metadata(): response = client.get("/api/environment") assert response.status_code == 200 body = response.json() assert "action_space" in body assert "observation_space" in body assert "summary" in body def test_openenv_spec_and_tasks_endpoints(): spec = client.get("/openenv.yaml") assert spec.status_code == 200 assert "reset" in spec.text tasks = client.get("/openenv/tasks") assert tasks.status_code == 200 body = tasks.json() assert len(body) >= 3 assert {"task_id", "task_type", "question", "difficulty"} <= set(body[0].keys()) def test_openenv_reset_step_and_state_cycle(): reset = client.post("/openenv/reset", json={"task_index": 0}) assert reset.status_code == 200 body = reset.json() session_id = body["session_id"] assert body["done"] is False assert "question" in body["observation"]["task"] state = client.get(f"/openenv/state/{session_id}") assert state.status_code == 200 assert state.json()["session_id"] == session_id step = client.post( "/openenv/step", json={ "session_id": session_id, "action_type": "ANSWER", "payload": {"answer": "unknown"}, }, ) assert step.status_code == 200 step_body = step.json() assert step_body["session_id"] == session_id assert step_body["done"] is True assert "task_answer" in step_body["info"] def test_openenv_reset_accepts_empty_body(): reset = client.post("/openenv/reset") assert reset.status_code == 200 body = reset.json() assert body["done"] is False assert "session_id" in body def test_openenv_reset_accepts_empty_json_body(): reset = client.post( "/openenv/reset", data="", headers={"Content-Type": "application/json"}, ) assert reset.status_code == 200 body = reset.json() assert body["done"] is False assert "session_id" in body def test_openenv_reset_trailing_slash_post_returns_json(): reset = client.post( "/openenv/reset/", data="", headers={"Content-Type": "application/json"}, ) assert reset.status_code == 200 body = reset.json() assert body["done"] is False assert "session_id" in body def test_openenv_step_accepts_nested_action_payload(): reset = client.post("/openenv/reset", json={"task_index": 0}) assert reset.status_code == 200 session_id = reset.json()["session_id"] step = client.post( "/openenv/step", json={ "session_id": session_id, "action": { "action_type": "ANSWER", "payload": {"answer": "unknown"}, }, }, ) assert step.status_code == 200 assert step.json()["done"] is True def test_step_alias_uses_latest_session_when_session_id_missing(): reset = client.post("/reset", json={"task_index": 0}) assert reset.status_code == 200 session_id = reset.json()["session_id"] step = client.post( "/step", json={ "action_type": "ANSWER", "payload": {"answer": "unknown"}, }, ) assert step.status_code == 200 body = step.json() assert body["session_id"] == session_id assert body["done"] is True def test_state_alias_returns_latest_session(): reset = client.post("/reset", json={"task_index": 0}) assert reset.status_code == 200 session_id = reset.json()["session_id"] state = client.get("/state") assert state.status_code == 200 body = state.json() assert body["session_id"] == session_id assert "task" in body["observation"] def test_report_inference_updates_latest_evaluation_and_dashboard(tmp_path, monkeypatch): latest_evaluation = tmp_path / "latest_evaluation.json" space_dashboard = tmp_path / "space_dashboard.html" monkeypatch.setattr(server, "LATEST_EVALUATION_OUTPUT", latest_evaluation) monkeypatch.setattr(server, "SPACE_DASHBOARD", space_dashboard) monkeypatch.setattr(server, "load_leaderboard", lambda path: []) monkeypatch.setattr(server, "export_dashboard", lambda env, evaluation, leaderboard_records, output_path: str(space_dashboard)) response = client.post( "/openenv/report_inference", json={ "run": {"name": "inference_py_run"}, "summary": {"leaderboard_score": 0.75, "task_success_rate": 1.0}, "episodes": [ { "task_id": "seed_task_0", "agent_answer": "user_bharat", "graph_f1": 0.5, "reward": 1.2, "steps": 5, "tool_calls": 4, "success": 1, } ], }, ) assert response.status_code == 200 body = response.json() assert body["status"] == "ok" assert latest_evaluation.exists() stored = json.loads(latest_evaluation.read_text(encoding="utf-8")) assert stored["summary"]["leaderboard_score"] == 0.75 assert stored["episodes"][0]["task_id"] == "seed_task_0" assert stored["episodes"][0]["truth_edges"] def test_space_snapshot_prefers_newer_evaluation_payload(tmp_path, monkeypatch): baseline_path = tmp_path / "baseline.json" evaluation_path = tmp_path / "evaluation.json" baseline_dashboard = tmp_path / "baseline_dashboard.html" space_dashboard = tmp_path / "space_dashboard.html" baseline_path.write_text( json.dumps( { "run": {"dashboard_path": str(baseline_dashboard)}, "summary": {"leaderboard_score": 0.1, "task_success_rate": 0.1}, } ), encoding="utf-8", ) baseline_dashboard.write_text("baseline", encoding="utf-8") evaluation_path.write_text( json.dumps({"summary": {"leaderboard_score": 0.9, "task_success_rate": 0.9}, "episodes": []}), encoding="utf-8", ) space_dashboard.write_text("space", encoding="utf-8") os.utime(evaluation_path, (baseline_path.stat().st_atime + 5, baseline_path.stat().st_mtime + 5)) monkeypatch.setattr(server, "LATEST_BASELINE_OUTPUT", baseline_path) monkeypatch.setattr(server, "LATEST_EVALUATION_OUTPUT", evaluation_path) monkeypatch.setattr(server, "SPACE_DASHBOARD", space_dashboard) monkeypatch.setattr( server, "_base_environment_snapshot", lambda: { "task_count": 30, "difficulty_counts": {}, "action_space": ["CALL_TOOL", "ADD_EDGE", "ANSWER"], "observation_space": {}, "task_types": [], "config": {}, }, ) monkeypatch.setattr(server, "_build_environment", lambda: object()) monkeypatch.setattr(server, "export_dashboard", lambda env, evaluation, leaderboard_records, output_path: str(space_dashboard)) snapshot = server._space_snapshot() assert snapshot["source"] == "latest_evaluation" assert snapshot["summary"]["leaderboard_score"] == 0.9 assert snapshot["dashboard_path"] == str(space_dashboard)