Spaces:
Sleeping
Sleeping
File size: 6,026 Bytes
"""Phase A5 visualizer tests.

The viewer itself is read-only static HTML/JS — heavy automated testing of
the rendering is overkill (a screenshot is the manual smoke gate). These
tests guard the package contract:

- The static assets ship with the package.
- The viz sub-app mounts and serves ``viewer.html`` from ``/viz/`` plus an
  ``/upload-trace`` endpoint that accepts JSON.
- A real EpisodeTrace JSON parses as JSON in pure Python (proxy for "the JS
  ``JSON.parse`` would also accept it"); we don't depend on Node in CI.
"""
from __future__ import annotations
import json
from pathlib import Path
import pytest
from fastapi.testclient import TestClient
from ci_triage_env.env.episode import EpisodeManager
from ci_triage_env.env.server import build_app
from ci_triage_env.env.tools import ALL_TOOL_HANDLERS
from ci_triage_env.env.trace import build_trace, write_trace
from ci_triage_env.schemas.action import TerminalAction, ToolCall
from ci_triage_env.schemas.diagnosis import DiagnosisLabel
from ci_triage_env.visualizer.server import STATIC_DIR, build_visualizer_app
from tests.env.conftest import make_a2_scenario
# Index the tool handlers by their ``name`` so tests can fetch one by name.
HANDLERS = {handler.name: handler for handler in ALL_TOOL_HANDLERS}
# ---------------------------------------------------------------------------
# Static assets are present and self-contained
# ---------------------------------------------------------------------------
def test_static_files_exist():
    """Each viewer asset must be present under ``STATIC_DIR`` and non-empty."""
    asset_names = ("viewer.html", "viewer.js", "viewer.css")
    for asset_name in asset_names:
        asset = STATIC_DIR / asset_name
        assert asset.exists(), f"missing static asset: {asset}"
        size_in_bytes = asset.stat().st_size
        assert size_in_bytes > 0
def test_viewer_html_references_local_assets_only():
    """Submission must run offline: no external CDNs.

    The page is decoded explicitly as UTF-8 so the result does not depend on
    the locale encoding of the machine running the tests.
    """
    html = (STATIC_DIR / "viewer.html").read_text(encoding="utf-8")
    # Local relative refs OK; reject anything that points at an external host.
    for needle in ("http://", "https://"):
        assert needle not in html, f"viewer.html must not pull from {needle}"
def test_viewer_total_bundle_under_50kb():
    """Phase A5 budget: the three viewer assets together stay below 50 KiB."""
    cap_bytes = 50 * 1024
    total = 0
    for asset_name in ("viewer.html", "viewer.js", "viewer.css"):
        total += (STATIC_DIR / asset_name).stat().st_size
    assert total < cap_bytes, f"static bundle is {total} bytes (cap 50KB)"
# ---------------------------------------------------------------------------
# Standalone visualizer sub-app
# ---------------------------------------------------------------------------
@pytest.fixture
def viz_client() -> TestClient:
    """Provide a test client wrapping a freshly built visualizer app."""
    viz_app = build_visualizer_app()
    return TestClient(viz_app)
def test_viz_serves_viewer_html(viz_client):
    """GET ``/`` on the sub-app answers 200 with the viewer page title."""
    expected_title = "<title>CI-Triage Episode Replay</title>"
    response = viz_client.get("/")
    assert response.status_code == 200
    assert expected_title in response.text
def test_viz_serves_javascript(viz_client):
    """GET ``/viewer.js`` answers 200 and the body mentions ``renderTimeline``."""
    response = viz_client.get("/viewer.js")
    assert response.status_code == 200
    script_text = response.text
    assert "renderTimeline" in script_text
def test_viz_serves_stylesheet(viz_client):
    """GET ``/viewer.css`` answers 200 and the body contains ``.timeline``."""
    response = viz_client.get("/viewer.css")
    assert response.status_code == 200
    stylesheet_text = response.text
    assert ".timeline" in stylesheet_text
def test_viz_upload_trace_writes_to_trace_dir(viz_client, tmp_path, monkeypatch):
    """A JSON upload succeeds and the reported ``saved_to`` file holds the payload."""
    # Set the trace-directory environment variable to the per-test temp dir.
    monkeypatch.setenv("CI_TRIAGE_TRACE_DIR", str(tmp_path))
    payload = {"hello": "world"}
    upload = ("episode-1.json", json.dumps(payload), "application/json")

    response = viz_client.post("/upload-trace", files={"file": upload})
    assert response.status_code == 200, response.text

    saved_path = Path(response.json()["saved_to"])
    assert saved_path.exists()
    stored = json.loads(saved_path.read_text())
    assert stored == payload
def test_viz_upload_rejects_non_json(viz_client):
    """Uploading a plain-text body that is not JSON is answered with HTTP 400."""
    bad_upload = ("trace.txt", b"not json", "text/plain")
    response = viz_client.post("/upload-trace", files={"file": bad_upload})
    assert response.status_code == 400
# ---------------------------------------------------------------------------
# Mounted under /viz on the main env app
# ---------------------------------------------------------------------------
def test_viz_mounted_on_main_app(env_factory):
    """The main env app exposes the viewer page at ``/viz/``."""
    client = TestClient(build_app(env_factory=env_factory))
    # /viz/ resolves to viewer.html via StaticFiles(html=True)
    viewer_page = client.get("/viz/")
    assert viewer_page.status_code == 200
    expected_title = "<title>CI-Triage Episode Replay</title>"
    assert expected_title in viewer_page.text
def test_viz_can_be_disabled():
    """With ``mount_visualizer=False`` a request to ``/viz/`` yields 404."""
    client = TestClient(build_app(mount_visualizer=False))
    response = client.get("/viz/")
    assert response.status_code == 404
# ---------------------------------------------------------------------------
# A real EpisodeTrace JSON is parseable (sanity for the JS-side JSON.parse)
# ---------------------------------------------------------------------------
def test_real_trace_json_parses_as_object(tmp_path):
    """A trace written for a finished episode loads as JSON with the expected keys."""
    mgr = EpisodeManager(scenario=make_a2_scenario(), episode_id="ep-viz", seed=1)

    # Drive one tool call and then terminate the episode before writing the trace.
    read_logs_call = ToolCall(tool_name="read_logs", args={"scope": "test", "lines": 50})
    mgr.apply_tool_call(read_logs_call, HANDLERS["read_logs"])
    verdict = TerminalAction(diagnosis=DiagnosisLabel.RACE_FLAKE, confidence=0.7)
    mgr.apply_terminal(verdict)

    trace_path = write_trace(mgr, tmp_path)
    parsed = json.loads(trace_path.read_text())

    # The viewer reads these paths — guard the contract here so a future schema
    # tweak doesn't silently break the UI.
    assert "episode" in parsed
    episode = parsed["episode"]
    assert "history" in episode
    assert episode["is_terminated"] is True
    assert episode["final_action"]["diagnosis"] == "race_flake"
    assert "reward_breakdown" in parsed
    # Counterfactual stays None in v1 (probe deferred to v2).
    assert parsed["counterfactual_replay"] is None
def test_build_trace_is_json_serializable_round_trip():
    """``build_trace(...).model_dump_json()`` loads back with the episode id intact."""
    manager = EpisodeManager(scenario=make_a2_scenario(), episode_id="ep-viz-2", seed=2)
    final_action = TerminalAction(diagnosis=DiagnosisLabel.AMBIGUOUS, confidence=0.5)
    manager.apply_terminal(final_action)

    serialized = build_trace(manager).model_dump_json()
    restored = json.loads(serialized)
    assert restored["episode"]["episode_id"] == "ep-viz-2"
|