"""Integration tests — exercise multi-step user flows end-to-end.""" from __future__ import annotations import base64 import io import numpy as np import pytest from fastapi.testclient import TestClient from PIL import Image from signbridge.backend import app from signbridge.space import _capture_sign, _clear, _new_session, _speak @pytest.fixture() def client() -> TestClient: return TestClient(app) def _frame(rgb: tuple[int, int, int] = (180, 200, 160), size: int = 96) -> np.ndarray: return np.full((size, size, 3), rgb, dtype=np.uint8) def _frame_b64(rgb: tuple[int, int, int] = (180, 200, 160), size: int = 96) -> str: arr = _frame(rgb, size) img = Image.fromarray(arr) buf = io.BytesIO() img.save(buf, format="JPEG", quality=80) return base64.b64encode(buf.getvalue()).decode("ascii") class TestUserFlowFingerspell: """User fingerspells L-U-C-A-S then presses Speak.""" def test_via_space_helpers(self, monkeypatch: pytest.MonkeyPatch) -> None: # Stub VLM to return one letter at a time. from signbridge.recognizer import vlm responses = iter(["L", "U", "C", "A", "S"]) class _Resp: def __init__(self, c: str) -> None: self.choices = [type("C", (), {"message": type("M", (), {"content": c})()})()] class _FakeClient: class chat: # noqa: N801 class completions: # noqa: N801 @staticmethod def create(**_: object) -> _Resp: return _Resp(next(responses)) monkeypatch.setattr(vlm, "_resolve_client", lambda: (_FakeClient(), "test")) state = _new_session() for _ in range(5): _, _, state = _capture_sign(_frame(), state) assert state.sign_history == ["L", "U", "C", "A", "S"] sentence, audio_path, state = _speak(state) # Composer fallback (no API keys for composer in this test) → naive joiner assert "Lucas" in sentence assert audio_path # silent-stub WAV exists def test_via_backend_endpoints(self, client: TestClient) -> None: # Direct multi-step flow over HTTP, exercising every endpoint. for _letter in "LUCAS": r = client.post("/recognize", json={"frame": _frame_b64()}) assert r.status_code == 200 # No API keys → token is "" but endpoint succeeds. assert r.json()["token"] == "" # Compose a manually-curated sequence r = client.post("/compose", json={"signs": ["L", "U", "C", "A", "S"]}) assert r.status_code == 200 assert "Lucas" in r.json()["sentence"] # Speak r = client.post("/speak", json={"text": "My name is Lucas."}) assert r.status_code == 200 assert len(r.content) > 0 class TestClearResetsCleanly: def test_full_round_trip(self, monkeypatch: pytest.MonkeyPatch) -> None: from signbridge.recognizer import vlm class _FakeClient: class chat: # noqa: N801 class completions: # noqa: N801 @staticmethod def create(**_: object): return type( "R", (), { "choices": [ type( "C", (), {"message": type("M", (), {"content": "hello"})()}, )() ] }, )() monkeypatch.setattr(vlm, "_resolve_client", lambda: (_FakeClient(), "test")) state = _new_session() _, _, state = _capture_sign(_frame(), state) _, _, state = _capture_sign(_frame(), state) assert state.sign_history == ["hello", "hello"] sentence, audio, state = _speak(state) assert sentence assert audio latest, history, sentence_box, audio_out, state = _clear(state) assert state.sign_history == [] assert state.last_sentence == "" assert state.last_audio_path is None assert "no signs" in history.lower() class TestEdgeCases: def test_huge_sign_sequence(self, client: TestClient) -> None: # 200 fingerspelled letters — make sure compose endpoint doesn't crash. signs = list("ABCDEFGHIJ" * 20) r = client.post("/compose", json={"signs": signs}) assert r.status_code == 200 assert r.json()["sentence"] # non-empty def test_unicode_in_compose(self, client: TestClient) -> None: # Synthetic unicode token should pass through naive joiner unscathed. r = client.post("/compose", json={"signs": ["héllo", "wörld"]}) assert r.status_code == 200 def test_speak_very_long_text(self, client: TestClient) -> None: r = client.post("/speak", json={"text": "a " * 500}) assert r.status_code == 200 def test_recognize_jpeg_with_data_url_jpg(self, client: TestClient) -> None: b64 = _frame_b64() r = client.post( "/recognize", json={"frame": f"data:image/jpg;base64,{b64}"} ) # Slightly malformed data URL (jpg vs jpeg) — should still work via tolerant decoder. assert r.status_code == 200 def test_recognize_png_frame(self, client: TestClient) -> None: arr = _frame() img = Image.fromarray(arr) buf = io.BytesIO() img.save(buf, format="PNG") b64 = base64.b64encode(buf.getvalue()).decode("ascii") r = client.post("/recognize", json={"frame": b64}) assert r.status_code == 200 def test_compose_with_only_punctuation_glosses(self, client: TestClient) -> None: # Tokens that are 1 char, lowercase letters — should not be misread as fingerspelling. r = client.post("/compose", json={"signs": ["a", "b"]}) assert r.status_code == 200 # Naive joiner only treats UPPERCASE single letters as fingerspelling. # Lowercase 'a' / 'b' are full glosses → should appear with a space, no concat. assert r.json()["sentence"] == "A b." def test_health_after_recognize_failure(self, client: TestClient) -> None: # Even after a 400, /healthz should still respond. client.post("/recognize", json={"frame": "%%%bad%%%"}) r = client.get("/healthz") assert r.status_code == 200 class TestBackendInfoEndpoint: def test_info_reflects_env(self, client: TestClient, monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.setenv("SIGNBRIDGE_PROVIDER", "openai") monkeypatch.setenv( "SIGNBRIDGE_COMPOSER_MODEL", "meta-llama/Llama-3.1-8B-Instruct" ) r = client.get("/info") assert r.status_code == 200 body = r.json() assert body["provider"] == "openai" assert body["composer_model"].endswith("Llama-3.1-8B-Instruct")