"""Tests for the VLM-based sign recognizer.""" from __future__ import annotations import numpy as np import pytest from signbridge.recognizer.vlm import ( _frame_to_data_url, _normalise, _resolve_client, recognize_sign_from_frame, ) class TestNormalise: def test_letter_uppercased(self) -> None: assert _normalise("a") == "A" def test_already_upper_letter(self) -> None: assert _normalise("Z") == "Z" def test_digit_kept_as_string(self) -> None: assert _normalise("7") == "7" def test_word_lowercased(self) -> None: assert _normalise("HELLO") == "hello" def test_strips_quotes(self) -> None: assert _normalise('"hello"') == "hello" def test_strips_punctuation(self) -> None: assert _normalise("hello.") == "hello" def test_takes_first_token_of_multi(self) -> None: assert _normalise("hello world") == "hello" def test_empty(self) -> None: assert _normalise("") == "" def test_underscore_preserved(self) -> None: assert _normalise("thank_you") == "thank_you" class TestResolveClient: def test_no_keys_returns_none(self) -> None: # conftest cleared env; default provider 'amd' but no creds → None client, _ = _resolve_client() assert client is None def test_unknown_provider(self, monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.setenv("SIGNBRIDGE_PROVIDER", "garbage") client, _ = _resolve_client() assert client is None def test_amd_with_creds(self, monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.setenv("SIGNBRIDGE_PROVIDER", "amd") monkeypatch.setenv("AMD_DEV_CLOUD_BASE_URL", "https://example.invalid/v1") monkeypatch.setenv("AMD_DEV_CLOUD_API_KEY", "test-key") client, model = _resolve_client() assert client is not None assert "Qwen" in model or "Llama" in model def test_openai_with_creds(self, monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.setenv("SIGNBRIDGE_PROVIDER", "openai") monkeypatch.setenv("OPENAI_API_KEY", "test-key") client, model = _resolve_client() assert client is not None # When we fall to OpenAI we use a smaller default assert model.startswith("gpt-") or "Qwen" in model class TestFrameToDataUrl: def test_valid_jpeg_data_url(self) -> None: frame = np.full((32, 32, 3), 128, dtype=np.uint8) url = _frame_to_data_url(frame) assert url.startswith("data:image/jpeg;base64,") # Body should decode without error import base64 body = url.split(",", 1)[1] raw = base64.b64decode(body) assert len(raw) > 0 class TestRecognizeSignFromFrame: def test_no_client_returns_empty(self) -> None: # No env keys; should return ("", 0.0) frame = np.zeros((32, 32, 3), dtype=np.uint8) token, conf = recognize_sign_from_frame(frame) assert token == "" assert conf == 0.0 def test_with_mock_client(self, monkeypatch: pytest.MonkeyPatch) -> None: from signbridge.recognizer import vlm class _FakeChoice: def __init__(self, content: str) -> None: self.message = type("M", (), {"content": content})() class _FakeResp: def __init__(self, content: str) -> None: self.choices = [_FakeChoice(content)] class _FakeClient: class chat: # noqa: N801 class completions: # noqa: N801 @staticmethod def create(**_: object) -> _FakeResp: return _FakeResp("A") monkeypatch.setattr(vlm, "_resolve_client", lambda: (_FakeClient(), "test")) frame = np.full((32, 32, 3), 200, dtype=np.uint8) token, conf = recognize_sign_from_frame(frame) assert token == "A" assert conf == 0.85 def test_unknown_token_returns_empty(self, monkeypatch: pytest.MonkeyPatch) -> None: from signbridge.recognizer import vlm class _FakeResp: def __init__(self) -> None: self.choices = [ type("C", (), {"message": type("M", (), {"content": "unknown"})()})() ] class _FakeClient: class chat: # noqa: N801 class completions: # noqa: N801 @staticmethod def create(**_: object) -> _FakeResp: return _FakeResp() monkeypatch.setattr(vlm, "_resolve_client", lambda: (_FakeClient(), "test")) frame = np.zeros((32, 32, 3), dtype=np.uint8) token, conf = recognize_sign_from_frame(frame) assert token == "" assert conf == 0.0 def test_provider_failure_returns_empty(self, monkeypatch: pytest.MonkeyPatch) -> None: from signbridge.recognizer import vlm class _FailingClient: class chat: # noqa: N801 class completions: # noqa: N801 @staticmethod def create(**_: object) -> object: raise RuntimeError("boom") monkeypatch.setattr(vlm, "_resolve_client", lambda: (_FailingClient(), "test")) frame = np.zeros((32, 32, 3), dtype=np.uint8) token, conf = recognize_sign_from_frame(frame) assert token == "" assert conf == 0.0 class TestRecognizeSignFromFrames: def test_too_few_frames_raises(self): from signbridge.recognizer.vlm import recognize_sign_from_frames with pytest.raises(ValueError): recognize_sign_from_frames([]) with pytest.raises(ValueError): recognize_sign_from_frames([np.zeros((32, 32, 3), dtype=np.uint8)]) def test_no_client_returns_empty(self): from signbridge.recognizer.vlm import recognize_sign_from_frames frames = [np.full((32, 32, 3), 200, dtype=np.uint8) for _ in range(4)] token, conf = recognize_sign_from_frames(frames) assert token == "" assert conf == 0.0 def test_with_mock_client(self, monkeypatch: pytest.MonkeyPatch) -> None: from signbridge.recognizer import vlm captured: dict = {} class _FakeChoice: def __init__(self, content: str) -> None: self.message = type("M", (), {"content": content})() class _FakeResp: def __init__(self, content: str) -> None: self.choices = [_FakeChoice(content)] class _FakeClient: class chat: # noqa: N801 class completions: # noqa: N801 @staticmethod def create(**kwargs: object) -> _FakeResp: captured.update(kwargs) return _FakeResp("hello") monkeypatch.setattr(vlm, "_resolve_client", lambda: (_FakeClient(), "test")) frames = [np.full((32, 32, 3), 100 + i, dtype=np.uint8) for i in range(4)] token, conf = vlm.recognize_sign_from_frames(frames) assert token == "hello" assert conf == 0.85 # Verify multi-image payload shape: 1 message with 1 text + 4 image_urls msgs = captured["messages"] assert len(msgs) == 1 content = msgs[0]["content"] assert sum(1 for c in content if c["type"] == "text") == 1 assert sum(1 for c in content if c["type"] == "image_url") == 4 def test_off_vocab_token_suppressed(self, monkeypatch: pytest.MonkeyPatch) -> None: from signbridge.recognizer import vlm class _FakeClient: class chat: # noqa: N801 class completions: # noqa: N801 @staticmethod def create(**_: object) -> object: return type( "R", (), { "choices": [ type( "C", (), {"message": type("M", (), {"content": "fingerspelling"})()}, )() ] }, )() monkeypatch.setattr(vlm, "_resolve_client", lambda: (_FakeClient(), "test")) frames = [np.full((32, 32, 3), 100, dtype=np.uint8) for _ in range(4)] token, conf = vlm.recognize_sign_from_frames(frames) # 'fingerspelling' is not in VOCAB_SET → suppressed assert token == "" assert conf == 0.0 def test_provider_failure_returns_empty(self, monkeypatch: pytest.MonkeyPatch) -> None: from signbridge.recognizer import vlm class _FailingClient: class chat: # noqa: N801 class completions: # noqa: N801 @staticmethod def create(**_: object) -> object: raise RuntimeError("boom") monkeypatch.setattr(vlm, "_resolve_client", lambda: (_FailingClient(), "test")) frames = [np.full((32, 32, 3), 0, dtype=np.uint8) for _ in range(3)] token, conf = vlm.recognize_sign_from_frames(frames) assert token == "" assert conf == 0.0