Spaces:

lablab-ai-amd-developer-hackathon
/

signbridge

Build error

LucasLooTan committed about 13 hours ago

Commit

ca5affd

1 Parent(s): 5fb88cc

feat(backend): /recognize accepts frames list + body-size cap

deep-check audit B.F3 + new feature:
- RecognizeRequest now accepts either 'frame' (single) or 'frames'
(list). Exactly one of the two must be provided; validator returns
4xx if both or neither are supplied.
- Per-frame base64 size cap of 3 MB (well above any reasonable webcam
frame) and per-burst frame-count cap of 12 — defends /recognize
against accidental DoS via huge payloads.
- Multi-frame path delegates to recognize_sign_from_frames; single-
frame path unchanged.

Files changed (2) hide show

signbridge/backend.py +48 -5
tests/test_backend.py +46 -1

signbridge/backend.py CHANGED Viewed

@@ -17,24 +17,58 @@ from __future__ import annotations
 import base64
 import logging
 import os
 import numpy as np
 from fastapi import FastAPI, HTTPException
 from fastapi.responses import FileResponse
-from pydantic import BaseModel, Field
 from signbridge.composer.sentence import compose_sentence
 from signbridge.imageio import load_rgb
-from signbridge.recognizer.vlm import recognize_sign_from_frame
 from signbridge.voice.tts import synthesize_speech
 logger = logging.getLogger(__name__)
 app = FastAPI(title="SignBridge backend", version="0.1.0")
 class RecognizeRequest(BaseModel):
-    frame: str = Field(..., description="Base64-encoded JPEG/PNG frame.")
 class RecognizeResponse(BaseModel):
@@ -95,10 +129,19 @@ def info() -> InfoResponse:
 @app.post("/recognize", response_model=RecognizeResponse)
 def recognize(req: RecognizeRequest) -> RecognizeResponse:
     if not req.frame:
         raise HTTPException(status_code=400, detail="frame must be non-empty")
-    frame = _decode_b64_image(req.frame)
-    token, conf = recognize_sign_from_frame(frame)
     return RecognizeResponse(token=token, confidence=conf)

 import base64
 import logging
 import os
+from typing import Optional
 import numpy as np
 from fastapi import FastAPI, HTTPException
 from fastapi.responses import FileResponse
+from pydantic import BaseModel, Field, model_validator
 from signbridge.composer.sentence import compose_sentence
 from signbridge.imageio import load_rgb
+from signbridge.recognizer.vlm import (
+    recognize_sign_from_frame,
+    recognize_sign_from_frames,
+)
 from signbridge.voice.tts import synthesize_speech
 logger = logging.getLogger(__name__)
+# Per-frame size cap: a 1280x720 JPEG q85 is ~120 KB → ~160 KB base64.
+# 3 MB cap is generous and defends /recognize against accidental DoS via
+# huge payloads (deep-check audit B.F3).
+_MAX_FRAME_B64_LEN = 3 * 1024 * 1024
+_MAX_FRAMES_IN_BURST = 12
 app = FastAPI(title="SignBridge backend", version="0.1.0")
 class RecognizeRequest(BaseModel):
     """Request body for ``/recognize``.

     Carries either one base64 image in ``frame`` or an ordered burst of
     base64 images in ``frames``. Supplying both, or neither, fails
     validation, as does any burst entry longer than
     ``_MAX_FRAME_B64_LEN`` characters.
     """

     # Single-frame mode: the string length is capped by ``max_length``.
     frame: Optional[str] = Field(
         default=None,
         max_length=_MAX_FRAME_B64_LEN,
         description="Base64-encoded JPEG/PNG (single-frame mode).",
     )
     # Multi-frame mode: ``min_length``/``max_length`` bound the number of
     # list entries; the per-entry size is checked in the validator below.
     frames: Optional[list[str]] = Field(
         default=None,
         min_length=1,
         max_length=_MAX_FRAMES_IN_BURST,
         description="Ordered list of base64 frames (multi-frame mode).",
     )

     @model_validator(mode="after")
     def _exactly_one_payload(self) -> "RecognizeRequest":
         """Require exactly one payload field and cap each burst entry.

         Raises:
             ValueError: if both or neither of ``frame``/``frames`` are set,
                 or if any entry of ``frames`` exceeds
                 ``_MAX_FRAME_B64_LEN``.
         """
         has_single = self.frame is not None
         has_burst = self.frames is not None
         if has_single == has_burst:
             raise ValueError("provide exactly one of 'frame' or 'frames'")
         if has_burst and any(
             len(item) > _MAX_FRAME_B64_LEN for item in self.frames
         ):
             raise ValueError(
                 f"each frame must be at most {_MAX_FRAME_B64_LEN} bytes (base64)"
             )
         return self
 class RecognizeResponse(BaseModel):
 @app.post("/recognize", response_model=RecognizeResponse)
 def recognize(req: RecognizeRequest) -> RecognizeResponse:
     """Recognize a sign from a single frame or from a burst of frames.

     When ``req.frames`` is set, every entry is decoded and the list is
     passed to ``recognize_sign_from_frames``; otherwise ``req.frame`` is
     decoded and passed to ``recognize_sign_from_frame``. The recognizer's
     ``(token, confidence)`` pair is wrapped in a ``RecognizeResponse``.

     Raises:
         HTTPException: 400 when ``frames`` holds fewer than two entries,
             or when ``frame`` is empty in single-frame mode.
     """
     burst = req.frames

     if burst is None:
         # Single-frame mode.
         if not req.frame:
             raise HTTPException(status_code=400, detail="frame must be non-empty")
         image = _decode_b64_image(req.frame)
         token, conf = recognize_sign_from_frame(image)
         return RecognizeResponse(token=token, confidence=conf)

     # Multi-frame mode: the model admits a one-element list, so the
     # two-frame minimum is enforced here.
     if len(burst) < 2:
         raise HTTPException(
             status_code=400,
             detail="multi-frame recognition needs at least 2 frames",
         )
     images = list(map(_decode_b64_image, burst))
     token, conf = recognize_sign_from_frames(images)
     return RecognizeResponse(token=token, confidence=conf)

tests/test_backend.py CHANGED Viewed

@@ -43,7 +43,7 @@ class TestHealth:
 class TestRecognize:
     def test_empty_frame_rejected(self, client: TestClient) -> None:
         r = client.post("/recognize", json={"frame": ""})
-        assert r.status_code == 400
     def test_invalid_base64_rejected(self, client: TestClient) -> None:
         r = client.post("/recognize", json={"frame": "%%%not-base64%%%"})
@@ -99,3 +99,48 @@ class TestSpeak:
         assert r.status_code == 200
         assert r.headers["content-type"].startswith("audio/")
         assert len(r.content) > 0

 class TestRecognize:
     def test_empty_frame_rejected(self, client: TestClient) -> None:
         r = client.post("/recognize", json={"frame": ""})
+        assert r.status_code in (400, 422)
     def test_invalid_base64_rejected(self, client: TestClient) -> None:
         r = client.post("/recognize", json={"frame": "%%%not-base64%%%"})
         assert r.status_code == 200
         assert r.headers["content-type"].startswith("audio/")
         assert len(r.content) > 0
class TestRecognizeFrames:
    """Request-validation and happy-path tests for the ``frames`` payload.

    Validation failures are accepted as either 400 or 422, because they may
    be raised by the endpoint (HTTPException 400) or by request-model
    validation (422).
    """

    def test_empty_frames_rejected(self, client: TestClient) -> None:
        r = client.post("/recognize", json={"frames": []})
        # Pydantic min_length=1 rejects empty list (validation error 422).
        # If the validator collapses to 400 due to model_validator, accept that too.
        assert r.status_code in (400, 422)

    def test_single_frame_in_list_rejected(self, client: TestClient) -> None:
        # Multi-frame path requires >=2 frames.
        b64 = _frame_b64()
        r = client.post("/recognize", json={"frames": [b64]})
        assert r.status_code == 400
        detail = r.json().get("detail", "").lower()
        # Pin the actual message; a bare "frames" substring check would pass
        # for almost any error text mentioning the field.
        assert "at least 2 frames" in detail

    def test_valid_multi_frame_no_provider(self, client: TestClient) -> None:
        b64 = _frame_b64()
        r = client.post("/recognize", json={"frames": [b64, b64, b64, b64]})
        assert r.status_code == 200
        assert r.json() == {"token": "", "confidence": 0.0}

    def test_too_many_frames_rejected(self, client: TestClient) -> None:
        b64 = _frame_b64()
        r = client.post("/recognize", json={"frames": [b64] * 100})
        assert r.status_code in (400, 422)

    def test_oversized_frame_rejected(self, client: TestClient) -> None:
        # 6 MB base64 string is well past any reasonable webcam frame.
        big = "A" * (6 * 1024 * 1024)
        r = client.post("/recognize", json={"frame": big})
        assert r.status_code in (400, 422)

    def test_oversized_frame_in_list_rejected(self, client: TestClient) -> None:
        # The per-frame cap must also apply to entries of a burst, not only
        # to the single-frame field.
        big = "A" * (6 * 1024 * 1024)
        r = client.post("/recognize", json={"frames": [_frame_b64(), big]})
        assert r.status_code in (400, 422)

    def test_both_frame_and_frames_rejected(self, client: TestClient) -> None:
        b64 = _frame_b64()
        r = client.post(
            "/recognize",
            json={"frame": b64, "frames": [b64, b64]},
        )
        assert r.status_code in (400, 422)

    def test_neither_frame_nor_frames_rejected(self, client: TestClient) -> None:
        r = client.post("/recognize", json={})
        assert r.status_code in (400, 422)