LucasLooTan committed on
Commit
ca5affd
·
1 Parent(s): 5fb88cc

feat(backend): /recognize accepts frames list + body-size cap

Browse files

deep-check audit B.F3 + new feature:
- RecognizeRequest now accepts either 'frame' (single) or 'frames'
(list). Exactly one of the two must be provided; validator returns
4xx if both or neither are supplied.
- Per-frame base64 size cap of 3 MB (well above any reasonable webcam
frame) and per-burst frame-count cap of 12 — defends /recognize
against accidental DoS via huge payloads.
- Multi-frame path delegates to recognize_sign_from_frames; single-
frame path unchanged.

Files changed (2) hide show
  1. signbridge/backend.py +48 -5
  2. tests/test_backend.py +46 -1
signbridge/backend.py CHANGED
@@ -17,24 +17,58 @@ from __future__ import annotations
17
  import base64
18
  import logging
19
  import os
 
20
 
21
  import numpy as np
22
  from fastapi import FastAPI, HTTPException
23
  from fastapi.responses import FileResponse
24
- from pydantic import BaseModel, Field
25
 
26
  from signbridge.composer.sentence import compose_sentence
27
  from signbridge.imageio import load_rgb
28
- from signbridge.recognizer.vlm import recognize_sign_from_frame
 
 
 
29
  from signbridge.voice.tts import synthesize_speech
30
 
31
  logger = logging.getLogger(__name__)
32
 
 
 
 
 
 
 
33
  app = FastAPI(title="SignBridge backend", version="0.1.0")
34
 
35
 
36
  class RecognizeRequest(BaseModel):
37
- frame: str = Field(..., description="Base64-encoded JPEG/PNG frame.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
 
40
  class RecognizeResponse(BaseModel):
@@ -95,10 +129,19 @@ def info() -> InfoResponse:
95
 
96
  @app.post("/recognize", response_model=RecognizeResponse)
97
  def recognize(req: RecognizeRequest) -> RecognizeResponse:
 
 
 
 
 
 
 
 
 
98
  if not req.frame:
99
  raise HTTPException(status_code=400, detail="frame must be non-empty")
100
- frame = _decode_b64_image(req.frame)
101
- token, conf = recognize_sign_from_frame(frame)
102
  return RecognizeResponse(token=token, confidence=conf)
103
 
104
 
 
17
  import base64
18
  import logging
19
  import os
20
+ from typing import Optional
21
 
22
  import numpy as np
23
  from fastapi import FastAPI, HTTPException
24
  from fastapi.responses import FileResponse
25
+ from pydantic import BaseModel, Field, model_validator
26
 
27
  from signbridge.composer.sentence import compose_sentence
28
  from signbridge.imageio import load_rgb
29
+ from signbridge.recognizer.vlm import (
30
+ recognize_sign_from_frame,
31
+ recognize_sign_from_frames,
32
+ )
33
  from signbridge.voice.tts import synthesize_speech
34
 
35
  logger = logging.getLogger(__name__)
36
 
37
+ # Per-frame size cap: a 1280x720 JPEG q85 is ~120 KB → ~160 KB base64.
38
+ # 3 MB cap is generous and defends /recognize against accidental DoS via
39
+ # huge payloads (deep-check audit B.F3).
40
+ _MAX_FRAME_B64_LEN = 3 * 1024 * 1024
41
+ _MAX_FRAMES_IN_BURST = 12
42
+
43
  app = FastAPI(title="SignBridge backend", version="0.1.0")
44
 
45
 
46
  class RecognizeRequest(BaseModel):
47
+ """Either single-frame or multi-frame; exactly one must be provided."""
48
+
49
+ frame: Optional[str] = Field(
50
+ default=None,
51
+ description="Base64-encoded JPEG/PNG (single-frame mode).",
52
+ max_length=_MAX_FRAME_B64_LEN,
53
+ )
54
+ frames: Optional[list[str]] = Field(
55
+ default=None,
56
+ description="Ordered list of base64 frames (multi-frame mode).",
57
+ min_length=1,
58
+ max_length=_MAX_FRAMES_IN_BURST,
59
+ )
60
+
61
+ @model_validator(mode="after")
62
+ def _exactly_one_payload(self) -> "RecognizeRequest":
63
+ if (self.frame is None) == (self.frames is None):
64
+ raise ValueError("provide exactly one of 'frame' or 'frames'")
65
+ if self.frames is not None:
66
+ for f in self.frames:
67
+ if len(f) > _MAX_FRAME_B64_LEN:
68
+ raise ValueError(
69
+ f"each frame must be at most {_MAX_FRAME_B64_LEN} bytes (base64)"
70
+ )
71
+ return self
72
 
73
 
74
  class RecognizeResponse(BaseModel):
 
129
 
130
  @app.post("/recognize", response_model=RecognizeResponse)
131
  def recognize(req: RecognizeRequest) -> RecognizeResponse:
132
+ if req.frames is not None:
133
+ if len(req.frames) < 2:
134
+ raise HTTPException(
135
+ status_code=400,
136
+ detail="multi-frame recognition needs at least 2 frames",
137
+ )
138
+ decoded_frames = [_decode_b64_image(b) for b in req.frames]
139
+ token, conf = recognize_sign_from_frames(decoded_frames)
140
+ return RecognizeResponse(token=token, confidence=conf)
141
  if not req.frame:
142
  raise HTTPException(status_code=400, detail="frame must be non-empty")
143
+ decoded = _decode_b64_image(req.frame)
144
+ token, conf = recognize_sign_from_frame(decoded)
145
  return RecognizeResponse(token=token, confidence=conf)
146
 
147
 
tests/test_backend.py CHANGED
@@ -43,7 +43,7 @@ class TestHealth:
43
  class TestRecognize:
44
  def test_empty_frame_rejected(self, client: TestClient) -> None:
45
  r = client.post("/recognize", json={"frame": ""})
46
- assert r.status_code == 400
47
 
48
  def test_invalid_base64_rejected(self, client: TestClient) -> None:
49
  r = client.post("/recognize", json={"frame": "%%%not-base64%%%"})
@@ -99,3 +99,48 @@ class TestSpeak:
99
  assert r.status_code == 200
100
  assert r.headers["content-type"].startswith("audio/")
101
  assert len(r.content) > 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  class TestRecognize:
44
  def test_empty_frame_rejected(self, client: TestClient) -> None:
45
  r = client.post("/recognize", json={"frame": ""})
46
+ assert r.status_code in (400, 422)
47
 
48
  def test_invalid_base64_rejected(self, client: TestClient) -> None:
49
  r = client.post("/recognize", json={"frame": "%%%not-base64%%%"})
 
99
  assert r.status_code == 200
100
  assert r.headers["content-type"].startswith("audio/")
101
  assert len(r.content) > 0
102
+
103
+
104
class TestRecognizeFrames:
    """Validation and happy-path tests for the multi-frame ``/recognize`` mode."""

    def test_empty_frames_rejected(self, client: TestClient) -> None:
        r = client.post("/recognize", json={"frames": []})
        # The request model declares min_length=1 on `frames`, so an empty
        # list fails request validation (422). 400 is tolerated in case the
        # check is ever moved into the handler.
        assert r.status_code in (400, 422)

    def test_single_frame_in_list_rejected(self, client: TestClient) -> None:
        # Multi-frame path requires >=2 frames; the handler enforces this.
        b64 = _frame_b64()
        r = client.post("/recognize", json={"frames": [b64]})
        assert r.status_code == 400
        detail = r.json().get("detail", "").lower()
        # Match the handler's actual message rather than any text that
        # merely mentions "frames".
        assert "at least 2 frames" in detail

    def test_valid_multi_frame_no_provider(self, client: TestClient) -> None:
        # NOTE(review): presumably no recognizer provider is configured in
        # the test environment, hence the empty token — confirm.
        b64 = _frame_b64()
        r = client.post("/recognize", json={"frames": [b64, b64, b64, b64]})
        assert r.status_code == 200
        assert r.json() == {"token": "", "confidence": 0.0}

    def test_too_many_frames_rejected(self, client: TestClient) -> None:
        b64 = _frame_b64()
        r = client.post("/recognize", json={"frames": [b64] * 100})
        assert r.status_code in (400, 422)

    def test_oversized_frame_rejected(self, client: TestClient) -> None:
        # 6 MB base64 string is well past any reasonable webcam frame.
        big = "A" * (6 * 1024 * 1024)
        r = client.post("/recognize", json={"frame": big})
        assert r.status_code in (400, 422)

    def test_oversized_frame_in_list_rejected(self, client: TestClient) -> None:
        # The per-frame cap must also apply to entries of `frames`, which the
        # model checks in its validator rather than via a Field constraint.
        big = "A" * (6 * 1024 * 1024)
        r = client.post("/recognize", json={"frames": [big, _frame_b64()]})
        assert r.status_code in (400, 422)

    def test_both_frame_and_frames_rejected(self, client: TestClient) -> None:
        b64 = _frame_b64()
        r = client.post(
            "/recognize",
            json={"frame": b64, "frames": [b64, b64]},
        )
        assert r.status_code in (400, 422)

    def test_neither_frame_nor_frames_rejected(self, client: TestClient) -> None:
        r = client.post("/recognize", json={})
        assert r.status_code in (400, 422)