LucasLooTan Claude Opus 4.7 (1M context) committed on
Commit
37b4f5b
·
1 Parent(s): 1874412

fix: global frame-cache so click handler can actually see live frames

Browse files

Previous attempts:
1. streaming=False + click reads webcam → value is None unless user
explicitly clicks 📷 Take Photo (extra step we wanted to skip)
2. streaming=True + click reads webcam → value still None at click
time; gradio doesn't deliver streamed frames to click handlers
3. streaming=True + .stream() → state-stash → click reads state →
gradio 4.44.1 deep-copies State between handlers, mutations don't
persist across the .stream/.click boundary

This commit: streaming=True + .stream() → write to a module-level
dict keyed by gr.Request.session_hash → click reads from that dict.
Bypasses gradio's state serialization entirely. A threading.Lock guards
reads/writes for safety; per-session keying so concurrent users don't
collide.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

Files changed (1) hide show
  1. signbridge/space.py +38 -18
signbridge/space.py CHANGED
@@ -80,10 +80,15 @@ class _SessionState:
80
  sign_history: list[str] = field(default_factory=list)
81
  last_sentence: str = ""
82
  last_audio_path: str | None = None
83
- # Latest webcam frame stashed by the .stream() handler so the Take-image
84
- # button can pull from state instead of trying to read the gr.Image
85
- # value (which is None unless explicitly captured).
86
- latest_frame: object = None # np.ndarray | None — typed loosely to keep dataclass simple
 
 
 
 
 
87
 
88
 
89
  def _new_session() -> _SessionState:
@@ -140,20 +145,30 @@ def _shared_extractor() -> LandmarkExtractor:
140
  return _extractor_singleton
141
 
142
 
 
 
 
 
 
 
 
 
 
 
 
143
  def _capture_sign(
144
- frame: np.ndarray | None,
145
- state: _SessionState,
146
  ) -> tuple[str, str, _SessionState]:
147
- """Take-image button handler.
 
 
 
 
 
148
 
149
- With `streaming=True` on the webcam, gradio passes the latest streamed
150
- frame as the `frame` input on click. (We confirmed this works as long
151
- as the input list includes the webcam component; the earlier failure
152
- was caused by gr.Image's value being None when streaming wasn't set.)
153
- """
154
  if frame is None:
155
  return (
156
- "_no frame yet — make sure the camera is live and try again_",
157
  _format_history(state.sign_history),
158
  state,
159
  )
@@ -316,13 +331,18 @@ def build_demo() -> gr.Blocks:
316
  "Spell out a word letter-by-letter, then press Speak."
317
  )
318
 
319
- # Click reads the latest streamed frame from the webcam
320
- # component directly. With streaming=True the gr.Image
321
- # value tracks the live preview — no .stream() handler or
322
- # session-state stashing required.
 
 
 
 
 
323
  capture_btn.click(
324
  fn=_capture_sign,
325
- inputs=[webcam, state],
326
  outputs=[latest, history, state],
327
  )
328
  speak_btn.click(
 
80
  sign_history: list[str] = field(default_factory=list)
81
  last_sentence: str = ""
82
  last_audio_path: str | None = None
83
+
84
+
85
+ # Module-level frame cache, keyed by gradio session_hash. The webcam
86
+ # `.stream()` handler writes here on every frame; the Take-image click
87
+ # handler reads from here. We use a global dict instead of gr.State
88
+ # because gradio 4.44.1 deep-copies state between handlers (so a
89
+ # stream-handler mutation doesn't show up in the click-handler view).
90
+ _frame_cache: dict[str, np.ndarray] = {}
91
+ _frame_cache_lock = threading.Lock()
92
 
93
 
94
  def _new_session() -> _SessionState:
 
145
  return _extractor_singleton
146
 
147
 
148
+ def _stash_frame(frame: np.ndarray | None, request: gr.Request) -> None:
149
+ """Webcam .stream() callback — writes every live frame to the global
150
+ cache keyed by gradio session_hash. Returns nothing because no
151
+ component output needs updating per frame."""
152
+ if frame is None:
153
+ return
154
+ sid = getattr(request, "session_hash", "default") or "default"
155
+ with _frame_cache_lock:
156
+ _frame_cache[sid] = frame
157
+
158
+
159
  def _capture_sign(
160
+ state: _SessionState, request: gr.Request
 
161
  ) -> tuple[str, str, _SessionState]:
162
+ """Take-image button handler. Reads the latest live frame from the
163
+ module-level cache (populated by the .stream() handler), runs
164
+ recognition, appends to history."""
165
+ sid = getattr(request, "session_hash", "default") or "default"
166
+ with _frame_cache_lock:
167
+ frame = _frame_cache.get(sid)
168
 
 
 
 
 
 
169
  if frame is None:
170
  return (
171
+ "_no frame yet — make sure the camera preview is live and try again_",
172
  _format_history(state.sign_history),
173
  state,
174
  )
 
331
  "Spell out a word letter-by-letter, then press Speak."
332
  )
333
 
334
+ # Webcam streams continuously while the camera is live —
335
+ # _stash_frame writes each frame to the global session
336
+ # cache. Click reads the latest cached frame.
337
+ webcam.stream(
338
+ fn=_stash_frame,
339
+ inputs=[webcam],
340
+ outputs=None,
341
+ show_progress="hidden",
342
+ )
343
  capture_btn.click(
344
  fn=_capture_sign,
345
+ inputs=[state],
346
  outputs=[latest, history, state],
347
  )
348
  speak_btn.click(