fix: global frame-cache so click handler can actually see live frames
Browse files
Previous attempts:
1. streaming=False + click reads webcam → value is None unless user
explicitly clicks 📷 Take Photo (extra step we wanted to skip)
2. streaming=True + click reads webcam → value still None at click
time; gradio doesn't deliver streamed frames to click handlers
3. streaming=True + .stream() → state-stash → click reads state →
gradio 4.44.1 deep-copies State between handlers, so mutations don't
persist across the .stream/.click boundary
This commit: streaming=True + .stream() → write to a module-level
dict keyed by gr.Request.session_hash → click reads from that dict.
Bypasses gradio's state serialization entirely. A threading.Lock guards
reads/writes for safety; per-session keying so concurrent users don't
collide.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
- signbridge/space.py +38 -18
|
@@ -80,10 +80,15 @@ class _SessionState:
|
|
| 80 |
sign_history: list[str] = field(default_factory=list)
|
| 81 |
last_sentence: str = ""
|
| 82 |
last_audio_path: str | None = None
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
|
| 88 |
|
| 89 |
def _new_session() -> _SessionState:
|
|
@@ -140,20 +145,30 @@ def _shared_extractor() -> LandmarkExtractor:
|
|
| 140 |
return _extractor_singleton
|
| 141 |
|
| 142 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 143 |
def _capture_sign(
|
| 144 |
-
|
| 145 |
-
state: _SessionState,
|
| 146 |
) -> tuple[str, str, _SessionState]:
|
| 147 |
-
"""Take-image button handler.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
|
| 149 |
-
With `streaming=True` on the webcam, gradio passes the latest streamed
|
| 150 |
-
frame as the `frame` input on click. (We confirmed this works as long
|
| 151 |
-
as the input list includes the webcam component; the earlier failure
|
| 152 |
-
was caused by gr.Image's value being None when streaming wasn't set.)
|
| 153 |
-
"""
|
| 154 |
if frame is None:
|
| 155 |
return (
|
| 156 |
-
"_no frame yet β make sure the camera is live and try again_",
|
| 157 |
_format_history(state.sign_history),
|
| 158 |
state,
|
| 159 |
)
|
|
@@ -316,13 +331,18 @@ def build_demo() -> gr.Blocks:
|
|
| 316 |
"Spell out a word letter-by-letter, then press Speak."
|
| 317 |
)
|
| 318 |
|
| 319 |
-
#
|
| 320 |
-
#
|
| 321 |
-
#
|
| 322 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 323 |
capture_btn.click(
|
| 324 |
fn=_capture_sign,
|
| 325 |
-
inputs=[
|
| 326 |
outputs=[latest, history, state],
|
| 327 |
)
|
| 328 |
speak_btn.click(
|
|
|
|
| 80 |
sign_history: list[str] = field(default_factory=list)
|
| 81 |
last_sentence: str = ""
|
| 82 |
last_audio_path: str | None = None
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
# Module-level frame cache, keyed by gradio session_hash. The webcam
|
| 86 |
+
# `.stream()` handler writes here on every frame; the Take-image click
|
| 87 |
+
# handler reads from here. We use a global dict instead of gr.State
|
| 88 |
+
# because gradio 4.44.1 deep-copies state between handlers (so a
|
| 89 |
+
# stream-handler mutation doesn't show up in the click-handler view).
|
| 90 |
+
_frame_cache: dict[str, np.ndarray] = {}
|
| 91 |
+
_frame_cache_lock = threading.Lock()
|
| 92 |
|
| 93 |
|
| 94 |
def _new_session() -> _SessionState:
|
|
|
|
| 145 |
return _extractor_singleton
|
| 146 |
|
| 147 |
|
| 148 |
+
def _stash_frame(frame: np.ndarray | None, request: gr.Request) -> None:
|
| 149 |
+
"""Webcam .stream() callback — writes every live frame to the global
|
| 150 |
+
cache keyed by gradio session_hash. Returns nothing because no
|
| 151 |
+
component output needs updating per frame."""
|
| 152 |
+
if frame is None:
|
| 153 |
+
return
|
| 154 |
+
sid = getattr(request, "session_hash", "default") or "default"
|
| 155 |
+
with _frame_cache_lock:
|
| 156 |
+
_frame_cache[sid] = frame
|
| 157 |
+
|
| 158 |
+
|
| 159 |
def _capture_sign(
|
| 160 |
+
state: _SessionState, request: gr.Request
|
|
|
|
| 161 |
) -> tuple[str, str, _SessionState]:
|
| 162 |
+
"""Take-image button handler. Reads the latest live frame from the
|
| 163 |
+
module-level cache (populated by the .stream() handler), runs
|
| 164 |
+
recognition, appends to history."""
|
| 165 |
+
sid = getattr(request, "session_hash", "default") or "default"
|
| 166 |
+
with _frame_cache_lock:
|
| 167 |
+
frame = _frame_cache.get(sid)
|
| 168 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
if frame is None:
|
| 170 |
return (
|
| 171 |
+
"_no frame yet β make sure the camera preview is live and try again_",
|
| 172 |
_format_history(state.sign_history),
|
| 173 |
state,
|
| 174 |
)
|
|
|
|
| 331 |
"Spell out a word letter-by-letter, then press Speak."
|
| 332 |
)
|
| 333 |
|
| 334 |
+
# Webcam streams continuously while the camera is live —
|
| 335 |
+
# _stash_frame writes each frame to the global session
|
| 336 |
+
# cache. Click reads the latest cached frame.
|
| 337 |
+
webcam.stream(
|
| 338 |
+
fn=_stash_frame,
|
| 339 |
+
inputs=[webcam],
|
| 340 |
+
outputs=None,
|
| 341 |
+
show_progress="hidden",
|
| 342 |
+
)
|
| 343 |
capture_btn.click(
|
| 344 |
fn=_capture_sign,
|
| 345 |
+
inputs=[state],
|
| 346 |
outputs=[latest, history, state],
|
| 347 |
)
|
| 348 |
speak_btn.click(
|