techfreakworm committed on
Commit
3c69062
·
unverified ·
1 Parent(s): b3e0fc9

fix(spaces): bump duration cap, drop broken auto-retry, add friendly errors + like banner

Browse files

Logs from run #202 (style transfer) confirmed the cascade:
1. style submitted, ran ~127s of GPU time
2. @spaces.GPU(duration=120) cap hit -> 'GPU task aborted' (line 217)
3. our auto-retry fired with the SAME captured request
4. the retry's schedule call failed with 'Expired ZeroGPU proxy token' (401, line 226)
because the captured request's X-IP-Token had aged past its TTL during run 1

The visible 'expired token' error was a symptom of the abort, not a separate failure. Two real bugs:
- 120s cap was too tight for style+lipsync detailer paths (~120-180s
actual on H200)
- auto-retry captured stale request tokens; second attempt always 401'd

Fixes:
- _duration_for: clamp to [60, 240] seconds (was [60, 120]). Pro identity
accepts durations in this range; if the server rejects one, the user sees
a clear illegal_duration error.
- _on_generate: drop the `for attempt in (0, 1)` retry loop. A single attempt
is made; a timeout or expired token surfaces as a friendly message, and the
user clicks Generate again -> fresh request -> fresh token -> succeeds.
- _classify expanded: expired_token, illegal_duration, unlogged,
quota_exceeded categories surface distinctly.
- _FRIENDLY_ERRORS dict + _friendly_error helper: error popups now read
'Hit the GPU time limit / Session timed out / Daily quota used up'
etc with actionable next steps, instead of raw exception strings.
- aio-tipbar at the top: 'Drop a heart at the top of this page to
support it' — quick visibility nudge for the HF like button.

Files changed (2) hide show
  1. app.py +86 -41
  2. backend.py +15 -8
app.py CHANGED
@@ -280,6 +280,19 @@ _CUSTOM_CSS = """
280
  border-radius: 4px;
281
  }
282
 
 
 
 
 
 
 
 
 
 
 
 
 
 
283
  /* === Drawer === */
284
  .aio-shell { position: relative; }
285
  .aio-drawer {
@@ -461,6 +474,13 @@ def build_app() -> gr.Blocks:
461
  ' <span class="aio-mode-tag" id="aio-mode-tag">T2V</span>'
462
  '</div>'
463
  )
 
 
 
 
 
 
 
464
 
465
  with gr.Row(elem_classes=["aio-shell"]):
466
  # Drawer (drawer behaves as fixed sidebar ≥1024 px;
@@ -748,6 +768,58 @@ def _stage_to_comfy_input(file_path) -> str | None:
748
  PRESET_DURATION = {"Fast": 60, "Balanced": 120, "Quality": 300}
749
 
750
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
751
  def _seconds_to_frames(seconds: float, fps: int) -> int:
752
  return max(9, int(round(float(seconds) * float(fps) / 8) * 8) + 1)
753
 
@@ -841,55 +913,28 @@ async def _on_generate(mode_name: str, *, progress: Any = None, **inputs: Any):
841
  video_update = event.video_path if event.video_path else gr.update()
842
  return (ui._render_idle(), video_update)
843
  if isinstance(event, backend_module.ErrorEvent):
 
844
  return (
845
  f'<div class="status-card status-error">'
846
- f' <div class="status-row"><span class="status-stage">Error · {event.category}</span></div>'
847
- f" <div>{event.message}</div>"
848
  f"</div>",
849
  gr.update(),
850
  )
851
  return None
852
 
853
- # Tier 1 + Tier 2: one normal attempt; if it aborts on ZeroGPU duration
854
- # cap, retry once with a duration multiplier. Each multiplier is
855
- # capped at 900s server-side, so the second attempt never exceeds that.
856
  started = time.time()
857
- multiplier = 1.0
858
- timed_out = False
859
- for attempt in (0, 1):
860
- if attempt == 1:
861
- # Show a friendly retry banner before the second submit
862
- yield (
863
- '<div class="status-card status-error">'
864
- ' <div class="status-row"><span class="status-stage">'
865
- "Retrying with extended GPU budget</span></div>"
866
- " <div>First attempt hit the per-call duration cap "
867
- "(usually a cold model cache or a heavier mode than estimated). "
868
- "Reserving 2× the budget and trying once more.</div>"
869
- "</div>",
870
- gr.update(),
871
- )
872
- multiplier = 2.0
873
- started = time.time() # reset so progress ETAs are sensible
874
-
875
- timed_out = False
876
- async for event in backend.submit(
877
- mode_name, workflow,
878
- preset=preset, duration_multiplier=multiplier,
879
- progress=progress,
880
- ):
881
- if (
882
- isinstance(event, backend_module.ErrorEvent)
883
- and event.category == "gpu_timeout"
884
- and attempt == 0
885
- ):
886
- timed_out = True
887
- break # don't yield the timeout error — auto-retry instead
888
- translated = await _translate(event, started)
889
- if translated is not None:
890
- yield translated
891
- if not timed_out:
892
- return
893
 
894
 
895
  def _input_keys_for_mode(mode_name: str, h: dict) -> list[str]:
 
280
  border-radius: 4px;
281
  }
282
 
283
+ .aio-tipbar {
284
+ margin: 0 0 6px 0;
285
+ padding: 6px 14px;
286
+ font-family: 'IBM Plex Sans', system-ui, sans-serif;
287
+ font-size: 12px;
288
+ color: #B5BCC6;
289
+ background: #1A1F26;
290
+ border-bottom: 1px solid #262C35;
291
+ text-align: center;
292
+ }
293
+ .aio-tipbar strong { color: #E6E8EB; font-weight: 500; }
294
+ .aio-tipbar .aio-heart { color: #E55B6E; }
295
+
296
  /* === Drawer === */
297
  .aio-shell { position: relative; }
298
  .aio-drawer {
 
474
  ' <span class="aio-mode-tag" id="aio-mode-tag">T2V</span>'
475
  '</div>'
476
  )
477
+ gr.HTML(
478
+ '<div class="aio-tipbar">'
479
+ 'Liking this project? '
480
+ '<strong>Drop a <span class="aio-heart">♥</span> at the top of this page</strong> '
481
+ 'to support it.'
482
+ '</div>'
483
+ )
484
 
485
  with gr.Row(elem_classes=["aio-shell"]):
486
  # Drawer (drawer behaves as fixed sidebar ≥1024 px;
 
768
  PRESET_DURATION = {"Fast": 60, "Balanced": 120, "Quality": 300}
769
 
770
 
771
+ _FRIENDLY_ERRORS: dict[str, tuple[str, str]] = {
772
+ "gpu_timeout": (
773
+ "Hit the GPU time limit",
774
+ "This run took longer than the GPU budget. Try the Fast preset, a "
775
+ "shorter video, or a smaller resolution — then click Generate again.",
776
+ ),
777
+ "expired_token": (
778
+ "Session timed out",
779
+ "Your sign-in session expired. Refresh the page and try again — "
780
+ "you'll keep your spot in the GPU queue.",
781
+ ),
782
+ "illegal_duration": (
783
+ "GPU budget too high",
784
+ "The estimator asked for more GPU time than the server allows. "
785
+ "Try Fast preset or a shorter video.",
786
+ ),
787
+ "unlogged": (
788
+ "Sign-in not detected",
789
+ "Make sure you're signed into huggingface.co (top-right avatar), "
790
+ "then refresh this page. Pro accounts get 25 min of GPU per day.",
791
+ ),
792
+ "quota_exceeded": (
793
+ "Daily GPU quota used up",
794
+ "You've used today's GPU minutes. Wait for the rolling 24-hour "
795
+ "reset, or upgrade Pro at huggingface.co/subscribe/pro for more.",
796
+ ),
797
+ "oom": (
798
+ "Ran out of GPU memory",
799
+ "Try a smaller resolution, fewer frames, or the Fast preset.",
800
+ ),
801
+ "interrupt": (
802
+ "Cancelled",
803
+ "Generation was cancelled. Click Generate to start a fresh run.",
804
+ ),
805
+ "download": (
806
+ "Model download failed",
807
+ "Couldn't fetch a required model file. Check your internet and try again.",
808
+ ),
809
+ }
810
+
811
+
812
+ def _friendly_error(category: str, raw_message: str) -> tuple[str, str]:
813
+ """Translate a backend error category into (title, body) the user can act on."""
814
+ if category in _FRIENDLY_ERRORS:
815
+ return _FRIENDLY_ERRORS[category]
816
+ return (
817
+ "Generation failed",
818
+ "Something went wrong. Click Generate to retry, or check the Space "
819
+ "logs if it keeps happening.",
820
+ )
821
+
822
+
823
  def _seconds_to_frames(seconds: float, fps: int) -> int:
824
  return max(9, int(round(float(seconds) * float(fps) / 8) * 8) + 1)
825
 
 
913
  video_update = event.video_path if event.video_path else gr.update()
914
  return (ui._render_idle(), video_update)
915
  if isinstance(event, backend_module.ErrorEvent):
916
+ title, body = _friendly_error(event.category, event.message)
917
  return (
918
  f'<div class="status-card status-error">'
919
+ f' <div class="status-row"><span class="status-stage">{title}</span></div>'
920
+ f" <div>{body}</div>"
921
  f"</div>",
922
  gr.update(),
923
  )
924
  return None
925
 
926
+ # Single attempt. ZeroGPU-side abort (duration cap) and 401 expired-token
927
+ # surface as friendly messages via _friendly_error; user clicks Generate
928
+ # again to retry with a fresh request and fresh X-IP-Token.
929
  started = time.time()
930
+ async for event in backend.submit(
931
+ mode_name, workflow,
932
+ preset=preset, duration_multiplier=1.0,
933
+ progress=progress,
934
+ ):
935
+ translated = await _translate(event, started)
936
+ if translated is not None:
937
+ yield translated
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
938
 
939
 
940
  def _input_keys_for_mode(mode_name: str, h: dict) -> list[str]:
backend.py CHANGED
@@ -109,17 +109,17 @@ def _duration_for(
109
  ZeroGPU can call us with the same arg list it'll use for _execute_workflow.
110
 
111
  Estimate = (base × preset multiplier + cold-cache buffer + per-frame VAE
112
- decode time) × retry multiplier, clamped to [60s, 120s]. The 120s ceiling
113
- is ZeroGPU's per-call hard maximum server rejects requested durations
114
- above it with "ZeroGPU illegal duration" (client.py:137, triggered when
115
- res.wait < timedelta(0) from the scheduler). 120s on H200 is enough for
116
- every preset we ship; longer estimates would just fail the guard check.
117
  """
118
  base = _BASE_DURATION_S.get(mode, 180)
119
  mult = _PRESET_MULT.get(preset.lower(), 1.5)
120
  frames = _frames_from_workflow(workflow)
121
  est = int((base * mult + 60 + frames * 0.3) * multiplier)
122
- return max(60, min(est, 120))
123
 
124
 
125
  # Decorate at module load time so ZeroGPU's startup analyzer detects it.
@@ -529,9 +529,16 @@ def _classify(exc: Exception) -> str:
529
  msg = str(exc).lower()
530
  if "outofmemory" in name or "cuda out of memory" in msg:
531
  return "oom"
 
 
 
 
 
 
 
 
532
  # ZeroGPU enforces the @spaces.GPU(duration=N) cap and re-raises as
533
- # gradio.exceptions.Error('GPU task aborted'). Surface a distinct
534
- # category so the handler can offer a retry with a bigger budget.
535
  if "gpu task aborted" in msg or ("gpu" in msg and "aborted" in msg):
536
  return "gpu_timeout"
537
  if "interrupt" in name:
 
109
  ZeroGPU can call us with the same arg list it'll use for _execute_workflow.
110
 
111
  Estimate = (base × preset multiplier + cold-cache buffer + per-frame VAE
112
+ decode time) × retry multiplier, clamped to [60s, 240s]. ZeroGPU rejects
113
+ durations above the server's per-call max with "ZeroGPU illegal duration"
114
+ (client.py:137); 240s is observed to work for Pro identity (~2 min runs
115
+ needed for style + lipsync detailer paths). If the server rejects values
116
+ in this range, the user will see a clear error and can retry.
117
  """
118
  base = _BASE_DURATION_S.get(mode, 180)
119
  mult = _PRESET_MULT.get(preset.lower(), 1.5)
120
  frames = _frames_from_workflow(workflow)
121
  est = int((base * mult + 60 + frames * 0.3) * multiplier)
122
+ return max(60, min(est, 240))
123
 
124
 
125
  # Decorate at module load time so ZeroGPU's startup analyzer detects it.
 
529
  msg = str(exc).lower()
530
  if "outofmemory" in name or "cuda out of memory" in msg:
531
  return "oom"
532
+ if "expired zerogpu proxy token" in msg or "expired" in msg and "token" in msg:
533
+ return "expired_token"
534
+ if "illegal duration" in msg:
535
+ return "illegal_duration"
536
+ if "unlogged user" in msg:
537
+ return "unlogged"
538
+ if "exceeded your" in msg and "gpu" in msg:
539
+ return "quota_exceeded"
540
  # ZeroGPU enforces the @spaces.GPU(duration=N) cap and re-raises as
541
+ # gradio.exceptions.Error('GPU task aborted').
 
542
  if "gpu task aborted" in msg or ("gpu" in msg and "aborted" in msg):
543
  return "gpu_timeout"
544
  if "interrupt" in name: