Spaces:
Running on Zero
fix(spaces): bump duration cap, drop broken auto-retry, add friendly errors + like banner
Browse files
Logs from run #202 (style transfer) confirmed the cascade:
1. style submitted, ran ~127s of GPU time
2. @spaces.GPU(duration=120) cap hit -> 'GPU task aborted' (line 217)
3. our auto-retry fired with the SAME captured request
4. retry's schedule call -> 'Expired ZeroGPU proxy token' (401, line 226)
because the captured request's X-IP-Token had aged past TTL during run 1
The visible 'expired token' was the symptom of the abort. Two real bugs:
- 120s cap was too tight for style+lipsync detailer paths (~120-180s
actual on H200)
- auto-retry captured stale request tokens; second attempt always 401'd
Fixes:
- _duration_for: clamp the estimate to [60, 240] seconds. The Pro identity
accepts durations in this range; if the server rejects one, the user sees a clear illegal_duration error.
- _on_generate: drop the `for attempt in (0, 1)` retry loop. Single attempt;
a timeout or expired token surfaces as a friendly message; the user clicks Generate
again -> fresh request -> fresh token -> succeeds.
- _classify expanded: expired_token, illegal_duration, unlogged,
quota_exceeded categories surface distinctly.
- _FRIENDLY_ERRORS dict + _friendly_error helper: error popups now read
'Hit the GPU time limit / Session timed out / Daily quota used up'
etc with actionable next steps, instead of raw exception strings.
- aio-tipbar at the top: 'Drop a heart at the top of this page to
support it' — quick visibility nudge for the HF like button.
- app.py +86 -41
- backend.py +15 -8
|
@@ -280,6 +280,19 @@ _CUSTOM_CSS = """
|
|
| 280 |
border-radius: 4px;
|
| 281 |
}
|
| 282 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 283 |
/* === Drawer === */
|
| 284 |
.aio-shell { position: relative; }
|
| 285 |
.aio-drawer {
|
|
@@ -461,6 +474,13 @@ def build_app() -> gr.Blocks:
|
|
| 461 |
' <span class="aio-mode-tag" id="aio-mode-tag">T2V</span>'
|
| 462 |
'</div>'
|
| 463 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 464 |
|
| 465 |
with gr.Row(elem_classes=["aio-shell"]):
|
| 466 |
# Drawer (drawer behaves as fixed sidebar ≥1024 px;
|
|
@@ -748,6 +768,58 @@ def _stage_to_comfy_input(file_path) -> str | None:
|
|
| 748 |
PRESET_DURATION = {"Fast": 60, "Balanced": 120, "Quality": 300}
|
| 749 |
|
| 750 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 751 |
def _seconds_to_frames(seconds: float, fps: int) -> int:
|
| 752 |
return max(9, int(round(float(seconds) * float(fps) / 8) * 8) + 1)
|
| 753 |
|
|
@@ -841,55 +913,28 @@ async def _on_generate(mode_name: str, *, progress: Any = None, **inputs: Any):
|
|
| 841 |
video_update = event.video_path if event.video_path else gr.update()
|
| 842 |
return (ui._render_idle(), video_update)
|
| 843 |
if isinstance(event, backend_module.ErrorEvent):
|
|
|
|
| 844 |
return (
|
| 845 |
f'<div class="status-card status-error">'
|
| 846 |
-
f' <div class="status-row"><span class="status-stage">
|
| 847 |
-
f" <div>{
|
| 848 |
f"</div>",
|
| 849 |
gr.update(),
|
| 850 |
)
|
| 851 |
return None
|
| 852 |
|
| 853 |
-
#
|
| 854 |
-
#
|
| 855 |
-
#
|
| 856 |
started = time.time()
|
| 857 |
-
|
| 858 |
-
|
| 859 |
-
|
| 860 |
-
|
| 861 |
-
|
| 862 |
-
|
| 863 |
-
|
| 864 |
-
|
| 865 |
-
"Retrying with extended GPU budget</span></div>"
|
| 866 |
-
" <div>First attempt hit the per-call duration cap "
|
| 867 |
-
"(usually a cold model cache or a heavier mode than estimated). "
|
| 868 |
-
"Reserving 2× the budget and trying once more.</div>"
|
| 869 |
-
"</div>",
|
| 870 |
-
gr.update(),
|
| 871 |
-
)
|
| 872 |
-
multiplier = 2.0
|
| 873 |
-
started = time.time() # reset so progress ETAs are sensible
|
| 874 |
-
|
| 875 |
-
timed_out = False
|
| 876 |
-
async for event in backend.submit(
|
| 877 |
-
mode_name, workflow,
|
| 878 |
-
preset=preset, duration_multiplier=multiplier,
|
| 879 |
-
progress=progress,
|
| 880 |
-
):
|
| 881 |
-
if (
|
| 882 |
-
isinstance(event, backend_module.ErrorEvent)
|
| 883 |
-
and event.category == "gpu_timeout"
|
| 884 |
-
and attempt == 0
|
| 885 |
-
):
|
| 886 |
-
timed_out = True
|
| 887 |
-
break # don't yield the timeout error — auto-retry instead
|
| 888 |
-
translated = await _translate(event, started)
|
| 889 |
-
if translated is not None:
|
| 890 |
-
yield translated
|
| 891 |
-
if not timed_out:
|
| 892 |
-
return
|
| 893 |
|
| 894 |
|
| 895 |
def _input_keys_for_mode(mode_name: str, h: dict) -> list[str]:
|
|
|
|
| 280 |
border-radius: 4px;
|
| 281 |
}
|
| 282 |
|
| 283 |
+
.aio-tipbar {
|
| 284 |
+
margin: 0 0 6px 0;
|
| 285 |
+
padding: 6px 14px;
|
| 286 |
+
font-family: 'IBM Plex Sans', system-ui, sans-serif;
|
| 287 |
+
font-size: 12px;
|
| 288 |
+
color: #B5BCC6;
|
| 289 |
+
background: #1A1F26;
|
| 290 |
+
border-bottom: 1px solid #262C35;
|
| 291 |
+
text-align: center;
|
| 292 |
+
}
|
| 293 |
+
.aio-tipbar strong { color: #E6E8EB; font-weight: 500; }
|
| 294 |
+
.aio-tipbar .aio-heart { color: #E55B6E; }
|
| 295 |
+
|
| 296 |
/* === Drawer === */
|
| 297 |
.aio-shell { position: relative; }
|
| 298 |
.aio-drawer {
|
|
|
|
| 474 |
' <span class="aio-mode-tag" id="aio-mode-tag">T2V</span>'
|
| 475 |
'</div>'
|
| 476 |
)
|
| 477 |
+
gr.HTML(
|
| 478 |
+
'<div class="aio-tipbar">'
|
| 479 |
+
'Liking this project? '
|
| 480 |
+
'<strong>Drop a <span class="aio-heart">♥</span> at the top of this page</strong> '
|
| 481 |
+
'to support it.'
|
| 482 |
+
'</div>'
|
| 483 |
+
)
|
| 484 |
|
| 485 |
with gr.Row(elem_classes=["aio-shell"]):
|
| 486 |
# Drawer (drawer behaves as fixed sidebar ≥1024 px;
|
|
|
|
| 768 |
PRESET_DURATION = {"Fast": 60, "Balanced": 120, "Quality": 300}
|
| 769 |
|
| 770 |
|
| 771 |
+
_FRIENDLY_ERRORS: dict[str, tuple[str, str]] = {
|
| 772 |
+
"gpu_timeout": (
|
| 773 |
+
"Hit the GPU time limit",
|
| 774 |
+
"This run took longer than the GPU budget. Try the Fast preset, a "
|
| 775 |
+
"shorter video, or a smaller resolution — then click Generate again.",
|
| 776 |
+
),
|
| 777 |
+
"expired_token": (
|
| 778 |
+
"Session timed out",
|
| 779 |
+
"Your sign-in session expired. Refresh the page and try again — "
|
| 780 |
+
"you'll keep your spot in the GPU queue.",
|
| 781 |
+
),
|
| 782 |
+
"illegal_duration": (
|
| 783 |
+
"GPU budget too high",
|
| 784 |
+
"The estimator asked for more GPU time than the server allows. "
|
| 785 |
+
"Try Fast preset or a shorter video.",
|
| 786 |
+
),
|
| 787 |
+
"unlogged": (
|
| 788 |
+
"Sign-in not detected",
|
| 789 |
+
"Make sure you're signed into huggingface.co (top-right avatar), "
|
| 790 |
+
"then refresh this page. Pro accounts get 25 min of GPU per day.",
|
| 791 |
+
),
|
| 792 |
+
"quota_exceeded": (
|
| 793 |
+
"Daily GPU quota used up",
|
| 794 |
+
"You've used today's GPU minutes. Wait for the rolling 24-hour "
|
| 795 |
+
"reset, or upgrade Pro at huggingface.co/subscribe/pro for more.",
|
| 796 |
+
),
|
| 797 |
+
"oom": (
|
| 798 |
+
"Ran out of GPU memory",
|
| 799 |
+
"Try a smaller resolution, fewer frames, or the Fast preset.",
|
| 800 |
+
),
|
| 801 |
+
"interrupt": (
|
| 802 |
+
"Cancelled",
|
| 803 |
+
"Generation was cancelled. Click Generate to start a fresh run.",
|
| 804 |
+
),
|
| 805 |
+
"download": (
|
| 806 |
+
"Model download failed",
|
| 807 |
+
"Couldn't fetch a required model file. Check your internet and try again.",
|
| 808 |
+
),
|
| 809 |
+
}
|
| 810 |
+
|
| 811 |
+
|
| 812 |
+
def _friendly_error(category: str, raw_message: str) -> tuple[str, str]:
    """Return the user-facing (title, body) pair for a backend error category.

    Known categories are looked up in ``_FRIENDLY_ERRORS``; anything else gets
    a generic "Generation failed" message.

    Args:
        category: Error category string to look up.
        raw_message: The original error text. Accepted for the caller's
            convenience but not included in the returned strings.

    Returns:
        A ``(title, body)`` tuple of plain strings.
    """
    generic = (
        "Generation failed",
        "Something went wrong. Click Generate to retry, or check the Space "
        "logs if it keeps happening.",
    )
    return _FRIENDLY_ERRORS.get(category, generic)
|
| 821 |
+
|
| 822 |
+
|
| 823 |
def _seconds_to_frames(seconds: float, fps: int) -> int:
|
| 824 |
return max(9, int(round(float(seconds) * float(fps) / 8) * 8) + 1)
|
| 825 |
|
|
|
|
| 913 |
video_update = event.video_path if event.video_path else gr.update()
|
| 914 |
return (ui._render_idle(), video_update)
|
| 915 |
if isinstance(event, backend_module.ErrorEvent):
|
| 916 |
+
title, body = _friendly_error(event.category, event.message)
|
| 917 |
return (
|
| 918 |
f'<div class="status-card status-error">'
|
| 919 |
+
f' <div class="status-row"><span class="status-stage">{title}</span></div>'
|
| 920 |
+
f" <div>{body}</div>"
|
| 921 |
f"</div>",
|
| 922 |
gr.update(),
|
| 923 |
)
|
| 924 |
return None
|
| 925 |
|
| 926 |
+
# Single attempt. ZeroGPU-side abort (duration cap) and 401 expired-token
|
| 927 |
+
# surface as friendly messages via _friendly_error; user clicks Generate
|
| 928 |
+
# again to retry with a fresh request and fresh X-IP-Token.
|
| 929 |
started = time.time()
|
| 930 |
+
async for event in backend.submit(
|
| 931 |
+
mode_name, workflow,
|
| 932 |
+
preset=preset, duration_multiplier=1.0,
|
| 933 |
+
progress=progress,
|
| 934 |
+
):
|
| 935 |
+
translated = await _translate(event, started)
|
| 936 |
+
if translated is not None:
|
| 937 |
+
yield translated
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 938 |
|
| 939 |
|
| 940 |
def _input_keys_for_mode(mode_name: str, h: dict) -> list[str]:
|
|
@@ -109,17 +109,17 @@ def _duration_for(
|
|
| 109 |
ZeroGPU can call us with the same arg list it'll use for _execute_workflow.
|
| 110 |
|
| 111 |
Estimate = (base × preset multiplier + cold-cache buffer + per-frame VAE
|
| 112 |
-
decode time) × retry multiplier, clamped to [60s,
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
"""
|
| 118 |
base = _BASE_DURATION_S.get(mode, 180)
|
| 119 |
mult = _PRESET_MULT.get(preset.lower(), 1.5)
|
| 120 |
frames = _frames_from_workflow(workflow)
|
| 121 |
est = int((base * mult + 60 + frames * 0.3) * multiplier)
|
| 122 |
-
return max(60, min(est,
|
| 123 |
|
| 124 |
|
| 125 |
# Decorate at module load time so ZeroGPU's startup analyzer detects it.
|
|
@@ -529,9 +529,16 @@ def _classify(exc: Exception) -> str:
|
|
| 529 |
msg = str(exc).lower()
|
| 530 |
if "outofmemory" in name or "cuda out of memory" in msg:
|
| 531 |
return "oom"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 532 |
# ZeroGPU enforces the @spaces.GPU(duration=N) cap and re-raises as
|
| 533 |
-
# gradio.exceptions.Error('GPU task aborted').
|
| 534 |
-
# category so the handler can offer a retry with a bigger budget.
|
| 535 |
if "gpu task aborted" in msg or ("gpu" in msg and "aborted" in msg):
|
| 536 |
return "gpu_timeout"
|
| 537 |
if "interrupt" in name:
|
|
|
|
| 109 |
ZeroGPU can call us with the same arg list it'll use for _execute_workflow.
|
| 110 |
|
| 111 |
Estimate = (base × preset multiplier + cold-cache buffer + per-frame VAE
|
| 112 |
+
decode time) × retry multiplier, clamped to [60s, 240s]. ZeroGPU rejects
|
| 113 |
+
durations above the server's per-call max with "ZeroGPU illegal duration"
|
| 114 |
+
(client.py:137); 240s is observed to work for Pro identity (~2 min runs
|
| 115 |
+
needed for style + lipsync detailer paths). If the server rejects values
|
| 116 |
+
in this range, the user will see a clear error and can retry.
|
| 117 |
"""
|
| 118 |
base = _BASE_DURATION_S.get(mode, 180)
|
| 119 |
mult = _PRESET_MULT.get(preset.lower(), 1.5)
|
| 120 |
frames = _frames_from_workflow(workflow)
|
| 121 |
est = int((base * mult + 60 + frames * 0.3) * multiplier)
|
| 122 |
+
return max(60, min(est, 240))
|
| 123 |
|
| 124 |
|
| 125 |
# Decorate at module load time so ZeroGPU's startup analyzer detects it.
|
|
|
|
| 529 |
msg = str(exc).lower()
|
| 530 |
if "outofmemory" in name or "cuda out of memory" in msg:
|
| 531 |
return "oom"
|
| 532 |
+
if "expired zerogpu proxy token" in msg or "expired" in msg and "token" in msg:
|
| 533 |
+
return "expired_token"
|
| 534 |
+
if "illegal duration" in msg:
|
| 535 |
+
return "illegal_duration"
|
| 536 |
+
if "unlogged user" in msg:
|
| 537 |
+
return "unlogged"
|
| 538 |
+
if "exceeded your" in msg and "gpu" in msg:
|
| 539 |
+
return "quota_exceeded"
|
| 540 |
# ZeroGPU enforces the @spaces.GPU(duration=N) cap and re-raises as
|
| 541 |
+
# gradio.exceptions.Error('GPU task aborted').
|
|
|
|
| 542 |
if "gpu task aborted" in msg or ("gpu" in msg and "aborted" in msg):
|
| 543 |
return "gpu_timeout"
|
| 544 |
if "interrupt" in name:
|