Spaces:

techfreakworm
/

LTX2.3-Studio

Running on Zero

App Files Files Community

techfreakworm committed about 16 hours ago

Commit

ecc1595

verified ·

1 Parent(s): de6853a

feat(spaces): user-controlled GPU budget slider + pre-flight gate

Browse files

The fixed 240 s ceiling in `_duration_for` was the only thing standing
between heavy modes (style, lipsync at high frame counts) and HF Pro+ /
credit-topped accounts whose per-call cap actually allows longer
durations. Replace it with a per-call budget the user picks, plus a
pre-flight gate so jobs that won't fit are refused before submission,
instead of timing out after several minutes of wasted GPU compute.

- `_duration_for` honours `user_budget` when set; otherwise returns the
estimate with no upper ceiling (only the 60 s floor remains).
- `_estimate_duration_unclamped` helper exposes the formula for the
pre-flight diff message in app.py.
- GPU-budget slider in the sidebar drawer: 60-1800 s, default 240 s,
step 30 s, placed under the mode list for discoverability.
- Pre-flight refuses with a friendly diff: "config estimates ~X s,
slider is Y s" - user raises the slider or reduces preset/frames.
- `illegal_duration` + `gpu_timeout` friendly errors now point at the
slider as the first thing to try.
- Drops the dead `duration_multiplier` arg (leftover from the removed
auto-retry path) so the signature stays clean.

Tests cover: user_budget override, the 60 s floor clamp, the unclamped
estimate formula, and the unknown-mode default.

Files changed (3) hide show

app.py +48 -8
backend.py +41 -25
tests/test_duration.py +82 -0

app.py CHANGED Viewed

@@ -523,6 +523,18 @@ def build_app() -> gr.Blocks:
                     )
                     for name, m in modes.MODE_REGISTRY.items()
                 }
                 gr.Markdown("Models", elem_classes=["aio-drawer-heading"])
                 model_status = gr.HTML(_render_model_status_idle(), elem_id="aio-model-status")
                 refresh_btn = gr.Button("Refresh", size="sm", variant="secondary")
@@ -539,9 +551,11 @@ def build_app() -> gr.Blocks:
             with gr.Column(scale=4, elem_classes=["aio-body"]):
                 handles, tabs_component = _render_mode_panels()
-        # Wire generate buttons
         for name, h in handles.items():
-            inputs = _collect_inputs_for_mode(name, h)
             h["generate_btn"].click(
                 fn=_make_handler(name, h),
                 inputs=inputs,
@@ -818,8 +832,9 @@ PRESET_DURATION = {"Fast": 60, "Balanced": 120, "Quality": 300}
 _FRIENDLY_ERRORS: dict[str, tuple[str, str]] = {
     "gpu_timeout": (
         "Hit the GPU time limit",
-        "This run took longer than the GPU budget. Try the Fast preset, a "
-        "shorter video, or a smaller resolution — then click Generate again.",
     ),
     "expired_token": (
         "Session timed out",
@@ -827,9 +842,10 @@ _FRIENDLY_ERRORS: dict[str, tuple[str, str]] = {
         "you'll keep your spot in the GPU queue.",
     ),
     "illegal_duration": (
-        "GPU budget too high",
-        "The estimator asked for more GPU time than the server allows. "
-        "Try Fast preset or a shorter video.",
     ),
     "unlogged": (
         "Sign-in not detected",
@@ -956,6 +972,29 @@ async def _on_generate(mode_name: str, *, progress: Any = None, **inputs: Any):
     backend = _get_backend()
     preset = params["preset"]  # already lowercased above
     async def _translate(event, started_at):
         """Translate one backend event into Gradio (status_html, video) yields.
@@ -1008,7 +1047,7 @@ async def _on_generate(mode_name: str, *, progress: Any = None, **inputs: Any):
     started = time.time()
     async for event in backend.submit(
         mode_name, workflow,
-        preset=preset, duration_multiplier=1.0,
         progress=progress,
     ):
         translated = await _translate(event, started)
@@ -1034,6 +1073,7 @@ def _input_keys_for_mode(mode_name: str, h: dict) -> list[str]:
         base.extend(["ic_lora", "ic_strength"])
     if h["lora"].pose_on is not None:
         base.append("pose_on")
     return base

                     )
                     for name, m in modes.MODE_REGISTRY.items()
                 }
+                # ZeroGPU per-call cap, placed right under the mode list so
+                # it's visible without scrolling. The pre-flight gate in
+                # _on_generate refuses calls whose estimate exceeds this.
+                gpu_budget_slider = gr.Slider(
+                    minimum=60,
+                    maximum=1800,
+                    value=240,
+                    step=30,
+                    label="GPU budget (seconds)",
+                    info="Max GPU time per generation. Higher = heavy modes fit; uses more of your daily quota per call.",
+                    elem_classes=["aio-gpu-budget"],
+                )
                 gr.Markdown("Models", elem_classes=["aio-drawer-heading"])
                 model_status = gr.HTML(_render_model_status_idle(), elem_id="aio-model-status")
                 refresh_btn = gr.Button("Refresh", size="sm", variant="secondary")
             with gr.Column(scale=4, elem_classes=["aio-body"]):
                 handles, tabs_component = _render_mode_panels()
+        # Wire generate buttons. The GPU-budget slider lives in the drawer and
+        # is the same instance for every mode — append it last so the handler
+        # receives it as `gpu_budget` (see `_input_keys_for_mode`).
         for name, h in handles.items():
+            inputs = _collect_inputs_for_mode(name, h) + [gpu_budget_slider]
             h["generate_btn"].click(
                 fn=_make_handler(name, h),
                 inputs=inputs,
 _FRIENDLY_ERRORS: dict[str, tuple[str, str]] = {
     "gpu_timeout": (
         "Hit the GPU time limit",
+        "This run took longer than the GPU budget. Raise the GPU-budget "
+        "slider (in the sidebar), or try the Fast preset / a shorter video, "
+        "then click Generate again.",
     ),
     "expired_token": (
         "Session timed out",
         "you'll keep your spot in the GPU queue.",
     ),
     "illegal_duration": (
+        "GPU budget too high for your account",
+        "HF rejected the requested duration as exceeding your account's "
+        "per-call cap. Lower the GPU-budget slider (sidebar) and try again, "
+        "or drop the preset / shorten the video.",
     ),
     "unlogged": (
         "Sign-in not detected",
     backend = _get_backend()
     preset = params["preset"]  # already lowercased above
+    # Pre-flight gate: refuse to submit if the estimator says this config
+    # needs more GPU time than the user has allocated. ZeroGPU charges actual
+    # usage, not declared duration, so under-allocating means the call still
+    # burns quota before timing out. Refuse here and tell the user to either
+    # bump the GPU-budget slider or reduce frames/preset.
+    user_budget: int | None = None
+    if "gpu_budget" in inputs and inputs["gpu_budget"] is not None:
+        user_budget = int(inputs["gpu_budget"])
+        estimate = backend_module._estimate_duration_unclamped(
+            mode=mode_name, preset=preset, frames=frames,
+        )
+        if estimate > user_budget:
+            yield (
+                f'<div class="status-card status-error">'
+                f'  <div class="status-row"><span class="status-stage">GPU budget too low</span></div>'
+                f"  <div>This config estimates ~{estimate}s of GPU time, but the "
+                f"GPU-budget slider is set to {user_budget}s. Raise the slider, drop "
+                f"the preset to Fast or Balanced, or reduce the duration / frame count.</div>"
+                f"</div>",
+                gr.update(),
+            )
+            return
     async def _translate(event, started_at):
         """Translate one backend event into Gradio (status_html, video) yields.
     started = time.time()
     async for event in backend.submit(
         mode_name, workflow,
+        preset=preset, user_budget=user_budget,
         progress=progress,
     ):
         translated = await _translate(event, started)
         base.extend(["ic_lora", "ic_strength"])
     if h["lora"].pose_on is not None:
         base.append("pose_on")
+    base.append("gpu_budget")  # appended by build_app() from the global slider
     return base

backend.py CHANGED Viewed

@@ -93,33 +93,49 @@ def _frames_from_workflow(workflow: dict) -> int:
     return 121
 def _duration_for(
     executor: Any,
     workflow: dict,
     output_ids: list[str],
     mode: str,
     preset: str,
-    multiplier: float = 1.0,
     progress: Any = None,
 ) -> int:
-    """ZeroGPU duration estimator. Same signature as _execute_workflow.
-    `progress` is a gr.Progress instance forwarded by the caller; we ignore it
-    here (estimator doesn't emit progress) but must accept it positionally so
-    ZeroGPU can call us with the same arg list it'll use for _execute_workflow.
-    Estimate = (base × preset multiplier + cold-cache buffer + per-frame VAE
-    decode time) × retry multiplier, clamped to [60s, 240s]. ZeroGPU rejects
-    durations above the server's per-call max with "ZeroGPU illegal duration"
-    (client.py:137); 240s is observed to work for Pro identity (~2 min runs
-    needed for style + lipsync detailer paths). If the server rejects values
-    in this range, the user will see a clear error and can retry.
     """
-    base = _BASE_DURATION_S.get(mode, 180)
-    mult = _PRESET_MULT.get(preset.lower(), 1.5)
     frames = _frames_from_workflow(workflow)
-    est = int((base * mult + 60 + frames * 0.3) * multiplier)
-    return max(60, min(est, 240))
 # Decorate at module load time so ZeroGPU's startup analyzer detects it.
@@ -137,14 +153,14 @@ def _execute_workflow(
     output_ids: list[str],
     mode: str,
     preset: str,
-    multiplier: float = 1.0,
     progress: Any = None,
 ) -> str:
     """Run the workflow on GPU and return the path of the first video output.
     Returns just the video path (a plain string, picklable across the
-    @spaces.GPU subprocess boundary). The `mode`, `preset`, and `multiplier`
-    args are consumed by `_duration_for` to estimate the GPU slot to reserve.
     `progress` is an optional `gr.Progress` instance. It's the only progress
     channel that crosses the @spaces.GPU subprocess boundary on HF Spaces —
@@ -384,15 +400,15 @@ class ComfyUILibraryBackend:
         workflow: dict,
         *,
         preset: str = "balanced",
-        duration_multiplier: float = 1.0,
         gpu_duration: int = 0,  # legacy, ignored (now derived from preset+frames)
         progress: Any = None,
     ) -> AsyncIterator[Any]:
         """Run a workflow end-to-end. Yields Download/Progress/Output/Error events.
-        `preset` and `duration_multiplier` flow through to the @spaces.GPU
-        duration estimator. The handler can re-call submit() with
-        duration_multiplier=2.0 if the first attempt aborts on timeout.
         """
         # Pre-flight: ensure all model files exist.
         try:
@@ -467,7 +483,7 @@ class ComfyUILibraryBackend:
                 # light calls get fast queue priority while heavy ones reserve
                 # real headroom. Off-Spaces it's a plain call.
                 video_path = _execute_workflow(
-                    self._executor, workflow, output_ids, mode, preset, duration_multiplier, progress,
                 )
                 # Fallback: if history_result didn't surface a path (rare on
                 # Spaces — happens when ZeroGPU's subprocess boundary drops

     return 121
+def _estimate_duration_unclamped(*, mode: str, preset: str, frames: int) -> int:
+    """Return the raw GPU-time estimate in seconds, with no floor or ceiling applied.
+    Used by the UI's pre-flight gate so it can show "this config needs ~Xs"
+    without re-implementing the constants in app.py.
+    Args:
+        mode: key into `_BASE_DURATION_S`; unknown modes fall back to a base of 180.
+        preset: key into `_PRESET_MULT`, lowercased before lookup; unknown
+            presets fall back to a multiplier of 1.5.
+        frames: frame count; each frame adds 0.3 to the estimate.
+    Returns:
+        `base * mult + 60 + frames * 0.3`, truncated toward zero by `int()`.
+    """
+    base = _BASE_DURATION_S.get(mode, 180)
+    mult = _PRESET_MULT.get(preset.lower(), 1.5)
+    return int(base * mult + 60 + frames * 0.3)
 def _duration_for(
     executor: Any,
     workflow: dict,
     output_ids: list[str],
     mode: str,
     preset: str,
     progress: Any = None,
+    user_budget: int | None = None,
 ) -> int:
+    """Return the ZeroGPU per-call duration, in seconds, for one workflow run.
+    Same signature as `_execute_workflow`. `executor`, `output_ids` and
+    `progress` (a `gr.Progress` instance forwarded by the caller) are not used
+    here, but must be accepted so ZeroGPU can call us with the same arg list
+    it uses for `_execute_workflow`.
+    When `user_budget` is set, it overrides the estimator — the user has decided
+    how much of their ZeroGPU quota to spend on this call. Clamped to ≥ 60 s
+    (HF's documented per-call floor); no upper clamp, so the user can declare
+    up to whatever their account tier actually allows. If they exceed the
+    account cap, HF raises "ZeroGPU illegal duration" and the UI surfaces it
+    via the `illegal_duration` friendly-error category.
+    Without `user_budget`, returns the estimate from
+    `_estimate_duration_unclamped` (base × preset multiplier + cold-cache
+    buffer + per-frame VAE decode), raised to the same 60 s floor but with no
+    upper ceiling. The pre-flight gate in app.py refuses calls whose estimate
+    exceeds the user-chosen budget — so by the time we get here, either the
+    user opted in or there was no override.
     """
+    # Explicit override wins over the estimator; only the 60 s floor applies.
+    if user_budget is not None:
+        return max(60, int(user_budget))
     frames = _frames_from_workflow(workflow)
+    # Estimator path: floor at 60 s, deliberately no upper clamp.
+    return max(60, _estimate_duration_unclamped(mode=mode, preset=preset, frames=frames))
 # Decorate at module load time so ZeroGPU's startup analyzer detects it.
     output_ids: list[str],
     mode: str,
     preset: str,
     progress: Any = None,
+    user_budget: int | None = None,
 ) -> str:
     """Run the workflow on GPU and return the path of the first video output.
     Returns just the video path (a plain string, picklable across the
+    @spaces.GPU subprocess boundary). The `mode`, `preset`, and `user_budget`
+    args are consumed by `_duration_for` to set the per-call GPU slot.
     `progress` is an optional `gr.Progress` instance. It's the only progress
     channel that crosses the @spaces.GPU subprocess boundary on HF Spaces —
         workflow: dict,
         *,
         preset: str = "balanced",
+        user_budget: int | None = None,
         gpu_duration: int = 0,  # legacy, ignored (now derived from preset+frames)
         progress: Any = None,
     ) -> AsyncIterator[Any]:
         """Run a workflow end-to-end. Yields Download/Progress/Output/Error events.
+        `preset` and `user_budget` flow through to the @spaces.GPU duration
+        estimator. When `user_budget` is set the user has opted in to a
+        specific per-call GPU time cap; otherwise the estimator picks one.
         """
         # Pre-flight: ensure all model files exist.
         try:
                 # light calls get fast queue priority while heavy ones reserve
                 # real headroom. Off-Spaces it's a plain call.
                 video_path = _execute_workflow(
+                    self._executor, workflow, output_ids, mode, preset, progress, user_budget,
                 )
                 # Fallback: if history_result didn't surface a path (rare on
                 # Spaces — happens when ZeroGPU's subprocess boundary drops

tests/test_duration.py ADDED Viewed

	@@ -0,0 +1,82 @@

+"""Tests for the ZeroGPU per-call duration estimator + user-budget override."""
+import backend
+def _t2v_workflow(frames: int = 121) -> dict:
+    """Build a minimal single-node workflow whose `EmptyLTXVLatentVideo.length` is `frames`."""
+    return {
+        "100": {
+            "class_type": "EmptyLTXVLatentVideo",
+            "inputs": {"length": frames, "width": 512, "height": 512},
+        }
+    }
+def test_duration_for_uses_user_budget_when_set() -> None:
+    # An explicit user_budget above the 60 s floor is returned verbatim,
+    # bypassing the estimator — the user knows what they're spending.
+    assert (
+        backend._duration_for(
+            executor=None,
+            workflow=_t2v_workflow(),
+            output_ids=[],
+            mode="t2v",
+            preset="fast",
+            user_budget=600,
+        )
+        == 600
+    )
+def test_duration_for_clamps_user_budget_to_floor() -> None:
+    # A 30 s budget is below the 60 s ZeroGPU floor, so the result is raised
+    # to 60 — the returned duration is never below the floor.
+    assert (
+        backend._duration_for(
+            executor=None,
+            workflow=_t2v_workflow(),
+            output_ids=[],
+            mode="t2v",
+            preset="fast",
+            user_budget=30,
+        )
+        == 60
+    )
+def test_duration_for_no_budget_returns_unclamped_estimate() -> None:
+    # style/quality/121 frames: 360*3 + 60 + 121*0.3 = 1176.3 -> int 1176.
+    # No upper ceiling — the whole point of the user-budget refactor. The
+    # result is far above the 60 s floor, so the floor does not affect it.
+    result = backend._duration_for(
+        executor=None,
+        workflow=_t2v_workflow(frames=121),
+        output_ids=[],
+        mode="style",
+        preset="quality",
+    )
+    assert result == 1176
+def test_duration_for_no_budget_honours_floor() -> None:
+    # 1-frame t2v/fast: 90*1 + 60 + 1*0.3 = 150.3 -> int 150; well above the
+    # 60 s floor, so this is really testing that the floor doesn't
+    # accidentally fire on real workloads. (See
+    # test_duration_for_clamps_user_budget_to_floor for the actual floor case
+    # via user_budget.)
+    result = backend._duration_for(
+        executor=None,
+        workflow=_t2v_workflow(frames=1),
+        output_ids=[],
+        mode="t2v",
+        preset="fast",
+    )
+    assert result == 150
+def test_estimate_duration_unclamped_matches_formula() -> None:
+    # Pin the formula (base * preset multiplier + 60 + frames * 0.3, truncated
+    # to int) so the pre-flight gate in app.py can show the user
+    # "needs X seconds" without re-implementing it.
+    assert backend._estimate_duration_unclamped(mode="t2v", preset="fast", frames=121) == 90 + 60 + int(121 * 0.3)
+    assert backend._estimate_duration_unclamped(mode="style", preset="quality", frames=121) == int(360 * 3.0 + 60 + 121 * 0.3)
+def test_estimate_duration_unclamped_unknown_mode_uses_default() -> None:
+    # Unknown mode -> default base 180. The preset multiplier still applies
+    # (balanced = 1.5 in the expected value below).
+    assert backend._estimate_duration_unclamped(mode="nonsense", preset="balanced", frames=100) == int(180 * 1.5 + 60 + 100 * 0.3)