Spaces:

techfreakworm
/

LTX2.3-Studio

Running on Zero

App Files Community

techfreakworm committed about 15 hours ago

Commit

a2c44d8

verified ·

1 Parent(s): a5c8e5e

Revert "feat(spaces): user-controlled GPU budget slider + pre-flight gate"

Browse files

This reverts commit ecc159588e928140424b9f985f7f40d5fbc03157.

Files changed (3) hide show

app.py +8 -48
backend.py +25 -41
tests/test_duration.py +0 -82

app.py CHANGED Viewed

@@ -556,18 +556,6 @@ def build_app() -> gr.Blocks:
                     )
                     for name, m in modes.MODE_REGISTRY.items()
                 }
-                # ZeroGPU per-call cap, placed right under the mode list so
-                # it's visible without scrolling. The pre-flight gate in
-                # _on_generate refuses calls whose estimate exceeds this.
-                gpu_budget_slider = gr.Slider(
-                    minimum=60,
-                    maximum=1800,
-                    value=240,
-                    step=30,
-                    label="GPU budget (seconds)",
-                    info="Max GPU time per generation. Higher = heavy modes fit; uses more of your daily quota per call.",
-                    elem_classes=["aio-gpu-budget"],
-                )
                 gr.Markdown("Models", elem_classes=["aio-drawer-heading"])
                 model_status = gr.HTML(_render_model_status_idle(), elem_id="aio-model-status")
                 refresh_btn = gr.Button("Refresh", size="sm", variant="secondary")
@@ -593,11 +581,9 @@ def build_app() -> gr.Blocks:
             with gr.Column(scale=4, elem_classes=["aio-body"]):
                 handles, tabs_component = _render_mode_panels()
-        # Wire generate buttons. The GPU-budget slider lives in the drawer and
-        # is the same instance for every mode — append it last so the handler
-        # receives it as `gpu_budget` (see `_input_keys_for_mode`).
         for name, h in handles.items():
-            inputs = _collect_inputs_for_mode(name, h) + [gpu_budget_slider]
             h["generate_btn"].click(
                 fn=_make_handler(name, h),
                 inputs=inputs,
@@ -874,9 +860,8 @@ PRESET_DURATION = {"Fast": 60, "Balanced": 120, "Quality": 300}
 _FRIENDLY_ERRORS: dict[str, tuple[str, str]] = {
     "gpu_timeout": (
         "Hit the GPU time limit",
-        "This run took longer than the GPU budget. Raise the GPU-budget "
-        "slider (in the sidebar), or try the Fast preset / a shorter video, "
-        "then click Generate again.",
     ),
     "expired_token": (
         "Session timed out",
@@ -884,10 +869,9 @@ _FRIENDLY_ERRORS: dict[str, tuple[str, str]] = {
         "you'll keep your spot in the GPU queue.",
     ),
     "illegal_duration": (
-        "GPU budget too high for your account",
-        "HF rejected the requested duration as exceeding your account's "
-        "per-call cap. Lower the GPU-budget slider (sidebar) and try again, "
-        "or drop the preset / shorten the video.",
     ),
     "unlogged": (
         "Sign-in not detected",
@@ -1014,29 +998,6 @@ async def _on_generate(mode_name: str, *, progress: Any = None, **inputs: Any):
     backend = _get_backend()
     preset = params["preset"]  # already lowercased above
-    # Pre-flight gate: refuse to submit if the estimator says this config
-    # needs more GPU time than the user has allocated. ZeroGPU charges actual
-    # usage, not declared duration, so under-allocating means the call still
-    # burns quota before timing out. Refuse here and tell the user to either
-    # bump the GPU-budget slider or reduce frames/preset.
-    user_budget: int | None = None
-    if "gpu_budget" in inputs and inputs["gpu_budget"] is not None:
-        user_budget = int(inputs["gpu_budget"])
-        estimate = backend_module._estimate_duration_unclamped(
-            mode=mode_name, preset=preset, frames=frames,
-        )
-        if estimate > user_budget:
-            yield (
-                f'<div class="status-card status-error">'
-                f'  <div class="status-row"><span class="status-stage">GPU budget too low</span></div>'
-                f"  <div>This config estimates ~{estimate}s of GPU time, but the "
-                f"GPU-budget slider is set to {user_budget}s. Raise the slider, drop "
-                f"the preset to Fast or Balanced, or reduce the duration / frame count.</div>"
-                f"</div>",
-                gr.update(),
-            )
-            return
     async def _translate(event, started_at):
         """Translate one backend event into Gradio (status_html, video) yields.
@@ -1089,7 +1050,7 @@ async def _on_generate(mode_name: str, *, progress: Any = None, **inputs: Any):
     started = time.time()
     async for event in backend.submit(
         mode_name, workflow,
-        preset=preset, user_budget=user_budget,
         progress=progress,
     ):
         translated = await _translate(event, started)
@@ -1115,7 +1076,6 @@ def _input_keys_for_mode(mode_name: str, h: dict) -> list[str]:
         base.extend(["ic_lora", "ic_strength"])
     if h["lora"].pose_on is not None:
         base.append("pose_on")
-    base.append("gpu_budget")  # appended by build_app() from the global slider
     return base

                     )
                     for name, m in modes.MODE_REGISTRY.items()
                 }
                 gr.Markdown("Models", elem_classes=["aio-drawer-heading"])
                 model_status = gr.HTML(_render_model_status_idle(), elem_id="aio-model-status")
                 refresh_btn = gr.Button("Refresh", size="sm", variant="secondary")
             with gr.Column(scale=4, elem_classes=["aio-body"]):
                 handles, tabs_component = _render_mode_panels()
+        # Wire generate buttons
         for name, h in handles.items():
+            inputs = _collect_inputs_for_mode(name, h)
             h["generate_btn"].click(
                 fn=_make_handler(name, h),
                 inputs=inputs,
 _FRIENDLY_ERRORS: dict[str, tuple[str, str]] = {
     "gpu_timeout": (
         "Hit the GPU time limit",
+        "This run took longer than the GPU budget. Try the Fast preset, a "
+        "shorter video, or a smaller resolution — then click Generate again.",
     ),
     "expired_token": (
         "Session timed out",
         "you'll keep your spot in the GPU queue.",
     ),
     "illegal_duration": (
+        "GPU budget too high",
+        "The estimator asked for more GPU time than the server allows. "
+        "Try Fast preset or a shorter video.",
     ),
     "unlogged": (
         "Sign-in not detected",
     backend = _get_backend()
     preset = params["preset"]  # already lowercased above
     async def _translate(event, started_at):
         """Translate one backend event into Gradio (status_html, video) yields.
     started = time.time()
     async for event in backend.submit(
         mode_name, workflow,
+        preset=preset, duration_multiplier=1.0,
         progress=progress,
     ):
         translated = await _translate(event, started)
         base.extend(["ic_lora", "ic_strength"])
     if h["lora"].pose_on is not None:
         base.append("pose_on")
     return base

backend.py CHANGED Viewed

@@ -93,49 +93,33 @@ def _frames_from_workflow(workflow: dict) -> int:
     return 121
-def _estimate_duration_unclamped(*, mode: str, preset: str, frames: int) -> int:
-    """Estimator formula minus the 60 s floor.
-    Used by the UI's pre-flight gate so it can show "this config needs ~Xs"
-    without re-implementing the constants in app.py.
-    """
-    base = _BASE_DURATION_S.get(mode, 180)
-    mult = _PRESET_MULT.get(preset.lower(), 1.5)
-    return int(base * mult + 60 + frames * 0.3)
 def _duration_for(
     executor: Any,
     workflow: dict,
     output_ids: list[str],
     mode: str,
     preset: str,
     progress: Any = None,
-    user_budget: int | None = None,
 ) -> int:
-    """ZeroGPU per-call duration. Same signature as _execute_workflow.
-    `progress` is a `gr.Progress` instance forwarded by the caller; we ignore it
-    here but must accept it so ZeroGPU calls us with the same arg list it uses
-    for `_execute_workflow`.
-    When `user_budget` is set, it overrides the estimator — the user has decided
-    how much of their ZeroGPU quota to spend on this call. Clamped to ≥ 60 s
-    (HF's documented per-call floor); no upper clamp, so the user can declare
-    up to whatever their account tier actually allows. If they exceed the
-    account cap, HF raises "ZeroGPU illegal duration" and the UI surfaces it
-    via the `illegal_duration` friendly-error category.
-    Without `user_budget`, returns the unclamped estimate (base × preset
-    multiplier + cold-cache buffer + per-frame VAE decode). The pre-flight
-    gate in app.py refuses calls whose estimate exceeds the user-chosen
-    budget — so by the time we get here, either the user opted in or there
-    was no override.
     """
-    if user_budget is not None:
-        return max(60, int(user_budget))
     frames = _frames_from_workflow(workflow)
-    return max(60, _estimate_duration_unclamped(mode=mode, preset=preset, frames=frames))
 # Decorate at module load time so ZeroGPU's startup analyzer detects it.
@@ -153,14 +137,14 @@ def _execute_workflow(
     output_ids: list[str],
     mode: str,
     preset: str,
     progress: Any = None,
-    user_budget: int | None = None,
 ) -> str:
     """Run the workflow on GPU and return the path of the first video output.
     Returns just the video path (a plain string, picklable across the
-    @spaces.GPU subprocess boundary). The `mode`, `preset`, and `user_budget`
-    args are consumed by `_duration_for` to set the per-call GPU slot.
     `progress` is an optional `gr.Progress` instance. It's the only progress
     channel that crosses the @spaces.GPU subprocess boundary on HF Spaces —
@@ -400,15 +384,15 @@ class ComfyUILibraryBackend:
         workflow: dict,
         *,
         preset: str = "balanced",
-        user_budget: int | None = None,
         gpu_duration: int = 0,  # legacy, ignored (now derived from preset+frames)
         progress: Any = None,
     ) -> AsyncIterator[Any]:
         """Run a workflow end-to-end. Yields Download/Progress/Output/Error events.
-        `preset` and `user_budget` flow through to the @spaces.GPU duration
-        estimator. When `user_budget` is set the user has opted in to a
-        specific per-call GPU time cap; otherwise the estimator picks one.
         """
         # Pre-flight: ensure all model files exist.
         try:
@@ -483,7 +467,7 @@ class ComfyUILibraryBackend:
                 # light calls get fast queue priority while heavy ones reserve
                 # real headroom. Off-Spaces it's a plain call.
                 video_path = _execute_workflow(
-                    self._executor, workflow, output_ids, mode, preset, progress, user_budget,
                 )
                 # Fallback: if history_result didn't surface a path (rare on
                 # Spaces — happens when ZeroGPU's subprocess boundary drops

     return 121
 def _duration_for(
     executor: Any,
     workflow: dict,
     output_ids: list[str],
     mode: str,
     preset: str,
+    multiplier: float = 1.0,
     progress: Any = None,
 ) -> int:
+    """ZeroGPU duration estimator. Same signature as _execute_workflow.
+    `progress` is a gr.Progress instance forwarded by the caller; we ignore it
+    here (estimator doesn't emit progress) but must accept it positionally so
+    ZeroGPU can call us with the same arg list it'll use for _execute_workflow.
+    Estimate = (base × preset multiplier + cold-cache buffer + per-frame VAE
+    decode time) × retry multiplier, clamped to [60s, 240s]. ZeroGPU rejects
+    durations above the server's per-call max with "ZeroGPU illegal duration"
+    (client.py:137); 240s is observed to work for Pro identity (~2 min runs
+    needed for style + lipsync detailer paths). If the server rejects values
+    in this range, the user will see a clear error and can retry.
     """
+    base = _BASE_DURATION_S.get(mode, 180)
+    mult = _PRESET_MULT.get(preset.lower(), 1.5)
     frames = _frames_from_workflow(workflow)
+    est = int((base * mult + 60 + frames * 0.3) * multiplier)
+    return max(60, min(est, 240))
 # Decorate at module load time so ZeroGPU's startup analyzer detects it.
     output_ids: list[str],
     mode: str,
     preset: str,
+    multiplier: float = 1.0,
     progress: Any = None,
 ) -> str:
     """Run the workflow on GPU and return the path of the first video output.
     Returns just the video path (a plain string, picklable across the
+    @spaces.GPU subprocess boundary). The `mode`, `preset`, and `multiplier`
+    args are consumed by `_duration_for` to estimate the GPU slot to reserve.
     `progress` is an optional `gr.Progress` instance. It's the only progress
     channel that crosses the @spaces.GPU subprocess boundary on HF Spaces —
         workflow: dict,
         *,
         preset: str = "balanced",
+        duration_multiplier: float = 1.0,
         gpu_duration: int = 0,  # legacy, ignored (now derived from preset+frames)
         progress: Any = None,
     ) -> AsyncIterator[Any]:
         """Run a workflow end-to-end. Yields Download/Progress/Output/Error events.
+        `preset` and `duration_multiplier` flow through to the @spaces.GPU
+        duration estimator. The handler can re-call submit() with
+        duration_multiplier=2.0 if the first attempt aborts on timeout.
         """
         # Pre-flight: ensure all model files exist.
         try:
                 # light calls get fast queue priority while heavy ones reserve
                 # real headroom. Off-Spaces it's a plain call.
                 video_path = _execute_workflow(
+                    self._executor, workflow, output_ids, mode, preset, duration_multiplier, progress,
                 )
                 # Fallback: if history_result didn't surface a path (rare on
                 # Spaces — happens when ZeroGPU's subprocess boundary drops

tests/test_duration.py DELETED Viewed

@@ -1,82 +0,0 @@
-"""Tests for the ZeroGPU per-call duration estimator + user-budget override."""
-import backend
-def _t2v_workflow(frames: int = 121) -> dict:
-    return {
-        "100": {
-            "class_type": "EmptyLTXVLatentVideo",
-            "inputs": {"length": frames, "width": 512, "height": 512},
-        }
-    }
-def test_duration_for_uses_user_budget_when_set() -> None:
-    # 600s should pass through verbatim — the user knows what they're spending.
-    assert (
-        backend._duration_for(
-            executor=None,
-            workflow=_t2v_workflow(),
-            output_ids=[],
-            mode="t2v",
-            preset="fast",
-            user_budget=600,
-        )
-        == 600
-    )
-def test_duration_for_clamps_user_budget_to_floor() -> None:
-    # 30s below the 60s ZeroGPU floor — clamp up, never below.
-    assert (
-        backend._duration_for(
-            executor=None,
-            workflow=_t2v_workflow(),
-            output_ids=[],
-            mode="t2v",
-            preset="fast",
-            user_budget=30,
-        )
-        == 60
-    )
-def test_duration_for_no_budget_returns_unclamped_estimate() -> None:
-    # style/quality/121 frames: 360*3 + 60 + 121*0.3 = 1176.3 -> int 1176.
-    # No upper ceiling — the whole point of the user-budget refactor.
-    result = backend._duration_for(
-        executor=None,
-        workflow=_t2v_workflow(frames=121),
-        output_ids=[],
-        mode="style",
-        preset="quality",
-    )
-    assert result == 1176
-def test_duration_for_no_budget_honours_floor() -> None:
-    # 1-frame t2v/fast: 90*1 + 60 + 0.3 = 150 -> int 150; well above floor, so
-    # this is really testing that the floor doesn't accidentally fire on real
-    # workloads. (See test_duration_for_clamps_user_budget_to_floor for the
-    # actual floor case via user_budget.)
-    result = backend._duration_for(
-        executor=None,
-        workflow=_t2v_workflow(frames=1),
-        output_ids=[],
-        mode="t2v",
-        preset="fast",
-    )
-    assert result == 150
-def test_estimate_duration_unclamped_matches_formula() -> None:
-    # Surface the formula so the pre-flight gate in app.py can show the user
-    # "needs X seconds" without re-implementing it.
-    assert backend._estimate_duration_unclamped(mode="t2v", preset="fast", frames=121) == 90 + 60 + int(121 * 0.3)
-    assert backend._estimate_duration_unclamped(mode="style", preset="quality", frames=121) == int(360 * 3.0 + 60 + 121 * 0.3)
-def test_estimate_duration_unclamped_unknown_mode_uses_default() -> None:
-    # Unknown mode -> default base 180. Preset still applies.
-    assert backend._estimate_duration_unclamped(mode="nonsense", preset="balanced", frames=100) == int(180 * 1.5 + 60 + 100 * 0.3)