Spaces:

techfreakworm
/

LTX2.3-Studio

Running on Zero

App Files Files Community

techfreakworm committed 15 days ago

Commit

27ffbbc

unverified ·

1 Parent(s): c3b8732

feat(spaces+style): hf_oauth + gr.Progress + style 2-input + DWPose deps

Browse files

Spaces — Pro quota attribution: enable hf_oauth in README YAML and add a
gr.LoginButton in the drawer (rendered only when OAUTH_CLIENT_ID is set, so
local dev stays clean). Without OAuth the Space scheduler bucketed Pro users
as signed-in/free (300s/day), causing "GPU quota exceeded" errors after a
few generations while the profile dashboard still showed 0/25 used.

Spaces — progress bar through @spaces.GPU subprocess: thread a gr.Progress
instance from _make_handler -> _on_generate -> backend.submit -> _execute_workflow.
The existing event-based ProgressEvent flow can't cross the @spaces.GPU
subprocess boundary on Spaces (asyncio.run_coroutine_threadsafe targets the
parent process's event loop, which the subprocess never inherits).
gr.Progress is the only progress
channel ZeroGPU's IPC wraps. Inside _execute_workflow, install a ComfyUI
PROGRESS_BAR_HOOK that calls progress(v/t, desc=...) and chains to the
prior hook so the local event-driven status banner keeps working.

Style mode — actually use the uploaded image: previously _style_parameterize
patched only the source video; the IC-LoRA reference image stayed at the
workflow's baked-in default image (IMG-20210721-WA0008.jpg). It now also
patches NODE_IMAGE_1 and
exposes a "Style reference" upload alongside "Source video" in the UI;
also resets VHS_LoadVideo.skip_first_frames from the workflow's baked-in
266 to 0 so user-uploaded clips don't fail with "No frames generated".

DWPose runtime deps: comfyui_controlnet_aux's dwpose preprocessor needs
matplotlib (already in requirements), scikit-image (hand keypoint detector),
and onnxruntime (acceleration; without it, the preprocessor falls back to
OpenCV/CPU, which is much slower). All three are now declared in requirements.txt.

Files changed (5) hide show

README.md +2 -1
app.py +23 -7
backend.py +28 -1
modes.py +2 -0
requirements.txt +2 -0

README.md CHANGED Viewed

@@ -8,7 +8,8 @@ sdk_version: "5.50.0"
 app_file: app.py
 python_version: "3.11"
 suggested_hardware: zero-a10g
-hf_oauth: false
 preload_from_hub:
   - Comfy-Org/ltx-2 split_files/text_encoders/gemma_3_12B_it.safetensors
   - Kijai/LTX2.3_comfy diffusion_models/ltx-2.3-22b-dev_transformer_only_bf16.safetensors,loras/ltx-2.3-22b-distilled-lora-dynamic_fro09_avg_rank_105_bf16.safetensors,text_encoders/ltx-2.3_text_projection_bf16.safetensors,vae/LTX23_audio_vae_bf16.safetensors,vae/LTX23_video_vae_bf16.safetensors,vae/taeltx2_3.safetensors

 app_file: app.py
 python_version: "3.11"
 suggested_hardware: zero-a10g
+hf_oauth: true
+hf_oauth_expiration_minutes: 480
 preload_from_hub:
   - Comfy-Org/ltx-2 split_files/text_encoders/gemma_3_12B_it.safetensors
   - Kijai/LTX2.3_comfy diffusion_models/ltx-2.3-22b-dev_transformer_only_bf16.safetensors,loras/ltx-2.3-22b-distilled-lora-dynamic_fro09_avg_rank_105_bf16.safetensors,text_encoders/ltx-2.3_text_projection_bf16.safetensors,vae/LTX23_audio_vae_bf16.safetensors,vae/LTX23_video_vae_bf16.safetensors,vae/taeltx2_3.safetensors

app.py CHANGED Viewed

@@ -466,6 +466,13 @@ def build_app() -> gr.Blocks:
             # Drawer (drawer behaves as fixed sidebar ≥1024 px;
             # absolute-positioned overlay <1024 px — see _CUSTOM_CSS).
             with gr.Column(scale=1, min_width=200, elem_classes=["aio-drawer"]):
                 gr.Markdown("Modes", elem_classes=["aio-drawer-heading"])
                 mode_buttons = {
                     name: gr.Button(
@@ -632,6 +639,7 @@ def _render_one_mode(name: str) -> dict:
                 handles["first_frame"] = gr.Image(label="First frame", type="filepath")
                 handles["last_frame"] = gr.Image(label="Last frame", type="filepath")
             elif name == "style":
                 handles["input_video"] = gr.Video(label="Source video")
             handles["preset"] = ui.preset_bar()
@@ -751,8 +759,14 @@ def _seconds_to_frames(seconds: float, fps: int) -> int:
     return max(9, int(round(float(seconds) * float(fps) / 8) * 8) + 1)
-async def _on_generate(mode_name: str, **inputs: Any):
-    """Generate handler — async generator yielding (status_html, video_path)."""
     mode = modes.MODE_REGISTRY[mode_name]
     fps = int(inputs.get("fps", 24))
@@ -867,7 +881,9 @@ async def _on_generate(mode_name: str, **inputs: Any):
         timed_out = False
         async for event in backend.submit(
-            mode_name, workflow, preset=preset, duration_multiplier=multiplier
         ):
             if (
                 isinstance(event, backend_module.ErrorEvent)
@@ -894,7 +910,7 @@ def _input_keys_for_mode(mode_name: str, h: dict) -> list[str]:
     elif mode_name == "keyframe":
         base.extend(["first_frame", "last_frame"])
     elif mode_name == "style":
-        base.append("input_video")
     base.append("negative_prompt")
     base.extend(["camera_lora", "camera_strength", "detailer_on", "detailer_strength"])
     if h["lora"].ic_lora is not None:
@@ -918,7 +934,7 @@ def _collect_inputs_for_mode(mode_name: str, h: dict) -> list:
     elif mode_name == "keyframe":
         base.extend([h["first_frame"], h["last_frame"]])
     elif mode_name == "style":
-        base.append(h["input_video"])
     base.append(h["negative_prompt"])
     base.extend([
         h["lora"].camera_lora, h["lora"].camera_strength,
@@ -934,9 +950,9 @@ def _collect_inputs_for_mode(mode_name: str, h: dict) -> list:
 def _make_handler(mode_name: str, h: dict):
     keys = _input_keys_for_mode(mode_name, h)
-    async def handler(*values):
         kwargs = dict(zip(keys, values, strict=False))
-        async for output in _on_generate(mode_name, **kwargs):
             yield output
     return handler

             # Drawer (drawer behaves as fixed sidebar ≥1024 px;
             # absolute-positioned overlay <1024 px — see _CUSTOM_CSS).
             with gr.Column(scale=1, min_width=200, elem_classes=["aio-drawer"]):
+                if os.getenv("OAUTH_CLIENT_ID"):
+                    gr.Markdown("Account", elem_classes=["aio-drawer-heading"])
+                    gr.LoginButton(
+                        value="Sign in for Pro GPU quota",
+                        size="sm",
+                        elem_classes=["aio-login-btn"],
+                    )
                 gr.Markdown("Modes", elem_classes=["aio-drawer-heading"])
                 mode_buttons = {
                     name: gr.Button(
                 handles["first_frame"] = gr.Image(label="First frame", type="filepath")
                 handles["last_frame"] = gr.Image(label="Last frame", type="filepath")
             elif name == "style":
+                handles["image"] = gr.Image(label="Style reference", type="filepath")
                 handles["input_video"] = gr.Video(label="Source video")
             handles["preset"] = ui.preset_bar()
     return max(9, int(round(float(seconds) * float(fps) / 8) * 8) + 1)
+async def _on_generate(mode_name: str, *, progress: Any = None, **inputs: Any):
+    """Generate handler — async generator yielding (status_html, video_path).
+    `progress` is a `gr.Progress` instance injected by Gradio. It's the only
+    progress channel that survives the @spaces.GPU subprocess boundary on HF
+    Spaces; we forward it to the backend so ComfyUI's per-step counter renders
+    a real progress bar instead of a generic Gradio spinner.
+    """
     mode = modes.MODE_REGISTRY[mode_name]
     fps = int(inputs.get("fps", 24))
         timed_out = False
         async for event in backend.submit(
+            mode_name, workflow,
+            preset=preset, duration_multiplier=multiplier,
+            progress=progress,
         ):
             if (
                 isinstance(event, backend_module.ErrorEvent)
     elif mode_name == "keyframe":
         base.extend(["first_frame", "last_frame"])
     elif mode_name == "style":
+        base.extend(["image", "input_video"])
     base.append("negative_prompt")
     base.extend(["camera_lora", "camera_strength", "detailer_on", "detailer_strength"])
     if h["lora"].ic_lora is not None:
     elif mode_name == "keyframe":
         base.extend([h["first_frame"], h["last_frame"]])
     elif mode_name == "style":
+        base.extend([h["image"], h["input_video"]])
     base.append(h["negative_prompt"])
     base.extend([
         h["lora"].camera_lora, h["lora"].camera_strength,
 def _make_handler(mode_name: str, h: dict):
     keys = _input_keys_for_mode(mode_name, h)
+    async def handler(*values, progress=gr.Progress()):
         kwargs = dict(zip(keys, values, strict=False))
+        async for output in _on_generate(mode_name, progress=progress, **kwargs):
             yield output
     return handler

backend.py CHANGED Viewed

@@ -129,13 +129,39 @@ def _execute_workflow(
     mode: str,
     preset: str,
     multiplier: float = 1.0,
 ) -> str:
     """Run the workflow on GPU and return the path of the first video output.
     Returns just the video path (a plain string, picklable across the
     @spaces.GPU subprocess boundary). The `mode`, `preset`, and `multiplier`
     args are consumed by `_duration_for` to estimate the GPU slot to reserve.
     """
     executor.execute(
         workflow,
         prompt_id="ltx23-aio",
@@ -351,6 +377,7 @@ class ComfyUILibraryBackend:
         preset: str = "balanced",
         duration_multiplier: float = 1.0,
         gpu_duration: int = 0,  # legacy, ignored (now derived from preset+frames)
     ) -> AsyncIterator[Any]:
         """Run a workflow end-to-end. Yields Download/Progress/Output/Error events.
@@ -431,7 +458,7 @@ class ComfyUILibraryBackend:
                 # light calls get fast queue priority while heavy ones reserve
                 # real headroom. Off-Spaces it's a plain call.
                 video_path = _execute_workflow(
-                    self._executor, workflow, output_ids, mode, preset, duration_multiplier,
                 )
                 # Fallback: if history_result didn't surface a path (rare on
                 # Spaces — happens when ZeroGPU's subprocess boundary drops

     mode: str,
     preset: str,
     multiplier: float = 1.0,
+    progress: Any = None,
 ) -> str:
     """Run the workflow on GPU and return the path of the first video output.
     Returns just the video path (a plain string, picklable across the
     @spaces.GPU subprocess boundary). The `mode`, `preset`, and `multiplier`
     args are consumed by `_duration_for` to estimate the GPU slot to reserve.
+    `progress` is an optional `gr.Progress` instance. It's the only progress
+    channel that crosses the @spaces.GPU subprocess boundary on HF Spaces —
+    Gradio + the `spaces` library wrap it with cross-process IPC. When set,
+    we mirror ComfyUI's step counter into it via the global progress hook,
+    chaining to whatever hook was already installed (so the local event-based
+    status banner keeps working alongside).
     """
+    if progress is not None:
+        import comfy.utils as _cu
+        _saved_hook = getattr(_cu, "PROGRESS_BAR_HOOK", None)
+        def _gp_hook(value, total, _preview=None, **_kw):
+            try:
+                v, t = int(value), int(total)
+                progress(v / max(t, 1), desc=f"Sampling step {v}/{t}")
+            except Exception:
+                pass
+            if _saved_hook is not None:
+                try:
+                    _saved_hook(value, total, _preview)
+                except Exception:
+                    pass
+        _cu.set_progress_bar_global_hook(_gp_hook)
     executor.execute(
         workflow,
         prompt_id="ltx23-aio",
         preset: str = "balanced",
         duration_multiplier: float = 1.0,
         gpu_duration: int = 0,  # legacy, ignored (now derived from preset+frames)
+        progress: Any = None,
     ) -> AsyncIterator[Any]:
         """Run a workflow end-to-end. Yields Download/Progress/Output/Error events.
                 # light calls get fast queue priority while heavy ones reserve
                 # real headroom. Off-Spaces it's a plain call.
                 video_path = _execute_workflow(
+                    self._executor, workflow, output_ids, mode, preset, duration_multiplier, progress,
                 )
                 # Fallback: if history_result didn't surface a path (rare on
                 # Spaces — happens when ZeroGPU's subprocess boundary drops

modes.py CHANGED Viewed

@@ -124,7 +124,9 @@ def _keyframe_parameterize(inp: dict[str, Any]) -> list[Patch]:
 def _style_parameterize(inp: dict[str, Any]) -> list[Patch]:
     return _shared_patches(inp, "style") + [
         (NODE_VIDEO, "video", inp["input_video"]),
     ]

 def _style_parameterize(inp: dict[str, Any]) -> list[Patch]:
     return _shared_patches(inp, "style") + [
+        (NODE_IMAGE_1, "image", inp["image"]),
         (NODE_VIDEO, "video", inp["input_video"]),
+        (NODE_VIDEO, "skip_first_frames", 0),
     ]

requirements.txt CHANGED Viewed

@@ -49,6 +49,8 @@ gguf  # ComfyUI-GGUF (UnetLoaderGGUF)
 imageio_ffmpeg  # ComfyUI-VideoHelperSuite (video write/read backend)
 opencv-python  # ComfyUI_LayerStyle, multiple custom nodes
 matplotlib  # comfyui_controlnet_aux dwpose / pose preprocessors
 diffusers  # ComfyUI-SeedVR2 (used during init even when the node isn't called)
 yt-dlp  # ComfyUI-MediaMixer (init-time import)

 imageio_ffmpeg  # ComfyUI-VideoHelperSuite (video write/read backend)
 opencv-python  # ComfyUI_LayerStyle, multiple custom nodes
 matplotlib  # comfyui_controlnet_aux dwpose / pose preprocessors
+scikit-image  # comfyui_controlnet_aux dwpose hand keypoint detector
+onnxruntime  # comfyui_controlnet_aux dwpose accelerator (CPU/CoreML on Mac, CUDA on Spaces)
 diffusers  # ComfyUI-SeedVR2 (used during init even when the node isn't called)
 yt-dlp  # ComfyUI-MediaMixer (init-time import)