Spaces:

techfreakworm
/

LTX2.3-Studio

Running on Zero

App Files Files Community

techfreakworm committed 19 days ago

Commit

bce3dbc

unverified ·

1 Parent(s): 107f495

feat: API-format workflows + sidebar nav + responsive UI

Browse files

Switch to ComfyUI API-format JSON for all 6 modes (saved from editor's
"Save API Format"), avoiding the editor->API converter. workflow.py is now
just load_template + set_input over a flat {id: {class_type, inputs}} dict.
modes.py shrinks to ~10 patches per mode (prompt, dimensions, seed, media
inputs).

backend.py wires PromptExecutor with cache_args, server stub, async
nodes.init_extra_nodes via worker thread, sys.path ordering so KJNodes/VHS
find utils.install_util, and a per-sampler stage counter.

app.py: sidebar nav (gr.Tabs hidden offscreen, sidebar buttons drive
gr.Tabs.selected); device/memory badge; preset budget per mode;
seconds-driven length slider with derived frames display; W/H sliders to
4096; randomize-seed toggle; responsive CSS for tablet (≤1024px) and
mobile (≤700px).

Files changed (10) hide show

app.py +295 -67
backend.py +155 -9
modes.py +70 -153
workflow.py +22 -60
workflows/a2v.json +0 -0
workflows/i2v.json +0 -0
workflows/keyframe.json +0 -0
workflows/lipsync.json +0 -0
workflows/style.json +0 -0
workflows/t2v.json +0 -0

app.py CHANGED Viewed

@@ -5,6 +5,7 @@ from __future__ import annotations
 import os
 import pathlib
 import sys
 import time
 from typing import Any
@@ -26,8 +27,6 @@ def _on_spaces() -> bool:
 COMFYUI_REPO = "https://github.com/comfyanonymous/ComfyUI.git"
-# Pinned to the same commit the local git submodule uses (set in Task 5).
-# Override via env var only when intentionally testing a different ComfyUI version.
 COMFYUI_COMMIT = os.environ.get(
     "LTX23_AIO_COMFYUI_COMMIT",
     "eb0686bbb60c83e44c3a3e4f7defd0f589cfef10",
@@ -58,7 +57,6 @@ def _bootstrap() -> None:
         for node_url, node_ref in CUSTOM_NODES_PINNED:
             name = node_url.rstrip(".git").rsplit("/", 1)[-1]
             _git_clone(node_url, comfy_dir / "custom_nodes" / name, ref=node_ref)
-        # Install custom node deps
         import subprocess
         for cn in (comfy_dir / "custom_nodes").iterdir():
@@ -78,29 +76,107 @@ _bootstrap()
 # ---------------------------------------------------------------------------
-# Gradio app
 # ---------------------------------------------------------------------------
 _CUSTOM_CSS = """
 .status-card { padding: 14px 16px; border-radius: 10px; background: rgba(255,255,255,0.04); border: 1px solid rgba(255,255,255,0.08); }
-.status-row { display: flex; gap: 14px; align-items: center; margin-bottom: 8px; }
 .status-stage { font-weight: 600; }
 .status-meta { font-size: 12px; opacity: 0.75; }
 .status-bar { height: 6px; background: rgba(255,255,255,0.08); border-radius: 99px; overflow: hidden; }
 .status-fill { height: 100%; background: linear-gradient(90deg,#6ea8fe,#8de9fe); transition: width .3s; }
 .status-mem { font-size: 11px; opacity: 0.6; margin-top: 6px; font-family: ui-monospace, monospace; }
 """
 def build_app() -> gr.Blocks:
     with gr.Blocks(theme=gr.themes.Soft(), title="LTX 2.3 All-in-One", css=_CUSTOM_CSS) as app:
         gr.Markdown("# ⚡ LTX 2.3 All-in-One")
-        with gr.Row():
-            with gr.Column(scale=1, min_width=200):
-                _render_sidebar()
-            with gr.Column(scale=4):
-                handles = _render_mode_panels()
         for name, h in handles.items():
             inputs = _collect_inputs_for_mode(name, h)
             h["generate_btn"].click(
@@ -108,25 +184,99 @@ def build_app() -> gr.Blocks:
                 inputs=inputs,
                 outputs=[h["status"], h["video_out"]],
             )
     return app
-def _render_sidebar() -> None:
-    gr.Markdown("### Modes")
-    for mode in modes.MODE_REGISTRY.values():
-        gr.Markdown(f"- {mode.icon} {mode.label}")
-    gr.Markdown("---\n### Models")
-    gr.Button("Unload all models", variant="secondary")
-def _render_mode_panels() -> dict[str, dict]:
-    """Render one form per mode. Returns the component handles keyed by mode."""
     handles: dict[str, dict] = {}
-    with gr.Tabs():
         for name, mode in modes.MODE_REGISTRY.items():
-            with gr.Tab(label=f"{mode.icon} {mode.label}"):
                 handles[name] = _render_one_mode(name)
-    return handles
 def _render_one_mode(name: str) -> dict:
@@ -134,7 +284,7 @@ def _render_one_mode(name: str) -> dict:
     handles: dict = {"mode": name}
     with gr.Row():
-        with gr.Column(scale=2):
             handles["prompt"] = gr.Textbox(
                 label="Prompt", lines=4, placeholder="Describe the shot..."
             )
@@ -154,13 +304,31 @@ def _render_one_mode(name: str) -> dict:
                 handles["input_video"] = gr.Video(label="Source video")
             handles["preset"] = ui.preset_bar()
             with gr.Row():
-                handles["width"] = gr.Slider(256, 1280, value=512, step=32, label="Width")
-                handles["height"] = gr.Slider(256, 1280, value=768, step=32, label="Height")
             with gr.Row():
-                handles["frames"] = gr.Slider(9, 121, value=81, step=8, label="Frames (8k+1)")
                 handles["fps"] = gr.Slider(8, 30, value=24, step=1, label="FPS")
-            handles["seed"] = gr.Number(label="Seed", value=42, precision=0)
             with gr.Accordion("Advanced ▾", open=False):
                 handles["lora"] = ui.lora_chrome(name)
@@ -168,7 +336,23 @@ def _render_one_mode(name: str) -> dict:
             handles["generate_btn"] = gr.Button("▶ Generate", variant="primary", size="lg")
-        with gr.Column(scale=2):
             handles["status"] = ui.status_banner()
             handles["video_out"] = gr.Video(label="Output", autoplay=True)
             handles["history"] = gr.Markdown("")
@@ -176,6 +360,10 @@ def _render_one_mode(name: str) -> dict:
     return handles
 _BACKEND: backend_module.ComfyUILibraryBackend | None = None
@@ -186,51 +374,92 @@ def _get_backend() -> backend_module.ComfyUILibraryBackend:
     return _BACKEND
 PRESET_DURATION = {"Fast": 60, "Balanced": 120, "Quality": 300}
 async def _on_generate(mode_name: str, **inputs: Any):
     """Generate handler — async generator yielding (status_html, video_path)."""
     mode = modes.MODE_REGISTRY[mode_name]
-    # Translate UI inputs into the parameterize_fn input dict.
     params: dict[str, Any] = {
         "prompt": inputs.get("prompt", ""),
         "negative_prompt": inputs.get("negative_prompt", ""),
-        "preset": inputs.get("preset", "Balanced").lower(),
         "width": int(inputs.get("width", 512)),
         "height": int(inputs.get("height", 768)),
-        "frames": int(inputs.get("frames", 81)),
-        "fps": int(inputs.get("fps", 24)),
-        "seed": int(inputs.get("seed", 42)),
     }
     for k in (
-        "image",
-        "audio",
-        "first_frame",
-        "last_frame",
-        "input_video",
-        "camera_lora",
-        "camera_strength",
-        "detailer_on",
-        "detailer_strength",
-        "ic_lora",
-        "ic_strength",
-        "pose_on",
-        "audio_cfg",
-        "image_strength",
     ):
         if k in inputs:
             params[k] = inputs[k]
     patches = mode.parameterize_fn(params)
     workflow = wf_module.load_template(mode_name)
     for patch in patches:
         wf_module.set_input(workflow, *patch)
-    wf_module.validate(workflow)
     backend = _get_backend()
-    duration = PRESET_DURATION.get(inputs.get("preset", "Balanced"), 120)
     started = time.time()
     async for event in backend.submit(mode_name, workflow, gpu_duration=duration):
@@ -246,15 +475,15 @@ async def _on_generate(mode_name: str, **inputs: Any):
             )
             yield status, gr.update()
         elif isinstance(event, backend_module.ProgressEvent):
-            stage = (
-                mode.stage_map[event.stage]
-                if event.stage < len(mode.stage_map)
-                else mode.stage_map[-1]
-            )
             eta = (elapsed / max(event.step, 1)) * (event.total_steps - event.step)
             status = ui.render_status(
-                stage_index=event.stage + 1,
-                stage_label=stage.label,
                 step=event.step,
                 total_steps=event.total_steps,
                 elapsed_s=elapsed,
@@ -262,7 +491,8 @@ async def _on_generate(mode_name: str, **inputs: Any):
             )
             yield status, gr.update()
         elif isinstance(event, backend_module.OutputEvent):
-            yield ui._render_idle(), event.video_path
         elif isinstance(event, backend_module.ErrorEvent):
             error_html = (
                 f'<div class="status-card status-error">'
@@ -274,7 +504,7 @@ async def _on_generate(mode_name: str, **inputs: Any):
 def _input_keys_for_mode(mode_name: str, h: dict) -> list[str]:
-    base = ["prompt", "preset", "width", "height", "frames", "fps", "seed"]
     if mode_name == "i2v":
         base.append("image")
     elif mode_name == "a2v":
@@ -295,8 +525,10 @@ def _input_keys_for_mode(mode_name: str, h: dict) -> list[str]:
 def _collect_inputs_for_mode(mode_name: str, h: dict) -> list:
-    """Gather the gr.Component handles to pass into _on_generate."""
-    base = [h["prompt"], h["preset"], h["width"], h["height"], h["frames"], h["fps"], h["seed"]]
     if mode_name == "i2v":
         base.append(h["image"])
     elif mode_name == "a2v":
@@ -308,14 +540,10 @@ def _collect_inputs_for_mode(mode_name: str, h: dict) -> list:
     elif mode_name == "style":
         base.append(h["input_video"])
     base.append(h["negative_prompt"])
-    base.extend(
-        [
-            h["lora"].camera_lora,
-            h["lora"].camera_strength,
-            h["lora"].detailer_on,
-            h["lora"].detailer_strength,
-        ]
-    )
     if h["lora"].ic_lora is not None:
         base.extend([h["lora"].ic_lora, h["lora"].ic_strength])
     if h["lora"].pose_on is not None:

 import os
 import pathlib
+import random
 import sys
 import time
 from typing import Any
 COMFYUI_REPO = "https://github.com/comfyanonymous/ComfyUI.git"
 COMFYUI_COMMIT = os.environ.get(
     "LTX23_AIO_COMFYUI_COMMIT",
     "eb0686bbb60c83e44c3a3e4f7defd0f589cfef10",
         for node_url, node_ref in CUSTOM_NODES_PINNED:
             name = node_url.rstrip(".git").rsplit("/", 1)[-1]
             _git_clone(node_url, comfy_dir / "custom_nodes" / name, ref=node_ref)
         import subprocess
         for cn in (comfy_dir / "custom_nodes").iterdir():
 # ---------------------------------------------------------------------------
+# Styling: hide the default top tab strip (sidebar drives selection),
+# add status-card styling, plus responsive breakpoints (≤1024px tablet,
+# ≤700px mobile).
 # ---------------------------------------------------------------------------
 _CUSTOM_CSS = """
+/* Hide the top tab strip from view, but keep it in the DOM and clickable so
+   the sidebar buttons can drive selection via programmatic click. */
+.aio-tabs > .tab-nav,
+.aio-tabs > div:first-child[role="tablist"],
+.aio-tabs > div:first-child:has([role="tab"]) {
+    position: absolute !important;
+    left: -99999px !important;
+    top: -99999px !important;
+    height: 0 !important;
+    overflow: hidden !important;
+    visibility: visible !important;
+    pointer-events: auto !important;
+}
+/* Sidebar nav buttons */
+.aio-mode-btn { width: 100%; text-align: left; margin: 2px 0; }
+.aio-mode-btn-active { background: rgba(110,168,254,0.15) !important; border-left: 3px solid #6ea8fe !important; }
+/* Sidebar headings */
+.aio-sidebar-heading { font-size: 12px; text-transform: uppercase; letter-spacing: 0.05em; opacity: 0.6; margin-top: 16px !important; margin-bottom: 4px !important; }
+/* Status banner */
 .status-card { padding: 14px 16px; border-radius: 10px; background: rgba(255,255,255,0.04); border: 1px solid rgba(255,255,255,0.08); }
+.status-row { display: flex; gap: 14px; align-items: center; margin-bottom: 8px; flex-wrap: wrap; }
 .status-stage { font-weight: 600; }
 .status-meta { font-size: 12px; opacity: 0.75; }
 .status-bar { height: 6px; background: rgba(255,255,255,0.08); border-radius: 99px; overflow: hidden; }
 .status-fill { height: 100%; background: linear-gradient(90deg,#6ea8fe,#8de9fe); transition: width .3s; }
 .status-mem { font-size: 11px; opacity: 0.6; margin-top: 6px; font-family: ui-monospace, monospace; }
+.status-error { background: rgba(255,90,90,0.08); border-color: rgba(255,90,90,0.25); }
+/* Model status badge */
+.aio-model-badge { padding: 8px 10px; border-radius: 8px; background: rgba(255,255,255,0.04); font-size: 11.5px; font-family: ui-monospace, monospace; opacity: 0.85; }
+/* Responsive: tablet */
+@media (max-width: 1024px) {
+    .aio-sidebar { min-width: 160px !important; }
+    .aio-mode-btn { font-size: 13px !important; padding: 6px 10px !important; }
+}
+/* Responsive: mobile — sidebar collapses to top, single column body */
+@media (max-width: 700px) {
+    .aio-shell { flex-direction: column !important; }
+    .aio-sidebar { width: 100% !important; min-width: unset !important; padding: 0 !important; }
+    .aio-body { width: 100% !important; }
+    .aio-mode-btn-row { display: grid !important; grid-template-columns: repeat(2, 1fr) !important; gap: 6px !important; padding: 8px !important; }
+    .aio-mode-btn { width: 100% !important; font-size: 12.5px !important; padding: 8px !important; text-align: center !important; margin: 0 !important; }
+    .aio-sidebar-heading { font-size: 10px !important; margin: 12px 0 4px !important; padding: 0 8px !important; }
+    .aio-model-badge { margin: 0 8px !important; word-break: break-word; white-space: normal !important; }
+    /* sliders + side-by-side rows: stack vertically on mobile so each value
+       gets its own width budget */
+    .aio-body .form > div, .aio-body [class*="row"] > div { flex: 1 1 100% !important; min-width: 0 !important; }
+    .aio-body [class*="row"] { flex-wrap: wrap !important; }
+}
 """
+# ---------------------------------------------------------------------------
+# UI
+# ---------------------------------------------------------------------------
 def build_app() -> gr.Blocks:
     with gr.Blocks(theme=gr.themes.Soft(), title="LTX 2.3 All-in-One", css=_CUSTOM_CSS) as app:
         gr.Markdown("# ⚡ LTX 2.3 All-in-One")
+        with gr.Row(elem_classes=["aio-shell"]):
+            # Sidebar
+            with gr.Column(scale=1, min_width=200, elem_classes=["aio-sidebar"]):
+                gr.Markdown("**Modes**", elem_classes=["aio-sidebar-heading"])
+                with gr.Column(elem_classes=["aio-mode-btn-row"]):
+                    mode_buttons = {
+                        name: gr.Button(
+                            f"{m.icon}  {m.label}",
+                            elem_classes=["aio-mode-btn"],
+                            variant="secondary",
+                        )
+                        for name, m in modes.MODE_REGISTRY.items()
+                    }
+                gr.Markdown("**Models**", elem_classes=["aio-sidebar-heading"])
+                model_status = gr.HTML(_render_model_status_idle(), elem_id="aio-model-status")
+                refresh_btn = gr.Button("Refresh", size="sm", variant="secondary")
+                unload_btn = gr.Button("Unload all models", size="sm", variant="secondary")
+                gr.Markdown("**Settings**", elem_classes=["aio-sidebar-heading"])
+                gr.Markdown(
+                    "Output: `comfyui/output/LTX2.3/`<br>"
+                    "Set `LTX23_AIO_VRAM=lowvram|normalvram|highvram` to override the auto-detected VRAM tier.",
+                    elem_classes=["aio-model-badge"],
+                )
+            # Body
+            with gr.Column(scale=4, elem_classes=["aio-body"]):
+                handles, tabs_component = _render_mode_panels()
+        # Wire generate buttons
         for name, h in handles.items():
             inputs = _collect_inputs_for_mode(name, h)
             h["generate_btn"].click(
                 inputs=inputs,
                 outputs=[h["status"], h["video_out"]],
             )
+        # Sidebar mode buttons drive Tabs.selected via Gradio's update.
+        for name, btn in mode_buttons.items():
+            btn.click(
+                fn=lambda mode_id=name: gr.Tabs(selected=mode_id),
+                inputs=None,
+                outputs=[tabs_component],
+            )
+        # Sidebar model info wiring
+        refresh_btn.click(fn=_render_model_status, inputs=None, outputs=[model_status])
+        unload_btn.click(fn=_unload_models, inputs=None, outputs=[model_status])
     return app
+def _render_model_status_idle() -> str:
+    return (
+        '<div class="aio-model-badge">device: detecting…<br>'
+        "loaded: —<br>free: —</div>"
+    )
+def _render_model_status() -> str:
+    """Best-effort device + memory readout for the sidebar."""
+    try:
+        be = _get_backend()  # ensure ComfyUI is loaded
+    except Exception as exc:
+        return f'<div class="aio-model-badge">backend not ready<br>{exc}</div>'
+    try:
+        import comfy.model_management as mm
+        import torch
+        device = mm.get_torch_device()
+        free_gb = mm.get_free_memory(device) / (1024**3)
+        if torch.backends.mps.is_available():
+            # MPS unified memory: total physical = total system RAM. The
+            # "recommended max" from torch.mps is a soft cap (~75% of total)
+            # used by the allocator, but actual free can exceed it because
+            # macOS shares RAM between CPU and GPU.
+            try:
+                import psutil
+                total_gb = psutil.virtual_memory().total / (1024**3)
+            except Exception:
+                total_gb = torch.mps.recommended_max_memory() / (1024**3)
+            cap_gb = torch.mps.recommended_max_memory() / (1024**3)
+            label = "MPS (unified)"
+            extra = f"<br>mps cap: {cap_gb:.1f} GB"
+        elif torch.cuda.is_available():
+            total_gb = torch.cuda.get_device_properties(0).total_memory / (1024**3)
+            label = "CUDA"
+            extra = ""
+        else:
+            total_gb = 0.0
+            label = "CPU"
+            extra = ""
+        loaded = len(getattr(mm, "current_loaded_models", []))
+        return (
+            '<div class="aio-model-badge">'
+            f"device: {label}<br>"
+            f"loaded: {loaded} model(s)<br>"
+            f"free: {free_gb:.1f} GB / {total_gb:.1f} GB total"
+            f"{extra}"
+            "</div>"
+        )
+    except Exception as exc:
+        return f'<div class="aio-model-badge">memory probe failed: {exc}</div>'
+def _unload_models() -> str:
+    try:
+        import comfy.model_management as mm
+        import torch
+        mm.unload_all_models()
+        if torch.backends.mps.is_available():
+            torch.mps.empty_cache()
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+    except Exception as exc:
+        return f'<div class="aio-model-badge">unload failed: {exc}</div>'
+    return _render_model_status()
+def _render_mode_panels() -> tuple[dict[str, dict], gr.Tabs]:
+    """Render one (hidden-tab) panel per mode. Returns the component handles + the Tabs component."""
     handles: dict[str, dict] = {}
+    with gr.Tabs(elem_classes=["aio-tabs"]) as tabs:
         for name, mode in modes.MODE_REGISTRY.items():
+            with gr.Tab(label=f"{mode.icon}  {mode.label}", id=name):
                 handles[name] = _render_one_mode(name)
+    return handles, tabs
 def _render_one_mode(name: str) -> dict:
     handles: dict = {"mode": name}
     with gr.Row():
+        with gr.Column(scale=2, min_width=280):
             handles["prompt"] = gr.Textbox(
                 label="Prompt", lines=4, placeholder="Describe the shot..."
             )
                 handles["input_video"] = gr.Video(label="Source video")
             handles["preset"] = ui.preset_bar()
+            # Resolution — up to 4K, /32 step
             with gr.Row():
+                handles["width"] = gr.Slider(
+                    256, 4096, value=512, step=32, label="Width"
+                )
+                handles["height"] = gr.Slider(
+                    256, 4096, value=768, step=32, label="Height"
+                )
+            # Length controlled in seconds (matches the master workflow's mxSlider).
+            # Frames are derived: frames = round(seconds * fps / 8) * 8 + 1.
             with gr.Row():
+                handles["seconds"] = gr.Slider(
+                    minimum=1, maximum=30, value=3, step=1,
+                    label="Length (seconds)",
+                    info="Frames are computed as 8·round(seconds·fps/8)+1 (LTX requires 8k+1)",
+                )
                 handles["fps"] = gr.Slider(8, 30, value=24, step=1, label="FPS")
+            handles["frames_display"] = gr.Markdown("Frames: 73", elem_classes=["aio-frames-display"])
+            with gr.Row():
+                handles["seed"] = gr.Number(label="Seed", value=42, precision=0, minimum=0)
+                handles["randomize_seed"] = gr.Checkbox(label="Randomize seed each run", value=True)
             with gr.Accordion("Advanced ▾", open=False):
                 handles["lora"] = ui.lora_chrome(name)
             handles["generate_btn"] = gr.Button("▶ Generate", variant="primary", size="lg")
+            # Live frames-display update when seconds/fps change
+            def _update_frames(seconds, fps):
+                f = max(9, int(round(float(seconds) * float(fps) / 8) * 8) + 1)
+                return f"**Frames:** {f}  (`{seconds}s` × `{fps} fps`)"
+            handles["seconds"].change(
+                fn=_update_frames,
+                inputs=[handles["seconds"], handles["fps"]],
+                outputs=[handles["frames_display"]],
+            )
+            handles["fps"].change(
+                fn=_update_frames,
+                inputs=[handles["seconds"], handles["fps"]],
+                outputs=[handles["frames_display"]],
+            )
+        with gr.Column(scale=2, min_width=280):
             handles["status"] = ui.status_banner()
             handles["video_out"] = gr.Video(label="Output", autoplay=True)
             handles["history"] = gr.Markdown("")
     return handles
+# ---------------------------------------------------------------------------
+# Backend wiring
+# ---------------------------------------------------------------------------
 _BACKEND: backend_module.ComfyUILibraryBackend | None = None
     return _BACKEND
+_COMFY_INPUT_DIR = pathlib.Path(__file__).parent / "comfyui" / "input"
+def _stage_to_comfy_input(file_path) -> str | None:
+    """Copy/stage a path into comfyui/input/ so ComfyUI's LoadImage etc. can find it."""
+    if not file_path:
+        return None
+    if not isinstance(file_path, (str, pathlib.Path)):
+        file_path = (
+            file_path.get("name") or file_path.get("path") or file_path.get("orig_name")
+            if isinstance(file_path, dict)
+            else None
+        )
+        if not file_path:
+            return None
+    src = pathlib.Path(file_path)
+    if not src.exists() or not src.is_file():
+        print(f"[_stage] skip {file_path!r}", flush=True)
+        return None
+    _COMFY_INPUT_DIR.mkdir(parents=True, exist_ok=True)
+    try:
+        if src.resolve().is_relative_to(_COMFY_INPUT_DIR.resolve()):
+            return src.name
+    except (ValueError, OSError):
+        pass
+    dst = _COMFY_INPUT_DIR / src.name
+    if not dst.exists() or dst.stat().st_size != src.stat().st_size:
+        import shutil
+        shutil.copy2(src, dst)
+    return src.name
 PRESET_DURATION = {"Fast": 60, "Balanced": 120, "Quality": 300}
+def _seconds_to_frames(seconds: float, fps: int) -> int:
+    return max(9, int(round(float(seconds) * float(fps) / 8) * 8) + 1)
 async def _on_generate(mode_name: str, **inputs: Any):
     """Generate handler — async generator yielding (status_html, video_path)."""
     mode = modes.MODE_REGISTRY[mode_name]
+    fps = int(inputs.get("fps", 24))
+    seconds = float(inputs.get("seconds", 3))
+    frames = _seconds_to_frames(seconds, fps)
+    # Seed: respect the explicit value unless the "randomize" checkbox is on.
+    seed = int(inputs.get("seed", 42))
+    if inputs.get("randomize_seed"):
+        seed = random.randint(0, 2**31 - 1)
     params: dict[str, Any] = {
         "prompt": inputs.get("prompt", ""),
         "negative_prompt": inputs.get("negative_prompt", ""),
+        "preset": str(inputs.get("preset", "Balanced")).lower(),
         "width": int(inputs.get("width", 512)),
         "height": int(inputs.get("height", 768)),
+        "frames": frames,
+        "fps": fps,
+        "seed": seed,
     }
     for k in (
+        "image", "audio", "first_frame", "last_frame", "input_video",
+        "camera_lora", "camera_strength", "detailer_on", "detailer_strength",
+        "ic_lora", "ic_strength", "pose_on", "audio_cfg", "image_strength",
     ):
         if k in inputs:
             params[k] = inputs[k]
+    for key in ("image", "audio", "first_frame", "last_frame", "input_video"):
+        if key in params and params[key]:
+            staged = _stage_to_comfy_input(params[key])
+            if staged is None:
+                params.pop(key, None)
+            else:
+                params[key] = staged
     patches = mode.parameterize_fn(params)
     workflow = wf_module.load_template(mode_name)
     for patch in patches:
         wf_module.set_input(workflow, *patch)
     backend = _get_backend()
+    duration = PRESET_DURATION.get(str(inputs.get("preset", "Balanced")), 120)
     started = time.time()
     async for event in backend.submit(mode_name, workflow, gpu_duration=duration):
             )
             yield status, gr.update()
         elif isinstance(event, backend_module.ProgressEvent):
+            # Each sampler in the workflow gets its own stage label "Diffusion (n)".
+            # The static `mode.stage_map` describes the full pipeline (encode →
+            # diffusion → upscale → diffusion → decode) but our progress hook
+            # only fires inside samplers, so we label by sampler index instead.
+            label = f"Diffusion (Stage {event.stage})"
             eta = (elapsed / max(event.step, 1)) * (event.total_steps - event.step)
             status = ui.render_status(
+                stage_index=event.stage,
+                stage_label=label,
                 step=event.step,
                 total_steps=event.total_steps,
                 elapsed_s=elapsed,
             )
             yield status, gr.update()
         elif isinstance(event, backend_module.OutputEvent):
+            video_update = event.video_path if event.video_path else gr.update()
+            yield ui._render_idle(), video_update
         elif isinstance(event, backend_module.ErrorEvent):
             error_html = (
                 f'<div class="status-card status-error">'
 def _input_keys_for_mode(mode_name: str, h: dict) -> list[str]:
+    base = ["prompt", "preset", "width", "height", "seconds", "fps", "seed", "randomize_seed"]
     if mode_name == "i2v":
         base.append("image")
     elif mode_name == "a2v":
 def _collect_inputs_for_mode(mode_name: str, h: dict) -> list:
+    base = [
+        h["prompt"], h["preset"], h["width"], h["height"],
+        h["seconds"], h["fps"], h["seed"], h["randomize_seed"],
+    ]
     if mode_name == "i2v":
         base.append(h["image"])
     elif mode_name == "a2v":
     elif mode_name == "style":
         base.append(h["input_video"])
     base.append(h["negative_prompt"])
+    base.extend([
+        h["lora"].camera_lora, h["lora"].camera_strength,
+        h["lora"].detailer_on, h["lora"].detailer_strength,
+    ])
     if h["lora"].ic_lora is not None:
         base.extend([h["lora"].ic_lora, h["lora"].ic_strength])
     if h["lora"].pose_on is not None:

backend.py CHANGED Viewed

@@ -66,6 +66,98 @@ class _StubServer:
         pass
 def _comfy_dir() -> pathlib.Path:
     if _on_spaces():
         return pathlib.Path("/data/comfyui")
@@ -96,6 +188,28 @@ class ComfyUILibraryBackend:
         import execution  # top-level module — provides PromptExecutor
         import nodes  # top-level module — provides init_extra_nodes (async)
         # `nodes.init_extra_nodes` is async. We may be called from within a
         # running event loop (Gradio's handler) — running `asyncio.run()` there
         # raises. Run the coroutine in a fresh loop on a worker thread instead.
@@ -148,13 +262,27 @@ class ComfyUILibraryBackend:
         def _push(event: Any) -> None:
             asyncio.run_coroutine_threadsafe(queue.put(event), loop)
-        def _hook(value: int, total: int, _preview=None) -> None:
             _push(
                 ProgressEvent(
-                    stage=0,
                     stage_label="diffusion",
-                    step=int(value),
-                    total_steps=int(total),
                 )
             )
@@ -163,6 +291,21 @@ class ComfyUILibraryBackend:
             saved_hook = getattr(comfy.utils, "PROGRESS_BAR_HOOK", None)
             try:
                 # Use the public setter; it writes the same global the
                 # ProgressBar class reads, but is the documented API.
                 comfy.utils.set_progress_bar_global_hook(_hook)
@@ -170,12 +313,15 @@ class ComfyUILibraryBackend:
                     workflow,
                     prompt_id="ltx23-aio",
                     extra_data={"client_id": "ltx23-aio"},
-                    execute_outputs=[],
                 )
-                # PromptExecutor writes output files via VHS_VideoCombine; we read its
-                # history to find the most recent saved video.
-                outputs = list(self._executor.outputs.values())
-                video_path = _first_video_path(outputs) or ""
                 _push(OutputEvent(video_path=video_path))
             except Exception as exc:
                 tb_text = tb_mod.format_exc()

         pass
+class _StubPromptQueue:
+    """Stub matching the surface VideoHelperSuite + others touch."""
+    currently_running: dict = {}
+    history: dict = {}
+    flags: dict = {}
+    def get_current_queue(self) -> tuple[list, list]:
+        return ([], [])
+    def get_tasks_remaining(self) -> int:
+        return 0
+    def set_flag(self, name: str, data) -> None:
+        pass
+    def get_flags(self, *a, **kw) -> dict:
+        return {}
+    def task_done(self, *a, **kw) -> None:
+        pass
+    def put(self, *a, **kw) -> None:
+        pass
+    def wipe_queue(self) -> None:
+        pass
+    def delete_queue_item(self, *a, **kw) -> None:
+        pass
class _StubPromptServerInstance:
    """Surface that ComfyUI's `server.PromptServer.instance` exposes to custom nodes.

    VideoHelperSuite, KJNodes, and others read this at import time. They mostly
    use it to register HTTP routes or send WS events or peek at the prompt queue.
    No-ops here are fine — we have no real server.
    """

    client_id: str | None = "ltx23-aio"
    # KJNodes' preview thread reads `last_node_id.encode('ascii')` directly.
    # ComfyUI's real server keeps it as a string per executing node and resets
    # to None at end-of-prompt — which races the preview thread. Keep it a
    # safe non-empty string so `.encode()` is never called on None.
    last_node_id: str = "ltx23-aio"
    web_root: str = ""

    class _Routes:
        """Route table stub: the decorators return the handler unchanged."""

        def get(self, *a, **kw):
            return lambda fn: fn

        def post(self, *a, **kw):
            return lambda fn: fn

        def static(self, *a, **kw):
            return None

    routes = _Routes()
    sockets: dict = {}
    prompt_queue = _StubPromptQueue()
    # Custom-Scripts checks PromptServer.instance.supports — claim the
    # "custom_nodes_from_web" capability so it skips its JS install path.
    supports: list[str] = ["custom_nodes_from_web"]

    def add_routes(self) -> None:
        """No-op: there is no HTTP server to attach routes to."""

    def send_sync(self, event: str, data: dict, sid: str | None = None) -> None:
        """No-op: events are dropped because no client is connected."""

    def send_progress_text(self, text: str, node_id=None, sid=None) -> None:
        """No-op: there is no UI to surface intermediate text on."""
        # Comfy_extras nodes call this.

    def queue_updated(self) -> None:
        """No-op: the stub queue never changes."""

    def get_node_class_def(self, *a, **kw):
        """Return None for every lookup."""
        return None

    def __getattr__(self, name):
        """Return a no-op callable for any attribute not defined above.

        This is a deliberate liberal catch-all so the inference path doesn't
        die on cosmetic UI hooks; `hasattr` therefore reports True for
        ordinary names.

        Raises:
            AttributeError: For dunder names, so protocol probes such as
                `copy.deepcopy`'s `getattr(obj, "__deepcopy__", None)` fall
                back to their defaults instead of calling the no-op and
                getting None back.
        """
        if name.startswith("__") and name.endswith("__"):
            raise AttributeError(name)

        def _noop(*a, **kw):
            return None

        return _noop
 def _comfy_dir() -> pathlib.Path:
     if _on_spaces():
         return pathlib.Path("/data/comfyui")
         import execution  # top-level module — provides PromptExecutor
         import nodes  # top-level module — provides init_extra_nodes (async)
+        # CRITICAL ordering fix: ComfyUI's nodes.py:24 inserts `comfyui/comfy/`
+        # at sys.path[0]. That dir contains a module-style `utils.py`, which
+        # shadows `comfyui/utils/` (a package containing install_util.py).
+        # Some custom nodes (KJNodes, VideoHelperSuite via app.frontend_management)
+        # do `from utils.install_util import …` and get `comfy/utils.py` instead,
+        # raising "'utils' is not a package". Rewrite sys.path so comfy_dir is
+        # ahead of comfy_dir/comfy and force-clear any cached `utils` binding.
+        comfy_subdir = str(self._comfy_dir / "comfy")
+        sys.path = [p for p in sys.path if p not in (str(self._comfy_dir), comfy_subdir)]
+        sys.path.insert(0, comfy_subdir)
+        sys.path.insert(0, str(self._comfy_dir))
+        if "utils" in sys.modules and not getattr(sys.modules["utils"], "__path__", None):
+            del sys.modules["utils"]
+        # Some custom nodes (e.g. VideoHelperSuite) read `server.PromptServer.instance`
+        # at import time. We don't run a real ComfyUI server, so install a stub
+        # that exposes the attributes those nodes touch (sockets, send, etc.).
+        import server as comfy_server
+        if getattr(comfy_server.PromptServer, "instance", None) is None:
+            comfy_server.PromptServer.instance = _StubPromptServerInstance()
         # `nodes.init_extra_nodes` is async. We may be called from within a
         # running event loop (Gradio's handler) — running `asyncio.run()` there
         # raises. Run the coroutine in a fresh loop on a worker thread instead.
         def _push(event: Any) -> None:
             asyncio.run_coroutine_threadsafe(queue.put(event), loop)
+        # Track stage progression. ComfyUI fires the progress hook from inside
+        # samplers, so we advance the stage every time we observe a new sampler
+        # starting (step==0 with a different total than before, or a "new run"
+        # signal — value smaller than the running max for the same total).
+        progress_state = {"stage": 0, "prev_total": -1, "max_step": -1}
+        def _hook(value: int, total: int, _preview=None, **_kwargs: Any) -> None:
+            v, t = int(value), int(total)
+            # New sampler started (different total, or step rewound)
+            if t != progress_state["prev_total"] or v < progress_state["max_step"]:
+                progress_state["stage"] += 1
+                progress_state["prev_total"] = t
+                progress_state["max_step"] = v
+            else:
+                progress_state["max_step"] = max(progress_state["max_step"], v)
             _push(
                 ProgressEvent(
+                    stage=progress_state["stage"],
                     stage_label="diffusion",
+                    step=v,
+                    total_steps=t,
                 )
             )
             saved_hook = getattr(comfy.utils, "PROGRESS_BAR_HOOK", None)
             try:
+                # Workflow is already API-format (saved from ComfyUI editor's
+                # "Save (API Format)"), so it can be handed to PromptExecutor
+                # directly. The execute_outputs list pinpoints which output
+                # nodes to evaluate — we let PromptExecutor walk the whole
+                # graph by passing every output-class node id.
+                output_ids = [
+                    nid for nid, n in workflow.items()
+                    if n.get("class_type", "").startswith(("SaveVideo", "VHS_VideoCombine", "PreviewAudio", "CreateVideo"))
+                ]
+                print(
+                    f"[backend] submitting workflow: {len(workflow)} nodes, "
+                    f"output_ids={output_ids}",
+                    file=sys.stderr,
+                    flush=True,
+                )
                 # Use the public setter; it writes the same global the
                 # ProgressBar class reads, but is the documented API.
                 comfy.utils.set_progress_bar_global_hook(_hook)
                     workflow,
                     prompt_id="ltx23-aio",
                     extra_data={"client_id": "ltx23-aio"},
+                    execute_outputs=output_ids,
                 )
+                # PromptExecutor stores per-node UI info in history_result["outputs"]
+                # after execute_async. Each entry mirrors what the JS frontend
+                # would receive — including SaveVideo's "filenames"/"video" lists
+                # that point at the saved file inside ComfyUI's output dir.
+                hist = getattr(self._executor, "history_result", {}) or {}
+                outs = hist.get("outputs") or {}
+                video_path = _first_video_path(list(outs.values())) or ""
                 _push(OutputEvent(video_path=video_path))
             except Exception as exc:
                 tb_text = tb_mod.format_exc()

modes.py CHANGED Viewed

@@ -5,14 +5,13 @@ Each Mode declares:
 - label: display name
 - icon: single-character or emoji icon for the sidebar
 - stage_map: list of (label, expected_share_pct) for the status banner
-- parameterize_fn: (Gradio inputs dict) -> list[(node_id, widget_index, value)]
-The parameterize_fn is the only mode-specific logic. Everything else (workflow
-loading, validation, dispatch) is mode-agnostic and lives in workflow.py /
-backend.py.
-Tasks 11 (T2V + I2V) and 12 (A2V + Lipsync + Keyframe + Style) populate
-MODE_REGISTRY. This task only sets up the dataclass and the empty container.
 """
 from __future__ import annotations
@@ -21,7 +20,8 @@ from collections.abc import Callable
 from dataclasses import dataclass, field
 from typing import Any
-Patch = tuple[int, int | str, Any]
 ParameterizeFn = Callable[[dict[str, Any]], list[Patch]]
@@ -40,163 +40,91 @@ class Mode:
     stage_map: list[Stage] = field(default_factory=list)
-# Filled in by tasks 11–12.
 MODE_REGISTRY: dict[str, Mode] = {}
 # ---------------------------------------------------------------------------
-# Node-id constants — captured from workflows/{t2v,i2v}.json on 2026-04-30.
-#
-# The master workflow uses rgthree's GetNode/SetNode for indirection. SetNodes
-# named "pos"/"neg" expose the *outputs* of CLIPTextEncode, not the prompt
-# strings. So the canonical place to set the prompt text is the CLIPTextEncode
-# node itself.
-#
-# Width/Height/FPS are INTConstant nodes whose values feed downstream Set_*
-# variables.  Clip length comes from a mxSlider (in seconds, then multiplied by
-# FPS via a MathExpression to compute frames).  No SetNode for "noise"/seed
-# survived the extraction, so seed is intentionally NOT patched here — the
-# template's hard-coded value is used until we wire RandomNoise injection in
-# Task 12+.
-#
-# LoRA rows live inside a single Power Lora Loader (rgthree) node whose
-# widgets_values is a list of dicts. Patching a specific row requires knowing
-# the index, and the canonical mapping (camera_lora value -> row index) belongs
-# in models.py once camera-LoRA selection lands. Deferred for now.
 # ---------------------------------------------------------------------------
-T2V_NODE_PROMPT = 5536  # CLIPTextEncode positive — wv[0] = prompt
-T2V_NODE_NEG_PROMPT = 5537  # CLIPTextEncode negative — wv[0] = negative prompt
-T2V_NODE_WIDTH = 5383  # INTConstant "Width" — wv[0]
-T2V_NODE_HEIGHT = 5382  # INTConstant "Height" — wv[0]
-T2V_NODE_FPS = 5445  # INTConstant "FPS" — wv[0]
-T2V_NODE_CLIP_LENGTH = 196  # mxSlider "Clip Length ( in seconds )" — wv[0]
-I2V_NODE_PROMPT = 5536
-I2V_NODE_NEG_PROMPT = 5537
-I2V_NODE_WIDTH = 5383
-I2V_NODE_HEIGHT = 5382
-I2V_NODE_FPS = 5445
-I2V_NODE_CLIP_LENGTH = 196
-I2V_NODE_IMAGE = 149  # LoadImage "Load Image1" — wv[0] = filename
-# Mode-specific media nodes — captured from workflows/{a2v,lipsync,keyframe,style}.json
-# on 2026-04-30. All four templates contain the same node ids for these inputs (the
-# Loaders group is shared across modes); only a subset is wired into each mode's
-# pipeline.
-#
-# VHS_LoadAudioUpload and VHS_LoadVideo carry dict-style widgets_values keyed by
-# "audio"/"video". The current set_input helper is list-indexed; passing
-# widget_index=0 against a dict adds a numeric "0" key without replacing the
-# canonical "audio"/"video" entry. The runtime file-path swap is therefore not
-# yet wired — Task 12 only validates the patch tuple set. Real path injection
-# lands when backend.py grows file-staging in Task 17.
-A2V_NODE_PROMPT = 5536
-A2V_NODE_NEG_PROMPT = 5537
-A2V_NODE_WIDTH = 5383
-A2V_NODE_HEIGHT = 5382
-A2V_NODE_FPS = 5445
-A2V_NODE_CLIP_LENGTH = 196
-A2V_NODE_AUDIO = 5400  # VHS_LoadAudioUpload — dict wv keyed by "audio"
-LIPSYNC_NODE_PROMPT = 5536
-LIPSYNC_NODE_NEG_PROMPT = 5537
-LIPSYNC_NODE_FPS = 5445
-LIPSYNC_NODE_CLIP_LENGTH = 196
-LIPSYNC_NODE_IMAGE = 149  # LoadImage "Load Image1" — wv[0] = filename
-LIPSYNC_NODE_AUDIO = 5400  # VHS_LoadAudioUpload — dict wv keyed by "audio"
-KEYFRAME_NODE_PROMPT = 5536
-KEYFRAME_NODE_NEG_PROMPT = 5537
-KEYFRAME_NODE_FPS = 5445
-KEYFRAME_NODE_CLIP_LENGTH = 196
-KEYFRAME_NODE_FIRST_FRAME = 149  # LoadImage "Load Image1" — wv[0] = filename
-KEYFRAME_NODE_LAST_FRAME = 5437  # LoadImage "Load Image2" — wv[0] = filename
-STYLE_NODE_PROMPT = 5536
-STYLE_NODE_NEG_PROMPT = 5537
-STYLE_NODE_FPS = 5445
-STYLE_NODE_CLIP_LENGTH = 196
-STYLE_NODE_INPUT_VIDEO = 5444  # VHS_LoadVideo — dict wv keyed by "video"
-def _frames_to_seconds(frames: int, fps: int) -> int:
-    """Convert (frames, fps) to integer seconds for the mxSlider clip-length widget.
-    The downstream MathExpression is `a*b+1` (a=seconds, b=fps -> total frames),
-    so for a target frame count F at fps R we need seconds = ceil((F - 1) / R).
-    Round up so the slider is never short of the requested frames.
-    """
-    if fps <= 0:
-        return 1
-    return max(1, -(-(frames - 1) // fps))
 def _t2v_parameterize(inp: dict[str, Any]) -> list[Patch]:
-    return [
-        (T2V_NODE_PROMPT, 0, inp["prompt"]),
-        (T2V_NODE_NEG_PROMPT, 0, inp.get("negative_prompt", "")),
-        (T2V_NODE_WIDTH, 0, int(inp["width"])),
-        (T2V_NODE_HEIGHT, 0, int(inp["height"])),
-        (T2V_NODE_FPS, 0, int(inp["fps"])),
-        (T2V_NODE_CLIP_LENGTH, 0, _frames_to_seconds(int(inp["frames"]), int(inp["fps"]))),
-    ]
 def _i2v_parameterize(inp: dict[str, Any]) -> list[Patch]:
-    return [
-        (I2V_NODE_PROMPT, 0, inp["prompt"]),
-        (I2V_NODE_NEG_PROMPT, 0, inp.get("negative_prompt", "")),
-        (I2V_NODE_IMAGE, 0, inp["image"]),
-        (I2V_NODE_WIDTH, 0, int(inp["width"])),
-        (I2V_NODE_HEIGHT, 0, int(inp["height"])),
-        (I2V_NODE_FPS, 0, int(inp["fps"])),
-        (I2V_NODE_CLIP_LENGTH, 0, _frames_to_seconds(int(inp["frames"]), int(inp["fps"]))),
     ]
 def _a2v_parameterize(inp: dict[str, Any]) -> list[Patch]:
-    return [
-        (A2V_NODE_PROMPT, 0, inp["prompt"]),
-        (A2V_NODE_NEG_PROMPT, 0, inp.get("negative_prompt", "")),
-        (A2V_NODE_AUDIO, "audio", inp["audio"]),
-        (A2V_NODE_WIDTH, 0, int(inp["width"])),
-        (A2V_NODE_HEIGHT, 0, int(inp["height"])),
-        (A2V_NODE_FPS, 0, int(inp["fps"])),
-        (A2V_NODE_CLIP_LENGTH, 0, _frames_to_seconds(int(inp["frames"]), int(inp["fps"]))),
     ]
 def _lipsync_parameterize(inp: dict[str, Any]) -> list[Patch]:
-    return [
-        (LIPSYNC_NODE_PROMPT, 0, inp["prompt"]),
-        (LIPSYNC_NODE_NEG_PROMPT, 0, inp.get("negative_prompt", "")),
-        (LIPSYNC_NODE_IMAGE, 0, inp["image"]),
-        (LIPSYNC_NODE_AUDIO, "audio", inp["audio"]),
-        (LIPSYNC_NODE_FPS, 0, int(inp["fps"])),
-        (LIPSYNC_NODE_CLIP_LENGTH, 0, _frames_to_seconds(int(inp["frames"]), int(inp["fps"]))),
     ]
 def _keyframe_parameterize(inp: dict[str, Any]) -> list[Patch]:
-    return [
-        (KEYFRAME_NODE_PROMPT, 0, inp["prompt"]),
-        (KEYFRAME_NODE_NEG_PROMPT, 0, inp.get("negative_prompt", "")),
-        (KEYFRAME_NODE_FIRST_FRAME, 0, inp["first_frame"]),
-        (KEYFRAME_NODE_LAST_FRAME, 0, inp["last_frame"]),
-        (KEYFRAME_NODE_FPS, 0, int(inp["fps"])),
-        (KEYFRAME_NODE_CLIP_LENGTH, 0, _frames_to_seconds(int(inp["frames"]), int(inp["fps"]))),
     ]
 def _style_parameterize(inp: dict[str, Any]) -> list[Patch]:
-    return [
-        (STYLE_NODE_PROMPT, 0, inp["prompt"]),
-        (STYLE_NODE_NEG_PROMPT, 0, inp.get("negative_prompt", "")),
-        (STYLE_NODE_INPUT_VIDEO, "video", inp["input_video"]),
-        (STYLE_NODE_FPS, 0, int(inp["fps"])),
-        (STYLE_NODE_CLIP_LENGTH, 0, _frames_to_seconds(int(inp["frames"]), int(inp["fps"]))),
     ]
@@ -226,16 +154,7 @@ _A2V_STAGES = [
     Stage("Decode video", 10),
 ]
-_LIPSYNC_STAGES = [
-    Stage("Encode prompt", 5),
-    Stage("Encode image", 3),
-    Stage("Encode audio", 5),
-    Stage("Diffusion (Stage 1)", 52),
-    Stage("Spatial upscale", 7),
-    Stage("Diffusion (Stage 2)", 18),
-    Stage("Decode video", 10),
-]
 _KEYFRAME_STAGES = [
     Stage("Encode prompt", 5),
     Stage("Encode keyframes", 5),
@@ -244,16 +163,14 @@ _KEYFRAME_STAGES = [
     Stage("Diffusion (Stage 2)", 18),
     Stage("Decode video", 10),
 ]
 _STYLE_STAGES = [
     Stage("Encode prompt", 5),
-    Stage("Decode source video", 5),
-    Stage("Diffusion (Stage 1)", 55),
-    Stage("Spatial upscale", 7),
-    Stage("Diffusion (Stage 2)", 18),
-    Stage("Decode video", 10),
 ]
 MODE_REGISTRY["t2v"] = Mode(
     name="t2v",
     label="Text → Video",

 - label: display name
 - icon: single-character or emoji icon for the sidebar
 - stage_map: list of (label, expected_share_pct) for the status banner
+- parameterize_fn: (Gradio inputs dict) -> list[(node_id, field_name, value)]
+The workflows live in `workflows/<mode>.json` in ComfyUI's API format
+(`{node_id_str: {class_type, inputs}}` — produced by the editor's
+"Save (API Format)" feature). That format is what `PromptExecutor.execute()`
+consumes directly, so parameterize_fns just patch field values by node id;
+no graph→API conversion is needed.
 """
 from __future__ import annotations
 from dataclasses import dataclass, field
 from typing import Any
+# (node_id, field_name, value)
+Patch = tuple[str, str, Any]
 ParameterizeFn = Callable[[dict[str, Any]], list[Patch]]
     stage_map: list[Stage] = field(default_factory=list)
 MODE_REGISTRY: dict[str, Mode] = {}
 # ---------------------------------------------------------------------------
+# Shared user-input node IDs across all 6 mode API workflows.
+# Captured 2026-05-01 from `/Users/techfreakworm/Downloads/workflows/*_api.json`
+# (master workflow exported via "Save API Format" per mode).
 # ---------------------------------------------------------------------------
+NODE_PROMPT = "5536"  # CLIPTextEncode (positive) — inputs.text
+NODE_NEG_PROMPT = "5537"  # CLIPTextEncode (negative) — inputs.text
+NODE_WIDTH = "5383"  # INTConstant — inputs.value
+NODE_HEIGHT = "5382"  # INTConstant — inputs.value
+NODE_FPS = "5445"  # INTConstant — inputs.value
+NODE_CLIP_SECONDS = "196"  # mxSlider — inputs.Xi (length in seconds; frames = Xi*fps+1)
+NODE_IMAGE_1 = "149"  # LoadImage (first frame / portrait) — inputs.image
+NODE_IMAGE_2 = "5437"  # LoadImage (last frame for keyframe mode) — inputs.image
+NODE_AUDIO = "5400"  # VHS_LoadAudioUpload — inputs.audio
+NODE_VIDEO = "5444"  # VHS_LoadVideo — inputs.video
+# Per-mode RandomNoise (subgraph-internal): id format `<subgraph_inst>:<inner>`.
+SEED_NODE_BY_MODE: dict[str, str] = {
+    "t2v": "5464:5539",
+    "a2v": "463:5540",
+    "i2v": "209:5541",
+    "lipsync": "521:5542",
+    "keyframe": "670:5543",
+    "style": "5364:5545",
+}
+def _seconds_for(frames: int, fps: int) -> int:
+    """Inverse of `frames = seconds*fps + 1` from the master's MathExpression."""
+    return max(1, (max(1, int(frames)) - 1) // max(1, int(fps)))
def _shared_patches(inp: dict[str, Any], mode: str) -> list[Patch]:
    """Build the patches every mode applies.

    Covers prompt, negative prompt, width, height, fps, clip length in
    seconds and the noise seed. Keys missing from `inp` fall back to:
    empty prompts, 512x768, 24 fps, 81 frames, seed 42.

    Args:
        inp: Gradio inputs dict.
        mode: Key into `SEED_NODE_BY_MODE` selecting the seed node to patch.

    Raises:
        KeyError: If `mode` has no entry in `SEED_NODE_BY_MODE`.
    """
    prompt = inp.get("prompt", "")
    negative = inp.get("negative_prompt", "")
    width = int(inp.get("width", 512))
    height = int(inp.get("height", 768))
    fps = int(inp.get("fps", 24))
    frames = int(inp.get("frames", 81))
    # The slider node takes whole seconds, not frames.
    seconds = _seconds_for(frames, fps)
    seed_node = SEED_NODE_BY_MODE[mode]
    seed = int(inp.get("seed", 42))

    patches: list[Patch] = [
        (NODE_PROMPT, "text", prompt),
        (NODE_NEG_PROMPT, "text", negative),
        (NODE_WIDTH, "value", width),
        (NODE_HEIGHT, "value", height),
        (NODE_FPS, "value", fps),
        (NODE_CLIP_SECONDS, "Xi", seconds),
        (seed_node, "noise_seed", seed),
    ]
    return patches
 def _t2v_parameterize(inp: dict[str, Any]) -> list[Patch]:
+    return _shared_patches(inp, "t2v")
 def _i2v_parameterize(inp: dict[str, Any]) -> list[Patch]:
+    return _shared_patches(inp, "i2v") + [
+        (NODE_IMAGE_1, "image", inp["image"]),
     ]
 def _a2v_parameterize(inp: dict[str, Any]) -> list[Patch]:
+    return _shared_patches(inp, "a2v") + [
+        (NODE_AUDIO, "audio", inp["audio"]),
     ]
 def _lipsync_parameterize(inp: dict[str, Any]) -> list[Patch]:
+    return _shared_patches(inp, "lipsync") + [
+        (NODE_IMAGE_1, "image", inp["image"]),
+        (NODE_AUDIO, "audio", inp["audio"]),
     ]
 def _keyframe_parameterize(inp: dict[str, Any]) -> list[Patch]:
+    return _shared_patches(inp, "keyframe") + [
+        (NODE_IMAGE_1, "image", inp["first_frame"]),
+        (NODE_IMAGE_2, "image", inp["last_frame"]),
     ]
 def _style_parameterize(inp: dict[str, Any]) -> list[Patch]:
+    return _shared_patches(inp, "style") + [
+        (NODE_VIDEO, "video", inp["input_video"]),
     ]
     Stage("Decode video", 10),
 ]
+_LIPSYNC_STAGES = list(_A2V_STAGES)
 _KEYFRAME_STAGES = [
     Stage("Encode prompt", 5),
     Stage("Encode keyframes", 5),
     Stage("Diffusion (Stage 2)", 18),
     Stage("Decode video", 10),
 ]
 _STYLE_STAGES = [
     Stage("Encode prompt", 5),
+    Stage("Encode source video", 10),
+    Stage("Diffusion", 70),
+    Stage("Decode video", 15),
 ]
 MODE_REGISTRY["t2v"] = Mode(
     name="t2v",
     label="Text → Video",

workflow.py CHANGED Viewed

@@ -1,4 +1,9 @@
-"""Pure functions over LTX 2.3 mode workflow JSON templates."""
 from __future__ import annotations
@@ -13,74 +18,31 @@ VALID_MODES: tuple[str, ...] = ("t2v", "a2v", "i2v", "lipsync", "keyframe", "sty
 def load_template(mode: str) -> dict[str, Any]:
-    """Load a fresh, independent copy of the named mode's workflow template."""
     if mode not in VALID_MODES:
         raise ValueError(f"unknown mode {mode!r}; expected one of {VALID_MODES}")
     path = WORKFLOWS_DIR / f"{mode}.json"
     return copy.deepcopy(json.loads(path.read_text()))
-def set_input(workflow: dict[str, Any], node_id: int, widget_index: int | str, value: Any) -> None:
-    """Patch a node's widgets_values in place.
-    Supports both list-style widgets_values (most ComfyUI nodes — patch by integer index,
-    auto-extending with None) and dict-style widgets_values (VHS_LoadAudioUpload and
-    similar — patch by string key, raising KeyError if the key doesn't exist).
     Args:
-        workflow: A workflow dict (must have a "nodes" list).
-        node_id: The id of the node to patch.
-        widget_index: Integer index (for list widgets) or string key (for dict widgets).
-        value: New value.
     Raises:
-        KeyError: If no node with the given id exists, or for dict widgets, if the key
-            doesn't already exist on the target dict (we don't add new keys).
-        TypeError: If widget_index type doesn't match the node's widgets_values type.
     """
-    for node in workflow["nodes"]:
-        if node.get("id") != node_id:
-            continue
-        widgets = node.get("widgets_values")
-        if isinstance(widgets, dict):
-            if not isinstance(widget_index, str):
-                raise TypeError(
-                    f"node {node_id} has dict widgets_values; widget_index must be str, "
-                    f"got {type(widget_index).__name__}"
-                )
-            if widget_index not in widgets:
-                raise KeyError(
-                    f"node {node_id} dict widgets_values has no key {widget_index!r}; "
-                    f"available keys: {list(widgets.keys())}"
-                )
-            widgets[widget_index] = value
-            return
-        # List/None case — preserve existing list-extension behavior.
-        if not isinstance(widget_index, int):
-            raise TypeError(
-                f"node {node_id} has list widgets_values; widget_index must be int, "
-                f"got {type(widget_index).__name__}"
-            )
-        if widgets is None:
-            widgets = []
-            node["widgets_values"] = widgets
-        while len(widgets) <= widget_index:
-            widgets.append(None)
-        widgets[widget_index] = value
-        return
-    raise KeyError(f"node id {node_id} not found in workflow")
-def validate(workflow: dict[str, Any]) -> None:
-    """Static schema validation. Raises ValueError on the first problem found."""
-    nodes = workflow.get("nodes")
-    if not isinstance(nodes, list) or len(nodes) == 0:
-        raise ValueError("workflow has no nodes")
-    node_ids = {n.get("id") for n in nodes if "id" in n}
-    for link in workflow.get("links", []):
-        if not isinstance(link, list) or len(link) < 6:
-            raise ValueError(f"malformed link {link}")
-        _, src, _, dst, _, _ = link
-        if src not in node_ids or dst not in node_ids:
-            raise ValueError(f"orphan link {link}")

+"""Pure functions over LTX 2.3 mode API-format workflow templates.
+Templates in `workflows/<mode>.json` are saved from ComfyUI's editor via
+"Save (API Format)". Shape: `{node_id_str: {"class_type": str, "inputs": dict}}`.
+This is what ComfyUI's `PromptExecutor.execute(prompt=...)` expects directly.
+"""
 from __future__ import annotations
 def load_template(mode: str) -> dict[str, Any]:
+    """Load a fresh, independent copy of the named mode's API workflow template."""
     if mode not in VALID_MODES:
         raise ValueError(f"unknown mode {mode!r}; expected one of {VALID_MODES}")
     path = WORKFLOWS_DIR / f"{mode}.json"
     return copy.deepcopy(json.loads(path.read_text()))
def set_input(workflow: dict[str, Any], node_id: int | str, field: str, value: Any) -> None:
    """Set one input field on a node of an API-format workflow, in place.

    Each node in an API-format workflow carries an `inputs` dict keyed by
    field name. `node_id` is stringified before lookup, so ints and strings
    are interchangeable; subgraph-internal nodes use "<inst>:<inner>" keys.
    A node without an `inputs` dict gets one created, and a field that is
    not yet present is added.

    Args:
        workflow: API-format workflow dict (mapping id → {class_type, inputs}).
        node_id: Dict key of the target node.
        field: Name of the input field to set.
        value: New value (literal, or `[src_id, src_slot]` link form).

    Raises:
        KeyError: If the node doesn't exist in the workflow.
    """
    key = str(node_id)
    if key in workflow:
        node_inputs = workflow[key].setdefault("inputs", {})
        node_inputs[field] = value
        return
    raise KeyError(f"node id {key!r} not found in workflow")

workflows/a2v.json CHANGED Viewed