techfreakworm committed on
Commit
bce3dbc
·
unverified ·
1 Parent(s): 107f495

feat: API-format workflows + sidebar nav + responsive UI

Browse files

Switch to ComfyUI API-format JSON for all 6 modes (saved from editor's
"Save API Format"), avoiding the editor->API converter. workflow.py is now
just load_template + set_input over a flat {id: {class_type, inputs}} dict.
modes.py shrinks to ~10 patches per mode (prompt, dimensions, seed, media
inputs).

backend.py wires PromptExecutor with cache_args, server stub, async
nodes.init_extra_nodes via worker thread, sys.path ordering so KJNodes/VHS
find utils.install_util, and a per-sampler stage counter.

app.py: sidebar nav (gr.Tabs hidden offscreen, sidebar buttons drive
gr.Tabs.selected); device/memory badge; preset budget per mode;
seconds-driven length slider with derived frames display; W/H sliders to
4096; randomize-seed toggle; responsive CSS for tablet (≤1024px) and
mobile (≤700px).

app.py CHANGED
@@ -5,6 +5,7 @@ from __future__ import annotations
5
 
6
  import os
7
  import pathlib
 
8
  import sys
9
  import time
10
  from typing import Any
@@ -26,8 +27,6 @@ def _on_spaces() -> bool:
26
 
27
 
28
  COMFYUI_REPO = "https://github.com/comfyanonymous/ComfyUI.git"
29
- # Pinned to the same commit the local git submodule uses (set in Task 5).
30
- # Override via env var only when intentionally testing a different ComfyUI version.
31
  COMFYUI_COMMIT = os.environ.get(
32
  "LTX23_AIO_COMFYUI_COMMIT",
33
  "eb0686bbb60c83e44c3a3e4f7defd0f589cfef10",
@@ -58,7 +57,6 @@ def _bootstrap() -> None:
58
  for node_url, node_ref in CUSTOM_NODES_PINNED:
59
  name = node_url.rstrip(".git").rsplit("/", 1)[-1]
60
  _git_clone(node_url, comfy_dir / "custom_nodes" / name, ref=node_ref)
61
- # Install custom node deps
62
  import subprocess
63
 
64
  for cn in (comfy_dir / "custom_nodes").iterdir():
@@ -78,29 +76,107 @@ _bootstrap()
78
 
79
 
80
  # ---------------------------------------------------------------------------
81
- # Gradio app
 
 
82
  # ---------------------------------------------------------------------------
83
 
84
  _CUSTOM_CSS = """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  .status-card { padding: 14px 16px; border-radius: 10px; background: rgba(255,255,255,0.04); border: 1px solid rgba(255,255,255,0.08); }
86
- .status-row { display: flex; gap: 14px; align-items: center; margin-bottom: 8px; }
87
  .status-stage { font-weight: 600; }
88
  .status-meta { font-size: 12px; opacity: 0.75; }
89
  .status-bar { height: 6px; background: rgba(255,255,255,0.08); border-radius: 99px; overflow: hidden; }
90
  .status-fill { height: 100%; background: linear-gradient(90deg,#6ea8fe,#8de9fe); transition: width .3s; }
91
  .status-mem { font-size: 11px; opacity: 0.6; margin-top: 6px; font-family: ui-monospace, monospace; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  """
93
 
94
 
 
 
 
 
 
95
  def build_app() -> gr.Blocks:
96
  with gr.Blocks(theme=gr.themes.Soft(), title="LTX 2.3 All-in-One", css=_CUSTOM_CSS) as app:
97
  gr.Markdown("# ⚡ LTX 2.3 All-in-One")
98
- with gr.Row():
99
- with gr.Column(scale=1, min_width=200):
100
- _render_sidebar()
101
- with gr.Column(scale=4):
102
- handles = _render_mode_panels()
103
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  for name, h in handles.items():
105
  inputs = _collect_inputs_for_mode(name, h)
106
  h["generate_btn"].click(
@@ -108,25 +184,99 @@ def build_app() -> gr.Blocks:
108
  inputs=inputs,
109
  outputs=[h["status"], h["video_out"]],
110
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
111
  return app
112
 
113
 
114
- def _render_sidebar() -> None:
115
- gr.Markdown("### Modes")
116
- for mode in modes.MODE_REGISTRY.values():
117
- gr.Markdown(f"- {mode.icon} {mode.label}")
118
- gr.Markdown("---\n### Models")
119
- gr.Button("Unload all models", variant="secondary")
120
 
121
 
122
- def _render_mode_panels() -> dict[str, dict]:
123
- """Render one form per mode. Returns the component handles keyed by mode."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  handles: dict[str, dict] = {}
125
- with gr.Tabs():
126
  for name, mode in modes.MODE_REGISTRY.items():
127
- with gr.Tab(label=f"{mode.icon} {mode.label}"):
128
  handles[name] = _render_one_mode(name)
129
- return handles
130
 
131
 
132
  def _render_one_mode(name: str) -> dict:
@@ -134,7 +284,7 @@ def _render_one_mode(name: str) -> dict:
134
  handles: dict = {"mode": name}
135
 
136
  with gr.Row():
137
- with gr.Column(scale=2):
138
  handles["prompt"] = gr.Textbox(
139
  label="Prompt", lines=4, placeholder="Describe the shot..."
140
  )
@@ -154,13 +304,31 @@ def _render_one_mode(name: str) -> dict:
154
  handles["input_video"] = gr.Video(label="Source video")
155
 
156
  handles["preset"] = ui.preset_bar()
 
 
157
  with gr.Row():
158
- handles["width"] = gr.Slider(256, 1280, value=512, step=32, label="Width")
159
- handles["height"] = gr.Slider(256, 1280, value=768, step=32, label="Height")
 
 
 
 
 
 
 
160
  with gr.Row():
161
- handles["frames"] = gr.Slider(9, 121, value=81, step=8, label="Frames (8k+1)")
 
 
 
 
162
  handles["fps"] = gr.Slider(8, 30, value=24, step=1, label="FPS")
163
- handles["seed"] = gr.Number(label="Seed", value=42, precision=0)
 
 
 
 
 
164
 
165
  with gr.Accordion("Advanced ▾", open=False):
166
  handles["lora"] = ui.lora_chrome(name)
@@ -168,7 +336,23 @@ def _render_one_mode(name: str) -> dict:
168
 
169
  handles["generate_btn"] = gr.Button("▶ Generate", variant="primary", size="lg")
170
 
171
- with gr.Column(scale=2):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
  handles["status"] = ui.status_banner()
173
  handles["video_out"] = gr.Video(label="Output", autoplay=True)
174
  handles["history"] = gr.Markdown("")
@@ -176,6 +360,10 @@ def _render_one_mode(name: str) -> dict:
176
  return handles
177
 
178
 
 
 
 
 
179
  _BACKEND: backend_module.ComfyUILibraryBackend | None = None
180
 
181
 
@@ -186,51 +374,92 @@ def _get_backend() -> backend_module.ComfyUILibraryBackend:
186
  return _BACKEND
187
 
188
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  PRESET_DURATION = {"Fast": 60, "Balanced": 120, "Quality": 300}
190
 
191
 
 
 
 
 
192
  async def _on_generate(mode_name: str, **inputs: Any):
193
  """Generate handler — async generator yielding (status_html, video_path)."""
194
  mode = modes.MODE_REGISTRY[mode_name]
195
 
196
- # Translate UI inputs into the parameterize_fn input dict.
 
 
 
 
 
 
 
 
197
  params: dict[str, Any] = {
198
  "prompt": inputs.get("prompt", ""),
199
  "negative_prompt": inputs.get("negative_prompt", ""),
200
- "preset": inputs.get("preset", "Balanced").lower(),
201
  "width": int(inputs.get("width", 512)),
202
  "height": int(inputs.get("height", 768)),
203
- "frames": int(inputs.get("frames", 81)),
204
- "fps": int(inputs.get("fps", 24)),
205
- "seed": int(inputs.get("seed", 42)),
206
  }
207
  for k in (
208
- "image",
209
- "audio",
210
- "first_frame",
211
- "last_frame",
212
- "input_video",
213
- "camera_lora",
214
- "camera_strength",
215
- "detailer_on",
216
- "detailer_strength",
217
- "ic_lora",
218
- "ic_strength",
219
- "pose_on",
220
- "audio_cfg",
221
- "image_strength",
222
  ):
223
  if k in inputs:
224
  params[k] = inputs[k]
225
 
 
 
 
 
 
 
 
 
226
  patches = mode.parameterize_fn(params)
227
  workflow = wf_module.load_template(mode_name)
228
  for patch in patches:
229
  wf_module.set_input(workflow, *patch)
230
- wf_module.validate(workflow)
231
 
232
  backend = _get_backend()
233
- duration = PRESET_DURATION.get(inputs.get("preset", "Balanced"), 120)
234
 
235
  started = time.time()
236
  async for event in backend.submit(mode_name, workflow, gpu_duration=duration):
@@ -246,15 +475,15 @@ async def _on_generate(mode_name: str, **inputs: Any):
246
  )
247
  yield status, gr.update()
248
  elif isinstance(event, backend_module.ProgressEvent):
249
- stage = (
250
- mode.stage_map[event.stage]
251
- if event.stage < len(mode.stage_map)
252
- else mode.stage_map[-1]
253
- )
254
  eta = (elapsed / max(event.step, 1)) * (event.total_steps - event.step)
255
  status = ui.render_status(
256
- stage_index=event.stage + 1,
257
- stage_label=stage.label,
258
  step=event.step,
259
  total_steps=event.total_steps,
260
  elapsed_s=elapsed,
@@ -262,7 +491,8 @@ async def _on_generate(mode_name: str, **inputs: Any):
262
  )
263
  yield status, gr.update()
264
  elif isinstance(event, backend_module.OutputEvent):
265
- yield ui._render_idle(), event.video_path
 
266
  elif isinstance(event, backend_module.ErrorEvent):
267
  error_html = (
268
  f'<div class="status-card status-error">'
@@ -274,7 +504,7 @@ async def _on_generate(mode_name: str, **inputs: Any):
274
 
275
 
276
  def _input_keys_for_mode(mode_name: str, h: dict) -> list[str]:
277
- base = ["prompt", "preset", "width", "height", "frames", "fps", "seed"]
278
  if mode_name == "i2v":
279
  base.append("image")
280
  elif mode_name == "a2v":
@@ -295,8 +525,10 @@ def _input_keys_for_mode(mode_name: str, h: dict) -> list[str]:
295
 
296
 
297
  def _collect_inputs_for_mode(mode_name: str, h: dict) -> list:
298
- """Gather the gr.Component handles to pass into _on_generate."""
299
- base = [h["prompt"], h["preset"], h["width"], h["height"], h["frames"], h["fps"], h["seed"]]
 
 
300
  if mode_name == "i2v":
301
  base.append(h["image"])
302
  elif mode_name == "a2v":
@@ -308,14 +540,10 @@ def _collect_inputs_for_mode(mode_name: str, h: dict) -> list:
308
  elif mode_name == "style":
309
  base.append(h["input_video"])
310
  base.append(h["negative_prompt"])
311
- base.extend(
312
- [
313
- h["lora"].camera_lora,
314
- h["lora"].camera_strength,
315
- h["lora"].detailer_on,
316
- h["lora"].detailer_strength,
317
- ]
318
- )
319
  if h["lora"].ic_lora is not None:
320
  base.extend([h["lora"].ic_lora, h["lora"].ic_strength])
321
  if h["lora"].pose_on is not None:
 
5
 
6
  import os
7
  import pathlib
8
+ import random
9
  import sys
10
  import time
11
  from typing import Any
 
27
 
28
 
29
  COMFYUI_REPO = "https://github.com/comfyanonymous/ComfyUI.git"
 
 
30
  COMFYUI_COMMIT = os.environ.get(
31
  "LTX23_AIO_COMFYUI_COMMIT",
32
  "eb0686bbb60c83e44c3a3e4f7defd0f589cfef10",
 
57
  for node_url, node_ref in CUSTOM_NODES_PINNED:
58
  name = node_url.rstrip(".git").rsplit("/", 1)[-1]
59
  _git_clone(node_url, comfy_dir / "custom_nodes" / name, ref=node_ref)
 
60
  import subprocess
61
 
62
  for cn in (comfy_dir / "custom_nodes").iterdir():
 
76
 
77
 
78
  # ---------------------------------------------------------------------------
79
+ # Styling: hide the default top tab strip (sidebar drives selection),
80
+ # add status-card styling, plus responsive breakpoints (≤1024px tablet,
81
+ # ≤700px mobile).
82
  # ---------------------------------------------------------------------------
83
 
84
  _CUSTOM_CSS = """
85
+ /* Hide the top tab strip from view, but keep it in the DOM and clickable so
86
+ the sidebar buttons can drive selection via programmatic click. */
87
+ .aio-tabs > .tab-nav,
88
+ .aio-tabs > div:first-child[role="tablist"],
89
+ .aio-tabs > div:first-child:has([role="tab"]) {
90
+ position: absolute !important;
91
+ left: -99999px !important;
92
+ top: -99999px !important;
93
+ height: 0 !important;
94
+ overflow: hidden !important;
95
+ visibility: visible !important;
96
+ pointer-events: auto !important;
97
+ }
98
+
99
+ /* Sidebar nav buttons */
100
+ .aio-mode-btn { width: 100%; text-align: left; margin: 2px 0; }
101
+ .aio-mode-btn-active { background: rgba(110,168,254,0.15) !important; border-left: 3px solid #6ea8fe !important; }
102
+
103
+ /* Sidebar headings */
104
+ .aio-sidebar-heading { font-size: 12px; text-transform: uppercase; letter-spacing: 0.05em; opacity: 0.6; margin-top: 16px !important; margin-bottom: 4px !important; }
105
+
106
+ /* Status banner */
107
  .status-card { padding: 14px 16px; border-radius: 10px; background: rgba(255,255,255,0.04); border: 1px solid rgba(255,255,255,0.08); }
108
+ .status-row { display: flex; gap: 14px; align-items: center; margin-bottom: 8px; flex-wrap: wrap; }
109
  .status-stage { font-weight: 600; }
110
  .status-meta { font-size: 12px; opacity: 0.75; }
111
  .status-bar { height: 6px; background: rgba(255,255,255,0.08); border-radius: 99px; overflow: hidden; }
112
  .status-fill { height: 100%; background: linear-gradient(90deg,#6ea8fe,#8de9fe); transition: width .3s; }
113
  .status-mem { font-size: 11px; opacity: 0.6; margin-top: 6px; font-family: ui-monospace, monospace; }
114
+ .status-error { background: rgba(255,90,90,0.08); border-color: rgba(255,90,90,0.25); }
115
+
116
+ /* Model status badge */
117
+ .aio-model-badge { padding: 8px 10px; border-radius: 8px; background: rgba(255,255,255,0.04); font-size: 11.5px; font-family: ui-monospace, monospace; opacity: 0.85; }
118
+
119
+ /* Responsive: tablet */
120
+ @media (max-width: 1024px) {
121
+ .aio-sidebar { min-width: 160px !important; }
122
+ .aio-mode-btn { font-size: 13px !important; padding: 6px 10px !important; }
123
+ }
124
+
125
+ /* Responsive: mobile — sidebar collapses to top, single column body */
126
+ @media (max-width: 700px) {
127
+ .aio-shell { flex-direction: column !important; }
128
+ .aio-sidebar { width: 100% !important; min-width: unset !important; padding: 0 !important; }
129
+ .aio-body { width: 100% !important; }
130
+ .aio-mode-btn-row { display: grid !important; grid-template-columns: repeat(2, 1fr) !important; gap: 6px !important; padding: 8px !important; }
131
+ .aio-mode-btn { width: 100% !important; font-size: 12.5px !important; padding: 8px !important; text-align: center !important; margin: 0 !important; }
132
+ .aio-sidebar-heading { font-size: 10px !important; margin: 12px 0 4px !important; padding: 0 8px !important; }
133
+ .aio-model-badge { margin: 0 8px !important; word-break: break-word; white-space: normal !important; }
134
+ /* sliders + side-by-side rows: stack vertically on mobile so each value
135
+ gets its own width budget */
136
+ .aio-body .form > div, .aio-body [class*="row"] > div { flex: 1 1 100% !important; min-width: 0 !important; }
137
+ .aio-body [class*="row"] { flex-wrap: wrap !important; }
138
+ }
139
  """
140
 
141
 
142
+ # ---------------------------------------------------------------------------
143
+ # UI
144
+ # ---------------------------------------------------------------------------
145
+
146
+
147
  def build_app() -> gr.Blocks:
148
  with gr.Blocks(theme=gr.themes.Soft(), title="LTX 2.3 All-in-One", css=_CUSTOM_CSS) as app:
149
  gr.Markdown("# ⚡ LTX 2.3 All-in-One")
 
 
 
 
 
150
 
151
+ with gr.Row(elem_classes=["aio-shell"]):
152
+ # Sidebar
153
+ with gr.Column(scale=1, min_width=200, elem_classes=["aio-sidebar"]):
154
+ gr.Markdown("**Modes**", elem_classes=["aio-sidebar-heading"])
155
+ with gr.Column(elem_classes=["aio-mode-btn-row"]):
156
+ mode_buttons = {
157
+ name: gr.Button(
158
+ f"{m.icon} {m.label}",
159
+ elem_classes=["aio-mode-btn"],
160
+ variant="secondary",
161
+ )
162
+ for name, m in modes.MODE_REGISTRY.items()
163
+ }
164
+ gr.Markdown("**Models**", elem_classes=["aio-sidebar-heading"])
165
+ model_status = gr.HTML(_render_model_status_idle(), elem_id="aio-model-status")
166
+ refresh_btn = gr.Button("Refresh", size="sm", variant="secondary")
167
+ unload_btn = gr.Button("Unload all models", size="sm", variant="secondary")
168
+ gr.Markdown("**Settings**", elem_classes=["aio-sidebar-heading"])
169
+ gr.Markdown(
170
+ "Output: `comfyui/output/LTX2.3/`<br>"
171
+ "Set `LTX23_AIO_VRAM=lowvram|normalvram|highvram` to override the auto-detected VRAM tier.",
172
+ elem_classes=["aio-model-badge"],
173
+ )
174
+
175
+ # Body
176
+ with gr.Column(scale=4, elem_classes=["aio-body"]):
177
+ handles, tabs_component = _render_mode_panels()
178
+
179
+ # Wire generate buttons
180
  for name, h in handles.items():
181
  inputs = _collect_inputs_for_mode(name, h)
182
  h["generate_btn"].click(
 
184
  inputs=inputs,
185
  outputs=[h["status"], h["video_out"]],
186
  )
187
+
188
+ # Sidebar mode buttons drive Tabs.selected via Gradio's update.
189
+ for name, btn in mode_buttons.items():
190
+ btn.click(
191
+ fn=lambda mode_id=name: gr.Tabs(selected=mode_id),
192
+ inputs=None,
193
+ outputs=[tabs_component],
194
+ )
195
+
196
+ # Sidebar model info wiring
197
+ refresh_btn.click(fn=_render_model_status, inputs=None, outputs=[model_status])
198
+ unload_btn.click(fn=_unload_models, inputs=None, outputs=[model_status])
199
+
200
  return app
201
 
202
 
203
def _render_model_status_idle() -> str:
    """Return the placeholder badge HTML used as the model-status panel's initial content."""
    # One entry per visual row of the badge; rows are separated by <br>.
    rows = ("device: detecting…", "loaded: —", "free: —")
    return '<div class="aio-model-badge">' + "<br>".join(rows) + "</div>"
 
208
 
209
 
210
def _render_model_status() -> str:
    """Best-effort device + memory readout for the sidebar.

    Returns:
        An HTML fragment (``<div class="aio-model-badge">…</div>``) describing
        the active torch device, the number of entries in ComfyUI's
        ``current_loaded_models`` and free/total memory in GB. This function
        never raises: any failure is rendered into the badge instead, with the
        exception text HTML-escaped so messages containing ``<`` / ``&`` cannot
        break (or inject into) the surrounding markup.
    """
    import html

    try:
        _get_backend()  # ensure ComfyUI is loaded
    except Exception as exc:
        return (
            '<div class="aio-model-badge">backend not ready<br>'
            f"{html.escape(str(exc))}</div>"
        )
    try:
        import comfy.model_management as mm
        import torch

        device = mm.get_torch_device()
        free_gb = mm.get_free_memory(device) / (1024**3)
        if torch.backends.mps.is_available():
            # MPS unified memory: total physical = total system RAM. The
            # "recommended max" from torch.mps is a soft cap (~75% of total)
            # used by the allocator, but actual free can exceed it because
            # macOS shares RAM between CPU and GPU.
            cap_gb = torch.mps.recommended_max_memory() / (1024**3)
            try:
                import psutil

                total_gb = psutil.virtual_memory().total / (1024**3)
            except Exception:
                # psutil missing or failing: fall back to the allocator cap.
                total_gb = cap_gb
            label = "MPS (unified)"
            extra = f"<br>mps cap: {cap_gb:.1f} GB"
        elif torch.cuda.is_available():
            total_gb = torch.cuda.get_device_properties(0).total_memory / (1024**3)
            label = "CUDA"
            extra = ""
        else:
            total_gb = 0.0
            label = "CPU"
            extra = ""
        # getattr guard: tolerate ComfyUI builds without this attribute.
        loaded = len(getattr(mm, "current_loaded_models", []))
        return (
            '<div class="aio-model-badge">'
            f"device: {label}<br>"
            f"loaded: {loaded} model(s)<br>"
            f"free: {free_gb:.1f} GB / {total_gb:.1f} GB total"
            f"{extra}"
            "</div>"
        )
    except Exception as exc:
        return (
            '<div class="aio-model-badge">memory probe failed: '
            f"{html.escape(str(exc))}</div>"
        )
255
+
256
+
257
def _unload_models() -> str:
    """Unload all ComfyUI models, release cached device memory, and re-render the badge.

    Calls ``comfy.model_management.unload_all_models()`` and then empties the
    MPS and/or CUDA caches, whichever backends torch reports as available.

    Returns:
        The refreshed model-status HTML on success, or an "unload failed"
        badge on any exception. The exception text is HTML-escaped so it
        cannot corrupt the badge markup. Never raises.
    """
    import html

    try:
        import comfy.model_management as mm
        import torch

        mm.unload_all_models()
        if torch.backends.mps.is_available():
            torch.mps.empty_cache()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
    except Exception as exc:
        return f'<div class="aio-model-badge">unload failed: {html.escape(str(exc))}</div>'
    return _render_model_status()
270
+
271
+
272
def _render_mode_panels() -> tuple[dict[str, dict], gr.Tabs]:
    """Build one tab panel per registered mode.

    Returns:
        A ``(panels, tabs)`` pair: ``panels`` maps each mode id to the handle
        dict produced by ``_render_one_mode``; ``tabs`` is the enclosing
        ``gr.Tabs`` container. Each tab's ``id`` is the mode id.
    """
    panels: dict[str, dict] = {}
    with gr.Tabs(elem_classes=["aio-tabs"]) as tab_container:
        for mode_id, spec in modes.MODE_REGISTRY.items():
            tab_title = f"{spec.icon} {spec.label}"
            with gr.Tab(label=tab_title, id=mode_id):
                panels[mode_id] = _render_one_mode(mode_id)
    return panels, tab_container
280
 
281
 
282
  def _render_one_mode(name: str) -> dict:
 
284
  handles: dict = {"mode": name}
285
 
286
  with gr.Row():
287
+ with gr.Column(scale=2, min_width=280):
288
  handles["prompt"] = gr.Textbox(
289
  label="Prompt", lines=4, placeholder="Describe the shot..."
290
  )
 
304
  handles["input_video"] = gr.Video(label="Source video")
305
 
306
  handles["preset"] = ui.preset_bar()
307
+
308
+ # Resolution — up to 4K, /32 step
309
  with gr.Row():
310
+ handles["width"] = gr.Slider(
311
+ 256, 4096, value=512, step=32, label="Width"
312
+ )
313
+ handles["height"] = gr.Slider(
314
+ 256, 4096, value=768, step=32, label="Height"
315
+ )
316
+
317
+ # Length controlled in seconds (matches the master workflow's mxSlider).
318
+ # Frames are derived: frames = round(seconds * fps / 8) * 8 + 1.
319
  with gr.Row():
320
+ handles["seconds"] = gr.Slider(
321
+ minimum=1, maximum=30, value=3, step=1,
322
+ label="Length (seconds)",
323
+ info="Frames are computed as 8·round(seconds·fps/8)+1 (LTX requires 8k+1)",
324
+ )
325
  handles["fps"] = gr.Slider(8, 30, value=24, step=1, label="FPS")
326
+
327
+ handles["frames_display"] = gr.Markdown("Frames: 73", elem_classes=["aio-frames-display"])
328
+
329
+ with gr.Row():
330
+ handles["seed"] = gr.Number(label="Seed", value=42, precision=0, minimum=0)
331
+ handles["randomize_seed"] = gr.Checkbox(label="Randomize seed each run", value=True)
332
 
333
  with gr.Accordion("Advanced ▾", open=False):
334
  handles["lora"] = ui.lora_chrome(name)
 
336
 
337
  handles["generate_btn"] = gr.Button("▶ Generate", variant="primary", size="lg")
338
 
339
+ # Live frames-display update when seconds/fps change
340
+ def _update_frames(seconds, fps):
341
+ f = max(9, int(round(float(seconds) * float(fps) / 8) * 8) + 1)
342
+ return f"**Frames:** {f} (`{seconds}s` × `{fps} fps`)"
343
+
344
+ handles["seconds"].change(
345
+ fn=_update_frames,
346
+ inputs=[handles["seconds"], handles["fps"]],
347
+ outputs=[handles["frames_display"]],
348
+ )
349
+ handles["fps"].change(
350
+ fn=_update_frames,
351
+ inputs=[handles["seconds"], handles["fps"]],
352
+ outputs=[handles["frames_display"]],
353
+ )
354
+
355
+ with gr.Column(scale=2, min_width=280):
356
  handles["status"] = ui.status_banner()
357
  handles["video_out"] = gr.Video(label="Output", autoplay=True)
358
  handles["history"] = gr.Markdown("")
 
360
  return handles
361
 
362
 
363
+ # ---------------------------------------------------------------------------
364
+ # Backend wiring
365
+ # ---------------------------------------------------------------------------
366
+
367
  _BACKEND: backend_module.ComfyUILibraryBackend | None = None
368
 
369
 
 
374
  return _BACKEND
375
 
376
 
377
_COMFY_INPUT_DIR = pathlib.Path(__file__).parent / "comfyui" / "input"


def _stage_to_comfy_input(file_path) -> str | None:
    """Copy/stage a path into comfyui/input/ so ComfyUI's LoadImage etc. can find it.

    Args:
        file_path: A ``str`` / ``pathlib.Path``, or a dict carrying the path
            under ``"name"``, ``"path"`` or ``"orig_name"`` (first non-empty
            value wins). Any other type is treated as "no file".

    Returns:
        The file's base name (i.e. its name relative to ``_COMFY_INPUT_DIR``),
        or ``None`` when the input is empty or does not point at an existing
        regular file.
    """
    import filecmp
    import shutil

    if not file_path:
        return None
    if not isinstance(file_path, (str, pathlib.Path)):
        file_path = (
            file_path.get("name") or file_path.get("path") or file_path.get("orig_name")
            if isinstance(file_path, dict)
            else None
        )
    if not file_path:
        return None
    src = pathlib.Path(file_path)
    if not src.is_file():
        print(f"[_stage] skip {file_path!r}", flush=True)
        return None
    _COMFY_INPUT_DIR.mkdir(parents=True, exist_ok=True)
    try:
        # Already inside the input dir: nothing to copy.
        if src.resolve().is_relative_to(_COMFY_INPUT_DIR.resolve()):
            return src.name
    except (ValueError, OSError):
        pass
    dst = _COMFY_INPUT_DIR / src.name
    # Compare contents, not just size: two different uploads can share both a
    # base name and a byte count, and a size-only check would silently keep
    # serving the stale file to the workflow.
    if not dst.is_file() or not filecmp.cmp(src, dst, shallow=False):
        shutil.copy2(src, dst)
    return src.name
408
+
409
+
410
  PRESET_DURATION = {"Fast": 60, "Balanced": 120, "Quality": 300}
411
 
412
 
413
def _seconds_to_frames(seconds: float, fps: int) -> int:
    """Convert a clip length in seconds to a frame count of the form 8k+1.

    The raw frame count ``seconds * fps`` is rounded to a whole number of
    8-frame groups (using Python's built-in ``round``), one extra frame is
    added, and the result is floored at 9.
    """
    groups = round(float(seconds) * float(fps) / 8)
    frame_count = int(groups * 8) + 1
    return frame_count if frame_count > 9 else 9
415
+
416
+
417
  async def _on_generate(mode_name: str, **inputs: Any):
418
  """Generate handler — async generator yielding (status_html, video_path)."""
419
  mode = modes.MODE_REGISTRY[mode_name]
420
 
421
+ fps = int(inputs.get("fps", 24))
422
+ seconds = float(inputs.get("seconds", 3))
423
+ frames = _seconds_to_frames(seconds, fps)
424
+
425
+ # Seed: respect the explicit value unless the "randomize" checkbox is on.
426
+ seed = int(inputs.get("seed", 42))
427
+ if inputs.get("randomize_seed"):
428
+ seed = random.randint(0, 2**31 - 1)
429
+
430
  params: dict[str, Any] = {
431
  "prompt": inputs.get("prompt", ""),
432
  "negative_prompt": inputs.get("negative_prompt", ""),
433
+ "preset": str(inputs.get("preset", "Balanced")).lower(),
434
  "width": int(inputs.get("width", 512)),
435
  "height": int(inputs.get("height", 768)),
436
+ "frames": frames,
437
+ "fps": fps,
438
+ "seed": seed,
439
  }
440
  for k in (
441
+ "image", "audio", "first_frame", "last_frame", "input_video",
442
+ "camera_lora", "camera_strength", "detailer_on", "detailer_strength",
443
+ "ic_lora", "ic_strength", "pose_on", "audio_cfg", "image_strength",
 
 
 
 
 
 
 
 
 
 
 
444
  ):
445
  if k in inputs:
446
  params[k] = inputs[k]
447
 
448
+ for key in ("image", "audio", "first_frame", "last_frame", "input_video"):
449
+ if key in params and params[key]:
450
+ staged = _stage_to_comfy_input(params[key])
451
+ if staged is None:
452
+ params.pop(key, None)
453
+ else:
454
+ params[key] = staged
455
+
456
  patches = mode.parameterize_fn(params)
457
  workflow = wf_module.load_template(mode_name)
458
  for patch in patches:
459
  wf_module.set_input(workflow, *patch)
 
460
 
461
  backend = _get_backend()
462
+ duration = PRESET_DURATION.get(str(inputs.get("preset", "Balanced")), 120)
463
 
464
  started = time.time()
465
  async for event in backend.submit(mode_name, workflow, gpu_duration=duration):
 
475
  )
476
  yield status, gr.update()
477
  elif isinstance(event, backend_module.ProgressEvent):
478
+ # Each sampler in the workflow gets its own stage label "Diffusion (Stage n)".
479
+ # The static `mode.stage_map` describes the full pipeline (encode →
480
+ # diffusion upscale → diffusion → decode) but our progress hook
481
+ # only fires inside samplers, so we label by sampler index instead.
482
+ label = f"Diffusion (Stage {event.stage})"
483
  eta = (elapsed / max(event.step, 1)) * (event.total_steps - event.step)
484
  status = ui.render_status(
485
+ stage_index=event.stage,
486
+ stage_label=label,
487
  step=event.step,
488
  total_steps=event.total_steps,
489
  elapsed_s=elapsed,
 
491
  )
492
  yield status, gr.update()
493
  elif isinstance(event, backend_module.OutputEvent):
494
+ video_update = event.video_path if event.video_path else gr.update()
495
+ yield ui._render_idle(), video_update
496
  elif isinstance(event, backend_module.ErrorEvent):
497
  error_html = (
498
  f'<div class="status-card status-error">'
 
504
 
505
 
506
  def _input_keys_for_mode(mode_name: str, h: dict) -> list[str]:
507
+ base = ["prompt", "preset", "width", "height", "seconds", "fps", "seed", "randomize_seed"]
508
  if mode_name == "i2v":
509
  base.append("image")
510
  elif mode_name == "a2v":
 
525
 
526
 
527
  def _collect_inputs_for_mode(mode_name: str, h: dict) -> list:
528
+ base = [
529
+ h["prompt"], h["preset"], h["width"], h["height"],
530
+ h["seconds"], h["fps"], h["seed"], h["randomize_seed"],
531
+ ]
532
  if mode_name == "i2v":
533
  base.append(h["image"])
534
  elif mode_name == "a2v":
 
540
  elif mode_name == "style":
541
  base.append(h["input_video"])
542
  base.append(h["negative_prompt"])
543
+ base.extend([
544
+ h["lora"].camera_lora, h["lora"].camera_strength,
545
+ h["lora"].detailer_on, h["lora"].detailer_strength,
546
+ ])
 
 
 
 
547
  if h["lora"].ic_lora is not None:
548
  base.extend([h["lora"].ic_lora, h["lora"].ic_strength])
549
  if h["lora"].pose_on is not None:
backend.py CHANGED
@@ -66,6 +66,98 @@ class _StubServer:
66
  pass
67
 
68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  def _comfy_dir() -> pathlib.Path:
70
  if _on_spaces():
71
  return pathlib.Path("/data/comfyui")
@@ -96,6 +188,28 @@ class ComfyUILibraryBackend:
96
  import execution # top-level module — provides PromptExecutor
97
  import nodes # top-level module — provides init_extra_nodes (async)
98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  # `nodes.init_extra_nodes` is async. We may be called from within a
100
  # running event loop (Gradio's handler) — running `asyncio.run()` there
101
  # raises. Run the coroutine in a fresh loop on a worker thread instead.
@@ -148,13 +262,27 @@ class ComfyUILibraryBackend:
148
  def _push(event: Any) -> None:
149
  asyncio.run_coroutine_threadsafe(queue.put(event), loop)
150
 
151
- def _hook(value: int, total: int, _preview=None) -> None:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  _push(
153
  ProgressEvent(
154
- stage=0,
155
  stage_label="diffusion",
156
- step=int(value),
157
- total_steps=int(total),
158
  )
159
  )
160
 
@@ -163,6 +291,21 @@ class ComfyUILibraryBackend:
163
 
164
  saved_hook = getattr(comfy.utils, "PROGRESS_BAR_HOOK", None)
165
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
  # Use the public setter; it writes the same global the
167
  # ProgressBar class reads, but is the documented API.
168
  comfy.utils.set_progress_bar_global_hook(_hook)
@@ -170,12 +313,15 @@ class ComfyUILibraryBackend:
170
  workflow,
171
  prompt_id="ltx23-aio",
172
  extra_data={"client_id": "ltx23-aio"},
173
- execute_outputs=[],
174
  )
175
- # PromptExecutor writes output files via VHS_VideoCombine; we read its
176
- # history to find the most recent saved video.
177
- outputs = list(self._executor.outputs.values())
178
- video_path = _first_video_path(outputs) or ""
 
 
 
179
  _push(OutputEvent(video_path=video_path))
180
  except Exception as exc:
181
  tb_text = tb_mod.format_exc()
 
66
  pass
67
 
68
 
69
+ class _StubPromptQueue:
70
+ """Stub matching the surface VideoHelperSuite + others touch."""
71
+
72
+ currently_running: dict = {}
73
+ history: dict = {}
74
+ flags: dict = {}
75
+
76
+ def get_current_queue(self) -> tuple[list, list]:
77
+ return ([], [])
78
+
79
+ def get_tasks_remaining(self) -> int:
80
+ return 0
81
+
82
+ def set_flag(self, name: str, data) -> None:
83
+ pass
84
+
85
+ def get_flags(self, *a, **kw) -> dict:
86
+ return {}
87
+
88
+ def task_done(self, *a, **kw) -> None:
89
+ pass
90
+
91
+ def put(self, *a, **kw) -> None:
92
+ pass
93
+
94
+ def wipe_queue(self) -> None:
95
+ pass
96
+
97
+ def delete_queue_item(self, *a, **kw) -> None:
98
+ pass
99
+
100
+
101
class _StubPromptServerInstance:
    """Stand-in for ComfyUI's `server.PromptServer.instance` when no server runs.

    VideoHelperSuite, KJNodes, and other custom nodes read this object at
    import time, mostly to register HTTP routes, send WS events, or peek at
    the prompt queue. Every hook is a no-op because there is no real server.
    """

    client_id: str | None = "ltx23-aio"
    # KJNodes' preview thread reads `last_node_id.encode('ascii')` directly.
    # ComfyUI's real server keeps it as a string per executing node and resets
    # it to None at end-of-prompt, which races the preview thread. Keep it a
    # non-empty string so .encode() never fails on None.
    last_node_id: str = "ltx23-aio"
    web_root: str = ""

    class _Routes:
        """Route table whose decorators return the handler unregistered."""

        def get(self, *a, **kw):
            return lambda fn: fn

        def post(self, *a, **kw):
            return lambda fn: fn

        def static(self, *a, **kw):
            return None

    routes = _Routes()
    sockets: dict = {}
    prompt_queue = _StubPromptQueue()
    # Custom-Scripts checks PromptServer.instance.supports — claim the
    # "custom_nodes_from_web" capability so it skips its JS install path.
    supports: list[str] = ["custom_nodes_from_web"]

    def add_routes(self) -> None:
        """No-op: there is no HTTP app to attach routes to."""

    def send_sync(self, event: str, data: dict, sid: str | None = None) -> None:
        """No-op: there is no websocket client to notify."""

    def send_progress_text(self, text: str, node_id=None, sid=None) -> None:
        """No-op: comfy_extras nodes call this, but there is no UI for the text."""

    def queue_updated(self) -> None:
        """No-op: the stub queue never changes."""

    def get_node_class_def(self, *a, **kw):
        """Return None for any node class lookup."""
        return None

    def __getattr__(self, name):
        """Return a no-op callable for any attribute not defined above.

        This is a deliberately liberal catch-all so the inference path doesn't
        die on cosmetic UI hooks; `hasattr` on an ordinary name reports True.

        Raises:
            AttributeError: For dunder names. Protocol probes such as
                `__deepcopy__` must see "not implemented" rather than a
                callable that returns None.
        """
        if name.startswith("__") and name.endswith("__"):
            raise AttributeError(name)

        def _noop(*a, **kw):
            return None

        return _noop
159
+
160
+
161
  def _comfy_dir() -> pathlib.Path:
162
  if _on_spaces():
163
  return pathlib.Path("/data/comfyui")
 
188
  import execution # top-level module — provides PromptExecutor
189
  import nodes # top-level module — provides init_extra_nodes (async)
190
 
191
+ # CRITICAL ordering fix: ComfyUI's nodes.py:24 inserts `comfyui/comfy/`
192
+ # at sys.path[0]. That dir contains a module-style `utils.py`, which
193
+ # shadows `comfyui/utils/` (a package containing install_util.py).
194
+ # Some custom nodes (KJNodes, VideoHelperSuite via app.frontend_management)
195
+ # do `from utils.install_util import …` and get `comfy/utils.py` instead,
196
+ # raising "'utils' is not a package". Rewrite sys.path so comfy_dir is
197
+ # ahead of comfy_dir/comfy and force-clear any cached `utils` binding.
198
+ comfy_subdir = str(self._comfy_dir / "comfy")
199
+ sys.path = [p for p in sys.path if p not in (str(self._comfy_dir), comfy_subdir)]
200
+ sys.path.insert(0, comfy_subdir)
201
+ sys.path.insert(0, str(self._comfy_dir))
202
+ if "utils" in sys.modules and not getattr(sys.modules["utils"], "__path__", None):
203
+ del sys.modules["utils"]
204
+
205
+ # Some custom nodes (e.g. VideoHelperSuite) read `server.PromptServer.instance`
206
+ # at import time. We don't run a real ComfyUI server, so install a stub
207
+ # that exposes the attributes those nodes touch (sockets, send, etc.).
208
+ import server as comfy_server
209
+
210
+ if getattr(comfy_server.PromptServer, "instance", None) is None:
211
+ comfy_server.PromptServer.instance = _StubPromptServerInstance()
212
+
213
  # `nodes.init_extra_nodes` is async. We may be called from within a
214
  # running event loop (Gradio's handler) — running `asyncio.run()` there
215
  # raises. Run the coroutine in a fresh loop on a worker thread instead.
 
262
  def _push(event: Any) -> None:
263
  asyncio.run_coroutine_threadsafe(queue.put(event), loop)
264
 
265
+ # Track stage progression. ComfyUI fires the progress hook from inside
266
+ # samplers, so we advance the stage every time we observe a new sampler
267
+ # starting: either the reported total differs from the previous call, or
268
+ # the step value rewinds below the running max seen for the same total.
269
+ progress_state = {"stage": 0, "prev_total": -1, "max_step": -1}
270
+
271
+ def _hook(value: int, total: int, _preview=None, **_kwargs: Any) -> None:
272
+ v, t = int(value), int(total)
273
+ # New sampler started (different total, or step rewound)
274
+ if t != progress_state["prev_total"] or v < progress_state["max_step"]:
275
+ progress_state["stage"] += 1
276
+ progress_state["prev_total"] = t
277
+ progress_state["max_step"] = v
278
+ else:
279
+ progress_state["max_step"] = max(progress_state["max_step"], v)
280
  _push(
281
  ProgressEvent(
282
+ stage=progress_state["stage"],
283
  stage_label="diffusion",
284
+ step=v,
285
+ total_steps=t,
286
  )
287
  )
288
 
 
291
 
292
  saved_hook = getattr(comfy.utils, "PROGRESS_BAR_HOOK", None)
293
  try:
294
+ # Workflow is already API-format (saved from ComfyUI editor's
295
+ # "Save (API Format)"), so it can be handed to PromptExecutor
296
+ # directly. The execute_outputs list pinpoints which output
297
+ # nodes to evaluate — we let PromptExecutor walk the whole
298
+ # graph by passing every output-class node id.
299
+ output_ids = [
300
+ nid for nid, n in workflow.items()
301
+ if n.get("class_type", "").startswith(("SaveVideo", "VHS_VideoCombine", "PreviewAudio", "CreateVideo"))
302
+ ]
303
+ print(
304
+ f"[backend] submitting workflow: {len(workflow)} nodes, "
305
+ f"output_ids={output_ids}",
306
+ file=sys.stderr,
307
+ flush=True,
308
+ )
309
  # Use the public setter; it writes the same global the
310
  # ProgressBar class reads, but is the documented API.
311
  comfy.utils.set_progress_bar_global_hook(_hook)
 
313
  workflow,
314
  prompt_id="ltx23-aio",
315
  extra_data={"client_id": "ltx23-aio"},
316
+ execute_outputs=output_ids,
317
  )
318
+ # PromptExecutor stores per-node UI info in history_result["outputs"]
319
+ # after execute_async. Each entry mirrors what the JS frontend
320
+ # would receive — including SaveVideo's "filenames"/"video" lists
321
+ # that point at the saved file inside ComfyUI's output dir.
322
+ hist = getattr(self._executor, "history_result", {}) or {}
323
+ outs = hist.get("outputs") or {}
324
+ video_path = _first_video_path(list(outs.values())) or ""
325
  _push(OutputEvent(video_path=video_path))
326
  except Exception as exc:
327
  tb_text = tb_mod.format_exc()
modes.py CHANGED
@@ -5,14 +5,13 @@ Each Mode declares:
5
  - label: display name
6
  - icon: single-character or emoji icon for the sidebar
7
  - stage_map: list of (label, expected_share_pct) for the status banner
8
- - parameterize_fn: (Gradio inputs dict) -> list[(node_id, widget_index, value)]
9
 
10
- The parameterize_fn is the only mode-specific logic. Everything else (workflow
11
- loading, validation, dispatch) is mode-agnostic and lives in workflow.py /
12
- backend.py.
13
-
14
- Tasks 11 (T2V + I2V) and 12 (A2V + Lipsync + Keyframe + Style) populate
15
- MODE_REGISTRY. This task only sets up the dataclass and the empty container.
16
  """
17
 
18
  from __future__ import annotations
@@ -21,7 +20,8 @@ from collections.abc import Callable
21
  from dataclasses import dataclass, field
22
  from typing import Any
23
 
24
- Patch = tuple[int, int | str, Any]
 
25
  ParameterizeFn = Callable[[dict[str, Any]], list[Patch]]
26
 
27
 
@@ -40,163 +40,91 @@ class Mode:
40
  stage_map: list[Stage] = field(default_factory=list)
41
 
42
 
43
- # Filled in by tasks 11–12.
44
  MODE_REGISTRY: dict[str, Mode] = {}
45
 
46
 
47
  # ---------------------------------------------------------------------------
48
- # Node-id constants captured from workflows/{t2v,i2v}.json on 2026-04-30.
49
- #
50
- # The master workflow uses rgthree's GetNode/SetNode for indirection. SetNodes
51
- # named "pos"/"neg" expose the *outputs* of CLIPTextEncode, not the prompt
52
- # strings. So the canonical place to set the prompt text is the CLIPTextEncode
53
- # node itself.
54
- #
55
- # Width/Height/FPS are INTConstant nodes whose values feed downstream Set_*
56
- # variables. Clip length comes from a mxSlider (in seconds, then multiplied by
57
- # FPS via a MathExpression to compute frames). No SetNode for "noise"/seed
58
- # survived the extraction, so seed is intentionally NOT patched here — the
59
- # template's hard-coded value is used until we wire RandomNoise injection in
60
- # Task 12+.
61
- #
62
- # LoRA rows live inside a single Power Lora Loader (rgthree) node whose
63
- # widgets_values is a list of dicts. Patching a specific row requires knowing
64
- # the index, and the canonical mapping (camera_lora value -> row index) belongs
65
- # in models.py once camera-LoRA selection lands. Deferred for now.
66
  # ---------------------------------------------------------------------------
67
 
68
- T2V_NODE_PROMPT = 5536 # CLIPTextEncode positive — wv[0] = prompt
69
- T2V_NODE_NEG_PROMPT = 5537 # CLIPTextEncode negative — wv[0] = negative prompt
70
- T2V_NODE_WIDTH = 5383 # INTConstant "Width" wv[0]
71
- T2V_NODE_HEIGHT = 5382 # INTConstant "Height" wv[0]
72
- T2V_NODE_FPS = 5445 # INTConstant "FPS" wv[0]
73
- T2V_NODE_CLIP_LENGTH = 196 # mxSlider "Clip Length ( in seconds )" wv[0]
74
-
75
- I2V_NODE_PROMPT = 5536
76
- I2V_NODE_NEG_PROMPT = 5537
77
- I2V_NODE_WIDTH = 5383
78
- I2V_NODE_HEIGHT = 5382
79
- I2V_NODE_FPS = 5445
80
- I2V_NODE_CLIP_LENGTH = 196
81
- I2V_NODE_IMAGE = 149 # LoadImage "Load Image1" — wv[0] = filename
82
-
83
- # Mode-specific media nodes — captured from workflows/{a2v,lipsync,keyframe,style}.json
84
- # on 2026-04-30. All four templates contain the same node ids for these inputs (the
85
- # Loaders group is shared across modes); only a subset is wired into each mode's
86
- # pipeline.
87
- #
88
- # VHS_LoadAudioUpload and VHS_LoadVideo carry dict-style widgets_values keyed by
89
- # "audio"/"video". The current set_input helper is list-indexed; passing
90
- # widget_index=0 against a dict adds a numeric "0" key without replacing the
91
- # canonical "audio"/"video" entry. The runtime file-path swap is therefore not
92
- # yet wired — Task 12 only validates the patch tuple set. Real path injection
93
- # lands when backend.py grows file-staging in Task 17.
94
-
95
- A2V_NODE_PROMPT = 5536
96
- A2V_NODE_NEG_PROMPT = 5537
97
- A2V_NODE_WIDTH = 5383
98
- A2V_NODE_HEIGHT = 5382
99
- A2V_NODE_FPS = 5445
100
- A2V_NODE_CLIP_LENGTH = 196
101
- A2V_NODE_AUDIO = 5400 # VHS_LoadAudioUpload — dict wv keyed by "audio"
102
-
103
- LIPSYNC_NODE_PROMPT = 5536
104
- LIPSYNC_NODE_NEG_PROMPT = 5537
105
- LIPSYNC_NODE_FPS = 5445
106
- LIPSYNC_NODE_CLIP_LENGTH = 196
107
- LIPSYNC_NODE_IMAGE = 149 # LoadImage "Load Image1" — wv[0] = filename
108
- LIPSYNC_NODE_AUDIO = 5400 # VHS_LoadAudioUpload — dict wv keyed by "audio"
109
-
110
- KEYFRAME_NODE_PROMPT = 5536
111
- KEYFRAME_NODE_NEG_PROMPT = 5537
112
- KEYFRAME_NODE_FPS = 5445
113
- KEYFRAME_NODE_CLIP_LENGTH = 196
114
- KEYFRAME_NODE_FIRST_FRAME = 149 # LoadImage "Load Image1" — wv[0] = filename
115
- KEYFRAME_NODE_LAST_FRAME = 5437 # LoadImage "Load Image2" — wv[0] = filename
116
-
117
- STYLE_NODE_PROMPT = 5536
118
- STYLE_NODE_NEG_PROMPT = 5537
119
- STYLE_NODE_FPS = 5445
120
- STYLE_NODE_CLIP_LENGTH = 196
121
- STYLE_NODE_INPUT_VIDEO = 5444 # VHS_LoadVideo — dict wv keyed by "video"
122
-
123
-
124
- def _frames_to_seconds(frames: int, fps: int) -> int:
125
- """Convert (frames, fps) to integer seconds for the mxSlider clip-length widget.
126
-
127
- The downstream MathExpression is `a*b+1` (a=seconds, b=fps -> total frames),
128
- so for a target frame count F at fps R we need seconds = ceil((F - 1) / R).
129
- Round up so the slider is never short of the requested frames.
130
- """
131
- if fps <= 0:
132
- return 1
133
- return max(1, -(-(frames - 1) // fps))
134
 
135
 
136
  def _t2v_parameterize(inp: dict[str, Any]) -> list[Patch]:
137
- return [
138
- (T2V_NODE_PROMPT, 0, inp["prompt"]),
139
- (T2V_NODE_NEG_PROMPT, 0, inp.get("negative_prompt", "")),
140
- (T2V_NODE_WIDTH, 0, int(inp["width"])),
141
- (T2V_NODE_HEIGHT, 0, int(inp["height"])),
142
- (T2V_NODE_FPS, 0, int(inp["fps"])),
143
- (T2V_NODE_CLIP_LENGTH, 0, _frames_to_seconds(int(inp["frames"]), int(inp["fps"]))),
144
- ]
145
 
146
 
147
  def _i2v_parameterize(inp: dict[str, Any]) -> list[Patch]:
148
- return [
149
- (I2V_NODE_PROMPT, 0, inp["prompt"]),
150
- (I2V_NODE_NEG_PROMPT, 0, inp.get("negative_prompt", "")),
151
- (I2V_NODE_IMAGE, 0, inp["image"]),
152
- (I2V_NODE_WIDTH, 0, int(inp["width"])),
153
- (I2V_NODE_HEIGHT, 0, int(inp["height"])),
154
- (I2V_NODE_FPS, 0, int(inp["fps"])),
155
- (I2V_NODE_CLIP_LENGTH, 0, _frames_to_seconds(int(inp["frames"]), int(inp["fps"]))),
156
  ]
157
 
158
 
159
  def _a2v_parameterize(inp: dict[str, Any]) -> list[Patch]:
160
- return [
161
- (A2V_NODE_PROMPT, 0, inp["prompt"]),
162
- (A2V_NODE_NEG_PROMPT, 0, inp.get("negative_prompt", "")),
163
- (A2V_NODE_AUDIO, "audio", inp["audio"]),
164
- (A2V_NODE_WIDTH, 0, int(inp["width"])),
165
- (A2V_NODE_HEIGHT, 0, int(inp["height"])),
166
- (A2V_NODE_FPS, 0, int(inp["fps"])),
167
- (A2V_NODE_CLIP_LENGTH, 0, _frames_to_seconds(int(inp["frames"]), int(inp["fps"]))),
168
  ]
169
 
170
 
171
  def _lipsync_parameterize(inp: dict[str, Any]) -> list[Patch]:
172
- return [
173
- (LIPSYNC_NODE_PROMPT, 0, inp["prompt"]),
174
- (LIPSYNC_NODE_NEG_PROMPT, 0, inp.get("negative_prompt", "")),
175
- (LIPSYNC_NODE_IMAGE, 0, inp["image"]),
176
- (LIPSYNC_NODE_AUDIO, "audio", inp["audio"]),
177
- (LIPSYNC_NODE_FPS, 0, int(inp["fps"])),
178
- (LIPSYNC_NODE_CLIP_LENGTH, 0, _frames_to_seconds(int(inp["frames"]), int(inp["fps"]))),
179
  ]
180
 
181
 
182
  def _keyframe_parameterize(inp: dict[str, Any]) -> list[Patch]:
183
- return [
184
- (KEYFRAME_NODE_PROMPT, 0, inp["prompt"]),
185
- (KEYFRAME_NODE_NEG_PROMPT, 0, inp.get("negative_prompt", "")),
186
- (KEYFRAME_NODE_FIRST_FRAME, 0, inp["first_frame"]),
187
- (KEYFRAME_NODE_LAST_FRAME, 0, inp["last_frame"]),
188
- (KEYFRAME_NODE_FPS, 0, int(inp["fps"])),
189
- (KEYFRAME_NODE_CLIP_LENGTH, 0, _frames_to_seconds(int(inp["frames"]), int(inp["fps"]))),
190
  ]
191
 
192
 
193
  def _style_parameterize(inp: dict[str, Any]) -> list[Patch]:
194
- return [
195
- (STYLE_NODE_PROMPT, 0, inp["prompt"]),
196
- (STYLE_NODE_NEG_PROMPT, 0, inp.get("negative_prompt", "")),
197
- (STYLE_NODE_INPUT_VIDEO, "video", inp["input_video"]),
198
- (STYLE_NODE_FPS, 0, int(inp["fps"])),
199
- (STYLE_NODE_CLIP_LENGTH, 0, _frames_to_seconds(int(inp["frames"]), int(inp["fps"]))),
200
  ]
201
 
202
 
@@ -226,16 +154,7 @@ _A2V_STAGES = [
226
  Stage("Decode video", 10),
227
  ]
228
 
229
- _LIPSYNC_STAGES = [
230
- Stage("Encode prompt", 5),
231
- Stage("Encode image", 3),
232
- Stage("Encode audio", 5),
233
- Stage("Diffusion (Stage 1)", 52),
234
- Stage("Spatial upscale", 7),
235
- Stage("Diffusion (Stage 2)", 18),
236
- Stage("Decode video", 10),
237
- ]
238
-
239
  _KEYFRAME_STAGES = [
240
  Stage("Encode prompt", 5),
241
  Stage("Encode keyframes", 5),
@@ -244,16 +163,14 @@ _KEYFRAME_STAGES = [
244
  Stage("Diffusion (Stage 2)", 18),
245
  Stage("Decode video", 10),
246
  ]
247
-
248
  _STYLE_STAGES = [
249
  Stage("Encode prompt", 5),
250
- Stage("Decode source video", 5),
251
- Stage("Diffusion (Stage 1)", 55),
252
- Stage("Spatial upscale", 7),
253
- Stage("Diffusion (Stage 2)", 18),
254
- Stage("Decode video", 10),
255
  ]
256
 
 
257
  MODE_REGISTRY["t2v"] = Mode(
258
  name="t2v",
259
  label="Text → Video",
 
5
  - label: display name
6
  - icon: single-character or emoji icon for the sidebar
7
  - stage_map: list of (label, expected_share_pct) for the status banner
8
+ - parameterize_fn: (Gradio inputs dict) -> list[(node_id, field_name, value)]
9
 
10
+ The workflows live in `workflows/<mode>.json` in ComfyUI's API format
11
+ (`{node_id_str: {class_type, inputs}}` produced by the editor's
12
+ "Save (API Format)" feature). That format is what `PromptExecutor.execute()`
13
+ consumes directly, so parameterize_fns just patch field values by node id;
14
+ no graph→API conversion is needed.
 
15
  """
16
 
17
  from __future__ import annotations
 
20
  from dataclasses import dataclass, field
21
  from typing import Any
22
 
23
+ # (node_id, field_name, value)
24
+ Patch = tuple[str, str, Any]
25
  ParameterizeFn = Callable[[dict[str, Any]], list[Patch]]
26
 
27
 
 
40
  stage_map: list[Stage] = field(default_factory=list)
41
 
42
 
 
43
  MODE_REGISTRY: dict[str, Mode] = {}
44
 
45
 
46
  # ---------------------------------------------------------------------------
47
+ # Shared user-input node IDs across all 6 mode API workflows.
48
+ # Captured 2026-05-01 from `/Users/techfreakworm/Downloads/workflows/*_api.json`
49
+ # (master workflow exported via "Save API Format" per mode).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  # ---------------------------------------------------------------------------
51
 
52
+ NODE_PROMPT = "5536" # CLIPTextEncode (positive) — inputs.text
53
+ NODE_NEG_PROMPT = "5537" # CLIPTextEncode (negative) — inputs.text
54
+ NODE_WIDTH = "5383" # INTConstant — inputs.value
55
+ NODE_HEIGHT = "5382" # INTConstant — inputs.value
56
+ NODE_FPS = "5445" # INTConstant — inputs.value
57
+ NODE_CLIP_SECONDS = "196" # mxSlider — inputs.Xi (length in seconds; frames = Xi*fps+1)
58
+ NODE_IMAGE_1 = "149" # LoadImage (first frame / portrait) — inputs.image
59
+ NODE_IMAGE_2 = "5437" # LoadImage (last frame for keyframe mode) — inputs.image
60
+ NODE_AUDIO = "5400" # VHS_LoadAudioUpload — inputs.audio
61
+ NODE_VIDEO = "5444" # VHS_LoadVideo — inputs.video
62
+
63
+ # Per-mode RandomNoise (subgraph-internal): id format `<subgraph_inst>:<inner>`.
64
+ SEED_NODE_BY_MODE: dict[str, str] = {
65
+ "t2v": "5464:5539",
66
+ "a2v": "463:5540",
67
+ "i2v": "209:5541",
68
+ "lipsync": "521:5542",
69
+ "keyframe": "670:5543",
70
+ "style": "5364:5545",
71
+ }
72
+
73
+
74
+ def _seconds_for(frames: int, fps: int) -> int:
75
+ """Inverse of `frames = seconds*fps + 1` from the master's MathExpression."""
76
+ return max(1, (max(1, int(frames)) - 1) // max(1, int(fps)))
77
+
78
+
79
def _shared_patches(inp: dict[str, Any], mode: str) -> list[Patch]:
    """Build the patches every mode applies.

    Covers prompt, negative prompt, width, height, fps, clip seconds and noise
    seed. Missing keys fall back to "" for both prompts, 512x768, 24 fps,
    81 frames and seed 42.

    Args:
        inp: Gradio input values keyed by field name.
        mode: Mode name used to look up the seed node in `SEED_NODE_BY_MODE`.

    Raises:
        KeyError: If `mode` has no entry in `SEED_NODE_BY_MODE`.
    """
    positive = inp.get("prompt", "")
    negative = inp.get("negative_prompt", "")
    width = int(inp.get("width", 512))
    height = int(inp.get("height", 768))
    fps = int(inp.get("fps", 24))
    seconds = _seconds_for(int(inp.get("frames", 81)), int(inp.get("fps", 24)))
    seed_node = SEED_NODE_BY_MODE[mode]
    seed = int(inp.get("seed", 42))

    patches: list[Patch] = []
    patches.append((NODE_PROMPT, "text", positive))
    patches.append((NODE_NEG_PROMPT, "text", negative))
    patches.append((NODE_WIDTH, "value", width))
    patches.append((NODE_HEIGHT, "value", height))
    patches.append((NODE_FPS, "value", fps))
    patches.append((NODE_CLIP_SECONDS, "Xi", seconds))
    patches.append((seed_node, "noise_seed", seed))
    return patches
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
 
94
 
95
def _t2v_parameterize(inp: dict[str, Any]) -> list[Patch]:
    """Return the patches for text-to-video: the shared set only."""
    patches = _shared_patches(inp, "t2v")
    return patches
 
 
 
 
 
 
 
97
 
98
 
99
def _i2v_parameterize(inp: dict[str, Any]) -> list[Patch]:
    """Return the shared patches plus the `NODE_IMAGE_1` image patch.

    Raises:
        KeyError: If `inp` has no "image" entry.
    """
    patches = _shared_patches(inp, "i2v")
    patches.append((NODE_IMAGE_1, "image", inp["image"]))
    return patches
103
 
104
 
105
def _a2v_parameterize(inp: dict[str, Any]) -> list[Patch]:
    """Return the shared patches plus the `NODE_AUDIO` audio patch.

    Raises:
        KeyError: If `inp` has no "audio" entry.
    """
    patches = _shared_patches(inp, "a2v")
    patches.append((NODE_AUDIO, "audio", inp["audio"]))
    return patches
109
 
110
 
111
def _lipsync_parameterize(inp: dict[str, Any]) -> list[Patch]:
    """Return the shared patches plus the image and audio patches, in that order.

    Raises:
        KeyError: If `inp` has no "image" or "audio" entry.
    """
    patches = _shared_patches(inp, "lipsync")
    patches.append((NODE_IMAGE_1, "image", inp["image"]))
    patches.append((NODE_AUDIO, "audio", inp["audio"]))
    return patches
116
 
117
 
118
def _keyframe_parameterize(inp: dict[str, Any]) -> list[Patch]:
    """Return the shared patches plus first-frame and last-frame image patches.

    Raises:
        KeyError: If `inp` has no "first_frame" or "last_frame" entry.
    """
    patches = _shared_patches(inp, "keyframe")
    patches.append((NODE_IMAGE_1, "image", inp["first_frame"]))
    patches.append((NODE_IMAGE_2, "image", inp["last_frame"]))
    return patches
123
 
124
 
125
def _style_parameterize(inp: dict[str, Any]) -> list[Patch]:
    """Return the shared patches plus the `NODE_VIDEO` source-video patch.

    Raises:
        KeyError: If `inp` has no "input_video" entry.
    """
    patches = _shared_patches(inp, "style")
    patches.append((NODE_VIDEO, "video", inp["input_video"]))
    return patches
129
 
130
 
 
154
  Stage("Decode video", 10),
155
  ]
156
 
157
+ _LIPSYNC_STAGES = list(_A2V_STAGES)
 
 
 
 
 
 
 
 
 
158
  _KEYFRAME_STAGES = [
159
  Stage("Encode prompt", 5),
160
  Stage("Encode keyframes", 5),
 
163
  Stage("Diffusion (Stage 2)", 18),
164
  Stage("Decode video", 10),
165
  ]
 
166
  _STYLE_STAGES = [
167
  Stage("Encode prompt", 5),
168
+ Stage("Encode source video", 10),
169
+ Stage("Diffusion", 70),
170
+ Stage("Decode video", 15),
 
 
171
  ]
172
 
173
+
174
  MODE_REGISTRY["t2v"] = Mode(
175
  name="t2v",
176
  label="Text → Video",
workflow.py CHANGED
@@ -1,4 +1,9 @@
1
- """Pure functions over LTX 2.3 mode workflow JSON templates."""
 
 
 
 
 
2
 
3
  from __future__ import annotations
4
 
@@ -13,74 +18,31 @@ VALID_MODES: tuple[str, ...] = ("t2v", "a2v", "i2v", "lipsync", "keyframe", "sty
13
 
14
 
15
  def load_template(mode: str) -> dict[str, Any]:
16
- """Load a fresh, independent copy of the named mode's workflow template."""
17
  if mode not in VALID_MODES:
18
  raise ValueError(f"unknown mode {mode!r}; expected one of {VALID_MODES}")
19
  path = WORKFLOWS_DIR / f"{mode}.json"
20
  return copy.deepcopy(json.loads(path.read_text()))
21
 
22
 
23
- def set_input(workflow: dict[str, Any], node_id: int, widget_index: int | str, value: Any) -> None:
24
- """Patch a node's widgets_values in place.
25
 
26
- Supports both list-style widgets_values (most ComfyUI nodes — patch by integer index,
27
- auto-extending with None) and dict-style widgets_values (VHS_LoadAudioUpload and
28
- similar — patch by string key, raising KeyError if the key doesn't exist).
29
 
30
  Args:
31
- workflow: A workflow dict (must have a "nodes" list).
32
- node_id: The id of the node to patch.
33
- widget_index: Integer index (for list widgets) or string key (for dict widgets).
34
- value: New value.
35
 
36
  Raises:
37
- KeyError: If no node with the given id exists, or for dict widgets, if the key
38
- doesn't already exist on the target dict (we don't add new keys).
39
- TypeError: If widget_index type doesn't match the node's widgets_values type.
40
  """
41
- for node in workflow["nodes"]:
42
- if node.get("id") != node_id:
43
- continue
44
- widgets = node.get("widgets_values")
45
- if isinstance(widgets, dict):
46
- if not isinstance(widget_index, str):
47
- raise TypeError(
48
- f"node {node_id} has dict widgets_values; widget_index must be str, "
49
- f"got {type(widget_index).__name__}"
50
- )
51
- if widget_index not in widgets:
52
- raise KeyError(
53
- f"node {node_id} dict widgets_values has no key {widget_index!r}; "
54
- f"available keys: {list(widgets.keys())}"
55
- )
56
- widgets[widget_index] = value
57
- return
58
- # List/None case — preserve existing list-extension behavior.
59
- if not isinstance(widget_index, int):
60
- raise TypeError(
61
- f"node {node_id} has list widgets_values; widget_index must be int, "
62
- f"got {type(widget_index).__name__}"
63
- )
64
- if widgets is None:
65
- widgets = []
66
- node["widgets_values"] = widgets
67
- while len(widgets) <= widget_index:
68
- widgets.append(None)
69
- widgets[widget_index] = value
70
- return
71
- raise KeyError(f"node id {node_id} not found in workflow")
72
-
73
-
74
- def validate(workflow: dict[str, Any]) -> None:
75
- """Static schema validation. Raises ValueError on the first problem found."""
76
- nodes = workflow.get("nodes")
77
- if not isinstance(nodes, list) or len(nodes) == 0:
78
- raise ValueError("workflow has no nodes")
79
-
80
- node_ids = {n.get("id") for n in nodes if "id" in n}
81
- for link in workflow.get("links", []):
82
- if not isinstance(link, list) or len(link) < 6:
83
- raise ValueError(f"malformed link {link}")
84
- _, src, _, dst, _, _ = link
85
- if src not in node_ids or dst not in node_ids:
86
- raise ValueError(f"orphan link {link}")
 
1
+ """Pure functions over LTX 2.3 mode API-format workflow templates.
2
+
3
+ Templates in `workflows/<mode>.json` are saved from ComfyUI's editor via
4
+ "Save (API Format)". Shape: `{node_id_str: {"class_type": str, "inputs": dict}}`.
5
+ This is what ComfyUI's `PromptExecutor.execute(prompt=...)` expects directly.
6
+ """
7
 
8
  from __future__ import annotations
9
 
 
18
 
19
 
20
def load_template(mode: str) -> dict[str, Any]:
    """Load a fresh, independent copy of the named mode's API workflow template.

    Args:
        mode: One of `VALID_MODES`; selects `<mode>.json` under `WORKFLOWS_DIR`.

    Returns:
        The parsed template; callers may mutate it freely.

    Raises:
        ValueError: If `mode` is not in `VALID_MODES`.
    """
    if mode not in VALID_MODES:
        raise ValueError(f"unknown mode {mode!r}; expected one of {VALID_MODES}")
    path = WORKFLOWS_DIR / f"{mode}.json"
    # Decode as UTF-8 explicitly: the default follows the platform locale and
    # can mis-read non-ASCII text embedded in a template.
    return copy.deepcopy(json.loads(path.read_text(encoding="utf-8")))
26
 
27
 
28
def set_input(workflow: dict[str, Any], node_id: int | str, field: str, value: Any) -> None:
    """Set one input field on a node of an API-format workflow, in place.

    Each node in an API-format workflow carries an `inputs` dict keyed by
    field name. `node_id` is converted to `str` and used as the workflow key
    (plain id for top-level nodes, "<inst>:<inner>" for subgraph-internal
    ones). A node without an `inputs` dict gets one created.

    Args:
        workflow: API-format workflow dict (mapping id -> {class_type, inputs}).
        node_id: Dict key of the target node.
        field: Name of the input field to set.
        value: New value (literal, or `[src_id, src_slot]` link form).

    Raises:
        KeyError: If the node doesn't exist in the workflow.
    """
    key = str(node_id)
    if key in workflow:
        workflow[key].setdefault("inputs", {})[field] = value
        return
    raise KeyError(f"node id {key!r} not found in workflow")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
workflows/a2v.json CHANGED
The diff for this file is too large to render. See raw diff
 
workflows/i2v.json CHANGED
The diff for this file is too large to render. See raw diff
 
workflows/keyframe.json CHANGED
The diff for this file is too large to render. See raw diff
 
workflows/lipsync.json CHANGED
The diff for this file is too large to render. See raw diff
 
workflows/style.json CHANGED
The diff for this file is too large to render. See raw diff
 
workflows/t2v.json CHANGED
The diff for this file is too large to render. See raw diff