File size: 21,901 Bytes
15811ca
 
 
 
 
3ea399a
15811ca
 
b23fbf5
1e054fe
15811ca
 
 
b23fbf5
 
 
15811ca
3ea399a
15811ca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3ea399a
15811ca
 
 
 
 
 
 
3ea399a
15811ca
 
 
 
 
 
 
be7dc01
 
 
 
 
 
 
 
 
 
14dcc06
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a2c44d8
0a98b65
14dcc06
a2c44d8
 
 
 
 
 
 
 
 
 
 
 
14dcc06
a2c44d8
 
14dcc06
a2c44d8
 
14dcc06
 
 
 
 
 
 
 
be7dc01
 
 
14dcc06
 
 
 
 
 
a2c44d8
27ffbbc
14dcc06
e8627ee
 
 
a2c44d8
 
27ffbbc
 
 
 
 
 
 
e8627ee
27ffbbc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
be7dc01
 
 
 
 
 
e8627ee
 
 
 
 
 
 
 
 
 
 
 
 
 
be7dc01
 
19489a8
 
 
 
 
 
 
 
 
 
 
 
 
bce3dbc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15811ca
 
bc868d5
15811ca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19489a8
15811ca
 
 
 
 
bce3dbc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19489a8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107f495
 
 
 
 
 
 
15811ca
 
 
b23fbf5
 
14dcc06
 
 
 
 
a2c44d8
14dcc06
27ffbbc
b23fbf5
14dcc06
 
a2c44d8
 
 
14dcc06
b23fbf5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bce3dbc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3ea399a
 
bce3dbc
3ea399a
bce3dbc
 
3ea399a
 
b23fbf5
 
 
3ea399a
b23fbf5
 
bce3dbc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b23fbf5
 
 
14dcc06
 
 
 
 
 
a2c44d8
14dcc06
e8627ee
 
 
 
 
 
 
 
 
 
 
b23fbf5
 
107f495
 
3ea399a
 
 
 
107f495
3ea399a
 
b23fbf5
 
 
 
 
1e054fe
 
 
 
 
 
 
 
 
 
b23fbf5
 
 
 
 
 
 
 
 
 
 
 
3ea399a
b23fbf5
 
 
 
 
 
 
14dcc06
 
b23fbf5
3c69062
 
 
 
 
 
 
 
14dcc06
3c69062
14dcc06
 
b23fbf5
 
 
 
 
 
 
 
 
3ea399a
b23fbf5
 
 
 
 
3ea399a
b23fbf5
 
 
 
 
 
3ea399a
b23fbf5
 
 
 
 
 
e8627ee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
"""ComfyUI library-mode backend.

Single-process, single-implementation. The @spaces.GPU decorator is the only
divergence between local and HF Spaces deployment.
"""

from __future__ import annotations

import asyncio
import contextvars
import os
import pathlib
import sys
import threading
import traceback as tb_mod
from collections.abc import AsyncIterator, Iterable
from dataclasses import dataclass, field
from typing import Any

import models


@dataclass
class DownloadEvent:
    filename: str
    mb_done: float
    mb_total: float


@dataclass
class ProgressEvent:
    stage: int
    stage_label: str
    step: int
    total_steps: int


@dataclass
class OutputEvent:
    video_path: str
    audio_path: str | None = None
    meta: dict = field(default_factory=dict)


@dataclass
class ErrorEvent:
    category: str  # "oom" | "zerogpu_timeout" | "execution" | "interrupt" | "download"
    message: str
    stage: int | None = None
    traceback: str = ""


def _on_spaces() -> bool:
    return bool(os.environ.get("SPACES_ZERO_GPU"))


try:
    import spaces  # type: ignore
except ImportError:
    spaces = None  # type: ignore[assignment]


def _identity(fn):
    return fn


# --- Per-call ZeroGPU duration estimator -----------------------------------
# `duration` is a per-call timeout. Shorter declared duration β†’ faster queue
# priority on the shared ZeroGPU pool. Estimating from (mode, preset, frames)
# instead of using a one-size-fits-all 600s cap means light T2V calls jump
# the queue while heavy modes (lipsync, style) reserve real headroom.

_BASE_DURATION_S: dict[str, int] = {
    # Rough sampler+decode time at ~120 frames, balanced preset, warm cache.
    "t2v": 90,
    "i2v": 90,
    "a2v": 120,
    "lipsync": 240,   # extra: audio encoder + audio VAE + extra LoRAs
    "keyframe": 180,
    "style": 360,     # extra: preprocessor (canny/dwpose/depth) + IC-LoRAs
}
_PRESET_MULT: dict[str, float] = {"fast": 1.0, "balanced": 1.5, "quality": 3.0}


def _frames_from_workflow(workflow: dict) -> int:
    """Read the frame count from the workflow's EmptyLTXVLatentVideo node."""
    for node in workflow.values():
        if isinstance(node, dict) and node.get("class_type") == "EmptyLTXVLatentVideo":
            try:
                return int((node.get("inputs") or {}).get("length", 121))
            except (TypeError, ValueError):
                return 121
    return 121


def _duration_for(
    executor: Any,
    workflow: dict,
    output_ids: list[str],
    mode: str,
    preset: str,
    multiplier: float = 1.0,
    progress: Any = None,
) -> int:
    """ZeroGPU duration estimator. Same signature as _execute_workflow.

    `progress` is a gr.Progress instance forwarded by the caller; we ignore it
    here (estimator doesn't emit progress) but must accept it positionally so
    ZeroGPU can call us with the same arg list it'll use for _execute_workflow.

    Estimate = (base Γ— preset multiplier + cold-cache buffer + per-frame VAE
    decode time) Γ— retry multiplier, clamped to [60s, 240s]. ZeroGPU rejects
    durations above the server's per-call max with "ZeroGPU illegal duration"
    (client.py:137); 240s is observed to work for Pro identity (~2 min runs
    needed for style + lipsync detailer paths). If the server rejects values
    in this range, the user will see a clear error and can retry.
    """
    base = _BASE_DURATION_S.get(mode, 180)
    mult = _PRESET_MULT.get(preset.lower(), 1.5)
    frames = _frames_from_workflow(workflow)
    est = int((base * mult + 60 + frames * 0.3) * multiplier)
    return max(60, min(est, 240))


# Decorate at module load time so ZeroGPU's startup analyzer detects it.
_GPU = (
    spaces.GPU(duration=_duration_for)
    if (spaces is not None and _on_spaces())
    else _identity
)


@_GPU
def _execute_workflow(
    executor: Any,
    workflow: dict,
    output_ids: list[str],
    mode: str,
    preset: str,
    multiplier: float = 1.0,
    progress: Any = None,
) -> str:
    """Run the workflow on GPU and return the path of the first video output.

    Returns just the video path (a plain string, picklable across the
    @spaces.GPU subprocess boundary). The `mode`, `preset`, and `multiplier`
    args are consumed by `_duration_for` to estimate the GPU slot to reserve.

    `progress` is an optional `gr.Progress` instance. It's the only progress
    channel that crosses the @spaces.GPU subprocess boundary on HF Spaces β€”
    Gradio + the `spaces` library wrap it with cross-process IPC. When set,
    we mirror ComfyUI's step counter into it via the global progress hook,
    chaining to whatever hook was already installed (so the local event-based
    status banner keeps working alongside).
    """
    if progress is not None:
        import comfy.utils as _cu
        _saved_hook = getattr(_cu, "PROGRESS_BAR_HOOK", None)

        def _gp_hook(value, total, _preview=None, **_kw):
            try:
                v, t = int(value), int(total)
                progress(v / max(t, 1), desc=f"Sampling step {v}/{t}")
            except Exception:
                pass
            if _saved_hook is not None:
                try:
                    _saved_hook(value, total, _preview)
                except Exception:
                    pass

        _cu.set_progress_bar_global_hook(_gp_hook)

    executor.execute(
        workflow,
        prompt_id="ltx23-aio",
        extra_data={"client_id": "ltx23-aio"},
        execute_outputs=output_ids,
    )
    hist = getattr(executor, "history_result", {}) or {}
    outs = hist.get("outputs") or {}
    for output in outs.values():
        if not isinstance(output, dict):
            continue
        for value in output.values():
            if not isinstance(value, list):
                continue
            for item in value:
                if isinstance(item, dict):
                    fn = item.get("filename") or ""
                    if fn.endswith((".mp4", ".webm", ".mov")):
                        return item.get("fullpath") or fn
    return ""


class _StubServer:
    """Minimal stub matching the surface ComfyUI's PromptExecutor expects."""

    client_id: str | None = "ltx23-aio"
    last_node_id: str | None = None

    def send_sync(self, event: str, data: dict, sid: str | None = None) -> None:
        pass

    def queue_updated(self) -> None:
        pass


class _StubPromptQueue:
    """Stub matching the surface VideoHelperSuite + others touch."""

    currently_running: dict = {}
    history: dict = {}
    flags: dict = {}

    def get_current_queue(self) -> tuple[list, list]:
        return ([], [])

    def get_tasks_remaining(self) -> int:
        return 0

    def set_flag(self, name: str, data) -> None:
        pass

    def get_flags(self, *a, **kw) -> dict:
        return {}

    def task_done(self, *a, **kw) -> None:
        pass

    def put(self, *a, **kw) -> None:
        pass

    def wipe_queue(self) -> None:
        pass

    def delete_queue_item(self, *a, **kw) -> None:
        pass


class _StubPromptServerInstance:
    """Surface that ComfyUI's `server.PromptServer.instance` exposes to custom nodes.

    VideoHelperSuite, KJNodes, and others read this at import time. They mostly
    use it to register HTTP routes or send WS events or peek at the prompt queue.
    No-ops here are fine β€” we have no real server.
    """

    client_id: str | None = "ltx23-aio"
    # KJNodes' preview thread reads `last_node_id.encode('ascii')` directly.
    # ComfyUI's real server keeps it as a string per executing node and resets
    # to None at end-of-prompt β€” which races the preview thread. Keep it a
    # safe non-empty string so .encode() never NPEs.
    last_node_id: str = "ltx23-aio"
    web_root: str = ""

    class _Routes:
        def get(self, *a, **kw):
            return lambda fn: fn

        def post(self, *a, **kw):
            return lambda fn: fn

        def static(self, *a, **kw):
            return None

    routes = _Routes()
    sockets: dict = {}
    prompt_queue = _StubPromptQueue()
    # Custom-Scripts checks PromptServer.instance.supports β€” claim the
    # "custom_nodes_from_web" capability so it skips its JS install path.
    supports: list[str] = ["custom_nodes_from_web"]
    web_root: str = ""

    def add_routes(self) -> None:
        pass

    def send_sync(self, event: str, data: dict, sid: str | None = None) -> None:
        pass

    def send_progress_text(self, text: str, node_id=None, sid=None) -> None:
        # Comfy_extras nodes call this; we just no-op since we don't have a UI
        # to surface intermediate text on.
        pass

    def queue_updated(self) -> None:
        pass

    def get_node_class_def(self, *a, **kw):
        return None

    def __getattr__(self, name):
        # Anything else our custom nodes might reach for β€” give them a no-op.
        # This is a deliberate liberal catch-all so the inference path doesn't
        # die on cosmetic UI hooks. Inspection-style access (hasattr) gets True.
        def _noop(*a, **kw):
            return None
        return _noop


def _comfy_dir() -> pathlib.Path:
    if _on_spaces():
        return pathlib.Path.home() / "comfyui"
    return pathlib.Path(__file__).parent / "comfyui"


class ComfyUILibraryBackend:
    """Wraps PromptExecutor for in-process workflow execution."""

    def __init__(self) -> None:
        self._comfy_dir = _comfy_dir()
        if not self._comfy_dir.exists():
            raise RuntimeError(
                f"ComfyUI not found at {self._comfy_dir}. "
                f"Local: run `bash setup.sh`. Spaces: see app.py:_bootstrap()."
            )
        if str(self._comfy_dir) not in sys.path:
            sys.path.insert(0, str(self._comfy_dir))

        # Defer comfy imports until the path is set up.
        # NOTE: ComfyUI ships PromptExecutor in the top-level `execution.py`
        # module, NOT under `comfy.execution`. Same for `nodes`. Both must be
        # imported AFTER the sys.path insert above.
        import asyncio
        import threading

        import comfy.cli_args  # noqa: F401 β€” side-effect: registers CLI flags
        import execution  # top-level module β€” provides PromptExecutor
        import nodes  # top-level module β€” provides init_extra_nodes (async)

        # CRITICAL ordering fix: ComfyUI's nodes.py:24 inserts `comfyui/comfy/`
        # at sys.path[0]. That dir contains a module-style `utils.py`, which
        # shadows `comfyui/utils/` (a package containing install_util.py).
        # Some custom nodes (KJNodes, VideoHelperSuite via app.frontend_management)
        # do `from utils.install_util import …` and get `comfy/utils.py` instead,
        # raising "'utils' is not a package". Rewrite sys.path so comfy_dir is
        # ahead of comfy_dir/comfy and force-clear any cached `utils` binding.
        comfy_subdir = str(self._comfy_dir / "comfy")
        sys.path = [p for p in sys.path if p not in (str(self._comfy_dir), comfy_subdir)]
        sys.path.insert(0, comfy_subdir)
        sys.path.insert(0, str(self._comfy_dir))
        if "utils" in sys.modules and not getattr(sys.modules["utils"], "__path__", None):
            del sys.modules["utils"]

        # Some custom nodes (e.g. VideoHelperSuite) read `server.PromptServer.instance`
        # at import time. We don't run a real ComfyUI server, so install a stub
        # that exposes the attributes those nodes touch (sockets, send, etc.).
        import server as comfy_server

        if getattr(comfy_server.PromptServer, "instance", None) is None:
            comfy_server.PromptServer.instance = _StubPromptServerInstance()

        # `nodes.init_extra_nodes` is async. We may be called from within a
        # running event loop (Gradio's handler) β€” running `asyncio.run()` there
        # raises. Run the coroutine in a fresh loop on a worker thread instead.
        def _init_in_thread() -> None:
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            try:
                loop.run_until_complete(nodes.init_extra_nodes())
            finally:
                loop.close()

        thread = threading.Thread(target=_init_in_thread, daemon=False)
        thread.start()
        thread.join()
        # PromptExecutor expects a `server` with client_id, send_sync, last_node_id,
        # queue_updated. A minimal stub no-ops all of them β€” we don't run a real
        # websocket server, we surface progress via comfy.utils.PROGRESS_BAR_HOOK.
        # cache_args["ram"] is read unconditionally inside execute_async even when
        # cache_type is the default false β€” provide a sensible default so it doesn't
        # NoneType-subscript at line 727.
        self._executor = execution.PromptExecutor(
            server=_StubServer(),
            cache_args={"ram": 16.0, "lru": 0},
        )

    def __repr__(self) -> str:
        return f"ComfyUILibraryBackend(comfy_dir={self._comfy_dir!r})"

    async def submit(
        self,
        mode: str,
        workflow: dict,
        *,
        preset: str = "balanced",
        duration_multiplier: float = 1.0,
        gpu_duration: int = 0,  # legacy, ignored (now derived from preset+frames)
        progress: Any = None,
    ) -> AsyncIterator[Any]:
        """Run a workflow end-to-end. Yields Download/Progress/Output/Error events.

        `preset` and `duration_multiplier` flow through to the @spaces.GPU
        duration estimator. The handler can re-call submit() with
        duration_multiplier=2.0 if the first attempt aborts on timeout.
        """
        # Pre-flight: ensure all model files exist.
        try:
            needed = models.walk_workflow_for_models(workflow)
            for download_event in models.ensure_models(needed):
                yield download_event
        except Exception as e:
            yield ErrorEvent(
                category="download",
                message=str(e),
                traceback=tb_mod.format_exc(),
            )
            return

        # Run the inference in a worker thread; pass progress events through a queue.
        queue: asyncio.Queue = asyncio.Queue()
        loop = asyncio.get_running_loop()

        def _push(event: Any) -> None:
            asyncio.run_coroutine_threadsafe(queue.put(event), loop)

        # Track stage progression. ComfyUI fires the progress hook from inside
        # samplers, so we advance the stage every time we observe a new sampler
        # starting (step==0 with a different total than before, or a "new run"
        # signal β€” value smaller than the running max for the same total).
        progress_state = {"stage": 0, "prev_total": -1, "max_step": -1}

        def _hook(value: int, total: int, _preview=None, **_kwargs: Any) -> None:
            v, t = int(value), int(total)
            # New sampler started (different total, or step rewound)
            if t != progress_state["prev_total"] or v < progress_state["max_step"]:
                progress_state["stage"] += 1
                progress_state["prev_total"] = t
                progress_state["max_step"] = v
            else:
                progress_state["max_step"] = max(progress_state["max_step"], v)
            _push(
                ProgressEvent(
                    stage=progress_state["stage"],
                    stage_label="diffusion",
                    step=v,
                    total_steps=t,
                )
            )

        def _worker() -> None:
            import comfy.utils

            saved_hook = getattr(comfy.utils, "PROGRESS_BAR_HOOK", None)
            try:
                # Workflow is already API-format (saved from ComfyUI editor's
                # "Save (API Format)"), so it can be handed to PromptExecutor
                # directly. The execute_outputs list pinpoints which output
                # nodes to evaluate β€” we let PromptExecutor walk the whole
                # graph by passing every output-class node id.
                output_ids = [
                    nid for nid, n in workflow.items()
                    if n.get("class_type", "").startswith(("SaveVideo", "VHS_VideoCombine", "PreviewAudio", "CreateVideo"))
                ]
                print(
                    f"[backend] submitting workflow: {len(workflow)} nodes, "
                    f"output_ids={output_ids}",
                    file=sys.stderr,
                    flush=True,
                )
                # Use the public setter; it writes the same global the
                # ProgressBar class reads, but is the documented API.
                comfy.utils.set_progress_bar_global_hook(_hook)
                # _execute_workflow is module-level and decorated with a
                # @spaces.GPU(duration=callable) on Spaces β€” the callable
                # estimates per-call timeout from (mode, preset, frames) so
                # light calls get fast queue priority while heavy ones reserve
                # real headroom. Off-Spaces it's a plain call.
                video_path = _execute_workflow(
                    self._executor, workflow, output_ids, mode, preset, duration_multiplier, progress,
                )
                # Fallback: if history_result didn't surface a path (rare on
                # Spaces β€” happens when ZeroGPU's subprocess boundary drops
                # mutated state), scan the output dir for the newest mp4
                # written within the last 60 s.
                if not video_path:
                    video_path = _newest_recent_video(self._comfy_dir / "output") or ""
                print(
                    f"[backend] workflow done; video_path={video_path!r}",
                    file=sys.stderr,
                    flush=True,
                )
                _push(OutputEvent(video_path=video_path))
            except Exception as exc:
                tb_text = tb_mod.format_exc()
                print(f"[backend] worker exception:\n{tb_text}", file=sys.stderr, flush=True)
                _push(
                    ErrorEvent(
                        category=_classify(exc),
                        message=str(exc),
                        traceback=tb_text,
                    )
                )
            finally:
                comfy.utils.set_progress_bar_global_hook(saved_hook)
                _free_memory()
                _push(None)  # sentinel: stop the consumer

        # ZeroGPU's @spaces.GPU wrapper reads the user's identity from the
        # current Gradio request via gradio.context.LocalContext.request,
        # which is a contextvar. Plain threads don't inherit contextvars, so
        # without this the worker sees request=None, X-IP-Token never gets
        # read, and `client.schedule` raises "Space app has reached its GPU
        # limit" (token-is-None branch in spaces/zero/client.py:138). Copy
        # the calling task's context so the request β€” and therefore the Pro
        # user's quota attribution β€” survives the thread boundary.
        ctx = contextvars.copy_context()
        thread = threading.Thread(target=ctx.run, args=(_worker,), daemon=True)
        thread.start()

        while True:
            event = await queue.get()
            if event is None:
                return
            yield event

    def interrupt(self) -> None:
        """Cancel the currently running workflow (if any)."""
        try:
            import comfy.model_management as mm

            mm.interrupt_current_processing()
        except Exception:
            pass


def _classify(exc: Exception) -> str:
    name = type(exc).__name__.lower()
    msg = str(exc).lower()
    if "outofmemory" in name or "cuda out of memory" in msg:
        return "oom"
    if "expired zerogpu proxy token" in msg or "expired" in msg and "token" in msg:
        return "expired_token"
    if "illegal duration" in msg:
        return "illegal_duration"
    if "unlogged user" in msg:
        return "unlogged"
    if "exceeded your" in msg and "gpu" in msg:
        return "quota_exceeded"
    # ZeroGPU enforces the @spaces.GPU(duration=N) cap and re-raises as
    # gradio.exceptions.Error('GPU task aborted').
    if "gpu task aborted" in msg or ("gpu" in msg and "aborted" in msg):
        return "gpu_timeout"
    if "interrupt" in name:
        return "interrupt"
    return "execution"


def _free_memory() -> None:
    """Free VRAM after a workflow finishes (success or failure)."""
    try:
        import comfy.model_management as mm

        mm.unload_all_models()
    except Exception:
        pass
    try:
        import torch

        if torch.backends.mps.is_available():
            torch.mps.empty_cache()
    except Exception:
        pass
    try:
        import torch

        if torch.cuda.is_available():
            torch.cuda.empty_cache()
    except Exception:
        pass


def _newest_recent_video(output_root: pathlib.Path, within_seconds: float = 60.0) -> str | None:
    """Filesystem fallback: return the newest .mp4/.webm/.mov under *output_root*
    that was modified within the last *within_seconds* seconds.

    Used when the executor's history_result didn't surface a path β€” typically
    happens when ZeroGPU's subprocess boundary drops the mutation. The disk
    is shared, so the file is there even when the in-memory state isn't.
    """
    import time

    if not output_root.exists():
        return None
    cutoff = time.time() - within_seconds
    candidates: list[tuple[float, pathlib.Path]] = []
    for ext in (".mp4", ".webm", ".mov"):
        for p in output_root.rglob(f"*{ext}"):
            try:
                mtime = p.stat().st_mtime
            except OSError:
                continue
            if mtime >= cutoff:
                candidates.append((mtime, p))
    if not candidates:
        return None
    candidates.sort(reverse=True)
    return str(candidates[0][1])