Spaces:

techfreakworm
/

ACE-Music-Studio

Running on Zero

App Files Files Community

techfreakworm committed 2 days ago

Commit

05f6b53

unverified ·

1 Parent(s): a5459fd

fix(deploy): per-mode duration estimator handles cover/extend/edit/lyrics signatures

Browse files

Files changed (2) hide show

app.py +58 -10
tests/test_gpu_estimator.py +82 -0

app.py CHANGED Viewed

@@ -159,21 +159,69 @@ def _estimate_gpu_duration(mode: str, params: dict, multiplier: float = 1.0) ->
     return max(_GPU_CLAMP_MIN, min(_GPU_CLAMP_MAX, int(estimated)))
-def _gpu_call_to_estimator(mode: str, *, duration_arg_index: int = 2):
     """Bridge spaces.GPU's per-call (*args, **kwargs) → our (mode, params, multiplier) estimator.
-    spaces.GPU(duration=callable) invokes the callable with the handler's actual
-    runtime args. The handlers here have signature roughly:
-        on_<mode>_click(prompt_or_seed, lyrics_or_other, duration_s, ...)
-    so duration_s is at position 2 by default. The kwargs path also works.
     """
     def from_call(*args, **kwargs):
-        duration_s = kwargs.get("duration_s")
-        if duration_s is None and len(args) > duration_arg_index:
-            candidate = args[duration_arg_index]
-            if isinstance(candidate, (int, float)):
-                duration_s = candidate
         return _estimate_gpu_duration(mode, {"duration_s": duration_s})
     return from_call

     return max(_GPU_CLAMP_MIN, min(_GPU_CLAMP_MAX, int(estimated)))
+# Per-mode hints for where the requested audio duration sits in a handler's call args.
+# A hint takes one of three shapes:
+#   - (positional_index, kwarg_name): the duration is a single argument.
+#   - "segment_window": "edit" mode; the duration is (segment_end_s - segment_start_s).
+#   - None: "lyrics" mode; there is no audio duration, so a default is used instead.
+_GPU_DURATION_HINTS: dict[str, tuple[int, str] | str | None] = {
+    "generate": (2, "duration_s"),
+    "cover": (3, "duration_s"),
+    "extend": (3, "extra_duration_s"),
+    "edit": "segment_window",  # special: end - start
+    "lyrics": None,  # no audio length
+}
+def _extract_duration_s(mode: str, args: tuple, kwargs: dict) -> float | None:
+    """Pull the requested audio duration out of a handler's call args, mode-aware.
+
+    Args:
+        mode: Handler mode; used as the key into ``_GPU_DURATION_HINTS``.
+        args: Positional arguments the handler was called with.
+        kwargs: Keyword arguments the handler was called with.
+
+    Returns:
+        The duration in seconds as a float, or None when the mode has no hint
+        (lyrics, or a mode missing from the table), when the value is absent or
+        is not a finite real number, or when the edit window is not positive.
+        The caller falls back to a per-mode default on None.
+    """
+    import math  # function-scope import keeps this block self-contained
+
+    def _as_seconds(value: object) -> float | None:
+        # bool is a subclass of int, so True/False would otherwise be read as
+        # a 1s/0s duration. NaN/inf are rejected because the estimator ends in
+        # an int(...) conversion, which raises on non-finite floats.
+        if isinstance(value, bool) or not isinstance(value, (int, float)):
+            return None
+        try:
+            seconds = float(value)
+        except OverflowError:  # int too large to represent as a float
+            return None
+        return seconds if math.isfinite(seconds) else None
+
+    def _lookup(kw_name: str, pos_idx: int) -> float | None:
+        # The keyword form wins; an absent or unusable kwarg falls back to the
+        # positional slot.
+        found = _as_seconds(kwargs.get(kw_name))
+        if found is None and len(args) > pos_idx:
+            found = _as_seconds(args[pos_idx])
+        return found
+
+    hint = _GPU_DURATION_HINTS.get(mode)
+    if hint is None:
+        return None
+    if hint == "segment_window":
+        # edit: (source_audio, sub_mode, source_lyrics, target_lyrics, segment_start_s, segment_end_s, ...)
+        start = _lookup("segment_start_s", 4)
+        end = _lookup("segment_end_s", 5)
+        if start is None or end is None:
+            return None
+        window = end - start
+        # An empty or inverted window carries no usable duration.
+        return window if window > 0 else None
+    pos_idx, kw_name = hint
+    return _lookup(kw_name, pos_idx)
+def _gpu_call_to_estimator(mode: str):
     """Bridge spaces.GPU's per-call (*args, **kwargs) → our (mode, params, multiplier) estimator.
+    Per-mode duration extraction handles the different signatures of the five
+    handlers. Falls back to a per-mode default when extraction fails so the
+    estimator still produces a reasonable timeout.
+
+    Args:
+        mode: Handler mode name, passed to both the extractor and the estimator.
+
+    Returns:
+        A callable that accepts the handler's (*args, **kwargs) and returns the
+        value of ``_estimate_gpu_duration`` for that call. No multiplier is
+        passed, so the estimator's own default applies.
     """
     def from_call(*args, **kwargs):
+        duration_s = _extract_duration_s(mode, args, kwargs)
+        if duration_s is None:
+            # Per-mode default when no duration found in call args.
+            # A mode missing from this table gets 30.0 via the .get() fallback.
+            duration_s = {
+                "generate": 30.0,
+                "cover": 30.0,
+                "extend": 20.0,
+                "edit": 8.0,  # typical edit segment window
+                "lyrics": 0.0,  # no audio; base alone
+            }.get(mode, 30.0)
         return _estimate_gpu_duration(mode, {"duration_s": duration_s})
     return from_call

tests/test_gpu_estimator.py ADDED Viewed

	@@ -0,0 +1,82 @@

+"""Unit tests for the per-mode GPU duration extraction."""
+from __future__ import annotations
+def test_extract_generate_positional():
+    """Generate mode reads the duration from positional slot 2."""
+    from app import _extract_duration_s
+    call_args = ("a prompt", "lyrics body", 45.0, "vocal_male", None)
+    found = _extract_duration_s("generate", call_args, {})
+    assert found == 45.0
+def test_extract_cover_at_index_3():
+    """Cover mode reads the duration from positional slot 3."""
+    from app import _extract_duration_s
+    call_args = ("ref.wav", "new style", "new lyrics", 60.0)
+    found = _extract_duration_s("cover", call_args, {})
+    assert found == 60.0
+def test_extract_extend_uses_extra_duration_s_kwarg():
+    """Extend reads extra_duration_s: positional slot 3, with the kwarg taking precedence."""
+    from app import _extract_duration_s
+    args = ("seed.wav", "more of the same", "extension lyrics", 25.0)
+    assert _extract_duration_s("extend", args, {}) == 25.0
+    # The keyword form wins over the positional slot when both are present.
+    assert _extract_duration_s("extend", args, {"extra_duration_s": 12.0}) == 12.0
+    # "duration_s" is the generate/cover name; extend must not pick it up.
+    assert _extract_duration_s("extend", (), {"duration_s": 40.0}) is None
+def test_extract_extend_kwarg_form():
+    """Extend mode accepts extra_duration_s as a keyword with no positionals."""
+    from app import _extract_duration_s
+    call_kwargs = {"extra_duration_s": 18.5}
+    found = _extract_duration_s("extend", (), call_kwargs)
+    assert found == 18.5
+def test_extract_edit_segment_window():
+    """Edit mode returns end - start taken from positional slots 4 and 5."""
+    from app import _extract_duration_s
+    call_args = ("src.wav", "repaint", "src lyrics", "new lyrics", 10.0, 22.5)
+    window = _extract_duration_s("edit", call_args, {})
+    assert window == 12.5
+def test_extract_edit_kwarg_window():
+    """Edit mode returns end - start when both bounds arrive as keywords."""
+    from app import _extract_duration_s
+    window = _extract_duration_s(
+        "edit",
+        (),
+        {"segment_start_s": 5.0, "segment_end_s": 20.0},
+    )
+    assert window == 15.0
+def test_extract_lyrics_returns_none():
+    """Lyrics mode has no audio duration, so extraction yields None."""
+    from app import _extract_duration_s
+    found = _extract_duration_s("lyrics", ("brief", "ABAB"), {})
+    assert found is None
+def test_extract_generate_falls_back_when_missing():
+    """Generate mode yields None when the call carries no duration at all."""
+    from app import _extract_duration_s
+    # Only two positionals (slot 2 is absent) and no kwarg → None
+    too_short = ("p", "l")
+    found = _extract_duration_s("generate", too_short, {})
+    assert found is None
+def test_estimator_clamp_floor():
+    """A tiny estimate is raised to the 60s floor."""
+    from app import _estimate_gpu_duration
+    # lyrics base=15 + 1.0*2 = 17 → clamped up to 60s floor.
+    estimate = _estimate_gpu_duration("lyrics", {"duration_s": 1.0})
+    assert estimate == 60
+def test_estimator_clamp_ceiling():
+    """An oversized estimate is capped at the 300s ceiling."""
+    from app import _estimate_gpu_duration
+    # 240s requested * 2 = 480 + base 30 = 510 → clamped to 300
+    estimate = _estimate_gpu_duration("generate", {"duration_s": 240})
+    assert estimate == 300
+def test_estimator_mode_specific_base():
+    """Cover mode applies its own base on top of the scaled duration."""
+    from app import _estimate_gpu_duration
+    # 30s requested * 2 = 60 + base 40 (cover) = 100s
+    estimate = _estimate_gpu_duration("cover", {"duration_s": 30})
+    assert estimate == 100