Spaces:

techfreakworm
/

ACE-Music-Studio

Running on Zero

App Files Files Community

techfreakworm committed 2 days ago

Commit

3f9c655

unverified ·

1 Parent(s): b992e76

feat(deploy): preload all hub models + warm demucs on spaces

Browse files

Expand HF Spaces ``preload_from_hub`` to cover everything the app
actually touches on first interaction: ACE-Step base + XL-SFT, both
LoRA presets, and Qwen 2.5 7B Instruct. Mirror the list in
``_PRELOAD_REPOS`` so the symlink loop knows about them. Add a
demucs ``htdemucs_ft`` warmup at module load (gated by ``SPACE_ID``)
because demucs weights live on dl.fbaipublicfiles.com — outside the
HF preload reach.

Preload disk budget: ~10 GB (ACE base) + ~16 GB (XL-SFT) + ~120 MB
+ ~120 MB (LoRAs) + ~15 GB (Qwen) + ~320 MB (demucs runtime) ≈ 42 GB.
This is within the ZeroGPU persistent-storage ceiling, but worth keeping
an eye on if upstream weights grow.

Files changed (3) hide show

README.md +5 -2
app.py +34 -0
tests/test_preload_repos.py +19 -0

README.md CHANGED Viewed

@@ -10,8 +10,11 @@ pinned: false
 license: mit
 short_description: Open-source song generation studio on ACE-Step 1.5 XL SFT — Generate, Cover, Extend, Edit, draft Lyrics.
 preload_from_hub:
-- ACE-Step/Ace-Step1.5 vae/diffusion_pytorch_model.safetensors,vae/config.json,encoder/pytorch_model.bin,encoder/config.json,encoder/tokenizer.json
-- ACE-Step/acestep-v15-xl-sft model.safetensors
 ---
 # ACE Music Studio

 license: mit
 short_description: Open-source song generation studio on ACE-Step 1.5 XL SFT — Generate, Cover, Extend, Edit, draft Lyrics.
 preload_from_hub:
+- ACE-Step/Ace-Step1.5
+- ACE-Step/acestep-v15-xl-sft
+- ACE-Step/ACE-Step-v1-chinese-rap-LoRA
+- ACE-Step/ACE-Step-v1.5-chinese-new-year-LoRA
+- Qwen/Qwen2.5-7B-Instruct
 ---
 # ACE Music Studio

app.py CHANGED Viewed

@@ -72,9 +72,22 @@ def get_backend() -> be.ACEStepStudioBackend:
     return _BACKEND
 _PRELOAD_REPOS = (
     "ACE-Step/Ace-Step1.5",
     "ACE-Step/acestep-v15-xl-sft",
 )
@@ -131,6 +144,26 @@ def _bootstrap_spaces_cache() -> None:
     _symlink_snapshots_into_models()
 _GPU_BASE_BY_MODE = {
     "generate": 30,
     "cover": 40,
@@ -250,6 +283,7 @@ def _maybe_spaces_gpu(mode: str):
 # Run cache bootstrap at module import so HF Spaces' startup analyzer sees
 # the symlinks before the lazy backend singleton is constructed on first click.
 _bootstrap_spaces_cache()
 def _safe_call(fn, *args, **kwargs):

     return _BACKEND
+# Repos that are pre-downloaded by HF Spaces' ``preload_from_hub`` (see
+# README frontmatter). The two ACE-Step repos *must* be symlinked into
+# ``./models/<org>/<repo>/`` so the fork's checkpoint resolver finds them
+# without an extra network round-trip. The LoRA repos and Qwen don't
+# strictly need the symlink: ``lora_stack.download_preset`` and the
+# ``transformers`` Auto* loaders resolve them directly from the HF cache
+# via ``hf_hub_download(repo_id, filename)`` / ``from_pretrained(repo_id)``.
+# Including them here is a belt-and-braces measure: the snapshot_download
+# call in ``_symlink_snapshots_into_models`` short-circuits when files are
+# already cached, so the only cost is one symlink each.
 _PRELOAD_REPOS = (
     "ACE-Step/Ace-Step1.5",
     "ACE-Step/acestep-v15-xl-sft",
+    "ACE-Step/ACE-Step-v1-chinese-rap-LoRA",
+    "ACE-Step/ACE-Step-v1.5-chinese-new-year-LoRA",
+    "Qwen/Qwen2.5-7B-Instruct",
 )
     _symlink_snapshots_into_models()
+def _warm_demucs_on_spaces() -> None:
+    """Pre-download Demucs htdemucs_ft so first stem request is fast.
+    Demucs hosts its weights on dl.fbaipublicfiles.com, not HF Hub, so
+    preload_from_hub can't fetch them. We trigger the download at module load
+    on Spaces (gated by SPACE_ID) so user-facing latency is minimal.
+    Off-Spaces this is a no-op — local dev downloads on first user click.
+    """
+    if not os.getenv("SPACE_ID"):
+        return
+    try:
+        from demucs.pretrained import get_model
+        # Calling get_model triggers the download + cache. Discard the result.
+        get_model("htdemucs_ft")
+    except Exception as e:
+        # Warmup is best-effort. Surface in the log but don't crash startup.
+        print(f"[warmup] demucs htdemucs_ft preload skipped: {e}", flush=True)
 _GPU_BASE_BY_MODE = {
     "generate": 30,
     "cover": 40,
 # Run cache bootstrap at module import so HF Spaces' startup analyzer sees
 # the symlinks before the lazy backend singleton is constructed on first click.
 _bootstrap_spaces_cache()
+_warm_demucs_on_spaces()
 def _safe_call(fn, *args, **kwargs):

tests/test_preload_repos.py ADDED Viewed

	@@ -0,0 +1,19 @@

+"""Sanity check on the preload list app.py advertises to HF Spaces.
+If this drifts, the README's ``preload_from_hub`` frontmatter and the
+symlink loop in ``_symlink_snapshots_into_models()`` will fall out of
+sync, and first-user latency on Spaces regresses without anyone noticing
+until prod. Cheap to assert here.
+"""
+from __future__ import annotations
+def test_preload_repos_shape():
+    from app import _PRELOAD_REPOS
+    assert isinstance(_PRELOAD_REPOS, tuple)
+    assert len(_PRELOAD_REPOS) == 5
+    for repo_id in _PRELOAD_REPOS:
+        assert isinstance(repo_id, str)
+        assert repo_id.startswith(("ACE-Step/", "Qwen/")), repo_id