techfreakworm committed on
Commit
3f9c655
·
unverified ·
1 Parent(s): b992e76

feat(deploy): preload all hub models + warm demucs on spaces

Browse files

Expand HF Spaces ``preload_from_hub`` to cover everything the app
actually touches on first interaction: ACE-Step base + XL-SFT, both
LoRA presets, and Qwen 2.5 7B Instruct. Mirror the list in
``_PRELOAD_REPOS`` so the symlink loop knows about them. Add a
demucs ``htdemucs_ft`` warmup at module load (gated by ``SPACE_ID``)
because demucs weights live on dl.fbaipublicfiles.com — outside the
HF preload reach.

Preload disk budget: ~10 GB (ACE base) + ~16 GB (XL-SFT) + ~120 MB
+ ~120 MB (LoRAs) + ~15 GB (Qwen) + ~320 MB (demucs runtime) ≈ 42 GB.
Within the ZeroGPU persistent-storage ceiling but worth keeping an
eye on if upstream weights grow.

Files changed (3) hide show
  1. README.md +5 -2
  2. app.py +34 -0
  3. tests/test_preload_repos.py +19 -0
README.md CHANGED
@@ -10,8 +10,11 @@ pinned: false
10
  license: mit
11
  short_description: Open-source song generation studio on ACE-Step 1.5 XL SFT — Generate, Cover, Extend, Edit, draft Lyrics.
12
  preload_from_hub:
13
- - ACE-Step/Ace-Step1.5 vae/diffusion_pytorch_model.safetensors,vae/config.json,encoder/pytorch_model.bin,encoder/config.json,encoder/tokenizer.json
14
- - ACE-Step/acestep-v15-xl-sft model.safetensors
 
 
 
15
  ---
16
 
17
  # ACE Music Studio
 
10
  license: mit
11
  short_description: Open-source song generation studio on ACE-Step 1.5 XL SFT — Generate, Cover, Extend, Edit, draft Lyrics.
12
  preload_from_hub:
13
+ - ACE-Step/Ace-Step1.5
14
+ - ACE-Step/acestep-v15-xl-sft
15
+ - ACE-Step/ACE-Step-v1-chinese-rap-LoRA
16
+ - ACE-Step/ACE-Step-v1.5-chinese-new-year-LoRA
17
+ - Qwen/Qwen2.5-7B-Instruct
18
  ---
19
 
20
  # ACE Music Studio
app.py CHANGED
@@ -72,9 +72,22 @@ def get_backend() -> be.ACEStepStudioBackend:
72
  return _BACKEND
73
 
74
 
 
 
 
 
 
 
 
 
 
 
75
  _PRELOAD_REPOS = (
76
  "ACE-Step/Ace-Step1.5",
77
  "ACE-Step/acestep-v15-xl-sft",
 
 
 
78
  )
79
 
80
 
@@ -131,6 +144,26 @@ def _bootstrap_spaces_cache() -> None:
131
  _symlink_snapshots_into_models()
132
 
133
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  _GPU_BASE_BY_MODE = {
135
  "generate": 30,
136
  "cover": 40,
@@ -250,6 +283,7 @@ def _maybe_spaces_gpu(mode: str):
250
  # Run cache bootstrap at module import so HF Spaces' startup analyzer sees
251
  # the symlinks before the lazy backend singleton is constructed on first click.
252
  _bootstrap_spaces_cache()
 
253
 
254
 
255
  def _safe_call(fn, *args, **kwargs):
 
72
  return _BACKEND
73
 
74
 
75
+ # Repos that are pre-downloaded by HF Spaces' ``preload_from_hub`` (see
76
+ # README frontmatter). The two ACE-Step model repos (base + XL-SFT) *must* be symlinked into
77
+ # ``./models/<org>/<repo>/`` so the fork's checkpoint resolver finds them
78
+ # without an extra network round-trip. The LoRA repos and Qwen don't
79
+ # strictly need the symlink — ``lora_stack.download_preset`` and the
80
+ # ``transformers`` Auto* loaders resolve them via the HF cache directly
81
+ # from ``hf_hub_download(repo_id, filename)`` / ``from_pretrained(repo_id)``.
82
+ # Including them here is a belt-and-braces measure: the snapshot_download
83
+ # call in ``_symlink_snapshots_into_models`` short-circuits when files are
84
+ # already cached, so the only cost is one symlink each.
85
  _PRELOAD_REPOS = (
86
  "ACE-Step/Ace-Step1.5",
87
  "ACE-Step/acestep-v15-xl-sft",
88
+ "ACE-Step/ACE-Step-v1-chinese-rap-LoRA",
89
+ "ACE-Step/ACE-Step-v1.5-chinese-new-year-LoRA",
90
+ "Qwen/Qwen2.5-7B-Instruct",
91
  )
92
 
93
 
 
144
  _symlink_snapshots_into_models()
145
 
146
 
147
+ def _warm_demucs_on_spaces() -> None:
148
+ """Pre-download Demucs htdemucs_ft so the first stem request is fast.
149
+
150
+ Demucs hosts its weights on dl.fbaipublicfiles.com, not HF Hub, so
151
+ preload_from_hub can't fetch them. We trigger the download at module load
152
+ on Spaces (gated by SPACE_ID) so user-facing latency is minimal.
153
+ Off-Spaces this is a no-op — local dev downloads on first user click.
154
+ """
155
+ if not os.getenv("SPACE_ID"):
156
+ return
157
+ try:
158
+ from demucs.pretrained import get_model
159
+
160
+ # Calling get_model triggers the download + cache. Discard the result.
161
+ get_model("htdemucs_ft")
162
+ except Exception as e:
163
+ # Warmup is best-effort. Surface in the log but don't crash startup.
164
+ print(f"[warmup] demucs htdemucs_ft preload skipped: {e}", flush=True)
165
+
166
+
167
  _GPU_BASE_BY_MODE = {
168
  "generate": 30,
169
  "cover": 40,
 
283
  # Run cache bootstrap at module import so HF Spaces' startup analyzer sees
284
  # the symlinks before the lazy backend singleton is constructed on first click.
285
  _bootstrap_spaces_cache()
286
+ _warm_demucs_on_spaces()
287
 
288
 
289
  def _safe_call(fn, *args, **kwargs):
tests/test_preload_repos.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Sanity check on the preload list app.py advertises to HF Spaces.
2
+
3
+ If this drifts, the README's ``preload_from_hub`` frontmatter and the
4
+ symlink loop in ``_symlink_snapshots_into_models()`` will fall out of
5
+ sync — first-user latency on Spaces regresses without anyone noticing
6
+ until prod. Cheap to assert here.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+
12
+ def test_preload_repos_shape():
13
+ from app import _PRELOAD_REPOS
14
+
15
+ assert isinstance(_PRELOAD_REPOS, tuple)
16
+ assert len(_PRELOAD_REPOS) == 5
17
+ for repo_id in _PRELOAD_REPOS:
18
+ assert isinstance(repo_id, str)
19
+ assert repo_id.startswith(("ACE-Step/", "Qwen/")), repo_id