Spaces:

owenisas
/

stable-audio-3-lab

Running on Zero

App Files Community

owenisas committed 2 days ago

Commit

cefbe11

verified ·

1 Parent(s): c1ec670

Make Space compatible with ZeroGPU

Browse files

Files changed (3) hide show

README.md +5 -4
app.py +5 -11
requirements.txt +2 -8

README.md CHANGED Viewed

@@ -6,7 +6,7 @@ sdk: gradio
 sdk_version: 6.3.0
 app_file: app.py
 python_version: "3.10"
-suggested_hardware: a10g-small
 pinned: false
 license: mit
 hf_oauth: true
@@ -36,8 +36,9 @@ Base checkpoints are not gated, but they are intended mainly for fine-tuning and
 ## Hardware
 - Small models can run on CPU, but GPU is still preferred.
-- Medium and Medium Base expect CUDA plus `flash-attn`.
-- `SAME-L` is treated as GPU-first; `SAME-S` can be used for CPU autoencoder round trips.
-The Space is configured with `suggested_hardware: a10g-small`. Upgrade hardware if medium generations fail due to memory or Flash Attention support.

 sdk_version: 6.3.0
 app_file: app.py
 python_version: "3.10"
+suggested_hardware: zero-a10g
 pinned: false
 license: mit
 hf_oauth: true
 ## Hardware
+- ZeroGPU is enabled through the `spaces.GPU` decorator on generation and autoencoder actions.
 - Small models can run on CPU, but GPU is still preferred.
+- Medium and Medium Base are GPU-first.
+- `SAME-L` is GPU-first; `SAME-S` can be used for CPU autoencoder round trips.
+The Space is configured with `suggested_hardware: zero-a10g`.

app.py CHANGED Viewed

@@ -100,7 +100,7 @@ GENERATION_MODELS: dict[str, GenerationModel] = {
         default_sampler="pingpong",
         requires_cuda=True,
         gated=True,
-        note="High-quality checkpoint; CUDA and flash-attn are expected.",
     ),
     "small-music-base": GenerationModel(
         label="Stable Audio 3 Small Music Base",
@@ -140,7 +140,7 @@ GENERATION_MODELS: dict[str, GenerationModel] = {
         default_cfg=7.0,
         default_sampler="euler",
         requires_cuda=True,
-        note="Base checkpoint intended mainly for fine-tuning; CUDA and flash-attn are expected.",
     ),
 }
@@ -160,10 +160,10 @@ AUTOENCODER_MODELS = {
 COLLECTION_ROWS = [
     ["stable-audio-3-small-music", "Text-to-audio", "Generate tab", "Gated post-trained small music"],
     ["stable-audio-3-small-sfx", "Text-to-audio", "Generate tab", "Gated post-trained small SFX"],
-    ["stable-audio-3-medium", "Text-to-audio", "Generate tab", "Gated medium; needs CUDA + flash-attn"],
     ["stable-audio-3-small-music-base", "Text-to-audio", "Generate tab", "Base checkpoint"],
     ["stable-audio-3-small-sfx-base", "Text-to-audio", "Generate tab", "Base checkpoint"],
-    ["stable-audio-3-medium-base", "Text-to-audio", "Generate tab", "Base checkpoint; needs CUDA + flash-attn"],
     ["stable-audio-3-optimized", "Optimized assets", "Listed only", "MLX/TensorRT artifacts, not generic PyTorch generation"],
     ["SAME-S", "Autoencoder", "Autoencoder tab", "CPU-capable round trip"],
     ["SAME-L", "Autoencoder", "Autoencoder tab", "Large autoencoder; CUDA recommended"],
@@ -175,7 +175,7 @@ MODEL_LOAD_LOCK = threading.RLock()
 def gpu_task(duration: int):
-    if os.getenv("SA3_USE_SPACES_GPU", "").strip().lower() not in {"1", "true", "yes"}:
         return lambda fn: fn
     try:
         import spaces
@@ -288,12 +288,6 @@ def generation_preflight_error(
         has_access, error = user_can_download_gated_model(model.repo_id, token)
         if not has_access:
             return error or "Your Hugging Face account cannot access this gated model.", device
-    if model.requires_cuda and device == "cuda" and not flash_attn_available():
-        return (
-            f"{model.label} expects flash-attn on CUDA. Rebuild the Space with the "
-            "flash-attn wheel in requirements.txt or use a small model.",
-            device,
-        )
     return None, device

         default_sampler="pingpong",
         requires_cuda=True,
         gated=True,
+        note="High-quality checkpoint; GPU-first.",
     ),
     "small-music-base": GenerationModel(
         label="Stable Audio 3 Small Music Base",
         default_cfg=7.0,
         default_sampler="euler",
         requires_cuda=True,
+        note="Base checkpoint intended mainly for fine-tuning; GPU-first.",
     ),
 }
 COLLECTION_ROWS = [
     ["stable-audio-3-small-music", "Text-to-audio", "Generate tab", "Gated post-trained small music"],
     ["stable-audio-3-small-sfx", "Text-to-audio", "Generate tab", "Gated post-trained small SFX"],
+    ["stable-audio-3-medium", "Text-to-audio", "Generate tab", "Gated medium; GPU-first"],
     ["stable-audio-3-small-music-base", "Text-to-audio", "Generate tab", "Base checkpoint"],
     ["stable-audio-3-small-sfx-base", "Text-to-audio", "Generate tab", "Base checkpoint"],
+    ["stable-audio-3-medium-base", "Text-to-audio", "Generate tab", "Base checkpoint; GPU-first"],
     ["stable-audio-3-optimized", "Optimized assets", "Listed only", "MLX/TensorRT artifacts, not generic PyTorch generation"],
     ["SAME-S", "Autoencoder", "Autoencoder tab", "CPU-capable round trip"],
     ["SAME-L", "Autoencoder", "Autoencoder tab", "Large autoencoder; CUDA recommended"],
 def gpu_task(duration: int):
+    if os.getenv("SA3_USE_SPACES_GPU", "1").strip().lower() in {"0", "false", "no"}:
         return lambda fn: fn
     try:
         import spaces
         has_access, error = user_can_download_gated_model(model.repo_id, token)
         if not has_access:
             return error or "Your Hugging Face account cannot access this gated model.", device
     return None, device

requirements.txt CHANGED Viewed

@@ -1,13 +1,7 @@
---extra-index-url https://download.pytorch.org/whl/cu126
-torch==2.7.1
-torchaudio==2.7.1
 gradio==6.3.0
 spaces
 hf_transfer
 soundfile
 git+https://github.com/Stability-AI/stable-audio-3.git@main
-# Required for Stable Audio 3 Medium on CUDA. This is the wheel recommended by
-# Stability AI's README for torch 2.7 / CUDA 12.6 / Python 3.10.
-https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.7.16/flash_attn-2.6.3+cu126torch2.7-cp310-cp310-linux_x86_64.whl

+torch==2.8.0
+torchaudio==2.8.0
 gradio==6.3.0
 spaces
 hf_transfer
 soundfile
 git+https://github.com/Stability-AI/stable-audio-3.git@main