owenisas committed on
Commit
cefbe11
·
verified ·
1 Parent(s): c1ec670

Make Space compatible with ZeroGPU

Browse files
Files changed (3) hide show
  1. README.md +5 -4
  2. app.py +5 -11
  3. requirements.txt +2 -8
README.md CHANGED
@@ -6,7 +6,7 @@ sdk: gradio
6
  sdk_version: 6.3.0
7
  app_file: app.py
8
  python_version: "3.10"
9
- suggested_hardware: a10g-small
10
  pinned: false
11
  license: mit
12
  hf_oauth: true
@@ -36,8 +36,9 @@ Base checkpoints are not gated, but they are intended mainly for fine-tuning and
36
 
37
  ## Hardware
38
 
 
39
  - Small models can run on CPU, but GPU is still preferred.
40
- - Medium and Medium Base expect CUDA plus `flash-attn`.
41
- - `SAME-L` is treated as GPU-first; `SAME-S` can be used for CPU autoencoder round trips.
42
 
43
- The Space is configured with `suggested_hardware: a10g-small`. Upgrade hardware if medium generations fail due to memory or Flash Attention support.
 
6
  sdk_version: 6.3.0
7
  app_file: app.py
8
  python_version: "3.10"
9
+ suggested_hardware: zero-a10g
10
  pinned: false
11
  license: mit
12
  hf_oauth: true
 
36
 
37
  ## Hardware
38
 
39
+ - ZeroGPU is enabled through the `spaces.GPU` decorator on generation and autoencoder actions.
40
  - Small models can run on CPU, but GPU is still preferred.
41
+ - Medium and Medium Base are GPU-first.
42
+ - `SAME-L` is GPU-first; `SAME-S` can be used for CPU autoencoder round trips.
43
 
44
+ The Space is configured with `suggested_hardware: zero-a10g`.
app.py CHANGED
@@ -100,7 +100,7 @@ GENERATION_MODELS: dict[str, GenerationModel] = {
100
  default_sampler="pingpong",
101
  requires_cuda=True,
102
  gated=True,
103
- note="High-quality checkpoint; CUDA and flash-attn are expected.",
104
  ),
105
  "small-music-base": GenerationModel(
106
  label="Stable Audio 3 Small Music Base",
@@ -140,7 +140,7 @@ GENERATION_MODELS: dict[str, GenerationModel] = {
140
  default_cfg=7.0,
141
  default_sampler="euler",
142
  requires_cuda=True,
143
- note="Base checkpoint intended mainly for fine-tuning; CUDA and flash-attn are expected.",
144
  ),
145
  }
146
 
@@ -160,10 +160,10 @@ AUTOENCODER_MODELS = {
160
  COLLECTION_ROWS = [
161
  ["stable-audio-3-small-music", "Text-to-audio", "Generate tab", "Gated post-trained small music"],
162
  ["stable-audio-3-small-sfx", "Text-to-audio", "Generate tab", "Gated post-trained small SFX"],
163
- ["stable-audio-3-medium", "Text-to-audio", "Generate tab", "Gated medium; needs CUDA + flash-attn"],
164
  ["stable-audio-3-small-music-base", "Text-to-audio", "Generate tab", "Base checkpoint"],
165
  ["stable-audio-3-small-sfx-base", "Text-to-audio", "Generate tab", "Base checkpoint"],
166
- ["stable-audio-3-medium-base", "Text-to-audio", "Generate tab", "Base checkpoint; needs CUDA + flash-attn"],
167
  ["stable-audio-3-optimized", "Optimized assets", "Listed only", "MLX/TensorRT artifacts, not generic PyTorch generation"],
168
  ["SAME-S", "Autoencoder", "Autoencoder tab", "CPU-capable round trip"],
169
  ["SAME-L", "Autoencoder", "Autoencoder tab", "Large autoencoder; CUDA recommended"],
@@ -175,7 +175,7 @@ MODEL_LOAD_LOCK = threading.RLock()
175
 
176
 
177
  def gpu_task(duration: int):
178
- if os.getenv("SA3_USE_SPACES_GPU", "").strip().lower() not in {"1", "true", "yes"}:
179
  return lambda fn: fn
180
  try:
181
  import spaces
@@ -288,12 +288,6 @@ def generation_preflight_error(
288
  has_access, error = user_can_download_gated_model(model.repo_id, token)
289
  if not has_access:
290
  return error or "Your Hugging Face account cannot access this gated model.", device
291
- if model.requires_cuda and device == "cuda" and not flash_attn_available():
292
- return (
293
- f"{model.label} expects flash-attn on CUDA. Rebuild the Space with the "
294
- "flash-attn wheel in requirements.txt or use a small model.",
295
- device,
296
- )
297
  return None, device
298
 
299
 
 
100
  default_sampler="pingpong",
101
  requires_cuda=True,
102
  gated=True,
103
+ note="High-quality checkpoint; GPU-first.",
104
  ),
105
  "small-music-base": GenerationModel(
106
  label="Stable Audio 3 Small Music Base",
 
140
  default_cfg=7.0,
141
  default_sampler="euler",
142
  requires_cuda=True,
143
+ note="Base checkpoint intended mainly for fine-tuning; GPU-first.",
144
  ),
145
  }
146
 
 
160
  COLLECTION_ROWS = [
161
  ["stable-audio-3-small-music", "Text-to-audio", "Generate tab", "Gated post-trained small music"],
162
  ["stable-audio-3-small-sfx", "Text-to-audio", "Generate tab", "Gated post-trained small SFX"],
163
+ ["stable-audio-3-medium", "Text-to-audio", "Generate tab", "Gated medium; GPU-first"],
164
  ["stable-audio-3-small-music-base", "Text-to-audio", "Generate tab", "Base checkpoint"],
165
  ["stable-audio-3-small-sfx-base", "Text-to-audio", "Generate tab", "Base checkpoint"],
166
+ ["stable-audio-3-medium-base", "Text-to-audio", "Generate tab", "Base checkpoint; GPU-first"],
167
  ["stable-audio-3-optimized", "Optimized assets", "Listed only", "MLX/TensorRT artifacts, not generic PyTorch generation"],
168
  ["SAME-S", "Autoencoder", "Autoencoder tab", "CPU-capable round trip"],
169
  ["SAME-L", "Autoencoder", "Autoencoder tab", "Large autoencoder; CUDA recommended"],
 
175
 
176
 
177
  def gpu_task(duration: int):
178
+ if os.getenv("SA3_USE_SPACES_GPU", "1").strip().lower() in {"0", "false", "no"}:
179
  return lambda fn: fn
180
  try:
181
  import spaces
 
288
  has_access, error = user_can_download_gated_model(model.repo_id, token)
289
  if not has_access:
290
  return error or "Your Hugging Face account cannot access this gated model.", device
 
 
 
 
 
 
291
  return None, device
292
 
293
 
requirements.txt CHANGED
@@ -1,13 +1,7 @@
1
- --extra-index-url https://download.pytorch.org/whl/cu126
2
-
3
- torch==2.7.1
4
- torchaudio==2.7.1
5
  gradio==6.3.0
6
  spaces
7
  hf_transfer
8
  soundfile
9
  git+https://github.com/Stability-AI/stable-audio-3.git@main
10
-
11
- # Required for Stable Audio 3 Medium on CUDA. This is the wheel recommended by
12
- # Stability AI's README for torch 2.7 / CUDA 12.6 / Python 3.10.
13
- https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.7.16/flash_attn-2.6.3+cu126torch2.7-cp310-cp310-linux_x86_64.whl
 
1
+ torch==2.8.0
2
+ torchaudio==2.8.0
 
 
3
  gradio==6.3.0
4
  spaces
5
  hf_transfer
6
  soundfile
7
  git+https://github.com/Stability-AI/stable-audio-3.git@main