Spaces:
Running on Zero
Running on Zero
Require HF login and user gated-model access
Browse files- .playwright-cli/console-2026-05-20T20-23-31-945Z.log +3 -0
- .playwright-cli/page-2026-05-20T20-23-33-278Z.yml +4 -0
- .playwright-cli/page-2026-05-20T20-23-51-604Z.yml +224 -0
- .playwright-cli/page-2026-05-20T20-24-02-635Z.yml +224 -0
- .playwright-cli/page-2026-05-20T20-24-12-107Z.png +0 -0
- README.md +7 -3
- app.py +159 -41
.playwright-cli/console-2026-05-20T20-23-31-945Z.log
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[ 1437ms] [ERROR] Failed to load resource: the server responded with a status of 400 () @ https://huggingface.co/api/spaces/by-subdomain/owenisas-stable-audio-3-lab:0
|
| 2 |
+
[ 1700ms] [WARNING] Failed to execute 'postMessage' on 'DOMWindow': The target origin provided ('https://huggingface.co') does not match the recipient window's origin ('https://owenisas-stable-audio-3-lab.hf.space'). @ https://owenisas-stable-audio-3-lab.hf.space/assets/Index-Be9xvQ3a.js:1
|
| 3 |
+
[ 1740ms] [ERROR] Failed to load resource: the server responded with a status of 404 () @ https://huggingface.co/api/organizations/owenisas/avatar:0
|
.playwright-cli/page-2026-05-20T20-23-33-278Z.yml
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
- main [ref=e4]:
|
| 2 |
+
- generic [ref=e5]:
|
| 3 |
+
- img [ref=e9]
|
| 4 |
+
- paragraph [ref=e20]: Loading...
|
.playwright-cli/page-2026-05-20T20-23-51-604Z.yml
ADDED
|
@@ -0,0 +1,224 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
- generic [ref=e1]:
|
| 2 |
+
- main [ref=e4]:
|
| 3 |
+
- generic [ref=e21]:
|
| 4 |
+
- generic [ref=e23]:
|
| 5 |
+
- heading "Stable Audio 3 Lab" [level=1] [ref=e28]
|
| 6 |
+
- generic [ref=e29]:
|
| 7 |
+
- generic [ref=e30]:
|
| 8 |
+
- generic [ref=e31]:
|
| 9 |
+
- button [ref=e32] [cursor=pointer]: Generate
|
| 10 |
+
- button [ref=e33] [cursor=pointer]: Autoencoder
|
| 11 |
+
- button [ref=e34] [cursor=pointer]: Coverage
|
| 12 |
+
- tablist [ref=e35]:
|
| 13 |
+
- tab "Generate" [selected] [ref=e36] [cursor=pointer]
|
| 14 |
+
- tab "Autoencoder" [ref=e37] [cursor=pointer]
|
| 15 |
+
- tab "Coverage" [ref=e38] [cursor=pointer]
|
| 16 |
+
- tabpanel [ref=e39]:
|
| 17 |
+
- generic [ref=e41]:
|
| 18 |
+
- generic [ref=e42]:
|
| 19 |
+
- generic [ref=e43]:
|
| 20 |
+
- generic [ref=e45]:
|
| 21 |
+
- generic [ref=e46]: Model
|
| 22 |
+
- generic [ref=e49]:
|
| 23 |
+
- listbox "Model" [ref=e50]: Stable Audio 3 Small SFX
|
| 24 |
+
- generic:
|
| 25 |
+
- img
|
| 26 |
+
- generic [ref=e52]:
|
| 27 |
+
- generic [ref=e53]: Prompt
|
| 28 |
+
- textbox "Prompt" [ref=e55]:
|
| 29 |
+
- /placeholder: ""
|
| 30 |
+
- text: Close binaural rain on a window, soft cloth movement, detailed texture
|
| 31 |
+
- generic [ref=e57]:
|
| 32 |
+
- generic [ref=e58]: Negative prompt
|
| 33 |
+
- textbox "Negative prompt" [ref=e60]:
|
| 34 |
+
- /placeholder: ""
|
| 35 |
+
- generic [ref=e62]:
|
| 36 |
+
- generic [ref=e64]:
|
| 37 |
+
- generic [ref=e65]:
|
| 38 |
+
- generic [ref=e67]: Duration
|
| 39 |
+
- generic [ref=e68]:
|
| 40 |
+
- spinbutton "number input for Duration" [ref=e69]: "8"
|
| 41 |
+
- button "Reset to default value" [ref=e70] [cursor=pointer]: ↺
|
| 42 |
+
- generic [ref=e71]:
|
| 43 |
+
- generic [ref=e72]: "1"
|
| 44 |
+
- slider "range slider for Duration" [ref=e73] [cursor=pointer]: "8"
|
| 45 |
+
- generic [ref=e74]: "120"
|
| 46 |
+
- generic [ref=e76]:
|
| 47 |
+
- generic [ref=e77]:
|
| 48 |
+
- generic [ref=e79]: Steps
|
| 49 |
+
- generic [ref=e80]:
|
| 50 |
+
- spinbutton "number input for Steps" [ref=e81]: "8"
|
| 51 |
+
- button "Reset to default value" [ref=e82] [cursor=pointer]: ↺
|
| 52 |
+
- generic [ref=e83]:
|
| 53 |
+
- generic [ref=e84]: "1"
|
| 54 |
+
- slider "range slider for Steps" [ref=e85] [cursor=pointer]: "8"
|
| 55 |
+
- generic [ref=e86]: "100"
|
| 56 |
+
- generic [ref=e88]:
|
| 57 |
+
- generic [ref=e89]:
|
| 58 |
+
- generic [ref=e91]: CFG
|
| 59 |
+
- generic [ref=e92]:
|
| 60 |
+
- spinbutton "number input for CFG" [ref=e93]: "1"
|
| 61 |
+
- button "Reset to default value" [ref=e94] [cursor=pointer]: ↺
|
| 62 |
+
- generic [ref=e95]:
|
| 63 |
+
- generic [ref=e96]: "0"
|
| 64 |
+
- slider "range slider for CFG" [ref=e97] [cursor=pointer]: "1"
|
| 65 |
+
- generic [ref=e98]: "12"
|
| 66 |
+
- generic [ref=e100]:
|
| 67 |
+
- generic [ref=e102]:
|
| 68 |
+
- generic [ref=e103]: Sampler
|
| 69 |
+
- generic [ref=e106]:
|
| 70 |
+
- listbox "Sampler" [ref=e107]: pingpong
|
| 71 |
+
- generic:
|
| 72 |
+
- img
|
| 73 |
+
- generic [ref=e109]:
|
| 74 |
+
- generic [ref=e110]: Seed
|
| 75 |
+
- spinbutton "Seed" [ref=e111]: "-1"
|
| 76 |
+
- generic [ref=e113]:
|
| 77 |
+
- generic [ref=e115] [cursor=pointer]:
|
| 78 |
+
- checkbox "Chunked decode" [checked] [ref=e116]
|
| 79 |
+
- generic [ref=e117]: Chunked decode
|
| 80 |
+
- generic [ref=e119] [cursor=pointer]:
|
| 81 |
+
- checkbox "CPU override" [ref=e120]
|
| 82 |
+
- generic [ref=e121]: CPU override
|
| 83 |
+
- generic [ref=e122]:
|
| 84 |
+
- button "Generate" [ref=e123] [cursor=pointer]
|
| 85 |
+
- button "Unload" [ref=e124] [cursor=pointer]
|
| 86 |
+
- button "Runtime" [active] [ref=e125] [cursor=pointer]
|
| 87 |
+
- generic [ref=e126]:
|
| 88 |
+
- generic [ref=e127]:
|
| 89 |
+
- generic [ref=e128]:
|
| 90 |
+
- generic:
|
| 91 |
+
- generic:
|
| 92 |
+
- img
|
| 93 |
+
- text: Model info
|
| 94 |
+
- button "Copy" [ref=e130] [cursor=pointer]:
|
| 95 |
+
- img [ref=e132]
|
| 96 |
+
- generic [ref=e136]:
|
| 97 |
+
- generic [ref=e137]:
|
| 98 |
+
- generic "Line number 1" [ref=e138]: "1"
|
| 99 |
+
- generic [ref=e139]:
|
| 100 |
+
- button "Collapse" [ref=e140] [cursor=pointer]: ▼
|
| 101 |
+
- generic [ref=e141]: "{"
|
| 102 |
+
- generic [ref=e142]:
|
| 103 |
+
- generic [ref=e144]:
|
| 104 |
+
- generic "Line number 2" [ref=e145]: "2"
|
| 105 |
+
- generic [ref=e146]:
|
| 106 |
+
- generic [ref=e147]: "\"repo_id\""
|
| 107 |
+
- generic [ref=e148]: ":"
|
| 108 |
+
- generic [ref=e149]: "\"stabilityai/stable-audio-3-small-sfx\""
|
| 109 |
+
- generic [ref=e150]: ","
|
| 110 |
+
- generic [ref=e152]:
|
| 111 |
+
- generic "Line number 3" [ref=e153]: "3"
|
| 112 |
+
- generic [ref=e154]:
|
| 113 |
+
- generic [ref=e155]: "\"family\""
|
| 114 |
+
- generic [ref=e156]: ":"
|
| 115 |
+
- generic [ref=e157]: "\"post-trained\""
|
| 116 |
+
- generic [ref=e158]: ","
|
| 117 |
+
- generic [ref=e160]:
|
| 118 |
+
- generic "Line number 4" [ref=e161]: "4"
|
| 119 |
+
- generic [ref=e162]:
|
| 120 |
+
- generic [ref=e163]: "\"note\""
|
| 121 |
+
- generic [ref=e164]: ":"
|
| 122 |
+
- generic [ref=e165]: "\"Lightweight sound-effects checkpoint.\""
|
| 123 |
+
- generic [ref=e166]: ","
|
| 124 |
+
- generic [ref=e168]:
|
| 125 |
+
- generic "Line number 5" [ref=e169]: "5"
|
| 126 |
+
- generic [ref=e170]:
|
| 127 |
+
- generic [ref=e171]: "\"token_hint\""
|
| 128 |
+
- generic [ref=e172]: ":"
|
| 129 |
+
- generic [ref=e173]: "\"This is a gated Stability model. Accept the model terms on Hugging Face and add a read-only HF_TOKEN Space secret if download fails.\""
|
| 130 |
+
- generic [ref=e174]:
|
| 131 |
+
- generic "Line number 6" [ref=e175]: "6"
|
| 132 |
+
- generic [ref=e177]: "}"
|
| 133 |
+
- generic [ref=e178]:
|
| 134 |
+
- generic:
|
| 135 |
+
- generic:
|
| 136 |
+
- img
|
| 137 |
+
- text: Output
|
| 138 |
+
- generic "Empty value" [ref=e179]:
|
| 139 |
+
- img [ref=e181]
|
| 140 |
+
- generic [ref=e185]:
|
| 141 |
+
- generic [ref=e186]:
|
| 142 |
+
- generic:
|
| 143 |
+
- generic:
|
| 144 |
+
- img
|
| 145 |
+
- text: Run metadata
|
| 146 |
+
- button "Copy" [ref=e215] [cursor=pointer]:
|
| 147 |
+
- img [ref=e217]
|
| 148 |
+
- generic [ref=e221]:
|
| 149 |
+
- generic [ref=e222]:
|
| 150 |
+
- generic "Line number 1" [ref=e223]: "1"
|
| 151 |
+
- generic [ref=e224]:
|
| 152 |
+
- button "Collapse" [ref=e225] [cursor=pointer]: ▼
|
| 153 |
+
- generic [ref=e226]: "{"
|
| 154 |
+
- generic [ref=e227]:
|
| 155 |
+
- generic [ref=e229]:
|
| 156 |
+
- generic "Line number 2" [ref=e230]: "2"
|
| 157 |
+
- generic [ref=e231]:
|
| 158 |
+
- generic [ref=e232]: "\"device\""
|
| 159 |
+
- generic [ref=e233]: ":"
|
| 160 |
+
- generic [ref=e234]: "\"cpu\""
|
| 161 |
+
- generic [ref=e235]: ","
|
| 162 |
+
- generic [ref=e237]:
|
| 163 |
+
- generic "Line number 3" [ref=e238]: "3"
|
| 164 |
+
- generic [ref=e239]:
|
| 165 |
+
- generic [ref=e240]: "\"cuda_name\""
|
| 166 |
+
- generic [ref=e241]: ":"
|
| 167 |
+
- generic [ref=e242]: "null"
|
| 168 |
+
- generic [ref=e243]: ","
|
| 169 |
+
- generic [ref=e245]:
|
| 170 |
+
- generic "Line number 4" [ref=e246]: "4"
|
| 171 |
+
- generic [ref=e247]:
|
| 172 |
+
- generic [ref=e248]: "\"flash_attn\""
|
| 173 |
+
- generic [ref=e249]: ":"
|
| 174 |
+
- generic [ref=e250]: "true"
|
| 175 |
+
- generic [ref=e251]: ","
|
| 176 |
+
- generic [ref=e253]:
|
| 177 |
+
- generic "Line number 5" [ref=e254]: "5"
|
| 178 |
+
- generic [ref=e255]:
|
| 179 |
+
- generic [ref=e256]: "\"hf_token_present\""
|
| 180 |
+
- generic [ref=e257]: ":"
|
| 181 |
+
- generic [ref=e258]: "false"
|
| 182 |
+
- generic [ref=e259]: ","
|
| 183 |
+
- generic [ref=e261]:
|
| 184 |
+
- generic "Line number 6" [ref=e262]: "6"
|
| 185 |
+
- generic [ref=e263]:
|
| 186 |
+
- generic [ref=e264]: "\"loaded_generation_model\""
|
| 187 |
+
- generic [ref=e265]: ":"
|
| 188 |
+
- generic [ref=e266]: "null"
|
| 189 |
+
- generic [ref=e267]: ","
|
| 190 |
+
- generic [ref=e269]:
|
| 191 |
+
- generic "Line number 7" [ref=e270]: "7"
|
| 192 |
+
- generic [ref=e271]:
|
| 193 |
+
- generic [ref=e272]: "\"loaded_autoencoder\""
|
| 194 |
+
- generic [ref=e273]: ":"
|
| 195 |
+
- generic [ref=e274]: "null"
|
| 196 |
+
- generic [ref=e275]:
|
| 197 |
+
- generic "Line number 8" [ref=e276]: "8"
|
| 198 |
+
- generic [ref=e278]: "}"
|
| 199 |
+
- generic [ref=e192]:
|
| 200 |
+
- button "Use via API logo" [ref=e193] [cursor=pointer]:
|
| 201 |
+
- text: Use via API
|
| 202 |
+
- img "logo" [ref=e194]
|
| 203 |
+
- generic [ref=e195]: ·
|
| 204 |
+
- link "Built with Gradio logo" [ref=e196] [cursor=pointer]:
|
| 205 |
+
- /url: https://gradio.app
|
| 206 |
+
- text: Built with Gradio
|
| 207 |
+
- img "logo" [ref=e197]
|
| 208 |
+
- generic [ref=e198]: ·
|
| 209 |
+
- button "Settings Settings" [ref=e199] [cursor=pointer]:
|
| 210 |
+
- text: Settings
|
| 211 |
+
- img "Settings" [ref=e200]
|
| 212 |
+
- generic [ref=e201]:
|
| 213 |
+
- generic [ref=e202]:
|
| 214 |
+
- img [ref=e203]
|
| 215 |
+
- link "owenisas" [ref=e204] [cursor=pointer]:
|
| 216 |
+
- /url: https://huggingface.co/owenisas
|
| 217 |
+
- generic [ref=e205]: /
|
| 218 |
+
- link "stable-audio-3-lab" [ref=e206] [cursor=pointer]:
|
| 219 |
+
- /url: https://huggingface.co/spaces/owenisas/stable-audio-3-lab
|
| 220 |
+
- link "0" [ref=e207] [cursor=pointer]:
|
| 221 |
+
- /url: https://huggingface.co/spaces/owenisas/stable-audio-3-lab
|
| 222 |
+
- img [ref=e208]
|
| 223 |
+
- paragraph [ref=e210]: "0"
|
| 224 |
+
- img [ref=e212] [cursor=pointer]
|
.playwright-cli/page-2026-05-20T20-24-02-635Z.yml
ADDED
|
@@ -0,0 +1,224 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
- generic [ref=e1]:
|
| 2 |
+
- main [ref=e4]:
|
| 3 |
+
- generic [ref=e21]:
|
| 4 |
+
- generic [ref=e23]:
|
| 5 |
+
- heading "Stable Audio 3 Lab" [level=1] [ref=e28]
|
| 6 |
+
- generic [ref=e29]:
|
| 7 |
+
- generic [ref=e30]:
|
| 8 |
+
- generic [ref=e31]:
|
| 9 |
+
- button [ref=e32] [cursor=pointer]: Generate
|
| 10 |
+
- button [ref=e33] [cursor=pointer]: Autoencoder
|
| 11 |
+
- button [ref=e34] [cursor=pointer]: Coverage
|
| 12 |
+
- tablist [ref=e35]:
|
| 13 |
+
- tab "Generate" [selected] [ref=e36] [cursor=pointer]
|
| 14 |
+
- tab "Autoencoder" [ref=e37] [cursor=pointer]
|
| 15 |
+
- tab "Coverage" [ref=e38] [cursor=pointer]
|
| 16 |
+
- tabpanel [ref=e39]:
|
| 17 |
+
- generic [ref=e41]:
|
| 18 |
+
- generic [ref=e42]:
|
| 19 |
+
- generic [ref=e43]:
|
| 20 |
+
- generic [ref=e45]:
|
| 21 |
+
- generic [ref=e46]: Model
|
| 22 |
+
- generic [ref=e49]:
|
| 23 |
+
- listbox "Model" [ref=e50]: Stable Audio 3 Small SFX
|
| 24 |
+
- generic:
|
| 25 |
+
- img
|
| 26 |
+
- generic [ref=e52]:
|
| 27 |
+
- generic [ref=e53]: Prompt
|
| 28 |
+
- textbox "Prompt" [ref=e55]:
|
| 29 |
+
- /placeholder: ""
|
| 30 |
+
- text: Close binaural rain on a window, soft cloth movement, detailed texture
|
| 31 |
+
- generic [ref=e57]:
|
| 32 |
+
- generic [ref=e58]: Negative prompt
|
| 33 |
+
- textbox "Negative prompt" [ref=e60]:
|
| 34 |
+
- /placeholder: ""
|
| 35 |
+
- generic [ref=e62]:
|
| 36 |
+
- generic [ref=e64]:
|
| 37 |
+
- generic [ref=e65]:
|
| 38 |
+
- generic [ref=e67]: Duration
|
| 39 |
+
- generic [ref=e68]:
|
| 40 |
+
- spinbutton "number input for Duration" [ref=e69]: "8"
|
| 41 |
+
- button "Reset to default value" [ref=e70] [cursor=pointer]: ↺
|
| 42 |
+
- generic [ref=e71]:
|
| 43 |
+
- generic [ref=e72]: "1"
|
| 44 |
+
- slider "range slider for Duration" [ref=e73] [cursor=pointer]: "8"
|
| 45 |
+
- generic [ref=e74]: "120"
|
| 46 |
+
- generic [ref=e76]:
|
| 47 |
+
- generic [ref=e77]:
|
| 48 |
+
- generic [ref=e79]: Steps
|
| 49 |
+
- generic [ref=e80]:
|
| 50 |
+
- spinbutton "number input for Steps" [ref=e81]: "8"
|
| 51 |
+
- button "Reset to default value" [ref=e82] [cursor=pointer]: ↺
|
| 52 |
+
- generic [ref=e83]:
|
| 53 |
+
- generic [ref=e84]: "1"
|
| 54 |
+
- slider "range slider for Steps" [ref=e85] [cursor=pointer]: "8"
|
| 55 |
+
- generic [ref=e86]: "100"
|
| 56 |
+
- generic [ref=e88]:
|
| 57 |
+
- generic [ref=e89]:
|
| 58 |
+
- generic [ref=e91]: CFG
|
| 59 |
+
- generic [ref=e92]:
|
| 60 |
+
- spinbutton "number input for CFG" [ref=e93]: "1"
|
| 61 |
+
- button "Reset to default value" [ref=e94] [cursor=pointer]: ↺
|
| 62 |
+
- generic [ref=e95]:
|
| 63 |
+
- generic [ref=e96]: "0"
|
| 64 |
+
- slider "range slider for CFG" [ref=e97] [cursor=pointer]: "1"
|
| 65 |
+
- generic [ref=e98]: "12"
|
| 66 |
+
- generic [ref=e100]:
|
| 67 |
+
- generic [ref=e102]:
|
| 68 |
+
- generic [ref=e103]: Sampler
|
| 69 |
+
- generic [ref=e106]:
|
| 70 |
+
- listbox "Sampler" [ref=e107]: pingpong
|
| 71 |
+
- generic:
|
| 72 |
+
- img
|
| 73 |
+
- generic [ref=e109]:
|
| 74 |
+
- generic [ref=e110]: Seed
|
| 75 |
+
- spinbutton "Seed" [ref=e111]: "-1"
|
| 76 |
+
- generic [ref=e113]:
|
| 77 |
+
- generic [ref=e115] [cursor=pointer]:
|
| 78 |
+
- checkbox "Chunked decode" [checked] [ref=e116]
|
| 79 |
+
- generic [ref=e117]: Chunked decode
|
| 80 |
+
- generic [ref=e119] [cursor=pointer]:
|
| 81 |
+
- checkbox "CPU override" [ref=e120]
|
| 82 |
+
- generic [ref=e121]: CPU override
|
| 83 |
+
- generic [ref=e122]:
|
| 84 |
+
- button "Generate" [active] [ref=e123] [cursor=pointer]
|
| 85 |
+
- button "Unload" [ref=e124] [cursor=pointer]
|
| 86 |
+
- button "Runtime" [ref=e125] [cursor=pointer]
|
| 87 |
+
- generic [ref=e126]:
|
| 88 |
+
- generic [ref=e127]:
|
| 89 |
+
- generic [ref=e128]:
|
| 90 |
+
- generic:
|
| 91 |
+
- generic:
|
| 92 |
+
- img
|
| 93 |
+
- text: Model info
|
| 94 |
+
- button "Copy" [ref=e130] [cursor=pointer]:
|
| 95 |
+
- img [ref=e132]
|
| 96 |
+
- generic [ref=e136]:
|
| 97 |
+
- generic [ref=e137]:
|
| 98 |
+
- generic "Line number 1" [ref=e138]: "1"
|
| 99 |
+
- generic [ref=e139]:
|
| 100 |
+
- button "Collapse" [ref=e140] [cursor=pointer]: ▼
|
| 101 |
+
- generic [ref=e141]: "{"
|
| 102 |
+
- generic [ref=e142]:
|
| 103 |
+
- generic [ref=e144]:
|
| 104 |
+
- generic "Line number 2" [ref=e145]: "2"
|
| 105 |
+
- generic [ref=e146]:
|
| 106 |
+
- generic [ref=e147]: "\"repo_id\""
|
| 107 |
+
- generic [ref=e148]: ":"
|
| 108 |
+
- generic [ref=e149]: "\"stabilityai/stable-audio-3-small-sfx\""
|
| 109 |
+
- generic [ref=e150]: ","
|
| 110 |
+
- generic [ref=e152]:
|
| 111 |
+
- generic "Line number 3" [ref=e153]: "3"
|
| 112 |
+
- generic [ref=e154]:
|
| 113 |
+
- generic [ref=e155]: "\"family\""
|
| 114 |
+
- generic [ref=e156]: ":"
|
| 115 |
+
- generic [ref=e157]: "\"post-trained\""
|
| 116 |
+
- generic [ref=e158]: ","
|
| 117 |
+
- generic [ref=e160]:
|
| 118 |
+
- generic "Line number 4" [ref=e161]: "4"
|
| 119 |
+
- generic [ref=e162]:
|
| 120 |
+
- generic [ref=e163]: "\"note\""
|
| 121 |
+
- generic [ref=e164]: ":"
|
| 122 |
+
- generic [ref=e165]: "\"Lightweight sound-effects checkpoint.\""
|
| 123 |
+
- generic [ref=e166]: ","
|
| 124 |
+
- generic [ref=e168]:
|
| 125 |
+
- generic "Line number 5" [ref=e169]: "5"
|
| 126 |
+
- generic [ref=e170]:
|
| 127 |
+
- generic [ref=e171]: "\"token_hint\""
|
| 128 |
+
- generic [ref=e172]: ":"
|
| 129 |
+
- generic [ref=e173]: "\"This is a gated Stability model. Accept the model terms on Hugging Face and add a read-only HF_TOKEN Space secret if download fails.\""
|
| 130 |
+
- generic [ref=e174]:
|
| 131 |
+
- generic "Line number 6" [ref=e175]: "6"
|
| 132 |
+
- generic [ref=e177]: "}"
|
| 133 |
+
- generic [ref=e178]:
|
| 134 |
+
- generic:
|
| 135 |
+
- generic:
|
| 136 |
+
- img
|
| 137 |
+
- text: Output
|
| 138 |
+
- generic "Empty value" [ref=e179]:
|
| 139 |
+
- img [ref=e181]
|
| 140 |
+
- generic [ref=e185]:
|
| 141 |
+
- generic [ref=e186]:
|
| 142 |
+
- generic:
|
| 143 |
+
- generic:
|
| 144 |
+
- img
|
| 145 |
+
- text: Run metadata
|
| 146 |
+
- button "Copy" [ref=e215] [cursor=pointer]:
|
| 147 |
+
- img [ref=e217]
|
| 148 |
+
- generic [ref=e221]:
|
| 149 |
+
- generic [ref=e222]:
|
| 150 |
+
- generic "Line number 1" [ref=e223]: "1"
|
| 151 |
+
- generic [ref=e224]:
|
| 152 |
+
- button "Collapse" [ref=e225] [cursor=pointer]: ▼
|
| 153 |
+
- generic [ref=e226]: "{"
|
| 154 |
+
- generic [ref=e227]:
|
| 155 |
+
- generic [ref=e229]:
|
| 156 |
+
- generic "Line number 2" [ref=e230]: "2"
|
| 157 |
+
- generic [ref=e231]:
|
| 158 |
+
- generic [ref=e232]: "\"status\""
|
| 159 |
+
- generic [ref=e233]: ":"
|
| 160 |
+
- generic [ref=e234]: "\"blocked\""
|
| 161 |
+
- generic [ref=e235]: ","
|
| 162 |
+
- generic [ref=e237]:
|
| 163 |
+
- generic "Line number 3" [ref=e238]: "3"
|
| 164 |
+
- generic [ref=e239]:
|
| 165 |
+
- generic [ref=e240]: "\"error\""
|
| 166 |
+
- generic [ref=e241]: ":"
|
| 167 |
+
- generic [ref=e279]: "\"Stable Audio 3 Small SFX is gated. Accept the model terms on Hugging Face, then add a read-only HF_TOKEN secret to this Space before running it.\""
|
| 168 |
+
- generic [ref=e243]: ","
|
| 169 |
+
- generic [ref=e245]:
|
| 170 |
+
- generic "Line number 4" [ref=e246]: "4"
|
| 171 |
+
- generic [ref=e247]:
|
| 172 |
+
- generic [ref=e248]: "\"model\""
|
| 173 |
+
- generic [ref=e249]: ":"
|
| 174 |
+
- generic [ref=e280]: "\"small-sfx\""
|
| 175 |
+
- generic [ref=e251]: ","
|
| 176 |
+
- generic [ref=e253]:
|
| 177 |
+
- generic "Line number 5" [ref=e254]: "5"
|
| 178 |
+
- generic [ref=e255]:
|
| 179 |
+
- generic [ref=e256]: "\"repo_id\""
|
| 180 |
+
- generic [ref=e257]: ":"
|
| 181 |
+
- generic [ref=e281]: "\"stabilityai/stable-audio-3-small-sfx\""
|
| 182 |
+
- generic [ref=e259]: ","
|
| 183 |
+
- generic [ref=e261]:
|
| 184 |
+
- generic "Line number 6" [ref=e262]: "6"
|
| 185 |
+
- generic [ref=e263]:
|
| 186 |
+
- generic [ref=e264]: "\"device\""
|
| 187 |
+
- generic [ref=e265]: ":"
|
| 188 |
+
- generic [ref=e282]: "\"unknown\""
|
| 189 |
+
- generic [ref=e267]: ","
|
| 190 |
+
- generic [ref=e269]:
|
| 191 |
+
- generic "Line number 7" [ref=e270]: "7"
|
| 192 |
+
- generic [ref=e271]:
|
| 193 |
+
- generic [ref=e272]: "\"hf_token_present\""
|
| 194 |
+
- generic [ref=e273]: ":"
|
| 195 |
+
- generic [ref=e283]: "false"
|
| 196 |
+
- generic [ref=e275]:
|
| 197 |
+
- generic "Line number 8" [ref=e276]: "8"
|
| 198 |
+
- generic [ref=e278]: "}"
|
| 199 |
+
- generic [ref=e192]:
|
| 200 |
+
- button "Use via API logo" [ref=e193] [cursor=pointer]:
|
| 201 |
+
- text: Use via API
|
| 202 |
+
- img "logo" [ref=e194]
|
| 203 |
+
- generic [ref=e195]: ·
|
| 204 |
+
- link "Built with Gradio logo" [ref=e196] [cursor=pointer]:
|
| 205 |
+
- /url: https://gradio.app
|
| 206 |
+
- text: Built with Gradio
|
| 207 |
+
- img "logo" [ref=e197]
|
| 208 |
+
- generic [ref=e198]: ·
|
| 209 |
+
- button "Settings Settings" [ref=e199] [cursor=pointer]:
|
| 210 |
+
- text: Settings
|
| 211 |
+
- img "Settings" [ref=e200]
|
| 212 |
+
- generic [ref=e201]:
|
| 213 |
+
- generic [ref=e202]:
|
| 214 |
+
- img [ref=e203]
|
| 215 |
+
- link "owenisas" [ref=e204] [cursor=pointer]:
|
| 216 |
+
- /url: https://huggingface.co/owenisas
|
| 217 |
+
- generic [ref=e205]: /
|
| 218 |
+
- link "stable-audio-3-lab" [ref=e206] [cursor=pointer]:
|
| 219 |
+
- /url: https://huggingface.co/spaces/owenisas/stable-audio-3-lab
|
| 220 |
+
- link "0" [ref=e207] [cursor=pointer]:
|
| 221 |
+
- /url: https://huggingface.co/spaces/owenisas/stable-audio-3-lab
|
| 222 |
+
- img [ref=e208]
|
| 223 |
+
- paragraph [ref=e210]: "0"
|
| 224 |
+
- img [ref=e212] [cursor=pointer]
|
.playwright-cli/page-2026-05-20T20-24-12-107Z.png
ADDED
|
README.md
CHANGED
|
@@ -9,6 +9,9 @@ python_version: "3.10"
|
|
| 9 |
suggested_hardware: a10g-small
|
| 10 |
pinned: false
|
| 11 |
license: mit
|
|
|
|
|
|
|
|
|
|
| 12 |
---
|
| 13 |
|
| 14 |
# Stable Audio 3 Lab
|
|
@@ -23,10 +26,11 @@ The optimized repo (`stabilityai/stable-audio-3-optimized`) currently ships MLX
|
|
| 23 |
|
| 24 |
## Access
|
| 25 |
|
| 26 |
-
|
|
|
|
| 27 |
|
| 28 |
-
1.
|
| 29 |
-
2.
|
| 30 |
|
| 31 |
Base checkpoints are not gated, but they are intended mainly for fine-tuning and may not sound as polished.
|
| 32 |
|
|
|
|
| 9 |
suggested_hardware: a10g-small
|
| 10 |
pinned: false
|
| 11 |
license: mit
|
| 12 |
+
hf_oauth: true
|
| 13 |
+
hf_oauth_scopes:
|
| 14 |
+
- gated-repos
|
| 15 |
---
|
| 16 |
|
| 17 |
# Stable Audio 3 Lab
|
|
|
|
| 26 |
|
| 27 |
## Access
|
| 28 |
|
| 29 |
+
This Space requires Hugging Face login. The post-trained Stable Audio 3
|
| 30 |
+
checkpoints are gated on Hugging Face, so each user must:
|
| 31 |
|
| 32 |
+
1. Sign in with Hugging Face.
|
| 33 |
+
2. Accept the terms on each gated model page from their own account.
|
| 34 |
|
| 35 |
Base checkpoints are not gated, but they are intended mainly for fine-tuning and may not sound as polished.
|
| 36 |
|
app.py
CHANGED
|
@@ -7,7 +7,11 @@ import json
|
|
| 7 |
import os
|
| 8 |
import sys
|
| 9 |
import tempfile
|
|
|
|
| 10 |
import time
|
|
|
|
|
|
|
|
|
|
| 11 |
from dataclasses import dataclass
|
| 12 |
from typing import Any
|
| 13 |
|
|
@@ -167,6 +171,7 @@ COLLECTION_ROWS = [
|
|
| 167 |
|
| 168 |
MODEL_CACHE: dict[str, Any] = {"key": None, "model": None}
|
| 169 |
AE_CACHE: dict[str, Any] = {"key": None, "model": None}
|
|
|
|
| 170 |
|
| 171 |
|
| 172 |
def gpu_task(duration: int):
|
|
@@ -196,27 +201,78 @@ def flash_attn_available() -> bool:
|
|
| 196 |
return importlib.util.find_spec("flash_attn") is not None
|
| 197 |
|
| 198 |
|
| 199 |
-
def
|
| 200 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
|
| 202 |
|
| 203 |
def stable_audio_token_hint(model: GenerationModel) -> str:
|
| 204 |
if not model.gated:
|
| 205 |
-
return ""
|
| 206 |
-
if hf_token_present():
|
| 207 |
-
return ""
|
| 208 |
return (
|
| 209 |
-
"
|
| 210 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 212 |
|
|
|
|
| 213 |
|
| 214 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 215 |
device = "unknown"
|
| 216 |
-
|
|
|
|
| 217 |
return (
|
| 218 |
-
|
| 219 |
-
"then add a read-only HF_TOKEN secret to this Space before running it.",
|
| 220 |
device,
|
| 221 |
)
|
| 222 |
|
|
@@ -228,6 +284,10 @@ def generation_preflight_error(model: GenerationModel, allow_cpu_medium: bool) -
|
|
| 228 |
"Use a GPU Space or enable the CPU override for a slow/debug-only attempt.",
|
| 229 |
device,
|
| 230 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 231 |
if model.requires_cuda and device == "cuda" and not flash_attn_available():
|
| 232 |
return (
|
| 233 |
f"{model.label} expects flash-attn on CUDA. Rebuild the Space with the "
|
|
@@ -237,8 +297,13 @@ def generation_preflight_error(model: GenerationModel, allow_cpu_medium: bool) -
|
|
| 237 |
return None, device
|
| 238 |
|
| 239 |
|
| 240 |
-
def assert_generation_runtime(
|
| 241 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 242 |
if error:
|
| 243 |
raise gr.Error(error)
|
| 244 |
return device
|
|
@@ -270,27 +335,45 @@ def clear_torch_memory() -> None:
|
|
| 270 |
gc.collect()
|
| 271 |
|
| 272 |
|
| 273 |
-
def load_generation_model(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 274 |
model_def = GENERATION_MODELS[model_key]
|
| 275 |
-
device = assert_generation_runtime(model_def, allow_cpu_medium)
|
| 276 |
|
| 277 |
if MODEL_CACHE["key"] == model_key and MODEL_CACHE["model"] is not None:
|
| 278 |
return MODEL_CACHE["model"], device
|
| 279 |
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 283 |
|
| 284 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 285 |
|
| 286 |
-
model_half = device == "cuda"
|
| 287 |
-
model = StableAudioModel.from_pretrained(model_key, model_half=model_half)
|
| 288 |
-
MODEL_CACHE["key"] = model_key
|
| 289 |
-
MODEL_CACHE["model"] = model
|
| 290 |
-
return model, device
|
| 291 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 292 |
|
| 293 |
-
def load_autoencoder(model_key: str, allow_cpu_same_l: bool):
|
| 294 |
model_def = AUTOENCODER_MODELS[model_key]
|
| 295 |
torch = import_torch()
|
| 296 |
device = current_device(torch)
|
|
@@ -303,16 +386,21 @@ def load_autoencoder(model_key: str, allow_cpu_same_l: bool):
|
|
| 303 |
if AE_CACHE["key"] == model_key and AE_CACHE["model"] is not None:
|
| 304 |
return AE_CACHE["model"], device
|
| 305 |
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 309 |
|
| 310 |
-
|
| 311 |
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
|
|
|
| 316 |
|
| 317 |
|
| 318 |
def model_changed(model_key: str):
|
|
@@ -346,6 +434,8 @@ def generate_audio(
|
|
| 346 |
seed: int,
|
| 347 |
chunked_decode: bool,
|
| 348 |
allow_cpu_medium: bool,
|
|
|
|
|
|
|
| 349 |
progress=gr.Progress(track_tqdm=True),
|
| 350 |
):
|
| 351 |
model_def = GENERATION_MODELS[model_key]
|
|
@@ -357,7 +447,12 @@ def generate_audio(
|
|
| 357 |
"repo_id": model_def.repo_id,
|
| 358 |
}
|
| 359 |
|
| 360 |
-
preflight_error, preflight_device = generation_preflight_error(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 361 |
if preflight_error:
|
| 362 |
return None, {
|
| 363 |
"status": "blocked",
|
|
@@ -365,7 +460,9 @@ def generate_audio(
|
|
| 365 |
"model": model_def.key,
|
| 366 |
"repo_id": model_def.repo_id,
|
| 367 |
"device": preflight_device,
|
| 368 |
-
"
|
|
|
|
|
|
|
| 369 |
}
|
| 370 |
|
| 371 |
progress(0.05, desc="Loading model")
|
|
@@ -374,7 +471,7 @@ def generate_audio(
|
|
| 374 |
if seed < 0:
|
| 375 |
seed = int.from_bytes(os.urandom(4), "little") % 100000
|
| 376 |
|
| 377 |
-
model, device = load_generation_model(model_key, allow_cpu_medium)
|
| 378 |
progress(0.25, desc="Generating")
|
| 379 |
audio = model.generate(
|
| 380 |
prompt=prompt.strip(),
|
|
@@ -412,6 +509,7 @@ def generate_audio(
|
|
| 412 |
"elapsed_s": elapsed,
|
| 413 |
"output_file": out_file.name,
|
| 414 |
"note": model_def.note,
|
|
|
|
| 415 |
}
|
| 416 |
return out_file.name, metadata
|
| 417 |
|
|
@@ -422,8 +520,19 @@ def roundtrip_autoencoder(
|
|
| 422 |
audio_input: tuple[int, np.ndarray] | None,
|
| 423 |
chunked: bool,
|
| 424 |
allow_cpu_same_l: bool,
|
|
|
|
|
|
|
| 425 |
progress=gr.Progress(track_tqdm=True),
|
| 426 |
):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 427 |
if audio_input is None:
|
| 428 |
return None, {
|
| 429 |
"status": "blocked",
|
|
@@ -449,7 +558,7 @@ def roundtrip_autoencoder(
|
|
| 449 |
|
| 450 |
progress(0.05, desc="Loading autoencoder")
|
| 451 |
started = time.time()
|
| 452 |
-
model, device = load_autoencoder(model_key, allow_cpu_same_l)
|
| 453 |
|
| 454 |
progress(0.25, desc="Encoding")
|
| 455 |
sr, data = audio_input
|
|
@@ -480,20 +589,26 @@ def roundtrip_autoencoder(
|
|
| 480 |
"latent_shape": list(latents.shape),
|
| 481 |
"elapsed_s": round(time.time() - started, 3),
|
| 482 |
"output_file": out_file.name,
|
|
|
|
| 483 |
}
|
| 484 |
return out_file.name, metadata
|
| 485 |
|
| 486 |
|
| 487 |
-
def unload_models():
|
|
|
|
|
|
|
| 488 |
MODEL_CACHE["key"] = None
|
| 489 |
MODEL_CACHE["model"] = None
|
| 490 |
AE_CACHE["key"] = None
|
| 491 |
AE_CACHE["model"] = None
|
| 492 |
clear_torch_memory()
|
| 493 |
-
return {"status": "unloaded"}
|
| 494 |
|
| 495 |
|
| 496 |
-
def runtime_status(
|
|
|
|
|
|
|
|
|
|
| 497 |
try:
|
| 498 |
torch = import_torch()
|
| 499 |
device = current_device(torch)
|
|
@@ -507,7 +622,9 @@ def runtime_status():
|
|
| 507 |
"device": device,
|
| 508 |
"cuda_name": cuda_name,
|
| 509 |
"flash_attn": flash_attn_available(),
|
| 510 |
-
"
|
|
|
|
|
|
|
| 511 |
"loaded_generation_model": MODEL_CACHE["key"],
|
| 512 |
"loaded_autoencoder": AE_CACHE["key"],
|
| 513 |
}
|
|
@@ -524,6 +641,7 @@ css = """
|
|
| 524 |
|
| 525 |
with gr.Blocks(title="Stable Audio 3 Lab") as demo:
|
| 526 |
gr.Markdown("# Stable Audio 3 Lab")
|
|
|
|
| 527 |
|
| 528 |
with gr.Tab("Generate"):
|
| 529 |
with gr.Row(equal_height=False):
|
|
|
|
| 7 |
import os
|
| 8 |
import sys
|
| 9 |
import tempfile
|
| 10 |
+
import threading
|
| 11 |
import time
|
| 12 |
+
import urllib.error
|
| 13 |
+
import urllib.request
|
| 14 |
+
from contextlib import contextmanager
|
| 15 |
from dataclasses import dataclass
|
| 16 |
from typing import Any
|
| 17 |
|
|
|
|
| 171 |
|
| 172 |
MODEL_CACHE: dict[str, Any] = {"key": None, "model": None}
|
| 173 |
AE_CACHE: dict[str, Any] = {"key": None, "model": None}
|
| 174 |
+
MODEL_LOAD_LOCK = threading.RLock()
|
| 175 |
|
| 176 |
|
| 177 |
def gpu_task(duration: int):
|
|
|
|
| 201 |
return importlib.util.find_spec("flash_attn") is not None
|
| 202 |
|
| 203 |
|
| 204 |
+
def oauth_token_value(oauth_token: gr.OAuthToken | None) -> str | None:
    """Return the token string carried by ``oauth_token``, or ``None``.

    ``None`` is returned when the argument has no ``token`` attribute, or
    when that attribute is anything other than a non-empty ``str``.
    """
    candidate = getattr(oauth_token, "token", None)
    if not isinstance(candidate, str):
        return None
    # An empty string counts as "no token".
    return candidate or None
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
def oauth_username(oauth_profile: gr.OAuthProfile | None) -> str | None:
    """Return the ``username`` of ``oauth_profile``, or ``None``.

    ``None`` is returned when the argument has no ``username`` attribute, or
    when that attribute is anything other than a non-empty ``str``.
    """
    candidate = getattr(oauth_profile, "username", None)
    if not isinstance(candidate, str):
        return None
    # An empty string counts as "no username".
    return candidate or None
|
| 212 |
|
| 213 |
|
| 214 |
def stable_audio_token_hint(model: GenerationModel) -> str:
    """Return the sign-in hint to show for ``model``.

    Gated models get the longer message that also asks the user to accept
    the model's terms; every other model gets the plain sign-in message.
    """
    if model.gated:
        gated_hint = (
            "Sign in with Hugging Face and accept this gated model's terms from your "
            "own account before running it."
        )
        return gated_hint
    return "Sign in with Hugging Face before running this Space."
|
| 221 |
+
|
| 222 |
+
|
| 223 |
+
def user_can_download_gated_model(repo_id: str, token: str) -> tuple[bool, str | None]:
    """Check whether ``token`` can reach ``model_config.json`` in ``repo_id``.

    Sends a single authenticated HEAD request to the repository's
    ``resolve/main/model_config.json`` URL with a 20 second timeout.

    Returns:
        ``(allowed, message)``. When a response is received, ``allowed`` is
        ``response.status < 400`` and ``message`` is ``None``. On any failure
        ``allowed`` is ``False`` and ``message`` explains why; HTTP 401/403
        yield the "accept the terms" guidance.
    """
    probe_url = f"https://huggingface.co/{repo_id}/resolve/main/model_config.json"
    auth_headers = {"Authorization": f"Bearer {token}"}
    probe = urllib.request.Request(probe_url, method="HEAD", headers=auth_headers)
    try:
        with urllib.request.urlopen(probe, timeout=20) as reply:
            allowed = reply.status < 400
    except urllib.error.HTTPError as http_error:
        status = http_error.code
        if status not in {401, 403}:
            return False, f"Hugging Face access check failed with HTTP {status}."
        # 401/403: the account has not been granted access to the gated repo.
        return (
            False,
            "Your Hugging Face account does not have access to this gated model yet. "
            "Open the model page while logged in, accept Stability's terms, then retry.",
        )
    except Exception as failure:
        # Anything else (DNS, timeout, TLS, ...) is reported rather than raised.
        return False, f"Hugging Face access check failed: {failure!r}"
    return allowed, None
|
| 242 |
+
|
| 243 |
+
|
| 244 |
+
@contextmanager
def hub_download_token(token: str | None):
    """Make ``stable_audio_3`` Hub downloads default to ``token`` inside the block.

    With a falsy ``token`` this is a no-op context. Otherwise
    ``stable_audio_3.model_configs.hf_hub_download`` is replaced by a wrapper
    that fills in ``token=`` whenever the caller did not pass one, and the
    original function is put back on exit, even if the body raises.
    """
    if token:
        import stable_audio_3.model_configs as model_configs

        upstream_download = model_configs.hf_hub_download

        def download_with_user_token(*args, **kwargs):
            # Only supply the token when the caller has not chosen one.
            if "token" not in kwargs:
                kwargs["token"] = token
            return upstream_download(*args, **kwargs)

        model_configs.hf_hub_download = download_with_user_token
        try:
            yield
        finally:
            model_configs.hf_hub_download = upstream_download
    else:
        yield
|
| 263 |
+
|
| 264 |
+
|
| 265 |
+
def generation_preflight_error(
|
| 266 |
+
model: GenerationModel,
|
| 267 |
+
allow_cpu_medium: bool,
|
| 268 |
+
oauth_profile: gr.OAuthProfile | None,
|
| 269 |
+
oauth_token: gr.OAuthToken | None,
|
| 270 |
+
) -> tuple[str | None, str]:
|
| 271 |
device = "unknown"
|
| 272 |
+
token = oauth_token_value(oauth_token)
|
| 273 |
+
if oauth_profile is None or not token:
|
| 274 |
return (
|
| 275 |
+
"Sign in with Hugging Face before running this Space.",
|
|
|
|
| 276 |
device,
|
| 277 |
)
|
| 278 |
|
|
|
|
| 284 |
"Use a GPU Space or enable the CPU override for a slow/debug-only attempt.",
|
| 285 |
device,
|
| 286 |
)
|
| 287 |
+
if model.gated:
|
| 288 |
+
has_access, error = user_can_download_gated_model(model.repo_id, token)
|
| 289 |
+
if not has_access:
|
| 290 |
+
return error or "Your Hugging Face account cannot access this gated model.", device
|
| 291 |
if model.requires_cuda and device == "cuda" and not flash_attn_available():
|
| 292 |
return (
|
| 293 |
f"{model.label} expects flash-attn on CUDA. Rebuild the Space with the "
|
|
|
|
| 297 |
return None, device
|
| 298 |
|
| 299 |
|
| 300 |
+
def assert_generation_runtime(
    model: GenerationModel,
    allow_cpu_medium: bool,
    oauth_profile: gr.OAuthProfile | None,
    oauth_token: gr.OAuthToken | None,
) -> str:
    """Run the generation preflight and return the device, or raise.

    Delegates to ``generation_preflight_error`` and converts a reported
    problem into an exception.

    Returns:
        The device string reported by the preflight.

    Raises:
        gr.Error: the preflight returned a non-empty error message.
    """
    problem, device = generation_preflight_error(
        model,
        allow_cpu_medium,
        oauth_profile,
        oauth_token,
    )
    if not problem:
        return device
    raise gr.Error(problem)
|
|
|
|
| 335 |
gc.collect()
|
| 336 |
|
| 337 |
|
| 338 |
+
def load_generation_model(
    model_key: str,
    allow_cpu_medium: bool,
    oauth_profile: gr.OAuthProfile | None,
    oauth_token: gr.OAuthToken | None,
):
    """Return ``(model, device)`` for ``model_key``, loading it on a cache miss.

    ``assert_generation_runtime`` runs on every call, cache hit or not. On a
    miss the previously cached generation model is dropped,
    ``clear_torch_memory()`` is called, and the new model is loaded with the
    caller's OAuth token applied to Hub downloads, all while holding
    ``MODEL_LOAD_LOCK``.

    Raises:
        gr.Error: propagated from ``assert_generation_runtime`` when a
            preflight check fails.
    """
    model_def = GENERATION_MODELS[model_key]
    device = assert_generation_runtime(model_def, allow_cpu_medium, oauth_profile, oauth_token)

    def cached_model():
        # Read the model slot exactly once. The first lookup below runs
        # without the lock, so another thread may reset the cache between a
        # "is not None" check and a second read, which would hand None back.
        if MODEL_CACHE["key"] != model_key:
            return None
        return MODEL_CACHE["model"]

    model = cached_model()
    if model is not None:
        return model, device

    with MODEL_LOAD_LOCK:
        # Re-check under the lock: another request may have finished loading
        # this model while we were waiting.
        model = cached_model()
        if model is not None:
            return model, device

        # Drop the old model before loading the new one.
        MODEL_CACHE["model"] = None
        MODEL_CACHE["key"] = None
        clear_torch_memory()

        from stable_audio_3 import StableAudioModel

        model_half = device == "cuda"
        with hub_download_token(oauth_token_value(oauth_token)):
            model = StableAudioModel.from_pretrained(model_key, model_half=model_half)
        MODEL_CACHE["key"] = model_key
        MODEL_CACHE["model"] = model
        return model, device
|
| 366 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 367 |
|
| 368 |
+
def load_autoencoder(
|
| 369 |
+
model_key: str,
|
| 370 |
+
allow_cpu_same_l: bool,
|
| 371 |
+
oauth_profile: gr.OAuthProfile | None,
|
| 372 |
+
oauth_token: gr.OAuthToken | None,
|
| 373 |
+
):
|
| 374 |
+
if oauth_profile is None or not oauth_token_value(oauth_token):
|
| 375 |
+
raise gr.Error("Sign in with Hugging Face before running this Space.")
|
| 376 |
|
|
|
|
| 377 |
model_def = AUTOENCODER_MODELS[model_key]
|
| 378 |
torch = import_torch()
|
| 379 |
device = current_device(torch)
|
|
|
|
| 386 |
if AE_CACHE["key"] == model_key and AE_CACHE["model"] is not None:
|
| 387 |
return AE_CACHE["model"], device
|
| 388 |
|
| 389 |
+
with MODEL_LOAD_LOCK:
|
| 390 |
+
if AE_CACHE["key"] == model_key and AE_CACHE["model"] is not None:
|
| 391 |
+
return AE_CACHE["model"], device
|
| 392 |
+
|
| 393 |
+
AE_CACHE["model"] = None
|
| 394 |
+
AE_CACHE["key"] = None
|
| 395 |
+
clear_torch_memory()
|
| 396 |
|
| 397 |
+
from stable_audio_3 import AutoencoderModel
|
| 398 |
|
| 399 |
+
with hub_download_token(oauth_token_value(oauth_token)):
|
| 400 |
+
model = AutoencoderModel.from_pretrained(model_key)
|
| 401 |
+
AE_CACHE["key"] = model_key
|
| 402 |
+
AE_CACHE["model"] = model
|
| 403 |
+
return model, device
|
| 404 |
|
| 405 |
|
| 406 |
def model_changed(model_key: str):
|
|
|
|
| 434 |
seed: int,
|
| 435 |
chunked_decode: bool,
|
| 436 |
allow_cpu_medium: bool,
|
| 437 |
+
oauth_profile: gr.OAuthProfile | None = None,
|
| 438 |
+
oauth_token: gr.OAuthToken | None = None,
|
| 439 |
progress=gr.Progress(track_tqdm=True),
|
| 440 |
):
|
| 441 |
model_def = GENERATION_MODELS[model_key]
|
|
|
|
| 447 |
"repo_id": model_def.repo_id,
|
| 448 |
}
|
| 449 |
|
| 450 |
+
preflight_error, preflight_device = generation_preflight_error(
|
| 451 |
+
model_def,
|
| 452 |
+
allow_cpu_medium,
|
| 453 |
+
oauth_profile,
|
| 454 |
+
oauth_token,
|
| 455 |
+
)
|
| 456 |
if preflight_error:
|
| 457 |
return None, {
|
| 458 |
"status": "blocked",
|
|
|
|
| 460 |
"model": model_def.key,
|
| 461 |
"repo_id": model_def.repo_id,
|
| 462 |
"device": preflight_device,
|
| 463 |
+
"signed_in": oauth_profile is not None,
|
| 464 |
+
"username": oauth_username(oauth_profile),
|
| 465 |
+
"oauth_token_present": bool(oauth_token_value(oauth_token)),
|
| 466 |
}
|
| 467 |
|
| 468 |
progress(0.05, desc="Loading model")
|
|
|
|
| 471 |
if seed < 0:
|
| 472 |
seed = int.from_bytes(os.urandom(4), "little") % 100000
|
| 473 |
|
| 474 |
+
model, device = load_generation_model(model_key, allow_cpu_medium, oauth_profile, oauth_token)
|
| 475 |
progress(0.25, desc="Generating")
|
| 476 |
audio = model.generate(
|
| 477 |
prompt=prompt.strip(),
|
|
|
|
| 509 |
"elapsed_s": elapsed,
|
| 510 |
"output_file": out_file.name,
|
| 511 |
"note": model_def.note,
|
| 512 |
+
"username": oauth_username(oauth_profile),
|
| 513 |
}
|
| 514 |
return out_file.name, metadata
|
| 515 |
|
|
|
|
| 520 |
audio_input: tuple[int, np.ndarray] | None,
|
| 521 |
chunked: bool,
|
| 522 |
allow_cpu_same_l: bool,
|
| 523 |
+
oauth_profile: gr.OAuthProfile | None = None,
|
| 524 |
+
oauth_token: gr.OAuthToken | None = None,
|
| 525 |
progress=gr.Progress(track_tqdm=True),
|
| 526 |
):
|
| 527 |
+
if oauth_profile is None or not oauth_token_value(oauth_token):
|
| 528 |
+
return None, {
|
| 529 |
+
"status": "blocked",
|
| 530 |
+
"error": "Sign in with Hugging Face before running this Space.",
|
| 531 |
+
"autoencoder": model_key,
|
| 532 |
+
"repo_id": AUTOENCODER_MODELS[model_key]["repo_id"],
|
| 533 |
+
"signed_in": oauth_profile is not None,
|
| 534 |
+
}
|
| 535 |
+
|
| 536 |
if audio_input is None:
|
| 537 |
return None, {
|
| 538 |
"status": "blocked",
|
|
|
|
| 558 |
|
| 559 |
progress(0.05, desc="Loading autoencoder")
|
| 560 |
started = time.time()
|
| 561 |
+
model, device = load_autoencoder(model_key, allow_cpu_same_l, oauth_profile, oauth_token)
|
| 562 |
|
| 563 |
progress(0.25, desc="Encoding")
|
| 564 |
sr, data = audio_input
|
|
|
|
| 589 |
"latent_shape": list(latents.shape),
|
| 590 |
"elapsed_s": round(time.time() - started, 3),
|
| 591 |
"output_file": out_file.name,
|
| 592 |
+
"username": oauth_username(oauth_profile),
|
| 593 |
}
|
| 594 |
return out_file.name, metadata
|
| 595 |
|
| 596 |
|
| 597 |
+
def unload_models(oauth_profile: gr.OAuthProfile | None = None):
    """Reset both model caches and call ``clear_torch_memory()`` for a signed-in caller.

    Returns:
        ``{"status": "blocked", "error": ...}`` when ``oauth_profile`` is
        ``None``; otherwise ``{"status": "unloaded", "username": ...}``.
    """
    if oauth_profile is None:
        return {"status": "blocked", "error": "Sign in with Hugging Face before running this Space."}
    # Take the same lock the loaders hold while they mutate these caches, so
    # an in-flight load cannot store its model right after the caches are
    # cleared and leave it resident behind an "unloaded" status.
    with MODEL_LOAD_LOCK:
        MODEL_CACHE["key"] = None
        MODEL_CACHE["model"] = None
        AE_CACHE["key"] = None
        AE_CACHE["model"] = None
        clear_torch_memory()
    return {"status": "unloaded", "username": oauth_username(oauth_profile)}
|
| 606 |
|
| 607 |
|
| 608 |
+
def runtime_status(
|
| 609 |
+
oauth_profile: gr.OAuthProfile | None = None,
|
| 610 |
+
oauth_token: gr.OAuthToken | None = None,
|
| 611 |
+
):
|
| 612 |
try:
|
| 613 |
torch = import_torch()
|
| 614 |
device = current_device(torch)
|
|
|
|
| 622 |
"device": device,
|
| 623 |
"cuda_name": cuda_name,
|
| 624 |
"flash_attn": flash_attn_available(),
|
| 625 |
+
"signed_in": oauth_profile is not None,
|
| 626 |
+
"username": oauth_username(oauth_profile),
|
| 627 |
+
"oauth_token_present": bool(oauth_token_value(oauth_token)),
|
| 628 |
"loaded_generation_model": MODEL_CACHE["key"],
|
| 629 |
"loaded_autoencoder": AE_CACHE["key"],
|
| 630 |
}
|
|
|
|
| 641 |
|
| 642 |
with gr.Blocks(title="Stable Audio 3 Lab") as demo:
|
| 643 |
gr.Markdown("# Stable Audio 3 Lab")
|
| 644 |
+
gr.LoginButton(value="Sign in with Hugging Face", logout_value="Logout ({})")
|
| 645 |
|
| 646 |
with gr.Tab("Generate"):
|
| 647 |
with gr.Row(equal_height=False):
|