Spaces:
Running on Zero
Running on Zero
Commit ·
48d32ab
1
Parent(s): e44cca0
Fix ZeroGPU duration: dynamic per-sentence sizing, cap at 120s (#3)
Browse files
- Fix ZeroGPU duration: dynamic per-sentence sizing, cap at 120s (d621c9389cc82dd28fddd421518e8937c72cac60)
- Tighten GPU window: 10s base + 1s/sentence, quote-aware count (fc8ba6b960b627a6efce811ce1ea509f8d29ca18)
Co-authored-by: Manmay Nakhashi <Manmay@users.noreply.huggingface.co>
app.py
CHANGED
|
@@ -182,8 +182,70 @@ async def homepage():
|
|
| 182 |
return f.read()
|
| 183 |
|
| 184 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
@app.api()
|
| 186 |
-
@spaces.GPU(duration=
|
| 187 |
def generate_audio(
|
| 188 |
prompt: str,
|
| 189 |
audio_ref: FileData | None,
|
|
|
|
| 182 |
return f.read()
|
| 183 |
|
| 184 |
|
| 185 |
+
_GPU_BASE_S = 10 # minimum GPU window in seconds, requested even for a single sentence
|
| 186 |
+
_GPU_PER_SENTENCE_S = 1 # extra seconds added for each sentence beyond the first
|
| 187 |
+
_GPU_CAP_S = 110 # upper bound in seconds; leaves 10 s headroom under ZeroGPU's 120 s ceiling
|
| 188 |
+
|
| 189 |
+
|
| 190 |
+
def _count_sentences(prompt: str) -> int:
|
| 191 |
+
"""Count TTS sentences in ``prompt`` using the same quote-aware splitter
|
| 192 |
+
the long-form chunker uses (``src/text_chunker``). Terminators inside
|
| 193 |
+
``"..."`` dialogue do **not** count, so the GPU window calc agrees with
|
| 194 |
+
what the chunker sees — and dialogue-heavy prompts don't get over-budgeted.
|
| 195 |
+
Always returns ≥1 so a single fragment still gets a real window.
|
| 196 |
+
"""
|
| 197 |
+
if not prompt or not prompt.strip():
|
| 198 |
+
return 1
|
| 199 |
+
try:
|
| 200 |
+
from text_chunker import split_sentences_outside_quotes
|
| 201 |
+
n = len(split_sentences_outside_quotes(prompt))
|
| 202 |
+
except Exception:
|
| 203 |
+
# Fallback: cheap punctuation count if the chunker import fails for any
|
| 204 |
+
# reason — preserves the ability to size GPU windows even on a broken
|
| 205 |
+
# import path.
|
| 206 |
+
n = sum(1 for ch in prompt if ch in ".!?")
|
| 207 |
+
return max(1, n)
|
| 208 |
+
|
| 209 |
+
|
| 210 |
+
def _gpu_duration(
    prompt: str,
    audio_ref: FileData | None,
    cfg: float,
    stg: float,
    dur_mult: float,
    gen_dur: float,
    ref_dur: float,
    seed: int,
    denoise_ref: bool = True,
    max_chunk_duration: float = 45.0,
    target_chunk_duration: float = 37.0,
    crossfade_ms: float = 50.0,
) -> int:
    """Return the ZeroGPU window, in seconds, to request for one call.

    Passed as ``duration=`` to ``@spaces.GPU`` so it is evaluated per
    request. The parameter list mirrors the decorated function's; only
    ``prompt`` is actually used here.

    ZeroGPU rejects any static decorator value above the account's per-call
    cap (120 s on PRO), so instead of a fixed value we request only what
    each call needs:

        window = _GPU_BASE_S + (num_sentences - 1) × _GPU_PER_SENTENCE_S

    The result is clamped to ``[_GPU_BASE_S, _GPU_CAP_S]``. With the
    defaults that is 10 s base + 1 s per extra sentence, capped at 110 s
    (a 10 s safety margin under the 120 s ceiling). The numbers were tuned
    to observed runtime on this Space's hardware.

    Trade-off: under-allocating is worse than over-allocating. A call that
    exceeds its window is killed by ZeroGPU (the user sees a generation
    failure) **and** daily quota is still consumed for the time spent.
    Shorter windows *do* improve queue priority (per HF docs), which is why
    not every call is simply pinned at the cap.
    """
    extra_sentences = _count_sentences(prompt) - 1
    window = _GPU_BASE_S + extra_sentences * _GPU_PER_SENTENCE_S

    # Clamp: apply the ceiling first, then the floor.
    if window > _GPU_CAP_S:
        window = _GPU_CAP_S
    if window < _GPU_BASE_S:
        window = _GPU_BASE_S
    return window
|
| 245 |
+
|
| 246 |
+
|
| 247 |
@app.api()
|
| 248 |
+
@spaces.GPU(duration=_gpu_duration)
|
| 249 |
def generate_audio(
|
| 250 |
prompt: str,
|
| 251 |
audio_ref: FileData | None,
|