tedi-resemble Manmay committed on
Commit
48d32ab
·
1 Parent(s): e44cca0

Fix ZeroGPU duration: dynamic per-sentence sizing, cap at 120s (#3)

Browse files

- Fix ZeroGPU duration: dynamic per-sentence sizing, cap at 120s (d621c9389cc82dd28fddd421518e8937c72cac60)
- Tighten GPU window: 10s base + 1s/sentence, quote-aware count (fc8ba6b960b627a6efce811ce1ea509f8d29ca18)


Co-authored-by: Manmay Nakhashi <Manmay@users.noreply.huggingface.co>

Files changed (1) hide show
  1. app.py +63 -1
app.py CHANGED
@@ -182,8 +182,70 @@ async def homepage():
182
  return f.read()
183
 
184
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
  @app.api()
186
- @spaces.GPU(duration=600)
187
  def generate_audio(
188
  prompt: str,
189
  audio_ref: FileData | None,
 
182
  return f.read()
183
 
184
 
185
+ _GPU_BASE_S = 10 # seconds; bare-minimum GPU window, requested even for a single sentence
186
+ _GPU_PER_SENTENCE_S = 1 # seconds added for each sentence beyond the first
187
+ _GPU_CAP_S = 110 # seconds; upper bound, leaves 10 s headroom under ZeroGPU's 120 s ceiling
188
+
189
+
190
+ def _count_sentences(prompt: str) -> int:
191
+ """Count TTS sentences in ``prompt`` using the same quote-aware splitter
192
+ the long-form chunker uses (``src/text_chunker``). Terminators inside
193
+ ``"..."`` dialogue do **not** count, so the GPU window calc agrees with
194
+ what the chunker sees — and dialogue-heavy prompts don't get over-budgeted.
195
+ Always returns ≥1 so a single fragment still gets a real window.
196
+ """
197
+ if not prompt or not prompt.strip():
198
+ return 1
199
+ try:
200
+ from text_chunker import split_sentences_outside_quotes
201
+ n = len(split_sentences_outside_quotes(prompt))
202
+ except Exception:
203
+ # Fallback: cheap punctuation count if the chunker import fails for any
204
+ # reason — preserves the ability to size GPU windows even on a broken
205
+ # import path.
206
+ n = sum(1 for ch in prompt if ch in ".!?")
207
+ return max(1, n)
208
+
209
+
210
def _gpu_duration(
    prompt: str,
    audio_ref: FileData | None,
    cfg: float,
    stg: float,
    dur_mult: float,
    gen_dur: float,
    ref_dur: float,
    seed: int,
    denoise_ref: bool = True,
    max_chunk_duration: float = 45.0,
    target_chunk_duration: float = 37.0,
    crossfade_ms: float = 50.0,
) -> int:
    """Compute the ZeroGPU window (in seconds) to request for one call.

    Intended to be passed as ``duration=`` to ``spaces.GPU``, which accepts a
    callable evaluated per request; a static value above the account's
    per-call cap (120 s on PRO) is rejected by ZeroGPU. Only ``prompt`` is
    read here; the remaining parameters are accepted and ignored
    (presumably so the signature lines up with the decorated function —
    confirm against ``generate_audio``).

    Sizing rule::

        window = _GPU_BASE_S + (num_sentences - 1) * _GPU_PER_SENTENCE_S

    clamped to the range ``[_GPU_BASE_S, _GPU_CAP_S]`` — i.e. 10 s base plus
    1 s per extra sentence, never more than 110 s (10 s of margin under the
    120 s ZeroGPU ceiling). The numbers were tuned to observed runtime on
    this Space's hardware.

    Why not always request the cap: shorter allocations improve queue
    priority (per HF docs). Why not go lower: a call that outlives its
    allocation is killed by ZeroGPU (the user sees a generation failure) and
    daily quota is still charged for the time spent, so under-allocating is
    the worse error.

    Returns:
        The number of seconds to request.
    """
    extra_sentences = _count_sentences(prompt) - 1
    window = _GPU_BASE_S + extra_sentences * _GPU_PER_SENTENCE_S
    # Apply the ceiling first, then the floor — same result as
    # max(base, min(window, cap)).
    if window > _GPU_CAP_S:
        window = _GPU_CAP_S
    if window < _GPU_BASE_S:
        window = _GPU_BASE_S
    return window
245
+
246
+
247
  @app.api()
248
+ @spaces.GPU(duration=_gpu_duration)
249
  def generate_audio(
250
  prompt: str,
251
  audio_ref: FileData | None,