Manmay Nakhashi committed on
Commit
433ac9f
·
1 Parent(s): 5cc51a5

Add Reference duration slider (3–30s, default 10s)

Browse files

This commit exposes, as a slider, how many seconds of the uploaded voice
reference the model conditions on. The value is wired through
on_generate -> TTSServer.generate_to_file (the ref_duration kwarg already
existed). The examples are updated with the new default value (10.0).

Files changed (1) hide show
  1. app.py +11 -6
app.py CHANGED
@@ -108,7 +108,7 @@ EXAMPLES: list[tuple[str, str, str]] = [
108
 
109
  @spaces.GPU(duration=120)
110
  def on_generate(prompt: str, audio_ref, cfg: float, stg: float, dur_mult: float,
111
- gen_dur: float, seed: int):
112
  if not prompt or not prompt.strip():
113
  raise gr.Error("Prompt is empty.")
114
  t0 = time.time()
@@ -121,6 +121,7 @@ def on_generate(prompt: str, audio_ref, cfg: float, stg: float, dur_mult: float,
121
  cfg_scale=cfg, stg_scale=stg,
122
  duration_multiplier=dur_mult, seed=int(seed),
123
  gen_duration=float(gen_dur),
 
124
  )
125
  elapsed = time.time() - t0
126
  logging.info(f"Generated in {elapsed:.2f}s -> {output}")
@@ -166,6 +167,9 @@ with gr.Blocks(
166
  gen_dur_slider = gr.Slider(0.0, 60.0, value=0.0, step=1.0,
167
  label="Target duration (s) — 0 = auto from prompt; "
168
  "set higher (≥20s) for long-form music or scenes")
 
 
 
169
  seed_input = gr.Number(value=42, label="Seed", precision=0)
170
  audio_out = gr.Audio(label="Generated audio", type="filepath")
171
  with gr.Accordion("Prompt writing guide", open=False):
@@ -183,7 +187,7 @@ with gr.Blocks(
183
  gen_btn.click(
184
  on_generate,
185
  inputs=[prompt_box, audio_ref, cfg_slider, stg_slider,
186
- dur_slider, gen_dur_slider, seed_input],
187
  outputs=[audio_out],
188
  )
189
 
@@ -192,16 +196,17 @@ with gr.Blocks(
192
  gr.Examples(
193
  label="🎬 Click any row to generate a sample",
194
  examples=[
195
- [name, prompt, voice_path, 2.5, 1.5, 1.1, 0.0, 42]
196
  for name, voice_path, prompt in EXAMPLES
197
  ],
198
  example_labels=[name for name, _, _ in EXAMPLES],
199
  inputs=[gr.Textbox(visible=False, label="Scene"),
200
  prompt_box, audio_ref,
201
- cfg_slider, stg_slider, dur_slider, gen_dur_slider, seed_input],
 
202
  outputs=[audio_out],
203
- fn=lambda _name, prompt, ref, cfg, stg, dur, gen_dur, seed: on_generate(
204
- prompt, ref, cfg, stg, dur, gen_dur, seed),
205
  cache_examples=False,
206
  run_on_click=True,
207
  examples_per_page=20,
 
108
 
109
  @spaces.GPU(duration=120)
110
  def on_generate(prompt: str, audio_ref, cfg: float, stg: float, dur_mult: float,
111
+ gen_dur: float, ref_dur: float, seed: int):
112
  if not prompt or not prompt.strip():
113
  raise gr.Error("Prompt is empty.")
114
  t0 = time.time()
 
121
  cfg_scale=cfg, stg_scale=stg,
122
  duration_multiplier=dur_mult, seed=int(seed),
123
  gen_duration=float(gen_dur),
124
+ ref_duration=float(ref_dur),
125
  )
126
  elapsed = time.time() - t0
127
  logging.info(f"Generated in {elapsed:.2f}s -> {output}")
 
167
  gen_dur_slider = gr.Slider(0.0, 60.0, value=0.0, step=1.0,
168
  label="Target duration (s) — 0 = auto from prompt; "
169
  "set higher (≥20s) for long-form music or scenes")
170
+ ref_dur_slider = gr.Slider(3.0, 30.0, value=10.0, step=1.0,
171
+ label="Reference duration (s) — how many seconds of the "
172
+ "uploaded voice reference the model conditions on")
173
  seed_input = gr.Number(value=42, label="Seed", precision=0)
174
  audio_out = gr.Audio(label="Generated audio", type="filepath")
175
  with gr.Accordion("Prompt writing guide", open=False):
 
187
  gen_btn.click(
188
  on_generate,
189
  inputs=[prompt_box, audio_ref, cfg_slider, stg_slider,
190
+ dur_slider, gen_dur_slider, ref_dur_slider, seed_input],
191
  outputs=[audio_out],
192
  )
193
 
 
196
  gr.Examples(
197
  label="🎬 Click any row to generate a sample",
198
  examples=[
199
+ [name, prompt, voice_path, 2.5, 1.5, 1.1, 0.0, 10.0, 42]
200
  for name, voice_path, prompt in EXAMPLES
201
  ],
202
  example_labels=[name for name, _, _ in EXAMPLES],
203
  inputs=[gr.Textbox(visible=False, label="Scene"),
204
  prompt_box, audio_ref,
205
+ cfg_slider, stg_slider, dur_slider, gen_dur_slider,
206
+ ref_dur_slider, seed_input],
207
  outputs=[audio_out],
208
+ fn=lambda _name, prompt, ref, cfg, stg, dur, gen_dur, ref_dur, seed: on_generate(
209
+ prompt, ref, cfg, stg, dur, gen_dur, ref_dur, seed),
210
  cache_examples=False,
211
  run_on_click=True,
212
  examples_per_page=20,