Spaces:
Running on Zero
Running on Zero
Manmay Nakhashi committed on
Commit ·
433ac9f
1
Parent(s): 5cc51a5
Add Reference duration slider (3–30s, default 10s)
Browse files
Exposes how many seconds of the uploaded voice reference the model
conditions on. Wired through on_generate -> TTSServer.generate_to_file
(ref_duration kwarg already existed). Examples updated with the new
default value.
app.py
CHANGED
|
@@ -108,7 +108,7 @@ EXAMPLES: list[tuple[str, str, str]] = [
|
|
| 108 |
|
| 109 |
@spaces.GPU(duration=120)
|
| 110 |
def on_generate(prompt: str, audio_ref, cfg: float, stg: float, dur_mult: float,
|
| 111 |
-
gen_dur: float, seed: int):
|
| 112 |
if not prompt or not prompt.strip():
|
| 113 |
raise gr.Error("Prompt is empty.")
|
| 114 |
t0 = time.time()
|
|
@@ -121,6 +121,7 @@ def on_generate(prompt: str, audio_ref, cfg: float, stg: float, dur_mult: float,
|
|
| 121 |
cfg_scale=cfg, stg_scale=stg,
|
| 122 |
duration_multiplier=dur_mult, seed=int(seed),
|
| 123 |
gen_duration=float(gen_dur),
|
|
|
|
| 124 |
)
|
| 125 |
elapsed = time.time() - t0
|
| 126 |
logging.info(f"Generated in {elapsed:.2f}s -> {output}")
|
|
@@ -166,6 +167,9 @@ with gr.Blocks(
|
|
| 166 |
gen_dur_slider = gr.Slider(0.0, 60.0, value=0.0, step=1.0,
|
| 167 |
label="Target duration (s) — 0 = auto from prompt; "
|
| 168 |
"set higher (≥20s) for long-form music or scenes")
|
|
|
|
|
|
|
|
|
|
| 169 |
seed_input = gr.Number(value=42, label="Seed", precision=0)
|
| 170 |
audio_out = gr.Audio(label="Generated audio", type="filepath")
|
| 171 |
with gr.Accordion("Prompt writing guide", open=False):
|
|
@@ -183,7 +187,7 @@ with gr.Blocks(
|
|
| 183 |
gen_btn.click(
|
| 184 |
on_generate,
|
| 185 |
inputs=[prompt_box, audio_ref, cfg_slider, stg_slider,
|
| 186 |
-
dur_slider, gen_dur_slider, seed_input],
|
| 187 |
outputs=[audio_out],
|
| 188 |
)
|
| 189 |
|
|
@@ -192,16 +196,17 @@ with gr.Blocks(
|
|
| 192 |
gr.Examples(
|
| 193 |
label="🎬 Click any row to generate a sample",
|
| 194 |
examples=[
|
| 195 |
-
[name, prompt, voice_path, 2.5, 1.5, 1.1, 0.0, 42]
|
| 196 |
for name, voice_path, prompt in EXAMPLES
|
| 197 |
],
|
| 198 |
example_labels=[name for name, _, _ in EXAMPLES],
|
| 199 |
inputs=[gr.Textbox(visible=False, label="Scene"),
|
| 200 |
prompt_box, audio_ref,
|
| 201 |
-
cfg_slider, stg_slider, dur_slider, gen_dur_slider,
|
|
|
|
| 202 |
outputs=[audio_out],
|
| 203 |
-
fn=lambda _name, prompt, ref, cfg, stg, dur, gen_dur, seed: on_generate(
|
| 204 |
-
prompt, ref, cfg, stg, dur, gen_dur, seed),
|
| 205 |
cache_examples=False,
|
| 206 |
run_on_click=True,
|
| 207 |
examples_per_page=20,
|
|
|
|
| 108 |
|
| 109 |
@spaces.GPU(duration=120)
|
| 110 |
def on_generate(prompt: str, audio_ref, cfg: float, stg: float, dur_mult: float,
|
| 111 |
+
gen_dur: float, ref_dur: float, seed: int):
|
| 112 |
if not prompt or not prompt.strip():
|
| 113 |
raise gr.Error("Prompt is empty.")
|
| 114 |
t0 = time.time()
|
|
|
|
| 121 |
cfg_scale=cfg, stg_scale=stg,
|
| 122 |
duration_multiplier=dur_mult, seed=int(seed),
|
| 123 |
gen_duration=float(gen_dur),
|
| 124 |
+
ref_duration=float(ref_dur),
|
| 125 |
)
|
| 126 |
elapsed = time.time() - t0
|
| 127 |
logging.info(f"Generated in {elapsed:.2f}s -> {output}")
|
|
|
|
| 167 |
gen_dur_slider = gr.Slider(0.0, 60.0, value=0.0, step=1.0,
|
| 168 |
label="Target duration (s) — 0 = auto from prompt; "
|
| 169 |
"set higher (≥20s) for long-form music or scenes")
|
| 170 |
+
ref_dur_slider = gr.Slider(3.0, 30.0, value=10.0, step=1.0,
|
| 171 |
+
label="Reference duration (s) — how many seconds of the "
|
| 172 |
+
"uploaded voice reference the model conditions on")
|
| 173 |
seed_input = gr.Number(value=42, label="Seed", precision=0)
|
| 174 |
audio_out = gr.Audio(label="Generated audio", type="filepath")
|
| 175 |
with gr.Accordion("Prompt writing guide", open=False):
|
|
|
|
| 187 |
gen_btn.click(
|
| 188 |
on_generate,
|
| 189 |
inputs=[prompt_box, audio_ref, cfg_slider, stg_slider,
|
| 190 |
+
dur_slider, gen_dur_slider, ref_dur_slider, seed_input],
|
| 191 |
outputs=[audio_out],
|
| 192 |
)
|
| 193 |
|
|
|
|
| 196 |
gr.Examples(
|
| 197 |
label="🎬 Click any row to generate a sample",
|
| 198 |
examples=[
|
| 199 |
+
[name, prompt, voice_path, 2.5, 1.5, 1.1, 0.0, 10.0, 42]
|
| 200 |
for name, voice_path, prompt in EXAMPLES
|
| 201 |
],
|
| 202 |
example_labels=[name for name, _, _ in EXAMPLES],
|
| 203 |
inputs=[gr.Textbox(visible=False, label="Scene"),
|
| 204 |
prompt_box, audio_ref,
|
| 205 |
+
cfg_slider, stg_slider, dur_slider, gen_dur_slider,
|
| 206 |
+
ref_dur_slider, seed_input],
|
| 207 |
outputs=[audio_out],
|
| 208 |
+
fn=lambda _name, prompt, ref, cfg, stg, dur, gen_dur, ref_dur, seed: on_generate(
|
| 209 |
+
prompt, ref, cfg, stg, dur, gen_dur, ref_dur, seed),
|
| 210 |
cache_examples=False,
|
| 211 |
run_on_click=True,
|
| 212 |
examples_per_page=20,
|