Spaces:
Running on Zero
Running on Zero
multimodalart committed on
Commit ·
ce4f7f1
1
Parent(s): 12aed82
Copy reference audio before processor deletes it; tidy UI into advanced accordion
Browse files
app.py
CHANGED
|
@@ -9,6 +9,7 @@ import asyncio
|
|
| 9 |
import base64
|
| 10 |
import logging
|
| 11 |
import os
|
|
|
|
| 12 |
import sys
|
| 13 |
import tempfile
|
| 14 |
import uuid
|
|
@@ -244,7 +245,15 @@ def generate(
|
|
| 244 |
|
| 245 |
async def patched(url):
|
| 246 |
if url.startswith("file://"):
|
| 247 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 248 |
return await original(url)
|
| 249 |
|
| 250 |
processor._download_reference = patched
|
|
@@ -319,37 +328,38 @@ with gr.Blocks(title="Scenema Audio") as demo:
|
|
| 319 |
lines=2,
|
| 320 |
placeholder='e.g. "A warm male voice with a slight British accent..."',
|
| 321 |
)
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 332 |
scene = gr.Textbox(label="Scene", placeholder="e.g. busy cafe at midday")
|
| 333 |
action = gr.Textbox(label="Performance direction (<action>)")
|
| 334 |
sound_before = gr.Textbox(label="Sound event before speech (<sound>)")
|
| 335 |
-
|
|
|
|
|
|
|
| 336 |
raw_xml = gr.Textbox(
|
| 337 |
-
label="<speak> XML (overrides fields above when set)",
|
| 338 |
lines=4,
|
| 339 |
)
|
| 340 |
-
with gr.Accordion("Voice cloning (optional)", open=False):
|
| 341 |
-
reference_audio = gr.Audio(
|
| 342 |
-
label="Reference voice (10-20s)",
|
| 343 |
-
type="filepath",
|
| 344 |
-
)
|
| 345 |
-
with gr.Row():
|
| 346 |
-
mode = gr.Radio(
|
| 347 |
-
["generate", "voice_design"], value="generate", label="Mode"
|
| 348 |
-
)
|
| 349 |
-
seed = gr.Number(value=42, precision=0, label="Seed (-1 = random)")
|
| 350 |
-
with gr.Row():
|
| 351 |
-
background_sfx = gr.Checkbox(value=False, label="Keep background SFX")
|
| 352 |
-
skip_vc = gr.Checkbox(value=False, label="Skip SeedVC post-processing")
|
| 353 |
run_btn = gr.Button("Generate", variant="primary")
|
| 354 |
with gr.Column(scale=2):
|
| 355 |
out_audio = gr.Audio(label="Output", type="filepath")
|
|
|
|
| 9 |
import base64
|
| 10 |
import logging
|
| 11 |
import os
|
| 12 |
+
import shutil
|
| 13 |
import sys
|
| 14 |
import tempfile
|
| 15 |
import uuid
|
|
|
|
| 245 |
|
| 246 |
async def patched(url):
|
| 247 |
if url.startswith("file://"):
|
| 248 |
+
# Copy to a throwaway temp file — AudioProcessor unlinks
|
| 249 |
+
# ref_wav_path on completion, which would otherwise destroy
|
| 250 |
+
# the user's uploaded gradio file and break subsequent runs.
|
| 251 |
+
src = url[len("file://"):]
|
| 252 |
+
suffix = Path(src).suffix or ".wav"
|
| 253 |
+
tmp = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
|
| 254 |
+
tmp.close()
|
| 255 |
+
shutil.copyfile(src, tmp.name)
|
| 256 |
+
return tmp.name
|
| 257 |
return await original(url)
|
| 258 |
|
| 259 |
processor._download_reference = patched
|
|
|
|
| 328 |
lines=2,
|
| 329 |
placeholder='e.g. "A warm male voice with a slight British accent..."',
|
| 330 |
)
|
| 331 |
+
gender = gr.Radio(["male", "female"], value="male", label="Gender")
|
| 332 |
+
reference_audio = gr.Audio(
|
| 333 |
+
label="Voice cloning reference (optional, 10-20s)",
|
| 334 |
+
type="filepath",
|
| 335 |
+
)
|
| 336 |
+
with gr.Accordion("Advanced settings", open=False):
|
| 337 |
+
with gr.Row():
|
| 338 |
+
mode = gr.Radio(
|
| 339 |
+
["generate", "voice_design"],
|
| 340 |
+
value="generate",
|
| 341 |
+
label="Mode",
|
| 342 |
+
info="voice_design = 15s voice preview",
|
| 343 |
+
)
|
| 344 |
+
seed = gr.Number(value=42, precision=0, label="Seed (-1 = random)")
|
| 345 |
+
with gr.Row():
|
| 346 |
+
language = gr.Dropdown(
|
| 347 |
+
["en", "es", "fr", "de", "it", "pt", "ja", "zh", "ko"],
|
| 348 |
+
value="en", label="Language",
|
| 349 |
+
)
|
| 350 |
+
shot = gr.Radio(
|
| 351 |
+
["closeup", "wide", "scene"], value="closeup", label="Shot"
|
| 352 |
+
)
|
| 353 |
scene = gr.Textbox(label="Scene", placeholder="e.g. busy cafe at midday")
|
| 354 |
action = gr.Textbox(label="Performance direction (<action>)")
|
| 355 |
sound_before = gr.Textbox(label="Sound event before speech (<sound>)")
|
| 356 |
+
with gr.Row():
|
| 357 |
+
background_sfx = gr.Checkbox(value=False, label="Keep background SFX")
|
| 358 |
+
skip_vc = gr.Checkbox(value=False, label="Skip SeedVC post-processing")
|
| 359 |
raw_xml = gr.Textbox(
|
| 360 |
+
label="Raw <speak> XML (overrides all fields above when set)",
|
| 361 |
lines=4,
|
| 362 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 363 |
run_btn = gr.Button("Generate", variant="primary")
|
| 364 |
with gr.Column(scale=2):
|
| 365 |
out_audio = gr.Audio(label="Output", type="filepath")
|