multimodalart committed on
Commit
ce4f7f1
·
1 Parent(s): 12aed82

Copy reference audio before processor deletes it; tidy UI into advanced accordion

Browse files
Files changed (1) hide show
  1. app.py +36 -26
app.py CHANGED
@@ -9,6 +9,7 @@ import asyncio
9
  import base64
10
  import logging
11
  import os
 
12
  import sys
13
  import tempfile
14
  import uuid
@@ -244,7 +245,15 @@ def generate(
244
 
245
  async def patched(url):
246
  if url.startswith("file://"):
247
- return url[len("file://"):]
 
 
 
 
 
 
 
 
248
  return await original(url)
249
 
250
  processor._download_reference = patched
@@ -319,37 +328,38 @@ with gr.Blocks(title="Scenema Audio") as demo:
319
  lines=2,
320
  placeholder='e.g. "A warm male voice with a slight British accent..."',
321
  )
322
- with gr.Row():
323
- gender = gr.Radio(["male", "female"], value="male", label="Gender")
324
- language = gr.Dropdown(
325
- ["en", "es", "fr", "de", "it", "pt", "ja", "zh", "ko"],
326
- value="en", label="Language",
327
- )
328
- shot = gr.Radio(
329
- ["closeup", "wide", "scene"], value="closeup", label="Shot"
330
- )
331
- with gr.Accordion("Scene & direction (optional)", open=False):
 
 
 
 
 
 
 
 
 
 
 
 
332
  scene = gr.Textbox(label="Scene", placeholder="e.g. busy cafe at midday")
333
  action = gr.Textbox(label="Performance direction (<action>)")
334
  sound_before = gr.Textbox(label="Sound event before speech (<sound>)")
335
- with gr.Accordion("Raw XML override (optional)", open=False):
 
 
336
  raw_xml = gr.Textbox(
337
- label="<speak> XML (overrides fields above when set)",
338
  lines=4,
339
  )
340
- with gr.Accordion("Voice cloning (optional)", open=False):
341
- reference_audio = gr.Audio(
342
- label="Reference voice (10-20s)",
343
- type="filepath",
344
- )
345
- with gr.Row():
346
- mode = gr.Radio(
347
- ["generate", "voice_design"], value="generate", label="Mode"
348
- )
349
- seed = gr.Number(value=42, precision=0, label="Seed (-1 = random)")
350
- with gr.Row():
351
- background_sfx = gr.Checkbox(value=False, label="Keep background SFX")
352
- skip_vc = gr.Checkbox(value=False, label="Skip SeedVC post-processing")
353
  run_btn = gr.Button("Generate", variant="primary")
354
  with gr.Column(scale=2):
355
  out_audio = gr.Audio(label="Output", type="filepath")
 
9
  import base64
10
  import logging
11
  import os
12
+ import shutil
13
  import sys
14
  import tempfile
15
  import uuid
 
245
 
246
  async def patched(url):
247
  if url.startswith("file://"):
248
+ # Copy to a throwaway temp file — AudioProcessor unlinks
249
+ # ref_wav_path on completion, which would otherwise destroy
250
+ # the user's uploaded gradio file and break subsequent runs.
251
+ src = url[len("file://"):]
252
+ suffix = Path(src).suffix or ".wav"
253
+ tmp = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
254
+ tmp.close()
255
+ shutil.copyfile(src, tmp.name)
256
+ return tmp.name
257
  return await original(url)
258
 
259
  processor._download_reference = patched
 
328
  lines=2,
329
  placeholder='e.g. "A warm male voice with a slight British accent..."',
330
  )
331
+ gender = gr.Radio(["male", "female"], value="male", label="Gender")
332
+ reference_audio = gr.Audio(
333
+ label="Voice cloning reference (optional, 10-20s)",
334
+ type="filepath",
335
+ )
336
+ with gr.Accordion("Advanced settings", open=False):
337
+ with gr.Row():
338
+ mode = gr.Radio(
339
+ ["generate", "voice_design"],
340
+ value="generate",
341
+ label="Mode",
342
+ info="voice_design = 15s voice preview",
343
+ )
344
+ seed = gr.Number(value=42, precision=0, label="Seed (-1 = random)")
345
+ with gr.Row():
346
+ language = gr.Dropdown(
347
+ ["en", "es", "fr", "de", "it", "pt", "ja", "zh", "ko"],
348
+ value="en", label="Language",
349
+ )
350
+ shot = gr.Radio(
351
+ ["closeup", "wide", "scene"], value="closeup", label="Shot"
352
+ )
353
  scene = gr.Textbox(label="Scene", placeholder="e.g. busy cafe at midday")
354
  action = gr.Textbox(label="Performance direction (<action>)")
355
  sound_before = gr.Textbox(label="Sound event before speech (<sound>)")
356
+ with gr.Row():
357
+ background_sfx = gr.Checkbox(value=False, label="Keep background SFX")
358
+ skip_vc = gr.Checkbox(value=False, label="Skip SeedVC post-processing")
359
  raw_xml = gr.Textbox(
360
+ label="Raw <speak> XML (overrides all fields above when set)",
361
  lines=4,
362
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
363
  run_btn = gr.Button("Generate", variant="primary")
364
  with gr.Column(scale=2):
365
  out_audio = gr.Audio(label="Output", type="filepath")