Dramabox

Running on Zero

Manmay Nakhashi committed 27 days ago

Commit

f1c4065

1 Parent(s): b2203ed

Add 8 voice references + click-to-generate Examples table

Bundles short voice ref clips under assets/voices/ matched to each
named scene (villain → Harvey-Keitel-style growl, talk-show wheeze →
Conan, etc.). gr.Examples with run_on_click=True fills the prompt +
voice ref inputs and fires generation in one click — same UX as the
IndexTTS-2 demo.

Files changed (10) hide show

.gitattributes +2 -0
app.py +32 -16
assets/voices/female_american.wav +3 -0
assets/voices/female_shadowheart.wav +3 -0
assets/voices/male_arnie.mp3 +3 -0
assets/voices/male_conan.mp3 +3 -0
assets/voices/male_harvey_keitel.mp3 +3 -0
assets/voices/male_old_movie.wav +3 -0
assets/voices/male_petergriffin.wav +3 -0
assets/voices/male_samuel_j.mp3 +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.wav filter=lfs diff=lfs merge=lfs -text
+*.mp3 filter=lfs diff=lfs merge=lfs -text

app.py CHANGED Viewed

@@ -45,55 +45,67 @@ def _ensure_tts() -> TTSServer:
     return _TTS
-# ── Example prompts (shown as click-to-fill chips in the UI) ─────────────────
-EXAMPLES: list[tuple[str, str]] = [
     (
         "Villain monologue",
         'A shadowy villain speaks with cold menace, "You have entered my domain, mortal." '
         'He chuckles darkly, "Such arrogance will be your undoing." '
-        'His voice rises with fury, "Kneel, or be destroyed where you stand!"'
     ),
     (
         "Talk-show host wheeze-laugh",
         'A talk show host gasps with shock, "No! You did NOT just say that!" '
         'He bursts into uncontrollable laughter, "Hahaha! Oh my god, oh my god!" '
-        'He wheezes, "I cannot, I literally cannot breathe right now!"'
     ),
     (
         "Tender goodnight whisper",
         'A woman speaks tenderly, "It has been a long day, my love." '
         'She whispers, "Close your eyes. I am right here." '
-        'She hums quietly, "Mmmm-mmm. Sleep now."'
     ),
     (
         "Old-school radio anchor",
         'A radio host clears his throat, "Excuse me, pardon that." '
         'He settles into a warm, professional tone, "Good evening everyone, '
-        'and welcome back to the show. We have got a wonderful lineup tonight."'
     ),
     (
         "Catgirl uncontrollable giggling",
         'A playful girl already mid-giggle, "Hehehe, oh my gosh you should see your face!" '
         'She gasps for air between giggles, "Oh my, hehe, oh my, I cannot stop!" '
-        'She tries to compose herself, "Ahhhhh okay okay okay, I will stop, I promise."'
     ),
     (
         "Hero stammering courage",
         'A young warrior speaks with a trembling voice, "I... I do not know if I can do this." '
         'He takes a shaky breath, "But someone has to try." '
-        'His voice steadies with growing fire, "No more running. I WILL fight!"'
     ),
     (
         "Exhausted dad, fraying patience",
         'An exhausted father speaks with fraying patience, "Sweetie, daddy is asking very nicely." '
         'He sighs deeply, "Ohhhh my goodness." '
         'He puts on an overly cheerful voice, "Hey buddy! Look at the shiny thing!" '
-        'Then he laughs helplessly, "Hahaha, I am losing my mind."'
     ),
     (
         "Smug-confident announcer",
         'A confident announcer speaks proudly, "And now, the moment you have all been waiting for." '
-        'He chuckles knowingly, "Heheh, trust me, this one is going to blow you away."'
     ),
 ]
@@ -139,7 +151,7 @@ with gr.Blocks(
         with gr.Column(scale=3):
             prompt_box = gr.Textbox(
                 label="Scene prompt",
-                placeholder=EXAMPLES[0][1],
                 lines=6, elem_classes=["prompt-box"],
             )
             audio_ref = gr.Audio(
@@ -173,19 +185,23 @@ with gr.Blocks(
         outputs=[audio_out],
     )
-    # Click-to-run example table — fills inputs AND triggers generation.
     gr.Examples(
-        label="Example prompts (click any row to generate)",
         examples=[
-            [name, prompt, None, 2.5, 1.5, 1.1, 42]
-            for name, prompt in EXAMPLES
         ],
-        inputs=[gr.Textbox(visible=False), prompt_box, audio_ref,
                 cfg_slider, stg_slider, dur_slider, seed_input],
         outputs=[audio_out],
         fn=lambda _name, prompt, ref, cfg, stg, dur, seed: on_generate(prompt, ref, cfg, stg, dur, seed),
         cache_examples=False,
         run_on_click=True,
     )

     return _TTS
+# ── Example prompts shipped with a matching voice reference ──────────────────
+# Files live under assets/voices/ so users can click a row and generate
+# without uploading anything.
+_VOICES_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "voices")
+EXAMPLES: list[tuple[str, str, str]] = [
     (
         "Villain monologue",
+        os.path.join(_VOICES_DIR, "male_harvey_keitel.mp3"),
         'A shadowy villain speaks with cold menace, "You have entered my domain, mortal." '
         'He chuckles darkly, "Such arrogance will be your undoing." '
+        'His voice rises with fury, "Kneel, or be destroyed where you stand!"',
     ),
     (
         "Talk-show host wheeze-laugh",
+        os.path.join(_VOICES_DIR, "male_conan.mp3"),
         'A talk show host gasps with shock, "No! You did NOT just say that!" '
         'He bursts into uncontrollable laughter, "Hahaha! Oh my god, oh my god!" '
+        'He wheezes, "I cannot, I literally cannot breathe right now!"',
     ),
     (
         "Tender goodnight whisper",
+        os.path.join(_VOICES_DIR, "female_shadowheart.wav"),
         'A woman speaks tenderly, "It has been a long day, my love." '
         'She whispers, "Close your eyes. I am right here." '
+        'She hums quietly, "Mmmm-mmm. Sleep now."',
     ),
     (
         "Old-school radio anchor",
+        os.path.join(_VOICES_DIR, "male_old_movie.wav"),
         'A radio host clears his throat, "Excuse me, pardon that." '
         'He settles into a warm, professional tone, "Good evening everyone, '
+        'and welcome back to the show. We have got a wonderful lineup tonight."',
     ),
     (
         "Catgirl uncontrollable giggling",
+        os.path.join(_VOICES_DIR, "female_american.wav"),
         'A playful girl already mid-giggle, "Hehehe, oh my gosh you should see your face!" '
         'She gasps for air between giggles, "Oh my, hehe, oh my, I cannot stop!" '
+        'She tries to compose herself, "Ahhhhh okay okay okay, I will stop, I promise."',
     ),
     (
         "Hero stammering courage",
+        os.path.join(_VOICES_DIR, "male_arnie.mp3"),
         'A young warrior speaks with a trembling voice, "I... I do not know if I can do this." '
         'He takes a shaky breath, "But someone has to try." '
+        'His voice steadies with growing fire, "No more running. I WILL fight!"',
     ),
     (
         "Exhausted dad, fraying patience",
+        os.path.join(_VOICES_DIR, "male_petergriffin.wav"),
         'An exhausted father speaks with fraying patience, "Sweetie, daddy is asking very nicely." '
         'He sighs deeply, "Ohhhh my goodness." '
         'He puts on an overly cheerful voice, "Hey buddy! Look at the shiny thing!" '
+        'Then he laughs helplessly, "Hahaha, I am losing my mind."',
     ),
     (
         "Smug-confident announcer",
+        os.path.join(_VOICES_DIR, "male_samuel_j.mp3"),
         'A confident announcer speaks proudly, "And now, the moment you have all been waiting for." '
+        'He chuckles knowingly, "Heheh, trust me, this one is going to blow you away."',
     ),
 ]
         with gr.Column(scale=3):
             prompt_box = gr.Textbox(
                 label="Scene prompt",
+                placeholder=EXAMPLES[0][2],
                 lines=6, elem_classes=["prompt-box"],
             )
             audio_ref = gr.Audio(
         outputs=[audio_out],
     )
+    # Click-to-generate example table. Each row preloads a paired voice
+    # reference + prompt and runs the model immediately.
     gr.Examples(
+        label="🎬 Click any row to generate a sample",
         examples=[
+            [name, prompt, voice_path, 2.5, 1.5, 1.1, 42]
+            for name, voice_path, prompt in EXAMPLES
         ],
+        example_labels=[name for name, _, _ in EXAMPLES],
+        inputs=[gr.Textbox(visible=False, label="Scene"),
+                prompt_box, audio_ref,
                 cfg_slider, stg_slider, dur_slider, seed_input],
         outputs=[audio_out],
         fn=lambda _name, prompt, ref, cfg, stg, dur, seed: on_generate(prompt, ref, cfg, stg, dur, seed),
         cache_examples=False,
         run_on_click=True,
+        examples_per_page=20,
     )

assets/voices/female_american.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:630416506da57ae3b5e8b19a76c18580313aa1a394402ffec670d5e586c69bdb
+size 145916

assets/voices/female_shadowheart.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fa9176f2725df675da1e975ca0b6bcf7b817da9a07cd54bab28fbcc47ccbeb7f
+size 2985722

assets/voices/male_arnie.mp3 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:73d91a1065448f668debdc3adedae02285adba1a48c80b54f75735c439ee2d4f
+size 667826

assets/voices/male_conan.mp3 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f67ac79623d99dca70019a73e0baa442d686104592dbd6424dcc51d74cc478a4
+size 475356

assets/voices/male_harvey_keitel.mp3 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:525dc4fddcb679233314b69777df82b4758a20a4a339827e37608b947532997b
+size 481015

assets/voices/male_old_movie.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:391f0a4bef6faedc1100f457bdcfa7e71109a50adb8023cf1e4921c42eadefc8
+size 625916

assets/voices/male_petergriffin.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f0a8b708aee90c7dde4eed747ca0b453456b742650699c26fa6ee4e98c8cee0e
+size 486882

assets/voices/male_samuel_j.mp3 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ad9d85b386f1be92d422676ddbd41ce9df2bc05f55cf8e0fcdfbd96a768d589a
+size 271351