Manmay Nakhashi committed on
Commit ·
f1c4065
1
Parent(s): b2203ed
Add 8 voice references + click-to-generate Examples table
Browse files
Bundles short voice ref clips under assets/voices/ matched to each
named scene (villain → Harvey-Keitel-style growl, talk-show wheeze →
Conan, etc.). gr.Examples with run_on_click=True fills the prompt +
voice ref inputs and fires generation in one click — same UX as the
IndexTTS-2 demo.
- .gitattributes +2 -0
- app.py +32 -16
- assets/voices/female_american.wav +3 -0
- assets/voices/female_shadowheart.wav +3 -0
- assets/voices/male_arnie.mp3 +3 -0
- assets/voices/male_conan.mp3 +3 -0
- assets/voices/male_harvey_keitel.mp3 +3 -0
- assets/voices/male_old_movie.wav +3 -0
- assets/voices/male_petergriffin.wav +3 -0
- assets/voices/male_samuel_j.mp3 +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
*.wav filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
*.mp3 filter=lfs diff=lfs merge=lfs -text
|
app.py
CHANGED
|
@@ -45,55 +45,67 @@ def _ensure_tts() -> TTSServer:
|
|
| 45 |
return _TTS
|
| 46 |
|
| 47 |
|
| 48 |
-
# ── Example prompts
|
| 49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
(
|
| 51 |
"Villain monologue",
|
|
|
|
| 52 |
'A shadowy villain speaks with cold menace, "You have entered my domain, mortal." '
|
| 53 |
'He chuckles darkly, "Such arrogance will be your undoing." '
|
| 54 |
-
'His voice rises with fury, "Kneel, or be destroyed where you stand!"'
|
| 55 |
),
|
| 56 |
(
|
| 57 |
"Talk-show host wheeze-laugh",
|
|
|
|
| 58 |
'A talk show host gasps with shock, "No! You did NOT just say that!" '
|
| 59 |
'He bursts into uncontrollable laughter, "Hahaha! Oh my god, oh my god!" '
|
| 60 |
-
'He wheezes, "I cannot, I literally cannot breathe right now!"'
|
| 61 |
),
|
| 62 |
(
|
| 63 |
"Tender goodnight whisper",
|
|
|
|
| 64 |
'A woman speaks tenderly, "It has been a long day, my love." '
|
| 65 |
'She whispers, "Close your eyes. I am right here." '
|
| 66 |
-
'She hums quietly, "Mmmm-mmm. Sleep now."'
|
| 67 |
),
|
| 68 |
(
|
| 69 |
"Old-school radio anchor",
|
|
|
|
| 70 |
'A radio host clears his throat, "Excuse me, pardon that." '
|
| 71 |
'He settles into a warm, professional tone, "Good evening everyone, '
|
| 72 |
-
'and welcome back to the show. We have got a wonderful lineup tonight."'
|
| 73 |
),
|
| 74 |
(
|
| 75 |
"Catgirl uncontrollable giggling",
|
|
|
|
| 76 |
'A playful girl already mid-giggle, "Hehehe, oh my gosh you should see your face!" '
|
| 77 |
'She gasps for air between giggles, "Oh my, hehe, oh my, I cannot stop!" '
|
| 78 |
-
'She tries to compose herself, "Ahhhhh okay okay okay, I will stop, I promise."'
|
| 79 |
),
|
| 80 |
(
|
| 81 |
"Hero stammering courage",
|
|
|
|
| 82 |
'A young warrior speaks with a trembling voice, "I... I do not know if I can do this." '
|
| 83 |
'He takes a shaky breath, "But someone has to try." '
|
| 84 |
-
'His voice steadies with growing fire, "No more running. I WILL fight!"'
|
| 85 |
),
|
| 86 |
(
|
| 87 |
"Exhausted dad, fraying patience",
|
|
|
|
| 88 |
'An exhausted father speaks with fraying patience, "Sweetie, daddy is asking very nicely." '
|
| 89 |
'He sighs deeply, "Ohhhh my goodness." '
|
| 90 |
'He puts on an overly cheerful voice, "Hey buddy! Look at the shiny thing!" '
|
| 91 |
-
'Then he laughs helplessly, "Hahaha, I am losing my mind."'
|
| 92 |
),
|
| 93 |
(
|
| 94 |
"Smug-confident announcer",
|
|
|
|
| 95 |
'A confident announcer speaks proudly, "And now, the moment you have all been waiting for." '
|
| 96 |
-
'He chuckles knowingly, "Heheh, trust me, this one is going to blow you away."'
|
| 97 |
),
|
| 98 |
]
|
| 99 |
|
|
@@ -139,7 +151,7 @@ with gr.Blocks(
|
|
| 139 |
with gr.Column(scale=3):
|
| 140 |
prompt_box = gr.Textbox(
|
| 141 |
label="Scene prompt",
|
| 142 |
-
placeholder=EXAMPLES[0][
|
| 143 |
lines=6, elem_classes=["prompt-box"],
|
| 144 |
)
|
| 145 |
audio_ref = gr.Audio(
|
|
@@ -173,19 +185,23 @@ with gr.Blocks(
|
|
| 173 |
outputs=[audio_out],
|
| 174 |
)
|
| 175 |
|
| 176 |
-
# Click-to-
|
|
|
|
| 177 |
gr.Examples(
|
| 178 |
-
label="
|
| 179 |
examples=[
|
| 180 |
-
[name, prompt,
|
| 181 |
-
for name, prompt in EXAMPLES
|
| 182 |
],
|
| 183 |
-
|
|
|
|
|
|
|
| 184 |
cfg_slider, stg_slider, dur_slider, seed_input],
|
| 185 |
outputs=[audio_out],
|
| 186 |
fn=lambda _name, prompt, ref, cfg, stg, dur, seed: on_generate(prompt, ref, cfg, stg, dur, seed),
|
| 187 |
cache_examples=False,
|
| 188 |
run_on_click=True,
|
|
|
|
| 189 |
)
|
| 190 |
|
| 191 |
|
|
|
|
| 45 |
return _TTS
|
| 46 |
|
| 47 |
|
| 48 |
+
# ── Example prompts shipped with a matching voice reference ──────────────────
|
| 49 |
+
# Files live under assets/voices/ so users can click a row and generate
|
| 50 |
+
# without uploading anything.
|
| 51 |
+
_VOICES_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "voices")
|
| 52 |
+
|
| 53 |
+
EXAMPLES: list[tuple[str, str, str]] = [
|
| 54 |
(
|
| 55 |
"Villain monologue",
|
| 56 |
+
os.path.join(_VOICES_DIR, "male_harvey_keitel.mp3"),
|
| 57 |
'A shadowy villain speaks with cold menace, "You have entered my domain, mortal." '
|
| 58 |
'He chuckles darkly, "Such arrogance will be your undoing." '
|
| 59 |
+
'His voice rises with fury, "Kneel, or be destroyed where you stand!"',
|
| 60 |
),
|
| 61 |
(
|
| 62 |
"Talk-show host wheeze-laugh",
|
| 63 |
+
os.path.join(_VOICES_DIR, "male_conan.mp3"),
|
| 64 |
'A talk show host gasps with shock, "No! You did NOT just say that!" '
|
| 65 |
'He bursts into uncontrollable laughter, "Hahaha! Oh my god, oh my god!" '
|
| 66 |
+
'He wheezes, "I cannot, I literally cannot breathe right now!"',
|
| 67 |
),
|
| 68 |
(
|
| 69 |
"Tender goodnight whisper",
|
| 70 |
+
os.path.join(_VOICES_DIR, "female_shadowheart.wav"),
|
| 71 |
'A woman speaks tenderly, "It has been a long day, my love." '
|
| 72 |
'She whispers, "Close your eyes. I am right here." '
|
| 73 |
+
'She hums quietly, "Mmmm-mmm. Sleep now."',
|
| 74 |
),
|
| 75 |
(
|
| 76 |
"Old-school radio anchor",
|
| 77 |
+
os.path.join(_VOICES_DIR, "male_old_movie.wav"),
|
| 78 |
'A radio host clears his throat, "Excuse me, pardon that." '
|
| 79 |
'He settles into a warm, professional tone, "Good evening everyone, '
|
| 80 |
+
'and welcome back to the show. We have got a wonderful lineup tonight."',
|
| 81 |
),
|
| 82 |
(
|
| 83 |
"Catgirl uncontrollable giggling",
|
| 84 |
+
os.path.join(_VOICES_DIR, "female_american.wav"),
|
| 85 |
'A playful girl already mid-giggle, "Hehehe, oh my gosh you should see your face!" '
|
| 86 |
'She gasps for air between giggles, "Oh my, hehe, oh my, I cannot stop!" '
|
| 87 |
+
'She tries to compose herself, "Ahhhhh okay okay okay, I will stop, I promise."',
|
| 88 |
),
|
| 89 |
(
|
| 90 |
"Hero stammering courage",
|
| 91 |
+
os.path.join(_VOICES_DIR, "male_arnie.mp3"),
|
| 92 |
'A young warrior speaks with a trembling voice, "I... I do not know if I can do this." '
|
| 93 |
'He takes a shaky breath, "But someone has to try." '
|
| 94 |
+
'His voice steadies with growing fire, "No more running. I WILL fight!"',
|
| 95 |
),
|
| 96 |
(
|
| 97 |
"Exhausted dad, fraying patience",
|
| 98 |
+
os.path.join(_VOICES_DIR, "male_petergriffin.wav"),
|
| 99 |
'An exhausted father speaks with fraying patience, "Sweetie, daddy is asking very nicely." '
|
| 100 |
'He sighs deeply, "Ohhhh my goodness." '
|
| 101 |
'He puts on an overly cheerful voice, "Hey buddy! Look at the shiny thing!" '
|
| 102 |
+
'Then he laughs helplessly, "Hahaha, I am losing my mind."',
|
| 103 |
),
|
| 104 |
(
|
| 105 |
"Smug-confident announcer",
|
| 106 |
+
os.path.join(_VOICES_DIR, "male_samuel_j.mp3"),
|
| 107 |
'A confident announcer speaks proudly, "And now, the moment you have all been waiting for." '
|
| 108 |
+
'He chuckles knowingly, "Heheh, trust me, this one is going to blow you away."',
|
| 109 |
),
|
| 110 |
]
|
| 111 |
|
|
|
|
| 151 |
with gr.Column(scale=3):
|
| 152 |
prompt_box = gr.Textbox(
|
| 153 |
label="Scene prompt",
|
| 154 |
+
placeholder=EXAMPLES[0][2],
|
| 155 |
lines=6, elem_classes=["prompt-box"],
|
| 156 |
)
|
| 157 |
audio_ref = gr.Audio(
|
|
|
|
| 185 |
outputs=[audio_out],
|
| 186 |
)
|
| 187 |
|
| 188 |
+
# Click-to-generate example table. Each row preloads a paired voice
|
| 189 |
+
# reference + prompt and runs the model immediately.
|
| 190 |
gr.Examples(
|
| 191 |
+
label="🎬 Click any row to generate a sample",
|
| 192 |
examples=[
|
| 193 |
+
[name, prompt, voice_path, 2.5, 1.5, 1.1, 42]
|
| 194 |
+
for name, voice_path, prompt in EXAMPLES
|
| 195 |
],
|
| 196 |
+
example_labels=[name for name, _, _ in EXAMPLES],
|
| 197 |
+
inputs=[gr.Textbox(visible=False, label="Scene"),
|
| 198 |
+
prompt_box, audio_ref,
|
| 199 |
cfg_slider, stg_slider, dur_slider, seed_input],
|
| 200 |
outputs=[audio_out],
|
| 201 |
fn=lambda _name, prompt, ref, cfg, stg, dur, seed: on_generate(prompt, ref, cfg, stg, dur, seed),
|
| 202 |
cache_examples=False,
|
| 203 |
run_on_click=True,
|
| 204 |
+
examples_per_page=20,
|
| 205 |
)
|
| 206 |
|
| 207 |
|
assets/voices/female_american.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:630416506da57ae3b5e8b19a76c18580313aa1a394402ffec670d5e586c69bdb
|
| 3 |
+
size 145916
|
assets/voices/female_shadowheart.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fa9176f2725df675da1e975ca0b6bcf7b817da9a07cd54bab28fbcc47ccbeb7f
|
| 3 |
+
size 2985722
|
assets/voices/male_arnie.mp3
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:73d91a1065448f668debdc3adedae02285adba1a48c80b54f75735c439ee2d4f
|
| 3 |
+
size 667826
|
assets/voices/male_conan.mp3
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f67ac79623d99dca70019a73e0baa442d686104592dbd6424dcc51d74cc478a4
|
| 3 |
+
size 475356
|
assets/voices/male_harvey_keitel.mp3
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:525dc4fddcb679233314b69777df82b4758a20a4a339827e37608b947532997b
|
| 3 |
+
size 481015
|
assets/voices/male_old_movie.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:391f0a4bef6faedc1100f457bdcfa7e71109a50adb8023cf1e4921c42eadefc8
|
| 3 |
+
size 625916
|
assets/voices/male_petergriffin.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f0a8b708aee90c7dde4eed747ca0b453456b742650699c26fa6ee4e98c8cee0e
|
| 3 |
+
size 486882
|
assets/voices/male_samuel_j.mp3
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ad9d85b386f1be92d422676ddbd41ce9df2bc05f55cf8e0fcdfbd96a768d589a
|
| 3 |
+
size 271351
|