Manmay Nakhashi committed on
Commit
f1c4065
·
1 Parent(s): b2203ed

Add 8 voice references + click-to-generate Examples table

Browse files

Bundles short voice ref clips under assets/voices/ matched to each
named scene (villain → Harvey-Keitel-style growl, talk-show wheeze →
Conan, etc.). gr.Examples with run_on_click=True fills the prompt +
voice ref inputs and fires generation in one click — same UX as the
IndexTTS-2 demo.

.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.wav filter=lfs diff=lfs merge=lfs -text
37
+ *.mp3 filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -45,55 +45,67 @@ def _ensure_tts() -> TTSServer:
45
  return _TTS
46
 
47
 
48
- # ── Example prompts (shown as click-to-fill chips in the UI) ─────────────────
49
- EXAMPLES: list[tuple[str, str]] = [
 
 
 
 
50
  (
51
  "Villain monologue",
 
52
  'A shadowy villain speaks with cold menace, "You have entered my domain, mortal." '
53
  'He chuckles darkly, "Such arrogance will be your undoing." '
54
- 'His voice rises with fury, "Kneel, or be destroyed where you stand!"'
55
  ),
56
  (
57
  "Talk-show host wheeze-laugh",
 
58
  'A talk show host gasps with shock, "No! You did NOT just say that!" '
59
  'He bursts into uncontrollable laughter, "Hahaha! Oh my god, oh my god!" '
60
- 'He wheezes, "I cannot, I literally cannot breathe right now!"'
61
  ),
62
  (
63
  "Tender goodnight whisper",
 
64
  'A woman speaks tenderly, "It has been a long day, my love." '
65
  'She whispers, "Close your eyes. I am right here." '
66
- 'She hums quietly, "Mmmm-mmm. Sleep now."'
67
  ),
68
  (
69
  "Old-school radio anchor",
 
70
  'A radio host clears his throat, "Excuse me, pardon that." '
71
  'He settles into a warm, professional tone, "Good evening everyone, '
72
- 'and welcome back to the show. We have got a wonderful lineup tonight."'
73
  ),
74
  (
75
  "Catgirl uncontrollable giggling",
 
76
  'A playful girl already mid-giggle, "Hehehe, oh my gosh you should see your face!" '
77
  'She gasps for air between giggles, "Oh my, hehe, oh my, I cannot stop!" '
78
- 'She tries to compose herself, "Ahhhhh okay okay okay, I will stop, I promise."'
79
  ),
80
  (
81
  "Hero stammering courage",
 
82
  'A young warrior speaks with a trembling voice, "I... I do not know if I can do this." '
83
  'He takes a shaky breath, "But someone has to try." '
84
- 'His voice steadies with growing fire, "No more running. I WILL fight!"'
85
  ),
86
  (
87
  "Exhausted dad, fraying patience",
 
88
  'An exhausted father speaks with fraying patience, "Sweetie, daddy is asking very nicely." '
89
  'He sighs deeply, "Ohhhh my goodness." '
90
  'He puts on an overly cheerful voice, "Hey buddy! Look at the shiny thing!" '
91
- 'Then he laughs helplessly, "Hahaha, I am losing my mind."'
92
  ),
93
  (
94
  "Smug-confident announcer",
 
95
  'A confident announcer speaks proudly, "And now, the moment you have all been waiting for." '
96
- 'He chuckles knowingly, "Heheh, trust me, this one is going to blow you away."'
97
  ),
98
  ]
99
 
@@ -139,7 +151,7 @@ with gr.Blocks(
139
  with gr.Column(scale=3):
140
  prompt_box = gr.Textbox(
141
  label="Scene prompt",
142
- placeholder=EXAMPLES[0][1],
143
  lines=6, elem_classes=["prompt-box"],
144
  )
145
  audio_ref = gr.Audio(
@@ -173,19 +185,23 @@ with gr.Blocks(
173
  outputs=[audio_out],
174
  )
175
 
176
- # Click-to-run example table — fills inputs AND triggers generation.
 
177
  gr.Examples(
178
- label="Example prompts (click any row to generate)",
179
  examples=[
180
- [name, prompt, None, 2.5, 1.5, 1.1, 42]
181
- for name, prompt in EXAMPLES
182
  ],
183
- inputs=[gr.Textbox(visible=False), prompt_box, audio_ref,
 
 
184
  cfg_slider, stg_slider, dur_slider, seed_input],
185
  outputs=[audio_out],
186
  fn=lambda _name, prompt, ref, cfg, stg, dur, seed: on_generate(prompt, ref, cfg, stg, dur, seed),
187
  cache_examples=False,
188
  run_on_click=True,
 
189
  )
190
 
191
 
 
45
  return _TTS
46
 
47
 
48
+ # ── Example prompts shipped with a matching voice reference ──────────────────
49
+ # Files live under assets/voices/ so users can click a row and generate
50
+ # without uploading anything.
51
+ _VOICES_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "voices")
52
+
53
+ EXAMPLES: list[tuple[str, str, str]] = [
54
  (
55
  "Villain monologue",
56
+ os.path.join(_VOICES_DIR, "male_harvey_keitel.mp3"),
57
  'A shadowy villain speaks with cold menace, "You have entered my domain, mortal." '
58
  'He chuckles darkly, "Such arrogance will be your undoing." '
59
+ 'His voice rises with fury, "Kneel, or be destroyed where you stand!"',
60
  ),
61
  (
62
  "Talk-show host wheeze-laugh",
63
+ os.path.join(_VOICES_DIR, "male_conan.mp3"),
64
  'A talk show host gasps with shock, "No! You did NOT just say that!" '
65
  'He bursts into uncontrollable laughter, "Hahaha! Oh my god, oh my god!" '
66
+ 'He wheezes, "I cannot, I literally cannot breathe right now!"',
67
  ),
68
  (
69
  "Tender goodnight whisper",
70
+ os.path.join(_VOICES_DIR, "female_shadowheart.wav"),
71
  'A woman speaks tenderly, "It has been a long day, my love." '
72
  'She whispers, "Close your eyes. I am right here." '
73
+ 'She hums quietly, "Mmmm-mmm. Sleep now."',
74
  ),
75
  (
76
  "Old-school radio anchor",
77
+ os.path.join(_VOICES_DIR, "male_old_movie.wav"),
78
  'A radio host clears his throat, "Excuse me, pardon that." '
79
  'He settles into a warm, professional tone, "Good evening everyone, '
80
+ 'and welcome back to the show. We have got a wonderful lineup tonight."',
81
  ),
82
  (
83
  "Catgirl uncontrollable giggling",
84
+ os.path.join(_VOICES_DIR, "female_american.wav"),
85
  'A playful girl already mid-giggle, "Hehehe, oh my gosh you should see your face!" '
86
  'She gasps for air between giggles, "Oh my, hehe, oh my, I cannot stop!" '
87
+ 'She tries to compose herself, "Ahhhhh okay okay okay, I will stop, I promise."',
88
  ),
89
  (
90
  "Hero stammering courage",
91
+ os.path.join(_VOICES_DIR, "male_arnie.mp3"),
92
  'A young warrior speaks with a trembling voice, "I... I do not know if I can do this." '
93
  'He takes a shaky breath, "But someone has to try." '
94
+ 'His voice steadies with growing fire, "No more running. I WILL fight!"',
95
  ),
96
  (
97
  "Exhausted dad, fraying patience",
98
+ os.path.join(_VOICES_DIR, "male_petergriffin.wav"),
99
  'An exhausted father speaks with fraying patience, "Sweetie, daddy is asking very nicely." '
100
  'He sighs deeply, "Ohhhh my goodness." '
101
  'He puts on an overly cheerful voice, "Hey buddy! Look at the shiny thing!" '
102
+ 'Then he laughs helplessly, "Hahaha, I am losing my mind."',
103
  ),
104
  (
105
  "Smug-confident announcer",
106
+ os.path.join(_VOICES_DIR, "male_samuel_j.mp3"),
107
  'A confident announcer speaks proudly, "And now, the moment you have all been waiting for." '
108
+ 'He chuckles knowingly, "Heheh, trust me, this one is going to blow you away."',
109
  ),
110
  ]
111
 
 
151
  with gr.Column(scale=3):
152
  prompt_box = gr.Textbox(
153
  label="Scene prompt",
154
+ placeholder=EXAMPLES[0][2],
155
  lines=6, elem_classes=["prompt-box"],
156
  )
157
  audio_ref = gr.Audio(
 
185
  outputs=[audio_out],
186
  )
187
 
188
+ # Click-to-generate example table. Each row preloads a paired voice
189
+ # reference + prompt and runs the model immediately.
190
  gr.Examples(
191
+ label="🎬 Click any row to generate a sample",
192
  examples=[
193
+ [name, prompt, voice_path, 2.5, 1.5, 1.1, 42]
194
+ for name, voice_path, prompt in EXAMPLES
195
  ],
196
+ example_labels=[name for name, _, _ in EXAMPLES],
197
+ inputs=[gr.Textbox(visible=False, label="Scene"),
198
+ prompt_box, audio_ref,
199
  cfg_slider, stg_slider, dur_slider, seed_input],
200
  outputs=[audio_out],
201
  fn=lambda _name, prompt, ref, cfg, stg, dur, seed: on_generate(prompt, ref, cfg, stg, dur, seed),
202
  cache_examples=False,
203
  run_on_click=True,
204
+ examples_per_page=20,
205
  )
206
 
207
 
assets/voices/female_american.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:630416506da57ae3b5e8b19a76c18580313aa1a394402ffec670d5e586c69bdb
3
+ size 145916
assets/voices/female_shadowheart.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa9176f2725df675da1e975ca0b6bcf7b817da9a07cd54bab28fbcc47ccbeb7f
3
+ size 2985722
assets/voices/male_arnie.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73d91a1065448f668debdc3adedae02285adba1a48c80b54f75735c439ee2d4f
3
+ size 667826
assets/voices/male_conan.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f67ac79623d99dca70019a73e0baa442d686104592dbd6424dcc51d74cc478a4
3
+ size 475356
assets/voices/male_harvey_keitel.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:525dc4fddcb679233314b69777df82b4758a20a4a339827e37608b947532997b
3
+ size 481015
assets/voices/male_old_movie.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:391f0a4bef6faedc1100f457bdcfa7e71109a50adb8023cf1e4921c42eadefc8
3
+ size 625916
assets/voices/male_petergriffin.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0a8b708aee90c7dde4eed747ca0b453456b742650699c26fa6ee4e98c8cee0e
3
+ size 486882
assets/voices/male_samuel_j.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad9d85b386f1be92d422676ddbd41ce9df2bc05f55cf8e0fcdfbd96a768d589a
3
+ size 271351