PatnaikAshish committed on
Commit
3831cc8
·
verified ·
1 Parent(s): 66525f3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +148 -73
app.py CHANGED
@@ -1,21 +1,21 @@
1
  import gradio as gr
 
 
2
  from core.cloner import KokoClone
3
 
4
  # 1. Initialize the cloner globally so models load only once when the server starts
5
  print("Loading KokoClone models for the Web UI...")
6
  cloner = KokoClone()
7
-
8
  def clone_voice(text, lang, ref_audio_path):
9
- """Gradio prediction function."""
10
  if not text or not text.strip():
11
  raise gr.Error("Please enter some text.")
12
  if not ref_audio_path:
13
  raise gr.Error("Please upload or record a reference audio file.")
14
-
15
  output_file = "gradio_output.wav"
16
-
17
  try:
18
- # Call the core engine
19
  cloner.generate(
20
  text=text,
21
  lang=lang,
@@ -26,79 +26,154 @@ def clone_voice(text, lang, ref_audio_path):
26
  except Exception as e:
27
  raise gr.Error(f"An error occurred during generation: {str(e)}")
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  # 2. Build the Gradio UI using Blocks
30
  with gr.Blocks() as demo:
31
- # Using gr.HTML for the header ensures CSS styles like text-align are respected
32
- gr.HTML("""
33
- <div style="text-align: center; margin-bottom: 20px;">
34
- <h1 style="margin: 0;">🎧 KokoClone</h1>
35
- <p style="margin: 10px 0; color: #666;">
36
- Voice Cloning, Now Inside Kokoro.<br>
37
- Generate natural multilingual speech and clone any target voice with ease.<br>
38
- <i>Built on Kokoro TTS.</i>
39
- </p>
40
  </div>
41
- """)
42
-
43
- with gr.Row():
44
- # LEFT COLUMN: Inputs
45
- with gr.Column(scale=1):
46
- text_input = gr.Textbox(
47
- label="1. Text to Synthesize",
48
- lines=4,
49
- placeholder="Enter the text you want spoken..."
50
- )
51
-
52
- lang_input = gr.Dropdown(
53
- label="2. Language",
54
- choices=[
55
- ("English", "en"),
56
- ("Hindi", "hi"),
57
- ("French", "fr"),
58
- ("Japanese", "ja"),
59
- ("Chinese", "zh"),
60
- ("Italian", "it"),
61
- ("Spanish", "es"),
62
- ("Portuguese", "pt")
63
- ],
64
- value="en"
65
- )
66
-
67
- # Using type="filepath" passes the temp file path directly to our cloner
68
- ref_audio_input = gr.Audio(
69
- label="3. Reference Voice (Upload or Record)",
70
- type="filepath"
71
- )
72
-
73
- submit_btn = gr.Button("🚀 Generate Clone", variant="primary")
74
-
75
- # RIGHT COLUMN: Outputs and Instructions
76
- with gr.Column(scale=1):
77
- output_audio = gr.Audio(
78
- label="Generated Cloned Audio",
79
- interactive=False,
80
- autoplay=False
81
- )
82
-
83
- gr.Markdown(
84
- """
85
- <br>
86
-
87
- ### 💡 Tips for Best Results:
88
- * **Clean Audio:** Use a reference audio clip without background noise or music.
89
- * **Length:** A reference clip of 3 to 10 seconds is usually the sweet spot.
90
- * **Language Match:** Make sure the selected language matches the text you typed!
91
- * **First Run:** The very first generation might take a few extra seconds while the models allocate memory.
92
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  )
94
 
95
- # 3. Wire the button to the function
96
- submit_btn.click(
97
- fn=clone_voice,
98
- inputs=[text_input, lang_input, ref_audio_input],
99
- outputs=output_audio
100
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
 
102
  # 4. Launch the app
103
  if __name__ == "__main__":
104
- demo.launch()
 
 
1
  import gradio as gr
2
+ import os
3
+ import soundfile as sf
4
  from core.cloner import KokoClone
5
 
6
  # 1. Initialize the cloner globally so models load only once when the server starts
7
  print("Loading KokoClone models for the Web UI...")
8
  cloner = KokoClone()
 
9
  def clone_voice(text, lang, ref_audio_path):
10
+ """Gradio handler: text + reference audio → cloned speech."""
11
  if not text or not text.strip():
12
  raise gr.Error("Please enter some text.")
13
  if not ref_audio_path:
14
  raise gr.Error("Please upload or record a reference audio file.")
15
+
16
  output_file = "gradio_output.wav"
17
+
18
  try:
 
19
  cloner.generate(
20
  text=text,
21
  lang=lang,
 
26
  except Exception as e:
27
  raise gr.Error(f"An error occurred during generation: {str(e)}")
28
 
29
+
30
+ def convert_voice(source_audio_path, ref_audio_path):
31
+ """Gradio handler: source audio + reference audio → re-voiced speech."""
32
+ if not source_audio_path:
33
+ raise gr.Error("Please upload or record a source audio file.")
34
+ if not ref_audio_path:
35
+ raise gr.Error("Please upload or record a reference audio file.")
36
+
37
+ output_file = "gradio_convert_output.wav"
38
+
39
+ try:
40
+ cloner.convert(
41
+ source_audio=source_audio_path,
42
+ reference_audio=ref_audio_path,
43
+ output_path=output_file
44
+ )
45
+ return output_file
46
+ except Exception as e:
47
+ raise gr.Error(f"An error occurred during conversion: {str(e)}")
48
+
49
  # 2. Build the Gradio UI using Blocks
50
  with gr.Blocks() as demo:
51
+ gr.Markdown(
52
+ """
53
+ <div style="text-align: center;">
54
+ <h1>🎧 KokoClone</h1>
55
+ <p>Voice Cloning, Now Inside Kokoro.<br>
56
+ Generate natural multilingual speech and clone any target voice with ease.<br>
57
+ <i>Built on Kokoro TTS.</i></p>
 
 
58
  </div>
59
+ """
60
+ )
61
+
62
+ with gr.Tabs():
63
+ # ── Tab 1: Text → Cloned Speech ─────────────────────────────────────
64
+ with gr.Tab("🎤 Text → Clone"):
65
+ with gr.Row():
66
+ with gr.Column(scale=1):
67
+ text_input = gr.Textbox(
68
+ label="1. Text to Synthesize",
69
+ lines=4,
70
+ placeholder="Enter the text you want spoken..."
71
+ )
72
+
73
+ lang_input = gr.Dropdown(
74
+ label="2. Language",
75
+ choices=[
76
+ ("English", "en"),
77
+ ("Hindi", "hi"),
78
+ ("French", "fr"),
79
+ ("Japanese", "ja"),
80
+ ("Chinese", "zh"),
81
+ ("Italian", "it"),
82
+ ("Spanish", "es"),
83
+ ("Portuguese", "pt")
84
+ ],
85
+ value="en"
86
+ )
87
+
88
+ ref_audio_input = gr.Audio(
89
+ label="3. Reference Voice (Upload or Record)",
90
+ type="filepath"
91
+ )
92
+
93
+ submit_btn = gr.Button("🚀 Generate Clone", variant="primary")
94
+
95
+ with gr.Column(scale=1):
96
+ output_audio = gr.Audio(
97
+ label="Generated Cloned Audio",
98
+ interactive=False,
99
+ autoplay=False
100
+ )
101
+
102
+ gr.Markdown(
103
+ """
104
+ <br>
105
+
106
+ ### 💡 Tips for Best Results:
107
+ * **Clean Audio:** Use a reference audio clip without background noise or music.
108
+ * **Length:** A reference clip of 3 to 10 seconds is usually the sweet spot.
109
+ * **Language Match:** Make sure the selected language matches the text you typed!
110
+ * **First Run:** The very first generation might take a few extra seconds while the models allocate memory.
111
+ """
112
+ )
113
+
114
+ submit_btn.click(
115
+ fn=lambda: gr.update(value="⌛ Generating...", interactive=False),
116
+ outputs=submit_btn
117
+ ).then(
118
+ fn=clone_voice,
119
+ inputs=[text_input, lang_input, ref_audio_input],
120
+ outputs=output_audio
121
+ ).then(
122
+ fn=lambda: gr.update(value="🚀 Generate Clone", interactive=True),
123
+ outputs=submit_btn
124
  )
125
 
126
+ # ── Tab 2: Audio → Re-voiced Speech ─────────────────────────────────
127
+ with gr.Tab("🔁 Audio → Clone"):
128
+ with gr.Row():
129
+ with gr.Column(scale=1):
130
+ source_audio_input = gr.Audio(
131
+ label="1. Source Audio (speech to re-voice)",
132
+ type="filepath"
133
+ )
134
+
135
+ ref_audio_convert_input = gr.Audio(
136
+ label="2. Reference Voice (target speaker)",
137
+ type="filepath"
138
+ )
139
+
140
+ convert_btn = gr.Button("🔁 Convert Voice", variant="primary")
141
+
142
+ with gr.Column(scale=1):
143
+ convert_output_audio = gr.Audio(
144
+ label="Converted Audio",
145
+ interactive=False,
146
+ autoplay=False
147
+ )
148
+
149
+ gr.Markdown(
150
+ """
151
+ <br>
152
+
153
+ ### 💡 How it works:
154
+ * Upload any speech recording as the **source**.
155
+ * Upload a short clip of the **target speaker** as the reference.
156
+ * KokoClone re-voices the source speech to sound like the reference — no transcription needed.
157
+
158
+ ### Tips:
159
+ * Clean, noise-free audio works best for both inputs.
160
+ * Reference clips of 3–10 seconds give the best voice transfer.
161
+ """
162
+ )
163
+
164
+ convert_btn.click(
165
+ fn=lambda: gr.update(value="⌛ Converting...", interactive=False),
166
+ outputs=convert_btn
167
+ ).then(
168
+ fn=convert_voice,
169
+ inputs=[source_audio_input, ref_audio_convert_input],
170
+ outputs=convert_output_audio
171
+ ).then(
172
+ fn=lambda: gr.update(value="🔁 Convert Voice", interactive=True),
173
+ outputs=convert_btn
174
+ )
175
 
176
  # 4. Launch the app
177
  if __name__ == "__main__":
178
+ # Gradio 6.0 fix: Moved theme here and removed show_api
179
+ demo.launch(server_name="0.0.0.0")