clone

Runtime error

App Files Files Community

PatnaikAshish committed on Mar 21

Commit

3831cc8

verified ·

1 Parent(s): 66525f3

Update app.py

Browse files

Files changed (1) hide show

app.py +148 -73

app.py CHANGED Viewed

@@ -1,21 +1,21 @@
 import gradio as gr
 from core.cloner import KokoClone
 # 1. Initialize the cloner globally so models load only once when the server starts
 print("Loading KokoClone models for the Web UI...")
 cloner = KokoClone()
 def clone_voice(text, lang, ref_audio_path):
-    """Gradio prediction function."""
     if not text or not text.strip():
         raise gr.Error("Please enter some text.")
     if not ref_audio_path:
         raise gr.Error("Please upload or record a reference audio file.")
     output_file = "gradio_output.wav"
     try:
-        # Call the core engine
         cloner.generate(
             text=text,
             lang=lang,
@@ -26,79 +26,154 @@ def clone_voice(text, lang, ref_audio_path):
     except Exception as e:
         raise gr.Error(f"An error occurred during generation: {str(e)}")
 # 2. Build the Gradio UI using Blocks
 with gr.Blocks() as demo:
-    # Using gr.HTML for the header ensures CSS styles like text-align are respected
-    gr.HTML("""
-        <div style="text-align: center; margin-bottom: 20px;">
-            <h1 style="margin: 0;">🎧 KokoClone</h1>
-            <p style="margin: 10px 0; color: #666;">
-                Voice Cloning, Now Inside Kokoro.<br>
-                Generate natural multilingual speech and clone any target voice with ease.<br>
-                <i>Built on Kokoro TTS.</i>
-            </p>
         </div>
-    """)
-    with gr.Row():
-        # LEFT COLUMN: Inputs
-        with gr.Column(scale=1):
-            text_input = gr.Textbox(
-                label="1. Text to Synthesize",
-                lines=4,
-                placeholder="Enter the text you want spoken..."
-            )
-            lang_input = gr.Dropdown(
-                label="2. Language",
-                choices=[
-                    ("English", "en"),
-                    ("Hindi", "hi"),
-                    ("French", "fr"),
-                    ("Japanese", "ja"),
-                    ("Chinese", "zh"),
-                    ("Italian", "it"),
-                    ("Spanish", "es"),
-                    ("Portuguese", "pt")
-                ],
-                value="en"
-            )
-            # Using type="filepath" passes the temp file path directly to our cloner
-            ref_audio_input = gr.Audio(
-                label="3. Reference Voice (Upload or Record)",
-                type="filepath"
-            )
-            submit_btn = gr.Button("🚀 Generate Clone", variant="primary")
-        # RIGHT COLUMN: Outputs and Instructions
-        with gr.Column(scale=1):
-            output_audio = gr.Audio(
-                label="Generated Cloned Audio",
-                interactive=False,
-                autoplay=False
-            )
-            gr.Markdown(
-                """
-                <br>
-                ### 💡 Tips for Best Results:
-                * **Clean Audio:** Use a reference audio clip without background noise or music.
-                * **Length:** A reference clip of 3 to 10 seconds is usually the sweet spot.
-                * **Language Match:** Make sure the selected language matches the text you typed!
-                * **First Run:** The very first generation might take a few extra seconds while the models allocate memory.
-                """
             )
-    # 3. Wire the button to the function
-    submit_btn.click(
-        fn=clone_voice,
-        inputs=[text_input, lang_input, ref_audio_input],
-        outputs=output_audio
-    )
 # 4. Launch the app
 if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
+import os
+import soundfile as sf
 from core.cloner import KokoClone
 # 1. Initialize the cloner globally so models load only once when the server starts
 print("Loading KokoClone models for the Web UI...")
 cloner = KokoClone()
 def clone_voice(text, lang, ref_audio_path):
+    """Gradio handler: text + reference audio → cloned speech."""
     if not text or not text.strip():
         raise gr.Error("Please enter some text.")
     if not ref_audio_path:
         raise gr.Error("Please upload or record a reference audio file.")
     output_file = "gradio_output.wav"
     try:
         cloner.generate(
             text=text,
             lang=lang,
     except Exception as e:
         raise gr.Error(f"An error occurred during generation: {str(e)}")
+def convert_voice(source_audio_path, ref_audio_path):
+    """Gradio handler: source audio + reference audio → re-voiced speech.
+
+    Args:
+        source_audio_path: Path of the recording to re-voice. A falsy value
+            is treated as "nothing provided".
+        ref_audio_path: Path of the reference (target speaker) clip. A falsy
+            value is treated as "nothing provided".
+
+    Returns:
+        The output path that was handed to ``cloner.convert``.
+
+    Raises:
+        gr.Error: If either input is missing, or if ``cloner.convert`` raises.
+    """
+    if not source_audio_path:
+        raise gr.Error("Please upload or record a source audio file.")
+    if not ref_audio_path:
+        raise gr.Error("Please upload or record a reference audio file.")
+    # Fixed file name, so every call targets the same output path.
+    output_file = "gradio_convert_output.wav"
+    try:
+        cloner.convert(
+            source_audio=source_audio_path,
+            reference_audio=ref_audio_path,
+            output_path=output_file
+        )
+    except Exception as e:
+        # Chain the original exception so the root cause stays in the traceback.
+        raise gr.Error(f"An error occurred during conversion: {e!s}") from e
+    return output_file
 # 2. Build the Gradio UI using Blocks
 with gr.Blocks() as demo:
+    # Page header: centered HTML passed through a Markdown component.
+    gr.Markdown(
+        """
+        <div style="text-align: center;">
+            <h1>🎧 KokoClone</h1>
+            <p>Voice Cloning, Now Inside Kokoro.<br>
+            Generate natural multilingual speech and clone any target voice with ease.<br>
+            <i>Built on Kokoro TTS.</i></p>
         </div>
+        """
+    )
+    # Two tabs: text-to-speech cloning (clone_voice) and audio-to-audio
+    # conversion (convert_voice).
+    with gr.Tabs():
+        # ── Tab 1: Text → Cloned Speech ─────────────────────────────────────
+        with gr.Tab("🎤 Text → Clone"):
+            with gr.Row():
+                # Left column: the three inputs plus the submit button.
+                with gr.Column(scale=1):
+                    text_input = gr.Textbox(
+                        label="1. Text to Synthesize",
+                        lines=4,
+                        placeholder="Enter the text you want spoken..."
+                    )
+                    # Choices are (display name, language code) pairs; the
+                    # selected code is what clone_voice receives as `lang`.
+                    lang_input = gr.Dropdown(
+                        label="2. Language",
+                        choices=[
+                            ("English", "en"),
+                            ("Hindi", "hi"),
+                            ("French", "fr"),
+                            ("Japanese", "ja"),
+                            ("Chinese", "zh"),
+                            ("Italian", "it"),
+                            ("Spanish", "es"),
+                            ("Portuguese", "pt")
+                        ],
+                        value="en"
+                    )
+                    # type="filepath" gives the handler a file path, not raw audio data.
+                    ref_audio_input = gr.Audio(
+                        label="3. Reference Voice (Upload or Record)",
+                        type="filepath"
+                    )
+                    submit_btn = gr.Button("🚀 Generate Clone", variant="primary")
+                # Right column: read-only result player and usage tips.
+                with gr.Column(scale=1):
+                    output_audio = gr.Audio(
+                        label="Generated Cloned Audio",
+                        interactive=False,
+                        autoplay=False
+                    )
+                    gr.Markdown(
+                        """
+                        <br>
+                        ### 💡 Tips for Best Results:
+                        * **Clean Audio:** Use a reference audio clip without background noise or music.
+                        * **Length:** A reference clip of 3 to 10 seconds is usually the sweet spot.
+                        * **Language Match:** Make sure the selected language matches the text you typed!
+                        * **First Run:** The very first generation might take a few extra seconds while the models allocate memory.
+                        """
+                    )
+            # Three chained steps: (1) relabel and disable the button,
+            # (2) run clone_voice, (3) restore the button label and state.
+            submit_btn.click(
+                fn=lambda: gr.update(value="⌛ Generating...", interactive=False),
+                outputs=submit_btn
+            ).then(
+                fn=clone_voice,
+                inputs=[text_input, lang_input, ref_audio_input],
+                outputs=output_audio
+            ).then(
+                fn=lambda: gr.update(value="🚀 Generate Clone", interactive=True),
+                outputs=submit_btn
             )
+        # ── Tab 2: Audio → Re-voiced Speech ─────────────────────────────────
+        with gr.Tab("🔁 Audio → Clone"):
+            with gr.Row():
+                # Left column: source recording, reference voice, convert button.
+                with gr.Column(scale=1):
+                    source_audio_input = gr.Audio(
+                        label="1. Source Audio (speech to re-voice)",
+                        type="filepath"
+                    )
+                    ref_audio_convert_input = gr.Audio(
+                        label="2. Reference Voice (target speaker)",
+                        type="filepath"
+                    )
+                    convert_btn = gr.Button("🔁 Convert Voice", variant="primary")
+                # Right column: read-only result player and explanation.
+                with gr.Column(scale=1):
+                    convert_output_audio = gr.Audio(
+                        label="Converted Audio",
+                        interactive=False,
+                        autoplay=False
+                    )
+                    gr.Markdown(
+                        """
+                        <br>
+                        ### 💡 How it works:
+                        * Upload any speech recording as the **source**.
+                        * Upload a short clip of the **target speaker** as the reference.
+                        * KokoClone re-voices the source speech to sound like the reference — no transcription needed.
+                        ### Tips:
+                        * Clean, noise-free audio works best for both inputs.
+                        * Reference clips of 3–10 seconds give the best voice transfer.
+                        """
+                    )
+            # Same three-step pattern as the first tab, wired to convert_voice.
+            convert_btn.click(
+                fn=lambda: gr.update(value="⌛ Converting...", interactive=False),
+                outputs=convert_btn
+            ).then(
+                fn=convert_voice,
+                inputs=[source_audio_input, ref_audio_convert_input],
+                outputs=convert_output_audio
+            ).then(
+                fn=lambda: gr.update(value="🔁 Convert Voice", interactive=True),
+                outputs=convert_btn
+            )
 # 4. Launch the app
 if __name__ == "__main__":
+    # server_name="0.0.0.0" makes the server listen on all network interfaces.
+    # NOTE(review): this change was described as a "Gradio 6.0 fix" that moved
+    # the theme here and removed show_api, but no theme argument is passed and
+    # the previous call was a bare demo.launch() — confirm that description.
+    demo.launch(server_name="0.0.0.0")