Spaces:

arjunbroepic
/

supertonic-3-cpu

Running

App Files Files Community

arjunbroepic committed 14 days ago

Commit

e791b3f

verified ·

1 Parent(s): 6a5f2ec

Create app.py

Browse files

Files changed (1) hide show

app.py +67 -0

app.py ADDED Viewed

	@@ -0,0 +1,67 @@

+import gradio as gr
+from supertonic import TTS
+import os
+# Initialize TTS - auto_download=True handles the HF model fetching automatically
+tts = TTS(auto_download=True)
+# List of available voices based on the repository structure
+VOICES = ["M1", "M2", "M3", "M4", "M5", "F1", "F2", "F3", "F4", "F5"]
+# List of supported languages (Top 10 shown, you can add all 31)
+LANGUAGES = {
+    "English": "en", "Korean": "ko", "Japanese": "ja", "Spanish": "es",
+    "French": "fr", "German": "de", "Hindi": "hi", "Italian": "it",
+    "Portuguese": "pt", "Russian": "ru", "Vietnamese": "vi"
+}
+def generate_speech(text, voice, language_name):
+    lang_code = LANGUAGES[language_name]
+    # Get the voice style object
+    style = tts.get_voice_style(voice_name=voice)
+    # Synthesize
+    wav, duration = tts.synthesize(text, voice_style=style, lang=lang_code)
+    # Save to a temporary path for Gradio to pick up
+    output_path = "output.wav"
+    tts.save_audio(wav, output_path)
+    return output_path, f"Duration: {duration:.2f} seconds"
+# Define the Gradio Interface
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🎙️ Supertonic 3 TTS (On-Device CPU)")
+    gr.Markdown("Lightning-fast multilingual TTS by Supertone. Running entirely on CPU via ONNX.")
+    with gr.Row():
+        with gr.Column():
+            input_text = gr.Textbox(
+                label="Input Text",
+                placeholder="Type something here...",
+                value="Hello! Supertonic 3 is now running on a Hugging Face CPU Space.",
+                lines=3
+            )
+            voice_opt = gr.Dropdown(choices=VOICES, value="M1", label="Voice Style")
+            lang_opt = gr.Dropdown(choices=list(LANGUAGES.keys()), value="English", label="Language")
+            btn = gr.Button("Generate Audio", variant="primary")
+        with gr.Column():
+            audio_output = gr.Audio(label="Synthesized Audio", type="filepath")
+            stats = gr.Label(label="Metadata")
+    gr.Examples(
+        examples=[
+            ["The train delay was announced at 4:45 PM <breath> due to track maintenance.", "M1", "English"],
+            ["こんにちは、スーパートーンの世界へようこそ。", "F1", "Japanese"],
+            ["¡Hola! Este es un ejemplo de síntesis de voz en español.", "M2", "Spanish"]
+        ],
+        inputs=[input_text, voice_opt, lang_opt]
+    )
+    btn.click(generate_speech, inputs=[input_text, voice_opt, lang_opt], outputs=[audio_output, stats])
+if __name__ == "__main__":
+    demo.launch()