arjunbroepic committed on
Commit
e791b3f
·
verified ·
1 Parent(s): 6a5f2ec

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -0
app.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from supertonic import TTS
3
+ import os
4
+
5
# Shared TTS engine, created once at import time. Per the original note,
# auto_download=True lets the library fetch the model from Hugging Face.
tts = TTS(auto_download=True)

# Voice style names offered in the UI: five male (M1-M5) followed by five
# female (F1-F5), based on the model repository layout.
VOICES = [f"{prefix}{index}" for prefix in ("M", "F") for index in range(1, 6)]

# Display name -> language code for the languages exposed in the UI.
# Only a subset (11 entries) is listed here; the original note says 31
# languages are available, so more can be added.
LANGUAGES = {
    "English": "en",
    "Korean": "ko",
    "Japanese": "ja",
    "Spanish": "es",
    "French": "fr",
    "German": "de",
    "Hindi": "hi",
    "Italian": "it",
    "Portuguese": "pt",
    "Russian": "ru",
    "Vietnamese": "vi",
}
17
+
18
def generate_speech(text, voice, language_name):
    """Synthesize ``text`` and return the audio file path and a duration label.

    Args:
        text: Text to speak, as entered in the input textbox.
        voice: Voice style name, passed to ``tts.get_voice_style``.
        language_name: Display name of the language; must be a key of
            ``LANGUAGES``.

    Returns:
        A ``(path, label)`` tuple: the path of the WAV file written for this
        request and a string such as ``"Duration: 1.23 seconds"``.

    Raises:
        gr.Error: If ``text`` is empty or contains only whitespace.
        KeyError: If ``language_name`` is not a key of ``LANGUAGES``.
    """
    # Imported locally so this function does not depend on a file-level import.
    import tempfile

    # Surface a readable message in the UI instead of handing empty input
    # to the synthesizer.
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")

    lang_code = LANGUAGES[language_name]

    # Get the voice style object
    style = tts.get_voice_style(voice_name=voice)

    # Synthesize
    wav, duration = tts.synthesize(text, voice_style=style, lang=lang_code)

    # Write to a unique temporary file per request. A fixed "output.wav"
    # would be shared by all concurrent users, who could overwrite each
    # other's audio before Gradio picks the file up.
    # NOTE: the file is not deleted here (delete=False); cleanup is left to
    # the system temp directory.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_path = tmp.name
    tts.save_audio(wav, output_path)

    return output_path, f"Duration: {duration:.2f} seconds"
32
+
33
# Gradio UI: inputs in the left column, results in the right column,
# clickable examples underneath, and the button wired to generate_speech.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎙️ Supertonic 3 TTS (On-Device CPU)")
    gr.Markdown(
        "Lightning-fast multilingual TTS by Supertone. "
        "Running entirely on CPU via ONNX."
    )

    with gr.Row():
        # Left column: text, voice and language selection plus the trigger.
        with gr.Column():
            input_text = gr.Textbox(
                label="Input Text",
                placeholder="Type something here...",
                value="Hello! Supertonic 3 is now running on a Hugging Face CPU Space.",
                lines=3,
            )
            voice_opt = gr.Dropdown(
                choices=VOICES,
                value="M1",
                label="Voice Style",
            )
            lang_opt = gr.Dropdown(
                choices=list(LANGUAGES),
                value="English",
                label="Language",
            )
            btn = gr.Button("Generate Audio", variant="primary")

        # Right column: the synthesized audio and its duration text.
        with gr.Column():
            audio_output = gr.Audio(label="Synthesized Audio", type="filepath")
            stats = gr.Label(label="Metadata")

    # The same three components feed both the examples and the click handler.
    _speech_inputs = [input_text, voice_opt, lang_opt]

    # Each example row is (text, voice, language display name).
    _example_rows = [
        ["The train delay was announced at 4:45 PM <breath> due to track maintenance.", "M1", "English"],
        ["こんにちは、スーパートーンの世界へようこそ。", "F1", "Japanese"],
        ["¡Hola! Este es un ejemplo de síntesis de voz en español.", "M2", "Spanish"],
    ]
    gr.Examples(examples=_example_rows, inputs=_speech_inputs)

    btn.click(
        generate_speech,
        inputs=_speech_inputs,
        outputs=[audio_output, stats],
    )

if __name__ == "__main__":
    demo.launch()
67
+