import os
import tempfile

import gradio as gr
import numpy as np
from supertonic import TTS
|
|
# Build the shared TTS engine once at import time.
# Start-up stays best-effort: a failure is reported but does not abort the
# module, so the UI can still come up and show an error per request.
try:
    tts = TTS(auto_download=True)
except Exception as e:
    # Bind the name even on failure so later code can test `tts is None`
    # instead of hitting a NameError on an undefined global.
    tts = None
    print(f"Error initializing TTS: {e}")
|
|
# Voices that generate_speech loads from "<CUSTOM_VOICES_DIR>/<voice>.json".
# Listed explicitly (rather than sliced out of VOICES by position) so that
# reordering the dropdown choices can never change which voices are custom.
CUSTOM_VOICES = ["F6-zh", "F7-gb", "F8-gb"]

# Voices that generate_speech resolves by name through the TTS engine.
BUILTIN_VOICES = [
    "M1", "M2", "M3", "M4", "M5",
    "F1", "F2", "F3", "F4", "F5",
]

# Every voice offered in the UI dropdown; custom voices come first.
VOICES = CUSTOM_VOICES + BUILTIN_VOICES

# Directory holding the custom voice style JSON files (relative path).
CUSTOM_VOICES_DIR = "my_voice_styles"

# Display name shown in the UI -> language code passed to the engine as `lang`.
LANGUAGES = {
    "English": "en", "Korean": "ko", "Japanese": "ja", "Arabic": "ar",
    "Bulgarian": "bg", "Czech": "cs", "Danish": "da", "German": "de",
    "Greek": "el", "Spanish": "es", "Estonian": "et", "Finnish": "fi",
    "French": "fr", "Hindi": "hi", "Croatian": "hr", "Hungarian": "hu",
    "Indonesian": "id", "Italian": "it", "Lithuanian": "lt", "Latvian": "lv",
    "Dutch": "nl", "Polish": "pl", "Portuguese": "pt", "Romanian": "ro",
    "Russian": "ru", "Slovak": "sk", "Slovenian": "sl", "Swedish": "sv",
    "Turkish": "tr", "Ukrainian": "uk", "Vietnamese": "vi",
}
|
|
def generate_speech(text, voice, language_name):
    """Synthesize ``text`` to a WAV file and report the result.

    Args:
        text: Text to speak. ``None``, empty and whitespace-only values are
            rejected.
        voice: Voice identifier. Voices listed in ``CUSTOM_VOICES`` are loaded
            from ``<CUSTOM_VOICES_DIR>/<voice>.json``; any other value is
            resolved by name through the TTS engine.
        language_name: Key of ``LANGUAGES`` selecting the language code.

    Returns:
        A ``(output_path, status_message)`` tuple: the path of the written
        WAV file and a human-readable status line including the duration.

    Raises:
        gr.Error: If the input is empty, the language is unknown, the engine
            is unavailable, or synthesis/saving fails.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text.")

    # Start-up swallows engine construction errors, so the global may be
    # unbound or None here; look it up defensively for a clear message.
    engine = globals().get("tts")
    if engine is None:
        raise gr.Error("Generation failed: TTS engine is not initialized.")

    lang_code = LANGUAGES.get(language_name)
    if lang_code is None:
        raise gr.Error(f"Generation failed: unsupported language {language_name!r}")

    try:
        if voice in CUSTOM_VOICES:
            style_path = os.path.join(CUSTOM_VOICES_DIR, f"{voice}.json")
            style = engine.get_voice_style_from_path(style_path)
        else:
            style = engine.get_voice_style(voice_name=voice)

        wav, duration = engine.synthesize(
            text, voice_style=style, lang=lang_code, speed=0.95
        )

        # Write to a unique file per request: a fixed "output.wav" is shared
        # by every caller, so overlapping requests could clobber each other.
        # The handle is closed before saving so the engine can reopen the path.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            output_path = tmp.name
        engine.save_audio(wav, output_path)

        # `duration` is indexed at 0 for the value to report (presumably one
        # entry per synthesized item -- confirm against the engine's API).
        readable_duration = float(duration[0])
    except Exception as e:
        # Surface any engine failure in the UI while keeping the cause chained.
        raise gr.Error(f"Generation failed: {e}") from e

    return output_path, f"Generation Successful! \nDuration: {readable_duration:.2f}s"
|
|
| |
# Gradio UI: inputs on the left, synthesized audio and status on the right.
with gr.Blocks(theme='soft', title="Supertonic-3 Custom Voices") as demo:
    gr.Markdown("# 🔊 Supertonic-3 Custom Voices")
    gr.Markdown(
        "An on-device, lightweight Text-to-Speech system by **[Supertone](https://huggingface.co/Supertone/supertonic-3)**. Running on CPU via ONNX.\n"
        "You can create custom voice style from **[HERE](https://github.com/saurabhv749/supertonic3-voice-clone)**"
    )

    with gr.Row():
        # Left column: text, voice and language selection plus the trigger.
        with gr.Column(scale=1):
            input_text = gr.Textbox(
                label="Input Text",
                placeholder="Type your message here...",
                value="In the same track, he reveals his father has been diagnosed with cancer.",
                lines=4,
            )

            with gr.Row():
                voice_opt = gr.Dropdown(
                    choices=VOICES,
                    value="F8-gb",
                    label="Voice Style",
                )
                lang_opt = gr.Dropdown(
                    # Iterating a dict yields its keys, so no list()/keys() needed.
                    choices=sorted(LANGUAGES),
                    value="English",
                    label="Language",
                )

            btn = gr.Button("Synthesize Speech", variant="primary")

        # Right column: results of the last synthesis.
        with gr.Column(scale=1):
            audio_output = gr.Audio(label="Synthesized Audio", type="filepath")
            status_box = gr.Textbox(label="Status", interactive=False)

    # Clickable presets that fill the three input components.
    gr.Examples(
        examples=[
            ["Hello! This is a test of the Supertonic 3 system with custom voices, running locally.", "F7-gb", "English"],
            ["こんにちは、これは日本語の音声合成のテストです。", "F1", "Japanese"],
            ["पिछले कुछ सालों में फ़ीवर के इलाज के लिए कई नई, और पहले से ज़्यादा असरदार दवाएं उपलब्ध हुई हैं.", "F6-zh", "Hindi"],
            ["C'est un plaisir de vous rencontrer.", "M4", "French"],
            ["¡Hola! Esta es una prueba de voz en español.", "F8-gb", "Spanish"],
        ],
        inputs=[input_text, voice_opt, lang_opt],
    )

    # generate_speech returns (file path, status text), matching the outputs.
    btn.click(
        fn=generate_speech,
        inputs=[input_text, voice_opt, lang_opt],
        outputs=[audio_output, status_box],
    )


# Start the web server only when executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()
|
|