arjunbroepic committed on
Commit
c9f7c1d
·
verified ·
1 Parent(s): 34e356d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -21
app.py CHANGED
@@ -1,50 +1,111 @@
1
  import gradio as gr
2
  from supertonic import TTS
3
  import os
 
4
 
5
- # Initialize TTS
6
- tts = TTS(auto_download=True)
 
 
 
 
7
 
 
8
  VOICES = ["M1", "M2", "M3", "M4", "M5", "F1", "F2", "F3", "F4", "F5"]
 
 
9
  LANGUAGES = {
10
- "English": "en", "Korean": "ko", "Japanese": "ja", "Spanish": "es",
11
- "French": "fr", "German": "de", "Hindi": "hi", "Italian": "it"
 
 
 
 
 
 
12
  }
13
 
14
  def generate_speech(text, voice, language_name):
15
- lang_code = LANGUAGES[language_name]
16
- style = tts.get_voice_style(voice_name=voice)
17
- wav, duration = tts.synthesize(text, voice_style=style, lang=lang_code)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
- output_path = "output.wav"
20
- tts.save_audio(wav, output_path)
21
- return output_path, f"Duration: {duration:.2f}s"
22
 
23
- # Gradio 6 focuses on cleaner layout and updated theme handling
24
  with gr.Blocks(theme='soft', title="Supertonic 3 TTS") as demo:
25
- gr.Markdown("## 🎙️ Supertonic 3 TTS (Secure & Optimized)")
 
26
 
27
  with gr.Row():
28
  with gr.Column(scale=1):
29
  input_text = gr.Textbox(
30
  label="Input Text",
31
- placeholder="Type here...",
32
- value="Hello! I am now running on a secure version of Gradio.",
33
- lines=3
34
  )
 
35
  with gr.Row():
36
- voice_opt = gr.Dropdown(choices=VOICES, value="M1", label="Voice")
37
- lang_opt = gr.Dropdown(choices=list(LANGUAGES.keys()), value="English", label="Language")
38
- btn = gr.Button("Generate", variant="primary")
 
 
 
 
 
 
 
 
 
39
 
40
  with gr.Column(scale=1):
41
- audio_output = gr.Audio(label="Result", type="filepath")
42
- stats = gr.Textbox(label="Metadata", interactive=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
  btn.click(
45
  fn=generate_speech,
46
  inputs=[input_text, voice_opt, lang_opt],
47
- outputs=[audio_output, stats]
48
  )
49
 
50
  if __name__ == "__main__":
 
1
  import gradio as gr
2
  from supertonic import TTS
3
  import os
4
+ import numpy as np
5
 
6
+ # Initialize TTS - auto_download=True handles the HF model fetching automatically
7
+ # This runs on CPU by default via ONNX
8
+ try:
9
+ tts = TTS(auto_download=True)
10
+ except Exception as e:
11
+ print(f"Error initializing TTS: {e}")
12
 
13
+ # All available voices found in the repository tree
14
  VOICES = ["M1", "M2", "M3", "M4", "M5", "F1", "F2", "F3", "F4", "F5"]
15
+
16
+ # The 31 supported languages
17
  LANGUAGES = {
18
+ "English": "en", "Korean": "ko", "Japanese": "ja", "Arabic": "ar",
19
+ "Bulgarian": "bg", "Czech": "cs", "Danish": "da", "German": "de",
20
+ "Greek": "el", "Spanish": "es", "Estonian": "et", "Finnish": "fi",
21
+ "French": "fr", "Hindi": "hi", "Croatian": "hr", "Hungarian": "hu",
22
+ "Indonesian": "id", "Italian": "it", "Lithuanian": "lt", "Latvian": "lv",
23
+ "Dutch": "nl", "Polish": "pl", "Portuguese": "pt", "Romanian": "ro",
24
+ "Russian": "ru", "Slovak": "sk", "Slovenian": "sl", "Swedish": "sv",
25
+ "Turkish": "tr", "Ukrainian": "uk", "Vietnamese": "vi"
26
  }
27
 
28
  def generate_speech(text, voice, language_name):
29
+ if not text.strip():
30
+ raise gr.Error("Please enter some text.")
31
+
32
+ try:
33
+ lang_code = LANGUAGES[language_name]
34
+
35
+ # Get the voice style object
36
+ style = tts.get_voice_style(voice_name=voice)
37
+
38
+ # Synthesize (Returns wav data and a numpy array for duration)
39
+ wav, duration = tts.synthesize(text, voice_style=style, lang=lang_code)
40
+
41
+ # Save to a temporary path
42
+ output_path = "output.wav"
43
+ tts.save_audio(wav, output_path)
44
+
45
+ # FIX: Convert numpy.ndarray duration to float for f-string compatibility
46
+ readable_duration = float(duration)
47
+
48
+ return output_path, f"Generation Successful! \nDuration: {readable_duration:.2f}s"
49
 
50
+ except Exception as e:
51
+ raise gr.Error(f"Generation failed: {str(e)}")
 
52
 
53
+ # Define the Gradio Interface
54
  with gr.Blocks(theme='soft', title="Supertonic 3 TTS") as demo:
55
+ gr.Markdown("# 🎙️ Supertonic 3: Multilingual TTS")
56
+ gr.Markdown("An on-device, lightweight Text-to-Speech system by **Supertone**. Running on CPU via ONNX.")
57
 
58
  with gr.Row():
59
  with gr.Column(scale=1):
60
  input_text = gr.Textbox(
61
  label="Input Text",
62
+ placeholder="Type your message here...",
63
+ value="The train delay was announced at 4:45 PM <breath> due to track maintenance.",
64
+ lines=4
65
  )
66
+
67
  with gr.Row():
68
+ voice_opt = gr.Dropdown(
69
+ choices=VOICES,
70
+ value="M1",
71
+ label="Voice Style"
72
+ )
73
+ lang_opt = gr.Dropdown(
74
+ choices=sorted(list(LANGUAGES.keys())),
75
+ value="English",
76
+ label="Language"
77
+ )
78
+
79
+ btn = gr.Button("Synthesize Speech", variant="primary")
80
 
81
  with gr.Column(scale=1):
82
+ audio_output = gr.Audio(label="Synthesized Audio", type="filepath")
83
+ status_box = gr.Textbox(label="Status", interactive=False)
84
+
85
+ gr.HTML("""
86
+ <div style="margin-top: 20px; text-align: center;">
87
+ <p>Supported Expression Tags: <code>&lt;laugh&gt;</code>, <code>&lt;breath&gt;</code>, <code>&lt;sigh&gt;</code></p>
88
+ <a href="https://huggingface.co/Supertone/supertonic-3">
89
+ <img src="https://img.shields.io/badge/Model-Supertone%20Supertonic--3-blue?logo=huggingface" alt="Model Page">
90
+ </a>
91
+ </div>
92
+ """)
93
+
94
+ # Setup Examples
95
+ gr.Examples(
96
+ examples=[
97
+ ["Hello! This is a test of the Supertonic 3 system running locally.", "M1", "English"],
98
+ ["こんにちは、これは日本語の音声合成のテストです。", "F1", "Japanese"],
99
+ ["¡Hola! Esta es una prueba de voz en español.", "F3", "Spanish"],
100
+ ["C'est un plaisir de vous rencontrer.", "M4", "French"]
101
+ ],
102
+ inputs=[input_text, voice_opt, lang_opt]
103
+ )
104
 
105
  btn.click(
106
  fn=generate_speech,
107
  inputs=[input_text, voice_opt, lang_opt],
108
+ outputs=[audio_output, status_box]
109
  )
110
 
111
  if __name__ == "__main__":