sam749 committed on
Commit
52af9c3
·
verified ·
1 Parent(s): c647242

Upload to hub

Browse files
README.md CHANGED
@@ -1,13 +1,14 @@
1
  ---
2
- title: Supertonic3 Custom Styles
3
- emoji: 🌍
4
- colorFrom: gray
5
- colorTo: pink
6
  sdk: gradio
7
  sdk_version: 6.14.0
8
- python_version: '3.13'
9
  app_file: app.py
10
  pinned: false
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
1
  ---
2
+ title: Supertonic 3 TTS
3
+ emoji: 🗣️
4
+ colorFrom: blue
5
+ colorTo: purple
6
  sdk: gradio
7
  sdk_version: 6.14.0
 
8
  app_file: app.py
9
  pinned: false
10
+ license: openrail
11
+ models:
12
+ - Supertone/supertonic-3
13
+ short_description: Runs Supertonic 3 on CPU
14
+ ---
app.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from supertonic import TTS
3
+ import os
4
+ import numpy as np
5
+
6
# Create the shared TTS engine once, at import time.
# If construction fails, the error is reported and `tts` is bound to None so
# that the name always exists; previously a failure left `tts` undefined and
# every later use raised NameError instead of a meaningful message.
try:
    tts = TTS(auto_download=True)
except Exception as e:
    tts = None
    print(f"Error initializing TTS: {e}")
10
+
11
# Voices whose style is loaded from a JSON file in CUSTOM_VOICES_DIR.
CUSTOM_VOICES = ["F6-zh", "F7-gb", "F8-gb"]

# Every voice offered in the UI: the custom voices first, then the named ones.
VOICES = [
    *CUSTOM_VOICES,
    "M1", "M2", "M3", "M4", "M5",
    "F1", "F2", "F3", "F4", "F5",
]

# Directory holding one "<voice>.json" style file per custom voice.
CUSTOM_VOICES_DIR = "my_voice_styles"

# The 31 supported languages, keyed by display name, mapped to language code.
LANGUAGES = {
    "English": "en",
    "Korean": "ko",
    "Japanese": "ja",
    "Arabic": "ar",
    "Bulgarian": "bg",
    "Czech": "cs",
    "Danish": "da",
    "German": "de",
    "Greek": "el",
    "Spanish": "es",
    "Estonian": "et",
    "Finnish": "fi",
    "French": "fr",
    "Hindi": "hi",
    "Croatian": "hr",
    "Hungarian": "hu",
    "Indonesian": "id",
    "Italian": "it",
    "Lithuanian": "lt",
    "Latvian": "lv",
    "Dutch": "nl",
    "Polish": "pl",
    "Portuguese": "pt",
    "Romanian": "ro",
    "Russian": "ru",
    "Slovak": "sk",
    "Slovenian": "sl",
    "Swedish": "sv",
    "Turkish": "tr",
    "Ukrainian": "uk",
    "Vietnamese": "vi",
}
29
+
30
def generate_speech(text, voice, language_name):
    """Synthesize `text` to a WAV file and report the audio duration.

    Args:
        text: The text to speak. Empty, whitespace-only or missing text is
            rejected.
        voice: A voice name. Names listed in CUSTOM_VOICES are loaded from
            "<voice>.json" inside CUSTOM_VOICES_DIR; any other name is looked
            up through the engine's `get_voice_style`.
        language_name: A key of LANGUAGES (the display name of the language).

    Returns:
        A `(output_path, status_message)` tuple: the path of the written WAV
        file and a human-readable status string containing the duration.

    Raises:
        gr.Error: If the input is invalid, the TTS engine is unavailable, or
            synthesis fails for any reason.
    """
    import tempfile

    if not text or not text.strip():
        raise gr.Error("Please enter some text.")

    # Validate up front so the user sees a clear message instead of a bare
    # KeyError repr wrapped in "Generation failed".
    if language_name not in LANGUAGES:
        raise gr.Error(f"Unsupported language: {language_name}")
    lang_code = LANGUAGES[language_name]

    # The module-level engine may be missing or None when its start-up
    # initialisation failed; report that explicitly.
    engine = globals().get("tts")
    if engine is None:
        raise gr.Error("Generation failed: the TTS engine is not available.")

    try:
        # Get the voice style object
        if voice in CUSTOM_VOICES:
            style_path = os.path.join(CUSTOM_VOICES_DIR, f"{voice}.json")
            style = engine.get_voice_style_from_path(style_path)
        else:
            style = engine.get_voice_style(voice_name=voice)

        # Synthesize (returns the wav data and the duration)
        wav, duration = engine.synthesize(
            text, voice_style=style, lang=lang_code, speed=0.95
        )

        # Write to a unique temporary file: a single fixed file name would be
        # shared by concurrent requests, which could overwrite each other's
        # audio before it is served.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
            output_path = handle.name
        engine.save_audio(wav, output_path)

        # Accept an array of any rank (or a plain scalar) and take its first
        # element as a float so it can be formatted below.
        readable_duration = float(np.asarray(duration).reshape(-1)[0])

        return output_path, f"Generation Successful! \nDuration: {readable_duration:.2f}s"

    except Exception as e:
        # Surface the failure in the UI while keeping the original traceback.
        raise gr.Error(f"Generation failed: {str(e)}") from e
58
+
59
+ # Define the Gradio Interface
60
# Gradio front end: text/voice/language inputs on the left, audio and status
# outputs on the right, followed by clickable examples.
with gr.Blocks(theme='soft', title="Supertonic-3 Custom Voices") as demo:
    gr.Markdown("# 🔊 Supertonic-3 Custom Voices")
    gr.Markdown(
        "An on-device, lightweight Text-to-Speech system by **[Supertone](https://huggingface.co/Supertone/supertonic-3)**. Running on CPU via ONNX.\n"
        "You can create custom voice style from **[HERE](https://github.com/saurabhv749/supertonic3-voice-clone)**"
    )

    with gr.Row():
        # Left column: everything the user fills in.
        with gr.Column(scale=1):
            input_text = gr.Textbox(
                value="In the same track, he reveals his father has been diagnosed with cancer.",
                placeholder="Type your message here...",
                label="Input Text",
                lines=4,
            )

            with gr.Row():
                voice_opt = gr.Dropdown(label="Voice Style", choices=VOICES, value="F8-gb")
                # Language names are shown alphabetically.
                lang_opt = gr.Dropdown(
                    label="Language",
                    choices=sorted(LANGUAGES),
                    value="English",
                )

            btn = gr.Button("Synthesize Speech", variant="primary")

        # Right column: results of the synthesis.
        with gr.Column(scale=1):
            audio_output = gr.Audio(label="Synthesized Audio", type="filepath")
            status_box = gr.Textbox(label="Status", interactive=False)

    # Example rows, each in the order (text, voice, language).
    example_rows = [
        ["Hello! This is a test of the Supertonic 3 system with custom voices, running locally.", "F7-gb", "English"],
        ["こんにちは、これは日本語の音声合成のテストです。", "F1", "Japanese"],
        ["पिछले कुछ सालों में फ़ीवर के इलाज के लिए कई नई, और पहले से ज़्यादा असरदार दवाएं उपलब्ध हुई हैं.", "F6-zh", "Hindi"],
        ["C'est un plaisir de vous rencontrer.", "M4", "French"],
        ["¡Hola! Esta es una prueba de voz en español.", "F8-gb", "Spanish"],
    ]
    gr.Examples(examples=example_rows, inputs=[input_text, voice_opt, lang_opt])

    # Clicking the button runs the synthesis and fills both outputs.
    btn.click(
        fn=generate_speech,
        inputs=[input_text, voice_opt, lang_opt],
        outputs=[audio_output, status_box],
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(share=True)
my_voice_styles/F6-zh.json ADDED
The diff for this file is too large to render. See raw diff
 
my_voice_styles/F7-gb.json ADDED
The diff for this file is too large to render. See raw diff
 
my_voice_styles/F8-gb.json ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio==5.50.0
2
+ supertonic==1.2.3
3
+ numpy==2.0.2