trysem committed on
Commit
7a2a004
·
verified ·
1 Parent(s): 7fd7e3f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +97 -0
app.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import numpy as np
4
+ from transformers import pipeline
5
+
6
# 1. Choose where inference runs: the first CUDA GPU when torch reports one,
#    otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

print(f"Loading aoxo/swaram model on {device}...")

# 2. Build the text-to-speech pipeline once at import time so every request
#    reuses the same object. A failed load is reported and leaves
#    `synthesizer` as None so the rest of the module can still run.
try:
    synthesizer = pipeline(task="text-to-speech", model="aoxo/swaram", device=device)
    print("Model loaded successfully!")
except Exception as load_error:
    print(f"Error loading model: {load_error}")
    synthesizer = None
18
+
19
+ # 3. Define the prediction function
20
def generate_audio(text):
    """Synthesize speech for ``text`` using the module-level ``synthesizer``.

    Args:
        text: The text to speak. ``None``, empty and whitespace-only values
            are rejected with a status message instead of reaching the model.

    Returns:
        An ``(audio, status)`` pair. On success ``audio`` is
        ``(sample_rate, samples)`` and ``status`` is ``"Success!"``. On any
        failure ``audio`` is ``None`` and ``status`` describes the problem.
    """
    # The value can be None when the function is called without an input,
    # so check for that before calling .strip() on it.
    if text is None or not text.strip():
        return None, "Please enter some text."

    if synthesizer is None:
        return None, "Error: Model failed to load. Check Space logs."

    try:
        # The pipeline result is read as a dict:
        # {'audio': array, 'sampling_rate': int}
        speech = synthesizer(text)
        audio_data = np.asarray(speech["audio"])
        sample_rate = speech["sampling_rate"]

        # Gradio expects audio in (sample_rate, numpy_1D_array) format.
        # Pipeline audio is usually shape (1, N); squeeze it down to (N,).
        if audio_data.ndim > 1:
            audio_data = np.squeeze(audio_data)
    except Exception as e:
        # This is the UI boundary: report the failure to the user rather than
        # raising, but also write the traceback to the logs so it can be
        # diagnosed later.
        import traceback

        traceback.print_exc()
        return None, f"Generation Error: {e}"

    return (sample_rate, audio_data), "Success!"
45
+
46
# 4. Build the Gradio Interface
def _reset_fields():
    """Return cleared values for the text box, audio player and status box."""
    return None, None, ""


# Sample sentences offered under the input box.
_EXAMPLE_SENTENCES = [
    ["നമസ്കാരം, ഇതെന്റെ പുതിയ ശബ്ദമാണ്."],
    ["കേരളം ദൈവത്തിന്റെ സ്വന്തം നാടാണ്."],
    ["കള്ളാ കടയാടി മോനെ"],
]

with gr.Blocks(title="Swaram Malayalam TTS", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # 🗣️ Swaram Malayalam Text-to-Speech
        Enter Malayalam text below to generate speech using the `aoxo/swaram` model.
        """
    )

    with gr.Row():
        # Left column: text entry, action buttons and examples.
        with gr.Column():
            text_input = gr.Textbox(
                label="Enter Malayalam Text",
                placeholder="മലയാളം ടൈപ്പ് ചെയ്യുക...",
                lines=5,
            )

            with gr.Row():
                clear_btn = gr.Button("Clear")
                generate_btn = gr.Button("Generate Speech", variant="primary")

            gr.Examples(
                examples=_EXAMPLE_SENTENCES,
                inputs=[text_input],
                label="Examples",
            )

        # Right column: synthesized audio and a status line.
        with gr.Column():
            audio_output = gr.Audio(label="Generated Audio", type="numpy", interactive=False)
            status_output = gr.Textbox(label="Status", interactive=False)

    # Event listeners.
    # "Generate Speech" runs the model; api_name also exposes the handler as a
    # named API endpoint.
    generate_btn.click(
        fn=generate_audio,
        inputs=[text_input],
        outputs=[audio_output, status_output],
        api_name="synthesize",
    )

    # "Clear" resets the input text, the audio player and the status box.
    clear_btn.click(
        fn=_reset_fields,
        inputs=[],
        outputs=[text_input, audio_output, status_output],
    )

# 5. Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()