"""Gradio demo app for Malayalam text-to-speech with the `aoxo/swaram` model."""

import gradio as gr
import torch
import numpy as np
from transformers import pipeline

# 1. Setup device (Use GPU if available on the Space, otherwise CPU)
device = "cuda:0" if torch.cuda.is_available() else "cpu"
print(f"Loading aoxo/swaram model on {device}...")

# 2. Load the TTS pipeline globally so it only loads once when the Space starts
try:
    synthesizer = pipeline("text-to-speech", model="aoxo/swaram", device=device)
    print("Model loaded successfully!")
except Exception as e:
    # Keep the UI alive even when the model cannot be loaded; generate_audio
    # reports the failure to the user instead of the whole app crashing.
    print(f"Error loading model: {e}")
    synthesizer = None


# 3. Define the prediction function
def generate_audio(text):
    """Synthesize speech for *text* with the globally loaded pipeline.

    Args:
        text: The text to speak. ``None``, empty, and whitespace-only
            values are rejected with a status message.

    Returns:
        A ``(audio, status)`` pair. ``audio`` is ``(sample_rate, samples)``
        on success and ``None`` otherwise; ``status`` is a human-readable
        message for the status textbox.
    """
    # Guard against None as well as blank input: calling .strip() on None
    # would raise AttributeError before any status could be returned.
    if not text or not text.strip():
        return None, "Please enter some text."

    if synthesizer is None:
        return None, "Error: Model failed to load. Check Space logs."

    try:
        # Generate speech
        speech = synthesizer(text)

        # The transformers pipeline returns a dictionary:
        # {'audio': numpy array, 'sampling_rate': int}
        audio_data = np.asarray(speech["audio"])
        sample_rate = speech["sampling_rate"]

        # Gradio expects audio in (sample_rate, numpy_1D_array) format.
        # Pipeline audio is usually shape (1, N). We squeeze it to (N,)
        if audio_data.ndim > 1:
            audio_data = np.squeeze(audio_data)

        return (sample_rate, audio_data), "Success!"
    except Exception as e:
        # UI boundary: surface the failure in the status box rather than
        # letting the exception propagate out of the event handler.
        return None, f"Generation Error: {str(e)}"


# 4. Build the Gradio Interface
with gr.Blocks(title="Swaram Malayalam TTS", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # 🗣️ Swaram Malayalam Text-to-Speech
        Enter Malayalam text below to generate speech using the `aoxo/swaram` model.
        """
    )

    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(
                label="Enter Malayalam Text",
                placeholder="മലയാളം ടൈപ്പ് ചെയ്യുക...",
                lines=5
            )
            with gr.Row():
                clear_btn = gr.Button("Clear")
                generate_btn = gr.Button("Generate Speech", variant="primary")

            gr.Examples(
                examples=[
                    ["നമസ്കാരം, ഇതെന്റെ പുതിയ ശബ്ദമാണ്."],
                    ["കേരളം ദൈവത്തിന്റെ സ്വന്തം നാടാണ്."],
                    ["കള്ളാ കടയാടി മോനെ"]
                ],
                inputs=[text_input],
                label="Examples"
            )

        with gr.Column():
            audio_output = gr.Audio(label="Generated Audio", type="numpy", interactive=False)
            status_output = gr.Textbox(label="Status", interactive=False)

    # Event Listeners
    generate_btn.click(
        fn=generate_audio,
        inputs=[text_input],
        outputs=[audio_output, status_output],
        api_name="synthesize"  # Allows this Space to be used as an API later
    )

    # Reset the input text, the audio player, and the status message.
    clear_btn.click(
        fn=lambda: (None, None, ""),
        inputs=[],
        outputs=[text_input, audio_output, status_output]
    )

# 5. Launch the app
if __name__ == "__main__":
    demo.launch()