import functools
import time

import gradio as gr
from transformers import pipeline
|
|
# Only the most recently used pipeline is kept, so repeated requests for the
# same model skip the reload while memory stays bounded to one cached model.
@functools.lru_cache(maxsize=1)
def _load_tts_pipeline(model_name):
    """Return the text-to-speech pipeline for *model_name*, reusing the last one built."""
    return pipeline("text-to-speech", model=model_name)


def tts_inference(text, model_name):
    """Synthesize speech for *text* with the selected model.

    Args:
        text: The text to convert to speech.
        model_name: Model identifier passed to ``pipeline`` as ``model``.

    Returns:
        A ``(sampling_rate, audio)`` tuple, the order expected by a
        ``gr.Audio(type="numpy")`` output component.

    Raises:
        gr.Error: If *text* is empty or contains only whitespace.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text to convert to speech.")

    pipe = _load_tts_pipeline(model_name)

    print('Processing...')
    # Time only the synthesis call, not the model load.
    start = time.perf_counter()
    output = pipe(text)
    elapsed = time.perf_counter() - start
    print(f"Took {round(elapsed)} seconds")

    # Gradio's numpy audio format is (sample_rate, data); the original
    # returned the two values in the opposite order.
    return (output["sampling_rate"], output["audio"])
|
|
| |
# Model identifiers offered in the "Select Model" dropdown.
available_models = [
    "microsoft/speecht5_tts",
    "facebook/mms-tts-eng",
    "suno/bark",
]

# Input widgets, listed in the positional order of tts_inference(text, model_name).
text_input = gr.Textbox(
    label="Enter text",
    placeholder="Type something to convert to speech...",
)
model_input = gr.Dropdown(available_models, label="Select Model")

# Output widget that receives the value returned by tts_inference.
speech_output = gr.Audio(type="numpy", label="Generated Speech")

# Assemble the interface, then start serving it.
demo = gr.Interface(
    fn=tts_inference,
    inputs=[text_input, model_input],
    outputs=speech_output,
    title="Hugging Face TTS Space",
    description="Enter text and generate speech using Hugging Face's text-to-speech models.",
)
demo.launch()
|
|