"""Gradio demo that synthesizes speech with the facebook/mms-tts-ava model."""

import gradio as gr
import torch
from transformers import AutoProcessor, VitsModel

model_id = "facebook/mms-tts-ava"

# Loaded once at import time so every request reuses the same processor/model.
processor = AutoProcessor.from_pretrained(model_id)
model = VitsModel.from_pretrained(model_id)


def tts_fn(text):
    """Synthesize speech for *text* with the module-level VITS model.

    Args:
        text: The text typed into the Gradio textbox.

    Returns:
        A ``(sampling_rate, waveform)`` tuple in the form accepted by
        ``gr.Audio(type="numpy")``, where ``waveform`` is a NumPy array, or
        ``None`` when *text* is empty or whitespace-only (nothing to
        synthesize, so the model is not called).
    """
    # Guard against blank submissions instead of feeding an empty sequence
    # to the model.
    if not text or not text.strip():
        return None

    inputs = processor(text=text, return_tensors="pt")

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        output = model(**inputs)

    # Drop size-1 dimensions from the model output and hand Gradio a NumPy
    # array.
    waveform = output.waveform.squeeze().numpy()

    # Take the sample rate from the loaded checkpoint's config rather than a
    # hard-coded literal, so the two cannot drift apart.
    return (model.config.sampling_rate, waveform)


demo = gr.Interface(
    fn=tts_fn,
    inputs=gr.Textbox(label="Nhập văn bản Avar"),
    outputs=gr.Audio(label="Kết quả TTS", type="numpy"),
    title="TTS Avar - facebook/mms-tts-ava",
)

demo.launch()