| import gradio as gr |
| from transformers import AutoProcessor, VitsModel |
| import torch |
| import scipy.io.wavfile |
| import tempfile |
|
|
| |
# Hugging Face Hub checkpoint shared by the synthesis model and its processor.
_MODEL_ID = "facebook/mms-tts-fon"

# Load the text processor and the VITS model once, when the module is loaded,
# so every request reuses the same objects.
processor = AutoProcessor.from_pretrained(_MODEL_ID)
model = VitsModel.from_pretrained(_MODEL_ID)

# Sample rate taken from the model config; used as the rate of the WAV output.
sampling_rate = model.config.sampling_rate
|
|
| |
def tts_fon(text):
    """Synthesize Fon speech for ``text`` and return the path of a WAV file.

    Args:
        text: Fon text to synthesize, as typed into the UI textbox.

    Returns:
        Path of a newly created temporary ``.wav`` file containing the
        synthesized waveform, written at ``sampling_rate``.

    Raises:
        gr.Error: If ``text`` is empty or contains only whitespace.
    """
    # Reject blank input up front instead of handing the model nothing to say.
    if not text or not text.strip():
        raise gr.Error("Please enter some Fon text to synthesize.")

    inputs = processor(text, return_tensors="pt")
    # Inference only: no gradients are needed.
    with torch.no_grad():
        audio = model(**inputs).waveform[0].numpy()

    # Reserve a unique path, then close the handle before writing so the WAV
    # is not written through a second handle to a file that is still open.
    # delete=False: only the path is returned, so the file must persist.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        wav_path = tmp.name
    scipy.io.wavfile.write(wav_path, rate=sampling_rate, data=audio)
    return wav_path
|
|
| |
# Heading displayed at the top of the demo page.
title = "🗣️ Fon Text-to-Speech (TTS) with Meta MMS"

# Markdown shown under the title: what the demo does, how to use it, and the
# usage rules / licensing notes. This text is user-facing; keep it verbatim.
description = """
This Space uses Meta AI's `facebook/mms-tts-fon` model to synthesize speech in the Fon language.
The model is part of the [Massively Multilingual Speech (MMS)](https://huggingface.co/facebook/mms-tts-fon) project.

Fon is a Gbe language spoken in Benin and Togo. This demo allows you to input Fon text and hear the synthesized audio output.

---

### 🔧 How to Use:
1. Type a sentence in **Fon** (Latin script, tone markers optional).
2. Press **Submit** or hit **Enter**.
3. Wait a few seconds for audio synthesis.
4. Listen or download the audio.

---

### 📜 Rules & Notes:
1. Input should be in **Fon** only (avoid English or other languages).
2. You may enter as much text as you want, but long inputs may slow processing. Short to medium sentences are recommended.
3. Use correct Unicode characters (ɛ, ɔ, etc.) if tones are important.
4. Tone marks like `à, é, ǒ, ê` are supported but optional.
5. Output uses a single female voice (pretrained by Meta).
6. Audio is generated at the model’s default sampling rate (may vary by version).
7. Model is intended for **research and demonstration** only.
8. Do **not** use for commercial purposes without permission.
9. Underlying model licensed under **CC-BY-NC 4.0**.
10. Please be respectful — offensive or inappropriate input is not allowed.

---

✨ Powered by Meta AI's MMS-TTS and Hugging Face 🤗
"""
|
|
| |
# Input widget: a three-line textbox for the Fon sentence.
text_input = gr.Textbox(
    label="Enter Fon text here",
    placeholder="e.g. Fɔ̀ngbè sɔ̀ wá kpɔ́ nù.",
    lines=3,
)

# Output widget: audio component fed with the WAV file path from tts_fon.
audio_output = gr.Audio(label="Synthesized Fon Speech", type="filepath")

# Wire the synthesis function and both widgets into a single-page interface.
iface = gr.Interface(
    fn=tts_fon,
    inputs=text_input,
    outputs=audio_output,
    title=title,
    description=description,
    theme="default",
)

# Start serving the demo.
iface.launch()