Spaces:

Guunk
/

Ttsfon

Running

App Files Files Community

Ttsfon / app.py

Guunk

Create app.py

c8b5506 verified 9 months ago

raw

history blame contribute delete

2.61 kB

	import gradio as gr
	from transformers import AutoProcessor, VitsModel
	import torch
	import scipy.io.wavfile
	import tempfile

	# Hugging Face Hub id of the Fon text-to-speech checkpoint from Meta AI.
	# Kept in one constant so the model and its processor are always loaded
	# from the same checkpoint.
	MODEL_ID = "facebook/mms-tts-fon"

	# Load the model and its matching text processor once, at start-up.
	model = VitsModel.from_pretrained(MODEL_ID)
	processor = AutoProcessor.from_pretrained(MODEL_ID)

	# Take the output sampling rate from the model config instead of hard-coding it.
	sampling_rate = model.config.sampling_rate

	# Function to synthesize Fon audio from text
	def tts_fon(text):
	    """Synthesize speech for ``text`` and return the path of a WAV file.

	    Args:
	        text: Text to speak. ``None``, empty, or whitespace-only input is
	            treated as "nothing to synthesize".

	    Returns:
	        Path of a temporary ``.wav`` file containing the generated audio,
	        or ``None`` when there is no text to synthesize. The file is
	        created with ``delete=False``, so it is not removed automatically.
	    """
	    # Skip tokenization and inference entirely for blank input rather than
	    # handing the model an empty sentence.
	    if text is None or not text.strip():
	        return None

	    inputs = processor(text, return_tensors="pt")
	    with torch.no_grad():  # inference only; no gradients needed
	        # [0] picks the first waveform of the model output.
	        audio = model(**inputs).waveform[0].numpy()

	    # Reserve a temp path and close that handle first, so scipy is the only
	    # writer holding the file open while the samples are written.
	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
	        wav_path = tmp.name
	    scipy.io.wavfile.write(wav_path, rate=sampling_rate, data=audio)
	    return wav_path

	# User-facing text for the Gradio page: `title` is the heading and
	# `description` is the introductory text; both are passed to gr.Interface
	# below. The description is written in Markdown syntax (headings, numbered
	# lists, a link, horizontal rules).
	title = "🗣️ Fon Text-to-Speech (TTS) with Meta MMS"
	description = """
	This Space uses Meta AI's `facebook/mms-tts-fon` model to synthesize speech in the Fon language.
	The model is part of the [Massively Multilingual Speech (MMS)](https://huggingface.co/facebook/mms-tts-fon) project.

	Fon is a Gbe language spoken in Benin and Togo. This demo allows you to input Fon text and hear the synthesized audio output.

	---

	### 🔧 How to Use:
	1. Type a sentence in Fon (Latin script, tone markers optional).
	2. Press Submit or hit Enter.
	3. Wait a few seconds for audio synthesis.
	4. Listen or download the audio.

	---

	### 📜 Rules & Notes:
	1. Input should be in Fon only (avoid English or other languages).
	2. You may enter as much text as you want, but long inputs may slow processing. Short to medium sentences are recommended.
	3. Use correct Unicode characters (ɛ, ɔ, etc.) if tones are important.
	4. Tone marks like `à, é, ǒ, ê` are supported but optional.
	5. Output uses a single female voice (pretrained by Meta).
	6. Audio is generated at the model’s default sampling rate (may vary by version).
	7. Model is intended for research and demonstration only.
	8. Do not use for commercial purposes without permission.
	9. Underlying model licensed under CC-BY-NC 4.0.
	10. Please be respectful — offensive or inappropriate input is not allowed.

	---

	✨ Powered by Meta AI's MMS-TTS and Hugging Face 🤗
	"""

	# Input widget: a three-line text box for the sentence to be spoken.
	fon_textbox = gr.Textbox(
	    label="Enter Fon text here",
	    placeholder="e.g. Fɔ̀ngbè sɔ̀ wá kpɔ́ nù.",
	    lines=3,
	)

	# Output widget: tts_fon returns a file name, hence type="filepath".
	speech_player = gr.Audio(label="Synthesized Fon Speech", type="filepath")

	# Wire the synthesis function to the widgets and page text.
	iface = gr.Interface(
	    fn=tts_fon,
	    inputs=fon_textbox,
	    outputs=speech_player,
	    title=title,
	    description=description,
	    theme="default",
	)

	# Start serving the app.
	iface.launch()