| import torch |
| import numpy as np |
| import gradio as gr |
| from transformers import pipeline |
| import logging |
| from scipy.io.wavfile import write |
| import uuid |
| import os |
| import warnings |
|
|
| |
| |
| |
# Silence noisy FutureWarnings and transformers progress/info logging so the
# demo's console output stays readable during model loading.
warnings.filterwarnings("ignore", category=FutureWarning)
logging.getLogger("transformers").setLevel(logging.ERROR)


# transformers pipeline device convention: 0 = first CUDA GPU, -1 = CPU.
device = 0 if torch.cuda.is_available() else -1


# Directory holding the fine-tuned model weights/config. "./" assumes the
# script is launched from inside the model directory — TODO confirm the
# deployment layout.
model_dir = "./"


# Build the text-to-speech pipeline once at import time so every request
# reuses the already-loaded model.
tts_pipe = pipeline(
    task="text-to-speech",
    model=model_dir,
    device=device
)
|
|
| |
| |
| |
def tts_generate(text):
    """Synthesize speech for *text* and write it to a 16-bit PCM WAV file.

    Args:
        text: Input string; may contain expressive tags such as ``<laugh>``.

    Returns:
        Filesystem path of the generated WAV file, or ``None`` when the
        input is empty or whitespace-only.

    Raises:
        ValueError: If the TTS pipeline response contains no ``"audio"`` key.
    """
    if not text.strip():
        return None

    output = tts_pipe(text)

    if "audio" not in output:
        raise ValueError("TTS pipeline did not return audio")

    audio = np.array(output["audio"], dtype=np.float32)

    # TTS pipelines often return audio with a leading batch axis, e.g. shape
    # (1, n). scipy's WAV writer would read that as n channels of one sample,
    # so drop singleton dimensions first. (Mono (n,) arrays are unaffected.)
    audio = np.squeeze(audio)

    # Scrub NaN/inf the model may emit, then keep samples in the valid
    # [-1, 1] float range before PCM conversion.
    audio = np.nan_to_num(audio)
    audio = np.clip(audio, -1.0, 1.0)

    # Fall back to 22.05 kHz if the pipeline omits the rate. scipy requires a
    # plain int sample rate, and the pipeline may hand back a float/np scalar.
    sr = int(output.get("sampling_rate") or 22050)

    # float32 [-1, 1] -> 16-bit signed PCM.
    audio_int16 = (audio * 32767).astype(np.int16)

    os.makedirs("outputs", exist_ok=True)
    out_path = os.path.join("outputs", f"{uuid.uuid4().hex}.wav")

    write(out_path, sr, audio_int16)

    return out_path
|
|
| |
| |
| |
# Example prompts demonstrating the expressive tags the fine-tuned model
# accepts (<laugh>, <gasp>, <yawn>); surfaced as clickable examples in the UI
# and as the textbox placeholder.
SAMPLES = [
    "Just end up crashing somewhere. <laugh> No, because remember last time?",
    "Hmm… I don't know. <laugh> This feels like a bad idea. <gasp>",
    "I'm so tired today <yawn> but I still have so much work to do.",
]
|
|
| |
| |
| |
# Gradio UI: a multi-line textbox feeding tts_generate, which returns the
# synthesized audio as a file path for the Audio component to play.
text_input = gr.Textbox(
    label="Enter text (use expressive tags like <laugh>, <sigh>)",
    lines=5,
    placeholder=SAMPLES[0],
)
audio_output = gr.Audio(type="filepath", label="Generated Audio")

demo = gr.Interface(
    fn=tts_generate,
    inputs=text_input,
    outputs=audio_output,
    title="Fine-tuned Orpheus-3B Expressive TTS",
    examples=[[s] for s in SAMPLES],
)
|
|
| |
| |
| |
if __name__ == "__main__":
    # ssr_mode=False disables Gradio's server-side rendering path —
    # NOTE(review): presumably needed for the hosting environment; confirm.
    demo.launch(ssr_mode=False)
|
|