Spaces:

Robobyte
/

VoxMorph

Runtime error

App Files Files Community

VoxMorph / app.py

Robobyte

Update app.py

e1bcdd0 verified 20 days ago

raw

history blame contribute delete

3.09 kB

	import gradio as gr
	import torch
	import soundfile as sf
	import tempfile
	import os
	import traceback

	from io import BytesIO
	from zipvoice.luxvoice import LuxTTS

	# -----------------------------
	# ENV OPTIMIZATION
	# -----------------------------
	# Both flags are set before the model object below is constructed.
	os.environ.update(
	    {
	        "HF_HUB_DISABLE_SYMLINKS_WARNING": "1",
	        "TOKENIZERS_PARALLELISM": "false",
	    }
	)

	# -----------------------------
	# LOAD MODEL ON START
	# -----------------------------
	# Use the GPU when torch reports one, otherwise fall back to the CPU.
	if torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"

	# Built once at import time and shared by every request handler.
	lux_tts = LuxTTS("YatharthS/LuxTTS", device=device)

	# -----------------------------
	# AUDIO HANDLING
	# -----------------------------
	def prepare_audio(file_obj):
	    """Return a filesystem path holding the reference audio.

	    Args:
	        file_obj: One of
	            * ``str`` - treated as an existing path and returned unchanged;
	            * ``bytes`` / ``bytearray`` / ``memoryview`` - raw audio data;
	            * any object with a ``read()`` method returning bytes.

	    Returns:
	        The input path for ``str`` input; otherwise the path of a newly
	        created ``.wav``-suffixed temporary file containing the data. The
	        caller is responsible for deleting that temporary file.

	    Raises:
	        Whatever ``read()`` or the file write raises (e.g. ``TypeError`` if
	        ``read()`` returns text instead of bytes). No temporary file is left
	        behind in that case.
	    """
	    if isinstance(file_obj, str):
	        return file_obj

	    if isinstance(file_obj, (bytes, bytearray, memoryview)):
	        data = file_obj
	    else:
	        data = file_obj.read()

	    # delete=False: the file must outlive this function so the model can
	    # open it by name afterwards.
	    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
	    try:
	        with tmp:
	            tmp.write(data)
	    except BaseException:
	        # The handle is closed by now; don't leak a half-written file.
	        try:
	            os.remove(tmp.name)
	        except OSError:
	            pass
	        raise
	    return tmp.name

	# -----------------------------
	# MAIN FUNCTION
	# -----------------------------
	def generate_speech(
	    ref_audio_file,
	    reference_transcript,
	    text,
	    rms=0.01,
	    t_shift=0.9,
	    num_steps=4,
	    speed=1.0,
	    ref_duration=6.0,
	):
	    """Synthesise ``text`` using the voice from a reference recording.

	    Args:
	        ref_audio_file: Path string or readable object accepted by
	            ``prepare_audio``; falsy means no reference was supplied.
	        reference_transcript: Transcript of the reference audio. ``None``,
	            empty or whitespace-only is forwarded to the model as ``None``.
	        text: Text to synthesise; must contain non-whitespace characters.
	        rms: Forwarded to ``lux_tts.encode_prompt``.
	        t_shift: Forwarded to ``lux_tts.generate_speech``.
	        num_steps: Forwarded to ``lux_tts.generate_speech`` as an ``int``.
	        speed: Forwarded to ``lux_tts.generate_speech``.
	        ref_duration: Forwarded to ``lux_tts.encode_prompt`` as ``duration``.

	    Returns:
	        ``((48000, samples), "Success")`` on success, where ``samples`` is
	        the squeezed NumPy array produced by the model; otherwise
	        ``(None, message)`` where ``message`` is a short reason or the
	        formatted traceback of the failure. This function never raises for
	        ordinary ``Exception`` subclasses.
	    """
	    prompt_path = None

	    try:
	        if not ref_audio_file:
	            return None, "No reference audio"
	        # Reject empty requests up front instead of sending them to the model.
	        if not text or not text.strip():
	            return None, "No text to generate"

	        prompt_path = prepare_audio(ref_audio_file)

	        # The transcript may arrive as None; treat that like an empty box.
	        transcript = (reference_transcript or "").strip() or None

	        encoded = lux_tts.encode_prompt(
	            prompt_path,
	            duration=ref_duration,
	            rms=rms,
	            prompt_text=transcript,
	        )

	        audio = lux_tts.generate_speech(
	            text,
	            encoded,
	            # UI sliders can deliver floats; the step count is integral.
	            num_steps=int(num_steps),
	            t_shift=t_shift,
	            speed=speed,
	            return_smooth=False,
	        ).cpu().numpy().squeeze()

	        return (48000, audio), "Success"

	    except Exception:
	        # Top-level UI boundary: report the failure in the status box
	        # rather than propagating it.
	        return None, traceback.format_exc()

	    finally:
	        # Only remove files this call created; a str input is the caller's
	        # own path and must be left alone.
	        owns_temp = prompt_path is not None and not isinstance(ref_audio_file, str)
	        if owns_temp and os.path.exists(prompt_path):
	            try:
	                os.remove(prompt_path)
	            except OSError:
	                # Best-effort cleanup; a stale temp file is not fatal.
	                pass

	# -----------------------------
	# UI
	# -----------------------------
	# Layout: reference inputs on the left, target text on the right, tuning
	# sliders in a collapsed accordion, then the trigger button and outputs.
	with gr.Blocks(title="LuxTTS Voice Cloning") as demo:
	    gr.Markdown("# 🎤 LuxTTS Voice Cloning")

	    with gr.Row():
	        with gr.Column():
	            ref_audio = gr.Audio(type="filepath", label="Reference Audio")
	            ref_text = gr.Textbox(label="Reference Transcript")

	        with gr.Column():
	            text = gr.Textbox(lines=5, label="Text to Generate")

	    # Every slider is labelled so the controls can be told apart; ranges
	    # and defaults mirror the keyword defaults of generate_speech.
	    with gr.Accordion("Advanced", open=False):
	        rms = gr.Slider(0.001, 0.05, value=0.01, label="RMS")
	        t_shift = gr.Slider(0.1, 1.5, value=0.9, label="T-Shift")
	        steps = gr.Slider(1, 10, value=4, step=1, label="Steps")
	        speed = gr.Slider(0.5, 2.0, value=1.0, label="Speed")
	        duration = gr.Slider(1.0, 20.0, value=6.0, label="Reference Duration")

	    btn = gr.Button("Generate")
	    out_audio = gr.Audio(type="numpy", label="Generated Audio")
	    # Shows "Success", a short reason, or a traceback; output only.
	    status = gr.Textbox(label="Status", interactive=False)

	    # Input order must match the positional parameters of generate_speech.
	    btn.click(
	        generate_speech,
	        inputs=[ref_audio, ref_text, text, rms, t_shift, steps, speed, duration],
	        outputs=[out_audio, status],
	    )

	demo.launch()