duyv
/

API_MC_AI

Model card Files Files and versions

API_MC_AI / VietTTS /text_to_speech.py

duyv's picture

Upload text_to_speech.py

17cf0ef verified 8 months ago

history blame contribute delete

1.36 kB

	import sys
	import os
	import base64
	from tts import TTS
	from utils.file_utils import load_prompt_speech_from_file, load_voices
	from app.config import settings
	from datetime import datetime


	def load_model(
	    speed,
	    voice,
	    text,
	    output_path,
	    output_format="wav",
	):
	    """Synthesize ``text`` to an audio file using a reference voice sample.

	    Despite its name, this function does the whole job: it builds the ``TTS``
	    object, resolves ``voice`` to a sample file, loads that sample as the
	    speech prompt and writes the synthesized audio to ``output_path``.

	    Args:
	        speed: Speed value forwarded to ``tts_to_file``. It is converted with
	            ``float()``, so numeric strings (e.g. from ``sys.argv``) work.
	        voice: Either a key of the voice map returned by ``load_voices`` or
	            an all-digit string, which is treated as a zero-based index into
	            that map's values (in the map's iteration order).
	        text: Text to synthesize.
	        output_path: Destination forwarded to ``tts_to_file``.
	        output_format: Accepted for interface compatibility; this function
	            does not read it.

	    Raises:
	        ValueError: If ``speed`` cannot be parsed as a float, or if ``voice``
	            cannot be resolved to an existing sample file (unknown name,
	            out-of-range index, or file missing on disk).
	    """
	    # Validate the cheap input first so a bad speed fails before any model
	    # or voice loading is attempted.
	    speed = float(speed)

	    print("Loading TTS model...", settings.DIR_ROOT)
	    tts_obj = TTS(model_dir=os.path.join(settings.DIR_ROOT, "VietTTS", "models"))
	    VOICE_MAP = load_voices(os.path.join(settings.DIR_ROOT, "VietTTS", "samples"))

	    if voice.isdigit():
	        available_files = list(VOICE_MAP.values())
	        try:
	            voice_file = available_files[int(voice)]
	        except (IndexError, ValueError):
	            # IndexError: the index is past the last available voice.
	            # ValueError: str.isdigit() accepts some characters (e.g.
	            # superscript digits) that int() cannot parse.
	            # Both mean "no such voice"; report it like any other miss.
	            voice_file = None
	    else:
	        voice_file = VOICE_MAP.get(voice)

	    if not voice_file or not os.path.exists(voice_file):
	        raise ValueError("Voice file not found")

	    print(f"Output path: {output_path}")

	    # NOTE(review): min/max duration are presumably seconds of prompt audio;
	    # confirm against load_prompt_speech_from_file.
	    prompt_speech_16k = load_prompt_speech_from_file(
	        filepath=voice_file,
	        min_duration=3,
	        max_duration=10,
	    )

	    tts_obj.tts_to_file(
	        text=text,
	        prompt_speech_16k=prompt_speech_16k,
	        output_path=output_path,
	        speed=speed,
	    )

	    print("END TTS worker")


	if __name__ == "__main__":
	    # Command-line contract (positional):
	    #   SPEED VOICE TEXT OUTPUT_PATH [OUTPUT_FORMAT]
	    # The first four are required; OUTPUT_FORMAT falls back to "wav".
	    if len(sys.argv) < 5:
	        # Exit with a readable usage line instead of an IndexError traceback.
	        # sys.exit() with a string prints it to stderr and exits with status 1.
	        sys.exit(
	            f"usage: {sys.argv[0]} SPEED VOICE TEXT OUTPUT_PATH [OUTPUT_FORMAT]"
	        )

	    speed = sys.argv[1]
	    voice = sys.argv[2]
	    text = sys.argv[3]
	    output_path = sys.argv[4]
	    output_format = sys.argv[5] if len(sys.argv) > 5 else "wav"

	    load_model(speed, voice, text, output_path, output_format)