Spaces:

Younup
/

UpVoice

Sleeping

App Files Files Community

UpVoice / src /audio_generation.py

breddaz

fix output folder permissions

8db2fc1 11 months ago

raw

history blame contribute delete

5.62 kB

	import base64
	import os
	import shutil
	import tempfile
	from dataclasses import dataclass
	from pathlib import Path
	from typing import List, Tuple

	from openai import OpenAI
	from pydub import AudioSegment

	from script_generation import PodScript

	# Root folder for generated audio, placed under the system temp directory.
	# It is created at import time so the default AudioConfig.output_path
	# always points at an existing folder.
	output_root = Path(tempfile.gettempdir()).joinpath("upvoice_outputs")
	output_root.mkdir(exist_ok=True, parents=True)


	@dataclass
	class AudioConfig:
		"""Settings consumed by ScriptToAudio."""

		# Folder under which ScriptToAudio creates one sub-directory per run.
		output_path: str = os.fspath(output_root)
		# OpenAI API key; the default is read from OPENAI_API_KEY once, when this
		# class is defined, and is an empty string if the variable is unset.
		api_key: str = os.environ.get("OPENAI_API_KEY", "")
		# Name of the text-to-speech model passed to the OpenAI client.
		model: str = "gpt-4o-mini-tts"
		# When True, ScriptToAudio.run returns a fixed sample file path instead
		# of generating audio.
		mocked: bool = False

	class ScriptToAudio:
		"""Turns a podcast script into a single WAV file using OpenAI text-to-speech.

		Each line of the script is synthesized into its own WAV segment, the
		segments are concatenated with pydub into one file, and the per-line
		segment files are deleted afterwards.
		"""

		def __init__(self, config: AudioConfig):
			"""Stores the configuration and creates the OpenAI client.

			Args:
				config (AudioConfig): Output folder, API key, model name and mock flag.
			"""
			self._config = config
			self._llm_client = OpenAI(api_key=config.api_key)

		async def _text_to_speech(self, script: PodScript, voice_1: str, voice_2: str) -> Tuple[List[str], str]:
			"""Synthesizes every line of the script into its own WAV file.

			Voices alternate by line index: even-indexed lines use ``voice_1`` and
			odd-indexed lines use ``voice_2``.

			Args:
				script (PodScript): The script whose ``conversation`` items provide the ``text`` to convert.
				voice_1 (str): The voice for the first speaker.
				voice_2 (str): The voice for the second speaker.
			Returns:
				Tuple[List[str], str]: The paths of the generated audio files, in conversation order, and the output directory.
			Raises:
				OSError: If the output directory cannot be created.
				RuntimeError: If generating or moving any segment fails.
			"""
			output_dir = Path(self._create_output_directory())
			filenames = []

			try:
				for i, line in enumerate(script.conversation):
					filename = output_dir / f"output_{i}.wav"
					voice = voice_1 if i % 2 == 0 else voice_2

					tmp_path = await self._generate_audio(line.text, voice)
					# shutil.move falls back to copy + delete when the temp directory
					# and the output directory are on different filesystems, where a
					# plain os.rename would raise.
					shutil.move(tmp_path, str(filename))
					filenames.append(str(filename))

				return filenames, str(output_dir)
			except Exception as e:
				raise RuntimeError(f"Failed to convert text to speech: {e}") from e

		async def _generate_audio(self, text: str, voice: str) -> str:
			"""Generates a WAV file from the given text using the specified voice.

			NOTE: the OpenAI client used here is the synchronous one, so this
			coroutine blocks the event loop for the duration of the request.

			Args:
				text (str): The input text to be converted to audio.
				voice (str): The voice to use; it is lower-cased before being sent.
			Returns:
				str: The path of the temporary file holding the generated audio. The caller owns the file.
			Raises:
				RuntimeError: If the audio generation or the file write fails.
			"""
			tmp_path = None
			try:
				# Call the API before creating the file so a failed request
				# leaves nothing behind on disk.
				response = self._llm_client.audio.speech.create(
					model=self._config.model,
					voice=voice.lower(),
					input=text,
					response_format="wav",
				)
				# delete=False: the file must outlive this function. Writing through
				# the handle we already hold avoids opening the same path twice.
				with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
					tmp_path = tmp_file.name
					tmp_file.write(response.content)
				return tmp_path
			except Exception as e:
				if tmp_path is not None:
					# Best-effort cleanup of a partially written file; the original
					# error is the one worth reporting.
					try:
						os.remove(tmp_path)
					except OSError:
						pass
				raise RuntimeError(f"Failed to generate audio: {e}") from e

		def _create_output_directory(self) -> str:
			"""Creates a randomly named output directory for the generated audio files.

			The name is the URL-safe base64 encoding of 8 random bytes, created
			under the configured ``output_path``.

			Returns:
				str: The path of the created output directory.
			Raises:
				OSError: If the directory creation fails.
			"""
			random_bytes = os.urandom(8)
			folder_name = base64.urlsafe_b64encode(random_bytes).decode("utf-8")
			final_folder = Path(self._config.output_path) / folder_name
			final_folder.mkdir(parents=True, exist_ok=True)
			return str(final_folder)

		def _combine_audio_files(self, filenames: List[str], output_file: str) -> None:
			"""Combines multiple WAV files into a single WAV file and deletes the inputs.

			Args:
				filenames (List[str]): The paths of the audio files to be combined, in playback order.
				output_file (str): The path of the output WAV file.
			Raises:
				ValueError: If the input filenames list is empty.
				RuntimeError: If loading, exporting or deleting the files fails.
			"""
			if not filenames:
				raise ValueError("No input files provided")

			try:
				# The segments are requested as WAV and saved as .wav, so they are
				# loaded as WAV rather than forced through the MP3 decoder.
				audio_segments = [AudioSegment.from_wav(filename) for filename in filenames]

				combined = sum(audio_segments, AudioSegment.empty())
				combined.export(output_file, format="wav")

				# The per-line segments are removed only after the export succeeded.
				for filename in filenames:
					os.remove(filename)
			except Exception as e:
				raise RuntimeError(f"Failed to combine audio files: {e}") from e

		async def run(self, script: PodScript, voice_1: str, voice_2: str) -> str:
			"""Generates the podcast audio for a script and returns the path of the result.

			When the configuration has ``mocked`` set, no audio is generated and a
			fixed sample file path is returned instead.

			Args:
				script (PodScript): The script to convert.
				voice_1 (str): The voice for the first speaker.
				voice_2 (str): The voice for the second speaker.
			Returns:
				str: The path of the combined WAV file.
			Raises:
				RuntimeError: If any step of the conversion fails.
			"""
			if self._config.mocked:
				return './src/public/articles/mocked_java9_openai_tts.wav'

			try:
				audio_files, folder_name = await self._text_to_speech(
					script, voice_1, voice_2
				)
				final_output = os.path.join(folder_name, "combined_output.wav")
				self._combine_audio_files(audio_files, final_output)
				return final_output
			except Exception as e:
				raise RuntimeError(f"Failed to convert article to podcast: {e}") from e