import logging
import struct
from typing import List, Tuple

import aiohttp

logger = logging.getLogger(__name__)

# Length of a canonical PCM WAV header; used as the assumed header size when
# the buffer cannot be parsed.
_WAV_HEADER_SIZE = 44

# Fallback byte rate used when the header is unusable: 2 bytes per sample at
# 24000 samples per second (presumably 16-bit mono PCM -- confirm with the API).
_FALLBACK_BYTES_PER_SECOND = 2 * 24000

# Overall time budget (seconds) for a single synthesis request.
_REQUEST_TIMEOUT_SECONDS = 120

# Voice identifiers advertised by this client, in their original order.
_AVAILABLE_VOICES = (
    "af_heart",
    "af_alloy",
    "af_aoede",
    "af_bella",
    "af_jessica",
    "af_kore",
    "af_nicole",
    "af_nova",
    "af_river",
    "af_sarah",
    "af_sky",
    "am_adam",
    "am_echo",
    "am_eric",
    "am_fenrir",
    "am_liam",
    "am_michael",
    "am_onyx",
    "am_puck",
    "am_santa",
    "bf_emma",
    "bf_isabella",
    "bm_george",
    "bm_lewis",
    "bf_alice",
    "bf_lily",
    "bm_daniel",
    "bm_fable",
)


class TTSError(Exception):
    """Raised when the TTS API answers with a non-200 status.

    Subclasses ``Exception`` so existing ``except Exception`` handlers
    continue to catch it.
    """


class TTSClient:
    """Client for Kokoro TTS via Hugging Face Cloud API"""

    def __init__(self, api_url: str):
        """
        Initialize TTS client

        Args:
            api_url: Base URL for the TTS API (HF_TTS environment variable).
                A trailing slash is stripped so endpoint paths can be appended.
        """
        self.api_url = api_url.rstrip('/')
        logger.info("Using cloud TTS API at %s", self.api_url)

    async def generate(self, text: str, voice: str, speed: float = 1.0) -> Tuple[bytes, float]:
        """
        Generate speech from text

        Args:
            text: Text to convert to speech
            voice: Voice identifier (e.g., 'af_heart', 'am_adam')
            speed: Speech speed multiplier (0.5-2.0, default 1.0). The value
                is forwarded as-is; no range check is done here.

        Returns:
            Tuple of (audio_bytes, duration_seconds)

        Raises:
            TTSError: If the API responds with a status other than 200.
        """
        endpoint = f"{self.api_url}/v1/audio/speech"
        # NOTE(review): no "response_format" is sent, yet the duration
        # estimate below expects WAV -- confirm the server's default format.
        payload = {
            "model": "kokoro",
            "input": text,
            "voice": voice,
            "speed": speed,
        }

        logger.debug(
            "Generating audio with voice=%s, speed=%s, text_length=%d",
            voice, speed, len(text),
        )

        async with aiohttp.ClientSession() as session:
            async with session.post(
                endpoint,
                json=payload,
                headers={"Content-Type": "application/json"},
                timeout=aiohttp.ClientTimeout(total=_REQUEST_TIMEOUT_SECONDS),
            ) as response:
                if response.status != 200:
                    error_text = await response.text()
                    raise TTSError(f"TTS API error ({response.status}): {error_text}")

                audio_data = await response.read()

        duration = self._estimate_audio_duration(audio_data)
        logger.debug("Generated audio: %d bytes, %.2fs", len(audio_data), duration)
        return audio_data, duration

    def _estimate_audio_duration(self, audio_buffer: bytes) -> float:
        """
        Estimate audio duration (seconds) from a WAV buffer

        The RIFF chunks are walked to find ``fmt `` (sample layout) and
        ``data`` (PCM payload), so headers longer than the canonical 44 bytes
        are handled. The declared data size is capped at the bytes actually
        present, which covers truncated buffers and placeholder sizes.

        When the buffer is not a parseable WAV, the duration is estimated by
        assuming a 44 byte header followed by PCM at the fallback byte rate.
        The estimate is never negative.
        """
        total = len(audio_buffer)
        # Clamp at zero: buffers shorter than one header must not yield a
        # negative duration.
        fallback = max(total - _WAV_HEADER_SIZE, 0) / _FALLBACK_BYTES_PER_SECOND

        if total < _WAV_HEADER_SIZE:
            return fallback
        if audio_buffer[:4] != b'RIFF' or audio_buffer[8:12] != b'WAVE':
            return fallback

        bytes_per_second = 0
        offset = 12  # first chunk follows 'RIFF' <size> 'WAVE'
        try:
            while offset + 8 <= total:
                chunk_id, chunk_size = struct.unpack_from('<4sI', audio_buffer, offset)
                body_start = offset + 8

                if chunk_id == b'fmt ' and body_start + 16 <= total:
                    (_audio_format, channels, sample_rate,
                     _byte_rate, _block_align, bits_per_sample) = struct.unpack_from(
                        '<HHIIHH', audio_buffer, body_start
                    )
                    bytes_per_second = sample_rate * channels * (bits_per_sample // 8)
                elif chunk_id == b'data':
                    if bytes_per_second <= 0:
                        # No usable format description before the payload.
                        return fallback
                    data_size = min(chunk_size, total - body_start)
                    return data_size / bytes_per_second

                # Chunks are word-aligned: odd sizes carry one pad byte.
                offset = body_start + chunk_size + (chunk_size & 1)
        except struct.error as exc:
            logger.warning("Could not parse WAV header, using fallback estimate: %s", exc)

        return fallback

    # NOTE(review): the method name was not recoverable from the reviewed
    # source; confirm it matches what callers use. Declared static so it works
    # both on the class and on an instance.
    @staticmethod
    def get_available_voices() -> List[str]:
        """Return list of available TTS voices"""
        # Hand out a fresh list so callers cannot mutate the shared tuple.
        return list(_AVAILABLE_VOICES)