import logging
import os
import uuid
from typing import Optional

import httpx
from fastapi import HTTPException, UploadFile

logger = logging.getLogger(__name__)


class ElevenLabsService:
    """Client for the ElevenLabs voice-cloning and text-to-speech endpoints.

    Configuration is read from environment variables when the service is
    constructed:

    * ``ELEVENLABS_API_KEY`` -- sent as the ``xi-api-key`` header (no default).
    * ``ELEVENLABS_BASE_URL`` -- defaults to ``https://api.elevenlabs.io/v1``.
    * ``ELEVENLABS_MODEL_ID`` -- defaults to ``eleven_multilingual_v2``.
    * ``ELEVENLABS_TIMEOUT_SECONDS`` -- request timeout, defaults to ``40``.

    All failures are reported to callers as ``fastapi.HTTPException``.
    """

    def __init__(self) -> None:
        self.api_key = os.getenv("ELEVENLABS_API_KEY")
        self.base_url = os.getenv("ELEVENLABS_BASE_URL", "https://api.elevenlabs.io/v1")
        self.model_id = os.getenv("ELEVENLABS_MODEL_ID", "eleven_multilingual_v2")
        self.timeout = float(os.getenv("ELEVENLABS_TIMEOUT_SECONDS", "40"))

    def _headers(self) -> dict:
        """Return the authentication headers for an ElevenLabs request.

        Raises:
            HTTPException: 500 if no API key is configured.
        """
        if not self.api_key:
            raise HTTPException(
                status_code=500,
                detail="ELEVENLABS_API_KEY is not configured",
            )
        return {"xi-api-key": self.api_key}

    async def clone_voice(self, audio_file: UploadFile) -> str:
        """Clone a voice in ElevenLabs and return the generated voice_id.

        Args:
            audio_file: Uploaded audio sample to clone the voice from.

        Returns:
            The ``voice_id`` reported by the ``/voices/add`` endpoint.

        Raises:
            HTTPException: 400 if the upload is missing or empty; 500 if the
                API key is not configured or an unexpected error occurs;
                502 if ElevenLabs answers with an error status or a response
                without a usable ``voice_id``; 504 if the request times out.
        """
        if audio_file is None:
            raise HTTPException(status_code=400, detail="speaker_wav is required")

        file_bytes = await audio_file.read()
        if not file_bytes:
            raise HTTPException(status_code=400, detail="speaker_wav is empty")

        voice_name = f"voiceapi-temp-{uuid.uuid4().hex[:10]}"
        files = {
            "files": (
                audio_file.filename or "sample.wav",
                file_bytes,
                audio_file.content_type or "audio/wav",
            )
        }
        data = {
            "name": voice_name,
            "description": "Temporary cloned voice from VoiceAPI session",
        }

        try:
            # This method is a coroutine, so the upload must not block the
            # event loop: use the async client rather than httpx.Client.
            async with httpx.AsyncClient(timeout=self.timeout) as client:
                response = await client.post(
                    f"{self.base_url}/voices/add",
                    headers=self._headers(),
                    data=data,
                    files=files,
                )

            if response.status_code >= 400:
                logger.error("ElevenLabs clone failed: %s", response.text)
                raise HTTPException(
                    status_code=502,
                    detail=f"Voice cloning failed: {response.text[:300]}",
                )

            try:
                payload = response.json()
            except ValueError as exc:
                # A success status with a non-JSON body is an upstream fault.
                logger.error("ElevenLabs clone returned non-JSON body: %s", response.text)
                raise HTTPException(
                    status_code=502, detail="voice_id missing in clone response"
                ) from exc

            voice_id = payload.get("voice_id") if isinstance(payload, dict) else None
            if not voice_id:
                raise HTTPException(
                    status_code=502, detail="voice_id missing in clone response"
                )
            return voice_id
        except httpx.TimeoutException as exc:
            raise HTTPException(
                status_code=504, detail="Voice cloning timed out"
            ) from exc
        except HTTPException:
            # Already in the shape callers expect; do not re-wrap as a 500.
            raise
        except Exception as exc:
            logger.exception("Unexpected clone error")
            raise HTTPException(
                status_code=500, detail=f"Clone request failed: {exc}"
            ) from exc

    def generate_speech(
        self,
        text: str,
        voice_id: str,
        language: Optional[str] = None,
        output_format: str = "mp3_44100_128",
    ) -> bytes:
        """Generate speech bytes using ElevenLabs text-to-speech API.

        Args:
            text: Text to synthesise; must contain non-whitespace characters.
            voice_id: Identifier of the voice to speak with.
            language: Optional value sent as ``language_code`` in the request.
            output_format: Value sent as the ``output_format`` query parameter.

        Returns:
            The raw response body returned by the text-to-speech endpoint.

        Raises:
            HTTPException: 400 if ``text`` or ``voice_id`` is missing; 500 if
                the API key is not configured or an unexpected error occurs;
                502 if ElevenLabs answers with an error status; 504 if the
                request times out.
        """
        # Guard against None as well as blank strings so a missing value
        # yields the intended 400 instead of an AttributeError.
        if not text or not text.strip():
            raise HTTPException(status_code=400, detail="text is required")
        if not voice_id:
            raise HTTPException(status_code=400, detail="voice_id is required")

        body = {
            "text": text,
            "model_id": self.model_id,
            "voice_settings": {
                "stability": 0.5,
                "similarity_boost": 0.75,
            },
        }
        if language:
            body["language_code"] = language

        try:
            with httpx.Client(timeout=self.timeout) as client:
                response = client.post(
                    f"{self.base_url}/text-to-speech/{voice_id}",
                    params={"output_format": output_format},
                    headers={**self._headers(), "Accept": "audio/mpeg"},
                    json=body,
                )

            if response.status_code >= 400:
                logger.error("ElevenLabs TTS failed: %s", response.text)
                raise HTTPException(
                    status_code=502,
                    detail=f"Speech generation failed: {response.text[:300]}",
                )
            return response.content
        except httpx.TimeoutException as exc:
            raise HTTPException(
                status_code=504, detail="Speech generation timed out"
            ) from exc
        except HTTPException:
            # Already in the shape callers expect; do not re-wrap as a 500.
            raise
        except Exception as exc:
            logger.exception("Unexpected speech generation error")
            raise HTTPException(
                status_code=500, detail=f"Speech generation request failed: {exc}"
            ) from exc