Spaces:

MCP-1st-Birthday
/

codeAtlas

Sleeping

App Files Files Community

aghilsabu committed on Nov 30, 2025

Commit

c0c9f39

1 Parent(s): 4703744

feat: add ElevenLabs voice synthesis integration

Browse files

Files changed (3) hide show

src/integrations/__init__.py +7 -0
src/integrations/elevenlabs.py +85 -0
src/integrations/voice.py +61 -0

src/integrations/__init__.py ADDED Viewed

	@@ -0,0 +1,7 @@

+"""CodeAtlas Integrations"""
+from .elevenlabs import VoiceNarrator
+from .voice import generate_audio_summary
+from .modal_client import ModalClient, get_modal_client
+__all__ = ["VoiceNarrator", "generate_audio_summary", "ModalClient", "get_modal_client"]

src/integrations/elevenlabs.py ADDED Viewed

	@@ -0,0 +1,85 @@

+"""ElevenLabs Voice Integration"""
+import logging
+from datetime import datetime
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Optional, Tuple
+from ..config import get_config, AUDIOS_DIR
+logger = logging.getLogger("codeatlas.elevenlabs")
+# The ElevenLabs SDK is treated as optional: an ImportError is caught and the
+# outcome is recorded in ELEVENLABS_AVAILABLE, which VoiceNarrator checks
+# before it tries to build a client or synthesize audio.
+try:
+    from elevenlabs.client import ElevenLabs
+    ELEVENLABS_AVAILABLE = True
+except ImportError:
+    ELEVENLABS_AVAILABLE = False
+@dataclass
+class VoiceConfig:
+    """Default text-to-speech settings that VoiceNarrator passes to ElevenLabs."""
+    # Voice used by VoiceNarrator.generate when no explicit voice_id is given.
+    voice_id: str = "JBFqnCBsd6RMkjVDRZzb"
+    # Forwarded as ``model_id`` to ``text_to_speech.convert``.
+    model_id: str = "eleven_multilingual_v2"
+    # Forwarded as ``output_format`` to ``text_to_speech.convert``.
+    output_format: str = "mp3_44100_128"
+# Display label -> ElevenLabs voice ID; this is what VoiceNarrator.get_voices returns.
+# The "George (Male)" ID is the same value as the VoiceConfig.voice_id default.
+AVAILABLE_VOICES = {
+    "George (Male)": "JBFqnCBsd6RMkjVDRZzb",
+    "Rachel (Female)": "21m00Tcm4TlvDq8ikWAM",
+    "Adam (Male)": "pNInz6obpgDQGcFmaJgB",
+    "Bella (Female)": "EXAVITQu4vr4xnSDxMaL",
+    "Antoni (Male)": "ErXwobaYiN019PkySvjV",
+}
+class VoiceNarrator:
+    def __init__(self, api_key: Optional[str] = None, voice_config: Optional[VoiceConfig] = None):
+        self.config = get_config()
+        self.api_key = api_key or self.config.elevenlabs_api_key
+        self.voice_config = voice_config or VoiceConfig()
+        self.audios_dir = AUDIOS_DIR
+        self._client = None
+    @property
+    def available(self) -> bool:
+        return ELEVENLABS_AVAILABLE and bool(self.api_key)
+    @property
+    def client(self):
+        if self._client is None and self.available:
+            self._client = ElevenLabs(api_key=self.api_key)
+        return self._client
+    def generate(self, text: str, voice_id: Optional[str] = None) -> Tuple[Optional[Path], Optional[str]]:
+        if not ELEVENLABS_AVAILABLE:
+            return None, "ElevenLabs not installed. Run: pip install elevenlabs"
+        if not self.api_key:
+            return None, "ElevenLabs API key not configured"
+        if not text or not text.strip():
+            return None, "No text provided"
+        try:
+            audio = self.client.text_to_speech.convert(
+                text=text,
+                voice_id=voice_id or self.voice_config.voice_id,
+                model_id=self.voice_config.model_id,
+                output_format=self.voice_config.output_format,
+            )
+            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+            audio_path = self.audios_dir / f"summary_{timestamp}.mp3"
+            with open(audio_path, "wb") as f:
+                for chunk in audio:
+                    f.write(chunk)
+            logger.info(f"Generated audio: {audio_path}")
+            return audio_path, None
+        except Exception as e:
+            logger.exception("Audio generation failed")
+            return None, f"Error: {str(e)}"
+    def get_voices(self) -> dict:
+        return AVAILABLE_VOICES

src/integrations/voice.py ADDED Viewed

	@@ -0,0 +1,61 @@

+"""Voice Module - AI description + ElevenLabs TTS"""
+import logging
+from typing import Tuple, Optional
+from pathlib import Path
+from ..config import get_config
+from ..core.analyzer import CodeAnalyzer
+from .elevenlabs import VoiceNarrator
+logger = logging.getLogger("codeatlas.voice")
+# Instruction text that generate_audio_summary places ahead of the DOT source
+# when it asks the model for a narration script.
+NARRATION_PROMPT = """Analyze this architecture diagram and provide a brief, conversational summary suitable for audio narration.
+Keep it under 200 words. Focus on what the codebase does, key components and their relationships, and the overall architecture pattern.
+Provide a natural, spoken summary (no bullet points, no markdown)."""
+def generate_audio_summary(
+    dot_source: str,
+    gemini_api_key: Optional[str] = None,
+    elevenlabs_api_key: Optional[str] = None,
+    model_name: Optional[str] = None,
+    voice_id: Optional[str] = None,
+) -> Tuple[Optional[Path], str]:
+    config = get_config()
+    gemini_key = gemini_api_key or config.gemini_api_key
+    elevenlabs_key = elevenlabs_api_key or config.elevenlabs_api_key
+    if not elevenlabs_key:
+        return None, "⚠️ ElevenLabs API key not set. Go to Settings."
+    if not gemini_key:
+        return None, "⚠️ Gemini API key not set. Go to Settings."
+    if not dot_source:
+        return None, "⚠️ No diagram loaded. Generate or load a diagram first."
+    try:
+        logger.info("Generating description for audio...")
+        analyzer = CodeAnalyzer(api_key=gemini_key, model_name="gemini-2.0-flash")
+        prompt = f"{NARRATION_PROMPT}\n\nDOT diagram:\n```\n{dot_source}\n```"
+        result = analyzer.chat(prompt, "", None)
+        if not result.success or not result.content:
+            return None, f"⚠️ Failed to generate description: {result.error or 'Empty response'}"
+        logger.info(f"Generated description: {len(result.content)} chars")
+        narrator = VoiceNarrator(api_key=elevenlabs_key)
+        if not narrator.available:
+            return None, "⚠️ ElevenLabs not available"
+        audio_path, error = narrator.generate(result.content, voice_id)
+        if error:
+            return None, f"❌ {error}"
+        return audio_path, "✅ Audio generated!"
+    except Exception as e:
+        logger.exception("Audio generation failed")
+        return None, f"❌ Error: {str(e)}"