aghilsabu committed on
Commit
c0c9f39
·
1 Parent(s): 4703744

feat: add ElevenLabs voice synthesis integration

Browse files
src/integrations/__init__.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ """CodeAtlas Integrations"""
2
+
3
+ from .elevenlabs import VoiceNarrator
4
+ from .voice import generate_audio_summary
5
+ from .modal_client import ModalClient, get_modal_client
6
+
7
+ __all__ = ["VoiceNarrator", "generate_audio_summary", "ModalClient", "get_modal_client"]
src/integrations/elevenlabs.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ElevenLabs Voice Integration"""
2
+
3
+ import logging
4
+ from datetime import datetime
5
+ from dataclasses import dataclass
6
+ from pathlib import Path
7
+ from typing import Optional, Tuple
8
+
9
+ from ..config import get_config, AUDIOS_DIR
10
+
11
+ logger = logging.getLogger("codeatlas.elevenlabs")
12
+
13
+ try:
14
+ from elevenlabs.client import ElevenLabs
15
+ ELEVENLABS_AVAILABLE = True
16
+ except ImportError:
17
+ ELEVENLABS_AVAILABLE = False
18
+
19
+
20
@dataclass
class VoiceConfig:
    """Default synthesis settings forwarded to the ElevenLabs text-to-speech call.

    Each field is passed through unchanged as the keyword argument of the
    same name when audio is generated.
    """

    # Voice used when the caller does not pick one; this is the same ID that
    # AVAILABLE_VOICES lists under "George (Male)".
    voice_id: str = "JBFqnCBsd6RMkjVDRZzb"
    # ElevenLabs model identifier.
    model_id: str = "eleven_multilingual_v2"
    # Output format identifier; generated files are saved with an .mp3 suffix.
    output_format: str = "mp3_44100_128"
25
+
26
+
27
# Display label -> ElevenLabs voice ID for the voices offered to callers.
AVAILABLE_VOICES = {
    "George (Male)": "JBFqnCBsd6RMkjVDRZzb",
    "Rachel (Female)": "21m00Tcm4TlvDq8ikWAM",
    "Adam (Male)": "pNInz6obpgDQGcFmaJgB",
    "Bella (Female)": "EXAVITQu4vr4xnSDxMaL",
    "Antoni (Male)": "ErXwobaYiN019PkySvjV",
}
34
+
35
+
36
class VoiceNarrator:
    """Convert text to speech through ElevenLabs and save the result as an MP3.

    The API key comes from the constructor argument or, failing that, from the
    application config. The ElevenLabs client is built lazily on first access
    so that constructing a narrator never requires the optional ``elevenlabs``
    package to be installed.
    """

    def __init__(self, api_key: Optional[str] = None, voice_config: Optional[VoiceConfig] = None):
        """Store credentials and synthesis settings.

        Args:
            api_key: ElevenLabs API key; falls back to
                ``get_config().elevenlabs_api_key`` when falsy.
            voice_config: Synthesis settings; a default ``VoiceConfig`` is used
                when omitted.
        """
        self.config = get_config()
        self.api_key = api_key or self.config.elevenlabs_api_key
        self.voice_config = voice_config or VoiceConfig()
        self.audios_dir = AUDIOS_DIR
        self._client = None

    @property
    def available(self) -> bool:
        """True when the ``elevenlabs`` package imported and an API key is set."""
        return ELEVENLABS_AVAILABLE and bool(self.api_key)

    @property
    def client(self):
        """Return the cached ElevenLabs client, creating it on first use.

        Returns ``None`` when the narrator is not ``available``.
        """
        if self._client is None and self.available:
            self._client = ElevenLabs(api_key=self.api_key)
        return self._client

    def generate(self, text: str, voice_id: Optional[str] = None) -> Tuple[Optional[Path], Optional[str]]:
        """Synthesize ``text`` and write it to a timestamped MP3 file.

        Args:
            text: Text to narrate; empty or whitespace-only text is rejected.
            voice_id: ElevenLabs voice ID; defaults to the configured voice.

        Returns:
            ``(path, None)`` on success, or ``(None, message)`` describing why
            no audio was produced. Errors are reported through the message and
            are never raised to the caller.
        """
        if not ELEVENLABS_AVAILABLE:
            return None, "ElevenLabs not installed. Run: pip install elevenlabs"
        if not self.api_key:
            return None, "ElevenLabs API key not configured"
        if not text or not text.strip():
            return None, "No text provided"

        audio_path = None
        try:
            audio = self.client.text_to_speech.convert(
                text=text,
                voice_id=voice_id or self.voice_config.voice_id,
                model_id=self.voice_config.model_id,
                output_format=self.voice_config.output_format,
            )

            # The output directory may not exist yet (e.g. fresh checkout);
            # without this the open() below fails with FileNotFoundError.
            self.audios_dir.mkdir(parents=True, exist_ok=True)

            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            audio_path = self.audios_dir / f"summary_{timestamp}.mp3"

            # The response is consumed chunk by chunk while writing, so a
            # network/API failure can surface in the middle of this loop.
            with open(audio_path, "wb") as f:
                for chunk in audio:
                    f.write(chunk)

            logger.info("Generated audio: %s", audio_path)
            return audio_path, None

        except Exception as e:
            logger.exception("Audio generation failed")
            if audio_path is not None:
                # Best-effort cleanup: do not leave a truncated MP3 behind that
                # later code could mistake for a finished narration.
                try:
                    audio_path.unlink()
                except OSError:
                    pass
            return None, f"Error: {e}"

    def get_voices(self) -> dict:
        """Return a copy of the label -> voice ID mapping.

        A copy is returned so callers cannot mutate the shared module-level
        ``AVAILABLE_VOICES`` table by accident.
        """
        return dict(AVAILABLE_VOICES)
src/integrations/voice.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Voice Module - AI description + ElevenLabs TTS"""
2
+
3
+ import logging
4
+ from typing import Tuple, Optional
5
+ from pathlib import Path
6
+
7
+ from ..config import get_config
8
+ from ..core.analyzer import CodeAnalyzer
9
+ from .elevenlabs import VoiceNarrator
10
+
11
+ logger = logging.getLogger("codeatlas.voice")
12
+
13
# Instruction text sent to the LLM ahead of the DOT diagram; it asks for a short,
# plain-spoken summary because the reply is fed directly to text-to-speech.
NARRATION_PROMPT = """Analyze this architecture diagram and provide a brief, conversational summary suitable for audio narration.
Keep it under 200 words. Focus on what the codebase does, key components and their relationships, and the overall architecture pattern.
Provide a natural, spoken summary (no bullet points, no markdown)."""
16
+
17
+
18
def generate_audio_summary(
    dot_source: str,
    gemini_api_key: Optional[str] = None,
    elevenlabs_api_key: Optional[str] = None,
    model_name: Optional[str] = None,
    voice_id: Optional[str] = None,
) -> Tuple[Optional[Path], str]:
    """Describe a DOT diagram with an LLM and narrate the description as audio.

    Args:
        dot_source: Graphviz DOT text of the architecture diagram.
        gemini_api_key: Gemini key; falls back to the configured key.
        elevenlabs_api_key: ElevenLabs key; falls back to the configured key.
        model_name: Accepted for interface compatibility but currently unused;
            the description is always generated with "gemini-2.0-flash".
            NOTE(review): confirm whether callers expect this to be honoured.
        voice_id: ElevenLabs voice ID; the narrator's default is used if omitted.

    Returns:
        ``(audio_path, status_message)``. ``audio_path`` is ``None`` whenever
        no audio was produced; the message is a user-facing status string.
        Exceptions are caught and reported through the message.
    """
    config = get_config()
    gemini_key = gemini_api_key or config.gemini_api_key
    elevenlabs_key = elevenlabs_api_key or config.elevenlabs_api_key

    if not elevenlabs_key:
        return None, "⚠️ ElevenLabs API key not set. Go to Settings."
    if not gemini_key:
        return None, "⚠️ Gemini API key not set. Go to Settings."
    # Whitespace-only input carries no diagram either; reject it here instead
    # of spending an LLM call on an empty prompt.
    if not dot_source or not dot_source.strip():
        return None, "⚠️ No diagram loaded. Generate or load a diagram first."

    try:
        # Check the speech backend first: if it cannot run, there is no point
        # paying for the description that would feed it.
        narrator = VoiceNarrator(api_key=elevenlabs_key)
        if not narrator.available:
            return None, "⚠️ ElevenLabs not available"

        logger.info("Generating description for audio...")
        analyzer = CodeAnalyzer(api_key=gemini_key, model_name="gemini-2.0-flash")

        prompt = f"{NARRATION_PROMPT}\n\nDOT diagram:\n```\n{dot_source}\n```"
        result = analyzer.chat(prompt, "", None)

        if not result.success or not result.content:
            return None, f"⚠️ Failed to generate description: {result.error or 'Empty response'}"

        logger.info("Generated description: %d chars", len(result.content))

        audio_path, error = narrator.generate(result.content, voice_id)
        if error:
            return None, f"❌ {error}"

        return audio_path, "✅ Audio generated!"

    except Exception as e:
        logger.exception("Audio generation failed")
        return None, f"❌ Error: {e}"