File size: 10,956 Bytes
25e6afd
 
 
 
 
54b5712
 
 
25e6afd
 
 
 
 
 
 
 
45e9602
 
 
25e6afd
 
45e9602
 
25e6afd
45e9602
 
25e6afd
 
 
 
 
 
 
 
 
 
 
 
 
 
45e9602
 
 
 
 
 
 
 
25e6afd
 
 
 
 
 
 
 
 
 
 
45e9602
 
25e6afd
 
 
 
 
54b5712
 
 
 
 
 
 
 
25e6afd
 
54b5712
 
 
 
 
 
 
 
 
 
25e6afd
54b5712
 
 
 
 
 
 
25e6afd
54b5712
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25e6afd
 
54b5712
 
 
 
25e6afd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54b5712
 
 
 
 
45e9602
 
54b5712
 
 
 
 
 
 
 
45e9602
 
54b5712
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25e6afd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
import asyncio
import logging
import os
import shutil
import subprocess
import tempfile
import time
from typing import List, Dict, Optional

import edge_tts
import pygame

# Module-wide logging setup: timestamped INFO-level records, shared "EdgeTTS" logger.
_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)
logger = logging.getLogger("EdgeTTS")

class EdgeTextToSpeech:
    """Text-to-speech helper built on ``edge_tts`` with optional pygame playback.

    Speech is synthesized with ``edge_tts.Communicate``. Playback goes through
    ``pygame.mixer`` when a mixer could be initialized; saving to a file works
    regardless. When Edge TTS fails while saving, a pyttsx3 WAV (optionally
    converted to MP3 with ffmpeg) is produced instead.

    Attributes:
        current_voice: Voice short name used when a call passes no ``voice``.
        mixer_available: True when ``pygame.mixer.init`` succeeded.
    """

    def __init__(self):
        """Initialize Microsoft Edge TTS."""
        # Always set a default voice regardless of audio device availability
        self.current_voice = "en-US-AriaNeural"
        self.mixer_available = False
        try:
            pygame.mixer.init(frequency=22050, size=-16, channels=2, buffer=512)
            self.mixer_available = True
            logger.info("Edge TTS engine initialized successfully (pygame mixer ready)")
        except Exception as e:
            # In containers there is no audio device; this is expected. We can still save audio files.
            logger.warning(f"Pygame mixer not available (no audio device). File generation will still work. Details: {e}")

    async def speak_async(self, text: str, voice: Optional[str] = None):
        """Convert text to speech using Edge TTS and play it (async).

        The audio is synthesized into a uniquely named temporary MP3, played
        through the pygame mixer when one is available, and the temporary
        file is always removed afterwards.

        Args:
            text: Text to synthesize.
            voice: Voice short name; falls back to ``self.current_voice``.

        Returns:
            True on success, False if synthesis or playback raised.
        """
        temp_filename = None
        try:
            voice_to_use = voice or self.current_voice
            logger.info(f"Speaking with {voice_to_use}: {text}")

            # Create TTS communication
            communicate = edge_tts.Communicate(text, voice_to_use)

            # mkstemp yields a collision-free name; the previous hash(text)-based
            # name could clash between different texts or concurrent calls.
            fd, temp_filename = tempfile.mkstemp(prefix="temp_edge_audio_", suffix=".mp3")
            os.close(fd)
            await communicate.save(temp_filename)

            # Play the audio file if a mixer is available (local/dev only)
            if self.mixer_available and pygame.mixer.get_init():
                pygame.mixer.music.load(temp_filename)
                pygame.mixer.music.play()

                # Wait for playback to finish without blocking the event loop
                while pygame.mixer.music.get_busy():
                    await asyncio.sleep(0.1)

            return True

        except Exception as e:
            logger.error(f"Error in Edge TTS: {e}")
            # In server/container contexts, skip local playback fallback.
            return False

        finally:
            # Runs on success, failure and cancellation so the temp file never leaks.
            self._remove_temp_audio(temp_filename)

    def _remove_temp_audio(self, path: Optional[str]) -> None:
        """Best-effort removal of a temporary audio file created for playback."""
        if not path:
            return
        try:
            if self.mixer_available and pygame.mixer.get_init():
                # Release the mixer's handle on the file first (pygame >= 2.0);
                # otherwise removal can fail on platforms that lock open files.
                unload = getattr(pygame.mixer.music, "unload", None)
                if unload is not None:
                    unload()
        except Exception as e:
            logger.debug(f"Could not unload mixer music: {e}")
        try:
            os.remove(path)
        except FileNotFoundError:
            pass
        except OSError as e:
            logger.warning(f"Could not remove temporary audio file {path}: {e}")

    def speak(self, text: str, voice: Optional[str] = None):
        """Synchronous wrapper for speak_async.

        Uses ``asyncio.run``, so it must not be called from a running event loop.
        """
        return asyncio.run(self.speak_async(text, voice))

    async def save_audio_async(self, text: str, filename: str, voice: Optional[str] = None) -> Optional[str]:
        """Save text-to-speech audio to a file and return the actual saved path.

        Primary: Edge TTS (mp3). Fallback: pyttsx3 -> wav, then convert to mp3 if ffmpeg is available.

        Args:
            text: Text to synthesize.
            filename: Requested output path. The extension is replaced with
                ``.mp3`` (or ``.wav`` for the unconverted fallback).
            voice: Voice short name; falls back to ``self.current_voice``.

        Returns:
            str: Path to the saved file (mp3 or wav), derived from ``filename``
                as given (it is not made absolute), on success.
            None: on failure.
        """
        try:
            voice_to_use = voice or self.current_voice

            # Ensure target directory exists. A bare filename has an empty
            # dirname, and os.makedirs("") raises, so only create when present.
            target_dir = os.path.dirname(filename)
            if target_dir:
                os.makedirs(target_dir, exist_ok=True)

            # Prefer mp3 when using Edge TTS
            target_path = filename
            if not target_path.lower().endswith('.mp3'):
                target_path = f"{os.path.splitext(filename)[0]}.mp3"

            # Try Edge TTS first
            communicate = edge_tts.Communicate(text, voice_to_use)
            await communicate.save(target_path)
            # Ensure file is fully written; treat a missing/empty file as a failure
            if not await asyncio.to_thread(self._wait_for_file, target_path):
                raise RuntimeError(f"Edge TTS produced no audio at {target_path}")

            logger.info(f"Audio saved to: {target_path}")
            return target_path

        except Exception as e:
            logger.error(f"Error saving audio with Edge TTS: {e}")

            # Fallback to local TTS: save WAV via pyttsx3
            try:
                base_path = os.path.splitext(filename)[0]
                wav_path = f"{base_path}.wav"
                await self._fallback_save_wav_async(text, wav_path)
                if not await asyncio.to_thread(self._wait_for_file, wav_path):
                    raise RuntimeError(f"fallback TTS produced no audio at {wav_path}")

                # Convert to mp3 if ffmpeg exists
                if self._has_ffmpeg():
                    mp3_path = f"{base_path}.mp3"
                    # ffmpeg is a blocking subprocess; keep it off the event loop.
                    converted = await asyncio.to_thread(self._ffmpeg_wav_to_mp3, wav_path, mp3_path)
                    if converted and await asyncio.to_thread(self._wait_for_file, mp3_path):
                        # Remove the intermediate wav only once the mp3 really exists
                        try:
                            os.remove(wav_path)
                        except OSError as remove_error:
                            logger.debug(f"Could not remove intermediate WAV {wav_path}: {remove_error}")
                        logger.info(f"Audio saved to: {mp3_path} (fallback via ffmpeg)")
                        return mp3_path

                    # Conversion failed: keep the WAV instead of returning a missing mp3
                    logger.info(f"Audio saved to: {wav_path} (fallback WAV; ffmpeg conversion failed)")
                    return wav_path

                # If no ffmpeg, keep WAV
                logger.info(f"Audio saved to: {wav_path} (fallback WAV; ffmpeg not found)")
                return wav_path

            except Exception as fe:
                logger.error(f"Fallback TTS save failed: {fe}")
                return None

    def save_audio(self, text: str, filename: str, voice: Optional[str] = None):
        """Synchronous wrapper for save_audio_async.

        Returns the actual path to the saved file or None. Uses
        ``asyncio.run``, so it must not be called from a running event loop.
        """
        return asyncio.run(self.save_audio_async(text, filename, voice))

    async def get_available_voices(self) -> List[Dict]:
        """Get list of available voices.

        Returns:
            The result of ``edge_tts.list_voices()``, or an empty list if the
            lookup raised.
        """
        try:
            voices = await edge_tts.list_voices()
            return voices
        except Exception as e:
            logger.error(f"Error getting voices: {e}")
            return []

    def list_voices(self):
        """List available voices (synchronous).

        Prints up to three voices for each of a fixed set of priority locales
        and returns the full voice list.
        """
        voices = asyncio.run(self.get_available_voices())

        print("\nAvailable voices:")
        print("-" * 50)

        # Group by language
        lang_groups: Dict[str, List[Dict]] = {}
        for voice in voices:
            lang_groups.setdefault(voice['Locale'], []).append(voice)

        # Show popular languages first
        priority_langs = ['en-US', 'en-GB', 'en-AU', 'es-ES', 'fr-FR', 'de-DE', 'it-IT', 'ja-JP']

        for lang in priority_langs:
            if lang in lang_groups:
                print(f"\n{lang}:")
                for voice in lang_groups[lang][:3]:  # Show first 3 voices per language
                    gender = voice.get('Gender', 'Unknown')
                    print(f"  {voice['ShortName']} - {voice['FriendlyName']} ({gender})")

        print(f"\n... and {len(voices)} total voices available")
        return voices

    async def _fallback_save_wav_async(self, text: str, wav_path: str):
        """Save speech to a WAV file using pyttsx3 in a background thread."""
        def _save():
            # Imported lazily so pyttsx3 is only required when the fallback runs.
            import pyttsx3
            engine = pyttsx3.init()
            # Prefer eSpeak NG in Linux containers
            # engine.setProperty('voice', 'english')  # optional
            # Optional: adjust rate/voice here if needed
            engine.save_to_file(text, wav_path)
            engine.runAndWait()

        await asyncio.to_thread(_save)

    async def _fallback_play_async(self, text: str):
        """Placeholder for local pyttsx3 playback; intentionally does nothing."""
        # In container/server context, do not attempt local playback
        return

    def _has_ffmpeg(self) -> bool:
        """Return True when an ``ffmpeg`` executable is found on PATH."""
        return shutil.which("ffmpeg") is not None

    def _ffmpeg_wav_to_mp3(self, wav_path: str, mp3_path: str) -> bool:
        """Convert a WAV file to MP3 (22050 Hz, stereo, 128 kbit/s) via ffmpeg.

        Returns:
            True if ffmpeg exited successfully, False otherwise (the error is
            logged), so callers can keep the WAV when conversion fails.
        """
        cmd = [
            "ffmpeg", "-y",
            "-i", wav_path,
            "-vn",
            "-ar", "22050",
            "-ac", "2",
            "-b:a", "128k",
            mp3_path,
        ]
        try:
            subprocess.run(cmd, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        except (subprocess.SubprocessError, OSError) as e:
            logger.error(f"ffmpeg conversion failed: {e}")
            return False
        return True

    def _wait_for_file(self, path: str, timeout: float = 3.0) -> bool:
        """Wait until a file exists and has non-zero size or timeout reached.

        Args:
            path: File to poll.
            timeout: Maximum number of seconds to wait.

        Returns:
            True as soon as the file exists with non-zero size, False if the
            timeout elapsed first. The file is checked at least once.
        """
        # Monotonic clock: immune to wall-clock adjustments during the wait.
        deadline = time.monotonic() + timeout
        while True:
            try:
                if os.path.getsize(path) > 0:
                    return True
            except (OSError, TypeError, ValueError):
                # Missing file or unusable path: keep polling until the deadline.
                pass
            if time.monotonic() >= deadline:
                return False
            time.sleep(0.05)

    def run_interactive_mode(self):
        """Run interactive Edge TTS mode.

        Reads commands from stdin until 'quit', Ctrl-C, or end of input.
        """
        print("\n=== Microsoft Edge Text-to-Speech ===")
        print("Commands:")
        print("  Type text to speak it")
        print("  'voice <name>' - Change voice (e.g., 'voice en-US-JennyNeural')")
        print("  'voices' - List available voices")
        print("  'save <filename>' - Save last text to file")
        print("  'current' - Show current voice")
        print("  'quit' - Exit program")
        print("=" * 38)

        last_text = ""

        while True:
            try:
                user_input = input(f"\n[{self.current_voice}] Enter text: ").strip()

                if not user_input:
                    continue

                command = user_input.lower()

                if command == 'quit':
                    print("Goodbye!")
                    break
                elif command == 'voices':
                    self.list_voices()
                elif command == 'current':
                    print(f"Current voice: {self.current_voice}")
                elif command.startswith('voice '):
                    new_voice = user_input[6:].strip()
                    if new_voice:
                        self.current_voice = new_voice
                        print(f"Voice changed to: {new_voice}")
                        # Test the new voice
                        self.speak("Voice changed successfully", new_voice)
                    else:
                        print("Please specify a voice name")
                elif command.startswith('save '):
                    filename = user_input[5:].strip()
                    if last_text and filename:
                        self.save_audio(last_text, filename)
                    else:
                        print("No text to save or filename not provided")
                else:
                    # Speak the entered text
                    last_text = user_input
                    self.speak(user_input)

            except (KeyboardInterrupt, EOFError):
                # EOFError means stdin was closed; without this exit the generic
                # handler below would catch it on every iteration and loop forever.
                print("\nGoodbye!")
                break
            except Exception as e:
                logger.error(f"Error in interactive mode: {e}")

def main():
    """Print the install hint, speak a greeting, then start the interactive prompt."""
    # Remind the user which packages must be installed
    for hint in ("Make sure to install required packages:", "pip install edge-tts pygame"):
        print(hint)
    print()

    engine = EdgeTextToSpeech()

    # Smoke-test synthesis/playback before handing control to the user
    engine.speak("Hello! Microsoft Edge text to speech is working perfectly.")

    engine.run_interactive_mode()


if __name__ == "__main__":
    main()