Commit ·
a7c6f3a
1
Parent(s): 662decd
fix: Correct TTS endpoint and payload format, filter .mp4 only
Browse files
modules/text_story/services/tts_handler.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
"""
|
| 2 |
TTS Handler for Text Story module.
|
| 3 |
-
Handles voice generation and audio processing.
|
| 4 |
"""
|
| 5 |
|
| 6 |
import os
|
|
@@ -21,6 +21,10 @@ class TTSHandler:
|
|
| 21 |
self.tts_url = os.getenv("HF_TTS", "")
|
| 22 |
if not self.tts_url:
|
| 23 |
logger.warning("TTSHandler: HF_TTS not configured, TTS will fail")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
async def generate_tts(self, text: str, voice: str, output_path: str) -> float:
|
| 26 |
"""
|
|
@@ -37,23 +41,32 @@ class TTSHandler:
|
|
| 37 |
if not self.tts_url:
|
| 38 |
raise ValueError("HF_TTS environment variable not set")
|
| 39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
try:
|
| 41 |
async with aiohttp.ClientSession() as session:
|
|
|
|
| 42 |
payload = {
|
| 43 |
-
"
|
|
|
|
| 44 |
"voice": voice
|
| 45 |
}
|
| 46 |
|
| 47 |
async with session.post(
|
| 48 |
-
|
| 49 |
json=payload,
|
| 50 |
-
|
|
|
|
| 51 |
) as response:
|
| 52 |
if response.status != 200:
|
| 53 |
error_text = await response.text()
|
| 54 |
-
raise Exception(f"TTS
|
| 55 |
|
| 56 |
audio_data = await response.read()
|
|
|
|
| 57 |
|
| 58 |
# Save raw audio
|
| 59 |
temp_path = output_path + ".temp.wav"
|
|
@@ -70,8 +83,11 @@ class TTSHandler:
|
|
| 70 |
logger.info(f"TTS: Generated {len(text)} chars, {duration:.2f}s")
|
| 71 |
return duration
|
| 72 |
|
|
|
|
|
|
|
|
|
|
| 73 |
except Exception as e:
|
| 74 |
-
logger.error(f"TTS generation failed: {e}")
|
| 75 |
raise
|
| 76 |
|
| 77 |
def trim_silence(self, input_path: str, output_path: str,
|
|
|
|
| 1 |
"""
|
| 2 |
TTS Handler for Text Story module.
|
| 3 |
+
Handles voice generation and audio processing using Kokoro TTS.
|
| 4 |
"""
|
| 5 |
|
| 6 |
import os
|
|
|
|
| 21 |
self.tts_url = os.getenv("HF_TTS", "")
|
| 22 |
if not self.tts_url:
|
| 23 |
logger.warning("TTSHandler: HF_TTS not configured, TTS will fail")
|
| 24 |
+
else:
|
| 25 |
+
# Remove trailing slash
|
| 26 |
+
self.tts_url = self.tts_url.rstrip('/')
|
| 27 |
+
logger.info(f"TTSHandler: Using TTS endpoint {self.tts_url}")
|
| 28 |
|
| 29 |
async def generate_tts(self, text: str, voice: str, output_path: str) -> float:
|
| 30 |
"""
|
|
|
|
| 41 |
if not self.tts_url:
|
| 42 |
raise ValueError("HF_TTS environment variable not set")
|
| 43 |
|
| 44 |
+
# Correct endpoint format (same as video_creator)
|
| 45 |
+
endpoint = f"{self.tts_url}/v1/audio/speech"
|
| 46 |
+
|
| 47 |
+
logger.info(f"TTS: Generating voice '{voice}' for: {text[:50]}...")
|
| 48 |
+
|
| 49 |
try:
|
| 50 |
async with aiohttp.ClientSession() as session:
|
| 51 |
+
# Correct payload format for Kokoro TTS
|
| 52 |
payload = {
|
| 53 |
+
"model": "kokoro",
|
| 54 |
+
"input": text,
|
| 55 |
"voice": voice
|
| 56 |
}
|
| 57 |
|
| 58 |
async with session.post(
|
| 59 |
+
endpoint,
|
| 60 |
json=payload,
|
| 61 |
+
headers={"Content-Type": "application/json"},
|
| 62 |
+
timeout=aiohttp.ClientTimeout(total=120)
|
| 63 |
) as response:
|
| 64 |
if response.status != 200:
|
| 65 |
error_text = await response.text()
|
| 66 |
+
raise Exception(f"TTS API error ({response.status}): {error_text}")
|
| 67 |
|
| 68 |
audio_data = await response.read()
|
| 69 |
+
logger.info(f"TTS: Received {len(audio_data)} bytes")
|
| 70 |
|
| 71 |
# Save raw audio
|
| 72 |
temp_path = output_path + ".temp.wav"
|
|
|
|
| 83 |
logger.info(f"TTS: Generated {len(text)} chars, {duration:.2f}s")
|
| 84 |
return duration
|
| 85 |
|
| 86 |
+
except aiohttp.ClientError as e:
|
| 87 |
+
logger.error(f"TTS network error: {type(e).__name__}: {e}")
|
| 88 |
+
raise Exception(f"TTS network error: {e}")
|
| 89 |
except Exception as e:
|
| 90 |
+
logger.error(f"TTS generation failed: {type(e).__name__}: {e}")
|
| 91 |
raise
|
| 92 |
|
| 93 |
def trim_silence(self, input_path: str, output_path: str,
|