| import sys |
| import re |
| import asyncio |
| from kokoro import KPipeline |
| import base_model |
| import utils |
|
|
class StreamingEngine(base_model.BaseEngine):
    """Speech-synthesis engine that drives a Kokoro ``KPipeline``.

    The engine keeps a table of public voice aliases (the keys of
    ``voice_mapping``) and resolves them to Kokoro voice identifiers, then
    streams synthesized audio one segment at a time.
    """

    def __init__(self, name):
        """Set up the voice table, then run the base-class initializer.

        Args:
            name: Engine name, forwarded unchanged to ``BaseEngine.__init__``.
        """
        # NOTE(review): the voice attributes are assigned BEFORE
        # super().__init__(); that order is kept on purpose -- presumably the
        # base initializer may call back into this instance (e.g. load_model).
        # Confirm against BaseEngine before reordering.
        self.default_voice = "af_heart"
        self.voice_mapping = {
            "alloy": "af_heart",
            "echo": "af_bella",
            "fable": "af_nicole",
            "onyx": "af_aoede",
            "nova": "af_aoede",
            "shimmer": "af_aoede",
        }

        super().__init__(name)

    def load_model(self):
        """Create the Kokoro pipeline and record the output sample rate.

        Sets ``self.tts`` and ``self.sample_rate``.

        Raises:
            RuntimeError: If the pipeline cannot be constructed. The original
                exception is chained as ``__cause__``.
        """
        try:
            # Per the Kokoro documentation, lang_code 'a' selects American
            # English and the model produces 24 kHz audio.
            self.tts = KPipeline(lang_code='a')
            self.sample_rate = 24000
            print(f"Model Loaded. Rate: {self.sample_rate}")
        except Exception as e:
            print(f"Error initializing model {self.name}: {e}")
            raise RuntimeError(f"Failed to load model {self.name}") from e

    def get_style_safe(self, voice_name: str):
        """Resolve a requested voice alias to a Kokoro voice identifier.

        The lookup is case-insensitive and ignores surrounding whitespace.
        Unknown aliases -- and ``None`` or any other non-string value --
        resolve to ``self.default_voice`` instead of raising.

        Args:
            voice_name: Alias requested by the caller (a key of
                ``self.voice_mapping``).

        Returns:
            The mapped voice identifier, or ``self.default_voice``.
        """
        target_name = self.default_voice
        # Only strings can be normalized; anything else keeps the default so
        # that this helper never raises on a missing voice.
        if isinstance(voice_name, str):
            clean_name = voice_name.lower().strip()
            target_name = self.voice_mapping.get(clean_name, self.default_voice)
        print(f"Found voice {target_name}")
        return target_name

    def preprocess_text(self, text):
        """Wrap the input text in a list of chunks.

        Args:
            text: Text to synthesize.

        Returns:
            ``[]`` when ``text`` is falsy (``None`` or empty), otherwise a
            one-element list holding ``text`` unchanged.
        """
        if not text:
            return []
        return [text]

    def generate(self, chunks: str, voice_name: str, speed: float):
        """Synthesize speech and yield it one audio segment at a time.

        This is a generator: the pipeline is not invoked until the result is
        iterated.

        Args:
            chunks: Text handed directly to the pipeline.
            voice_name: Voice identifier passed to the pipeline as ``voice``
                (e.g. the value returned by ``get_style_safe``).
            speed: Passed to the pipeline as ``speed``.

        Yields:
            ``audio.numpy()`` for every segment the pipeline produces.
        """
        # Each pipeline result unpacks to (graphemes, phonemes, audio); only
        # the audio is needed here.
        for _graphemes, _phonemes, audio in self.tts(
            chunks, voice=voice_name, speed=speed
        ):
            # Kokoro results may carry no audio; skip those rather than fail
            # on ``None.numpy()``.
            if audio is None:
                continue
            yield audio.numpy()