import io
import base64
import json
import os
import time
from pathlib import Path

import gradio as gr
import numpy as np
import openai
from fastrtc import (
    AdditionalOutputs,
    ReplyOnStopWords,
    Stream,
    get_stt_model,
    get_twilio_turn_credentials,
)


class SambanovaVoiceService:
    """SambaNova chat service with speech-to-text and text-to-speech helpers.

    Wraps an OpenAI-compatible client pointed at the SambaNova endpoint, an
    STT model obtained from ``fastrtc.get_stt_model()``, and an optional TTS
    service object supplied by the caller.
    """

    def __init__(self, tts_service=None):
        """Create the API client and STT model and store the TTS service.

        Args:
            tts_service: Optional object used for speech synthesis. The
                methods of this class call ``text_to_speech(text, language)``
                and ``save_tts_audio(audio_bytes, filename)`` on it. When
                ``None``, the voice methods return no audio.
        """
        # SambaNova client; the API key is read from the environment.
        self.client = openai.OpenAI(
            api_key=os.environ.get("SAMBANOVA_API_KEY"),
            base_url="https://api.sambanova.ai/v1",
        )

        # Speech-to-text model.
        self.stt_model = get_stt_model()

        # Text-to-speech backend (may be None).
        self.tts_service = tts_service

        print("✅ Sambanova Voice Service initialized với TTS")

    def get_available_models(self):
        """Return the list of model names offered by this service."""
        return [
            "Meta-Llama-3.1-8B-Instruct",
            "Meta-Llama-3.1-70B-Instruct"
        ]

    def generate_response(self, messages, model="Meta-Llama-3.1-8B-Instruct", temperature=0.1, top_p=0.1):
        """Request a single (non-streamed) chat completion.

        Args:
            messages: Chat messages passed straight to the completions API.
            model: Model name to use.
            temperature: Sampling temperature.
            top_p: Nucleus-sampling cutoff.

        Returns:
            The content of the first choice. If the request raises, the
            error is printed and a user-facing apology string containing the
            error text is returned instead of raising.
        """
        try:
            response = self.client.chat.completions.create(
                model=model,
                messages=messages,
                temperature=temperature,
                top_p=top_p,
                max_tokens=1024,
            )
            return response.choices[0].message.content
        except Exception as e:
            print(f"❌ Sambanova API Error: {e}")
            return f"Xin lỗi, có lỗi xảy ra: {str(e)}"

    def stream_generate_response(self, messages, model="Meta-Llama-3.1-8B-Instruct", temperature=0.1, top_p=0.1):
        """Stream a chat completion chunk by chunk.

        Args:
            messages: Chat messages passed straight to the completions API.
            model: Model name to use.
            temperature: Sampling temperature.
            top_p: Nucleus-sampling cutoff.

        Yields:
            ``(text_chunk, full_response)`` tuples, where ``full_response``
            is the text accumulated so far. If an exception occurs, a single
            ``(error_msg, error_msg)`` tuple is yielded and the stream ends.
        """
        try:
            response = self.client.chat.completions.create(
                model=model,
                messages=messages,
                temperature=temperature,
                top_p=top_p,
                max_tokens=1024,
                stream=True
            )

            full_response = ""
            for chunk in response:
                # A chunk may carry no choices at all (e.g. a usage-only
                # chunk); indexing [0] on it would abort the whole stream.
                if not chunk.choices:
                    continue
                text_chunk = chunk.choices[0].delta.content
                if text_chunk:
                    full_response += text_chunk
                    yield text_chunk, full_response

        except Exception as e:
            error_msg = f"❌ Lỗi: {str(e)}"
            yield error_msg, error_msg

    def speech_to_text(self, audio):
        """Transcribe audio with the STT model.

        Args:
            audio: Audio input forwarded unchanged to ``stt_model.stt``.

        Returns:
            The transcription, or an empty string if transcription raises.
        """
        try:
            text = self.stt_model.stt(audio)
            print(f"🎤 STT Result: {text}")
            return text
        except Exception as e:
            print(f"❌ STT Error: {e}")
            return ""

    def _synthesize_to_file(self, text, language):
        """Synthesize ``text`` once and save it; return ``(audio_bytes, filepath)``.

        Returns ``(None, None)`` when no TTS service is configured, when the
        service produces no audio, or when synthesis/saving raises (the error
        is printed, not propagated).
        """
        if self.tts_service is None:
            print("❌ TTS service chưa được khởi tạo")
            return None, None

        try:
            audio_bytes = self.tts_service.text_to_speech(text, language)
            if not audio_bytes:
                return None, None

            # Save under a timestamped name and hand back the stored path.
            filename = f"tts_{int(time.time())}.mp3"
            filepath = self.tts_service.save_tts_audio(audio_bytes, filename)
            return audio_bytes, filepath
        except Exception as e:
            print(f"❌ TTS Error: {e}")
            return None, None

    def text_to_speech(self, text, language='vi'):
        """Convert text to speech via the TTS service and save it to a file.

        Args:
            text: Text to synthesize.
            language: Language code forwarded to the TTS service.

        Returns:
            The value returned by ``tts_service.save_tts_audio`` (the saved
            file path), or ``None`` if no TTS service is configured, no audio
            was produced, or an error occurred.
        """
        _, filepath = self._synthesize_to_file(text, language)
        return filepath

    def generate_response_with_voice(self, messages, model="Meta-Llama-3.1-8B-Instruct", language='vi'):
        """Generate a text reply and its spoken version.

        Args:
            messages: Chat messages passed to ``generate_response``.
            model: Model name to use.
            language: Language code forwarded to the TTS service.

        Returns:
            A dict with keys ``"text"`` (the reply), ``"audio"`` (saved audio
            file path or ``None``) and ``"audio_bytes"`` (the synthesized
            bytes, or ``None`` when no audio file was produced).
        """
        try:
            # Text reply first.
            text_response = self.generate_response(messages, model)

            # Synthesize exactly once and reuse the bytes for both outputs,
            # rather than calling the TTS backend a second time.
            audio_bytes, audio_filepath = self._synthesize_to_file(text_response, language)

            return {
                "text": text_response,
                "audio": audio_filepath,
                "audio_bytes": audio_bytes if audio_filepath else None
            }
        except Exception as e:
            print(f"❌ Error in generate_response_with_voice: {e}")
            return {
                "text": f"Xin lỗi, có lỗi xảy ra: {str(e)}",
                "audio": None,
                "audio_bytes": None
            }