/**
 * TTS (Text-to-Speech) Provider Implementation
 *
 * Factory pattern for routing TTS requests to appropriate provider implementations.
 * Follows the same architecture as lib/ai/providers.ts for consistency.
 *
 * Currently Supported Providers:
 * - OpenAI TTS: https://platform.openai.com/docs/guides/text-to-speech
 * - Azure TTS: https://learn.microsoft.com/en-us/azure/ai-services/speech-service/text-to-speech
 * - GLM TTS: https://docs.bigmodel.cn/cn/guide/models/sound-and-video/glm-tts
 * - Qwen TTS: https://bailian.console.aliyun.com/
 * - MiniMax TTS: https://platform.minimaxi.com/docs/api-reference/speech-t2a-http
 * - Doubao TTS: https://www.volcengine.com/docs/6561/1257543
 * - ElevenLabs TTS: https://elevenlabs.io/docs/api-reference/text-to-speech/convert
 * - VoxCPM TTS: vLLM-Omni, Python API or nano-vLLM backend (see ./voxcpm)
 * - Browser Native: Web Speech API (client-side only)
 *
 * HOW TO ADD A NEW PROVIDER:
 *
 * 1. Add provider ID to TTSProviderId in lib/audio/types.ts
 *    Example: | 'elevenlabs-tts'
 *
 * 2. Add provider configuration to lib/audio/constants.ts
 *    Example:
 *    'elevenlabs-tts': {
 *      id: 'elevenlabs-tts',
 *      name: 'ElevenLabs',
 *      requiresApiKey: true,
 *      defaultBaseUrl: 'https://api.elevenlabs.io/v1',
 *      icon: '/logos/elevenlabs.svg',
 *      voices: [...],
 *      supportedFormats: ['mp3', 'pcm'],
 *      speedRange: { min: 0.5, max: 2.0, default: 1.0 }
 *    }
 *
 * 3. Implement provider function in this file
 *    Pattern: async function generateXxxTTS(config, text): Promise<TTSGenerationResult>
 *    - Validate config and build API request
 *    - Handle API authentication (apiKey, headers)
 *    - Convert provider-specific parameters (voice, speed, format)
 *    - Return { audio: Uint8Array, format: string }
 *
 *    Example:
 *    async function generateElevenLabsTTS(
 *      config: TTSModelConfig,
 *      text: string
 *    ): Promise<TTSGenerationResult> {
 *      const baseUrl = config.baseUrl || TTS_PROVIDERS['elevenlabs-tts'].defaultBaseUrl;
 *
 *      const response = await fetch(`${baseUrl}/text-to-speech/${config.voice}`, {
 *        method: 'POST',
 *        headers: {
 *          'xi-api-key': config.apiKey!,
 *          'Content-Type': 'application/json',
 *        },
 *        body: JSON.stringify({
 *          text,
 *          model_id: 'eleven_multilingual_v2',
 *          voice_settings: {
 *            stability: 0.5,
 *            similarity_boost: 0.75,
 *          }
 *        }),
 *      });
 *
 *      if (!response.ok) {
 *        throw new Error(`ElevenLabs TTS API error: ${response.statusText}`);
 *      }
 *
 *      const arrayBuffer = await response.arrayBuffer();
 *      return {
 *        audio: new Uint8Array(arrayBuffer),
 *        format: 'mp3',
 *      };
 *    }
 *
 * 4. Add case to generateTTS() switch statement
 *    case 'elevenlabs-tts':
 *      return await generateElevenLabsTTS(config, text);
 *
 * 5. Add i18n translations in lib/i18n.ts
 *    providerElevenLabsTTS: { zh: 'ElevenLabs TTS', en: 'ElevenLabs TTS' }
 *
 * Error Handling Patterns:
 * - Always validate API key if requiresApiKey is true
 * - Throw descriptive errors for API failures
 * - Include response.statusText or error messages from API
 * - For client-only providers (browser-native), throw error directing to client-side usage
 *
 * API Call Patterns:
 * - Direct API: Use fetch with appropriate headers and body format (recommended for better encoding support)
 * - SSML: For Azure-like providers requiring SSML markup
 * - URL-based: For providers returning audio URL (download in second step)
 */

import type { TTSModelConfig } from './types';
import { isCustomTTSProvider } from './types';
import { TTS_PROVIDERS } from './constants';
import {
  VOXCPM_VLLM_MODEL_ID,
  VOXCPM_AUTO_VOICE_ID,
  normalizeVoxCPMBackend,
  type VoxCPMProviderOptions,
} from './voxcpm';

/**
 * Result of TTS generation
 */
export interface TTSGenerationResult {
  /** Raw audio bytes as returned (or decoded) from the provider. */
  audio: Uint8Array;
  /** Short container/codec name such as 'mp3' or 'wav'. */
  format: string;
}

/**
 * Thrown when a TTS provider returns a rate-limit / concurrency-quota error.
 * Allows downstream consumers to distinguish rate-limit errors from other TTS failures.
 *
 * TODO: The API route currently catches all errors uniformly as GENERATION_FAILED.
 * This class enables future retry/backoff logic without changing the throw sites.
 */
export class TTSRateLimitError extends Error {
  constructor(
    public readonly provider: string,
    message: string,
  ) {
    super(message);
    this.name = 'TTSRateLimitError';
  }
}

/**
 * Generate speech using specified TTS provider
 *
 * @param config - Provider selection plus credentials, voice, speed and model.
 * @param text - Text to synthesize.
 * @returns The synthesized audio bytes and their format.
 * @throws Error when a built-in provider that requires an API key has none, when the
 *   provider is client-only ('browser-native-tts'), or when the provider ID is unknown.
 */
export async function generateTTS(
  config: TTSModelConfig,
  text: string,
): Promise<TTSGenerationResult> {
  const provider = TTS_PROVIDERS[config.providerId as keyof typeof TTS_PROVIDERS];

  // Validate API key if required (only for built-in providers with known config)
  if (provider?.requiresApiKey && !config.apiKey) {
    throw new Error(`API key required for TTS provider: ${config.providerId}`);
  }

  switch (config.providerId) {
    case 'openai-tts':
      return await generateOpenAITTS(config, text);
    case 'azure-tts':
      return await generateAzureTTS(config, text);
    case 'glm-tts':
      return await generateGLMTTS(config, text);
    case 'qwen-tts':
      return await generateQwenTTS(config, text);
    case 'voxcpm-tts':
      return await generateVoxCPMTTS(config, text);
    case 'minimax-tts':
      return await generateMiniMaxTTS(config, text);
    case 'doubao-tts':
      return await generateDoubaoTTS(config, text);
    case 'elevenlabs-tts':
      return await generateElevenLabsTTS(config, text);
    case 'browser-native-tts':
      throw new Error(
        'Browser Native TTS must be handled client-side using Web Speech API. This provider cannot be used on the server.',
      );
    default:
      // Custom providers are routed through the OpenAI-compatible implementation.
      if (isCustomTTSProvider(config.providerId)) {
        return await generateOpenAITTS(config, text);
      }
      throw new Error(`Unsupported TTS provider: ${config.providerId}`);
  }
}

/**
 * OpenAI TTS implementation (direct API call with explicit UTF-8 encoding)
 *
 * Also used for custom providers (see generateTTS). Those are not covered by the
 * API-key validation in generateTTS, so the Authorization header is only sent when a
 * non-empty key is configured instead of sending "Bearer undefined".
 */
async function generateOpenAITTS(
  config: TTSModelConfig,
  text: string,
): Promise<TTSGenerationResult> {
  const baseUrl = config.baseUrl || TTS_PROVIDERS['openai-tts'].defaultBaseUrl;

  // Use gpt-4o-mini-tts for best quality and intelligent realtime applications
  const response = await fetch(`${baseUrl}/audio/speech`, {
    method: 'POST',
    headers: {
      ...getBackendAuthHeaders(config.apiKey),
      'Content-Type': 'application/json; charset=utf-8',
    },
    body: JSON.stringify({
      model: config.modelId || 'gpt-4o-mini-tts',
      input: text,
      voice: config.voice,
      speed: config.speed || 1.0,
    }),
  });

  if (!response.ok) {
    throw new Error(`OpenAI TTS API error: ${await readTTSApiError(response)}`);
  }

  const arrayBuffer = await response.arrayBuffer();
  const contentType = response.headers.get('content-type') || '';

  return {
    audio: new Uint8Array(arrayBuffer),
    format: getAudioResponseFormat(contentType),
  };
}

/**
 * VoxCPM2 TTS implementation.
 *
 * MultiMind Classroom keeps one internal VoxCPM request shape, then adapts it to the
 * selected official backend protocol.
 *
 * @throws Error when no base URL is available, when the auto voice is selected without
 *   a voice prompt, or when the backend responds with a non-2xx status.
 */
async function generateVoxCPMTTS(
  config: TTSModelConfig,
  text: string,
): Promise<TTSGenerationResult> {
  const baseUrl = (config.baseUrl || TTS_PROVIDERS['voxcpm-tts'].defaultBaseUrl || '').replace(
    /\/$/,
    '',
  );
  if (!baseUrl) {
    throw new Error('VoxCPM base URL is required');
  }

  const options = (config.providerOptions || {}) as VoxCPMProviderOptions;
  const backend = normalizeVoxCPMBackend(options.backend);

  // An explicit prompt wins; otherwise any voice other than 'default' / the auto voice ID
  // is itself used as the prompt.
  const voicePrompt =
    options.voicePrompt ||
    (config.voice && config.voice !== 'default' && config.voice !== VOXCPM_AUTO_VOICE_ID
      ? config.voice
      : undefined);
  if (config.voice === VOXCPM_AUTO_VOICE_ID && !voicePrompt) {
    throw new Error('VoxCPM Auto Voice requires agent context');
  }

  // When both a transcript and reference audio are supplied the text is sent unchanged;
  // otherwise the voice prompt is prepended to the text in parentheses.
  const usePromptContinuation = Boolean(options.promptText?.trim() && options.referenceAudioBase64);

  const request = {
    targetText: usePromptContinuation ? text : buildVoxCPMTargetText(text, voicePrompt),
    voicePrompt,
    promptText: options.promptText,
    cfgValue: options.cfgValue ?? 2.0,
    inferenceTimesteps: options.inferenceTimesteps ?? 10,
    normalize: options.normalize ?? false,
    denoise: options.denoise ?? false,
    referenceAudioBase64: options.referenceAudioBase64,
    referenceAudioMimeType: options.referenceAudioMimeType,
    referenceAudioName: options.referenceAudioName,
  };

  let response: Response;
  if (backend === 'nano-vllm') {
    response = await postVoxCPMNanoVLLM(baseUrl, request, config.apiKey);
  } else if (backend === 'python-api') {
    response = await postVoxCPMPythonAPI(baseUrl, request, config.apiKey);
  } else {
    response = await postVoxCPMVLLMOmni(baseUrl, request, config);
  }

  if (!response.ok) {
    throw new Error(`VoxCPM TTS API error: ${await readTTSApiError(response)}`);
  }

  const arrayBuffer = await response.arrayBuffer();
  const contentType = response.headers.get('content-type') || '';

  return {
    audio: new Uint8Array(arrayBuffer),
    format: getAudioResponseFormat(contentType),
  };
}

/**
 * Prefix `text` with the sanitized voice prompt wrapped in parentheses.
 *
 * Control characters are replaced by spaces and parentheses (ASCII and full-width) are
 * removed from the prompt so it cannot terminate the "(prompt)" wrapper early.
 * Returns `text` unchanged when the prompt is missing or empty after sanitizing.
 */
function buildVoxCPMTargetText(text: string, voicePrompt?: string): string {
  const prompt = voicePrompt
    ?.replace(/[\p{C}]+/gu, ' ')
    .replace(/[()\uFF08\uFF09]/gu, '')
    .replace(/\s+/gu, ' ')
    .trim();

  return prompt ? `(${prompt})${text}` : text;
}

/**
 * Map a response Content-Type header to a short audio format name.
 * Falls back to 'mp3' when the content type is not recognized.
 */
function getAudioResponseFormat(contentType: string): string {
  if (contentType.includes('audio/wav') || contentType.includes('audio/x-wav')) return 'wav';
  if (contentType.includes('audio/mpeg') || contentType.includes('audio/mp3')) return 'mp3';
  if (contentType.includes('audio/flac')) return 'flac';
  if (contentType.includes('audio/ogg')) return 'ogg';
  if (contentType.includes('audio/webm')) return 'webm';
  return 'mp3';
}

/**
 * Derive the format of a reference audio clip from its MIME type or file extension.
 * Falls back to 'wav' when neither gives a recognizable hint.
 */
function getVoxCPMAudioFormat(mimeType?: string, fileName?: string): string {
  const lowerName = fileName?.toLowerCase() || '';

  if (mimeType?.includes('wav') || lowerName.endsWith('.wav')) return 'wav';
  if (mimeType?.includes('mpeg') || mimeType?.includes('mp3') || lowerName.endsWith('.mp3')) {
    return 'mp3';
  }
  if (mimeType?.includes('flac') || lowerName.endsWith('.flac')) return 'flac';
  if (mimeType?.includes('ogg') || lowerName.endsWith('.ogg')) return 'ogg';
  if (mimeType?.includes('webm') || lowerName.endsWith('.webm')) return 'webm';
  return 'wav';
}

/** Build the speech endpoint URL, adding the '/v1' segment unless the base URL already ends with it. */
function getVLLMOmniSpeechUrl(baseUrl: string): string {
  return baseUrl.endsWith('/v1') ? `${baseUrl}/audio/speech` : `${baseUrl}/v1/audio/speech`;
}

/** Resolve the model ID for vLLM-Omni; an empty ID or the alias 'VoxCPM2' maps to VOXCPM_VLLM_MODEL_ID. */
function getVLLMOmniModelId(config: TTSModelConfig): string {
  const modelId = config.modelId?.trim();
  if (!modelId || modelId === 'VoxCPM2') return VOXCPM_VLLM_MODEL_ID;
  return modelId;
}

/** Return a Bearer Authorization header for a non-blank API key, or no headers at all. */
function getBackendAuthHeaders(apiKey?: string): Record<string, string> {
  const trimmedKey = apiKey?.trim();
  return trimmedKey ? { Authorization: `Bearer ${trimmedKey}` } : {};
}

/**
 * POST a synthesis request to the vLLM-Omni backend as JSON on its audio/speech endpoint.
 * Reference audio, when present, is sent inline as a data URL.
 */
async function postVoxCPMVLLMOmni(
  baseUrl: string,
  params: {
    targetText: string;
    promptText?: string;
    referenceAudioBase64?: string;
    referenceAudioMimeType?: string;
    referenceAudioName?: string;
  },
  config: TTSModelConfig,
): Promise<Response> {
  const payload: Record<string, unknown> = {
    model: getVLLMOmniModelId(config),
    input: params.targetText,
    // VoxCPM2's vLLM-Omni adapter currently ignores named voices; prompts/ref_audio carry voice identity.
    voice: 'default',
    response_format: 'wav',
    stream: false,
  };

  if (params.referenceAudioBase64) {
    const referenceAudio = getVoxCPMDataAudioUrl(
      params.referenceAudioBase64,
      params.referenceAudioMimeType,
      params.referenceAudioName,
    );
    payload.ref_audio = referenceAudio;

    // The same clip doubles as prompt audio when its transcript is provided.
    const promptText = params.promptText?.trim();
    if (promptText) {
      payload.prompt_audio = referenceAudio;
      payload.prompt_text = promptText;
    }
  }

  return fetch(getVLLMOmniSpeechUrl(baseUrl), {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json; charset=utf-8',
      ...getBackendAuthHeaders(config.apiKey),
    },
    body: JSON.stringify(payload),
  });
}

/**
 * Wrap base64 audio in a `data:` URL. An explicit MIME type is used as-is; otherwise
 * one is derived from the detected format.
 */
function getVoxCPMDataAudioUrl(base64: string, mimeType?: string, fileName?: string): string {
  const format = getVoxCPMAudioFormat(mimeType, fileName);
  const mediaType =
    mimeType?.trim() ||
    (format === 'mp3'
      ? 'audio/mpeg'
      : format === 'flac'
        ? 'audio/flac'
        : format === 'ogg'
          ? 'audio/ogg'
          : format === 'webm'
            ? 'audio/webm'
            : 'audio/wav');

  return `data:${mediaType};base64,${base64}`;
}

/** Decode a base64 string into a Blob; the type defaults to 'audio/wav'. */
function base64ToBlob(base64: string, mimeType?: string): Blob {
  const binary = atob(base64);
  const bytes = new Uint8Array(binary.length);
  for (let index = 0; index < binary.length; index++) {
    bytes[index] = binary.charCodeAt(index);
  }
  return new Blob([bytes], { type: mimeType || 'audio/wav' });
}

/**
 * POST a synthesis request to the Python API backend as multipart form data
 * on `${baseUrl}/tts/upload`.
 */
async function postVoxCPMPythonAPI(
  baseUrl: string,
  params: {
    targetText: string;
    promptText?: string;
    cfgValue: number;
    inferenceTimesteps: number;
    normalize: boolean;
    denoise: boolean;
    referenceAudioBase64?: string;
    referenceAudioMimeType?: string;
    referenceAudioName?: string;
  },
  apiKey?: string,
): Promise<Response> {
  const formData = new FormData();
  formData.set('text', params.targetText);
  formData.set('cfg_value', String(params.cfgValue));
  formData.set('inference_timesteps', String(params.inferenceTimesteps));
  formData.set('normalize', String(params.normalize));
  formData.set('denoise', String(params.denoise));

  if (params.referenceAudioBase64) {
    const audioBlob = base64ToBlob(params.referenceAudioBase64, params.referenceAudioMimeType);
    const audioName = params.referenceAudioName || 'reference.wav';
    formData.set('reference_audio', audioBlob, audioName);

    // The same clip doubles as prompt audio when its transcript is provided.
    const promptText = params.promptText?.trim();
    if (promptText) {
      formData.set('prompt_audio', audioBlob, audioName);
      formData.set('prompt_text', promptText);
    }
  }

  // No explicit Content-Type: fetch derives the multipart boundary from the FormData body.
  return fetch(`${baseUrl}/tts/upload`, {
    method: 'POST',
    headers: getBackendAuthHeaders(apiKey),
    body: formData,
  });
}

/**
 * POST a synthesis request to the nano-vLLM backend as JSON on `${baseUrl}/generate`.
 */
async function postVoxCPMNanoVLLM(
  baseUrl: string,
  params: {
    targetText: string;
    promptText?: string;
    cfgValue: number;
    referenceAudioBase64?: string;
    referenceAudioMimeType?: string;
    referenceAudioName?: string;
  },
  apiKey?: string,
): Promise<Response> {
  const payload: Record<string, unknown> = {
    target_text: params.targetText,
    cfg_value: params.cfgValue,
  };

  if (params.referenceAudioBase64) {
    const format = getVoxCPMAudioFormat(params.referenceAudioMimeType, params.referenceAudioName);
    payload.ref_audio_wav_base64 = params.referenceAudioBase64;
    payload.ref_audio_wav_format = format;

    // The same clip doubles as prompt audio when its transcript is provided.
    const promptText = params.promptText?.trim();
    if (promptText) {
      payload.prompt_wav_base64 = params.referenceAudioBase64;
      payload.prompt_wav_format = format;
      payload.prompt_text = promptText;
    }
  }

  return fetch(`${baseUrl}/generate`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json; charset=utf-8',
      ...getBackendAuthHeaders(apiKey),
    },
    body: JSON.stringify(payload),
  });
}

/**
 * Extract a human-readable message from a failed response.
 *
 * Recognizes JSON bodies shaped as `{ detail: string }`, `{ error: string }` or
 * `{ error: { message } }`; any other non-empty body is returned verbatim, and an
 * empty body yields `response.statusText`.
 */
async function readTTSApiError(response: Response): Promise<string> {
  const text = await response.text().catch(() => response.statusText);
  if (!text) return response.statusText;

  try {
    const json = JSON.parse(text) as { detail?: unknown; error?: { message?: string } | string };
    if (typeof json.detail === 'string') return json.detail;
    if (typeof json.error === 'string') return json.error;
    if (json.error?.message) return json.error.message;
  } catch {
    // Fall through to raw text.
  }

  return text;
}

/**
 * Azure TTS implementation (direct API call with SSML)
 *
 * Speed is expressed as a relative prosody rate in percent (1.0 -> 0%, 1.5 -> 50%).
 * Both the text and the voice name are XML-escaped before being placed in the SSML.
 */
async function generateAzureTTS(
  config: TTSModelConfig,
  text: string,
): Promise<TTSGenerationResult> {
  const baseUrl = config.baseUrl || TTS_PROVIDERS['azure-tts'].defaultBaseUrl;

  // Build SSML
  const rate = config.speed ? `${((config.speed - 1) * 100).toFixed(0)}%` : '0%';
  const voiceName = escapeXml(String(config.voice ?? ''));
  const ssml = `
    <speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='zh-CN'>
      <voice name='${voiceName}'>
        <prosody rate='${rate}'>${escapeXml(text)}</prosody>
      </voice>
    </speak>
  `.trim();

  const response = await fetch(`${baseUrl}/cognitiveservices/v1`, {
    method: 'POST',
    headers: {
      'Ocp-Apim-Subscription-Key': config.apiKey!,
      'Content-Type': 'application/ssml+xml; charset=utf-8',
      'X-Microsoft-OutputFormat': 'audio-16khz-128kbitrate-mono-mp3',
    },
    body: ssml,
  });

  if (!response.ok) {
    throw new Error(`Azure TTS API error: ${response.statusText}`);
  }

  const arrayBuffer = await response.arrayBuffer();
  return {
    audio: new Uint8Array(arrayBuffer),
    format: 'mp3',
  };
}

/**
 * GLM TTS implementation (GLM API)
 *
 * Always requests WAV output. API errors surface `error.message` and `error.code` when
 * the body is JSON, otherwise the raw body text.
 */
async function generateGLMTTS(config: TTSModelConfig, text: string): Promise<TTSGenerationResult> {
  const baseUrl = config.baseUrl || TTS_PROVIDERS['glm-tts'].defaultBaseUrl;

  const response = await fetch(`${baseUrl}/audio/speech`, {
    method: 'POST',
    headers: {
      Authorization: `Bearer ${config.apiKey}`,
      'Content-Type': 'application/json; charset=utf-8',
    },
    body: JSON.stringify({
      model: config.modelId || 'glm-tts',
      input: text,
      voice: config.voice,
      speed: config.speed || 1.0,
      volume: 1.0,
      response_format: 'wav',
    }),
  });

  if (!response.ok) {
    const errorText = await response.text().catch(() => response.statusText);
    let errorMessage = `GLM TTS API error: ${errorText}`;
    try {
      const errorJson = JSON.parse(errorText);
      if (errorJson.error?.message) {
        errorMessage = `GLM TTS API error: ${errorJson.error.message} (code: ${errorJson.error.code})`;
      }
    } catch {
      // If not JSON, use the text as is
    }
    throw new Error(errorMessage);
  }

  const arrayBuffer = await response.arrayBuffer();
  return {
    audio: new Uint8Array(arrayBuffer),
    format: 'wav',
  };
}

/**
 * Qwen TTS implementation (DashScope API - Qwen3 TTS Flash)
 *
 * Two-step flow: the generation call returns an audio URL, which is then downloaded.
 */
async function generateQwenTTS(config: TTSModelConfig, text: string): Promise<TTSGenerationResult> {
  const baseUrl = config.baseUrl || TTS_PROVIDERS['qwen-tts'].defaultBaseUrl;

  // Calculate speed: Qwen3 uses rate parameter from -500 to 500
  // speed 1.0 = rate 0, speed 2.0 = rate 500, speed 0.5 = rate -250
  // Clamped so out-of-range speeds cannot produce a rate outside that window.
  const rawRate = Math.round(((config.speed || 1.0) - 1.0) * 500);
  const rate = Math.max(-500, Math.min(500, rawRate));

  const response = await fetch(`${baseUrl}/services/aigc/multimodal-generation/generation`, {
    method: 'POST',
    headers: {
      Authorization: `Bearer ${config.apiKey}`,
      'Content-Type': 'application/json; charset=utf-8',
    },
    body: JSON.stringify({
      model: config.modelId || 'qwen3-tts-flash',
      input: {
        text,
        voice: config.voice,
        language_type: 'Chinese', // Default to Chinese, can be made configurable
      },
      parameters: {
        rate, // Speech rate from -500 to 500
      },
    }),
  });

  if (!response.ok) {
    const errorText = await response.text().catch(() => response.statusText);
    throw new Error(`Qwen TTS API error: ${errorText}`);
  }

  const data = await response.json();

  // Check for audio URL in response
  if (!data.output?.audio?.url) {
    throw new Error(`Qwen TTS error: No audio URL in response. Response: ${JSON.stringify(data)}`);
  }

  // Download audio from URL
  const audioUrl = data.output.audio.url;
  const audioResponse = await fetch(audioUrl);

  if (!audioResponse.ok) {
    throw new Error(`Failed to download audio from URL: ${audioResponse.statusText}`);
  }

  const arrayBuffer = await audioResponse.arrayBuffer();

  return {
    audio: new Uint8Array(arrayBuffer),
    format: 'wav', // Qwen3 TTS returns WAV format
  };
}

/**
 * Decode a hex-encoded audio payload into bytes.
 *
 * @throws Error when the payload has odd length or contains non-hex characters
 *   (which would otherwise be silently decoded as zero bytes).
 */
function decodeHexAudio(hex: string): Uint8Array {
  if (hex.length % 2 !== 0) {
    throw new Error('MiniMax TTS error: invalid hex audio payload length');
  }
  if (!/^[0-9a-fA-F]*$/.test(hex)) {
    throw new Error('MiniMax TTS error: hex audio payload contains non-hex characters');
  }
  return new Uint8Array(Buffer.from(hex, 'hex'));
}

/**
 * MiniMax TTS implementation (synchronous HTTP API)
 *
 * Requests hex-encoded output and decodes it locally. The returned format prefers the
 * one reported by the API, then the requested one, then 'mp3'.
 */
async function generateMiniMaxTTS(
  config: TTSModelConfig,
  text: string,
): Promise<TTSGenerationResult> {
  const baseUrl = (config.baseUrl || TTS_PROVIDERS['minimax-tts'].defaultBaseUrl || '').replace(
    /\/$/,
    '',
  );

  const response = await fetch(`${baseUrl}/v1/t2a_v2`, {
    method: 'POST',
    headers: {
      Authorization: `Bearer ${config.apiKey}`,
      'Content-Type': 'application/json; charset=utf-8',
    },
    body: JSON.stringify({
      model: config.modelId || 'speech-2.8-hd',
      text,
      stream: false,
      output_format: 'hex',
      voice_setting: {
        voice_id: config.voice,
        speed: config.speed || 1.0,
        vol: 1,
        pitch: 0,
      },
      audio_setting: {
        sample_rate: 32000,
        bitrate: 128000,
        format: config.format || 'mp3',
        channel: 1,
      },
      language_boost: 'auto',
    }),
  });

  if (!response.ok) {
    const errorText = await response.text().catch(() => response.statusText);
    throw new Error(`MiniMax TTS API error: ${errorText}`);
  }

  const data = await response.json();
  const hexAudio: unknown = data?.data?.audio;

  // A missing, non-string or whitespace-only payload all count as "no audio".
  const cleanedHex = typeof hexAudio === 'string' ? hexAudio.trim() : '';
  if (!cleanedHex) {
    throw new Error(`MiniMax TTS error: No audio returned. Response: ${JSON.stringify(data)}`);
  }

  return {
    audio: decodeHexAudio(cleanedHex),
    format: data?.extra_info?.audio_format || config.format || 'mp3',
  };
}

/**
 * ElevenLabs TTS implementation (direct API call with voice-specific endpoint)
 *
 * Speed is clamped to 0.7-1.2 before being sent. Unknown requested formats fall back
 * to the mp3 output preset in the request, while the returned `format` still echoes
 * the requested value.
 */
async function generateElevenLabsTTS(
  config: TTSModelConfig,
  text: string,
): Promise<TTSGenerationResult> {
  const baseUrl = config.baseUrl || TTS_PROVIDERS['elevenlabs-tts'].defaultBaseUrl;
  const requestedFormat = config.format || 'mp3';
  const clampedSpeed = Math.min(1.2, Math.max(0.7, config.speed || 1.0));

  const outputFormatMap: Record<string, string> = {
    mp3: 'mp3_44100_128',
    opus: 'opus_48000_96',
    pcm: 'pcm_44100',
    wav: 'wav_44100',
    ulaw: 'ulaw_8000',
    alaw: 'alaw_8000',
  };
  const outputFormat = outputFormatMap[requestedFormat] || outputFormatMap.mp3;

  const response = await fetch(
    `${baseUrl}/text-to-speech/${encodeURIComponent(config.voice)}?output_format=${outputFormat}`,
    {
      method: 'POST',
      headers: {
        'xi-api-key': config.apiKey!,
        'Content-Type': 'application/json; charset=utf-8',
      },
      body: JSON.stringify({
        text,
        model_id: config.modelId || 'eleven_multilingual_v2',
        voice_settings: {
          stability: 0.5,
          similarity_boost: 0.75,
          speed: clampedSpeed,
        },
      }),
    },
  );

  if (!response.ok) {
    const errorText = await response.text().catch(() => response.statusText);
    throw new Error(`ElevenLabs TTS API error: ${errorText || response.statusText}`);
  }

  const arrayBuffer = await response.arrayBuffer();
  return {
    audio: new Uint8Array(arrayBuffer),
    format: requestedFormat,
  };
}

/**
 * Get current TTS configuration from settings store
 * Note: This function should only be called in browser context
 *
 * @throws Error when called where `window` is undefined.
 */
export async function getCurrentTTSConfig(): Promise<TTSModelConfig> {
  if (typeof window === 'undefined') {
    throw new Error('getCurrentTTSConfig() can only be called in browser context');
  }

  // Lazy import to avoid circular dependency
  const { useSettingsStore } = await import('@/lib/store/settings');
  const { ttsProviderId, ttsVoice, ttsSpeed, ttsProvidersConfig } = useSettingsStore.getState();

  const providerConfig = ttsProvidersConfig?.[ttsProviderId];

  return {
    providerId: ttsProviderId,
    modelId:
      providerConfig?.modelId ||
      TTS_PROVIDERS[ttsProviderId as keyof typeof TTS_PROVIDERS]?.defaultModelId ||
      '',
    apiKey: providerConfig?.apiKey,
    baseUrl: providerConfig?.baseUrl || providerConfig?.customDefaultBaseUrl,
    voice: ttsVoice,
    speed: ttsSpeed,
  };
}

// Re-export from constants for convenience
export { getAllTTSProviders, getTTSProvider, getTTSVoices } from './constants';

/**
 * Split a body made of back-to-back JSON objects into the source text of each
 * top-level object.
 *
 * Braces inside JSON string values are ignored, and unmatched closing braces between
 * objects are skipped, so one odd chunk cannot desynchronize the rest of the stream.
 */
function splitConcatenatedJsonObjects(text: string): string[] {
  const objects: string[] = [];
  let depth = 0;
  let start = -1;
  let inString = false;
  let escaped = false;

  for (let i = 0; i < text.length; i++) {
    const ch = text[i];

    if (inString) {
      if (escaped) {
        escaped = false;
      } else if (ch === '\\') {
        escaped = true;
      } else if (ch === '"') {
        inString = false;
      }
      continue;
    }

    if (ch === '"') {
      // Quotes only matter inside an object; stray ones between objects are ignored.
      if (depth > 0) inString = true;
    } else if (ch === '{') {
      if (depth === 0) start = i;
      depth++;
    } else if (ch === '}' && depth > 0) {
      depth--;
      if (depth === 0) {
        objects.push(text.slice(start, i + 1));
        start = -1;
      }
    }
  }

  return objects;
}

/**
 * Doubao TTS 2.0 implementation (Volcengine Seed-TTS 2.0)
 *
 * The API key must be given as "appId:accessKey". The response body is read as a
 * sequence of JSON objects: code 0 chunks carry base64 audio, code 20000000 ends the
 * stream, and any other non-zero code is an error.
 *
 * @throws TTSRateLimitError for codes 45000000 / 45000292.
 * @throws Error for a malformed API key, HTTP failures, other error codes, or when no
 *   audio chunk was received.
 */
async function generateDoubaoTTS(
  config: TTSModelConfig,
  text: string,
): Promise<TTSGenerationResult> {
  const apiKey = config.apiKey || '';
  const colonIdx = apiKey.indexOf(':');
  // Both halves must be non-empty: reject ":key", "appId:" and keys without a colon.
  if (colonIdx <= 0 || colonIdx === apiKey.length - 1) {
    throw new Error(
      'Doubao TTS requires API key in format "appId:accessKey". Get both from the Volcengine console.',
    );
  }
  const appId = apiKey.slice(0, colonIdx);
  const accessKey = apiKey.slice(colonIdx + 1);

  const baseUrl = config.baseUrl || TTS_PROVIDERS['doubao-tts'].defaultBaseUrl;
  // speed 1.0 -> 0, 2.0 -> 100, 0.5 -> -50
  const speechRate = Math.round(((config.speed || 1.0) - 1.0) * 100);

  const response = await fetch(`${baseUrl}/unidirectional`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'X-Api-App-Id': appId,
      'X-Api-Access-Key': accessKey,
      'X-Api-Resource-Id': 'seed-tts-2.0',
    },
    body: JSON.stringify({
      user: { uid: 'multimind' },
      req_params: {
        text,
        speaker: config.voice,
        audio_params: { format: 'mp3', sample_rate: 24000, speech_rate: speechRate },
      },
    }),
  });

  if (!response.ok) {
    const errorText = await response.text().catch(() => response.statusText);
    throw new Error(`Doubao TTS API error (${response.status}): ${errorText}`);
  }

  const responseText = await response.text();
  const audioChunks: Uint8Array[] = [];

  for (const rawChunk of splitConcatenatedJsonObjects(responseText)) {
    let chunk: { code?: number; message?: string; data?: string };
    try {
      chunk = JSON.parse(rawChunk) as { code?: number; message?: string; data?: string };
    } catch {
      // Skip fragments that look like objects but are not valid JSON.
      continue;
    }

    if (chunk.code === 0 && chunk.data) {
      audioChunks.push(new Uint8Array(Buffer.from(chunk.data, 'base64')));
    } else if (chunk.code === 20000000) {
      // End-of-stream marker: ignore anything after it.
      break;
    } else if (chunk.code) {
      if (chunk.code === 45000000 || chunk.code === 45000292) {
        throw new TTSRateLimitError('doubao-tts', chunk.message || 'concurrency quota exceeded');
      }
      throw new Error(`Doubao TTS error: ${chunk.message || 'unknown'} (code: ${chunk.code})`);
    }
  }

  if (audioChunks.length === 0) {
    throw new Error('Doubao TTS: no audio data received');
  }

  // Concatenate all chunks into one contiguous buffer.
  const totalLength = audioChunks.reduce((sum, c) => sum + c.length, 0);
  const combined = new Uint8Array(totalLength);
  let offset = 0;
  for (const chunk of audioChunks) {
    combined.set(chunk, offset);
    offset += chunk.length;
  }

  return { audio: combined, format: 'mp3' };
}

/**
 * Escape XML special characters for SSML
 *
 * '&' is replaced first so the entities produced by later replacements are not
 * escaped a second time.
 */
function escapeXml(text: string): string {
  return text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&apos;');
}