| |
| |
| |
| |
| |
| |
| |
| |
|
|
| import { NextRequest } from 'next/server'; |
| import { generateTTS } from '@/lib/audio/tts-providers'; |
| import { resolveTTSApiKey, resolveTTSBaseUrl } from '@/lib/server/provider-config'; |
| import type { TTSProviderId } from '@/lib/audio/types'; |
| import { createLogger } from '@/lib/logger'; |
| import { apiError, apiSuccess } from '@/lib/server/api-response'; |
| import { validateUrlForSSRF } from '@/lib/server/ssrf-guard'; |
| import { VOXCPM_AUTO_VOICE_ID, VOXCPM_TTS_PROVIDER_ID } from '@/lib/audio/voxcpm'; |
|
|
| const log = createLogger('TTS API'); |
|
|
| export const maxDuration = 30; |
|
|
| export async function POST(req: NextRequest) { |
| let ttsProviderId: string | undefined; |
| let ttsVoice: string | undefined; |
| let audioId: string | undefined; |
| try { |
| const body = await req.json(); |
| const { text, ttsModelId, ttsSpeed, ttsApiKey, ttsBaseUrl, ttsProviderOptions } = body as { |
| text: string; |
| audioId: string; |
| ttsProviderId: TTSProviderId; |
| ttsModelId?: string; |
| ttsVoice: string; |
| ttsSpeed?: number; |
| ttsApiKey?: string; |
| ttsBaseUrl?: string; |
| ttsProviderOptions?: Record<string, unknown>; |
| }; |
| ttsProviderId = body.ttsProviderId; |
| ttsVoice = body.ttsVoice; |
| audioId = body.audioId; |
|
|
| |
| if (!text || !audioId || !ttsProviderId || !ttsVoice) { |
| return apiError( |
| 'MISSING_REQUIRED_FIELD', |
| 400, |
| 'Missing required fields: text, audioId, ttsProviderId, ttsVoice', |
| ); |
| } |
|
|
| |
| if (ttsProviderId === 'browser-native-tts') { |
| return apiError('INVALID_REQUEST', 400, 'browser-native-tts must be handled client-side'); |
| } |
|
|
| const voxcpmVoicePrompt = |
| typeof ttsProviderOptions?.voicePrompt === 'string' ? ttsProviderOptions.voicePrompt : ''; |
| if ( |
| ttsProviderId === VOXCPM_TTS_PROVIDER_ID && |
| ttsVoice === VOXCPM_AUTO_VOICE_ID && |
| !voxcpmVoicePrompt.trim() |
| ) { |
| return apiError( |
| 'VOXCPM_AUTO_VOICE_REQUIRES_CONTEXT', |
| 400, |
| 'VoxCPM Auto Voice requires agent context', |
| ); |
| } |
|
|
| const clientBaseUrl = ttsBaseUrl || undefined; |
| if (clientBaseUrl) { |
| const ssrfError = await validateUrlForSSRF(clientBaseUrl); |
| if (ssrfError) { |
| return apiError('INVALID_URL', 403, ssrfError); |
| } |
| } |
|
|
| const apiKey = clientBaseUrl |
| ? ttsApiKey || '' |
| : resolveTTSApiKey(ttsProviderId, ttsApiKey || undefined); |
| const baseUrl = clientBaseUrl |
| ? clientBaseUrl |
| : resolveTTSBaseUrl(ttsProviderId, ttsBaseUrl || undefined); |
|
|
| |
| const config = { |
| providerId: ttsProviderId as TTSProviderId, |
| modelId: ttsModelId, |
| voice: ttsVoice, |
| speed: ttsSpeed ?? 1.0, |
| apiKey, |
| baseUrl, |
| providerOptions: ttsProviderOptions, |
| }; |
|
|
| log.info( |
| `Generating TTS: provider=${ttsProviderId}, model=${ttsModelId || 'default'}, voice=${ttsVoice}, audioId=${audioId}, textLen=${text.length}`, |
| ); |
|
|
| |
| const { audio, format } = await generateTTS(config, text); |
|
|
| |
| const base64 = Buffer.from(audio).toString('base64'); |
|
|
| return apiSuccess({ audioId, base64, format }); |
| } catch (error) { |
| log.error( |
| `TTS generation failed [provider=${ttsProviderId ?? 'unknown'}, voice=${ttsVoice ?? 'unknown'}, audioId=${audioId ?? 'unknown'}]:`, |
| error, |
| ); |
| return apiError( |
| 'GENERATION_FAILED', |
| 500, |
| error instanceof Error ? error.message : String(error), |
| ); |
| } |
| } |
|
|