import { useState, useEffect, useRef, type ReactNode } from 'react'; import { Label } from '@/components/ui/label'; import { Input } from '@/components/ui/input'; import { Button } from '@/components/ui/button'; import { Textarea } from '@/components/ui/textarea'; import { Tabs, TabsContent, TabsList, TabsTrigger } from '@/components/ui/tabs'; import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue, } from '@/components/ui/select'; import { AlertDialog, AlertDialogAction, AlertDialogCancel, AlertDialogContent, AlertDialogDescription, AlertDialogFooter, AlertDialogHeader, AlertDialogTitle, } from '@/components/ui/alert-dialog'; import { useI18n } from '@/lib/hooks/use-i18n'; import { useSettingsStore } from '@/lib/store/settings'; import { TTS_PROVIDERS, DEFAULT_TTS_VOICES } from '@/lib/audio/constants'; import type { TTSProviderId } from '@/lib/audio/types'; import { Volume2, Loader2, CheckCircle2, XCircle, Eye, EyeOff, Plus, Route, Server, Trash2, Upload, Wand2, FileAudio, Mic, Square, } from 'lucide-react'; import { cn } from '@/lib/utils'; import { toast } from 'sonner'; import { createLogger } from '@/lib/logger'; import { useTTSPreview } from '@/lib/audio/use-tts-preview'; import { isCustomTTSProvider } from '@/lib/audio/types'; import { getVoxCPMProviderOptions, normalizeVoxCPMReferenceAudio, validateVoxCPMReferenceAudio, VOXCPM_REFERENCE_AUDIO_MAX_SECONDS, useVoxCPMVoiceProfiles, } from '@/lib/audio/voxcpm-voices'; import { VOXCPM_BACKENDS, VOXCPM_TTS_PROVIDER_ID, buildVoxCPMBackendUrl, getVoxCPMBackendEndpoint, getVoxCPMProfileVoiceId, normalizeVoxCPMBackend, VOXCPM_VLLM_MODEL_ID, voxCPMBackendSupportsReferenceAudio, } from '@/lib/audio/voxcpm'; const log = createLogger('TTSSettings'); /** Props for TTSSettings. */ interface TTSSettingsProps { /** Provider whose configuration this component reads and writes. It is compared against the ttsProviderId held in the settings store, so it may differ from the active provider. */ selectedProviderId: TTSProviderId; } /** Settings view for a single TTS provider. Reads and writes that provider entry in ttsProvidersConfig (API key, base URL, model id, custom voices), sends a preview request with editable sample text through useTTSPreview, and removes custom providers through removeCustomTTSProvider. */ export function TTSSettings({ selectedProviderId }: TTSSettingsProps) { const { t, locale } = useI18n(); const ttsVoice = useSettingsStore((state) => state.ttsVoice); const ttsSpeed = 
useSettingsStore((state) => state.ttsSpeed); const ttsProvidersConfig = useSettingsStore((state) => state.ttsProvidersConfig); const setTTSProviderConfig = useSettingsStore((state) => state.setTTSProviderConfig); const activeProviderId = useSettingsStore((state) => state.ttsProviderId); const setTTSVoice = useSettingsStore((state) => state.setTTSVoice); const removeCustomTTSProvider = useSettingsStore((state) => state.removeCustomTTSProvider); const ttsProvider = TTS_PROVIDERS[selectedProviderId as keyof typeof TTS_PROVIDERS]; const isCustom = isCustomTTSProvider(selectedProviderId); const providerConfig = ttsProvidersConfig[selectedProviderId]; const isServerConfigured = !!providerConfig?.isServerConfigured; const isVoxCPM = selectedProviderId === 'voxcpm-tts'; const voxcpmBackend = normalizeVoxCPMBackend(providerConfig?.providerOptions?.backend); /* Custom providers carry requiresApiKey in their stored config; other providers take it from the TTS_PROVIDERS entry. */ const requiresApiKey = isCustom ? !!providerConfig?.requiresApiKey : !!ttsProvider?.requiresApiKey; // When testing a non-active provider, use that provider's default voice // instead of the active provider's voice (which may be incompatible). const effectiveVoice = selectedProviderId === activeProviderId ? ttsVoice : isCustomTTSProvider(selectedProviderId) ? 
((providerConfig?.customVoices as Array<{ id: string }> | undefined) || [])[0]?.id || 'default' : DEFAULT_TTS_VOICES[selectedProviderId as keyof typeof DEFAULT_TTS_VOICES] || 'default'; const [showApiKey, setShowApiKey] = useState(false); const [showDeleteConfirm, setShowDeleteConfirm] = useState(false); const [testText, setTestText] = useState(t('settings.ttsTestTextDefault')); const [testStatus, setTestStatus] = useState<'idle' | 'testing' | 'success' | 'error'>('idle'); const [testMessage, setTestMessage] = useState(''); const { previewing: testingTTS, startPreview, stopPreview } = useTTSPreview(); // Doubao TTS uses compound "appId:accessKey" — split for separate UI fields const isDoubao = selectedProviderId === 'doubao-tts'; const rawApiKey = ttsProvidersConfig[selectedProviderId]?.apiKey || ''; const doubaoColonIdx = rawApiKey.indexOf(':'); const doubaoAppId = isDoubao && doubaoColonIdx > 0 ? rawApiKey.slice(0, doubaoColonIdx) : ''; const doubaoAccessKey = isDoubao && doubaoColonIdx > 0 ? rawApiKey.slice(doubaoColonIdx + 1) : isDoubao ? rawApiKey : ''; /* Stores "appId:accessKey" only when both parts are non-empty; otherwise stores whichever single part is set. NOTE(review): a lone App ID is therefore saved without a colon, and the split above reads a colon-less key back as the access key - confirm this is intended. */ const setDoubaoCompoundKey = (appId: string, accessKey: string) => { const combined = appId && accessKey ? `${appId}:${accessKey}` : appId || accessKey; setTTSProviderConfig(selectedProviderId, { apiKey: combined }); }; // Keep the sample text in sync with locale changes. useEffect(() => { setTestText(t('settings.ttsTestTextDefault')); }, [t]); // Reset transient UI state when switching providers. useEffect(() => { stopPreview(); setShowApiKey(false); setTestStatus('idle'); setTestMessage(''); }, [selectedProviderId, stopPreview]); /** Sends a preview request for selectedProviderId using testText; does nothing when the text is blank. Sets testStatus to 'testing', then to 'success' or 'error'; on failure the error is logged and error.message, when present, is appended to the displayed message. For 'voxcpm-tts' the stored providerOptions are merged with the result of getVoxCPMProviderOptions, whose keys win on a clash. */ const handleTestTTS = async () => { if (!testText.trim()) return; setTestStatus('testing'); setTestMessage(''); try { const providerOptions = selectedProviderId === 'voxcpm-tts' ? 
{ ...(ttsProvidersConfig[selectedProviderId]?.providerOptions || {}), ...(await getVoxCPMProviderOptions(effectiveVoice, { role: 'teacher', locale })), } : undefined; /* Base URL precedence for the preview: serverBaseUrl, then baseUrl, then customDefaultBaseUrl, else an empty string. */ await startPreview({ text: testText, providerId: selectedProviderId, modelId: ttsProvidersConfig[selectedProviderId]?.modelId || ttsProvider?.defaultModelId || '', voice: effectiveVoice, speed: ttsSpeed, apiKey: ttsProvidersConfig[selectedProviderId]?.apiKey, baseUrl: ttsProvidersConfig[selectedProviderId]?.serverBaseUrl || ttsProvidersConfig[selectedProviderId]?.baseUrl || providerConfig?.customDefaultBaseUrl || '', providerOptions, }); setTestStatus('success'); setTestMessage(t('settings.ttsTestSuccess')); } catch (error) { log.error('TTS test failed:', error); setTestStatus('error'); setTestMessage( error instanceof Error && error.message ? `${t('settings.ttsTestFailed')}: ${error.message}` : t('settings.ttsTestFailed'), ); } }; /* Base URL used to build requestUrl below: the stored baseUrl, else customDefaultBaseUrl for custom providers or defaultBaseUrl for the others. NOTE(review): unlike handleTestTTS this does not consult serverBaseUrl - confirm the difference is intended. */ const effectiveBaseUrl = ttsProvidersConfig[selectedProviderId]?.baseUrl || (isCustom ? providerConfig?.customDefaultBaseUrl : ttsProvider?.defaultBaseUrl) || ''; /* Endpoint path for the selected provider; an empty string for providers not listed here, which in turn leaves requestUrl empty. */ const endpointPath = (() => { if (isCustom) return '/audio/speech'; switch (selectedProviderId) { case 'openai-tts': case 'glm-tts': return '/audio/speech'; case 'azure-tts': return '/cognitiveservices/v1'; case 'qwen-tts': return '/services/aigc/multimodal-generation/generation'; case 'voxcpm-tts': return getVoxCPMBackendEndpoint(voxcpmBackend); case 'elevenlabs-tts': return '/text-to-speech'; case 'doubao-tts': return '/unidirectional'; default: return ''; } })(); /* Empty unless both a base URL and an endpoint path are known; VoxCPM builds its URL through buildVoxCPMBackendUrl instead of plain concatenation. */ const requestUrl = effectiveBaseUrl && endpointPath ? selectedProviderId === 'voxcpm-tts' ? buildVoxCPMBackendUrl(effectiveBaseUrl, voxcpmBackend) : effectiveBaseUrl + endpointPath : ''; const isVoxCPMVLLMOmni = voxcpmBackend === 'vllm-omni'; return (
{/* Server-configured notice */} {isServerConfigured && (
{t('settings.serverConfiguredNotice')}
)} {/* API Key & Base URL */} {(requiresApiKey || isServerConfigured || isCustom || isVoxCPM) && (isVoxCPM ? (
setTTSProviderConfig(selectedProviderId, { baseUrl: e.target.value, }) } className="h-8 min-w-0 rounded-md font-mono text-sm shadow-none" />
{isVoxCPMVLLMOmni && (
setTTSProviderConfig(selectedProviderId, { modelId: e.target.value, }) } className="h-8 min-w-0 rounded-md font-mono text-sm shadow-none" />
)}
{t('settings.requestUrl')} {requestUrl ? ( {requestUrl} ) : ( {t('settings.voxcpmBaseUrlPending')} )}
) : ( <>
{isDoubao ? ( <>
setDoubaoCompoundKey(e.target.value, doubaoAccessKey)} className="font-mono text-sm pr-10" />
setDoubaoCompoundKey(doubaoAppId, e.target.value)} className="font-mono text-sm pr-10" />
) : (
setTTSProviderConfig(selectedProviderId, { apiKey: e.target.value, }) } className="font-mono text-sm pr-10" />
)}
setTTSProviderConfig(selectedProviderId, { baseUrl: e.target.value, }) } className="text-sm" />
{requestUrl && (

{t('settings.requestUrl')}: {requestUrl}

)} ))} {/* Test TTS */}
setTestText(e.target.value)} className="flex-1" />
{testMessage && (
{testStatus === 'success' && } {testStatus === 'error' && }

{testMessage}

)} {/* Available Models */} {ttsProvider?.models?.length > 0 && !isVoxCPM && (
{ttsProvider.models.map((model) => (
{model.name}
))}

{t('settings.modelSelectedViaVoice')}

)} {selectedProviderId === 'voxcpm-tts' && } {/* Custom Voice List Management */} {isCustom && (
{(providerConfig?.customVoices as Array<{ id: string; name: string }> | undefined) ?.length ? (
{/* Column headers */}
ID {t('settings.voiceNamePlaceholder')}
{/* Voice rows */} {( providerConfig?.customVoices as Array<{ id: string; name: string; }> ).map((voice, index) => (
0 && 'border-t border-border/30', )} > {voice.id} {voice.name}
))}
) : (

{t('settings.noVoicesAdded')}

)} | undefined) || [] ).map((v) => v.id)} onAdd={(voiceId, voiceName) => { const voices = [ ...((providerConfig?.customVoices as | Array<{ id: string; name: string }> | undefined) || []), { id: voiceId, name: voiceName }, ]; setTTSProviderConfig(selectedProviderId, { customVoices: voices, } as Record); // Auto-select the first voice if current voice is 'default' if (ttsVoice === 'default' && selectedProviderId === activeProviderId) { setTTSVoice(voiceId); } }} />
)} {/* Delete Custom Provider */} {isCustom && (
)} {/* Delete Confirmation Dialog */} !open && setShowDeleteConfirm(false)} > {t('settings.deleteProvider')} {t('settings.deleteProviderConfirm')} {t('settings.cancelEdit')} { removeCustomTTSProvider(selectedProviderId); setShowDeleteConfirm(false); }} > {t('settings.deleteProvider')}
); } function VoxCPMVoiceManager() { const { t, locale } = useI18n(); const { profiles, addPromptVoice, addCloneVoice, deleteVoice } = useVoxCPMVoiceProfiles(); const ttsSpeed = useSettingsStore((state) => state.ttsSpeed); const ttsProvidersConfig = useSettingsStore((state) => state.ttsProvidersConfig); const { previewing, startPreview, stopPreview } = useTTSPreview(); const voxcpmBackend = normalizeVoxCPMBackend( ttsProvidersConfig[VOXCPM_TTS_PROVIDER_ID]?.providerOptions?.backend, ); const supportsReferenceAudio = voxCPMBackendSupportsReferenceAudio(voxcpmBackend); const [createMode, setCreateMode] = useState<'prompt' | 'clone'>('prompt'); const [promptName, setPromptName] = useState(''); const [voicePrompt, setVoicePrompt] = useState(''); const [cloneName, setCloneName] = useState(''); const [clonePromptText, setClonePromptText] = useState(''); const [cloneVoicePrompt, setCloneVoicePrompt] = useState(''); const [cloneFile, setCloneFile] = useState(null); const [saving, setSaving] = useState<'prompt' | 'clone' | null>(null); const [isRecordingReference, setIsRecordingReference] = useState(false); const [recordingSeconds, setRecordingSeconds] = useState(0); const [previewingVoiceId, setPreviewingVoiceId] = useState(null); const mediaRecorderRef = useRef(null); const recordingChunksRef = useRef([]); const recordingStreamRef = useRef(null); const recordingTimerRef = useRef | null>(null); const stopRecordingTimer = () => { if (recordingTimerRef.current) { clearInterval(recordingTimerRef.current); recordingTimerRef.current = null; } }; const stopRecordingStream = () => { recordingStreamRef.current?.getTracks().forEach((track) => track.stop()); recordingStreamRef.current = null; }; const startReferenceRecording = async () => { if (isRecordingReference) return; if (typeof navigator === 'undefined' || !navigator.mediaDevices?.getUserMedia) { toast.error(t('settings.voxcpmRecordingUnsupported')); return; } try { const stream = await navigator.mediaDevices.getUserMedia({ 
audio: true }); const mimeType = MediaRecorder.isTypeSupported('audio/webm') ? 'audio/webm' : undefined; const mediaRecorder = new MediaRecorder(stream, mimeType ? { mimeType } : undefined); recordingChunksRef.current = []; recordingStreamRef.current = stream; mediaRecorderRef.current = mediaRecorder; mediaRecorder.ondataavailable = (event) => { if (event.data.size > 0) recordingChunksRef.current.push(event.data); }; mediaRecorder.onstop = () => { void (async () => { const type = mediaRecorder.mimeType || 'audio/webm'; const blob = new Blob(recordingChunksRef.current, { type }); if (blob.size > 0) { try { const referenceAudio = await normalizeVoxCPMReferenceAudio( blob, `voxcpm-reference-${Date.now()}.webm`, ); const file = new File([referenceAudio.blob], referenceAudio.name, { type: referenceAudio.mimeType, }); setCloneFile(file); if (!cloneName.trim()) setCloneName(t('settings.voxcpmRecordedVoiceName')); } catch (error) { toast.error( error instanceof Error ? error.message : t('settings.voxcpmRecordingFailed'), ); } } recordingChunksRef.current = []; setIsRecordingReference(false); setRecordingSeconds(0); stopRecordingTimer(); stopRecordingStream(); })(); }; mediaRecorder.start(); setIsRecordingReference(true); setRecordingSeconds(0); recordingTimerRef.current = setInterval(() => { setRecordingSeconds((seconds) => { const nextSeconds = seconds + 1; if (nextSeconds >= VOXCPM_REFERENCE_AUDIO_MAX_SECONDS) { stopReferenceRecording(); } return nextSeconds; }); }, 1000); } catch (error) { setIsRecordingReference(false); stopRecordingTimer(); stopRecordingStream(); toast.error( error instanceof Error ? 
error.message : t('settings.voxcpmRecordingStartFailed'), ); } }; const stopReferenceRecording = () => { if (mediaRecorderRef.current?.state === 'recording') { mediaRecorderRef.current.stop(); } }; useEffect(() => { return () => { stopRecordingTimer(); if (mediaRecorderRef.current?.state === 'recording') { mediaRecorderRef.current.stop(); } stopRecordingStream(); }; }, []); useEffect(() => { if (!previewing) setPreviewingVoiceId(null); }, [previewing]); const handlePreviewVoice = async (voiceId: string) => { if (previewingVoiceId === voiceId) { stopPreview(); setPreviewingVoiceId(null); return; } const providerConfig = ttsProvidersConfig[VOXCPM_TTS_PROVIDER_ID]; const baseUrl = providerConfig?.serverBaseUrl || providerConfig?.baseUrl || providerConfig?.customDefaultBaseUrl || ''; if (!baseUrl.trim()) { toast.error(t('settings.voxcpmBaseUrlRequired')); return; } setPreviewingVoiceId(voiceId); try { const providerOptions = await getVoxCPMProviderOptions(voiceId, { role: 'teacher', locale, }); await startPreview({ text: t('settings.ttsTestTextDefault'), providerId: VOXCPM_TTS_PROVIDER_ID, modelId: providerConfig?.modelId || TTS_PROVIDERS[VOXCPM_TTS_PROVIDER_ID]?.defaultModelId || '', voice: voiceId, speed: ttsSpeed, apiKey: providerConfig?.apiKey, baseUrl, providerOptions: { ...(providerConfig?.providerOptions || {}), ...providerOptions, }, }); } catch (error) { setPreviewingVoiceId(null); toast.error(error instanceof Error ? error.message : t('settings.voxcpmPreviewFailed')); } }; const handleAddPromptVoice = async () => { if (!promptName.trim() || !voicePrompt.trim()) return; setSaving('prompt'); try { await addPromptVoice({ name: promptName, voicePrompt, }); setPromptName(''); setVoicePrompt(''); toast.success(t('settings.voxcpmVoiceSaved')); } catch (error) { toast.error(error instanceof Error ? 
error.message : t('settings.voxcpmVoiceSaveFailed')); } finally { setSaving(null); } }; const handleCloneFileChange = async (file: File | null) => { if (!file) { setCloneFile(null); return; } try { await validateVoxCPMReferenceAudio(file); setCloneFile(file); } catch (error) { setCloneFile(null); toast.error( error instanceof Error ? error.message : t('settings.voxcpmReferenceAudioInvalid'), ); } }; const handleAddCloneVoice = async () => { if (!cloneName.trim() || !cloneFile) return; setSaving('clone'); try { await addCloneVoice({ name: cloneName, referenceAudio: cloneFile, referenceAudioName: cloneFile.name, referenceAudioMimeType: cloneFile.type, promptText: clonePromptText, voicePrompt: cloneVoicePrompt, }); setCloneName(''); setClonePromptText(''); setCloneVoicePrompt(''); setCloneFile(null); toast.success(t('settings.voxcpmCloneSaved')); } catch (error) { toast.error(error instanceof Error ? error.message : t('settings.voxcpmCloneSaveFailed')); } finally { setSaving(null); } }; const promptCount = profiles.filter((profile) => profile.kind !== 'clone').length; const cloneCount = profiles.filter((profile) => profile.kind === 'clone').length; return (

{t('settings.voxcpmVoicesDescription')}

{t('settings.voxcpmAutoVoicePrivacyNote')}

{t('settings.voxcpmPromptCount', { count: promptCount + 1 })} {t('settings.voxcpmCloneCount', { count: cloneCount })} {!supportsReferenceAudio && ( {t('settings.voxcpmCloneUnsupported')} )}
{t('settings.voxcpmVoicePool')} {t('settings.voxcpmVoiceCount', { count: profiles.length + 1 })}
} title={t('settings.voxcpmAutoVoice')} badge={t('toolbar.default')} badgeTone="default" detail={t('settings.voxcpmAutoVoiceDescription')} kind="auto" /> {profiles.length > 0 ? ( profiles.map((profile) => { const voiceId = getVoxCPMProfileVoiceId(profile.id); const canPreview = profile.kind !== 'clone' || supportsReferenceAudio; return ( ) : ( ) } title={profile.name} badge={ profile.kind === 'clone' && !supportsReferenceAudio ? t('settings.voxcpmUnavailable') : profile.kind === 'clone' ? t('settings.voxcpmClone') : 'Prompt' } badgeTone={ profile.kind === 'clone' && !supportsReferenceAudio ? 'warning' : 'neutral' } detail={ profile.kind === 'clone' && !supportsReferenceAudio ? t('settings.voxcpmCloneUnsupportedDetail') : profile.kind === 'clone' ? profile.referenceAudioName || 'reference audio' : profile.voicePrompt || '' } kind={profile.kind === 'clone' ? 'clone' : 'prompt'} muted={profile.kind === 'clone' && !supportsReferenceAudio} previewing={canPreview && previewingVoiceId === voiceId} onPreview={canPreview ? () => handlePreviewVoice(voiceId) : undefined} onDelete={async () => { await deleteVoice(profile.id); }} /> ); }) ) : (
{t('settings.voxcpmNoCustomVoices')}
)}
setCreateMode(value as typeof createMode)} >
Prompt {t('settings.voxcpmClone')} {createMode === 'clone' && !supportsReferenceAudio && ( {t('settings.voxcpmCloneSaveOnly')} )}
setPromptName(e.target.value)} placeholder={t('settings.voxcpmVoiceNamePlaceholder')} className="h-10 rounded-md text-sm" />