import { useState, useEffect, useRef, type ReactNode } from 'react';
import { Label } from '@/components/ui/label';
import { Input } from '@/components/ui/input';
import { Button } from '@/components/ui/button';
import { Textarea } from '@/components/ui/textarea';
import { Tabs, TabsContent, TabsList, TabsTrigger } from '@/components/ui/tabs';
import {
Select,
SelectContent,
SelectItem,
SelectTrigger,
SelectValue,
} from '@/components/ui/select';
import {
AlertDialog,
AlertDialogAction,
AlertDialogCancel,
AlertDialogContent,
AlertDialogDescription,
AlertDialogFooter,
AlertDialogHeader,
AlertDialogTitle,
} from '@/components/ui/alert-dialog';
import { useI18n } from '@/lib/hooks/use-i18n';
import { useSettingsStore } from '@/lib/store/settings';
import { TTS_PROVIDERS, DEFAULT_TTS_VOICES } from '@/lib/audio/constants';
import type { TTSProviderId } from '@/lib/audio/types';
import {
Volume2,
Loader2,
CheckCircle2,
XCircle,
Eye,
EyeOff,
Plus,
Route,
Server,
Trash2,
Upload,
Wand2,
FileAudio,
Mic,
Square,
} from 'lucide-react';
import { cn } from '@/lib/utils';
import { toast } from 'sonner';
import { createLogger } from '@/lib/logger';
import { useTTSPreview } from '@/lib/audio/use-tts-preview';
import { isCustomTTSProvider } from '@/lib/audio/types';
import {
getVoxCPMProviderOptions,
normalizeVoxCPMReferenceAudio,
validateVoxCPMReferenceAudio,
VOXCPM_REFERENCE_AUDIO_MAX_SECONDS,
useVoxCPMVoiceProfiles,
} from '@/lib/audio/voxcpm-voices';
import {
VOXCPM_BACKENDS,
VOXCPM_TTS_PROVIDER_ID,
buildVoxCPMBackendUrl,
getVoxCPMBackendEndpoint,
getVoxCPMProfileVoiceId,
normalizeVoxCPMBackend,
VOXCPM_VLLM_MODEL_ID,
voxCPMBackendSupportsReferenceAudio,
} from '@/lib/audio/voxcpm';
const log = createLogger('TTSSettings');
/**
 * Props accepted by the `TTSSettings` panel.
 */
interface TTSSettingsProps {
/**
 * Provider whose settings are being edited. This can differ from the store's
 * active `ttsProviderId`; the component compares the two when choosing which
 * voice to use for test playback.
 */
selectedProviderId: TTSProviderId;
}
export function TTSSettings({ selectedProviderId }: TTSSettingsProps) {
const { t, locale } = useI18n();
const ttsVoice = useSettingsStore((state) => state.ttsVoice);
const ttsSpeed = useSettingsStore((state) => state.ttsSpeed);
const ttsProvidersConfig = useSettingsStore((state) => state.ttsProvidersConfig);
const setTTSProviderConfig = useSettingsStore((state) => state.setTTSProviderConfig);
const activeProviderId = useSettingsStore((state) => state.ttsProviderId);
const setTTSVoice = useSettingsStore((state) => state.setTTSVoice);
const removeCustomTTSProvider = useSettingsStore((state) => state.removeCustomTTSProvider);
const ttsProvider = TTS_PROVIDERS[selectedProviderId as keyof typeof TTS_PROVIDERS];
const isCustom = isCustomTTSProvider(selectedProviderId);
const providerConfig = ttsProvidersConfig[selectedProviderId];
const isServerConfigured = !!providerConfig?.isServerConfigured;
const isVoxCPM = selectedProviderId === 'voxcpm-tts';
const voxcpmBackend = normalizeVoxCPMBackend(providerConfig?.providerOptions?.backend);
const requiresApiKey = isCustom
? !!providerConfig?.requiresApiKey
: !!ttsProvider?.requiresApiKey;
// When testing a non-active provider, use that provider's default voice
// instead of the active provider's voice (which may be incompatible).
const effectiveVoice =
selectedProviderId === activeProviderId
? ttsVoice
: isCustomTTSProvider(selectedProviderId)
? ((providerConfig?.customVoices as Array<{ id: string }> | undefined) || [])[0]?.id ||
'default'
: DEFAULT_TTS_VOICES[selectedProviderId as keyof typeof DEFAULT_TTS_VOICES] || 'default';
const [showApiKey, setShowApiKey] = useState(false);
const [showDeleteConfirm, setShowDeleteConfirm] = useState(false);
const [testText, setTestText] = useState(t('settings.ttsTestTextDefault'));
const [testStatus, setTestStatus] = useState<'idle' | 'testing' | 'success' | 'error'>('idle');
const [testMessage, setTestMessage] = useState('');
const { previewing: testingTTS, startPreview, stopPreview } = useTTSPreview();
// Doubao TTS uses compound "appId:accessKey" — split for separate UI fields
const isDoubao = selectedProviderId === 'doubao-tts';
const rawApiKey = ttsProvidersConfig[selectedProviderId]?.apiKey || '';
const doubaoColonIdx = rawApiKey.indexOf(':');
const doubaoAppId = isDoubao && doubaoColonIdx > 0 ? rawApiKey.slice(0, doubaoColonIdx) : '';
const doubaoAccessKey =
isDoubao && doubaoColonIdx > 0
? rawApiKey.slice(doubaoColonIdx + 1)
: isDoubao
? rawApiKey
: '';
/**
 * Persist the Doubao credentials as the single compound `appId:accessKey` string.
 *
 * The separator is written whenever an app ID is present, even while the access
 * key is still empty. The parsing code above only recognises an app ID when a
 * colon follows it (`doubaoColonIdx > 0`); a bare app ID without the separator
 * would be read back as the access key on the next render, so text typed into
 * one field would show up in the other.
 *
 * An access key with no app ID is still stored bare, which the parser already
 * reads back as the access key. Two empty values store an empty string.
 *
 * @param appId Doubao application ID (may be empty while the user is typing).
 * @param accessKey Doubao access key (may be empty while the user is typing).
 */
const setDoubaoCompoundKey = (appId: string, accessKey: string) => {
  const combined = appId ? `${appId}:${accessKey}` : accessKey;
  setTTSProviderConfig(selectedProviderId, { apiKey: combined });
};
// Keep the sample text in sync with locale changes.
useEffect(() => {
setTestText(t('settings.ttsTestTextDefault'));
}, [t]);
// Reset transient UI state when switching providers.
useEffect(() => {
stopPreview();
setShowApiKey(false);
setTestStatus('idle');
setTestMessage('');
}, [selectedProviderId, stopPreview]);
/**
 * Play the sample text through the provider currently being configured and
 * reflect the outcome in the test status / message state.
 *
 * Does nothing when the sample text is blank.
 */
const handleTestTTS = async () => {
  if (testText.trim() === '') return;

  setTestStatus('testing');
  setTestMessage('');

  try {
    const storedConfig = ttsProvidersConfig[selectedProviderId];

    // Only VoxCPM sends provider options: the stored ones, overridden by the
    // options resolved for the voice under test.
    const providerOptions =
      selectedProviderId !== 'voxcpm-tts'
        ? undefined
        : {
            ...(storedConfig?.providerOptions || {}),
            ...(await getVoxCPMProviderOptions(effectiveVoice, { role: 'teacher', locale })),
          };

    const modelId = storedConfig?.modelId || ttsProvider?.defaultModelId || '';
    const baseUrl =
      storedConfig?.serverBaseUrl ||
      storedConfig?.baseUrl ||
      providerConfig?.customDefaultBaseUrl ||
      '';

    await startPreview({
      text: testText,
      providerId: selectedProviderId,
      modelId,
      voice: effectiveVoice,
      speed: ttsSpeed,
      apiKey: storedConfig?.apiKey,
      baseUrl,
      providerOptions,
    });

    setTestStatus('success');
    setTestMessage(t('settings.ttsTestSuccess'));
  } catch (error) {
    log.error('TTS test failed:', error);
    setTestStatus('error');

    // Append the underlying error text when there is one.
    const failureLabel = t('settings.ttsTestFailed');
    const detail = error instanceof Error ? error.message : '';
    setTestMessage(detail ? `${failureLabel}: ${detail}` : failureLabel);
  }
};
const effectiveBaseUrl =
ttsProvidersConfig[selectedProviderId]?.baseUrl ||
(isCustom ? providerConfig?.customDefaultBaseUrl : ttsProvider?.defaultBaseUrl) ||
'';
// Path that is appended to the base URL when building the request URL preview.
// An empty string means no path is known for the provider.
const endpointPath = (() => {
  // Every custom provider uses the same speech path.
  if (isCustom) return '/audio/speech';

  // VoxCPM's path depends on which backend is selected.
  if (selectedProviderId === 'voxcpm-tts') return getVoxCPMBackendEndpoint(voxcpmBackend);

  const fixedPaths = new Map<string, string>([
    ['openai-tts', '/audio/speech'],
    ['glm-tts', '/audio/speech'],
    ['azure-tts', '/cognitiveservices/v1'],
    ['qwen-tts', '/services/aigc/multimodal-generation/generation'],
    ['elevenlabs-tts', '/text-to-speech'],
    ['doubao-tts', '/unidirectional'],
  ]);
  return fixedPaths.get(selectedProviderId) ?? '';
})();
const requestUrl =
effectiveBaseUrl && endpointPath
? selectedProviderId === 'voxcpm-tts'
? buildVoxCPMBackendUrl(effectiveBaseUrl, voxcpmBackend)
: effectiveBaseUrl + endpointPath
: '';
const isVoxCPMVLLMOmni = voxcpmBackend === 'vllm-omni';
return (
{/* Server-configured notice */}
{isServerConfigured && (
{t('settings.serverConfiguredNotice')}
)}
{/* API Key & Base URL */}
{(requiresApiKey || isServerConfigured || isCustom || isVoxCPM) &&
(isVoxCPM ? (
{t('settings.requestUrl')}
{requestUrl ? (
{requestUrl}
) : (
{t('settings.voxcpmBaseUrlPending')}
)}
) : (
<>
{isDoubao ? (
<>
>
) : (
)}
setTTSProviderConfig(selectedProviderId, {
baseUrl: e.target.value,
})
}
className="text-sm"
/>
{requestUrl && (
{t('settings.requestUrl')}: {requestUrl}
)}
>
))}
{/* Test TTS */}
{testMessage && (
{testStatus === 'success' &&
}
{testStatus === 'error' &&
}
{testMessage}
)}
{/* Available Models */}
{ttsProvider?.models?.length > 0 && !isVoxCPM && (
{ttsProvider.models.map((model) => (
{model.name}
))}
{t('settings.modelSelectedViaVoice')}
)}
{selectedProviderId === 'voxcpm-tts' &&
}
{/* Custom Voice List Management */}
{isCustom && (
{(providerConfig?.customVoices as Array<{ id: string; name: string }> | undefined)
?.length ? (
{/* Column headers */}
ID
{t('settings.voiceNamePlaceholder')}
{/* Voice rows */}
{(
providerConfig?.customVoices as Array<{
id: string;
name: string;
}>
).map((voice, index) => (
0 && 'border-t border-border/30',
)}
>
{voice.id}
{voice.name}
))}
) : (
{t('settings.noVoicesAdded')}
)}
| undefined) ||
[]
).map((v) => v.id)}
onAdd={(voiceId, voiceName) => {
const voices = [
...((providerConfig?.customVoices as
| Array<{ id: string; name: string }>
| undefined) || []),
{ id: voiceId, name: voiceName },
];
setTTSProviderConfig(selectedProviderId, {
customVoices: voices,
} as Record);
// Auto-select the first voice if current voice is 'default'
if (ttsVoice === 'default' && selectedProviderId === activeProviderId) {
setTTSVoice(voiceId);
}
}}
/>
)}
{/* Delete Custom Provider */}
{isCustom && (
)}
{/* Delete Confirmation Dialog */}
!open && setShowDeleteConfirm(false)}
>
{t('settings.deleteProvider')}
{t('settings.deleteProviderConfirm')}
{t('settings.cancelEdit')}
{
removeCustomTTSProvider(selectedProviderId);
setShowDeleteConfirm(false);
}}
>
{t('settings.deleteProvider')}
);
}
function VoxCPMVoiceManager() {
const { t, locale } = useI18n();
const { profiles, addPromptVoice, addCloneVoice, deleteVoice } = useVoxCPMVoiceProfiles();
const ttsSpeed = useSettingsStore((state) => state.ttsSpeed);
const ttsProvidersConfig = useSettingsStore((state) => state.ttsProvidersConfig);
const { previewing, startPreview, stopPreview } = useTTSPreview();
const voxcpmBackend = normalizeVoxCPMBackend(
ttsProvidersConfig[VOXCPM_TTS_PROVIDER_ID]?.providerOptions?.backend,
);
const supportsReferenceAudio = voxCPMBackendSupportsReferenceAudio(voxcpmBackend);
const [createMode, setCreateMode] = useState<'prompt' | 'clone'>('prompt');
const [promptName, setPromptName] = useState('');
const [voicePrompt, setVoicePrompt] = useState('');
const [cloneName, setCloneName] = useState('');
const [clonePromptText, setClonePromptText] = useState('');
const [cloneVoicePrompt, setCloneVoicePrompt] = useState('');
const [cloneFile, setCloneFile] = useState(null);
const [saving, setSaving] = useState<'prompt' | 'clone' | null>(null);
const [isRecordingReference, setIsRecordingReference] = useState(false);
const [recordingSeconds, setRecordingSeconds] = useState(0);
const [previewingVoiceId, setPreviewingVoiceId] = useState(null);
const mediaRecorderRef = useRef(null);
const recordingChunksRef = useRef([]);
const recordingStreamRef = useRef(null);
const recordingTimerRef = useRef | null>(null);
// Cancel the recording ticker held in `recordingTimerRef`, if one is running.
const stopRecordingTimer = () => {
  const activeTimer = recordingTimerRef.current;
  if (!activeTimer) return;
  clearInterval(activeTimer);
  recordingTimerRef.current = null;
};
// Stop every track of the stream held in `recordingStreamRef` and forget the stream.
const stopRecordingStream = () => {
  const activeStream = recordingStreamRef.current;
  if (activeStream) {
    for (const track of activeStream.getTracks()) {
      track.stop();
    }
  }
  recordingStreamRef.current = null;
};
/**
 * Start capturing reference audio from the microphone for a cloned voice.
 *
 * When the recorder stops, the captured chunks are passed through
 * `normalizeVoxCPMReferenceAudio` and the result becomes the pending clone
 * file. Recording stops automatically once
 * `VOXCPM_REFERENCE_AUDIO_MAX_SECONDS` seconds have elapsed.
 *
 * Failures (unsupported browser, permission denied, recorder construction,
 * normalization) are reported through `toast.error`; nothing is thrown.
 */
const startReferenceRecording = async () => {
  if (isRecordingReference) return;
  if (
    typeof navigator === 'undefined' ||
    !navigator.mediaDevices?.getUserMedia ||
    typeof MediaRecorder === 'undefined'
  ) {
    toast.error(t('settings.voxcpmRecordingUnsupported'));
    return;
  }
  try {
    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    // Register the stream before anything else can throw, so the catch block
    // below is able to release the microphone if recorder setup fails.
    recordingStreamRef.current = stream;

    const mimeType = MediaRecorder.isTypeSupported('audio/webm') ? 'audio/webm' : undefined;
    const mediaRecorder = new MediaRecorder(stream, mimeType ? { mimeType } : undefined);
    recordingChunksRef.current = [];
    mediaRecorderRef.current = mediaRecorder;

    mediaRecorder.ondataavailable = (event) => {
      if (event.data.size > 0) recordingChunksRef.current.push(event.data);
    };

    mediaRecorder.onstop = () => {
      // Release the ticker and the microphone immediately; the normalization
      // below is asynchronous and should not keep the device open.
      stopRecordingTimer();
      stopRecordingStream();

      // Snapshot the captured data synchronously, then reset the buffer.
      const type = mediaRecorder.mimeType || 'audio/webm';
      const blob = new Blob(recordingChunksRef.current, { type });
      recordingChunksRef.current = [];

      void (async () => {
        if (blob.size > 0) {
          try {
            // NOTE(review): the name always ends in `.webm`, even when the
            // recorder fell back to a different container — confirm that
            // normalizeVoxCPMReferenceAudio does not rely on the extension.
            const referenceAudio = await normalizeVoxCPMReferenceAudio(
              blob,
              `voxcpm-reference-${Date.now()}.webm`,
            );
            const file = new File([referenceAudio.blob], referenceAudio.name, {
              type: referenceAudio.mimeType,
            });
            setCloneFile(file);
            // Functional update: read the name as it is *now*, not as it was
            // when recording started, so a name typed meanwhile is kept.
            setCloneName((current) =>
              current.trim() ? current : t('settings.voxcpmRecordedVoiceName'),
            );
          } catch (error) {
            toast.error(
              error instanceof Error ? error.message : t('settings.voxcpmRecordingFailed'),
            );
          }
        }
        setIsRecordingReference(false);
        setRecordingSeconds(0);
      })();
    };

    mediaRecorder.start();
    setIsRecordingReference(true);
    setRecordingSeconds(0);

    // Never leave a previous ticker running alongside the new one.
    stopRecordingTimer();
    // Count in a closure variable so the state updater stays free of side
    // effects; the auto-stop is triggered from the interval callback itself.
    let elapsedSeconds = 0;
    recordingTimerRef.current = setInterval(() => {
      elapsedSeconds += 1;
      setRecordingSeconds(elapsedSeconds);
      if (
        elapsedSeconds >= VOXCPM_REFERENCE_AUDIO_MAX_SECONDS &&
        mediaRecorder.state === 'recording'
      ) {
        mediaRecorder.stop();
      }
    }, 1000);
  } catch (error) {
    setIsRecordingReference(false);
    stopRecordingTimer();
    stopRecordingStream();
    toast.error(
      error instanceof Error ? error.message : t('settings.voxcpmRecordingStartFailed'),
    );
  }
};
// Ask the active recorder to stop; a no-op unless it is currently recording.
const stopReferenceRecording = () => {
  const recorder = mediaRecorderRef.current;
  if (!recorder || recorder.state !== 'recording') return;
  recorder.stop();
};
useEffect(() => {
return () => {
stopRecordingTimer();
if (mediaRecorderRef.current?.state === 'recording') {
mediaRecorderRef.current.stop();
}
stopRecordingStream();
};
}, []);
useEffect(() => {
if (!previewing) setPreviewingVoiceId(null);
}, [previewing]);
/**
 * Toggle the audio preview for one voice.
 *
 * Invoking it for the voice that is already previewing stops playback.
 * Otherwise a preview of the default sample text is started for `voiceId`,
 * provided a VoxCPM base URL is configured.
 *
 * @param voiceId Voice identifier to preview.
 */
const handlePreviewVoice = async (voiceId: string) => {
  const isAlreadyPreviewing = previewingVoiceId === voiceId;
  if (isAlreadyPreviewing) {
    stopPreview();
    setPreviewingVoiceId(null);
    return;
  }

  const voxcpmConfig = ttsProvidersConfig[VOXCPM_TTS_PROVIDER_ID];
  const resolvedBaseUrl =
    voxcpmConfig?.serverBaseUrl ||
    voxcpmConfig?.baseUrl ||
    voxcpmConfig?.customDefaultBaseUrl ||
    '';
  if (resolvedBaseUrl.trim() === '') {
    toast.error(t('settings.voxcpmBaseUrlRequired'));
    return;
  }

  setPreviewingVoiceId(voiceId);
  try {
    const voiceOptions = await getVoxCPMProviderOptions(voiceId, {
      role: 'teacher',
      locale,
    });
    await startPreview({
      text: t('settings.ttsTestTextDefault'),
      providerId: VOXCPM_TTS_PROVIDER_ID,
      modelId:
        voxcpmConfig?.modelId || TTS_PROVIDERS[VOXCPM_TTS_PROVIDER_ID]?.defaultModelId || '',
      voice: voiceId,
      speed: ttsSpeed,
      apiKey: voxcpmConfig?.apiKey,
      baseUrl: resolvedBaseUrl,
      // Voice-specific options override the stored provider options.
      providerOptions: {
        ...(voxcpmConfig?.providerOptions || {}),
        ...voiceOptions,
      },
    });
  } catch (error) {
    setPreviewingVoiceId(null);
    const message = error instanceof Error ? error.message : t('settings.voxcpmPreviewFailed');
    toast.error(message);
  }
};
/**
 * Save a prompt-described voice from the form fields, then clear the form.
 * Requires both a non-blank name and a non-blank voice prompt.
 */
const handleAddPromptVoice = async () => {
  const hasName = promptName.trim().length > 0;
  const hasPrompt = voicePrompt.trim().length > 0;
  if (!(hasName && hasPrompt)) return;

  setSaving('prompt');
  try {
    await addPromptVoice({ name: promptName, voicePrompt });
    setPromptName('');
    setVoicePrompt('');
    toast.success(t('settings.voxcpmVoiceSaved'));
  } catch (error) {
    const message = error instanceof Error ? error.message : t('settings.voxcpmVoiceSaveFailed');
    toast.error(message);
  } finally {
    // Always leave the saving state, whether the save succeeded or not.
    setSaving(null);
  }
};
/**
 * Accept a newly chosen reference-audio file for cloning.
 *
 * The file is kept only if `validateVoxCPMReferenceAudio` resolves; otherwise
 * the selection is cleared and the validation error is shown as a toast.
 *
 * @param file Chosen file, or `null` to clear the selection.
 */
const handleCloneFileChange = async (file: File | null) => {
  if (!file) {
    setCloneFile(null);
    return;
  }

  let rejection: { error: unknown } | null = null;
  try {
    await validateVoxCPMReferenceAudio(file);
  } catch (error) {
    rejection = { error };
  }

  if (rejection === null) {
    setCloneFile(file);
    return;
  }

  setCloneFile(null);
  const { error } = rejection;
  toast.error(error instanceof Error ? error.message : t('settings.voxcpmReferenceAudioInvalid'));
};
/**
 * Save a cloned voice from the form fields and the selected reference audio,
 * then clear the form. Requires a non-blank name and a selected file.
 */
const handleAddCloneVoice = async () => {
  const isReady = cloneName.trim().length > 0 && Boolean(cloneFile);
  if (!isReady || !cloneFile) return;

  setSaving('clone');
  try {
    const { name: referenceAudioName, type: referenceAudioMimeType } = cloneFile;
    await addCloneVoice({
      name: cloneName,
      referenceAudio: cloneFile,
      referenceAudioName,
      referenceAudioMimeType,
      promptText: clonePromptText,
      voicePrompt: cloneVoicePrompt,
    });

    // Reset every clone-form field after a successful save.
    setCloneName('');
    setClonePromptText('');
    setCloneVoicePrompt('');
    setCloneFile(null);
    toast.success(t('settings.voxcpmCloneSaved'));
  } catch (error) {
    const message = error instanceof Error ? error.message : t('settings.voxcpmCloneSaveFailed');
    toast.error(message);
  } finally {
    setSaving(null);
  }
};
const promptCount = profiles.filter((profile) => profile.kind !== 'clone').length;
const cloneCount = profiles.filter((profile) => profile.kind === 'clone').length;
return (
{t('settings.voxcpmVoicesDescription')}
{t('settings.voxcpmAutoVoicePrivacyNote')}
{t('settings.voxcpmPromptCount', { count: promptCount + 1 })}
{t('settings.voxcpmCloneCount', { count: cloneCount })}
{!supportsReferenceAudio && (
{t('settings.voxcpmCloneUnsupported')}
)}
{t('settings.voxcpmVoicePool')}
{t('settings.voxcpmVoiceCount', { count: profiles.length + 1 })}
}
title={t('settings.voxcpmAutoVoice')}
badge={t('toolbar.default')}
badgeTone="default"
detail={t('settings.voxcpmAutoVoiceDescription')}
kind="auto"
/>
{profiles.length > 0 ? (
profiles.map((profile) => {
const voiceId = getVoxCPMProfileVoiceId(profile.id);
const canPreview = profile.kind !== 'clone' || supportsReferenceAudio;
return (
) : (
)
}
title={profile.name}
badge={
profile.kind === 'clone' && !supportsReferenceAudio
? t('settings.voxcpmUnavailable')
: profile.kind === 'clone'
? t('settings.voxcpmClone')
: 'Prompt'
}
badgeTone={
profile.kind === 'clone' && !supportsReferenceAudio ? 'warning' : 'neutral'
}
detail={
profile.kind === 'clone' && !supportsReferenceAudio
? t('settings.voxcpmCloneUnsupportedDetail')
: profile.kind === 'clone'
? profile.referenceAudioName || 'reference audio'
: profile.voicePrompt || ''
}
kind={profile.kind === 'clone' ? 'clone' : 'prompt'}
muted={profile.kind === 'clone' && !supportsReferenceAudio}
previewing={canPreview && previewingVoiceId === voiceId}
onPreview={canPreview ? () => handlePreviewVoice(voiceId) : undefined}
onDelete={async () => {
await deleteVoice(profile.id);
}}
/>
);
})
) : (
{t('settings.voxcpmNoCustomVoices')}
)}
);
}
/**
 * Format an elapsed duration as `m:ss`.
 *
 * @param seconds Elapsed time in seconds. Fractions are truncated; negative or
 *   non-finite values (`NaN`, `±Infinity`) are treated as 0 so the result is
 *   always well-formed.
 * @returns The formatted time, e.g. `0:07`, `1:05`, `12:00`. Minutes are
 *   neither zero-padded nor capped.
 */
function formatRecordingTime(seconds: number): string {
  const totalSeconds = Number.isFinite(seconds) ? Math.max(0, Math.floor(seconds)) : 0;
  const minutes = Math.floor(totalSeconds / 60);
  const remainingSeconds = totalSeconds % 60;
  return `${minutes}:${remainingSeconds.toString().padStart(2, '0')}`;
}
/**
 * One row describing a voice: an icon, a title, a badge and a detail line,
 * with optional preview and delete controls.
 *
 * Defaults: `badgeTone` is `'neutral'` and `kind` is `'prompt'`.
 */
function VoiceProfileRow({
icon,
title,
badge,
badgeTone = 'neutral',
detail,
kind = 'prompt',
muted,
previewing,
onPreview,
onDelete,
}: {
/** Icon node rendered for the row. */
icon: ReactNode;
/** Primary label of the voice. */
title: string;
/** Short badge text. */
badge: string;
/** Selects the badge colour classes (see `badgeClassName`). */
badgeTone?: 'default' | 'warning' | 'neutral';
/** Secondary text for the row. */
detail: string;
/** Selects the icon colour classes (see `iconClassName`). */
kind?: 'auto' | 'prompt' | 'clone';
/** Optional flag supplied by the caller for voices that are currently unusable. */
muted?: boolean;
/** Whether this row's voice is the one currently being previewed. */
previewing?: boolean;
/** Preview handler; the preview control is rendered only when this is provided. */
onPreview?: () => void;
/** Delete handler; the delete control is rendered only when this is provided. */
onDelete?: () => void | Promise;
}) {
// Icon tint: violet for the automatic voice, sky for clones, muted otherwise.
const iconClassName =
kind === 'auto'
? 'bg-violet-50 text-violet-700 dark:bg-violet-950/40 dark:text-violet-300'
: kind === 'clone'
? 'bg-sky-50 text-sky-700 dark:bg-sky-950/40 dark:text-sky-300'
: 'bg-muted text-muted-foreground';
// Badge colours: violet for 'default', amber for 'warning', neutral border otherwise.
const badgeClassName =
badgeTone === 'default'
? 'border-violet-200 bg-violet-50 text-violet-700 dark:border-violet-800/70 dark:bg-violet-950/40 dark:text-violet-300'
: badgeTone === 'warning'
? 'border-amber-200 bg-amber-50 text-amber-700 dark:border-amber-800/70 dark:bg-amber-950/40 dark:text-amber-300'
: 'border-border/70 bg-background text-muted-foreground';
const { t } = useI18n();
return (
{icon}
{onPreview && (
)}
{onDelete && (
)}
);
}
function AddVoiceRow({
onAdd,
existingIds,
}: {
onAdd: (id: string, name: string) => void;
existingIds: string[];
}) {
const { t } = useI18n();
const [voiceId, setVoiceId] = useState('');
const [voiceName, setVoiceName] = useState('');
// Validate the draft voice ID, reject duplicates, then pass the trimmed values
// to `onAdd` and clear both inputs. The name falls back to the ID when blank.
const handleAdd = () => {
  const trimmedId = voiceId.trim();
  if (trimmedId === '') return;

  const isDuplicate = existingIds.includes(trimmedId);
  if (isDuplicate) {
    toast.error('Duplicate ID');
    return;
  }

  const displayName = voiceName.trim() || trimmedId;
  onAdd(trimmedId, displayName);
  setVoiceId('');
  setVoiceName('');
};
return (
);
}