import { useState, useRef, useEffect, useMemo, useCallback } from 'react'; import { Label } from '@/components/ui/label'; import { Input } from '@/components/ui/input'; import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue, } from '@/components/ui/select'; import { Switch } from '@/components/ui/switch'; import { Button } from '@/components/ui/button'; import { useI18n } from '@/lib/hooks/use-i18n'; import { useSettingsStore } from '@/lib/store/settings'; import { TTS_PROVIDERS, getTTSVoices, ASR_PROVIDERS, getASRSupportedLanguages, } from '@/lib/audio/constants'; import type { TTSProviderId, ASRProviderId } from '@/lib/audio/types'; import { isCustomASRProvider } from '@/lib/audio/types'; import { Volume2, Mic, MicOff, CheckCircle2, XCircle, Eye, EyeOff } from 'lucide-react'; import { cn } from '@/lib/utils'; import azureVoicesData from '@/lib/audio/azure.json'; import { createLogger } from '@/lib/logger'; import { getVoxCPMVoiceOptions, useVoxCPMVoiceProfiles } from '@/lib/audio/voxcpm-voices'; import { normalizeVoxCPMBackend, voxCPMBackendSupportsReferenceAudio } from '@/lib/audio/voxcpm'; const log = createLogger('AudioSettings'); /** * Look up the localized display name of a built-in TTS provider. * * @param providerId - Provider id used as the key into the name table below. * @param t - Translation function mapping an i18n key to display text. * @returns The translated name; at runtime an id that is missing from the table yields undefined. */ function getTTSProviderName(providerId: TTSProviderId, t: (key: string) => string): string { const names: Record = { 'openai-tts': t('settings.providerOpenAITTS'), 'azure-tts': t('settings.providerAzureTTS'), 'glm-tts': t('settings.providerGLMTTS'), 'qwen-tts': t('settings.providerQwenTTS'), 'voxcpm-tts': t('settings.providerVoxCPMTTS'), 'doubao-tts': t('settings.providerDoubaoTTS'), 'elevenlabs-tts': t('settings.providerElevenLabsTTS'), 'minimax-tts': t('settings.providerMiniMaxTTS'), 'browser-native-tts': t('settings.providerBrowserNativeTTS'), }; return names[providerId]; } /** * Look up the localized display name of a built-in ASR provider. * * @param providerId - Provider id used as the key into the name table below. * @param t - Translation function mapping an i18n key to display text. * @returns The translated name; only three ids are listed, so any other id yields undefined at runtime. */ function getASRProviderName(providerId: ASRProviderId, t: (key: string) => string): string { const names: Record = { 'openai-whisper': t('settings.providerOpenAIWhisper'), 'browser-native': 
t('settings.providerBrowserNative'), 'qwen-asr': t('settings.providerQwenASR'), }; return names[providerId]; } /** * Translate a language code through the settings.lang_ i18n key built from it. * * @param code - Language code appended to the settings.lang_ prefix. * @param t - Translation function; it is treated as having no entry when it returns the key unchanged. * @returns The translated label, or the raw code when no translation was found. */ function getLanguageName(code: string, t: (key: string) => string): string { const key = `settings.lang_${code}`; const translated = t(key); // If translation key not found, return the code itself return translated === key ? code : translated; } /** Props accepted by AudioSettings. onSave is optional and is called after each setting change made through the handlers of this panel. */ interface AudioSettingsProps { onSave?: () => void; } /** * Settings panel for text-to-speech (TTS) and speech recognition (ASR). * * Reads and writes the TTS/ASR provider, voice, language, enabled flags and per-provider config through useSettingsStore, keeps the selected voice and language valid for the current provider, and offers an ASR test that records or recognizes speech and shows the outcome. * * @param onSave - Optional callback invoked after each change made through the wrapped handlers. */ export function AudioSettings({ onSave }: AudioSettingsProps = {}) { const { t } = useI18n(); // TTS state const ttsProviderId = useSettingsStore((state) => state.ttsProviderId); const ttsVoice = useSettingsStore((state) => state.ttsVoice); const ttsProvidersConfig = useSettingsStore((state) => state.ttsProvidersConfig); const setTTSProvider = useSettingsStore((state) => state.setTTSProvider); const setTTSVoice = useSettingsStore((state) => state.setTTSVoice); const setTTSProviderConfig = useSettingsStore((state) => state.setTTSProviderConfig); // ASR state const asrProviderId = useSettingsStore((state) => state.asrProviderId); const asrLanguage = useSettingsStore((state) => state.asrLanguage); const asrProvidersConfig = useSettingsStore((state) => state.asrProvidersConfig); const setASRProvider = useSettingsStore((state) => state.setASRProvider); const setASRLanguage = useSettingsStore((state) => state.setASRLanguage); const setASRProviderConfig = useSettingsStore((state) => state.setASRProviderConfig); const ttsEnabled = useSettingsStore((state) => state.ttsEnabled); const asrEnabled = useSettingsStore((state) => state.asrEnabled); const setTTSEnabled = useSettingsStore((state) => state.setTTSEnabled); const setASREnabled = useSettingsStore((state) => state.setASREnabled); /* Fall back to the openai-tts entry when the stored id has no entry in TTS_PROVIDERS. */ const ttsProvider = TTS_PROVIDERS[ttsProviderId as keyof typeof TTS_PROVIDERS] ?? 
TTS_PROVIDERS['openai-tts']; // Azure voices - load from static JSON const azureVoices = useMemo(() => azureVoicesData.voices, []); const { profiles: voxcpmProfiles } = useVoxCPMVoiceProfiles(); const voxcpmBackend = normalizeVoxCPMBackend( ttsProvidersConfig['voxcpm-tts']?.providerOptions?.backend, ); // Wrapped setters that trigger onSave callback const handleTTSProviderChange = (providerId: TTSProviderId) => { setTTSProvider(providerId); onSave?.(); }; const handleTTSProviderConfigChange = ( providerId: TTSProviderId, config: Partial<{ apiKey: string; baseUrl: string; model?: string; enabled: boolean }>, ) => { setTTSProviderConfig(providerId, config); onSave?.(); }; const handleASRProviderChange = (providerId: ASRProviderId) => { setASRProvider(providerId); onSave?.(); }; const handleASRLanguageChange = (language: string) => { setASRLanguage(language); onSave?.(); }; const handleASRProviderConfigChange = ( providerId: ASRProviderId, config: Partial<{ apiKey: string; baseUrl: string; enabled: boolean }>, ) => { setASRProviderConfig(providerId, config); onSave?.(); }; // Password visibility state const [showTTSApiKey, setShowTTSApiKey] = useState(false); const [showASRApiKey, setShowASRApiKey] = useState(false); // Language filter state const [selectedLocale, setSelectedLocale] = useState('all'); // Test state const [isRecording, setIsRecording] = useState(false); const [asrResult, setASRResult] = useState(''); const [asrTestStatus, setASRTestStatus] = useState<'idle' | 'testing' | 'success' | 'error'>( 'idle', ); const [asrTestMessage, setASRTestMessage] = useState(''); /* Refs for preview and recording resources; stopTTSPreview and handleToggleASRRecording below read and reset them. */ const audioRef = useRef(null); const audioUrlRef = useRef(null); const browserPreviewCancelRef = useRef<(() => void) | null>(null); const ttsTestRequestIdRef = useRef(0); const mediaRecorderRef = useRef(null); /* asrProvider is read with optional chaining further down, i.e. it is treated as possibly undefined when the id has no ASR_PROVIDERS entry. */ const asrProvider = ASR_PROVIDERS[asrProviderId as keyof typeof ASR_PROVIDERS]; const isCustomASR = isCustomASRProvider(asrProviderId); // Reset locale filter when provider changes (derived 
state pattern) const [prevTTSProviderId, setPrevTTSProviderId] = useState(ttsProviderId); if (ttsProviderId !== prevTTSProviderId) { setPrevTTSProviderId(ttsProviderId); if (ttsProviderId !== 'azure-tts') { setSelectedLocale('all'); } } /* Stop any TTS preview: bump the request counter, call and clear the browser-preview canceller if one is registered, pause the audio element and blank its src, then revoke and clear the object URL held in audioUrlRef. NOTE(review): the counter bump presumably lets a pending preview request detect that it is stale; the code that reads the counter is not visible here. */ const stopTTSPreview = useCallback(() => { ttsTestRequestIdRef.current += 1; browserPreviewCancelRef.current?.(); browserPreviewCancelRef.current = null; audioRef.current?.pause(); if (audioRef.current) { audioRef.current.src = ''; } if (audioUrlRef.current) { URL.revokeObjectURL(audioUrlRef.current); audioUrlRef.current = null; } }, []); // Update voice selection when locale filter changes useEffect(() => { if (ttsProviderId === 'azure-tts' && selectedLocale !== 'all') { // Filter Azure voices by selected locale const filteredVoices = azureVoices.filter((voice) => voice.Locale === selectedLocale); // Check if current voice is in the filtered list const currentVoiceInFilter = filteredVoices.some((voice) => voice.ShortName === ttsVoice); // If current voice is not in filtered list, select the first voice in the filtered list if (!currentVoiceInFilter && filteredVoices.length > 0) { setTTSVoice(filteredVoices[0].ShortName); } } // Intentionally exclude ttsVoice from dependencies to avoid infinite loop // eslint-disable-next-line react-hooks/exhaustive-deps }, [selectedLocale, ttsProviderId, azureVoices, setTTSVoice]); /* Stop any running preview whenever the selected TTS provider changes. */ useEffect(() => { stopTTSPreview(); }, [ttsProviderId, stopTTSPreview]); // Initialize and reset TTS voice when provider changes useEffect(() => { let availableVoices: Array<{ id: string; name: string }> = []; if (ttsProviderId === 'azure-tts') { // Use Azure voices from JSON availableVoices = azureVoices.map((voice) => ({ id: voice.ShortName, name: voice.LocalName, })); } else if (ttsProviderId === 'voxcpm-tts') { availableVoices = getVoxCPMVoiceOptions(voxcpmProfiles, { supportsClone: voxCPMBackendSupportsReferenceAudio(voxcpmBackend), }); } else { // Use static voices from constants availableVoices = 
getTTSVoices(ttsProviderId); } if (availableVoices.length > 0) { // Initialize default voice if not set if (!ttsVoice) { setTTSVoice(availableVoices[0].id); } else { // Check if current voice is available in new provider const currentVoiceExists = availableVoices.some((v) => v.id === ttsVoice); if (!currentVoiceExists) { setTTSVoice(availableVoices[0].id); } } } }, [ttsProviderId, ttsVoice, azureVoices, voxcpmProfiles, voxcpmBackend, setTTSVoice]); // Initialize and reset ASR language when provider changes useEffect(() => { const availableLanguages = getASRSupportedLanguages(asrProviderId); if (availableLanguages.length > 0) { // Initialize default language if not set if (!asrLanguage) { setASRLanguage(availableLanguages[0]); } else { // Check if current language is available in new provider const currentLanguageExists = availableLanguages.includes(asrLanguage); if (!currentLanguageExists) { setASRLanguage(availableLanguages[0]); } } } }, [asrProviderId, asrLanguage, setASRLanguage]); /* Effect with no setup work: its cleanup stops any TTS preview, so playback does not outlive the component. */ useEffect(() => { return () => { stopTTSPreview(); }; }, [stopTTSPreview]); // Clear ASR test status when provider changes (derived state pattern) const [prevASRProviderId, setPrevASRProviderId] = useState(asrProviderId); if (asrProviderId !== prevASRProviderId) { setPrevASRProviderId(asrProviderId); setASRTestStatus('idle'); setASRTestMessage(''); setASRResult(''); } // Test ASR /* Toggle the ASR test. While recording, stop the MediaRecorder (if it is recording) and clear the recording flag. Otherwise reset the test state and start either the browser speech recognition (browser-native provider) or a microphone recording that is uploaded for transcription once it stops. NOTE(review): the browser-native branch does not store the recognition object, so the stop path only clears the flag and cannot end that recognition session - confirm whether this is intended. */ const handleToggleASRRecording = async () => { if (isRecording) { if (mediaRecorderRef.current && mediaRecorderRef.current.state === 'recording') { mediaRecorderRef.current.stop(); } setIsRecording(false); } else { setASRResult(''); setASRTestStatus('testing'); setASRTestMessage(''); if (asrProviderId === 'browser-native') { const SpeechRecognitionCtor = (window as unknown as Record).SpeechRecognition || (window as unknown as Record).webkitSpeechRecognition; if (!SpeechRecognitionCtor) { setASRTestStatus('error'); setASRTestMessage(t('settings.asrNotSupported')); return; } // eslint-disable-next-line 
@typescript-eslint/no-explicit-any -- Vendor-prefixed API without standard typings const recognition = new (SpeechRecognitionCtor as new () => any)(); recognition.lang = asrLanguage || 'zh-CN'; recognition.onresult = (event: { results: { [index: number]: { [index: number]: { transcript: string } }; }; }) => { const transcript = event.results[0][0].transcript; setASRResult(transcript); setASRTestStatus('success'); setASRTestMessage(t('settings.asrTestSuccess')); }; recognition.onerror = (event: { error: string }) => { log.error('Speech recognition error:', event.error); setASRTestStatus('error'); setASRTestMessage(t('settings.asrTestFailed') + ': ' + event.error); }; recognition.onend = () => { setIsRecording(false); }; recognition.start(); setIsRecording(true); } else { try { const stream = await navigator.mediaDevices.getUserMedia({ audio: true, }); const mediaRecorder = new MediaRecorder(stream); mediaRecorderRef.current = mediaRecorder; const audioChunks: Blob[] = []; mediaRecorder.ondataavailable = (event) => { audioChunks.push(event.data); }; /* When recording stops: stop every track of the microphone stream, then POST the collected chunks as a multipart form to the transcription endpoint and report the outcome. */ mediaRecorder.onstop = async () => { stream.getTracks().forEach((track) => track.stop()); const audioBlob = new Blob(audioChunks, { type: 'audio/webm' }); const formData = new FormData(); formData.append('audio', audioBlob, 'recording.webm'); formData.append('providerId', asrProviderId); formData.append('language', asrLanguage); // Only append non-empty values const apiKeyValue = asrProvidersConfig[asrProviderId]?.apiKey; if (apiKeyValue && apiKeyValue.trim()) { formData.append('apiKey', apiKeyValue); } const baseUrlValue = asrProvidersConfig[asrProviderId]?.baseUrl || asrProvidersConfig[asrProviderId]?.customDefaultBaseUrl || ''; if (baseUrlValue && baseUrlValue.trim()) { formData.append('baseUrl', baseUrlValue); } try { const response = await fetch('/api/transcription', { method: 'POST', body: formData, }); if (response.ok) { const data = await response.json(); setASRResult(data.text); setASRTestStatus('success'); 
setASRTestMessage(t('settings.asrTestSuccess')); } else { setASRTestStatus('error'); const errorData = await response .json() .catch(() => ({ error: response.statusText })); // Show details if available, otherwise show error message setASRTestMessage( errorData.details || errorData.error || t('settings.asrTestFailed'), ); } } catch (error) { log.error('ASR test failed:', error); setASRTestStatus('error'); setASRTestMessage(t('settings.asrTestFailed')); } }; mediaRecorder.start(); setIsRecording(true); } catch (error) { log.error('Failed to access microphone:', error); setASRTestStatus('error'); setASRTestMessage(t('settings.microphoneAccessFailed')); } } } }; return (
{/* TTS Section */}

{t('settings.ttsSection')}

{t('settings.ttsEnabledDescription')}

{ setTTSEnabled(checked); onSave?.(); }} />

{t('settings.ttsVoiceConfigHint')}

{(ttsProvider.requiresApiKey || ttsProvidersConfig[ttsProviderId]?.isServerConfigured || ttsProviderId === 'voxcpm-tts') && ( <>
{(ttsProvider.requiresApiKey || ttsProvidersConfig[ttsProviderId]?.isServerConfigured) && (
handleTTSProviderConfigChange(ttsProviderId, { apiKey: e.target.value, }) } className="font-mono text-sm pr-10" />
)}
handleTTSProviderConfigChange(ttsProviderId, { baseUrl: e.target.value, }) } className="text-sm" />
)}
{/* ASR Section */}

{t('settings.asrSection')}

{t('settings.asrEnabledDescription')}

{ setASREnabled(checked); onSave?.(); }} />
{(asrProvider?.requiresApiKey || isCustomASR || asrProvidersConfig[asrProviderId]?.isServerConfigured) && ( <>
handleASRProviderConfigChange(asrProviderId, { apiKey: e.target.value, }) } className="font-mono text-sm pr-10" />
handleASRProviderConfigChange(asrProviderId, { baseUrl: e.target.value, }) } className="text-sm" />
{(() => { const effectiveBaseUrl = asrProvidersConfig[asrProviderId]?.baseUrl || (isCustomASR ? asrProvidersConfig[asrProviderId]?.customDefaultBaseUrl : asrProvider?.defaultBaseUrl) || ''; if (!effectiveBaseUrl) return null; // Get endpoint path based on provider let endpointPath = ''; if (isCustomASR) { endpointPath = '/audio/transcriptions'; } else { switch (asrProviderId) { case 'openai-whisper': endpointPath = '/audio/transcriptions'; break; case 'qwen-asr': endpointPath = '/services/aigc/multimodal-generation/generation'; break; default: endpointPath = ''; } } if (!endpointPath) return null; const fullUrl = effectiveBaseUrl + endpointPath; return (

{t('settings.requestUrl')}: {fullUrl}

); })()} )} {(() => { const supportedLanguages = getASRSupportedLanguages(asrProviderId); const hasLanguageSelection = supportedLanguages.length > 0; return (
{hasLanguageSelection && (
)}
); })()} {asrTestMessage && (
{asrTestStatus === 'success' && ( )} {asrTestStatus === 'error' && }

{asrTestMessage}

)}
); }