| import { useState, useRef, useCallback } from 'react'; |
| import { ASR_PROVIDERS } from '@/lib/audio/constants'; |
| import { createLogger } from '@/lib/logger'; |
|
|
| const log = createLogger('AudioRecorder'); |
|
|
| |
declare global {
  /**
   * Augments the DOM `Window` type with the two speech-recognition
   * constructors that `useAudioRecorder` reads from `window`: the standard
   * name and the `webkit`-prefixed one. Both are typed as `any`.
   */
  interface Window {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    SpeechRecognition: any;
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    webkitSpeechRecognition: any;
  }
}
|
|
/**
 * Optional callbacks accepted by `useAudioRecorder`.
 */
export interface UseAudioRecorderOptions {
  /** Invoked with the recognized text once a transcription is available. */
  onTranscription?: (text: string) => void;

  /** Invoked with a human-readable message when recording or transcription fails. */
  onError?: (error: string) => void;
}
|
|
/**
 * React hook that captures speech and turns it into text.
 *
 * The back-end is chosen from `asrProviderId` in the settings store each time
 * `startRecording` runs:
 * - `'browser-native'`: the browser's `SpeechRecognition` API produces the
 *   transcript directly.
 * - any other provider: audio is captured with `MediaRecorder` and uploaded to
 *   `/api/transcription` when recording stops.
 *
 * @param options - Optional `onTranscription` / `onError` callbacks.
 * @returns Recording state (`isRecording`, `isProcessing`, `recordingTime` in
 *   seconds) and the `startRecording`, `stopRecording`, `cancelRecording`
 *   controls.
 */
export function useAudioRecorder(options: UseAudioRecorderOptions = {}) {
  const { onTranscription, onError } = options;

  const [isRecording, setIsRecording] = useState(false);
  const [isProcessing, setIsProcessing] = useState(false);
  const [recordingTime, setRecordingTime] = useState(0);

  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
  const audioChunksRef = useRef<Blob[]>([]);
  const timerRef = useRef<NodeJS.Timeout | null>(null);
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const speechRecognitionRef = useRef<any>(null);
  // Re-entrancy guard: true from the moment a start is accepted until the
  // session has ended, been stopped/cancelled, or failed to start.
  const busyRef = useRef(false);

  /** Stops the one-second elapsed-time ticker, if one is running. */
  const stopTimer = useCallback(() => {
    if (timerRef.current) {
      clearInterval(timerRef.current);
      timerRef.current = null;
    }
  }, []);

  /** Starts the one-second ticker, first clearing any interval still pending. */
  const startTimer = useCallback(() => {
    stopTimer();
    timerRef.current = setInterval(() => {
      setRecordingTime((prev) => prev + 1);
    }, 1000);
  }, [stopTimer]);

  /**
   * Uploads a recorded blob to `/api/transcription` together with the ASR
   * provider settings and reports the outcome through the callbacks.
   * Failures are logged and forwarded to `onError`; nothing is rethrown.
   */
  const transcribeAudio = useCallback(
    async (audioBlob: Blob) => {
      setIsProcessing(true);

      try {
        const formData = new FormData();
        formData.append('audio', audioBlob, 'recording.webm');

        if (typeof window !== 'undefined') {
          // Read the store imperatively so the request reflects the settings
          // at upload time.
          const { useSettingsStore } = await import('@/lib/store/settings');
          const { asrProviderId, asrLanguage, asrProvidersConfig } = useSettingsStore.getState();
          const providerConfig = asrProvidersConfig?.[asrProviderId];

          formData.append('providerId', asrProviderId);
          formData.append(
            'modelId',
            providerConfig?.modelId ||
              ASR_PROVIDERS[asrProviderId as keyof typeof ASR_PROVIDERS]?.defaultModelId ||
              '',
          );
          formData.append('language', asrLanguage);

          // Credentials and endpoint are only sent when configured.
          if (providerConfig?.apiKey?.trim()) {
            formData.append('apiKey', providerConfig.apiKey);
          }
          const effectiveBaseUrl =
            providerConfig?.baseUrl?.trim() || providerConfig?.customDefaultBaseUrl || '';
          if (effectiveBaseUrl) {
            formData.append('baseUrl', effectiveBaseUrl);
          }
        }

        const response = await fetch('/api/transcription', {
          method: 'POST',
          body: formData,
        });

        if (!response.ok) {
          // The error body may not be JSON; fall back to a generic message
          // instead of surfacing a JSON parse error to the user.
          let message = 'Transcription failed';
          try {
            const payload = (await response.json()) as { error?: unknown } | null;
            if (typeof payload?.error === 'string' && payload.error) {
              message = payload.error;
            }
          } catch {
            // Keep the generic message.
          }
          throw new Error(message);
        }

        const result = await response.json();
        onTranscription?.(result.text);
      } catch (error) {
        log.error('Transcription error:', error);
        onError?.(error instanceof Error ? error.message : '语音识别失败,请重试');
      } finally {
        setIsProcessing(false);
        setRecordingTime(0);
      }
    },
    [onTranscription, onError],
  );

  /**
   * Begins a recording session. A call made while a session is already in
   * progress is ignored. Start-up failures are logged and reported through
   * `onError`, and the busy guard is released.
   */
  const startRecording = useCallback(async () => {
    if (busyRef.current) return;
    busyRef.current = true;

    try {
      if (typeof window !== 'undefined') {
        const { useSettingsStore } = await import('@/lib/store/settings');
        const { asrProviderId, asrLanguage } = useSettingsStore.getState();

        if (asrProviderId === 'browser-native') {
          const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
          if (!SpeechRecognition) {
            // Release the guard, otherwise every later start would be ignored.
            busyRef.current = false;
            onError?.('您的浏览器不支持语音识别功能');
            return;
          }

          const recognition = new SpeechRecognition();
          recognition.lang = asrLanguage || 'zh-CN';
          recognition.continuous = false;
          recognition.interimResults = false;

          // Shared teardown for the end/error events of this instance.
          const endSession = () => {
            // Drop the ref only if it still points at this instance, so a
            // finished recognition cannot be mistaken for an active one by
            // stopRecording/cancelRecording.
            if (speechRecognitionRef.current === recognition) {
              speechRecognitionRef.current = null;
            }
            busyRef.current = false;
            setIsRecording(false);
            setRecordingTime(0);
            stopTimer();
          };

          recognition.onstart = () => {
            setIsRecording(true);
            setRecordingTime(0);
            startTimer();
          };

          recognition.onresult = (event: {
            results: {
              [index: number]: { [index: number]: { transcript: string } };
            };
          }) => {
            const transcript = event.results[0][0].transcript;
            onTranscription?.(transcript);
          };

          recognition.onerror = (event: { error: string }) => {
            log.error('Speech recognition error:', event.error);

            // An abort is not reported to the caller as an error.
            if (event.error === 'aborted') {
              endSession();
              return;
            }

            let errorMessage: string;
            switch (event.error) {
              case 'no-speech':
                errorMessage = '未检测到语音输入';
                break;
              case 'audio-capture':
                errorMessage = '无法访问麦克风';
                break;
              case 'not-allowed':
                errorMessage = '麦克风权限被拒绝';
                break;
              case 'network':
                errorMessage = '网络错误';
                break;
              default:
                errorMessage = `语音识别错误: ${event.error}`;
            }

            onError?.(errorMessage);
            endSession();
          };

          recognition.onend = endSession;

          recognition.start();
          speechRecognitionRef.current = recognition;
          return;
        }
      }

      // Server-side ASR: capture audio locally and upload it on stop.
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });

      try {
        const mediaRecorder = new MediaRecorder(stream, {
          mimeType: 'audio/webm',
        });

        mediaRecorderRef.current = mediaRecorder;
        audioChunksRef.current = [];

        mediaRecorder.ondataavailable = (event) => {
          if (event.data.size > 0) {
            audioChunksRef.current.push(event.data);
          }
        };

        mediaRecorder.onstop = async () => {
          // Release the microphone before the upload starts.
          stream.getTracks().forEach((track) => track.stop());

          const audioBlob = new Blob(audioChunksRef.current, {
            type: 'audio/webm',
          });

          try {
            await transcribeAudio(audioBlob);
          } finally {
            busyRef.current = false;
          }
        };

        mediaRecorder.start();
      } catch (error) {
        // The stream is already open here; if constructing or starting the
        // recorder fails (e.g. the requested mimeType is not supported),
        // release the microphone before reporting the failure.
        stream.getTracks().forEach((track) => track.stop());
        mediaRecorderRef.current = null;
        throw error;
      }

      setIsRecording(true);
      setRecordingTime(0);
      startTimer();
    } catch (error) {
      busyRef.current = false;
      log.error('Failed to start recording:', error);
      onError?.('无法访问麦克风,请检查权限设置');
    }
  }, [onTranscription, onError, transcribeAudio, startTimer, stopTimer]);

  /**
   * Ends the current session and keeps its result: speech recognition is
   * asked to stop, or the `MediaRecorder` is stopped, which triggers the
   * upload in its `onstop` handler.
   */
  const stopRecording = useCallback(() => {
    if (speechRecognitionRef.current) {
      speechRecognitionRef.current.stop();
      speechRecognitionRef.current = null;
      busyRef.current = false;
      setIsRecording(false);
      stopTimer();
      return;
    }

    if (mediaRecorderRef.current && isRecording) {
      mediaRecorderRef.current.stop();
      // The guard is released here; the recorder's onstop handler releases it
      // again once transcription has finished.
      busyRef.current = false;
      setIsRecording(false);
      stopTimer();
    }
  }, [isRecording, stopTimer]);

  /**
   * Ends the current session and discards its result: result/error handlers
   * are detached before stopping, so no transcript or error is reported.
   */
  const cancelRecording = useCallback(() => {
    if (speechRecognitionRef.current) {
      speechRecognitionRef.current.onresult = null;
      speechRecognitionRef.current.onerror = null;
      speechRecognitionRef.current.stop();
      speechRecognitionRef.current = null;
      busyRef.current = false;
      setIsRecording(false);
      setRecordingTime(0);
      stopTimer();
      return;
    }

    if (mediaRecorderRef.current && isRecording) {
      // Detach the handlers first so stopping does not trigger an upload.
      mediaRecorderRef.current.ondataavailable = null;
      mediaRecorderRef.current.onstop = null;
      mediaRecorderRef.current.stop();

      // onstop was detached, so the microphone must be released here.
      if (mediaRecorderRef.current.stream) {
        mediaRecorderRef.current.stream.getTracks().forEach((track) => track.stop());
      }

      busyRef.current = false;
      setIsRecording(false);
      setRecordingTime(0);
      stopTimer();

      audioChunksRef.current = [];
    }
  }, [isRecording, stopTimer]);

  return {
    isRecording,
    isProcessing,
    recordingTime,
    startRecording,
    stopRecording,
    cancelRecording,
  };
}
|
|