OpenMAIC-React / src /lib /hooks /use-audio-recorder.ts
muthuk1's picture
Convert OpenMAIC from Next.js to React (Vite)
f56a29b verified
import { useState, useRef, useCallback } from 'react';
import { ASR_PROVIDERS } from '@/lib/audio/constants';
import { createLogger } from '@/lib/logger';
// Module-level logger for this hook, created with the 'AudioRecorder' label.
const log = createLogger('AudioRecorder');
// TypeScript declarations for the Web Speech API.
// Both the standard and the webkit-prefixed constructor are declared on `Window`
// (as `any`) so the hook can use whichever one is present at runtime.
declare global {
interface Window {
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Web Speech API not typed in lib.dom
SpeechRecognition: any;
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Web Speech API not typed in lib.dom
webkitSpeechRecognition: any;
}
}
/** Optional callbacks accepted by `useAudioRecorder`. */
export interface UseAudioRecorderOptions {
/** Called with the recognized text once a transcription result is available. */
onTranscription?: (text: string) => void;
/** Called with a human-readable message when starting, recognizing or transcribing fails. */
onError?: (error: string) => void;
}
/**
 * React hook that captures speech and reports it as text.
 *
 * Two paths are implemented, chosen from the settings store when recording starts:
 * - `browser-native`: the browser's Web Speech API produces the transcript directly.
 * - any other provider: audio is captured with `MediaRecorder` and POSTed to
 *   `/api/transcription`.
 *
 * @param options Optional `onTranscription` / `onError` callbacks.
 * @returns `isRecording`, `isProcessing`, `recordingTime` plus the `startRecording`,
 *          `stopRecording` and `cancelRecording` callbacks.
 */
export function useAudioRecorder(options: UseAudioRecorderOptions = {}) {
const { onTranscription, onError } = options;
// True while a capture session (either path) is live.
const [isRecording, setIsRecording] = useState(false);
// True while a recorded clip is being transcribed by the server.
const [isProcessing, setIsProcessing] = useState(false);
// Elapsed recording time; incremented by one on every 1000 ms timer tick.
const [recordingTime, setRecordingTime] = useState(0);
// Active (or most recently created) MediaRecorder for the server-side path.
const mediaRecorderRef = useRef<MediaRecorder | null>(null);
// Audio chunks collected from the recorder's `dataavailable` events.
const audioChunksRef = useRef<Blob[]>([]);
// Handle of the interval that drives `recordingTime`.
// NOTE(review): `NodeJS.Timeout` needs Node typings; in browser-only code
// `ReturnType<typeof setInterval>` may be the safer type — confirm against tsconfig.
const timerRef = useRef<NodeJS.Timeout | null>(null);
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Web Speech API not typed
const speechRecognitionRef = useRef<any>(null);
// Synchronous lock to prevent rapid re-entry (React state updates are async)
const busyRef = useRef(false);
/**
 * Uploads a recorded clip to `/api/transcription` and reports the outcome.
 *
 * The multipart body carries the audio plus the ASR settings read from the settings
 * store (provider id, model id, language and, when configured, API key and base URL).
 * On success the returned `text` is passed to `onTranscription`; on any failure the
 * error is logged and a message is passed to `onError`. `isProcessing` is raised for
 * the duration of the call and `recordingTime` is reset to 0 when it finishes.
 *
 * @param audioBlob Recorded audio, sent as `recording.webm`.
 */
const transcribeAudio = useCallback(
  async (audioBlob: Blob) => {
    setIsProcessing(true);
    try {
      const formData = new FormData();
      formData.append('audio', audioBlob, 'recording.webm');

      // The settings store is loaded lazily and only in a browser context.
      if (typeof window !== 'undefined') {
        const { useSettingsStore } = await import('@/lib/store/settings');
        const { asrProviderId, asrLanguage, asrProvidersConfig } = useSettingsStore.getState();
        const providerConfig = asrProvidersConfig?.[asrProviderId];

        formData.append('providerId', asrProviderId);
        // User-configured model wins; otherwise fall back to the provider's default.
        formData.append(
          'modelId',
          providerConfig?.modelId ||
            ASR_PROVIDERS[asrProviderId as keyof typeof ASR_PROVIDERS]?.defaultModelId ||
            '',
        );
        formData.append('language', asrLanguage);

        // Append API key and base URL only if configured
        if (providerConfig?.apiKey?.trim()) {
          formData.append('apiKey', providerConfig.apiKey);
        }
        const effectiveBaseUrl =
          providerConfig?.baseUrl?.trim() || providerConfig?.customDefaultBaseUrl || '';
        if (effectiveBaseUrl) {
          formData.append('baseUrl', effectiveBaseUrl);
        }
      }

      const response = await fetch('/api/transcription', {
        method: 'POST',
        body: formData,
      });

      if (!response.ok) {
        // Error bodies are not guaranteed to be JSON (e.g. an HTML 404/502 page), so a
        // parse failure must not replace the real failure with a JSON syntax error.
        const payload = await response.json().catch(() => null);
        const serverMessage =
          typeof payload?.error === 'string' && payload.error ? payload.error : '';
        throw new Error(serverMessage || 'Transcription failed');
      }

      const result = await response.json();
      onTranscription?.(result.text);
    } catch (error) {
      log.error('Transcription error:', error);
      onError?.(error instanceof Error ? error.message : '语音识别失败,请重试');
    } finally {
      setIsProcessing(false);
      setRecordingTime(0);
    }
  },
  [onTranscription, onError],
);
/**
 * Starts a recording session using the ASR provider selected in the settings store.
 *
 * - `browser-native`: runs the Web Speech API; the transcript is passed straight to
 *   `onTranscription`.
 * - any other provider: captures microphone audio with `MediaRecorder`; when the
 *   recorder stops, the clip is handed to `transcribeAudio`.
 *
 * `busyRef` is taken synchronously so rapid repeated calls cannot start two sessions,
 * and it is released on every path that ends without a live session.
 */
const startRecording = useCallback(async () => {
  // Synchronous lock — React state is async so isRecording may be stale
  if (busyRef.current) return;
  busyRef.current = true;

  // Stops the elapsed-time ticker if one is running.
  const stopTimer = () => {
    if (timerRef.current) {
      clearInterval(timerRef.current);
      timerRef.current = null;
    }
  };

  // Flags the session as live and restarts the elapsed-time ticker from zero.
  const markRecordingStarted = () => {
    setIsRecording(true);
    setRecordingTime(0);
    stopTimer(); // never leave an earlier interval running
    timerRef.current = setInterval(() => {
      setRecordingTime((prev) => prev + 1);
    }, 1000);
  };

  // Maps a Web Speech API error code to the message shown to the user.
  const describeSpeechError = (code: string): string => {
    switch (code) {
      case 'no-speech':
        return '未检测到语音输入';
      case 'audio-capture':
        return '无法访问麦克风';
      case 'not-allowed':
        return '麦克风权限被拒绝';
      case 'network':
        return '网络错误';
      default:
        return `语音识别错误: ${code}`;
    }
  };

  try {
    // Get current ASR configuration
    if (typeof window !== 'undefined') {
      const { useSettingsStore } = await import('@/lib/store/settings');
      const { asrProviderId, asrLanguage } = useSettingsStore.getState();

      // Use browser native ASR if configured
      if (asrProviderId === 'browser-native') {
        const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
        if (!SpeechRecognition) {
          // Release the lock, otherwise every later start attempt is silently ignored.
          busyRef.current = false;
          onError?.('您的浏览器不支持语音识别功能');
          return;
        }

        const recognition = new SpeechRecognition();
        recognition.lang = asrLanguage || 'zh-CN';
        recognition.continuous = false;
        recognition.interimResults = false;

        // Resets the shared state once this recognition session is over.
        const finishSession = () => {
          const current = speechRecognitionRef.current;
          // A newer recognition session owns the shared state — a late event from this
          // one must not clear its lock, flag or timer.
          if (current && current !== recognition) return;
          // Drop the finished instance so stop/cancel do not mistake it for a live one.
          speechRecognitionRef.current = null;
          busyRef.current = false;
          setIsRecording(false);
          setRecordingTime(0);
          stopTimer();
        };

        recognition.onstart = markRecordingStarted;

        recognition.onresult = (event: {
          results: {
            [index: number]: { [index: number]: { transcript: string } };
          };
        }) => {
          const transcript = event.results[0][0].transcript;
          onTranscription?.(transcript);
        };

        recognition.onerror = (event: { error: string }) => {
          log.error('Speech recognition error:', event.error);
          // 'aborted' is non-fatal: caused by our own cancel/stop logic or rapid toggle,
          // so it is not reported to the caller.
          if (event.error !== 'aborted') {
            onError?.(describeSpeechError(event.error));
          }
          finishSession();
        };

        recognition.onend = finishSession;

        recognition.start();
        speechRecognitionRef.current = recognition;
        return;
      }
    }

    // Use MediaRecorder for server-side ASR
    // Request microphone permission
    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });

    try {
      const mediaRecorder = new MediaRecorder(stream, {
        mimeType: 'audio/webm',
      });
      mediaRecorderRef.current = mediaRecorder;
      audioChunksRef.current = [];

      mediaRecorder.ondataavailable = (event) => {
        if (event.data.size > 0) {
          audioChunksRef.current.push(event.data);
        }
      };

      mediaRecorder.onstop = async () => {
        // Stop all audio tracks
        stream.getTracks().forEach((track) => track.stop());
        // Merge audio chunks
        const audioBlob = new Blob(audioChunksRef.current, {
          type: 'audio/webm',
        });
        // Send to server for transcription
        await transcribeAudio(audioBlob);
        busyRef.current = false;
      };

      mediaRecorder.start();
    } catch (setupError) {
      // The microphone was already acquired; release it so it does not stay open when
      // the recorder cannot be created or started (e.g. unsupported mimeType).
      stream.getTracks().forEach((track) => track.stop());
      throw setupError;
    }

    markRecordingStarted();
  } catch (error) {
    busyRef.current = false;
    log.error('Failed to start recording:', error);
    onError?.('无法访问麦克风,请检查权限设置');
  }
}, [onTranscription, onError, transcribeAudio]);
/**
 * Ends the active session and lets its result be delivered.
 *
 * A speech-recognition session takes precedence: if one is referenced it is stopped and
 * the function returns. Otherwise the MediaRecorder is stopped, provided one exists and
 * `isRecording` is true. In both cases the busy lock is released, `isRecording` is
 * cleared and the elapsed-time timer is stopped.
 */
const stopRecording = useCallback(() => {
  // Clears the elapsed-time interval when one is registered.
  const haltTimer = () => {
    const handle = timerRef.current;
    if (!handle) return;
    clearInterval(handle);
    timerRef.current = null;
  };

  // Speech Recognition path
  const recognition = speechRecognitionRef.current;
  if (recognition) {
    recognition.stop();
    speechRecognitionRef.current = null;
    busyRef.current = false;
    setIsRecording(false);
    haltTimer();
    return;
  }

  // MediaRecorder path
  const recorder = mediaRecorderRef.current;
  if (!recorder || !isRecording) return;
  recorder.stop();
  busyRef.current = false;
  setIsRecording(false);
  haltTimer();
}, [isRecording]);
/**
 * Cancels the active session and discards whatever was captured.
 *
 * No transcription callback is fired: result/error handlers are detached before the
 * session is torn down. The busy lock is released, `isRecording` and `recordingTime`
 * are reset and the elapsed-time timer is stopped.
 */
const cancelRecording = useCallback(() => {
  // Clears the elapsed-time interval when one is registered.
  const stopTimer = () => {
    if (timerRef.current) {
      clearInterval(timerRef.current);
      timerRef.current = null;
    }
  };

  // Cancel Speech Recognition if active
  const recognition = speechRecognitionRef.current;
  if (recognition) {
    recognition.onresult = null; // Prevent transcription callback
    recognition.onerror = null; // Suppress browser abort error events
    // abort() drops the captured audio without attempting recognition, whereas stop()
    // still tries to produce a result from it. Fall back to stop() if abort is missing.
    if (typeof recognition.abort === 'function') {
      recognition.abort();
    } else {
      recognition.stop();
    }
    speechRecognitionRef.current = null;
    busyRef.current = false;
    setIsRecording(false);
    setRecordingTime(0);
    stopTimer();
    return;
  }

  // Cancel MediaRecorder if active
  const recorder = mediaRecorderRef.current;
  if (recorder && isRecording) {
    // Stop recording without transcription
    recorder.ondataavailable = null;
    recorder.onstop = null;
    // stop() throws InvalidStateError on an inactive recorder, which would skip the
    // cleanup below and leave the microphone open.
    if (recorder.state !== 'inactive') {
      recorder.stop();
    }
    // Stop all audio tracks (onstop was detached, so nothing else releases them)
    if (recorder.stream) {
      recorder.stream.getTracks().forEach((track) => track.stop());
    }
    busyRef.current = false;
    setIsRecording(false);
    setRecordingTime(0);
    stopTimer();
    audioChunksRef.current = [];
  }
}, [isRecording]);
// Hook result: the three reactive state values followed by the control callbacks.
return {
isRecording,
isProcessing,
recordingTime,
startRecording,
stopRecording,
cancelRecording,
};
}