OpenMAIC-React / src /lib /hooks /use-audio-recorder.ts

Convert OpenMAIC from Next.js to React (Vite)

f56a29b verified 10 days ago

10.7 kB

	import { useState, useRef, useCallback, useEffect } from 'react';
	import { ASR_PROVIDERS } from '@/lib/audio/constants';
	import { createLogger } from '@/lib/logger';

	// Module-level logger used by the error paths in this file; 'AudioRecorder' is the
	// name handed to createLogger.
	const log = createLogger('AudioRecorder');

	// TypeScript declarations for Web Speech API.
	// Augments the global `Window` type so that `window.SpeechRecognition` and
	// `window.webkitSpeechRecognition` can be referenced. Both members are typed `any`,
	// so nothing accessed through them is type-checked.
	declare global {
	interface Window {
	// Unprefixed constructor name.
	// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Web Speech API not typed in lib.dom
	SpeechRecognition: any;
	// `webkit`-prefixed constructor name, used as the fallback when the unprefixed one is absent.
	// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Web Speech API not typed in lib.dom
	webkitSpeechRecognition: any;
	}
	}

	/** Optional callbacks accepted by `useAudioRecorder`. */
	export interface UseAudioRecorderOptions {
	/** Invoked with the recognized text once a transcription result is available. */
	onTranscription?: (text: string) => void;
	/** Invoked with a user-facing message when starting, recording or transcribing fails. */
	onError?: (error: string) => void;
	}

	/**
	 * Maps a speech-recognition error code to the user-facing message passed to `onError`.
	 * Unknown codes fall back to a generic message that embeds the code itself.
	 */
	function describeSpeechError(code: string): string {
	  switch (code) {
	    case 'no-speech':
	      return '未检测到语音输入';
	    case 'audio-capture':
	      return '无法访问麦克风';
	    case 'not-allowed':
	      return '麦克风权限被拒绝';
	    case 'network':
	      return '网络错误';
	    default:
	      return `语音识别错误: ${code}`;
	  }
	}

	/**
	 * React hook that records speech and turns it into text.
	 *
	 * Depending on the `asrProviderId` read from the settings store, a session either
	 * uses the browser's speech recognition (`'browser-native'`) or records audio with
	 * `MediaRecorder` and POSTs it to `/api/transcription`.
	 *
	 * @param options Optional `onTranscription` / `onError` callbacks.
	 * @returns Recording state (`isRecording`, `isProcessing`, `recordingTime` in seconds)
	 *   and the `startRecording`, `stopRecording` and `cancelRecording` controls.
	 *   `stopRecording` ends the session and delivers a transcription;
	 *   `cancelRecording` ends it and discards the audio.
	 */
	export function useAudioRecorder(options: UseAudioRecorderOptions = {}) {
	  const { onTranscription, onError } = options;

	  const [isRecording, setIsRecording] = useState(false);
	  const [isProcessing, setIsProcessing] = useState(false);
	  const [recordingTime, setRecordingTime] = useState(0);

	  // Recorder of the session currently in progress; null when no recorder is active.
	  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
	  const audioChunksRef = useRef<Blob[]>([]);
	  // ReturnType<typeof setInterval> type-checks under both DOM and Node typings.
	  const timerRef = useRef<ReturnType<typeof setInterval> | null>(null);
	  // Recognition instance of the session currently in progress; null otherwise.
	  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- Web Speech API not typed
	  const speechRecognitionRef = useRef<any>(null);
	  // Synchronous lock to prevent rapid re-entry (React state updates are async)
	  const busyRef = useRef(false);

	  // Stops the one-second ticker, if one is running.
	  const stopTimer = useCallback(() => {
	    if (timerRef.current !== null) {
	      clearInterval(timerRef.current);
	      timerRef.current = null;
	    }
	  }, []);

	  // Resets the elapsed time and starts a fresh one-second ticker.
	  const startTimer = useCallback(() => {
	    stopTimer(); // never leave an earlier interval orphaned
	    setRecordingTime(0);
	    timerRef.current = setInterval(() => {
	      setRecordingTime((prev) => prev + 1);
	    }, 1000);
	  }, [stopTimer]);

	  // Marks the current session as over: stops the ticker, releases the lock and
	  // clears the recording flag. The elapsed time is left for the caller to reset.
	  const endRecordingSession = useCallback(() => {
	    stopTimer();
	    busyRef.current = false;
	    setIsRecording(false);
	  }, [stopTimer]);

	  // Send audio to server for transcription
	  const transcribeAudio = useCallback(
	    async (audioBlob: Blob) => {
	      setIsProcessing(true);

	      try {
	        const formData = new FormData();
	        formData.append('audio', audioBlob, 'recording.webm');

	        // The settings store is loaded lazily and only when a window exists.
	        if (typeof window !== 'undefined') {
	          const { useSettingsStore } = await import('@/lib/store/settings');
	          const { asrProviderId, asrLanguage, asrProvidersConfig } = useSettingsStore.getState();
	          const providerConfig = asrProvidersConfig?.[asrProviderId];

	          formData.append('providerId', asrProviderId);
	          formData.append(
	            'modelId',
	            providerConfig?.modelId ||
	              ASR_PROVIDERS[asrProviderId as keyof typeof ASR_PROVIDERS]?.defaultModelId ||
	              '',
	          );
	          formData.append('language', asrLanguage);

	          // Append API key and base URL if configured
	          if (providerConfig?.apiKey?.trim()) {
	            formData.append('apiKey', providerConfig.apiKey);
	          }
	          const effectiveBaseUrl =
	            providerConfig?.baseUrl?.trim() || providerConfig?.customDefaultBaseUrl || '';
	          if (effectiveBaseUrl) {
	            formData.append('baseUrl', effectiveBaseUrl);
	          }
	        }

	        const response = await fetch('/api/transcription', {
	          method: 'POST',
	          body: formData,
	        });

	        if (!response.ok) {
	          // The error body may not be JSON (e.g. a proxy error page); do not let the
	          // parse failure replace the real problem.
	          const payload = await response.json().catch(() => null);
	          const serverMessage = payload?.error;
	          throw new Error(
	            typeof serverMessage === 'string' && serverMessage
	              ? serverMessage
	              : 'Transcription failed',
	          );
	        }

	        const result = await response.json();
	        onTranscription?.(result.text);
	      } catch (error) {
	        log.error('Transcription error:', error);
	        onError?.(error instanceof Error ? error.message : '语音识别失败，请重试');
	      } finally {
	        setIsProcessing(false);
	        setRecordingTime(0);
	      }
	    },
	    [onTranscription, onError],
	  );

	  // Start recording
	  const startRecording = useCallback(async () => {
	    // Synchronous lock — React state is async so isRecording may be stale
	    if (busyRef.current) return;
	    busyRef.current = true;

	    // Tracked outside the try block so a failure after the microphone was acquired
	    // can still release it.
	    let acquiredStream: MediaStream | null = null;

	    try {
	      // Get current ASR configuration
	      if (typeof window !== 'undefined') {
	        const { useSettingsStore } = await import('@/lib/store/settings');
	        const { asrProviderId, asrLanguage } = useSettingsStore.getState();

	        // Use browser native ASR if configured
	        if (asrProviderId === 'browser-native') {
	          const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
	          if (!SpeechRecognition) {
	            // Release the lock before bailing out, otherwise every later call
	            // would return immediately at the guard above.
	            busyRef.current = false;
	            onError?.('您的浏览器不支持语音识别功能');
	            return;
	          }

	          const recognition = new SpeechRecognition();
	          recognition.lang = asrLanguage || 'zh-CN';
	          recognition.continuous = false;
	          recognition.interimResults = false;

	          // Tears the session down once, and only while this instance is still the
	          // active one; stop/cancel clear the ref first and do their own cleanup.
	          const finishSession = () => {
	            if (speechRecognitionRef.current !== recognition) return;
	            speechRecognitionRef.current = null;
	            endRecordingSession();
	            setRecordingTime(0);
	          };

	          recognition.onstart = () => {
	            // Ignore a start event that arrives after the session was stopped/cancelled.
	            if (speechRecognitionRef.current !== recognition) return;
	            setIsRecording(true);
	            startTimer();
	          };

	          recognition.onresult = (event: {
	            results: {
	              [index: number]: { [index: number]: { transcript: string } };
	            };
	          }) => {
	            const transcript = event.results[0]?.[0]?.transcript;
	            if (typeof transcript === 'string') {
	              onTranscription?.(transcript);
	            }
	          };

	          recognition.onerror = (event: { error: string }) => {
	            log.error('Speech recognition error:', event.error);
	            // 'aborted' is non-fatal: caused by our own cancel/stop logic or rapid toggle
	            if (event.error !== 'aborted') {
	              onError?.(describeSpeechError(event.error));
	            }
	            finishSession();
	          };

	          recognition.onend = finishSession;

	          // Publish the instance before starting so the handlers above recognise it.
	          speechRecognitionRef.current = recognition;
	          try {
	            recognition.start();
	          } catch (startError) {
	            speechRecognitionRef.current = null;
	            throw startError;
	          }
	          return;
	        }
	      }

	      // Use MediaRecorder for server-side ASR
	      // Request microphone permission
	      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
	      acquiredStream = stream;

	      const mediaRecorder = new MediaRecorder(stream, {
	        mimeType: 'audio/webm',
	      });

	      // Chunks are owned by this session so a later session cannot reset them
	      // before this recorder's stop handler has assembled the blob.
	      const chunks: Blob[] = [];

	      mediaRecorder.ondataavailable = (event) => {
	        if (event.data.size > 0) {
	          chunks.push(event.data);
	        }
	      };

	      mediaRecorder.onstop = async () => {
	        // Stop all audio tracks
	        stream.getTracks().forEach((track) => track.stop());

	        // Still registered as the active recorder: it stopped on its own rather than
	        // through stopRecording, so the session state has to be reset here.
	        if (mediaRecorderRef.current === mediaRecorder) {
	          mediaRecorderRef.current = null;
	          endRecordingSession();
	        }

	        // Merge audio chunks and send to server for transcription
	        await transcribeAudio(new Blob(chunks, { type: 'audio/webm' }));
	      };

	      // Start recording; the recorder is only published once start() succeeded.
	      mediaRecorder.start();
	      mediaRecorderRef.current = mediaRecorder;
	      audioChunksRef.current = chunks;
	      setIsRecording(true);
	      startTimer();
	    } catch (error) {
	      // Release the microphone if it was acquired before the failure.
	      acquiredStream?.getTracks().forEach((track) => track.stop());
	      busyRef.current = false;
	      log.error('Failed to start recording:', error);
	      onError?.('无法访问麦克风，请检查权限设置');
	    }
	  }, [onTranscription, onError, transcribeAudio, startTimer, endRecordingSession]);

	  // Stop recording
	  const stopRecording = useCallback(() => {
	    // Stop Speech Recognition if active; its result is still delivered via onresult.
	    const recognition = speechRecognitionRef.current;
	    if (recognition) {
	      speechRecognitionRef.current = null;
	      recognition.stop();
	      endRecordingSession();
	      setRecordingTime(0);
	      return;
	    }

	    // Stop MediaRecorder if active. The recorder's own state is checked instead of
	    // React state, which may be stale; stop() on an inactive recorder would throw.
	    const recorder = mediaRecorderRef.current;
	    if (recorder && recorder.state !== 'inactive') {
	      mediaRecorderRef.current = null;
	      recorder.stop(); // fires onstop, which uploads the audio
	      // recordingTime is left as is; transcribeAudio resets it when it finishes.
	      endRecordingSession();
	    }
	  }, [endRecordingSession]);

	  // Ends whichever capture is active and throws its output away, without touching
	  // React state. Returns true when there was something to discard.
	  const discardActiveCapture = useCallback((): boolean => {
	    const recognition = speechRecognitionRef.current;
	    if (recognition) {
	      speechRecognitionRef.current = null;
	      recognition.onresult = null; // Prevent transcription callback
	      recognition.onerror = null; // Suppress browser abort error events
	      recognition.stop();
	      return true;
	    }

	    const recorder = mediaRecorderRef.current;
	    if (recorder && recorder.state !== 'inactive') {
	      mediaRecorderRef.current = null;
	      // Stop recording without transcription
	      recorder.ondataavailable = null;
	      recorder.onstop = null;
	      recorder.stop();
	      // onstop was detached, so the tracks have to be stopped here.
	      recorder.stream.getTracks().forEach((track) => track.stop());
	      audioChunksRef.current = [];
	      return true;
	    }

	    return false;
	  }, []);

	  // Cancel recording
	  const cancelRecording = useCallback(() => {
	    if (discardActiveCapture()) {
	      endRecordingSession();
	      setRecordingTime(0);
	    }
	  }, [discardActiveCapture, endRecordingSession]);

	  // On unmount, release the microphone and the ticker so neither outlives the component.
	  useEffect(
	    () => () => {
	      discardActiveCapture();
	      stopTimer();
	    },
	    [discardActiveCapture, stopTimer],
	  );

	  return {
	    isRecording,
	    isProcessing,
	    recordingTime,
	    startRecording,
	    stopRecording,
	    cancelRecording,
	  };
	}