File size: 4,286 Bytes
f56a29b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
/**
 * Browser Native ASR (Speech Recognition) Hook
 * Uses Web Speech API for client-side speech recognition
 * Completely free, no API key required
 */

import { useState, useCallback, useRef, useEffect } from 'react';
import { createLogger } from '@/lib/logger';

// Module-scoped logger tagged with this hook's name for log filtering.
const log = createLogger('BrowserASR');

// Note: Window.SpeechRecognition declaration is in components/ai-elements/prompt-input.tsx

/**
 * Normalized error codes surfaced by the `useBrowserASR` hook.
 * Mirrors the Web Speech API's recognition error strings, plus
 * 'not-supported' (no SpeechRecognition constructor in this browser)
 * and 'unknown' (any engine error not in the mapping table).
 */
export type ASRErrorCode =
  | 'not-supported'
  | 'no-speech'
  | 'audio-capture'
  | 'not-allowed'
  | 'network'
  | 'aborted'
  | 'unknown';

/**
 * Configuration options for `useBrowserASR`. All fields are optional.
 */
export interface UseBrowserASROptions {
  /** Invoked with each finalized transcript chunk. */
  onTranscription?: (text: string) => void;
  /** Invoked with a normalized error code when recognition fails. */
  onError?: (errorCode: ASRErrorCode) => void;
  /** Language tag passed to the recognizer; defaults to 'zh-CN'. */
  language?: string;
  /** Keep recognizing after a final result; defaults to false. */
  continuous?: boolean;
  /** Surface partial results via `interimTranscript`; defaults to false. */
  interimResults?: boolean;
}

/**
 * React hook wrapping the browser's Web Speech API (SpeechRecognition)
 * for client-side speech-to-text. Completely free, no API key required.
 *
 * Final transcript chunks are delivered through `onTranscription`; when
 * `interimResults` is enabled, partial text is exposed reactively via the
 * returned `interimTranscript` string. Errors are normalized to
 * {@link ASRErrorCode} and delivered through `onError`.
 *
 * @param options see {@link UseBrowserASROptions}
 * @returns `{ isSupported, isListening, interimTranscript, startListening, stopListening }`
 */
export function useBrowserASR(options: UseBrowserASROptions = {}) {
  const {
    onTranscription,
    onError,
    language = 'zh-CN',
    continuous = false,
    interimResults = false,
  } = options;

  const [isListening, setIsListening] = useState(false);
  const [interimTranscript, setInterimTranscript] = useState('');
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- Web Speech API SpeechRecognition not typed
  const recognitionRef = useRef<any>(null);

  // Use refs for callbacks to avoid stale closures in recognition event
  // handlers, which are bound once per session.
  const onTranscriptionRef = useRef(onTranscription);
  const onErrorRef = useRef(onError);

  useEffect(() => {
    onTranscriptionRef.current = onTranscription;
    onErrorRef.current = onError;
  }, [onTranscription, onError]);

  // SSR-safe support detection: evaluated lazily, once, on the client.
  const [isSupported] = useState(
    () =>
      typeof window !== 'undefined' &&
      !!(window.SpeechRecognition || window.webkitSpeechRecognition),
  );

  // Fix: stop any active recognition session on unmount. Previously the
  // session kept running after unmount, and its event handlers would call
  // setState on an unmounted component.
  useEffect(() => {
    return () => {
      if (recognitionRef.current) {
        recognitionRef.current.stop();
        recognitionRef.current = null;
      }
    };
  }, []);

  const startListening = useCallback(() => {
    // Check if Speech Recognition is supported
    if (
      typeof window === 'undefined' ||
      (!window.SpeechRecognition && !window.webkitSpeechRecognition)
    ) {
      onErrorRef.current?.('not-supported');
      return;
    }

    // Fix: if a session is already active, stop it before starting a new
    // one. Previously a second call orphaned the old recognizer, which
    // kept firing events and holding the microphone.
    if (recognitionRef.current) {
      recognitionRef.current.stop();
      recognitionRef.current = null;
    }

    // Create Speech Recognition instance
    const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
    const recognition = new SpeechRecognition();

    recognition.lang = language;
    recognition.continuous = continuous;
    recognition.interimResults = interimResults;

    recognition.onstart = () => {
      setIsListening(true);
      setInterimTranscript('');
    };

    recognition.onresult = (event: {
      resultIndex: number;
      results: {
        [index: number]: {
          [index: number]: { transcript: string };
          isFinal: boolean;
        };
        length: number;
      };
    }) => {
      let finalTranscript = '';
      let interimText = '';

      // Results before resultIndex were already delivered; only walk the
      // new ones, splitting final chunks from in-progress ones.
      for (let i = event.resultIndex; i < event.results.length; i++) {
        const transcript = event.results[i][0].transcript;
        if (event.results[i].isFinal) {
          finalTranscript += transcript;
        } else {
          interimText += transcript;
        }
      }

      if (interimResults) {
        setInterimTranscript(interimText);
      }

      if (finalTranscript) {
        onTranscriptionRef.current?.(finalTranscript);
        setInterimTranscript('');
      }
    };

    recognition.onerror = (event: { error: string }) => {
      log.error('Speech recognition error:', event.error);
      // Map the engine's error string to our normalized code set;
      // anything unrecognized becomes 'unknown'.
      const errorCodeMap: Record<string, ASRErrorCode> = {
        'no-speech': 'no-speech',
        'audio-capture': 'audio-capture',
        'not-allowed': 'not-allowed',
        network: 'network',
        aborted: 'aborted',
      };
      onErrorRef.current?.(errorCodeMap[event.error] ?? 'unknown');
      setIsListening(false);
      setInterimTranscript('');
    };

    recognition.onend = () => {
      setIsListening(false);
      setInterimTranscript('');
    };

    // Fix: start() can throw synchronously (e.g. InvalidStateError if the
    // engine is already active). Previously this propagated to the caller
    // and left the hook's state stale.
    try {
      recognition.start();
      recognitionRef.current = recognition;
    } catch (err: unknown) {
      log.error('Failed to start speech recognition:', err);
      onErrorRef.current?.('unknown');
      setIsListening(false);
    }
  }, [language, continuous, interimResults]);

  const stopListening = useCallback(() => {
    if (recognitionRef.current) {
      recognitionRef.current.stop();
      recognitionRef.current = null;
      setIsListening(false);
      setInterimTranscript('');
    }
  }, []);

  return {
    isSupported,
    isListening,
    interimTranscript,
    startListening,
    stopListening,
  };
}