| |
| import { useRef, useEffect, useState } from 'react'; |
| import { useRecoilState, useRecoilValue } from 'recoil'; |
| import { parseTextParts } from 'librechat-data-provider'; |
| import type { TMessageContentParts } from 'librechat-data-provider'; |
| import useTextToSpeechBrowser from '~/hooks/Input/useTextToSpeechBrowser'; |
| import usePauseGlobalAudio from '~/hooks/Audio/usePauseGlobalAudio'; |
| import useAudioRef from '~/hooks/Audio/useAudioRef'; |
| import { logger } from '~/utils'; |
| import store from '~/store'; |
|
|
/**
 * Options accepted by `useTTSBrowser`. Every field is optional.
 */
type TUseTextToSpeech = {
  /** Message identifier; part of the options shape but not read by the hook in this file. */
  messageId?: string;
  /** Message body: either a plain string or content parts that are flattened with `parseTextParts`. */
  content?: string | TMessageContentParts[];
  /** When true, global audio playback for `index` also counts as this message speaking (defaults to false). */
  isLast?: boolean;
  /** Index passed to the global-audio helpers (defaults to 0). */
  index?: number;
};
|
|
/**
 * Browser-based text-to-speech controls for a single chat message.
 *
 * Wraps `useTextToSpeechBrowser`, exposing a click handler (`toggleSpeech`)
 * and long-press handlers (`handleMouseDown` / `handleMouseUp`), and keeps the
 * stored voice selection in sync with the voices the browser hook reports.
 *
 * @param props - Optional message details. `content` is what gets spoken,
 *   `index` selects the global-audio slot, and `isLast` lets global playback
 *   for that slot count as "speaking" for this message.
 * @returns The handlers, the derived `isSpeaking` flag, a constant
 *   `isLoading: false`, the `audioRef` from `useAudioRef`, and `voices`.
 */
const useTTSBrowser = (props?: TUseTextToSpeech) => {
  const { content, isLast = false, index = 0 } = props ?? {};

  const isMouseDownRef = useRef(false);
  const timerRef = useRef<number | undefined>(undefined);
  const [isSpeakingState, setIsSpeaking] = useState(false);
  const { audioRef } = useAudioRef({ setIsPlaying: setIsSpeaking });

  const { pauseGlobalAudio } = usePauseGlobalAudio(index);
  const [voice, setVoice] = useRecoilState(store.voice);
  const globalIsPlaying = useRecoilValue(store.globalAudioPlayingFamily(index));

  // Speaking if local speech is active, or if this is the last message and
  // global audio for this index is playing.
  const isSpeaking = isSpeakingState || (isLast && globalIsPlaying);

  const {
    generateSpeechLocal: generateSpeech,
    cancelSpeechLocal: cancelSpeech,
    voices,
  } = useTextToSpeechBrowser({ setIsSpeaking });

  // Keep the stored voice valid: re-apply it when it is still offered,
  // otherwise fall back to the first available voice.
  useEffect(() => {
    const firstVoice = voices[0];
    if (!voices.length || typeof firstVoice !== 'object') {
      return;
    }

    const lastSelectedVoice = voices.find((v) =>
      typeof v === 'object' ? v.value === voice : v === voice,
    );
    if (lastSelectedVoice != null) {
      const currentVoice =
        typeof lastSelectedVoice === 'object' ? lastSelectedVoice.value : lastSelectedVoice;
      logger.log('useTextToSpeech.ts - Effect:', { voices, voice: currentVoice });
      setVoice(currentVoice);
      return;
    }

    logger.log('useTextToSpeech.ts - Effect:', { voices, voice: firstVoice.value });
    setVoice(firstVoice.value);
  }, [setVoice, voice, voices]);

  /** Cancels a pending long-press timer, if any, and forgets its id. */
  const clearLongPressTimer = () => {
    if (timerRef.current != null) {
      window.clearTimeout(timerRef.current);
      timerRef.current = undefined;
    }
  };

  // Cancel a pending long-press timer on unmount so it cannot trigger speech
  // after the component is gone.
  useEffect(
    () => () => {
      isMouseDownRef.current = false;
      if (timerRef.current != null) {
        window.clearTimeout(timerRef.current);
        timerRef.current = undefined;
      }
    },
    [],
  );

  /** Resolves the message content to the plain text handed to the speech engine. */
  const getSpeechText = () => {
    const messageContent = content ?? '';
    return typeof messageContent === 'string' ? messageContent : parseTextParts(messageContent);
  };

  /** Starts a 1-second long-press timer; speech begins if the press is still held when it fires. */
  const handleMouseDown = () => {
    isMouseDownRef.current = true;
    // A repeated mouse-down without a mouse-up must not leave an orphaned timer.
    clearLongPressTimer();
    timerRef.current = window.setTimeout(() => {
      timerRef.current = undefined;
      if (isMouseDownRef.current) {
        generateSpeech(getSpeechText());
      }
    }, 1000);
  };

  /** Ends the press and cancels the long-press timer if it has not fired yet. */
  const handleMouseUp = () => {
    isMouseDownRef.current = false;
    clearLongPressTimer();
  };

  /** Stops speech (and global audio) when speaking; otherwise speaks the message content. */
  const toggleSpeech = () => {
    if (isSpeaking) {
      cancelSpeech();
      pauseGlobalAudio();
      return;
    }
    generateSpeech(getSpeechText());
  };

  return {
    handleMouseDown,
    handleMouseUp,
    toggleSpeech,
    isSpeaking,
    isLoading: false,
    audioRef,
    voices,
  };
};

export default useTTSBrowser;
|
|