'use strict';

// ─── DOM refs ─────────────────────────────────────────────────────────────────
const chatBox = document.getElementById('chat-box');
const sendBtn = document.getElementById('send-btn');
const textInput = document.getElementById('text-input');
const micBtn = document.getElementById('mic-btn');
const micLabel = micBtn.querySelector('.mic-label');
const stopBtn = document.getElementById('stop-btn');
const stateLabel = document.getElementById('state-label');
const stateDot = document.getElementById('state-dot');
const clearBtn = document.getElementById('clear-btn');
const brainBtn = document.getElementById('brain-mode-btn');
const voiceCaption = document.getElementById('voice-caption');
const brainStage = document.getElementById('brain-stage');
const brainBubbleStt = document.getElementById('brain-bubble-stt');
const brainBubbleTts = document.getElementById('brain-bubble-tts');
const brainBubbleSttText = document.getElementById('brain-bubble-stt-text');
const brainBubbleTtsText = document.getElementById('brain-bubble-tts-text');
const voiceViz = document.getElementById('voice-viz');
const vizBars = Array.from(voiceViz.querySelectorAll('.viz-bar'));
const queueBars = Array.from(document.querySelectorAll('.queue-bar'));
const chunksCount = document.getElementById('chunks-count');
const sidebarEl = document.getElementById('sidebar');
const sidebarToggle = document.getElementById('sidebar-toggle');
const mobileMenuBtn = document.getElementById('mobile-menu-btn');
const appEl = document.getElementById('app');
const sThreshold = document.getElementById('s-threshold');
const sThresholdVal = document.getElementById('s-threshold-val');
const sTimeout = document.getElementById('s-timeout');
const sTimeoutVal = document.getElementById('s-timeout-val');
const sVoice = document.getElementById('s-voice');
const mStt = document.getElementById('m-stt');
const mLlm = document.getElementById('m-llm');
const mTts = document.getElementById('m-tts');
const mTotal = document.getElementById('m-total');
const sysStat = document.getElementById('sys-status');

// ─── Ephemeral user identity ──────────────────────────────────────────────────
// New page load = new user. Reloading the app generates a fresh ID.
const USER_ID = (() => {
  if (window.crypto && typeof window.crypto.randomUUID === 'function') {
    return 'u_' + window.crypto.randomUUID().replace(/-/g, '').slice(0, 16);
  }
  // Fallback for browsers without crypto.randomUUID (not cryptographically strong).
  return (
    'u_' +
    Date.now().toString(36) +
    '_' +
    Math.random().toString(36).slice(2, 10)
  );
})();

// ─── WebSocket base URL ───────────────────────────────────────────────────────
// Ordered, de-duplicated list of candidate bases: the page's own host first
// (when there is one), then a fixed set of local fallbacks.
const WS_BASES = (() => {
  const scheme = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
  const bases = [];
  const host =
    window.location.host && window.location.host !== 'null'
      ? `${scheme}//${window.location.host}`
      : '';
  const push = (base) => {
    if (base && !bases.includes(base)) bases.push(base);
  };
  push(host);
  push(`${scheme}//127.0.0.1:8000`);
  push(`${scheme}//127.0.0.1:8679`);
  push(`${scheme}//localhost:8000`);
  push(`${scheme}//localhost:8679`);
  return bases;
})();
let _wsBaseIndex = 0;
console.log('[Boot] WS bases:', WS_BASES.join(', '));

// ─── WS handles ───────────────────────────────────────────────────────────────
let chatWS = null;
let voiceWS = null;
let _chatRetry = 0;
let _voiceRetry = 0;
let _chatRetryTimer = null;
let _voiceRetryTimer = null;

// ─── VAD / recording settings ─────────────────────────────────────────────────
let SILENCE_MS = 900; // default; user-adjustable in UI
let SILENCE_DB = -38;
const VAD_MS = 60;
const MIN_SPEECH_MS = 320; // discard noise bursts shorter than this

// ─── Playback state ───────────────────────────────────────────────────────────
let _ctx = null;
let _schedEnd = 0;
let _endTimer = null;
let _cancelled = false;
let _inFlight = 0;
let _ttsPlaying = false;
let _activeSources = [];
let _bargeInArmedAt = 0;
let _bargeInFiredAt = 0;
let _dropAudioUntil = 0;
let _audioChain = Promise.resolve();
let _playbackGen = 0;
let _expectedSeq = 0;
let _pendingAudio = new Map();
let brainMode = false;
let brainVoiceActive = false;
let brainRestartTimer = null;
let brainAutoRestartTimer = null;
let brainPendingAudio = null;
let voicePendingPackets = [];
let brainLastResponse = '';

// ─── Recording state ──────────────────────────────────────────────────────────
let micStream = null;
let analyserCtx = null;
let analyser = null;
let mediaRecorder = null;
let audioChunks = [];
let isListening = false;
let isSpeaking = false;
let isProcessing = false;
let isRecordingLocked = false;
let silenceTimer = null;
let vadInt = null;
let vizInt = null;
let _speechStartMs = 0;
let _recorderMime = 'audio/webm';
// True while startListening() is awaiting getUserMedia; blocks re-entrant calls.
let _micStarting = false;

// ─── AI streaming bubble state ────────────────────────────────────────────────
let aiEl = null;
let aiTxt = '';
let thinkingEl = null;
let _captionRaf = 0;
let _captionText = '';

// ─── Latency timestamps ───────────────────────────────────────────────────────
let tSend = 0,
  tStt = 0,
  tLlm = 0,
  tTts = 0;

/** Entry point: opens both sockets, reveals the app and shows the idle state. */
function boot() {
  initWebSockets();
  appEl.classList.add('visible');
  setState('ready');
}

// ═══════════════════════════════════════════════════════════════════════════════
// WEBSOCKETS
// ═══════════════════════════════════════════════════════════════════════════════

/**
 * Exponential reconnect delay.
 * @param {number} r - retry count so far.
 * @returns {number} delay in ms: 1000 * 2^r, capped at 16000.
 */
function _backoff(r) {
  return Math.min(1000 * Math.pow(2, r), 16000);
}

/** @returns {string} the currently selected WebSocket base URL. */
function _wsBase() {
  return WS_BASES[Math.min(_wsBaseIndex, WS_BASES.length - 1)] || WS_BASES[0];
}

/**
 * Moves on to the next candidate base URL (wrapping around).
 *
 * The chat and voice sockets share one index. When both fail on the same
 * base, advancing once per socket would step the index by two and — with an
 * even number of candidates — never try half of them. Passing the base the
 * closing socket was created with makes the advance happen at most once per
 * failed base.
 *
 * @param {string} [failedBase] - base the closing socket used. When given and
 *   it is no longer the current base, the index is left alone. When omitted,
 *   the index always advances.
 * @returns {string} the base that is current after the call.
 */
function _advanceWsBase(failedBase) {
  if (WS_BASES.length <= 1) return _wsBase();
  if (failedBase !== undefined && failedBase !== _wsBase()) return _wsBase();
  _wsBaseIndex = (_wsBaseIndex + 1) % WS_BASES.length;
  console.log('[WS] Switching base to:', _wsBase());
  return _wsBase();
}

/**
 * Updates the system status badge.
 * @param {boolean} online - true shows "Ready" (green), false "Reconnecting" (yellow).
 */
function _setSysStatus(online) {
  if (!sysStat) return;
  sysStat.textContent = online ? 'Ready' : 'Reconnecting';
  sysStat.className = 'status-badge ' + (online ? 'badge-green' : 'badge-yellow');
}

/**
 * Opens the chat socket unless one is already connecting or open, and wires
 * reconnect-with-backoff on close.
 */
function _connectChat() {
  if (chatWS && chatWS.readyState <= WebSocket.OPEN) return;
  const base = _wsBase();
  chatWS = new WebSocket(`${base}/ws/chat`);
  chatWS.onopen = () => {
    _chatRetry = 0;
    console.log('[Chat WS] connected');
    chatWS.send(JSON.stringify({ type: 'init', user_id: USER_ID }));
  };
  chatWS.onerror = (e) => console.error('[Chat WS] error:', e);
  chatWS.onclose = (ev) => {
    console.log(`[Chat WS] closed (${ev.code})`);
    _advanceWsBase(base);
    clearTimeout(_chatRetryTimer);
    _chatRetryTimer = setTimeout(() => {
      _chatRetry++;
      _connectChat();
    }, _backoff(_chatRetry));
  };
  chatWS.onmessage = onChatMsg;
}

/**
 * Opens the voice socket unless one is already connecting or open. On open it
 * flushes any queued utterances; on close it releases the mic if a voice turn
 * was in progress and schedules a reconnect.
 */
function _connectVoice() {
  if (voiceWS && voiceWS.readyState <= WebSocket.OPEN) return;
  const base = _wsBase();
  voiceWS = new WebSocket(`${base}/ws/voice`);
  voiceWS.binaryType = 'arraybuffer';
  voiceWS.onopen = () => {
    _voiceRetry = 0;
    console.log('[Voice WS] connected, uid:', USER_ID);
    voiceWS.send(JSON.stringify({ type: 'init', user_id: USER_ID }));
    _setSysStatus(true);
    _flushVoicePendingPackets();
    _flushBrainPendingAudio();
  };
  voiceWS.onerror = (e) => console.error('[Voice WS] error:', e);
  voiceWS.onclose = (ev) => {
    console.log(`[Voice WS] closed (${ev.code})`);
    _setSysStatus(false);
    if (isListening || isSpeaking || isProcessing) {
      _teardownMicHardware();
      _resetVoiceState();
      setState('ready');
      setMic('off');
      micBtn.disabled = false;
    }
    clearTimeout(_voiceRetryTimer);
    _advanceWsBase(base);
    _voiceRetryTimer = setTimeout(() => {
      _voiceRetry++;
      _connectVoice();
    }, _backoff(_voiceRetry));
    if (brainMode && brainVoiceActive) {
      _queueBrainReconnect();
    }
  };
  voiceWS.onmessage = onVoiceMsg;
}

/** Opens both the chat and the voice socket. */
function initWebSockets() {
  _connectChat();
  _connectVoice();
}

/** Creates the streaming AI message bubble if there is none yet. */
function _ensureAiBubble() {
  if (aiEl) return;
  aiEl = document.createElement('div');
  aiEl.className = 'message ai';
  chatBox.appendChild(aiEl);
}

// ── Chat WS handler ───────────────────────────────────────────────────────────
/**
 * Handles one JSON message from the chat socket.
 * Recognised types: 'llm_token' (append to the streaming bubble), 'chat'
 * (replace the bubble text), 'end' (finalise the turn), 'error'.
 * @param {MessageEvent} ev
 */
function onChatMsg(ev) {
  let msg;
  try {
    msg = JSON.parse(ev.data);
  } catch {
    return;
  }
  // Valid JSON that is not an object (e.g. `null`) carries no type.
  if (!msg || typeof msg !== 'object') return;
  console.log('[Chat WS]', msg.type);

  switch (msg.type) {
    case 'llm_token':
      if (!msg.token) break;
      if (tLlm === 0) {
        tLlm = Date.now();
        if (tSend > 0) mLlm.textContent = tLlm - tSend + ' ms';
      }
      _removeThinking();
      _ensureAiBubble();
      aiTxt += msg.token;
      _renderAiText();
      break;

    case 'chat':
      if (!msg.text) break;
      _removeThinking();
      _ensureAiBubble();
      aiTxt = msg.text;
      _renderAiText();
      break;

    case 'end':
      _removeThinking();
      _renderAiText(true);
      aiEl = null;
      aiTxt = '';
      _setCaption('');
      if (tSend > 0) mTotal.textContent = Date.now() - tSend + ' ms';
      tSend = tStt = tLlm = tTts = 0;
      isProcessing = false;
      setState('ready');
      break;

    case 'error':
      _removeThinking();
      appendMsg('⚠️ ' + msg.text, 'system');
      aiEl = null;
      aiTxt = '';
      _setCaption('');
      isProcessing = false;
      setState('ready');
      break;
  }
}

// ── Voice WS handler ──────────────────────────────────────────────────────────
/**
 * Handles one message from the voice socket.
 *
 * Binary frames are audio: a 4-byte big-endian sequence id followed by the
 * encoded audio bytes. Frames are buffered and released in sequence order so
 * playback follows text order. Text frames are JSON control messages
 * ('init_ack', 'stt', 'llm_token', 'end', 'error', 'pong').
 * @param {MessageEvent} ev
 */
function onVoiceMsg(ev) {
  if (ev.data instanceof ArrayBuffer) {
    if (Date.now() < _dropAudioUntil) return; // drop late packets after cancel
    _ttsPlaying = true;

    if (ev.data.byteLength <= 4) return;
    const seq = new DataView(ev.data).getUint32(0); // big-endian by default
    _pendingAudio.set(seq, ev.data.slice(4));

    // Playback generation at arrival time; a cancel bumps _playbackGen so
    // chunks queued before it are skipped when their turn comes.
    const gen = _playbackGen;
    while (_pendingAudio.has(_expectedSeq)) {
      const playBuf = _pendingAudio.get(_expectedSeq);
      _pendingAudio.delete(_expectedSeq);
      _audioChain = _audioChain
        .catch(() => {})
        .then(() => {
          if (gen !== _playbackGen) return;
          if (_cancelled) return;
          return enqueueAudio(playBuf);
        });
      _expectedSeq++;
    }
    return;
  }

  let msg;
  try {
    msg = JSON.parse(ev.data);
  } catch {
    return;
  }
  // Valid JSON that is not an object (e.g. `null`) carries no type.
  if (!msg || typeof msg !== 'object') return;
  console.log('[Voice WS]', msg.type);

  switch (msg.type) {
    case 'init_ack':
      console.log('[Voice WS] ack uid:', msg.user_id);
      break;

    case 'stt':
      // New turn: reset audio ordering/buffers.
      _expectedSeq = 0;
      _pendingAudio.clear();
      tStt = Date.now();
      if (tSend > 0) mStt.textContent = tStt - tSend + ' ms';
      _removeThinking();
      if (!brainMode) appendMsg('🎤 ' + msg.text, 'user');
      aiEl = null;
      aiTxt = '';
      _setCaption('');
      _brainSetSttBubble(msg.text);
      if (brainMode) _brainSetTtsBubble(brainLastResponse || '', false);
      _brainModeSetSearch(true);
      appendThinking();
      setState('processing');
      break;

    case 'llm_token': {
      if (!msg.token) break;
      if (tLlm === 0) {
        tLlm = Date.now();
        if (tStt > 0) mLlm.textContent = tLlm - tStt + ' ms';
      }
      _removeThinking();
      const soFar = aiTxt + msg.token;
      _setCaption(soFar);
      brainLastResponse = soFar;
      _brainSetTtsBubble(brainLastResponse);
      _brainModeSetSearch(true);
      // Brain mode accumulates the text but does not draw a chat bubble.
      if (!brainMode) _ensureAiBubble();
      aiTxt = soFar;
      if (!brainMode) _renderAiText();
      break;
    }

    case 'end':
      _renderAiText(true);
      _removeThinking();
      if (brainMode) brainLastResponse = aiTxt || brainLastResponse;
      aiEl = null;
      aiTxt = '';
      _setCaption('');
      _expectedSeq = 0;
      _pendingAudio.clear();
      if (tSend > 0) mTotal.textContent = Date.now() - tSend + ' ms';
      tSend = tStt = tLlm = tTts = 0;
      isProcessing = false;
      // Schedule _done() to fire after the already-scheduled TTS audio drains
      // (about 300 ms from now when nothing is scheduled).
      _scheduleEnd();
      break;

    case 'error':
      _removeThinking();
      appendMsg('⚠️ ' + msg.text, 'system');
      aiEl = null;
      aiTxt = '';
      _setCaption('');
      _expectedSeq = 0;
      _pendingAudio.clear();
      _brainSetTtsBubble('', false);
      _brainModeSetSearch(false);
      isProcessing = false;
      // Unconditionally unlock on error.
      _done();
      break;

    case 'pong':
      break;

    default:
      console.log('[Voice WS] unknown:', msg.type);
  }
}

// ─── Thinking bubble ──────────────────────────────────────────────────────────
/** Appends the "thinking" placeholder bubble (no-op in brain mode or if shown). */
function appendThinking() {
  if (brainMode) return;
  if (thinkingEl) return;
  thinkingEl = document.createElement('div');
  thinkingEl.className = 'message ai thinking';
  // NOTE(review): the bubble is created empty; any animated dots presumably
  // come from CSS on `.thinking` — confirm no inner markup was lost here.
  thinkingEl.innerHTML = '';
  chatBox.appendChild(thinkingEl);
  chatBox.scrollTop = chatBox.scrollHeight;
}

/** Removes the "thinking" placeholder bubble if present. */
function _removeThinking() {
  if (thinkingEl) {
    thinkingEl.remove();
    thinkingEl = null;
  }
}

/**
 * Renders the accumulated AI text into the streaming bubble.
 *
 * With `marked` loaded the text is rendered as Markdown. Without it the text
 * is inserted as plain text nodes separated by line breaks, so model output
 * is never interpreted as HTML on the fallback path.
 *
 * @param {boolean} [force=false] - when there is a bubble but no text, empty
 *   the bubble instead of leaving it untouched.
 */
function _renderAiText(force = false) {
  if (!aiEl || !aiTxt) {
    if (force && aiEl) aiEl.innerHTML = '';
    return;
  }
  if (typeof marked !== 'undefined') {
    // NOTE(review): marked does not sanitise its output; model text rendered
    // here can carry raw HTML. Sanitise the result if that is a concern.
    aiEl.innerHTML = marked.parse(aiTxt);
  } else {
    const frag = document.createDocumentFragment();
    aiTxt.split('\n').forEach((line, i) => {
      if (i > 0) frag.appendChild(document.createElement('br'));
      frag.appendChild(document.createTextNode(line));
    });
    aiEl.replaceChildren(frag);
  }
  chatBox.scrollTop = chatBox.scrollHeight;
}

/**
 * Sets the voice caption, coalescing updates to one DOM write per animation
 * frame. The caption is kept blank while brain mode is on.
 * @param {string} text
 */
function _setCaption(text) {
  _captionText = text || '';
  if (_captionRaf) return;
  _captionRaf = requestAnimationFrame(() => {
    _captionRaf = 0;
    if (!voiceCaption) return;
    voiceCaption.textContent = brainMode ? '' : _captionText;
  });
}

// ═══════════════════════════════════════════════════════════════════════════════
// AUDIO PLAYBACK
// ═══════════════════════════════════════════════════════════════════════════════

/**
 * Returns the playback AudioContext, creating a new one if none exists or the
 * previous one was closed, and resuming it if suspended.
 * @returns {AudioContext}
 */
function _ctxEnsure() {
  if (!_ctx || _ctx.state === 'closed') {
    _ctx = new (window.AudioContext || window.webkitAudioContext)();
    _schedEnd = 0;
  }
  if (_ctx.state === 'suspended') _ctx.resume();
  return _ctx;
}

/** Stops and disconnects every scheduled/playing source node. */
function _stopAllSources() {
  const sources = _activeSources.splice(0);
  for (const src of sources) {
    // Best-effort: stop() throws on a node that was never started.
    try {
      src.onended = null;
      src.stop(0);
    } catch {}
    try {
      src.disconnect();
    } catch {}
  }
}

/**
 * Decodes one encoded audio chunk and schedules it directly after the
 * previously scheduled chunk.
 * @param {ArrayBuffer} buf - encoded audio bytes (header already removed).
 * @returns {Promise<void>}
 */
async function enqueueAudio(buf) {
  if (_cancelled) return;
  _inFlight++;
  _vizQ();

  const release = () => {
    _inFlight = Math.max(0, _inFlight - 1);
    _vizQ();
  };

  const ctx = _ctxEnsure();
  let decoded;
  try {
    // decodeAudioData detaches its input, so hand it a copy.
    decoded = await ctx.decodeAudioData(buf.slice(0));
  } catch (e) {
    console.warn('[Audio] decode error:', e.message);
    release();
    return;
  }
  if (!decoded || decoded.duration < 0.001 || _cancelled) {
    release();
    return;
  }

  if (tTts === 0 && tLlm > 0) {
    tTts = Date.now();
    mTts.textContent = tTts - tLlm + ' ms';
  }

  const src = ctx.createBufferSource();
  src.buffer = decoded;
  src.connect(ctx.destination);

  const now = ctx.currentTime;
  // Tiny gap between chunks improves perceived naturalness (less "machine-gun").
  const GAP_S = 0.001;
  const start = Math.max(now + 0.01, _schedEnd + GAP_S);

  if (_cancelled) {
    release();
    return;
  }

  _activeSources.push(src);
  src.start(start);
  _schedEnd = start + decoded.duration;
  src.onended = () => {
    release();
    const idx = _activeSources.indexOf(src);
    if (idx >= 0) _activeSources.splice(idx, 1);
  };
  setState('speaking');
}

/** Redraws the queue indicator from the number of in-flight audio chunks. */
function _vizQ() {
  if (chunksCount) chunksCount.textContent = _inFlight;
  queueBars.forEach((b, i) => {
    b.classList.toggle('active', i < _inFlight);
    b.style.height = (i < _inFlight ? 12 + Math.random() * 30 : 4) + 'px';
  });
}

/**
 * Schedules _done() for when the already-scheduled audio has finished
 * (plus 300 ms). The timer is always tracked in _endTimer so a cancel or a
 * later re-schedule can clear it.
 */
function _scheduleEnd() {
  clearTimeout(_endTimer);
  const ctx = _ctx;
  if (!ctx || ctx.state === 'closed') {
    // No audio context, so nothing is scheduled — unlock after a minimal delay.
    _endTimer = setTimeout(_done, 300);
    return;
  }
  const remainingMs = Math.max(0, (_schedEnd - ctx.currentTime) * 1000);
  // _done runs regardless of _cancelled — the lock must always be released.
  _endTimer = setTimeout(_done, remainingMs + 300);
}

/**
 * _done — returns the system to the idle state and ALWAYS unlocks the mic.
 * Outside brain mode recording is never restarted automatically. In brain
 * mode with continuous voice active, listening is resumed 180 ms later
 * unless the mic is already listening or a turn is in progress.
 */
function _done() {
  _ttsPlaying = false;
  isProcessing = false;
  isRecordingLocked = false;
  _brainModeSetSearch(false);
  _brainSetTtsBubble(brainLastResponse || '', false);
  _inFlight = 0;
  _vizQ();
  micBtn.disabled = false;
  setState('ready');
  setMic('off');

  if (brainMode && brainVoiceActive) {
    clearTimeout(brainAutoRestartTimer);
    brainAutoRestartTimer = setTimeout(() => {
      if (
        !brainMode ||
        !brainVoiceActive ||
        isListening ||
        isProcessing ||
        isRecordingLocked
      ) {
        return;
      }
      _brainResumeListening();
    }, 180);
  }
  console.log('[Voice] Idle — ready for next manual press');
}

/**
 * Cancels playback immediately: stops all sources, drops buffered and late
 * audio (for 700 ms), closes the audio context and tells the server to cancel.
 */
function stopAllAudio() {
  _cancelled = true;
  _ttsPlaying = false;
  _dropAudioUntil = Date.now() + 700;
  _playbackGen++;
  _audioChain = Promise.resolve();
  _expectedSeq = 0;
  _pendingAudio.clear();
  _stopAllSources();
  clearTimeout(_endTimer);
  _endTimer = null;
  _schedEnd = 0;
  _inFlight = 0;
  _vizQ();
  if (_ctx && _ctx.state !== 'closed') {
    // Close releases scheduled audio immediately; a new ctx is created on demand.
    _ctx.close().catch(() => {});
  }
  _ctx = null;
  if (voiceWS && voiceWS.readyState === WebSocket.OPEN) {
    voiceWS.send(JSON.stringify({ type: 'cancel' }));
  }
}

/**
 * Interrupts the assistant so the user can speak (barge-in). Debounced to at
 * most once per 500 ms.
 * @param {string} [reason='speech'] - label used only for logging.
 */
function _bargeInNow(reason = 'speech') {
  const now = Date.now();
  if (now - _bargeInFiredAt < 500) return; // debounce
  _bargeInFiredAt = now;
  console.log('[BargeIn] interrupt:', reason);
  stopAllAudio();

  // Unlock immediately so the user can speak right away.
  isProcessing = false;
  isRecordingLocked = false;
  _cancelled = false;
  aiEl = null;
  aiTxt = '';
  _setCaption('');
  _removeThinking();
  micBtn.disabled = false;

  // If mic is already warm (brain continuous mode), just re-arm VAD.
  if (brainMode && brainVoiceActive) {
    _brainModeSetSearch(false);
    // If analyser/mic are already active, VAD tick will immediately
    // transition into recording on the next speech sample.
    _brainResumeListening();
    return;
  }
  // Otherwise, start listening fresh (user initiated by speaking).
  startListening().catch(() => {});
}

// ═══════════════════════════════════════════════════════════════════════════════
// TEXT CHAT
// ═══════════════════════════════════════════════════════════════════════════════
sendBtn.onclick = sendText;
textInput.addEventListener('keydown', (e) => {
  if (e.key !== 'Enter' || e.shiftKey) return;
  // Enter that confirms an IME composition must not submit the message.
  if (e.isComposing) return;
  e.preventDefault(); // keep a multi-line input from also inserting a newline
  sendText();
});

/** Sends the text box content as a chat turn (ignored while a turn is running). */
function sendText() {
  const text = textInput.value.trim();
  if (!text || isProcessing) return;
  appendMsg(text, 'user');
  textInput.value = '';
  _cancelled = false;
  isProcessing = true;
  tSend = Date.now();
  tLlm = tTts = 0;
  aiEl = null;
  aiTxt = '';
  setState('processing');
  appendThinking();
  _sendViaChat(text);
}

/**
 * Sends a query over the chat socket. When the socket is not open it polls
 * every 300 ms until it is (there is no upper bound on the wait).
 * @param {string} text
 */
function _sendViaChat(text) {
  const payload = JSON.stringify({ user_id: USER_ID, user_query: text });
  const trySend = () => {
    if (chatWS && chatWS.readyState === WebSocket.OPEN) chatWS.send(payload);
    else setTimeout(trySend, 300);
  };
  trySend();
}

// ═══════════════════════════════════════════════════════════════════════════════
// MICROPHONE / VAD
// ═══════════════════════════════════════════════════════════════════════════════
micBtn.onclick = async () => {
  if (isRecordingLocked || isProcessing) {
    console.log('[Mic] Ignored — system busy');
    return;
  }
  if (!isListening) {
    await startListening();
    return;
  }
  if (brainMode && brainVoiceActive) {
    console.log('[Brain] Continuous mode active — use Stop to exit');
    return;
  }
  _teardownMicHardware();
  _resetVoiceState();
  setState('ready');
  setMic('off');
};

stopBtn.onclick = () => {
  brainVoiceActive = false;
  clearTimeout(brainAutoRestartTimer);
  clearTimeout(brainRestartTimer);
  brainPendingAudio = null;
  stopAllAudio();
  if (isListening || isSpeaking) _teardownMicHardware();
  _resetVoiceState();
  setState('ready');
  setMic('off');
  micBtn.disabled = false;
};

// ── startListening ────────────────────────────────────────────────────────────
/**
 * Acquires the microphone, builds the analyser graph and starts the VAD and
 * visualiser intervals. Does nothing while already listening, while a turn is
 * being processed, or while another call is still waiting for mic permission.
 * @returns {Promise<void>}
 */
async function startListening() {
  if (_micStarting || isListening || isProcessing || isRecordingLocked) return;
  _micStarting = true;
  try {
    _ctxEnsure();

    // Release handles left over from an earlier session so a fresh
    // acquisition never leaks a live stream, context or interval.
    if (micStream || analyserCtx) _teardownMicHardware();

    let stream;
    try {
      stream = await navigator.mediaDevices.getUserMedia({
        audio: {
          echoCancellation: true,
          noiseSuppression: true,
          autoGainControl: true,
          channelCount: 1,
          sampleRate: 16000,
        },
      });
    } catch (err) {
      console.error('[Mic] getUserMedia failed:', err);
      appendMsg('⚠️ মাইক্রোফোন অ্যাক্সেস দেওয়া হয়নি।', 'system');
      return;
    }
    micStream = stream;

    // Same constructor lookup as _ctxEnsure, for prefixed implementations.
    const AudioCtx = window.AudioContext || window.webkitAudioContext;
    analyserCtx = new AudioCtx({ sampleRate: 16000 });
    const src = analyserCtx.createMediaStreamSource(micStream);
    analyser = analyserCtx.createAnalyser();
    analyser.fftSize = 512;
    analyser.smoothingTimeConstant = 0.6;
    src.connect(analyser);

    isListening = true;
    audioChunks = [];
    setMic('listening');
    setState('listening');
    voiceViz.classList.add('active');
    vadInt = setInterval(vadTick, VAD_MS);
    vizInt = setInterval(vizTick, 60);
    console.log('[Mic] Listening started');
  } finally {
    _micStarting = false;
  }
}

// ── _teardownMicHardware ──────────────────────────────────────────────────────
// Stops hardware: intervals, recorder (silenced), mic tracks, AudioContext.
// IMPORTANT: does NOT clear audioChunks — caller's onstop captures them first.
/**
 * Releases all microphone resources: VAD/viz intervals, the silence timer,
 * the MediaRecorder (its callbacks are silenced first), the media tracks and
 * the analyser AudioContext. Leaves audioChunks untouched.
 */
function _teardownMicHardware() {
  clearInterval(vadInt);
  clearInterval(vizInt);
  clearTimeout(silenceTimer);
  vadInt = vizInt = silenceTimer = null;

  // Silence callbacks so no onstop logic fires after forced teardown
  if (mediaRecorder && mediaRecorder.state !== 'inactive') {
    mediaRecorder.ondataavailable = () => {};
    mediaRecorder.onstop = () => {};
    mediaRecorder.stop();
  }
  mediaRecorder = null;

  micStream?.getTracks().forEach((t) => t.stop());
  micStream = null;

  if (analyserCtx && analyserCtx.state !== 'closed') {
    analyserCtx.close().catch(() => {});
  }
  analyserCtx = null;
  analyser = null;

  voiceViz.classList.remove('active');
  vizBars.forEach((b) => (b.style.height = '4px'));
  console.log('[Mic] Hardware torn down');
}

// ── _resetVoiceState ──────────────────────────────────────────────────────────
/** Clears every voice-turn flag and drops any buffered recorder chunks. */
function _resetVoiceState() {
  isListening = false;
  isSpeaking = false;
  isProcessing = false;
  isRecordingLocked = false;
  _ttsPlaying = false;
  _speechStartMs = 0;
  audioChunks = [];
}

// ── VAD tick ──────────────────────────────────────────────────────────────────
/**
 * One voice-activity-detection step (runs every VAD_MS). Measures the RMS
 * level of the current analyser frame in dB and compares it with SILENCE_DB:
 * speech starts the recorder, silence while speaking arms the silence timer.
 */
function vadTick() {
  if (!analyser) return;
  // In brain mode we allow "barge-in": user speech interrupts TTS playback.
  // In non-brain mode we still keep the hard lock to prevent overlapping turns.
  if (!brainMode && (isProcessing || isRecordingLocked)) return;

  const buf = new Float32Array(analyser.frequencyBinCount);
  analyser.getFloatTimeDomainData(buf);
  let sum = 0;
  for (let i = 0; i < buf.length; i++) sum += buf[i] * buf[i];
  // `|| 1e-10` keeps log10 finite for an all-zero frame.
  const db = 20 * Math.log10(Math.sqrt(sum / buf.length) || 1e-10);
  const speech = db > SILENCE_DB;

  if (!speech) {
    _bargeInArmedAt = 0;
    if (isSpeaking && !silenceTimer) {
      silenceTimer = setTimeout(_onSilenceTimeout, SILENCE_MS);
    }
    return;
  }

  // ── Barge-in detector ──────────────────────────────────────────────────
  if (
    brainMode &&
    brainVoiceActive &&
    (_ttsPlaying || isProcessing || isRecordingLocked)
  ) {
    // Stricter threshold reduces false triggers from echo + noise.
    const loud = db > SILENCE_DB + 4;
    if (!loud) {
      _bargeInArmedAt = 0;
      return;
    }
    if (!_bargeInArmedAt) _bargeInArmedAt = Date.now();
    if (Date.now() - _bargeInArmedAt < 90) {
      // Don't start recording until we confirm it's real barge-in speech.
      return;
    }
    _bargeInArmedAt = 0;
    _bargeInNow(_ttsPlaying ? 'vad_tts' : 'vad_thinking');
    // After barge-in unlock, continue into the normal recording start
    // path in this same tick.
  }

  clearTimeout(silenceTimer);
  silenceTimer = null;

  if (isSpeaking) return;
  if (mediaRecorder && mediaRecorder.state !== 'inactive') return; // duplicate guard
  isSpeaking = true;
  _speechStartMs = Date.now();
  _cancelled = false;
  _ctxEnsure();
  startRecorder();
  setMic('recording');
  setState('recording');
  console.log('[VAD] Speech detected — recording');
}

// ── _onSilenceTimeout ─────────────────────────────────────────────────────────
/**
 * Fires SILENCE_MS after speech stopped. Utterances shorter than
 * MIN_SPEECH_MS are discarded and listening continues; otherwise the turn is
 * locked and the recorder is stopped, which hands the audio to its onstop.
 */
function _onSilenceTimeout() {
  silenceTimer = null;
  const speechDuration = Date.now() - _speechStartMs;

  if (speechDuration < MIN_SPEECH_MS) {
    console.log(
      `[VAD] Too short (${speechDuration} ms) — discard & resume listening`,
    );
    isSpeaking = false;
    discardRecorder();
    // Restart intervals so listening continues
    if (isListening && !vadInt) {
      vadInt = setInterval(vadTick, VAD_MS);
      vizInt = setInterval(vizTick, 60);
    }
    setMic('listening');
    setState('listening');
    return;
  }

  console.log(
    `[VAD] Silence after ${speechDuration} ms — finalising utterance`,
  );
  const keepBrainMicWarm = brainMode && brainVoiceActive;

  // In brain mode we keep VAD running so we can detect barge-in while the AI is
  // thinking/speaking. Outside brain mode we stop VAD during processing.
  if (!keepBrainMicWarm) {
    clearInterval(vadInt);
    clearInterval(vizInt);
    vadInt = vizInt = null;
  }

  // Lock state BEFORE stopRecorder (onstop may fire almost immediately)
  isSpeaking = false;
  isListening = keepBrainMicWarm; // mic stays "hot" in brain mode
  isProcessing = true;
  isRecordingLocked = true;
  _cancelled = false;
  tSend = Date.now();
  tLlm = 0;
  tTts = 0;

  micBtn.disabled = !keepBrainMicWarm;
  setMic(keepBrainMicWarm ? 'listening' : 'processing');
  setState(keepBrainMicWarm ? 'listening' : 'processing');
  stopRecorder(); // → triggers onstop asynchronously
}

// ── Viz tick ──────────────────────────────────────────────────────────────────
/** Draws one frame of the mic level bars from the analyser's frequency data. */
function vizTick() {
  if (!analyser) return;
  const data = new Uint8Array(analyser.frequencyBinCount);
  analyser.getByteFrequencyData(data);
  const step = Math.floor(data.length / vizBars.length);
  vizBars.forEach((b, i) => {
    const v = data[i * step] / 255;
    b.style.height = Math.max(4, v * (isSpeaking ? 48 : 18)) + 'px';
  });
}

// ── MediaRecorder ─────────────────────────────────────────────────────────────
/**
 * Starts a MediaRecorder on the live mic stream. Its onstop handler packages
 * the utterance and sends it over the voice socket (or queues it when the
 * socket is not open).
 */
function startRecorder() {
  if (!micStream) return;
  if (mediaRecorder && mediaRecorder.state !== 'inactive') {
    console.warn('[Recorder] Duplicate startRecorder() — ignored');
    return;
  }

  audioChunks = [];
  _recorderMime = MediaRecorder.isTypeSupported('audio/webm;codecs=opus')
    ? 'audio/webm;codecs=opus'
    : 'audio/webm';

  try {
    mediaRecorder = new MediaRecorder(micStream, { mimeType: _recorderMime });
  } catch (err) {
    console.error('[Recorder] Creation failed:', err);
    isSpeaking = false;
    setMic('listening');
    setState('listening');
    return;
  }

  mediaRecorder.ondataavailable = (e) => {
    if (e.data && e.data.size > 0) audioChunks.push(e.data);
  };

  /**
   * onstop handler.
   *
   * The recorded chunks are copied into a local variable as the very first
   * action, before any teardown or async work, and the module-level
   * audioChunks is then cleared. Building the Blob from the module-level
   * array after a teardown that reset it would always produce empty audio;
   * _teardownMicHardware() itself does not touch audioChunks, so the local
   * copy is safe either way.
   */
  mediaRecorder.onstop = async () => {
    // ── 1. Capture chunks locally (MUST be first) ──────────────────────────
    const captured = audioChunks.slice();
    audioChunks = [];
    const keepBrainMicWarm = brainMode && brainVoiceActive;

    // Shared bail-out: unlock everything and, in brain mode, re-arm the mic.
    const abortTurn = () => {
      _resetVoiceState();
      setState(keepBrainMicWarm ? 'listening' : 'ready');
      setMic('off');
      micBtn.disabled = false;
      if (keepBrainMicWarm) _brainResumeListening();
    };

    // ── 2. Tear down mic hardware unless brain mode wants a live loop ─────
    if (keepBrainMicWarm) {
      mediaRecorder = null;
    } else {
      _teardownMicHardware();
    }
    setMic('off');

    console.log(
      `[Recorder] onstop: ${captured.length} chunk(s), ${captured
        .reduce((s, c) => s + c.size, 0)
        .toLocaleString()} bytes total`,
    );

    // ── 3. Validate ────────────────────────────────────────────────────────
    if (!captured.length) {
      console.warn('[Recorder] No audio chunks — possible threshold issue');
      appendMsg(
        '⚠️ কোনো অডিও রেকর্ড হয়নি। Silence threshold কমিয়ে দেখুন।',
        'system',
      );
      abortTurn();
      return;
    }

    // ── 4. Build ArrayBuffer ───────────────────────────────────────────────
    const blob = new Blob(captured, { type: _recorderMime });
    let buf;
    try {
      buf = await blob.arrayBuffer();
    } catch (err) {
      console.error('[Recorder] arrayBuffer() error:', err);
      abortTurn();
      return;
    }

    console.log(`[VAD] → voice WS: ${buf.byteLength.toLocaleString()} bytes`);

    // ── 5. Send to backend ─────────────────────────────────────────────────
    if (voiceWS && voiceWS.readyState === WebSocket.OPEN) {
      appendThinking();
      voiceWS.send(buf);
      // isProcessing + isRecordingLocked stay true until _done() fires
    } else {
      console.warn('[VAD] Voice WS not open — queueing utterance');
      voicePendingPackets.push(buf);
      _connectVoice();
      abortTurn();
    }
  };

  mediaRecorder.start();
  console.log('[Recorder] Started, mime:', _recorderMime);
}

/** Stops the active recorder, if any; its onstop handler runs asynchronously. */
function stopRecorder() {
  if (mediaRecorder && mediaRecorder.state !== 'inactive') {
    mediaRecorder.stop(); // triggers onstop asynchronously
  }
}

/** Stops the recorder and throws away whatever it captured. */
function discardRecorder() {
  if (!mediaRecorder || mediaRecorder.state === 'inactive') {
    audioChunks = [];
    return;
  }
  // Replace the handlers so the normal send-on-stop path does not run.
  mediaRecorder.ondataavailable = () => {};
  mediaRecorder.onstop = () => {
    audioChunks = [];
  };
  mediaRecorder.stop();
  mediaRecorder = null;
  audioChunks = [];
}

// ═══════════════════════════════════════════════════════════════════════════════
// UI HELPERS
// ═══════════════════════════════════════════════════════════════════════════════
const STATE_MAP = {
  ready: { label: 'প্রস্তুত', cls: '' },
  listening: { label: 'শুনছি…', cls: 'listening' },
  recording: { label: 'রেকর্ড হচ্ছে…', cls: 'recording' },
  processing: { label: 'প্রক্রিয়া করছে…', cls: 'processing' },
  speaking: { label: 'AI বলছে…', cls: 'speaking' },
};

/**
 * Shows a pipeline state in the status label/dot and on the brain stage.
 * @param {string} s - a key of STATE_MAP; unknown keys display as 'ready'.
 */
function setState(s) {
  const cfg = STATE_MAP[s] || STATE_MAP.ready;
  stateLabel.textContent = cfg.label;
  stateDot.className = 'state-dot' + (cfg.cls ? ' ' + cfg.cls : '');
  if (brainStage) brainStage.dataset.state = s;
}

const MIC_MAP = {
  off: { cls: 'mic-off', label: 'Press to Start talking', icon: '🎤' },
  listening: {
    cls: 'mic-listening',
    label: 'Listening...',
    icon: '🟢',
  },
  recording: { cls: 'mic-recording', label: 'Listening..', icon: '🔴' },
  processing: { cls: 'mic-processing', label: 'Please wait !!!', icon: '⏳' },
};

/**
 * Updates the mic button's class, label and icon.
 * @param {string} s - a key of MIC_MAP; unknown keys display as 'off'.
 */
function setMic(s) {
  const cfg = MIC_MAP[s] || MIC_MAP.off;
  micBtn.className = 'mic-btn ' + cfg.cls;
  micLabel.textContent = cfg.label;
  micBtn.querySelector('.mic-icon').textContent = cfg.icon;
}

/**
 * Appends a message bubble to the chat box and scrolls to it.
 * In brain mode only 'system' messages are shown.
 * @param {string} text
 * @param {string} who - bubble class suffix ('user', 'ai', 'system').
 * @returns {HTMLDivElement|null} the new element, or null when suppressed.
 */
function appendMsg(text, who) {
  if (brainMode && who !== 'system') return null;
  const d = document.createElement('div');
  d.className = 'message ' + who;
  if (who === 'ai' && typeof marked !== 'undefined') {
    d.innerHTML = marked.parse(text || '');
  } else {
    d.textContent = text;
  }
  chatBox.appendChild(d);
  chatBox.scrollTop = chatBox.scrollHeight;
  return d;
}

clearBtn.onclick = () => {
  chatBox.innerHTML = '';
  thinkingEl = null;
  // The streaming bubble was just removed from the DOM; dropping the handle
  // makes the next token create a fresh bubble instead of rendering into a
  // detached node.
  aiEl = null;
  if (!brainMode) appendMsg('চ্যাট পরিষ্কার করা হয়েছে।', 'system');
};

brainBtn.onclick = () => {
  setBrainMode(!brainMode);
};

sidebarToggle.onclick = () => {
  sidebarEl.classList.toggle('collapsed');
  sidebarToggle.textContent = sidebarEl.classList.contains('collapsed')
    ? '›'
    : '‹';
};

mobileMenuBtn.onclick = () => sidebarEl.classList.toggle('mobile-open');

/**
 * Switches brain mode on or off and updates the related UI. Turning it on
 * activates continuous voice and, when idle, starts listening 180 ms later.
 * @param {boolean} on
 */
function setBrainMode(on) {
  brainMode = !!on;
  document.body.classList.toggle('brain-mode', brainMode);
  brainBtn.classList.toggle('active', brainMode);
  brainBtn.setAttribute('aria-pressed', String(brainMode));
  if (brainStage) brainStage.setAttribute('aria-hidden', String(!brainMode));
  if (voiceCaption) voiceCaption.textContent = '';

  if (!brainMode) {
    brainVoiceActive = false;
    clearTimeout(brainAutoRestartTimer);
    clearTimeout(brainRestartTimer);
    brainPendingAudio = null;
    sidebarEl.classList.remove('collapsed');
    sidebarToggle.textContent = '‹';
    _brainModeSetSearch(false);
    _brainSetSttBubble('');
    _brainSetTtsBubble('', false);
    return;
  }

  brainBubbleSttText.textContent = 'Listening…';
  brainBubbleTtsText.textContent = brainLastResponse || 'Waiting…';
  brainVoiceActive = true;
  sidebarEl.classList.add('collapsed');
  sidebarToggle.textContent = '›';
  chatBox.scrollTop = chatBox.scrollHeight;
  textInput.blur();
  _brainModeSetSearch(
    isProcessing || isListening || isSpeaking || _ttsPlaying,
  );

  if (!isListening && !isProcessing && !isRecordingLocked) {
    setTimeout(() => {
      // Re-check: state may have changed during the delay.
      if (
        brainMode &&
        brainVoiceActive &&
        !isListening &&
        !isProcessing &&
        !isRecordingLocked
      ) {
        _brainResumeListening();
      }
    }, 180);
  }
}

/** Toggles the 'searching' class on the brain stage. */
function _brainModeSetSearch(active) {
  if (!brainStage) return;
  brainStage.classList.toggle('searching', !!active);
}

/**
 * Sets the brain-mode transcript bubble.
 * @param {string} text - empty text shows the 'Listening…' placeholder.
 */
function _brainSetSttBubble(text) {
  if (!brainBubbleStt || !brainBubbleSttText) return;
  const value = (text || '').trim();
  brainBubbleSttText.textContent = value || 'Listening…';
  brainBubbleStt.classList.toggle('active', !!value);
}

/**
 * Sets the brain-mode response bubble.
 * @param {string} text - empty text shows the 'Waiting…' placeholder.
 * @param {boolean} [active=true] - whether the bubble is in its speaking state.
 */
function _brainSetTtsBubble(text, active = true) {
  if (!brainBubbleTts || !brainBubbleTtsText) return;
  const value = (text || '').trim();
  brainBubbleTtsText.textContent = value || 'Waiting…';
  brainBubbleTts.classList.toggle('active', !!value || !!active);
  brainBubbleTts.classList.toggle('speaking', !!active);
}

/**
 * Resumes continuous listening in brain mode. Re-arms VAD on the existing
 * mic when its stream and analyser are still alive; otherwise starts a fresh
 * listening session.
 */
function _brainResumeListening() {
  if (
    !brainMode ||
    !brainVoiceActive ||
    isListening ||
    isProcessing ||
    isRecordingLocked
  ) {
    return;
  }
  if (micStream && analyserCtx && analyser) {
    isListening = true;
    setMic('listening');
    setState('listening');
    voiceViz.classList.add('active');
    // The intervals may still be running when the mic was kept warm; clear
    // them first so ticks are never duplicated and no handle is leaked.
    clearInterval(vadInt);
    clearInterval(vizInt);
    vadInt = setInterval(vadTick, VAD_MS);
    vizInt = setInterval(vizTick, 60);
    _brainModeSetSearch(false);
    console.log('[Brain] Mic re-armed');
    return;
  }
  startListening().catch((err) => {
    console.error('[Brain] resume failed:', err);
  });
}

/** Schedules a retry (in 700 ms) of sending the pending brain utterance. */
function _queueBrainReconnect() {
  if (!brainMode || !brainVoiceActive) return;
  clearTimeout(brainRestartTimer);
  brainRestartTimer = setTimeout(() => {
    if (!brainMode || !brainVoiceActive) return;
    _flushBrainPendingAudio();
  }, 700);
}

/**
 * Sends every utterance queued while the voice socket was down. When a send
 * fails, that packet and all packets after it go back to the front of the
 * queue in their original order and a reconnect is requested.
 */
function _flushVoicePendingPackets() {
  if (
    !voiceWS ||
    voiceWS.readyState !== WebSocket.OPEN ||
    !voicePendingPackets.length
  ) {
    return;
  }
  const packets = voicePendingPackets.splice(0);
  for (let i = 0; i < packets.length; i++) {
    try {
      voiceWS.send(packets[i]);
      appendThinking();
      console.log('[Voice] queued packet flushed');
    } catch (err) {
      console.error('[Voice] flush failed:', err);
      voicePendingPackets.unshift(...packets.slice(i));
      _connectVoice();
      break;
    }
  }
}

/**
 * Sends the single pending brain-mode utterance, if any. When the socket is
 * not open or the send fails, the utterance is kept and a retry is scheduled.
 */
function _flushBrainPendingAudio() {
  if (!brainPendingAudio) return;
  if (!voiceWS || voiceWS.readyState !== WebSocket.OPEN) {
    _queueBrainReconnect();
    return;
  }
  const buf = brainPendingAudio;
  brainPendingAudio = null;
  try {
    appendThinking();
    voiceWS.send(buf);
    console.log('[Brain] queued utterance flushed');
  } catch (err) {
    console.error('[Brain] flush failed:', err);
    brainPendingAudio = buf;
    _queueBrainReconnect();
  }
}

// ─── Settings wiring ──────────────────────────────────────────────────────────
sThreshold.value = SILENCE_DB;
sThresholdVal.textContent = SILENCE_DB + ' dB';
sThreshold.oninput = () => {
  SILENCE_DB = +sThreshold.value;
  sThresholdVal.textContent = SILENCE_DB + ' dB';
};

sTimeout.value = SILENCE_MS;
sTimeoutVal.textContent = SILENCE_MS + ' ms';
sTimeout.oninput = () => {
  SILENCE_MS = +sTimeout.value;
  sTimeoutVal.textContent = SILENCE_MS + ' ms';
};

sVoice.onchange = () => appendMsg('🔊 TTS voice: ' + sVoice.value, 'system');

// Keep the queue bars animating while audio chunks are in flight.
setInterval(() => {
  if (_inFlight > 0) _vizQ();
}, 140);

// ═══════════════════════════════════════════════════════════════════════════════
// BOOT
// ═══════════════════════════════════════════════════════════════════════════════
boot();