Voice-AI-Agent / frontend /script.js
rakib72642's picture
fixed stt and added whisper and elevenlabs stt + updated
bed58cc
raw
history blame
34.2 kB
'use strict';
// ─── DOM refs ─────────────────────────────────────────────────────────────────
// Every handle below is looked up once, while the script is being evaluated,
// so the script has to run after the markup exists. micBtn and voiceViz are
// dereferenced right here (querySelector / querySelectorAll), which means a
// missing #mic-btn or #voice-viz throws at load time.
// Chat transcript and text input controls.
const chatBox = document.getElementById('chat-box');
const sendBtn = document.getElementById('send-btn');
const textInput = document.getElementById('text-input');
// Microphone button, its text label, and the stop button.
const micBtn = document.getElementById('mic-btn');
const micLabel = micBtn.querySelector('.mic-label');
const stopBtn = document.getElementById('stop-btn');
// Status indicator written by setState().
const stateLabel = document.getElementById('state-label');
const stateDot = document.getElementById('state-dot');
const clearBtn = document.getElementById('clear-btn');
// Microphone level bars (driven by vizTick) and playback-queue bars/counter
// (driven by _vizQ).
const voiceViz = document.getElementById('voice-viz');
const vizBars = Array.from(voiceViz.querySelectorAll('.viz-bar'));
const queueBars = Array.from(document.querySelectorAll('.queue-bar'));
const chunksCount = document.getElementById('chunks-count');
// Start-up overlay: container, progress bar and status line.
const initOverlay = document.getElementById('init-overlay');
const initBar = document.getElementById('init-bar');
const initStatus = document.getElementById('init-status');
// Layout chrome.
const sidebarEl = document.getElementById('sidebar');
const sidebarToggle = document.getElementById('sidebar-toggle');
const mobileMenuBtn = document.getElementById('mobile-menu-btn');
const appEl = document.getElementById('app');
// Settings controls: silence threshold (dB), silence timeout (ms), TTS voice.
const sThreshold = document.getElementById('s-threshold');
const sThresholdVal = document.getElementById('s-threshold-val');
const sTimeout = document.getElementById('s-timeout');
const sTimeoutVal = document.getElementById('s-timeout-val');
const sVoice = document.getElementById('s-voice');
// Latency read-outs (each is filled with "<n> ms" text by the WS handlers and
// enqueueAudio) and the connection status badge.
const mStt = document.getElementById('m-stt');
const mLlm = document.getElementById('m-llm');
const mTts = document.getElementById('m-tts');
const mTotal = document.getElementById('m-total');
const sysStat = document.getElementById('sys-status');
// ─── Ephemeral user identity ──────────────────────────────────────────────────
// New page load = new user. Reloading the app generates a fresh ID.
//
// The ID is "u_" followed by 16 hex characters whenever Web Crypto is
// available. crypto.randomUUID() only exists in secure contexts (https or
// localhost), so a page served over plain http from another host would skip
// it; crypto.getRandomValues() has no such restriction and is tried next.
// Math.random() is kept purely as a last-resort fallback.
const USER_ID = (() => {
  const cryptoApi = globalThis.crypto;
  if (typeof cryptoApi?.randomUUID === 'function') {
    return 'u_' + cryptoApi.randomUUID().replace(/-/g, '').slice(0, 16);
  }
  if (typeof cryptoApi?.getRandomValues === 'function') {
    // 8 random bytes → 16 hex characters, same shape as the UUID-derived ID.
    const bytes = cryptoApi.getRandomValues(new Uint8Array(8));
    const hex = Array.from(bytes, (b) => b.toString(16).padStart(2, '0'));
    return 'u_' + hex.join('');
  }
  return (
    'u_' +
    Date.now().toString(36) +
    '_' +
    Math.random().toString(36).slice(2, 10)
  );
})();
// ─── WebSocket base URL ────────────────────────────────────────────────────────
// This string is interpolated straight into `new WebSocket(...)`, so it must
// use the ws:// (or wss://) scheme. Only recent browsers accept an http://
// URL there; older ones throw a SyntaxError from the constructor.
const WS_BASE = 'ws://127.0.0.1:8679';
console.log('[Boot] WS base:', WS_BASE);
// ─── WS handles ───────────────────────────────────────────────────────────────
// chatWS / voiceWS hold the current sockets (created in _connectChat and
// _connectVoice). The *Retry counters feed _backoff() and are reset to 0 when
// the matching socket opens; the *RetryTimer handles are the pending
// reconnect timeouts.
let chatWS = null;
let voiceWS = null;
let _chatRetry = 0;
let _voiceRetry = 0;
let _chatRetryTimer = null;
let _voiceRetryTimer = null;
// ─── VAD / recording settings ─────────────────────────────────────────────────
// SILENCE_MS: how long the level must stay below the threshold before
// _onSilenceTimeout fires (user-adjustable via the s-timeout control).
// SILENCE_DB: level in dB above which vadTick treats the input as speech
// (user-adjustable via the s-threshold control).
// VAD_MS: polling period of vadTick.
let SILENCE_MS = 1200; // BUG-FIX-B: was 450 ms
let SILENCE_DB = -38;
const VAD_MS = 80;
const MIN_SPEECH_MS = 400; // discard noise bursts shorter than this
// ─── Playback state ───────────────────────────────────────────────────────────
// _ctx: AudioContext used for TTS playback (created lazily by _ctxEnsure).
// _schedEnd: context time at which the last scheduled chunk finishes.
// _endTimer: timeout handle that eventually calls _done().
// _cancelled: set by stopAllAudio(); enqueueAudio() drops chunks while true.
// _inFlight: chunks accepted by enqueueAudio() that have not ended yet.
// _ttsPlaying: set when a binary audio frame arrives; vadTick() is muted
// while it is true.
let _ctx = null;
let _schedEnd = 0;
let _endTimer = null;
let _cancelled = false;
let _inFlight = 0;
let _ttsPlaying = false;
// ─── Recording state ──────────────────────────────────────────────────────────
// micStream / analyserCtx / analyser / mediaRecorder are the live capture
// objects; all four are released by _teardownMicHardware().
// audioChunks collects Blob parts from the recorder's dataavailable events.
// isListening: mic is open and VAD is polling.
// isSpeaking: VAD has detected speech and a recorder is running.
// isProcessing: a request (text or voice) is awaiting its reply.
// isRecordingLocked: a voice utterance is in flight; cleared by _done() or
// _resetVoiceState().
// silenceTimer / vadInt / vizInt are timer handles.
// _speechStartMs: Date.now() at the moment speech was first detected.
// _recorderMime: MIME type used for the recorder and the outgoing Blob.
let micStream = null;
let analyserCtx = null;
let analyser = null;
let mediaRecorder = null;
let audioChunks = [];
let isListening = false;
let isSpeaking = false;
let isProcessing = false;
let isRecordingLocked = false;
let silenceTimer = null;
let vadInt = null;
let vizInt = null;
let _speechStartMs = 0;
let _recorderMime = 'audio/webm';
// ─── AI streaming bubble state ────────────────────────────────────────────────
// aiEl is the chat bubble currently being streamed into, aiTxt the text
// accumulated for it, thinkingEl the animated placeholder bubble (if shown).
let aiEl = null;
let aiTxt = '';
let thinkingEl = null;
// ─── Latency timestamps ───────────────────────────────────────────────────────
// Date.now() values: request sent, transcript received, first LLM token
// received, first TTS chunk decoded. 0 means "not recorded yet".
let tSend = 0,
tStt = 0,
tLlm = 0,
tTts = 0;
// ═══════════════════════════════════════════════════════════════════════════════
// INIT OVERLAY
// ═══════════════════════════════════════════════════════════════════════════════
// Scripted start-up stages shown on the init overlay. For each entry boot()
// schedules a timer `at` milliseconds after start that marks the element with
// this `id` active, shows `text` in the status line and sets the progress bar
// to `pct` percent. The timings are fixed; nothing in this file ties them to
// actual backend progress.
const STAGES = [
{ id: 'stage-1', text: 'AI Engine শুরু হচ্ছে…', at: 400, pct: 20 },
{
id: 'stage-2',
text: 'Speech Recognition মডেল লোড হচ্ছে…',
at: 1100,
pct: 50,
},
{ id: 'stage-3', text: 'GPU Warmup চলছে…', at: 1900, pct: 75 },
{ id: 'stage-4', text: 'Voice Pipeline প্রস্তুত হচ্ছে…', at: 2700, pct: 90 },
];
// The overlay closes only when both gates are open:
// _wsGate – set when the voice socket opens (or closes before the overlay
// has been dismissed), and by the 8 s safety timer in boot().
// _stageGate – set when the last scripted stage has finished.
// _initClosed guards against closing the overlay twice.
let _wsGate = false;
let _stageGate = false;
let _initClosed = false;
/**
 * Dismisses the init overlay once both the socket gate and the stage gate are
 * open. Safe to call repeatedly: after the first successful call it is a no-op.
 * The app is revealed 450 ms later so the "ready" message is visible briefly.
 */
function _tryClose() {
  const gatesOpen = _wsGate && _stageGate;
  if (!gatesOpen || _initClosed) return;

  _initClosed = true;
  initStatus.textContent = 'সিস্টেম প্রস্তুত ✓';
  initBar.style.width = '100%';

  const revealApp = () => {
    initOverlay.classList.add('hidden');
    appEl.style.opacity = '1';
    appEl.style.pointerEvents = 'auto';
    setState('ready');
  };
  setTimeout(revealApp, 450);
}
/**
 * Starts the application: opens both WebSockets and schedules the scripted
 * overlay stages.
 *
 * Timers scheduled here:
 *  - one per STAGES entry (at its `at` offset) that completes the previous
 *    stage, activates the current one and updates status text / progress bar;
 *  - one 650 ms after the last stage that completes it and opens the stage gate;
 *  - a safety timer at 8000 ms that forces both gates open if the overlay is
 *    still showing.
 */
function boot() {
  initWebSockets();

  for (let i = 0; i < STAGES.length; i++) {
    const { id, text, at, pct } = STAGES[i];
    const enterStage = () => {
      if (i > 0) _stageDone(STAGES[i - 1].id);
      const stageEl = document.getElementById(id);
      if (stageEl) stageEl.classList.add('active');
      initStatus.textContent = text;
      initBar.style.width = pct + '%';
    };
    setTimeout(enterStage, at);
  }

  const finishStages = () => {
    _stageDone(STAGES[STAGES.length - 1].id);
    _stageGate = true;
    _tryClose();
  };
  setTimeout(finishStages, STAGES[STAGES.length - 1].at + 650);

  const forceOpen = () => {
    if (_initClosed) return;
    _stageGate = true;
    _wsGate = true;
    _tryClose();
  };
  setTimeout(forceOpen, 8000);
}
/**
 * Marks an overlay stage as finished by swapping its "active" class for "done".
 * Does nothing when no element with the given id exists.
 *
 * @param {string} id - Element id of the stage row.
 */
function _stageDone(id) {
  const stageEl = document.getElementById(id);
  if (!stageEl) return;
  const { classList } = stageEl;
  classList.remove('active');
  classList.add('done');
}
// ═══════════════════════════════════════════════════════════════════════════════
// WEBSOCKETS
// ═══════════════════════════════════════════════════════════════════════════════
/**
 * Reconnect delay for the given retry count: 1 s doubled per retry, capped at
 * 16 s.
 *
 * @param {number} r - Number of retries already attempted.
 * @returns {number} Delay in milliseconds.
 */
function _backoff(r) {
  const CAP_MS = 16000;
  const delayMs = 1000 * 2 ** r;
  return delayMs > CAP_MS ? CAP_MS : delayMs;
}
/**
 * Updates the connection badge text and colour class. No-op when the badge
 * element is absent from the page.
 *
 * @param {boolean} online - Truthy for "Ready", falsy for "Reconnecting".
 */
function _setSysStatus(online) {
  if (!sysStat) return;
  const [label, badgeClass] = online
    ? ['Ready', 'badge-green']
    : ['Reconnecting', 'badge-yellow'];
  sysStat.textContent = label;
  sysStat.className = `status-badge ${badgeClass}`;
}
/**
 * Opens the text-chat WebSocket unless one is already connecting or open.
 *
 * On open the retry counter is reset and an `init` frame carrying USER_ID is
 * sent. On close a reconnect is scheduled using _backoff().
 *
 * If the socket closes while a text request is awaiting its reply, no `end`
 * frame will ever arrive on it, so the pending request is released here
 * (mirroring what the voice socket's close handler does for voice state).
 * A voice utterance in flight is recognised by isRecordingLocked and is left
 * alone.
 */
function _connectChat() {
  if (chatWS && chatWS.readyState <= WebSocket.OPEN) return;
  chatWS = new WebSocket(`${WS_BASE}/ws/chat`);
  chatWS.onopen = () => {
    _chatRetry = 0;
    console.log('[Chat WS] connected');
    chatWS.send(JSON.stringify({ type: 'init', user_id: USER_ID }));
  };
  chatWS.onerror = (e) => console.error('[Chat WS] error:', e);
  chatWS.onclose = (ev) => {
    console.log(`[Chat WS] closed (${ev.code})`);
    if (isProcessing && !isRecordingLocked) {
      // Text request orphaned by the disconnect: unlock the UI and tell the
      // user, otherwise isProcessing would stay true indefinitely.
      _removeThinking();
      appendMsg('⚠️ সার্ভারের সাথে সংযোগ নেই — আবার চেষ্টা করুন।', 'system');
      aiEl = null;
      aiTxt = '';
      isProcessing = false;
      setState('ready');
    }
    clearTimeout(_chatRetryTimer);
    _chatRetryTimer = setTimeout(() => {
      _chatRetry++;
      _connectChat();
    }, _backoff(_chatRetry));
  };
  chatWS.onmessage = onChatMsg;
}
/**
 * Opens the voice WebSocket (binary frames as ArrayBuffer) unless one is
 * already connecting or open.
 *
 * open  → reset retry counter, send `init` with USER_ID, mark the system
 *         online and open the overlay's socket gate.
 * close → mark the system offline; if the overlay is still up, open the
 *         socket gate anyway so start-up is not blocked; if any voice
 *         activity was in progress, tear the mic down and return to idle;
 *         finally schedule a reconnect via _backoff().
 */
function _connectVoice() {
  const alreadyLive = voiceWS && voiceWS.readyState <= WebSocket.OPEN;
  if (alreadyLive) return;

  const handleOpen = () => {
    _voiceRetry = 0;
    console.log('[Voice WS] connected, uid:', USER_ID);
    voiceWS.send(JSON.stringify({ type: 'init', user_id: USER_ID }));
    _setSysStatus(true);
    _wsGate = true;
    _tryClose();
  };

  const handleError = (e) => console.error('[Voice WS] error:', e);

  const handleClose = (ev) => {
    console.log(`[Voice WS] closed (${ev.code})`);
    _setSysStatus(false);

    if (!_initClosed) {
      _wsGate = true;
      _tryClose();
    }

    const voiceActive = isListening || isSpeaking || isProcessing;
    if (voiceActive) {
      _teardownMicHardware();
      _resetVoiceState();
      setState('ready');
      setMic('off');
      micBtn.disabled = false;
    }

    clearTimeout(_voiceRetryTimer);
    const reconnect = () => {
      _voiceRetry++;
      _connectVoice();
    };
    _voiceRetryTimer = setTimeout(reconnect, _backoff(_voiceRetry));
  };

  voiceWS = new WebSocket(`${WS_BASE}/ws/voice`);
  voiceWS.binaryType = 'arraybuffer';
  voiceWS.onopen = handleOpen;
  voiceWS.onerror = handleError;
  voiceWS.onclose = handleClose;
  voiceWS.onmessage = onVoiceMsg;
}
/** Opens the chat socket first, then the voice socket. */
function initWebSockets() {
  for (const connect of [_connectChat, _connectVoice]) {
    connect();
  }
}
// ── Shared helpers for the chat / voice WS handlers ───────────────────────────
const _HTML_ESCAPES = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
  '"': '&quot;',
  "'": '&#39;',
};

/** Escapes the five HTML-significant characters so text can go into innerHTML. */
function _escapeHtml(text) {
  return String(text).replace(/[&<>"']/g, (ch) => _HTML_ESCAPES[ch]);
}

/**
 * Converts accumulated AI text to HTML for the chat bubble.
 * Uses `marked` when the page has loaded it; otherwise the text is HTML-escaped
 * and newlines become <br>. (Previously the fallback inserted the raw model
 * output into innerHTML, so any markup in a reply was interpreted.)
 * NOTE(review): marked.parse() does not sanitise its output; if replies can
 * carry untrusted content, run the result through an HTML sanitiser.
 */
function _renderAiHtml(text) {
  if (typeof marked !== 'undefined') return marked.parse(text);
  return _escapeHtml(text).replace(/\n/g, '<br>');
}

/** Creates the streaming AI bubble if needed, renders aiTxt into it, scrolls down. */
function _paintAiBubble() {
  if (!aiEl) {
    aiEl = document.createElement('div');
    aiEl.className = 'message ai';
    chatBox.appendChild(aiEl);
  }
  aiEl.innerHTML = _renderAiHtml(aiTxt);
  chatBox.scrollTop = chatBox.scrollHeight;
}

/** Parses a text frame; returns the message object, or null if it is not a JSON object. */
function _parseWsJson(raw) {
  let msg;
  try {
    msg = JSON.parse(raw);
  } catch {
    return null;
  }
  return msg !== null && typeof msg === 'object' ? msg : null;
}

/** Records first-token time once per reply and shows LLM latency relative to `baseline`. */
function _markFirstToken(baseline) {
  if (tLlm !== 0) return;
  tLlm = Date.now();
  if (baseline > 0) mLlm.textContent = tLlm - baseline + ' ms';
}

/** Final render of the streamed bubble, total-latency read-out, and state reset. */
function _finishAiTurn() {
  _removeThinking();
  if (aiEl && aiTxt) _paintAiBubble();
  aiEl = null;
  aiTxt = '';
  if (tSend > 0) mTotal.textContent = Date.now() - tSend + ' ms';
  tSend = tStt = tLlm = tTts = 0;
  isProcessing = false;
}

/** Shows a server-reported error in the transcript and drops the streaming bubble state. */
function _failAiTurn(text) {
  _removeThinking();
  appendMsg('⚠️ ' + (text ?? 'Unknown error'), 'system');
  aiEl = null;
  aiTxt = '';
  isProcessing = false;
}

// ── Chat WS handler ───────────────────────────────────────────────────────────
/**
 * Handles one frame from the chat socket.
 *
 * Frame types: `llm_token` (append a streamed token), `chat` (replace the
 * bubble with a complete text), `end` (finish the turn, return to ready),
 * `error` (show the error, return to ready). Frames that are not JSON objects
 * are ignored.
 *
 * @param {MessageEvent} ev
 */
function onChatMsg(ev) {
  const msg = _parseWsJson(ev.data);
  if (!msg) return;
  console.log('[Chat WS]', msg.type);
  switch (msg.type) {
    case 'llm_token':
      if (!msg.token) break;
      _markFirstToken(tSend); // text chat: latency is measured from send
      _removeThinking();
      aiTxt += msg.token;
      _paintAiBubble();
      break;
    case 'chat':
      if (!msg.text) break;
      _removeThinking();
      aiTxt = msg.text;
      _paintAiBubble();
      break;
    case 'end':
      _finishAiTurn();
      setState('ready');
      break;
    case 'error':
      _failAiTurn(msg.text);
      setState('ready');
      break;
  }
}
// ── Voice WS handler ──────────────────────────────────────────────────────────
/**
 * Handles one frame from the voice socket.
 *
 * Binary frames are TTS audio and go to enqueueAudio(). Text frames are JSON:
 * `init_ack`, `stt` (transcript of the user's utterance), `llm_token`, `end`,
 * `error`, `pong`.
 *
 * @param {MessageEvent} ev
 */
function onVoiceMsg(ev) {
  if (ev.data instanceof ArrayBuffer) {
    // Frames that arrive after Stop belong to a cancelled reply. They must not
    // raise _ttsPlaying, because vadTick() is muted while that flag is set and
    // nothing would clear it until the next _done()/_resetVoiceState().
    if (_cancelled) return;
    _ttsPlaying = true;
    enqueueAudio(ev.data);
    return;
  }
  const msg = _parseWsJson(ev.data);
  if (!msg) return;
  console.log('[Voice WS]', msg.type);
  switch (msg.type) {
    case 'init_ack':
      console.log('[Voice WS] ack uid:', msg.user_id);
      break;
    case 'stt':
      tStt = Date.now();
      if (tSend > 0) mStt.textContent = tStt - tSend + ' ms';
      _removeThinking();
      appendMsg('🎤 ' + msg.text, 'user');
      aiEl = null;
      aiTxt = '';
      appendThinking();
      setState('processing');
      break;
    case 'llm_token':
      if (!msg.token) break;
      _markFirstToken(tStt); // voice: latency is measured from the transcript
      _removeThinking();
      aiTxt += msg.token;
      _paintAiBubble();
      break;
    case 'end':
      _finishAiTurn();
      // BUG-FIX-C: schedule _done() to fire after TTS audio drains.
      // If no TTS audio arrived (_schedEnd == 0), _done fires in ~300 ms.
      _scheduleEnd();
      break;
    case 'error':
      _failAiTurn(msg.text);
      // BUG-FIX-C: unconditionally unlock on error
      _done();
      break;
    case 'pong':
      break;
    default:
      console.log('[Voice WS] unknown:', msg.type);
  }
}
// ─── Thinking bubble ──────────────────────────────────────────────────────────
/**
 * Adds the three-dot "thinking" placeholder to the end of the chat and scrolls
 * it into view. Does nothing if a placeholder is already showing.
 */
function appendThinking() {
  if (thinkingEl) return;
  const bubble = document.createElement('div');
  bubble.className = 'message ai thinking';
  bubble.innerHTML = '<span class="dot"></span>'.repeat(3);
  thinkingEl = bubble;
  chatBox.appendChild(bubble);
  chatBox.scrollTop = chatBox.scrollHeight;
}
/** Removes the "thinking" placeholder from the chat, if one is showing. */
function _removeThinking() {
  if (!thinkingEl) return;
  thinkingEl.remove();
  thinkingEl = null;
}
// ═══════════════════════════════════════════════════════════════════════════════
// AUDIO PLAYBACK
// ═══════════════════════════════════════════════════════════════════════════════
/**
 * Returns the playback AudioContext, creating a new one when none exists or
 * the previous one was closed (which also resets the scheduling cursor), and
 * asking a suspended context to resume.
 *
 * @returns {AudioContext}
 */
function _ctxEnsure() {
  const needsNew = !_ctx || _ctx.state === 'closed';
  if (needsNew) {
    const ContextCtor = window.AudioContext || window.webkitAudioContext;
    _ctx = new ContextCtor();
    _schedEnd = 0;
  }
  if (_ctx.state === 'suspended') {
    _ctx.resume();
  }
  return _ctx;
}
// Tail of the promise chain that serialises chunk scheduling. Each chunk is
// decoded as soon as it arrives, but it is only scheduled after every earlier
// chunk has been scheduled (or dropped).
let _audioChain = Promise.resolve();

/**
 * Schedules one decoded chunk directly after the previously scheduled audio.
 * Never throws: decode failures, empty buffers, cancelled playback and
 * scheduling errors all just release the chunk's in-flight slot.
 *
 * @param {AudioContext} ctx - Context the chunk was decoded with.
 * @param {{failed: boolean, decoded?: AudioBuffer, error?: *}} outcome
 */
function _scheduleChunk(ctx, outcome) {
  const release = () => {
    _inFlight = Math.max(0, _inFlight - 1);
    _vizQ();
  };
  if (outcome.failed) {
    console.warn('[Audio] decode error:', outcome.error?.message ?? outcome.error);
    release();
    return;
  }
  const { decoded } = outcome;
  if (!decoded || decoded.duration < 0.001 || _cancelled) {
    release();
    return;
  }
  if (tTts === 0 && tLlm > 0) {
    tTts = Date.now();
    mTts.textContent = tTts - tLlm + ' ms';
  }
  try {
    const src = ctx.createBufferSource();
    src.buffer = decoded;
    src.connect(ctx.destination);
    src.onended = release;
    // Start no earlier than "now + 10 ms" and never before the previous
    // chunk's end, so chunks play gaplessly and in order.
    const start = Math.max(ctx.currentTime + 0.01, _schedEnd);
    src.start(start);
    _schedEnd = start + decoded.duration;
  } catch (err) {
    // e.g. the context was closed while this chunk was decoding.
    console.warn('[Audio] schedule error:', err?.message ?? err);
    release();
    return;
  }
  setState('speaking');
}

/**
 * Decodes a TTS audio chunk and queues it for playback.
 *
 * Decoding of several chunks runs concurrently and can finish in any order;
 * scheduling is therefore chained so chunks are always played in the order
 * they were received. Chunks are dropped while playback is cancelled.
 *
 * @param {ArrayBuffer} buf - Encoded audio as received from the voice socket.
 * @returns {Promise<void>} Resolves once this chunk has been scheduled or dropped.
 */
async function enqueueAudio(buf) {
  if (_cancelled) return;
  _inFlight++;
  _vizQ();
  const ctx = _ctxEnsure();
  // The async wrapper turns a synchronous throw from decodeAudioData/slice
  // into a rejection, so both failure modes take the same path. A copy of the
  // buffer is decoded, leaving the caller's buffer untouched.
  const decoding = (async () => ctx.decodeAudioData(buf.slice(0)))().then(
    (decoded) => ({ failed: false, decoded }),
    (error) => ({ failed: true, error }),
  );
  const turn = _audioChain
    .then(() => decoding)
    .then((outcome) => _scheduleChunk(ctx, outcome));
  _audioChain = turn;
  await turn;
}
/**
 * Redraws the playback-queue indicator: the numeric counter (when present)
 * and one bar per queue slot. Bars below the in-flight count are lit with a
 * random height of 12–42 px; the rest are 4 px and unlit.
 */
function _vizQ() {
  if (chunksCount) chunksCount.textContent = _inFlight;
  for (const [slot, bar] of queueBars.entries()) {
    const lit = slot < _inFlight;
    bar.classList.toggle('active', lit);
    const heightPx = lit ? 12 + Math.random() * 30 : 4;
    bar.style.height = `${heightPx}px`;
  }
}
/**
 * Arms the timer that returns the system to idle (_done) once the scheduled
 * TTS audio has finished playing, plus a 300 ms margin.
 *
 * When there is no usable playback context, no audio can be pending, so only
 * the margin applies. The timer handle is always stored in _endTimer so that
 * a later call, or stopAllAudio(), can cancel it; previously the no-context
 * path left an untracked timer behind.
 */
function _scheduleEnd() {
  clearTimeout(_endTimer);
  const ctx = _ctx;
  const hasLiveCtx = Boolean(ctx) && ctx.state !== 'closed';
  const remainingMs = hasLiveCtx
    ? Math.max(0, (_schedEnd - ctx.currentTime) * 1000)
    : 0;
  // BUG-FIX-C: always call _done regardless of _cancelled — we must
  // release the lock. Use a minimal delay when no audio was scheduled.
  _endTimer = setTimeout(_done, remainingMs + 300);
}
/**
 * _done — puts the voice pipeline back into its idle state.
 *
 * Clears the playback/processing/lock flags, zeroes the queue indicator,
 * re-enables the mic button and shows the "ready" state. It never starts a
 * new recording; the user has to press the mic button again.
 */
function _done() {
  _ttsPlaying = isProcessing = isRecordingLocked = false;
  _inFlight = 0;
  _vizQ();

  micBtn.disabled = false;
  setState('ready');
  setMic('off');
  console.log('[Voice] Idle — ready for next manual press');
}
/**
 * Cancels TTS playback immediately and asks the server to stop generating.
 *
 * The playback context is closed rather than suspended: suspending only
 * pauses the clock, so buffer sources that were already scheduled would start
 * playing again as soon as the context is resumed for the next utterance.
 * Closing discards them. _ctxEnsure() creates a fresh context on demand and
 * _scheduleEnd() already copes with a missing context.
 */
function stopAllAudio() {
  _cancelled = true;
  _ttsPlaying = false;
  clearTimeout(_endTimer);
  _endTimer = null;
  _schedEnd = 0;
  _inFlight = 0;
  _vizQ();
  if (_ctx && _ctx.state !== 'closed') {
    const staleCtx = _ctx;
    _ctx = null;
    staleCtx
      .close()
      .catch((err) =>
        console.warn('[Audio] context close failed:', err?.message ?? err),
      );
  }
  if (voiceWS && voiceWS.readyState === WebSocket.OPEN) {
    voiceWS.send(JSON.stringify({ type: 'cancel' }));
  }
}
// ═══════════════════════════════════════════════════════════════════════════════
// TEXT CHAT
// ═══════════════════════════════════════════════════════════════════════════════
// Send on button click, or on Enter without Shift.
sendBtn.onclick = sendText;
textInput.addEventListener('keydown', (e) => {
  if (e.key !== 'Enter' || e.shiftKey) return;
  // While an IME composition is active, Enter confirms the composed text; it
  // must not submit a half-typed message.
  if (e.isComposing) return;
  // Suppress the default Enter action (newline in a textarea / implicit form
  // submission) because sendText() clears the field itself.
  e.preventDefault();
  sendText();
});
/**
 * Submits the text field's content as a chat request.
 *
 * Ignored when the trimmed text is empty or another request is in progress.
 * Otherwise the message is echoed into the transcript, the field is cleared,
 * per-request state (cancel flag, timestamps, streaming bubble) is reset, the
 * "thinking" placeholder is shown and the text is handed to _sendViaChat().
 */
function sendText() {
  const query = textInput.value.trim();
  if (isProcessing || !query) return;

  appendMsg(query, 'user');
  textInput.value = '';

  _cancelled = false;
  isProcessing = true;
  tSend = Date.now();
  tLlm = 0;
  tTts = 0;
  aiEl = null;
  aiTxt = '';

  setState('processing');
  appendThinking();
  _sendViaChat(query);
}
/**
 * Sends a chat query over the chat socket, waiting for the socket to open if
 * it is currently (re)connecting.
 *
 * The wait is bounded: the socket is polled every 300 ms for at most
 * `maxWaitMs`. If it is still not open by then, the request is abandoned, the
 * user is told and the UI is unlocked — previously the poll ran forever and
 * left the UI stuck in "processing". Polling also stops if the request was
 * released in the meantime (isProcessing cleared, e.g. by the Stop button).
 *
 * @param {string} text - The user's query.
 * @param {number} [maxWaitMs=10000] - Longest time to wait for the socket.
 */
function _sendViaChat(text, maxWaitMs = 10000) {
  const payload = JSON.stringify({ user_id: USER_ID, user_query: text });
  const deadline = Date.now() + maxWaitMs;
  const attempt = () => {
    if (chatWS && chatWS.readyState === WebSocket.OPEN) {
      chatWS.send(payload);
      return;
    }
    if (!isProcessing) {
      console.warn('[Chat WS] request released before it could be sent');
      return;
    }
    if (Date.now() >= deadline) {
      console.warn('[Chat WS] not connected — query dropped');
      _removeThinking();
      appendMsg('⚠️ সার্ভারের সাথে সংযোগ নেই — আবার চেষ্টা করুন।', 'system');
      isProcessing = false;
      setState('ready');
      return;
    }
    setTimeout(attempt, 300);
  };
  attempt();
}
// ═══════════════════════════════════════════════════════════════════════════════
// MICROPHONE / VAD
// ═══════════════════════════════════════════════════════════════════════════════
// Mic button: toggles listening. Presses are ignored while an utterance is
// locked in or a request is being processed.
micBtn.onclick = async () => {
  const busy = isRecordingLocked || isProcessing;
  if (busy) {
    console.log('[Mic] Ignored — system busy');
    return;
  }

  if (!isListening) {
    await startListening();
    return;
  }

  // Already listening → cancel and go back to idle.
  _teardownMicHardware();
  _resetVoiceState();
  setState('ready');
  setMic('off');
};
// Stop button: cancels playback, releases the microphone if it is in use and
// returns every voice flag and control to idle.
stopBtn.onclick = () => {
  stopAllAudio();
  if (isListening || isSpeaking) _teardownMicHardware();
  _resetVoiceState();
  // The request the placeholder stood for has just been abandoned; without
  // this the dots keep animating until some later frame happens to remove them.
  _removeThinking();
  setState('ready');
  setMic('off');
  micBtn.disabled = false;
};
// ── startListening ────────────────────────────────────────────────────────────
// True while a startListening() call is waiting for the microphone.
let _micStarting = false;

/**
 * Builds the AudioContext + AnalyserNode used for level metering.
 * A 16 kHz context is tried first; if the browser refuses it (some reject a
 * context whose rate differs from the capture stream's), the default rate is
 * used instead. The level computation in vadTick does not depend on the rate.
 *
 * @param {MediaStream} stream
 * @returns {{ctx: AudioContext, node: AnalyserNode}}
 * @throws Whatever the last attempt threw, if every attempt fails.
 */
function _openAnalyser(stream) {
  const ContextCtor = window.AudioContext || window.webkitAudioContext;
  let lastErr;
  for (const options of [{ sampleRate: 16000 }, undefined]) {
    let ctx = null;
    try {
      ctx = options ? new ContextCtor(options) : new ContextCtor();
      const source = ctx.createMediaStreamSource(stream);
      const node = ctx.createAnalyser();
      node.fftSize = 512;
      node.smoothingTimeConstant = 0.6;
      source.connect(node);
      return { ctx, node };
    } catch (err) {
      lastErr = err;
      if (ctx && ctx.state !== 'closed') ctx.close().catch(() => {});
    }
  }
  throw lastErr;
}

/**
 * Opens the microphone and starts voice-activity polling.
 *
 * Does nothing if listening is already active or starting, or if a request is
 * in progress. If the user denies access, or the analyser cannot be set up,
 * a message is shown and any acquired stream is released again so the
 * microphone does not stay open.
 */
async function startListening() {
  if (_micStarting || isListening || isProcessing || isRecordingLocked) return;
  // isListening only becomes true after the permission prompt resolves; this
  // flag stops a second click during the prompt from opening a second stream.
  _micStarting = true;
  try {
    _ctxEnsure();
    let stream;
    try {
      stream = await navigator.mediaDevices.getUserMedia({
        audio: {
          echoCancellation: true,
          noiseSuppression: true,
          autoGainControl: true,
          channelCount: 1,
          sampleRate: 16000,
        },
      });
    } catch (err) {
      console.error('[Mic] getUserMedia failed:', err);
      appendMsg('⚠️ মাইক্রোফোন অ্যাক্সেস দেওয়া হয়নি।', 'system');
      return;
    }

    let metering;
    try {
      metering = _openAnalyser(stream);
    } catch (err) {
      console.error('[Mic] Analyser setup failed:', err);
      stream.getTracks().forEach((t) => t.stop());
      appendMsg('⚠️ মাইক্রোফোন চালু করা যায়নি।', 'system');
      return;
    }

    micStream = stream;
    analyserCtx = metering.ctx;
    analyser = metering.node;
    isListening = true;
    audioChunks = [];
    setMic('listening');
    setState('listening');
    voiceViz.classList.add('active');
    vadInt = setInterval(vadTick, VAD_MS);
    vizInt = setInterval(vizTick, 60);
    console.log('[Mic] Listening started');
  } finally {
    _micStarting = false;
  }
}
// ── _teardownMicHardware ──────────────────────────────────────────────────────
/**
 * Releases everything that touches the microphone: polling timers, the
 * recorder, the capture tracks and the analyser's AudioContext, then resets
 * the level bars.
 *
 * A recorder that is still running is stopped with its callbacks replaced by
 * no-ops, so a forced teardown never triggers the normal onstop pipeline.
 * audioChunks is deliberately left untouched — the recorder's onstop handler
 * copies the chunks before calling this function.
 */
function _teardownMicHardware() {
  for (const intervalId of [vadInt, vizInt]) clearInterval(intervalId);
  clearTimeout(silenceTimer);
  vadInt = null;
  vizInt = null;
  silenceTimer = null;

  const recorder = mediaRecorder;
  if (recorder && recorder.state !== 'inactive') {
    recorder.ondataavailable = () => {};
    recorder.onstop = () => {};
    recorder.stop();
  }
  mediaRecorder = null;

  if (micStream) {
    for (const track of micStream.getTracks()) track.stop();
  }
  micStream = null;

  const meterCtx = analyserCtx;
  if (meterCtx && meterCtx.state !== 'closed') {
    meterCtx.close().catch(() => {});
  }
  analyserCtx = null;
  analyser = null;

  voiceViz.classList.remove('active');
  for (const bar of vizBars) bar.style.height = '4px';
  console.log('[Mic] Hardware torn down');
}
// ── _resetVoiceState ──────────────────────────────────────────────────────────
/**
 * Clears every voice-related flag, the speech start time and any buffered
 * audio chunks. Touches state only — hardware is released separately by
 * _teardownMicHardware().
 */
function _resetVoiceState() {
  isListening = isSpeaking = false;
  isProcessing = isRecordingLocked = false;
  _ttsPlaying = false;
  _speechStartMs = 0;
  audioChunks = [];
}
// ── VAD tick ──────────────────────────────────────────────────────────────────
/**
 * One voice-activity poll.
 *
 * Computes the RMS level of the analyser's current time-domain samples in dB
 * and compares it with SILENCE_DB:
 *  - above the threshold: cancel any pending silence timer and, if not
 *    already recording, start the recorder and switch to "recording";
 *  - otherwise: while recording, arm the silence timer (once) so that
 *    _onSilenceTimeout runs after SILENCE_MS of continued quiet.
 *
 * Skipped entirely when there is no analyser, while TTS is playing, or while
 * a request is being processed / the recording lock is held.
 */
function vadTick() {
  const muted = !analyser || _ttsPlaying || isProcessing || isRecordingLocked;
  if (muted) return;

  const samples = new Float32Array(analyser.frequencyBinCount);
  analyser.getFloatTimeDomainData(samples);
  const energy = samples.reduce((acc, s) => acc + s * s, 0);
  const rms = Math.sqrt(energy / samples.length);
  // An RMS of 0 (or NaN) is replaced by 1e-10 so the log stays finite.
  const db = 20 * Math.log10(rms || 1e-10);
  const speech = db > SILENCE_DB;

  if (!speech) {
    if (isSpeaking && !silenceTimer) {
      silenceTimer = setTimeout(_onSilenceTimeout, SILENCE_MS);
    }
    return;
  }

  clearTimeout(silenceTimer);
  silenceTimer = null;
  if (isSpeaking) return;
  // duplicate guard: a recorder is still active from a previous detection
  if (mediaRecorder && mediaRecorder.state !== 'inactive') return;

  isSpeaking = true;
  _speechStartMs = Date.now();
  _cancelled = false;
  _ctxEnsure();
  startRecorder();
  setMic('recording');
  setState('recording');
  console.log('[VAD] Speech detected — recording');
}
// ── _onSilenceTimeout ─────────────────────────────────────────────────────────
/**
 * Runs when the input has stayed quiet for SILENCE_MS after speech.
 *
 * Utterances of at least MIN_SPEECH_MS are finalised: polling stops, the
 * pipeline is locked (before the recorder is stopped, because its onstop
 * handler may run almost immediately), timestamps are reset and the recorder
 * is stopped. Shorter bursts are treated as noise: the recording is discarded
 * and listening continues.
 */
function _onSilenceTimeout() {
  silenceTimer = null;
  const speechDuration = Date.now() - _speechStartMs;
  const longEnough = !(speechDuration < MIN_SPEECH_MS);

  if (longEnough) {
    console.log(
      `[VAD] Silence after ${speechDuration} ms — finalising utterance`,
    );
    // No further speech detection while the utterance is being processed.
    clearInterval(vadInt);
    clearInterval(vizInt);
    vadInt = null;
    vizInt = null;

    isSpeaking = isListening = false;
    isProcessing = isRecordingLocked = true;
    _cancelled = false;
    tSend = Date.now();
    tLlm = tTts = 0;

    micBtn.disabled = true;
    setMic('processing');
    setState('processing');
    stopRecorder(); // onstop fires asynchronously and sends the audio
    return;
  }

  console.log(
    `[VAD] Too short (${speechDuration} ms) — discard & resume listening`,
  );
  isSpeaking = false;
  discardRecorder();
  // BUG-FIX-D: make sure polling is running so listening continues
  const pollingStopped = isListening && !vadInt;
  if (pollingStopped) {
    vadInt = setInterval(vadTick, VAD_MS);
    vizInt = setInterval(vizTick, 60);
  }
  setMic('listening');
  setState('listening');
}
// ── Viz tick ──────────────────────────────────────────────────────────────────
/**
 * Redraws the microphone level bars from the analyser's frequency data.
 * Each bar samples one evenly spaced bin; bars are scaled to at most 48 px
 * while speech is being recorded and 18 px otherwise, never below 4 px.
 */
function vizTick() {
  if (!analyser) return;
  const spectrum = new Uint8Array(analyser.frequencyBinCount);
  analyser.getByteFrequencyData(spectrum);
  const stride = Math.floor(spectrum.length / vizBars.length);
  const ceilingPx = isSpeaking ? 48 : 18;
  for (let i = 0; i < vizBars.length; i++) {
    const level = spectrum[i * stride] / 255;
    vizBars[i].style.height = `${Math.max(4, level * ceilingPx)}px`;
  }
}
// ── MediaRecorder ─────────────────────────────────────────────────────────────
// Container formats to try, in order of preference. The first two match what
// this file always used; the others let browsers without WebM recording work.
// NOTE(review): confirm the voice backend accepts non-WebM containers.
const _RECORDER_MIME_CANDIDATES = [
  'audio/webm;codecs=opus',
  'audio/webm',
  'audio/mp4',
  'audio/ogg;codecs=opus',
];

/** Returns the first supported candidate MIME type, or '' to use the browser default. */
function _pickRecorderMime() {
  if (typeof MediaRecorder.isTypeSupported !== 'function') return '';
  return (
    _RECORDER_MIME_CANDIDATES.find((t) => MediaRecorder.isTypeSupported(t)) ?? ''
  );
}

/** Returns the voice UI to idle after a failed utterance, optionally with a notice. */
function _abortUtterance(notice) {
  if (notice) appendMsg(notice, 'system');
  _resetVoiceState();
  setState('ready');
  setMic('off');
  micBtn.disabled = false;
}

/**
 * Recorder onstop handler: packages the recorded chunks and sends them to the
 * voice backend.
 *
 * BUG-FIX-A: the chunks are copied into a LOCAL variable as the very first
 * action, before any teardown or async work, and the module-level array is
 * cleared afterwards. _teardownMicHardware() does not touch audioChunks, so
 * the local copy is safe. Doing the teardown/reset first would leave the Blob
 * empty.
 */
async function _onRecorderStop() {
  // ── 1. Capture chunks locally (MUST be first) ────────────────────────────
  const captured = audioChunks.slice();
  audioChunks = [];
  // ── 2. Tear down mic hardware (safe — captured is local) ─────────────────
  _teardownMicHardware();
  setMic('off');
  console.log(
    `[Recorder] onstop: ${captured.length} chunk(s), ${captured
      .reduce((s, c) => s + c.size, 0)
      .toLocaleString()} bytes total`,
  );
  // ── 3. Validate ──────────────────────────────────────────────────────────
  if (!captured.length) {
    console.warn('[Recorder] No audio chunks — possible threshold issue');
    _abortUtterance(
      '⚠️ কোনো অডিও রেকর্ড হয়নি। Silence threshold কমিয়ে দেখুন।',
    );
    return;
  }
  // ── 4. Build ArrayBuffer ─────────────────────────────────────────────────
  const blob = new Blob(captured, { type: _recorderMime });
  let buf;
  try {
    buf = await blob.arrayBuffer();
  } catch (err) {
    console.error('[Recorder] arrayBuffer() error:', err);
    _abortUtterance();
    return;
  }
  console.log(`[VAD] → voice WS: ${buf.byteLength.toLocaleString()} bytes`);
  // ── 5. Send to backend ───────────────────────────────────────────────────
  if (voiceWS && voiceWS.readyState === WebSocket.OPEN) {
    appendThinking();
    voiceWS.send(buf);
    // isProcessing + isRecordingLocked stay true until _done() fires
  } else {
    console.warn('[VAD] Voice WS not open — utterance dropped');
    _abortUtterance('⚠️ সার্ভারের সাথে সংযোগ নেই — আবার চেষ্টা করুন।');
  }
}

/**
 * Starts recording the open microphone stream.
 *
 * No-op without a stream or while a recorder is already active. The container
 * format is negotiated with MediaRecorder.isTypeSupported(); when none of the
 * candidates is supported the browser's default is used and _recorderMime is
 * taken from the recorder itself. If the recorder cannot be created, the UI
 * goes back to "listening".
 */
function startRecorder() {
  if (!micStream) return;
  if (mediaRecorder && mediaRecorder.state !== 'inactive') {
    console.warn('[Recorder] Duplicate startRecorder() — ignored');
    return;
  }
  audioChunks = [];
  const mime = _pickRecorderMime();
  try {
    mediaRecorder = mime
      ? new MediaRecorder(micStream, { mimeType: mime })
      : new MediaRecorder(micStream);
  } catch (err) {
    console.error('[Recorder] Creation failed:', err);
    isSpeaking = false;
    setMic('listening');
    setState('listening');
    return;
  }
  _recorderMime = mime || mediaRecorder.mimeType || 'audio/webm';
  mediaRecorder.ondataavailable = (e) => {
    if (e.data && e.data.size > 0) audioChunks.push(e.data);
  };
  mediaRecorder.onstop = _onRecorderStop;
  mediaRecorder.start();
  console.log('[Recorder] Started, mime:', _recorderMime);
}
/**
 * Stops the active recorder, if any. The recorder's onstop handler then runs
 * asynchronously and takes over.
 */
function stopRecorder() {
  const recorder = mediaRecorder;
  if (!recorder || recorder.state === 'inactive') return;
  recorder.stop();
}
/**
 * Throws away the current recording.
 *
 * An active recorder is stopped with its data callback replaced by a no-op
 * and its onstop reduced to clearing the chunk buffer, so nothing is sent.
 * The chunk buffer is emptied in every case.
 */
function discardRecorder() {
  const recorder = mediaRecorder;
  const active = Boolean(recorder) && recorder.state !== 'inactive';
  if (active) {
    recorder.ondataavailable = () => {};
    recorder.onstop = () => {
      audioChunks = [];
    };
    recorder.stop();
    mediaRecorder = null;
  }
  audioChunks = [];
}
// ═══════════════════════════════════════════════════════════════════════════════
// UI HELPERS
// ═══════════════════════════════════════════════════════════════════════════════
// Status-indicator presets: the label text and the modifier class added to
// the state dot for each pipeline state.
const STATE_MAP = {
  ready: { label: 'প্রস্তুত', cls: '' },
  listening: { label: 'শুনছি…', cls: 'listening' },
  recording: { label: 'রেকর্ড হচ্ছে…', cls: 'recording' },
  processing: { label: 'প্রক্রিয়া করছে…', cls: 'processing' },
  speaking: { label: 'AI বলছে…', cls: 'speaking' },
};

/**
 * Shows the given pipeline state in the status indicator.
 *
 * @param {string} s - A key of STATE_MAP; anything else is shown as "ready".
 */
function setState(s) {
  const { label, cls } = STATE_MAP[s] || STATE_MAP.ready;
  stateLabel.textContent = label;
  stateDot.className = cls ? `state-dot ${cls}` : 'state-dot';
}
// Mic-button presets: modifier class, label text and icon for each mic state.
const MIC_MAP = {
  off: { cls: 'mic-off', label: 'Voice শুরু করুন', icon: '🎤' },
  listening: {
    cls: 'mic-listening',
    label: 'শুনছি… (বাতিল করতে ক্লিক)',
    icon: '🟢',
  },
  recording: { cls: 'mic-recording', label: 'বলছেন…', icon: '🔴' },
  processing: { cls: 'mic-processing', label: 'প্রক্রিয়া করছে…', icon: '⏳' },
};

/**
 * Applies a mic-button preset: class, label and icon.
 *
 * @param {string} s - A key of MIC_MAP; anything else is shown as "off".
 */
function setMic(s) {
  const { cls, label, icon } = MIC_MAP[s] || MIC_MAP.off;
  micBtn.className = `mic-btn ${cls}`;
  micLabel.textContent = label;
  micBtn.querySelector('.mic-icon').textContent = icon;
}
/**
 * Appends a message bubble to the chat and scrolls it into view.
 *
 * AI messages are rendered through `marked` when the page has loaded it;
 * every other message (and AI messages without `marked`) is inserted as
 * plain text.
 *
 * @param {string} text - Message content.
 * @param {string} who - Sender class, e.g. 'user', 'ai' or 'system'.
 * @returns {HTMLDivElement} The bubble that was added.
 */
function appendMsg(text, who) {
  const bubble = document.createElement('div');
  bubble.className = `message ${who}`;

  const asMarkdown = who === 'ai' && typeof marked !== 'undefined';
  if (asMarkdown) {
    bubble.innerHTML = marked.parse(text || '');
  } else {
    bubble.textContent = text;
  }

  chatBox.appendChild(bubble);
  chatBox.scrollTop = chatBox.scrollHeight;
  return bubble;
}
// Clear button: empties the transcript and posts a confirmation line.
clearBtn.onclick = () => {
  chatBox.innerHTML = '';
  // Both tracked bubbles were just removed from the DOM. Dropping the
  // references lets a reply that is still streaming create a fresh bubble
  // (with the text accumulated so far) instead of rendering into a detached
  // node that is never shown.
  thinkingEl = null;
  aiEl = null;
  appendMsg('চ্যাট পরিষ্কার করা হয়েছে।', 'system');
};
// Sidebar toggle: collapse/expand and flip the arrow to match.
sidebarToggle.onclick = () => {
  const { classList } = sidebarEl;
  classList.toggle('collapsed');
  const collapsed = classList.contains('collapsed');
  sidebarToggle.textContent = collapsed ? '›' : '‹';
};
// Mobile menu button: slide the sidebar in or out.
mobileMenuBtn.onclick = () => sidebarEl.classList.toggle('mobile-open');
// Silence-threshold control: initialise from SILENCE_DB and keep the setting
// and its read-out in sync with the control.
sThreshold.value = SILENCE_DB;
sThresholdVal.textContent = `${SILENCE_DB} dB`;
sThreshold.oninput = () => {
  SILENCE_DB = Number(sThreshold.value);
  sThresholdVal.textContent = `${SILENCE_DB} dB`;
};
// Silence-timeout control: same wiring for SILENCE_MS.
sTimeout.value = SILENCE_MS;
sTimeoutVal.textContent = `${SILENCE_MS} ms`;
sTimeout.oninput = () => {
  SILENCE_MS = Number(sTimeout.value);
  sTimeoutVal.textContent = `${SILENCE_MS} ms`;
};
// Voice selector: echoes the chosen value into the chat as a system message.
// In the code visible here the selection is not sent to the backend.
sVoice.onchange = () => appendMsg('🔊 TTS voice: ' + sVoice.value, 'system');
// While chunks are in flight, redraw the queue indicator every 140 ms so the
// lit bars (whose heights _vizQ randomises) keep moving.
setInterval(() => {
if (_inFlight > 0) _vizQ();
}, 140);
// ═══════════════════════════════════════════════════════════════════════════════
// BOOT
// ═══════════════════════════════════════════════════════════════════════════════
// Opens the WebSockets and starts the init-overlay timers (see boot()).
boot();