import { ref, onBeforeUnmount } from 'vue';
import axios from 'axios';
/**
 * Options object handed to the `MicVAD` factory by this module.
 *
 * Only `onSpeechEnd` is required; every other member is optional.
 * NOTE(review): the exact meaning of the tuning values is defined by the VAD
 * library, not by this file — confirm against the library documentation.
 */
interface VADOptions {
  // --- Lifecycle callbacks ---

  /** Invoked with the captured samples when a speech segment ends. */
  onSpeechEnd: (audio: Float32Array) => void;
  /** Invoked when the library reports that speech has started. */
  onSpeechStart?: () => void;
  /** Invoked on the library's "real start" event (presumably a confirmed speech start — verify). */
  onSpeechRealStart?: () => void;
  /** Invoked when the library reports a misfire. */
  onVADMisfire?: () => void;
  /** Invoked per processed frame with the speech probabilities and the frame samples. */
  onFrameProcessed?: (
    probabilities: { isSpeech: number; notSpeech: number },
    frame: Float32Array
  ) => void;

  // --- Detection tuning ---

  positiveSpeechThreshold?: number;
  negativeSpeechThreshold?: number;
  /** Duration in milliseconds (per the `Ms` suffix). */
  redemptionMs?: number;
  /** Duration in milliseconds (per the `Ms` suffix). */
  preSpeechPadMs?: number;
  /** Duration in milliseconds (per the `Ms` suffix). */
  minSpeechMs?: number;
  submitUserSpeechOnPause?: boolean;

  // --- Model and asset locations ---

  /** Which model variant to load. */
  model?: 'v5' | 'legacy';
  /** Base URL/path for the VAD library's own assets. */
  baseAssetPath?: string;
  /** Base URL/path for the ONNX WASM runtime files. */
  onnxWASMBasePath?: string;
}
/**
 * Minimal surface of a created VAD instance that this module relies on.
 */
interface VADInstance {
  /** State flag exposed by the instance (presumably true while it is listening — verify). */
  listening: boolean;
  /** Starts the instance; called by `startRecording`. */
  start(): void;
  /** Pauses the instance; called by `stopRecording`. */
  pause(): void;
}
// Global typing for the `vad` object that the VAD browser scripts expose on `window`.
declare global {
  interface Window {
    vad: {
      MicVAD: {
        /**
         * @deprecated This is a construct signature (`new MicVAD(options)`), not a
         * method. It is kept only so existing type-level usages keep compiling;
         * this module creates instances through the `new` factory property below.
         */
        new(options: VADOptions): Promise<VADInstance>;
        /**
         * Async factory called as `window.vad.MicVAD.new(options)`.
         *
         * Written as a property (`new: ...`) on purpose: in an object type,
         * `new(...)` is parsed as a construct signature, which does not allow
         * the `.new(...)` member call.
         */
        new: (options: VADOptions) => Promise<VADInstance>;
      };
    };
  }
}
/**
 * Composable that records speech using VAD (Voice Activity Detection).
 *
 * The VAD detects when the user starts and stops speaking, so callers do not
 * have to drive recording manually.
 *
 * @returns Reactive state and controls:
 *   - `isRecording`: true while the VAD has been started and not stopped.
 *   - `isSpeaking`: true between the VAD's speech-start and speech-end/misfire events.
 *   - `audioEnergy`: smoothed RMS energy of the latest frames, clamped to 0..1.
 *   - `startRecording(onSpeechStart, onSpeechEnd)`: lazily creates the VAD and starts it.
 *   - `stopRecording()`: pauses the VAD and clears the registered callbacks.
 */
export function useVADRecording() {
  /** Callable shape of the `MicVAD.new(...)` factory used by `createVAD`. */
  type MicVADFactory = { new: (options: VADOptions) => Promise<VADInstance> };

  const isRecording = ref(false);
  const isSpeaking = ref(false);
  const audioEnergy = ref(0); // energy value between 0 and 1

  // Internal state lives in plain variables rather than refs: none of it is
  // returned to the caller, and `ref()` would wrap the VAD instance in a deep
  // reactive proxy that the library object has no need for.
  let vadInstance: VADInstance | null = null;
  let initPromise: Promise<void> | null = null;
  let onSpeechStartCallback: (() => void) | null = null;
  let onSpeechEndCallback: ((audio: Float32Array) => void) | null = null;
  // Bumped by every startRecording()/stopRecording() call so that a start
  // which is still awaiting initialisation can tell it has been superseded.
  let startGeneration = 0;

  // Live mode does not upload audio; it is sent in real time over WebSocket.

  /** Creates the VAD instance; logs and leaves `vadInstance` null on failure. */
  async function createVAD(): Promise<void> {
    if (typeof window === 'undefined' || !window.vad) {
      console.error('VAD library not loaded. Please ensure the scripts are included in index.html');
      return;
    }
    try {
      // `MicVAD.new` is called as a member named `new`. The double assertion
      // narrows to exactly that shape (instead of `any`) so the options object
      // below is still checked against `VADOptions`.
      const factory = window.vad.MicVAD as unknown as MicVADFactory;
      vadInstance = await factory.new({
        onSpeechStart: () => {
          console.log('[VAD] Speech started');
          isSpeaking.value = true;
          // Forward to the caller's speech-start callback, if any.
          onSpeechStartCallback?.();
        },
        onSpeechRealStart: () => {
          console.log('[VAD] Real speech started');
        },
        onSpeechEnd: (audio: Float32Array) => {
          console.log('[VAD] Speech ended, audio length:', audio.length);
          isSpeaking.value = false;
          // Forward the raw audio samples to the caller's speech-end callback.
          onSpeechEndCallback?.(audio);
        },
        onVADMisfire: () => {
          console.log('[VAD] VAD misfire - speech segment too short');
          isSpeaking.value = false;
        },
        onFrameProcessed: (
          _probabilities: { isSpeech: number; notSpeech: number },
          frame: Float32Array
        ) => {
          // An empty frame would give 0 / 0 = NaN, and because of the
          // exponential smoothing below the energy would stay NaN forever.
          if (frame.length === 0) {
            return;
          }
          // RMS (root mean square) of the frame is used as the energy.
          let sumOfSquares = 0;
          for (const sample of frame) {
            sumOfSquares += sample * sample;
          }
          const rms = Math.sqrt(sumOfSquares / frame.length);
          // RMS is usually small, so scale it up (the factor 5 is empirical and
          // can be tuned), clamp to 1, then smooth with an 80/20 moving average.
          const targetEnergy = Math.min(rms * 5, 1);
          audioEnergy.value = audioEnergy.value * 0.8 + targetEnergy * 0.2;
        },
        // VAD configuration parameters
        positiveSpeechThreshold: 0.3,
        negativeSpeechThreshold: 0.25,
        redemptionMs: 1400,
        preSpeechPadMs: 800,
        minSpeechMs: 400,
        submitUserSpeechOnPause: false,
        model: 'v5',
        baseAssetPath: 'https://cdn.jsdelivr.net/npm/@ricky0123/vad-web@0.0.29/dist/',
        onnxWASMBasePath: 'https://cdn.jsdelivr.net/npm/onnxruntime-web@1.22.0/dist/'
      });
      console.log('VAD initialized successfully');
    } catch (error: unknown) {
      console.error('Failed to initialize VAD:', error);
      vadInstance = null;
    }
  }

  /**
   * Initialises the VAD, sharing one in-flight attempt between concurrent
   * callers so overlapping `startRecording` calls never create two instances.
   * A failed attempt is not cached; the next call retries.
   */
  function initVAD(): Promise<void> {
    if (initPromise === null) {
      initPromise = createVAD().finally(() => {
        initPromise = null;
      });
    }
    return initPromise;
  }

  /**
   * Starts recording (starts the VAD), creating the VAD first if needed.
   *
   * If initialisation fails, the promise still resolves and `isRecording`
   * stays false; the failure is logged to the console.
   *
   * @param onSpeechStart Called whenever the VAD reports speech start.
   * @param onSpeechEnd Called with the raw samples whenever a speech segment ends.
   */
  async function startRecording(
    onSpeechStart: () => void,
    onSpeechEnd: (audio: Float32Array) => void
  ): Promise<void> {
    // Store the callbacks for the VAD event handlers.
    onSpeechStartCallback = onSpeechStart;
    onSpeechEndCallback = onSpeechEnd;

    const generation = ++startGeneration;
    if (vadInstance === null) {
      await initVAD();
    }
    // Abort if stopRecording() or a newer startRecording() ran while we were
    // waiting, or if initialisation did not produce an instance.
    if (generation !== startGeneration || vadInstance === null) {
      return;
    }
    vadInstance.start();
    isRecording.value = true;
    console.log('[VAD] Started');
  }

  /**
   * Stops recording (pauses the VAD), clears the callbacks and resets the
   * exposed state. Also cancels a `startRecording` that is still initialising.
   */
  function stopRecording(): void {
    startGeneration++;
    onSpeechStartCallback = null;
    onSpeechEndCallback = null;
    isRecording.value = false;
    isSpeaking.value = false;
    // No frames arrive while paused, so clear the meter instead of freezing it.
    audioEnergy.value = 0;
    if (vadInstance !== null) {
      vadInstance.pause();
      console.log('[VAD] Stopped');
    }
  }

  // Clean up on unmount. Runs unconditionally so a start that is still
  // initialising is cancelled as well.
  // NOTE(review): the instance is only paused; `VADInstance` as declared in
  // this file has no teardown method — confirm whether the library offers one.
  onBeforeUnmount(() => {
    stopRecording();
  });

  return {
    isRecording,
    isSpeaking, // whether the user is currently speaking
    audioEnergy, // current audio energy
    startRecording,
    stopRecording
  };
}
|