| |
| |
| |
| |
| |
| |
| #include <cassert> |
| #include "ten_vad.h" |
| #include "aed_st.h" |
| #include "aed.h" |
|
|
// Copies `inputLen` 16-bit integer samples from `inputs` into `output`,
// converting each one to float with a plain value cast (no scaling applied).
// A non-positive `inputLen` leaves `output` untouched.
static void int16_to_float(const int16_t* inputs, int inputLen, float* output) {
  int idx = 0;
  while (idx < inputLen) {
    output[idx] = (float)inputs[idx];
    ++idx;
  }
}
|
|
| int ten_vad_create(ten_vad_handle_t* handle, size_t hop_size, float threshold) { |
| if (AUP_Aed_create(handle) < 0) { |
| return -1; |
| } |
| Aed_St* stHdl = nullptr; |
| Aed_StaticCfg aedStCfg; |
| aedStCfg.enableFlag = 1; |
| aedStCfg.fftSz = 0; |
| aedStCfg.hopSz = hop_size; |
| aedStCfg.anaWindowSz = 0; |
| aedStCfg.frqInputAvailableFlag = 0; |
| stHdl = (Aed_St*)(*handle); |
| stHdl->dynamCfg.extVoiceThr = threshold; |
|
|
| if (AUP_Aed_memAllocate(*handle, &aedStCfg) < 0) { |
| return -1; |
| } |
| if (AUP_Aed_init(*handle) < 0) { |
| return -1; |
| } |
| return 0; |
| } |
|
|
| int ten_vad_process(ten_vad_handle_t handle, const int16_t* audio_data, |
| size_t audio_data_length, float* out_probability, |
| int* out_flag) { |
| if (handle == nullptr || audio_data == nullptr || |
| out_probability == nullptr || out_flag == nullptr) { |
| return -1; |
| } |
| Aed_St* ptr = (Aed_St*)handle; |
| assert(audio_data_length == ptr->stCfg.hopSz); |
| int16_to_float(audio_data, audio_data_length, ptr->inputFloatBuff); |
| Aed_InputData aedInputData; |
| Aed_OutputData aedOutputData; |
| aedInputData.binPower = NULL; |
| aedInputData.hopSz = ptr->stCfg.hopSz; |
| aedInputData.nBins = -1; |
| aedInputData.timeSignal = ptr->inputFloatBuff; |
| int ret = AUP_Aed_proc(handle, &aedInputData, &aedOutputData); |
| if (ret == 0) { |
| *out_probability = aedOutputData.voiceProb; |
| *out_flag = aedOutputData.vadRes; |
| } |
| return ret; |
| } |
|
|
| int ten_vad_destroy(ten_vad_handle_t* handle) { |
| return AUP_Aed_destroy(handle); |
| } |
|
|
// Returns the library version ("1.0") as a NUL-terminated string held in
// static storage; the caller must not modify or free it.
const char* ten_vad_get_version(void) {
  static const char kVersion[] = "1.0";
  return kVersion;
}