| |
| |
| |
| |
| |
| |
| #include <stdio.h> |
| #include <stdint.h> |
| #include <stdlib.h> |
| #include <time.h> |
| #include <inttypes.h> |
| #include <string.h> |
| #ifdef _WIN32 |
| #include <windows.h> |
| #endif |
|
|
| #include "ten_vad.h" |
|
|
| #if defined(__APPLE__) |
| #include <TargetConditionals.h> |
| #if TARGET_OS_IPHONE |
| #include "sample_array.h" |
| #endif |
| #endif |
|
|
/* Number of 16-bit samples per VAD frame: passed to ten_vad_create() and
 * ten_vad_process(), and used to split the input buffer into frames. */
| const int hop_size = 256; |
|
|
| uint64_t get_timestamp_ms() |
| { |
| #ifdef _WIN32 |
| LARGE_INTEGER frequency; |
| LARGE_INTEGER counter; |
| QueryPerformanceFrequency(&frequency); |
| QueryPerformanceCounter(&counter); |
| return (uint64_t)(counter.QuadPart * 1000 / frequency.QuadPart); |
| #else |
| struct timespec ts; |
| uint64_t millis; |
| clock_gettime(CLOCK_MONOTONIC, &ts); |
| millis = ts.tv_sec * 1000 + ts.tv_nsec / 1000000; |
| return millis; |
| #endif |
| } |
|
|
| |
/* File-layout structures that read_wav_file() fills directly with fread().
 * They are declared under 1-byte packing so that no padding is inserted
 * between members. */
#pragma pack(push, 1)

/* Leading 12 bytes of the file. */
typedef struct riff_header
{
    char chunk_id[4];    /* compared against "RIFF" */
    uint32_t chunk_size; /* read from the file but not otherwise used here */
    char format[4];      /* compared against "WAVE" */
} riff_header_t;

/* 8-byte header that precedes every chunk after the RIFF header. */
typedef struct chunk_header
{
    char id[4];    /* chunk tag, e.g. "fmt " or "data" */
    uint32_t size; /* chunk payload length in bytes, as stored in the file */
} chunk_header_t;

#pragma pack(pop)
|
|
| |
/* Result of read_wav_file(): the six fields of the "fmt " chunk in file
 * order, followed by the location of the audio payload. */
typedef struct wav_info
{
    /* Values copied from the "fmt " chunk. */
    uint16_t audio_format;
    uint16_t num_channels;
    uint32_t sample_rate;
    uint32_t byte_rate;
    uint16_t block_align;
    uint16_t bits_per_sample;

    /* Values describing the "data" chunk. */
    uint32_t data_size; /* payload length in bytes */
    long data_offset;   /* file position of the first payload byte */
} wav_info_t;
|
|
/* Forward declaration; the definition appears later in this file.
 * Returns 0 on success and -1 on failure. */
| int read_wav_file(FILE *fp, wav_info_t *info); |
|
|
| int vad_process(int16_t *input_buf, uint32_t frame_num, |
| float *out_probs, int32_t *out_flags, |
| float *use_time) |
| { |
| printf("tenvadsrc version: %s\n", ten_vad_get_version()); |
| void *ten_vad_handle = NULL; |
| float voice_threshold = 0.5f; |
| ten_vad_create(&ten_vad_handle, hop_size, voice_threshold); |
|
|
| uint64_t start = get_timestamp_ms(); |
| for (int i = 0; i < frame_num; ++i) |
| { |
| int16_t *audio_data = input_buf + i * hop_size; |
| int res = ten_vad_process(ten_vad_handle, audio_data, hop_size, |
| &out_probs[i], &out_flags[i]); |
| if (res == 0) |
| { |
| printf("[%d] %0.6f, %d\n", i, out_probs[i], out_flags[i]); |
| } |
| else |
| { |
| printf("ten_vad_process failed res %d\n", res); |
| } |
| } |
| uint64_t end = get_timestamp_ms(); |
| *use_time = (float)(end - start); |
|
|
| ten_vad_destroy(&ten_vad_handle); |
| ten_vad_handle = NULL; |
| return 0; |
| } |
|
|
| int test_with_wav(int argc, char *argv[]) |
| { |
| if (argc < 3) |
| { |
| printf("Warning: Test.exe input.wav output.txt\n"); |
| return 0; |
| } |
| char *input_file = argv[1]; |
| char *out_file = argv[2]; |
|
|
| FILE *fp = fopen(input_file, "rb"); |
| if (fp == NULL) |
| { |
| printf("Failed to open input file: %s\n", input_file); |
| return 1; |
| } |
| fseek(fp, 0, SEEK_SET); |
| wav_info_t info; |
| if (read_wav_file(fp, &info) != 0) |
| { |
| printf("Failed to read WAV file header\n"); |
| fclose(fp); |
| return 1; |
| } |
|
|
| uint32_t byte_num = info.data_size; |
| printf("WAV file byte num: %d\n", byte_num); |
| char *input_buf = (char *)malloc(byte_num); |
| fseek(fp, info.data_offset, SEEK_SET); |
| fread(input_buf, 1, byte_num, fp); |
| fclose(fp); |
| fp = NULL; |
|
|
| uint32_t sample_num = byte_num / sizeof(int16_t); |
| float total_audio_time = (float)sample_num / 16.0; |
| printf("total_audio_time: %.2f(ms)\n", total_audio_time); |
| uint32_t frame_num = sample_num / hop_size; |
| printf("Audio frame Num: %d\n", frame_num); |
| float *out_probs = (float *)malloc(frame_num * sizeof(float)); |
| int32_t *out_flags = (int32_t *)malloc(frame_num * sizeof(int32_t)); |
| float use_time = .0; |
| vad_process((int16_t *)input_buf, frame_num, |
| out_probs, out_flags, |
| &use_time); |
| float rtf = use_time / total_audio_time; |
| printf("Consuming time: %f(ms), audio-time: %.2f(ms), =====> RTF: %0.6f\n", |
| use_time, total_audio_time, rtf); |
|
|
| FILE *fout = fopen(out_file, "w"); |
| if (fout != NULL) |
| { |
| for (int i = 0; i < frame_num; i++) |
| { |
| fprintf(fout, "[%d] %0.6f, %d\n", i, out_probs[i], out_flags[i]); |
| } |
| fclose(fout); |
| fout = NULL; |
| } |
|
|
| free(input_buf); |
| free(out_probs); |
| free(out_flags); |
| return 0; |
| } |
|
|
#if TARGET_OS_IPHONE
/**
 * Driver used on iPhone-family targets: run the VAD over the audio embedded
 * in sample_array (from sample_array.h) and print timing statistics.
 *
 * @return 0 on success, 1 if a result buffer cannot be allocated or the VAD
 *         run fails.
 */
int test_with_array(void)
{
    int rc = 1;
    float *out_probs = NULL;
    int32_t *out_flags = NULL;
    float use_time = 0.0f;

    /* NOTE(review): assumes sample_array is suitably aligned for int16_t
     * access; confirm against its declaration in sample_array.h. */
    int16_t *input_buf = (int16_t *)sample_array;

    /* sizeof on the array is its length in bytes whatever the element type;
     * for a byte array this equals the element count used previously. */
    uint32_t byte_num = (uint32_t)sizeof(sample_array);
    printf("WAV file byte num: %" PRIu32 "\n", byte_num);

    uint32_t sample_num = byte_num / (uint32_t)sizeof(int16_t);
    float total_audio_time = (float)sample_num / 16.0f;
    printf("total_audio_time: %.2f(ms)\n", total_audio_time);
    uint32_t frame_num = sample_num / (uint32_t)hop_size;
    printf("Audio frame Num: %" PRIu32 "\n", frame_num);

    /* Request at least one element so an empty array is not mistaken for an
     * allocation failure. */
    out_probs = calloc(frame_num > 0 ? frame_num : 1, sizeof *out_probs);
    out_flags = calloc(frame_num > 0 ? frame_num : 1, sizeof *out_flags);
    if (out_probs == NULL || out_flags == NULL)
    {
        fprintf(stderr, "Failed to allocate result buffers\n");
        goto cleanup;
    }

    if (vad_process(input_buf, frame_num, out_probs, out_flags, &use_time) != 0)
    {
        fprintf(stderr, "VAD processing failed\n");
        goto cleanup;
    }

    /* Avoid dividing by zero when the array holds no audio. */
    float rtf = (total_audio_time > 0.0f) ? use_time / total_audio_time : 0.0f;
    printf("Consuming time: %f(ms), audio-time: %.2f(ms), =====> RTF: %0.6f\n",
           use_time, total_audio_time, rtf);

    rc = 0;

cleanup:
    /* The result buffers are owned by this function. */
    free(out_probs);
    free(out_flags);
    return rc;
}
#endif
|
|
/* Program entry point. iPhone-family builds run the embedded-array test and
 * ignore the command line; every other build runs the WAV-file test with the
 * arguments it was given. The chosen test's result becomes the exit status. */
int main(int argc, char *argv[])
{
    int exit_code;

#if TARGET_OS_IPHONE
    exit_code = test_with_array();
#else
    exit_code = test_with_wav(argc, argv);
#endif

    return exit_code;
}
|
|
| |
| int read_wav_file(FILE *fp, wav_info_t *info) |
| { |
| if (fp == NULL || info == NULL) |
| return -1; |
| |
| long orig_pos = ftell(fp); |
| fseek(fp, 0, SEEK_SET); |
| |
| riff_header_t riff; |
| if (fread(&riff, sizeof(riff_header_t), 1, fp) != 1) |
| { |
| fprintf(stderr, "Can not read RIFF head\n"); |
| fseek(fp, orig_pos, SEEK_SET); |
| return -1; |
| } |
| |
| if (memcmp(riff.chunk_id, "RIFF", 4) != 0 || |
| memcmp(riff.format, "WAVE", 4) != 0) |
| { |
| fprintf(stderr, "not a valid RIFF/WAVE file\n"); |
| fseek(fp, orig_pos, SEEK_SET); |
| return -1; |
| } |
| |
| int fmt_found = 0, data_found = 0; |
| memset(info, 0, sizeof(wav_info_t)); |
|
|
| |
| while (!feof(fp)) |
| { |
| chunk_header_t chunk; |
| if (fread(&chunk, sizeof(chunk_header_t), 1, fp) != 1) |
| { |
| break; |
| } |
| |
| if (memcmp(chunk.id, "fmt ", 4) == 0) |
| { |
| |
| fmt_found = 1; |
| if (chunk.size < 16) |
| { |
| fprintf(stderr, "fmt chunk size is abnormal\n"); |
| fseek(fp, orig_pos, SEEK_SET); |
| return -1; |
| } |
| |
| if (fread(&info->audio_format, 2, 1, fp) != 1 || |
| fread(&info->num_channels, 2, 1, fp) != 1 || |
| fread(&info->sample_rate, 4, 1, fp) != 1 || |
| fread(&info->byte_rate, 4, 1, fp) != 1 || |
| fread(&info->block_align, 2, 1, fp) != 1 || |
| fread(&info->bits_per_sample, 2, 1, fp) != 1) |
| { |
| fprintf(stderr, "failed to read fmt data\n"); |
| fseek(fp, orig_pos, SEEK_SET); |
| return -1; |
| } |
| |
| if (chunk.size > 16) |
| { |
| fseek(fp, chunk.size - 16, SEEK_CUR); |
| } |
| } |
| |
| else if (memcmp(chunk.id, "data", 4) == 0) |
| { |
| data_found = 1; |
| info->data_size = chunk.size; |
| info->data_offset = ftell(fp); |
| break; |
| } |
| |
| else |
| { |
| |
| fseek(fp, (chunk.size + (chunk.size % 2)), SEEK_CUR); |
| } |
| } |
| |
| if (!fmt_found) |
| { |
| fprintf(stderr, "fmt chunk not found\n"); |
| fseek(fp, orig_pos, SEEK_SET); |
| return -1; |
| } |
| if (!data_found) |
| { |
| fprintf(stderr, "data chunk not found\n"); |
| fseek(fp, orig_pos, SEEK_SET); |
| return -1; |
| } |
| |
| fseek(fp, orig_pos, SEEK_SET); |
| return 0; |
| } |
|
|