File size: 2,927 Bytes
d21d362
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#include "vad_onnx.h"

#include <sndfile.h>

#include <cstddef>
#include <fstream>
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <vector>

int main(int argc, char* argv[]) {
    if (argc < 3) {
        std::cerr << "Usage: " << argv[0] << " <model_absolute_path> <audio_list_absolute_path>" << std::endl;
        return 1;
    }

    // 获取命令行传入的模型路径和音频列表文件路径
    std::string model_path = argv[1];
    std::string audio_list_path = argv[2];

    // 打开 audio_list.txt 文件
    std::ifstream audio_list_file(audio_list_path);
    if (!audio_list_file.is_open()) {
        std::cerr << "Error: Unable to open audio list file: " << audio_list_path << std::endl;
        return 1;
    }

    try {
        VadOnnx vad_model = VadOnnx(model_path);

        // 逐行读取音频文件路径并处理
        std::string wav_path;
        while (std::getline(audio_list_file, wav_path)) {
            if (wav_path.empty()) {
                continue; // 跳过空行
            }
            vad_model.reset_states(); // 重置状态

            std::cout << wav_path << std::endl;

            // 加载音频文件
            SF_INFO sf_info;
            SNDFILE* file = sf_open(wav_path.c_str(), SFM_READ, &sf_info);
            if (!file) {
                std::cerr << "Error: Unable to open audio file: " << wav_path << std::endl;
                continue; // 跳过无法打开的文件
            }

            int samplerate = sf_info.samplerate;
            int channels = sf_info.channels;
            int frames = sf_info.frames;

            std::vector<float> audio_buffer(4096 * channels); // 用于存储每次读取的 4096 帧音频数据

            try {
                // 循环读取音频文件,每次读取 4096 帧
                int read_frames = 0;
                while ((read_frames = sf_readf_float(file, audio_buffer.data(), 4096)) > 0) {
                    // 如果实际读取的帧数小于 4096,则调整 buffer 大小
                    audio_buffer.resize(read_frames * channels);

                    // 推理
                    std::map<std::string, double> result_map = vad_model.vad_dectect(audio_buffer, false);

                    // 打印推理结果
                    if (!result_map.empty()) {
                        for (const auto& pair : result_map) {
                            std::cout << pair.first << ", " << pair.second << std::endl;
                        }
                    }
                }

                sf_close(file);

            } catch (const std::exception& ex) {
                std::cerr << "Error processing file " << wav_path << ": " << ex.what() << std::endl;
                sf_close(file);
            }
        }

        audio_list_file.close();

    } catch (const std::exception& ex) {
        std::cerr << "Error: " << ex.what() << std::endl;
        return 1;
    }

    return 0;
}