File size: 2,523 Bytes
d21d362
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#include "vad_onnx.h"

#include <cstddef>
#include <exception>
#include <iostream>
#include <map>
#include <string>
#include <vector>

#include <sndfile.h>


/**
 * Command-line smoke test for the VadOnnx wrapper.
 *
 * Usage: <program> <model_absolute_path> <audio_file_absolute_path>
 *
 * Loads the whole audio file with libsndfile and then exercises the model three ways:
 *   1. forward_infer() on the first 512 samples,
 *   2. vad_dectect(audio), printing at most the first five returned values,
 *   3. vad_dectect(audio, false), printing every key/value pair of the returned map.
 *
 * @param argc Number of command-line arguments; at least 3 are required.
 * @param argv argv[1] is the model path, argv[2] is the audio file path.
 * @return 0 on success; 1 on bad usage, an unreadable or too-short audio file,
 *         or an exception thrown while running the model.
 */
int main(int argc, char* argv[]) {
    if (argc < 3) {
        std::cerr << "Usage: " << argv[0] << " <model_absolute_path>" << " <audio_file_absolute_path>" << std::endl;
        return 1;
    }

    // Paths supplied on the command line.
    std::string model_path = argv[1];
    std::string wav_path = argv[2];

    // Number of samples fed to the single forward_infer() call below.
    constexpr std::size_t kWindowSamples = 512;

    // Load the audio file. libsndfile requires SF_INFO::format to be zero
    // before sf_open() in read mode, so value-initialise the whole struct.
    SF_INFO sf_info{};
    SNDFILE* file = sf_open(wav_path.c_str(), SFM_READ, &sf_info);
    if (file == nullptr) {
        std::cerr << "Error: failed to open audio file: " << wav_path << std::endl;
        return 1;
    }
    if (sf_info.frames <= 0 || sf_info.channels <= 0) {
        std::cerr << "Error: audio file contains no readable frames: " << wav_path << std::endl;
        sf_close(file);
        return 1;
    }

    // Samples are stored interleaved: frames * channels floats in total.
    // NOTE(review): the buffer is handed to the model as-is, exactly like before;
    // presumably the model expects mono input -- confirm for multi-channel files.
    const std::size_t channel_count = static_cast<std::size_t>(sf_info.channels);
    std::vector<float> audio(static_cast<std::size_t>(sf_info.frames) * channel_count);
    const auto frames_read = sf_readf_float(file, audio.data(), sf_info.frames);
    sf_close(file);

    // Keep only what was actually read so a short read does not leave
    // zero padding at the end of the buffer.
    audio.resize((frames_read > 0 ? static_cast<std::size_t>(frames_read) : 0) * channel_count);

    if (audio.size() < kWindowSamples) {
        std::cerr << "Error: audio file has only " << audio.size() << " samples, need at least "
                  << kWindowSamples << std::endl;
        return 1;
    }

    // Separate buffer holding the first 512 samples.
    std::vector<float> audio_512frames(audio.begin(), audio.begin() + kWindowSamples);

    try {
        VadOnnx vad_model(model_path);

        // Run the model once on a single 512-sample chunk.
        float result_512 = vad_model.forward_infer(audio_512frames);
        std::cout << "result_512 = " << result_512 << std::endl;

        // Run detection over the whole buffer and show a short preview.
        std::vector<float> result_1 = vad_model.vad_dectect(audio);
        if (!result_1.empty()) {
            std::cout << "result_1.size = " << result_1.size() << std::endl;
            for (std::size_t i = 0; i < 5 && i < result_1.size(); ++i) {
                std::cout << result_1[i] << ", ";
            }
            std::cout << "(only show 5)" << std::endl;
        }

        // Second overload: returns named values instead of a vector.
        std::map<std::string, double> result_map = vad_model.vad_dectect(audio, false);
        std::cerr << "result: " << std::endl;
        for (const auto& pair : result_map) {
            std::cout << pair.first << " : " << pair.second << std::endl;
        }

    } catch (const std::exception& ex) {
        std::cerr << "Error: " << ex.what() << std::endl;
        return 1;  // report the failure to the caller instead of exiting with 0
    }

    // Optional debug output of the audio properties:
    // std::cout << "========= audio info =========" << std::endl;
    // std::cout << "sample rate: " << sf_info.samplerate << " Hz" << std::endl;
    // std::cout << "channels: " << sf_info.channels << std::endl;
    // std::cout << "total frames: " << sf_info.frames << std::endl;
    // std::cout << "==============================" << std::endl;

    return 0;
}