GetcharZp committed on
Commit
50b0b12
·
0 Parent(s):

:tada: init

Browse files
.gitattributes ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.dll filter=lfs diff=lfs merge=lfs -text
37
+ *.so filter=lfs diff=lfs merge=lfs -text
38
+ *.dylib filter=lfs diff=lfs merge=lfs -text
39
+ *.png filter=lfs diff=lfs merge=lfs -text
40
+ *.wav filter=lfs diff=lfs merge=lfs -text
41
+ *.txt filter=lfs diff=lfs merge=lfs -text
42
+ *.mvn filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ .idea
2
+ .vscode/
3
+ .DS_Store
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2025 getcharzp
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ pipeline_tag: text-to-speech
4
+ tags:
5
+ - text-to-speech
6
+ - automatic-speech-recognition
7
+ ---
8
+ <h1 align="center">
9
+ <img width="100%" src="./assets/logo.png" alt="">
10
+ </h1>
11
+
12
+ <p align="center">
13
+ <a href="https://github.com/getcharzp/go-speech/fork" target="_blank">
14
+ <img src="https://img.shields.io/github/forks/getcharzp/go-speech?style=for-the-badge" alt="go-speech forks"/>
15
+ </a>
16
+ <a href="https://github.com/getcharzp/go-speech/stargazers" target="_blank">
17
+ <img src="https://img.shields.io/github/stars/getcharzp/go-speech?style=for-the-badge" alt="go-speech stars"/>
18
+ </a>
19
+ <a href="https://github.com/getcharzp/go-speech/pulls" target="_blank">
20
+ <img src="https://img.shields.io/github/issues-pr/getcharzp/go-speech?style=for-the-badge" alt="go-speech pull-requests"/>
21
+ </a>
22
+ <a href='https://github.com/getcharzp/go-speech/releases'>
23
+ <img src='https://img.shields.io/github/release/getcharzp/go-speech?label=Latest&style=for-the-badge' alt='go-speech latest release'>
24
+ </a>
25
+ </p>
26
+
27
+ go-speech 基于 Golang + [ONNX Runtime](https://github.com/microsoft/onnxruntime/releases/tag/v1.23.2) 构建的轻量语音库,支持 TTS(文本转语音)与 ASR(语音转文字)。集成 MeloTTS 及达摩院 Paraformer 架构模型。
28
+
29
+ ## 安装
30
+
31
+ ```shell
32
+ # 下载包
33
+ go get -u github.com/getcharzp/go-speech
34
+
35
+ # 下载模型、动态链接库
36
+ git clone https://huggingface.co/getcharzp/go-speech
37
+ ```
38
+
39
+ ## 快速开始
40
+
41
+ ### TTS
42
+
43
+ ```go
44
+ package main
45
+
46
+ import (
47
+ "github.com/getcharzp/go-speech/tts/melotts"
48
+ "github.com/up-zero/gotool/fileutil"
49
+ "log"
50
+ )
51
+
52
+ func main() {
53
+ ttsEngine, err := melotts.NewEngine(melotts.DefaultConfig())
54
+ if err != nil {
55
+ log.Fatalf("创建引擎失败: %v", err)
56
+ }
57
+ defer ttsEngine.Destroy()
58
+
59
+ text := "2019年12月30日,中国人口突破14亿人,联系电话: 13800138000。"
60
+ wavData, err := ttsEngine.SynthesizeToWav(text, 1.0)
61
+ if err != nil {
62
+ log.Fatalf("合成失败: %v", err)
63
+ }
64
+
65
+ outputPath := "output.wav"
66
+ err = fileutil.FileSave(outputPath, wavData)
67
+ if err != nil {
68
+ log.Fatalf("保存 WAV 失败: %v", err)
69
+ }
70
+ }
71
+ ```
72
+
73
+ <audio controls>
74
+ <source src="https://raw.githubusercontent.com/getcharzp/go-speech/assets/output.wav" type="audio/wav">
75
+ </audio>
76
+
77
+ ### ASR
78
+
79
+ ```go
80
+ package main
81
+
82
+ import (
83
+ "fmt"
84
+ "github.com/getcharzp/go-speech/asr/paraformer"
85
+ "log"
86
+ )
87
+
88
+ func main() {
89
+ asrEngine, err := paraformer.NewEngine(paraformer.DefaultConfig())
90
+ if err != nil {
91
+ log.Fatalf("创建引擎失败: %v", err)
92
+ }
93
+ defer asrEngine.Destroy()
94
+
95
+ text, err := asrEngine.RecognizeFile("./zh-en.wav")
96
+ if err != nil {
97
+ log.Printf("识别出错: %v", err)
98
+ return
99
+ }
100
+ fmt.Printf("识别结果: %s\n", text)
101
+ }
102
+ ```
assets/logo.png ADDED

Git LFS Details

  • SHA256: d7f2c07be243141485826d27b3152a4dd68511832b293bf5b25fc3a69bc92630
  • Pointer size: 131 Bytes
  • Size of remote file: 436 kB
examples/melotts_test.go ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package examples
2
+
3
+ import (
4
+ "github.com/getcharzp/go-speech/tts/melotts"
5
+ "github.com/up-zero/gotool/fileutil"
6
+ "testing"
7
+ )
8
+
9
+ func TestMeloTTS(t *testing.T) {
10
+ cfg := melotts.Config{
11
+ OnnxRuntimeLibPath: "../lib/onnxruntime.dll",
12
+ ModelPath: "../melo_weights/model.onnx",
13
+ TokenPath: "../melo_weights/tokens.txt",
14
+ LexiconPath: "../melo_weights/lexicon.txt",
15
+ }
16
+
17
+ ttsEngine, err := melotts.NewEngine(cfg)
18
+ if err != nil {
19
+ t.Fatalf("创建引擎失败: %v", err)
20
+ }
21
+ defer ttsEngine.Destroy()
22
+
23
+ text := "2019年12月30日,中国人口突破14亿人。联系电话: 13800138000。"
24
+ wavData, err := ttsEngine.SynthesizeToWav(text, 1.0)
25
+ if err != nil {
26
+ t.Fatalf("合成失败: %v", err)
27
+ }
28
+
29
+ outputPath := "output.wav"
30
+ err = fileutil.FileSave(outputPath, wavData)
31
+ if err != nil {
32
+ t.Fatalf("保存 WAV 失败: %v", err)
33
+ }
34
+ }
examples/paraformer_test.go ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package examples
2
+
3
+ import (
4
+ "fmt"
5
+ "github.com/getcharzp/go-speech/asr/paraformer"
6
+ "testing"
7
+ )
8
+
9
+ func TestParaformer(t *testing.T) {
10
+ config := paraformer.Config{
11
+ OnnxRuntimeLibPath: "../lib/onnxruntime.dll",
12
+ ModelPath: "../paraformer_weights/model.int8.onnx",
13
+ TokensPath: "../paraformer_weights/tokens.txt",
14
+ CMVNPath: "../paraformer_weights/am.mvn",
15
+ }
16
+
17
+ asrEngine, err := paraformer.NewEngine(config)
18
+ if err != nil {
19
+ t.Fatalf("创建引擎失败: %v", err)
20
+ }
21
+ defer asrEngine.Destroy()
22
+
23
+ text, err := asrEngine.RecognizeFile("./zh-en.wav")
24
+ if err != nil {
25
+ t.Fatalf("识别出错: %v", err)
26
+ return
27
+ }
28
+ fmt.Printf("识别结果: %s\n", text)
29
+ }
examples/zh-en.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eddf384a906bd6d905c9d9d652d614def1857608b88c2eee663ceeccbb31f7a3
3
+ size 259278
lib/onnxruntime.dll ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dec964ab1ee36cc9b0ae247d13b376627992fc57dec0454354017ab8fd84f1ea
3
+ size 14186016
lib/onnxruntime_amd64.dylib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:283e595e61cf65df7a6b1d59a1616cbd35c8b6399dd90d799d99b71a3ff83160
3
+ size 37411816
lib/onnxruntime_amd64.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:126e4a0191d547839b28a41b9087375711158a3f2c5e7da65ef815f0af6cbe28
3
+ size 22317880
lib/onnxruntime_arm64.dylib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b885992d3d6fa4130d39ec84a80d7504ff52750027c547bb22c86165f19406a
3
+ size 33481272
lib/onnxruntime_arm64.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0afd69a0ae38c5099fd0e8604dda398ac43dee67cd9c6394b5142b19e82528de
3
+ size 17643256
melo_weights/lexicon.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5338968f710ab4e8a32c1bd47b8957c434ff5aee28c77601e6c4d7168852fee
3
+ size 7033499
melo_weights/model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf30582eb1b012250a35b1a4a80e7dfbcf8485e7bb9de0d95efbbeef0e4ad86d
3
+ size 170429550
melo_weights/tokens.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be76fa98ab761477cfbe04fc0c7ccb66525c6611dd0917fbb023257b677066b1
3
+ size 767
paraformer_weights/am.mvn ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29b3c740a2c0cfc6b308126d31d7f265fa2be74f3bb095cd2f143ea970896ae5
3
+ size 11203
paraformer_weights/model.int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f36a0433bcf096bd6d6f11b80a3ac8bed110bdca632fe0d731df8d1a84475945
3
+ size 243371218
paraformer_weights/tokens.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59aba8873a2ed1e122c25fee421e25f283b63290efbde85c1f01a853d83cb6e6
3
+ size 75756