Commit ·
50b0b12
0
Parent(s):
:tada: init
Browse files- .gitattributes +42 -0
- .gitignore +3 -0
- LICENSE +21 -0
- README.md +102 -0
- assets/logo.png +3 -0
- examples/melotts_test.go +34 -0
- examples/paraformer_test.go +29 -0
- examples/zh-en.wav +3 -0
- lib/onnxruntime.dll +3 -0
- lib/onnxruntime_amd64.dylib +3 -0
- lib/onnxruntime_amd64.so +3 -0
- lib/onnxruntime_arm64.dylib +3 -0
- lib/onnxruntime_arm64.so +3 -0
- melo_weights/lexicon.txt +3 -0
- melo_weights/model.onnx +3 -0
- melo_weights/tokens.txt +3 -0
- paraformer_weights/am.mvn +3 -0
- paraformer_weights/model.int8.onnx +3 -0
- paraformer_weights/tokens.txt +3 -0
.gitattributes
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
*.dll filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
*.so filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
*.dylib filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
*.png filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
*.wav filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
*.txt filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
*.mvn filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.idea
|
| 2 |
+
.vscode/
|
| 3 |
+
.DS_Store
|
LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MIT License
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2025 getcharzp
|
| 4 |
+
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
in the Software without restriction, including without limitation the rights
|
| 8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
SOFTWARE.
|
README.md
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: mit
|
| 3 |
+
pipeline_tag: text-to-speech
|
| 4 |
+
tags:
|
| 5 |
+
- text-to-speech
|
| 6 |
+
- automatic-speech-recognition
|
| 7 |
+
---
|
| 8 |
+
<h1 align="center">
|
| 9 |
+
<img width="100%" src="./assets/logo.png" alt="">
|
| 10 |
+
</h1>
|
| 11 |
+
|
| 12 |
+
<p align="center">
|
| 13 |
+
<a href="https://github.com/getcharzp/go-speech/fork" target="_blank">
|
| 14 |
+
<img src="https://img.shields.io/github/forks/getcharzp/go-speech?style=for-the-badge" alt="go-speech forks"/>
|
| 15 |
+
</a>
|
| 16 |
+
<a href="https://github.com/getcharzp/go-speech/stargazers" target="_blank">
|
| 17 |
+
<img src="https://img.shields.io/github/stars/getcharzp/go-speech?style=for-the-badge" alt="go-speech stars"/>
|
| 18 |
+
</a>
|
| 19 |
+
<a href="https://github.com/getcharzp/go-speech/pulls" target="_blank">
|
| 20 |
+
<img src="https://img.shields.io/github/issues-pr/getcharzp/go-speech?style=for-the-badge" alt="go-speech pull-requests"/>
|
| 21 |
+
</a>
|
| 22 |
+
<a href='https://github.com/getcharzp/go-speech/releases'>
|
| 23 |
+
<img src='https://img.shields.io/github/release/getcharzp/go-speech?label=Latest&style=for-the-badge' alt='go-speech latest release'/>
|
| 24 |
+
</a>
|
| 25 |
+
</p>
|
| 26 |
+
|
| 27 |
+
go-speech 是基于 Golang + [ONNX Runtime](https://github.com/microsoft/onnxruntime/releases/tag/v1.23.2) 构建的轻量语音库,支持 TTS(文本转语音)与 ASR(语音转文字),集成 MeloTTS 及达摩院 Paraformer 架构模型。
|
| 28 |
+
|
| 29 |
+
## 安装
|
| 30 |
+
|
| 31 |
+
```shell
|
| 32 |
+
# 下载包
|
| 33 |
+
go get -u github.com/getcharzp/go-speech
|
| 34 |
+
|
| 35 |
+
# 下载模型、动态链接库
|
| 36 |
+
git clone https://huggingface.co/getcharzp/go-speech
|
| 37 |
+
```
|
| 38 |
+
|
| 39 |
+
## 快速开始
|
| 40 |
+
|
| 41 |
+
### TTS
|
| 42 |
+
|
| 43 |
+
```go
|
| 44 |
+
package main
|
| 45 |
+
|
| 46 |
+
import (
|
| 47 |
+
"github.com/getcharzp/go-speech/tts/melotts"
|
| 48 |
+
"github.com/up-zero/gotool/fileutil"
|
| 49 |
+
"log"
|
| 50 |
+
)
|
| 51 |
+
|
| 52 |
+
func main() {
|
| 53 |
+
ttsEngine, err := melotts.NewEngine(melotts.DefaultConfig())
|
| 54 |
+
if err != nil {
|
| 55 |
+
log.Fatalf("创建引擎失败: %v", err)
|
| 56 |
+
}
|
| 57 |
+
defer ttsEngine.Destroy()
|
| 58 |
+
|
| 59 |
+
text := "2019年12月30日,中国人口突破14亿人,联系电话: 13800138000。"
|
| 60 |
+
wavData, err := ttsEngine.SynthesizeToWav(text, 1.0)
|
| 61 |
+
if err != nil {
|
| 62 |
+
log.Fatalf("合成失败: %v", err)
|
| 63 |
+
}
|
| 64 |
+
|
| 65 |
+
outputPath := "output.wav"
|
| 66 |
+
err = fileutil.FileSave(outputPath, wavData)
|
| 67 |
+
if err != nil {
|
| 68 |
+
log.Fatalf("保存 WAV 失败: %v", err)
|
| 69 |
+
}
|
| 70 |
+
}
|
| 71 |
+
```
|
| 72 |
+
|
| 73 |
+
<audio controls>
|
| 74 |
+
<source src="https://raw.githubusercontent.com/getcharzp/go-speech/assets/output.wav" type="audio/wav">
|
| 75 |
+
</audio>
|
| 76 |
+
|
| 77 |
+
### ASR
|
| 78 |
+
|
| 79 |
+
```go
|
| 80 |
+
package main
|
| 81 |
+
|
| 82 |
+
import (
|
| 83 |
+
"fmt"
|
| 84 |
+
"github.com/getcharzp/go-speech/asr/paraformer"
|
| 85 |
+
"log"
|
| 86 |
+
)
|
| 87 |
+
|
| 88 |
+
func main() {
|
| 89 |
+
asrEngine, err := paraformer.NewEngine(paraformer.DefaultConfig())
|
| 90 |
+
if err != nil {
|
| 91 |
+
log.Fatalf("创建引擎失败: %v", err)
|
| 92 |
+
}
|
| 93 |
+
defer asrEngine.Destroy()
|
| 94 |
+
|
| 95 |
+
text, err := asrEngine.RecognizeFile("./zh-en.wav")
|
| 96 |
+
if err != nil {
|
| 97 |
+
log.Printf("识别出错: %v", err)
|
| 98 |
+
return
|
| 99 |
+
}
|
| 100 |
+
fmt.Printf("识别结果: %s\n", text)
|
| 101 |
+
}
|
| 102 |
+
```
|
assets/logo.png
ADDED
|
Git LFS Details
|
examples/melotts_test.go
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
package examples
|
| 2 |
+
|
| 3 |
+
import (
|
| 4 |
+
"github.com/getcharzp/go-speech/tts/melotts"
|
| 5 |
+
"github.com/up-zero/gotool/fileutil"
|
| 6 |
+
"testing"
|
| 7 |
+
)
|
| 8 |
+
|
| 9 |
+
func TestMeloTTS(t *testing.T) {
|
| 10 |
+
cfg := melotts.Config{
|
| 11 |
+
OnnxRuntimeLibPath: "../lib/onnxruntime.dll",
|
| 12 |
+
ModelPath: "../melo_weights/model.onnx",
|
| 13 |
+
TokenPath: "../melo_weights/tokens.txt",
|
| 14 |
+
LexiconPath: "../melo_weights/lexicon.txt",
|
| 15 |
+
}
|
| 16 |
+
|
| 17 |
+
ttsEngine, err := melotts.NewEngine(cfg)
|
| 18 |
+
if err != nil {
|
| 19 |
+
t.Fatalf("创建引擎失败: %v", err)
|
| 20 |
+
}
|
| 21 |
+
defer ttsEngine.Destroy()
|
| 22 |
+
|
| 23 |
+
text := "2019年12月30日,中国人口突破14亿人。联系电话: 13800138000。"
|
| 24 |
+
wavData, err := ttsEngine.SynthesizeToWav(text, 1.0)
|
| 25 |
+
if err != nil {
|
| 26 |
+
t.Fatalf("合成失败: %v", err)
|
| 27 |
+
}
|
| 28 |
+
|
| 29 |
+
outputPath := "output.wav"
|
| 30 |
+
err = fileutil.FileSave(outputPath, wavData)
|
| 31 |
+
if err != nil {
|
| 32 |
+
t.Fatalf("保存 WAV 失败: %v", err)
|
| 33 |
+
}
|
| 34 |
+
}
|
examples/paraformer_test.go
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
package examples
|
| 2 |
+
|
| 3 |
+
import (
|
| 4 |
+
"fmt"
|
| 5 |
+
"github.com/getcharzp/go-speech/asr/paraformer"
|
| 6 |
+
"testing"
|
| 7 |
+
)
|
| 8 |
+
|
| 9 |
+
func TestParaformer(t *testing.T) {
|
| 10 |
+
config := paraformer.Config{
|
| 11 |
+
OnnxRuntimeLibPath: "../lib/onnxruntime.dll",
|
| 12 |
+
ModelPath: "../paraformer_weights/model.int8.onnx",
|
| 13 |
+
TokensPath: "../paraformer_weights/tokens.txt",
|
| 14 |
+
CMVNPath: "../paraformer_weights/am.mvn",
|
| 15 |
+
}
|
| 16 |
+
|
| 17 |
+
asrEngine, err := paraformer.NewEngine(config)
|
| 18 |
+
if err != nil {
|
| 19 |
+
t.Fatalf("创建引擎失败: %v", err)
|
| 20 |
+
}
|
| 21 |
+
defer asrEngine.Destroy()
|
| 22 |
+
|
| 23 |
+
text, err := asrEngine.RecognizeFile("./zh-en.wav")
|
| 24 |
+
if err != nil {
|
| 25 |
+
t.Fatalf("识别出错: %v", err)
|
| 26 |
+
return
|
| 27 |
+
}
|
| 28 |
+
fmt.Printf("识别结果: %s\n", text)
|
| 29 |
+
}
|
examples/zh-en.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eddf384a906bd6d905c9d9d652d614def1857608b88c2eee663ceeccbb31f7a3
|
| 3 |
+
size 259278
|
lib/onnxruntime.dll
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dec964ab1ee36cc9b0ae247d13b376627992fc57dec0454354017ab8fd84f1ea
|
| 3 |
+
size 14186016
|
lib/onnxruntime_amd64.dylib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:283e595e61cf65df7a6b1d59a1616cbd35c8b6399dd90d799d99b71a3ff83160
|
| 3 |
+
size 37411816
|
lib/onnxruntime_amd64.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:126e4a0191d547839b28a41b9087375711158a3f2c5e7da65ef815f0af6cbe28
|
| 3 |
+
size 22317880
|
lib/onnxruntime_arm64.dylib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2b885992d3d6fa4130d39ec84a80d7504ff52750027c547bb22c86165f19406a
|
| 3 |
+
size 33481272
|
lib/onnxruntime_arm64.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0afd69a0ae38c5099fd0e8604dda398ac43dee67cd9c6394b5142b19e82528de
|
| 3 |
+
size 17643256
|
melo_weights/lexicon.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d5338968f710ab4e8a32c1bd47b8957c434ff5aee28c77601e6c4d7168852fee
|
| 3 |
+
size 7033499
|
melo_weights/model.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bf30582eb1b012250a35b1a4a80e7dfbcf8485e7bb9de0d95efbbeef0e4ad86d
|
| 3 |
+
size 170429550
|
melo_weights/tokens.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:be76fa98ab761477cfbe04fc0c7ccb66525c6611dd0917fbb023257b677066b1
|
| 3 |
+
size 767
|
paraformer_weights/am.mvn
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:29b3c740a2c0cfc6b308126d31d7f265fa2be74f3bb095cd2f143ea970896ae5
|
| 3 |
+
size 11203
|
paraformer_weights/model.int8.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f36a0433bcf096bd6d6f11b80a3ac8bed110bdca632fe0d731df8d1a84475945
|
| 3 |
+
size 243371218
|
paraformer_weights/tokens.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:59aba8873a2ed1e122c25fee421e25f283b63290efbde85c1f01a853d83cb6e6
|
| 3 |
+
size 75756
|