mweinbach1 committed on
Commit
2b9fdf9
·
verified ·
1 Parent(s): 6999348

Add files using upload-large-folder tool

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizers/german_phoneme.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: onnx
3
+ pipeline_tag: text-to-speech
4
+ tags:
5
+ - onnx
6
+ - onnxruntime
7
+ - text-to-speech
8
+ - magpie-tts
9
+ - nemo
10
+ license: other
11
+ ---
12
+
13
+ # ONNX Magpie Runtime Bundle
14
+
15
+ This repository contains ONNX Runtime artifacts for a local MagpieTTS conversion.
16
+ It is intended for use with the `onnx-magpie` runtime and does not require NeMo
17
+ or PyTorch at inference time.
18
+
19
+ ## Files
20
+
21
+ - `magpie.pipeline.json`: runtime pipeline manifest.
22
+ - `magpie_text_context.onnx`: text/speaker context graph.
23
+ - `magpie_decoder_prefix.onnx`: autoregressive decoder prefix graph.
24
+ - `magpie_codec_decoder.onnx`: codec-token to waveform graph.
25
+ - `tokenizers/`: dependency-free tokenizer artifacts.
26
+ - `onnx-magpie-hub.json`: download manifest consumed by the runtime.
27
+
28
+ ## Runtime
29
+
30
+ ```powershell
31
+ python scripts/run_onnx_magpie.py --hf-repo mweinbach1/onnx-magpie --text "Hello world." --wav-output speech.wav
32
+ ```
33
+
34
+ The original MagpieTTS checkpoint is governed by NVIDIA's model license. Verify
35
+ that your use of these converted artifacts complies with the upstream terms.
magpie.export.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "format": "onnx-magpie-export-summary-v1",
3
+ "nemo_path": "magpie_tts_multilingual_357m\\magpie_tts_multilingual_357m.nemo",
4
+ "opset": 18,
5
+ "graphs": [
6
+ {
7
+ "path": "models\\magpie_text_context.onnx",
8
+ "inputs": [
9
+ "text",
10
+ "text_lens",
11
+ "speaker_indices"
12
+ ],
13
+ "outputs": [
14
+ "cond",
15
+ "cond_mask",
16
+ "additional_decoder_input",
17
+ "additional_decoder_mask"
18
+ ]
19
+ },
20
+ {
21
+ "path": "models\\magpie_decoder_prefix.onnx",
22
+ "inputs": [
23
+ "audio_tokens",
24
+ "audio_tokens_lens",
25
+ "cond",
26
+ "cond_mask",
27
+ "additional_decoder_input",
28
+ "additional_decoder_mask"
29
+ ],
30
+ "outputs": [
31
+ "next_logits",
32
+ "decoder_state"
33
+ ]
34
+ },
35
+ {
36
+ "path": "models\\magpie_codec_decoder.onnx",
37
+ "inputs": [
38
+ "codes",
39
+ "codes_lens"
40
+ ],
41
+ "outputs": [
42
+ "audio",
43
+ "audio_lens"
44
+ ]
45
+ }
46
+ ]
47
+ }
magpie.pipeline.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "format": "onnx-magpie-autoregressive-v1",
3
+ "sample_rate": 22050,
4
+ "tokenizer_path": "tokenizers\\magpie.tokenizers.json",
5
+ "graphs": {
6
+ "text_context": "magpie_text_context.onnx",
7
+ "decoder_prefix": "magpie_decoder_prefix.onnx",
8
+ "codec_decoder": "magpie_codec_decoder.onnx"
9
+ },
10
+ "generation": {
11
+ "max_decoder_steps": 500,
12
+ "min_generated_frames": 4,
13
+ "temperature": 0.6,
14
+ "topk": 80,
15
+ "num_audio_codebooks": 8,
16
+ "num_all_tokens_per_codebook": 2024,
17
+ "frame_stacking_factor": 1,
18
+ "audio_bos_id": 2016,
19
+ "audio_eos_id": 2017,
20
+ "sampling_mode": "argmax"
21
+ },
22
+ "waveform_output": "audio"
23
+ }
magpie_codec_decoder.manifest.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_path": "models\\magpie_codec_decoder.onnx",
3
+ "ir_version": 8,
4
+ "producer_name": "pytorch",
5
+ "producer_version": "2.10.0",
6
+ "opsets": [
7
+ {
8
+ "domain": "ai.onnx",
9
+ "version": 18
10
+ }
11
+ ],
12
+ "inputs": [
13
+ {
14
+ "name": "codes",
15
+ "elem_type": "INT64",
16
+ "shape": [
17
+ 1,
18
+ 8,
19
+ "codec_time"
20
+ ]
21
+ },
22
+ {
23
+ "name": "codes_lens",
24
+ "elem_type": "INT64",
25
+ "shape": [
26
+ 1
27
+ ]
28
+ }
29
+ ],
30
+ "outputs": [
31
+ {
32
+ "name": "audio",
33
+ "elem_type": "FLOAT",
34
+ "shape": [
35
+ "Reshapeaudio_dim_0",
36
+ "sample_time"
37
+ ]
38
+ },
39
+ {
40
+ "name": "audio_lens",
41
+ "elem_type": "INT64",
42
+ "shape": [
43
+ 1
44
+ ]
45
+ }
46
+ ]
47
+ }
magpie_codec_decoder.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2d12c6267d626f65cd76559c0cb420b26144bea9aeb8c3f33f530df0450ddce
3
+ size 128635008
magpie_decoder_prefix.manifest.json ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_path": "models\\magpie_decoder_prefix.onnx",
3
+ "ir_version": 8,
4
+ "producer_name": "pytorch",
5
+ "producer_version": "2.10.0",
6
+ "opsets": [
7
+ {
8
+ "domain": "ai.onnx",
9
+ "version": 18
10
+ }
11
+ ],
12
+ "inputs": [
13
+ {
14
+ "name": "audio_tokens",
15
+ "elem_type": "INT64",
16
+ "shape": [
17
+ 1,
18
+ 8,
19
+ "audio_time"
20
+ ]
21
+ },
22
+ {
23
+ "name": "audio_tokens_lens",
24
+ "elem_type": "INT64",
25
+ "shape": [
26
+ 1
27
+ ]
28
+ },
29
+ {
30
+ "name": "cond",
31
+ "elem_type": "FLOAT",
32
+ "shape": [
33
+ 1,
34
+ "text_time",
35
+ 768
36
+ ]
37
+ },
38
+ {
39
+ "name": "cond_mask",
40
+ "elem_type": "BOOL",
41
+ "shape": [
42
+ 1,
43
+ "text_time"
44
+ ]
45
+ },
46
+ {
47
+ "name": "additional_decoder_input",
48
+ "elem_type": "FLOAT",
49
+ "shape": [
50
+ 1,
51
+ 110,
52
+ 768
53
+ ]
54
+ },
55
+ {
56
+ "name": "additional_decoder_mask",
57
+ "elem_type": "BOOL",
58
+ "shape": [
59
+ 1,
60
+ 110
61
+ ]
62
+ }
63
+ ],
64
+ "outputs": [
65
+ {
66
+ "name": "next_logits",
67
+ "elem_type": "FLOAT",
68
+ "shape": [
69
+ "Gathernext_logits_dim_0",
70
+ 16192
71
+ ]
72
+ },
73
+ {
74
+ "name": "decoder_state",
75
+ "elem_type": "FLOAT",
76
+ "shape": [
77
+ "Gathernext_logits_dim_0",
78
+ 768
79
+ ]
80
+ }
81
+ ]
82
+ }
magpie_decoder_prefix.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:642ec9330eea5b3df3a4cbf13956d0a45cca1cca26e73fa9d35310b3362f114d
3
+ size 482141691
magpie_text_context.manifest.json ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_path": "models\\magpie_text_context.onnx",
3
+ "ir_version": 8,
4
+ "producer_name": "pytorch",
5
+ "producer_version": "2.10.0",
6
+ "opsets": [
7
+ {
8
+ "domain": "ai.onnx",
9
+ "version": 18
10
+ }
11
+ ],
12
+ "inputs": [
13
+ {
14
+ "name": "text",
15
+ "elem_type": "INT64",
16
+ "shape": [
17
+ 1,
18
+ "text_time"
19
+ ]
20
+ },
21
+ {
22
+ "name": "text_lens",
23
+ "elem_type": "INT64",
24
+ "shape": [
25
+ 1
26
+ ]
27
+ },
28
+ {
29
+ "name": "speaker_indices",
30
+ "elem_type": "INT64",
31
+ "shape": [
32
+ 1
33
+ ]
34
+ }
35
+ ],
36
+ "outputs": [
37
+ {
38
+ "name": "cond",
39
+ "elem_type": "FLOAT",
40
+ "shape": [
41
+ "LayerNormalizationcond_dim_0",
42
+ "text_time",
43
+ 768
44
+ ]
45
+ },
46
+ {
47
+ "name": "cond_mask",
48
+ "elem_type": "BOOL",
49
+ "shape": [
50
+ 1,
51
+ "text_time"
52
+ ]
53
+ },
54
+ {
55
+ "name": "additional_decoder_input",
56
+ "elem_type": "FLOAT",
57
+ "shape": [
58
+ 1,
59
+ 110,
60
+ 768
61
+ ]
62
+ },
63
+ {
64
+ "name": "additional_decoder_mask",
65
+ "elem_type": "BOOL",
66
+ "shape": [
67
+ 1,
68
+ "Lessadditional_decoder_mask_dim_1"
69
+ ]
70
+ }
71
+ ]
72
+ }
magpie_text_context.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:561208fd9e46fd595e13ff7715bd6f8c7158589f8551f02f4e5fc2f239e9ddc8
3
+ size 428673443
onnx-magpie-hub.json ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "format": "onnx-magpie-hub-v1",
3
+ "pipeline": "magpie.pipeline.json",
4
+ "files": [
5
+ {
6
+ "path": "README.md",
7
+ "size": 1071
8
+ },
9
+ {
10
+ "path": "magpie.export.json",
11
+ "size": 1017
12
+ },
13
+ {
14
+ "path": "magpie.pipeline.json",
15
+ "size": 660
16
+ },
17
+ {
18
+ "path": "magpie_codec_decoder.manifest.json",
19
+ "size": 780
20
+ },
21
+ {
22
+ "path": "magpie_codec_decoder.onnx",
23
+ "size": 128635008
24
+ },
25
+ {
26
+ "path": "magpie_decoder_prefix.manifest.json",
27
+ "size": 1401
28
+ },
29
+ {
30
+ "path": "magpie_decoder_prefix.onnx",
31
+ "size": 482141691
32
+ },
33
+ {
34
+ "path": "magpie_text_context.manifest.json",
35
+ "size": 1243
36
+ },
37
+ {
38
+ "path": "magpie_text_context.onnx",
39
+ "size": 428673443
40
+ },
41
+ {
42
+ "path": "onnx-magpie-hub.json",
43
+ "size": 1855
44
+ },
45
+ {
46
+ "path": "tokenizers/english_phoneme.json",
47
+ "size": 7111300
48
+ },
49
+ {
50
+ "path": "tokenizers/french_chartokenizer.json",
51
+ "size": 219
52
+ },
53
+ {
54
+ "path": "tokenizers/german_phoneme.json",
55
+ "size": 17462337
56
+ },
57
+ {
58
+ "path": "tokenizers/hindi_chartokenizer.json",
59
+ "size": 2583
60
+ },
61
+ {
62
+ "path": "tokenizers/italian_phoneme.json",
63
+ "size": 222
64
+ },
65
+ {
66
+ "path": "tokenizers/japanese_phoneme.json",
67
+ "size": 5743
68
+ },
69
+ {
70
+ "path": "tokenizers/magpie.tokenizers.json",
71
+ "size": 7493
72
+ },
73
+ {
74
+ "path": "tokenizers/mandarin_phoneme.json",
75
+ "size": 494008
76
+ },
77
+ {
78
+ "path": "tokenizers/spanish_phoneme.json",
79
+ "size": 4736224
80
+ },
81
+ {
82
+ "path": "tokenizers/text_ce_tokenizer.json",
83
+ "size": 222
84
+ },
85
+ {
86
+ "path": "tokenizers/vietnamese_phoneme.json",
87
+ "size": 222
88
+ }
89
+ ]
90
+ }
tokenizers/english_phoneme.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizers/french_chartokenizer.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "kind": "byt5",
3
+ "class_name": "ByT5Tokenizer",
4
+ "offset": 633,
5
+ "pad_id": 633,
6
+ "num_tokens": 384,
7
+ "aggregate_offset": 633,
8
+ "byte_offset": 3,
9
+ "eos_token_id": 1,
10
+ "global_eos_token_id": 2361
11
+ }
tokenizers/german_phoneme.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e22f249116339544bea0f5c7a60476867a0306cd6aa762e83f0fdf595bab3b37
3
+ size 17462337
tokenizers/hindi_chartokenizer.json ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "kind": "symbol_table",
3
+ "class_name": "HindiCharsTokenizer",
4
+ "offset": 1017,
5
+ "pad_id": 1206,
6
+ "num_tokens": 191,
7
+ "tokens": [
8
+ " ",
9
+ "अ",
10
+ "आ",
11
+ "इ",
12
+ "ई",
13
+ "उ",
14
+ "ऊ",
15
+ "ऋ",
16
+ "ॠ",
17
+ "ए",
18
+ "ऐ",
19
+ "ओ",
20
+ "औ",
21
+ "ऍ",
22
+ "ऑ",
23
+ "क",
24
+ "ख",
25
+ "ग",
26
+ "घ",
27
+ "ङ",
28
+ "च",
29
+ "छ",
30
+ "ज",
31
+ "झ",
32
+ "ञ",
33
+ "ट",
34
+ "ठ",
35
+ "ड",
36
+ "ढ",
37
+ "ण",
38
+ "त",
39
+ "थ",
40
+ "द",
41
+ "ध",
42
+ "न",
43
+ "प",
44
+ "फ",
45
+ "ब",
46
+ "भ",
47
+ "म",
48
+ "य",
49
+ "र",
50
+ "ल",
51
+ "व",
52
+ "श",
53
+ "ष",
54
+ "स",
55
+ "ह",
56
+ "ळ",
57
+ "ऩ",
58
+ "ऱ",
59
+ "ा",
60
+ "ि",
61
+ "ी",
62
+ "ु",
63
+ "ू",
64
+ "ृ",
65
+ "ॄ",
66
+ "े",
67
+ "ै",
68
+ "ो",
69
+ "ौ",
70
+ "ॅ",
71
+ "ॉ",
72
+ "ँ",
73
+ "ं",
74
+ "ः",
75
+ "्",
76
+ "़",
77
+ "ॊ",
78
+ "ॢ",
79
+ "ॣ",
80
+ "ॆ",
81
+ "।",
82
+ "अ",
83
+ "आ",
84
+ "इ",
85
+ "ई",
86
+ "उ",
87
+ "ऊ",
88
+ "ऋ",
89
+ "ॠ",
90
+ "ए",
91
+ "ऐ",
92
+ "ओ",
93
+ "औ",
94
+ "ऍ",
95
+ "ऑ",
96
+ "क",
97
+ "ख",
98
+ "ग",
99
+ "घ",
100
+ "ङ",
101
+ "च",
102
+ "छ",
103
+ "ज",
104
+ "झ",
105
+ "ञ",
106
+ "ट",
107
+ "ठ",
108
+ "ड",
109
+ "ढ",
110
+ "ण",
111
+ "त",
112
+ "थ",
113
+ "द",
114
+ "ध",
115
+ "न",
116
+ "प",
117
+ "फ",
118
+ "ब",
119
+ "भ",
120
+ "म",
121
+ "य",
122
+ "र",
123
+ "ल",
124
+ "व",
125
+ "श",
126
+ "ष",
127
+ "स",
128
+ "ह",
129
+ "ळ",
130
+ "ऩ",
131
+ "ऱ",
132
+ "ा",
133
+ "ि",
134
+ "ी",
135
+ "ु",
136
+ "ू",
137
+ "ृ",
138
+ "ॄ",
139
+ "े",
140
+ "ै",
141
+ "ो",
142
+ "ौ",
143
+ "ॅ",
144
+ "ॉ",
145
+ "ँ",
146
+ "ं",
147
+ "ः",
148
+ "्",
149
+ "़",
150
+ "ॊ",
151
+ "ॢ",
152
+ "ॣ",
153
+ "ॆ",
154
+ "।",
155
+ "a",
156
+ "b",
157
+ "c",
158
+ "d",
159
+ "e",
160
+ "f",
161
+ "g",
162
+ "h",
163
+ "i",
164
+ "j",
165
+ "k",
166
+ "l",
167
+ "m",
168
+ "n",
169
+ "o",
170
+ "p",
171
+ "q",
172
+ "r",
173
+ "s",
174
+ "t",
175
+ "u",
176
+ "v",
177
+ "w",
178
+ "x",
179
+ "y",
180
+ "z",
181
+ "'",
182
+ "!",
183
+ "\"",
184
+ "(",
185
+ ")",
186
+ ",",
187
+ "-",
188
+ ".",
189
+ "/",
190
+ ":",
191
+ ";",
192
+ "?",
193
+ "[",
194
+ "]",
195
+ "{",
196
+ "}",
197
+ "<pad>",
198
+ "<oov>"
199
+ ],
200
+ "pad_with_space": true,
201
+ "punct": true,
202
+ "punct_list": [
203
+ "!",
204
+ "\"",
205
+ "(",
206
+ ")",
207
+ ",",
208
+ "-",
209
+ ".",
210
+ "/",
211
+ ":",
212
+ ";",
213
+ "?",
214
+ "[",
215
+ "]",
216
+ "{",
217
+ "}"
218
+ ]
219
+ }
tokenizers/italian_phoneme.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "kind": "byt5",
3
+ "class_name": "ByT5Tokenizer",
4
+ "offset": 1208,
5
+ "pad_id": 1208,
6
+ "num_tokens": 384,
7
+ "aggregate_offset": 1208,
8
+ "byte_offset": 3,
9
+ "eos_token_id": 1,
10
+ "global_eos_token_id": 2361
11
+ }
tokenizers/japanese_phoneme.json ADDED
@@ -0,0 +1,435 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "kind": "symbol_table",
3
+ "class_name": "JapanesePhonemeTokenizer",
4
+ "offset": 458,
5
+ "pad_id": 631,
6
+ "num_tokens": 175,
7
+ "tokens": [
8
+ " ",
9
+ "0",
10
+ "1",
11
+ "ァ",
12
+ "ア",
13
+ "ィ",
14
+ "イ",
15
+ "ゥ",
16
+ "ウ",
17
+ "ェ",
18
+ "エ",
19
+ "ォ",
20
+ "オ",
21
+ "カ",
22
+ "ガ",
23
+ "キ",
24
+ "ギ",
25
+ "ク",
26
+ "グ",
27
+ "ケ",
28
+ "ゲ",
29
+ "コ",
30
+ "ゴ",
31
+ "サ",
32
+ "ザ",
33
+ "シ",
34
+ "ジ",
35
+ "ス",
36
+ "ズ",
37
+ "セ",
38
+ "ゼ",
39
+ "ソ",
40
+ "ゾ",
41
+ "タ",
42
+ "ダ",
43
+ "チ",
44
+ "ヂ",
45
+ "ッ",
46
+ "ツ",
47
+ "ヅ",
48
+ "テ",
49
+ "デ",
50
+ "ト",
51
+ "ド",
52
+ "ナ",
53
+ "ニ",
54
+ "ヌ",
55
+ "ネ",
56
+ "ノ",
57
+ "ハ",
58
+ "バ",
59
+ "パ",
60
+ "ヒ",
61
+ "ビ",
62
+ "ピ",
63
+ "フ",
64
+ "ブ",
65
+ "プ",
66
+ "ヘ",
67
+ "ベ",
68
+ "ペ",
69
+ "ホ",
70
+ "ボ",
71
+ "ポ",
72
+ "マ",
73
+ "ミ",
74
+ "ム",
75
+ "メ",
76
+ "モ",
77
+ "ャ",
78
+ "ヤ",
79
+ "ュ",
80
+ "ユ",
81
+ "ョ",
82
+ "ヨ",
83
+ "ラ",
84
+ "リ",
85
+ "ル",
86
+ "レ",
87
+ "ロ",
88
+ "ヮ",
89
+ "ワ",
90
+ "ヲ",
91
+ "ン",
92
+ "ヴ",
93
+ "ヵ",
94
+ "ヶ",
95
+ "ー",
96
+ "A",
97
+ "B",
98
+ "C",
99
+ "D",
100
+ "E",
101
+ "F",
102
+ "G",
103
+ "H",
104
+ "I",
105
+ "J",
106
+ "K",
107
+ "L",
108
+ "M",
109
+ "N",
110
+ "O",
111
+ "P",
112
+ "Q",
113
+ "R",
114
+ "S",
115
+ "T",
116
+ "U",
117
+ "V",
118
+ "W",
119
+ "X",
120
+ "Y",
121
+ "Z",
122
+ "!",
123
+ "\"",
124
+ "(",
125
+ ")",
126
+ ",",
127
+ "-",
128
+ ".",
129
+ "/",
130
+ ":",
131
+ ";",
132
+ "?",
133
+ "[",
134
+ "]",
135
+ "{",
136
+ "}",
137
+ "«",
138
+ "»",
139
+ "•",
140
+ "‥",
141
+ "…",
142
+ "‹",
143
+ "›",
144
+ "※",
145
+ "◦",
146
+ "、",
147
+ "。",
148
+ "〃",
149
+ "〈",
150
+ "〉",
151
+ "《",
152
+ "》",
153
+ "「",
154
+ "」",
155
+ "『",
156
+ "』",
157
+ "【",
158
+ "】",
159
+ "〒",
160
+ "〓",
161
+ "〔",
162
+ "〕",
163
+ "〖",
164
+ "〗",
165
+ "〘",
166
+ "〙",
167
+ "〚",
168
+ "〛",
169
+ "〜",
170
+ "〽",
171
+ "・",
172
+ "・・・",
173
+ "ー",
174
+ "﹅",
175
+ "﹆",
176
+ "!",
177
+ "*",
178
+ "?",
179
+ "⦅",
180
+ "⦆",
181
+ "<pad>",
182
+ "<oov>"
183
+ ],
184
+ "japanese": {
185
+ "pad_with_space": true,
186
+ "punct": true,
187
+ "punct_list": [
188
+ "!",
189
+ "\"",
190
+ "(",
191
+ ")",
192
+ ",",
193
+ "-",
194
+ ".",
195
+ "/",
196
+ ":",
197
+ ";",
198
+ "?",
199
+ "[",
200
+ "]",
201
+ "{",
202
+ "}",
203
+ "«",
204
+ "»",
205
+ "•",
206
+ "‥",
207
+ "…",
208
+ "‹",
209
+ "›",
210
+ "※",
211
+ "◦",
212
+ "、",
213
+ "。",
214
+ "〃",
215
+ "〈",
216
+ "〉",
217
+ "《",
218
+ "》",
219
+ "「",
220
+ "」",
221
+ "『",
222
+ "』",
223
+ "【",
224
+ "】",
225
+ "〒",
226
+ "〓",
227
+ "〔",
228
+ "〕",
229
+ "〖",
230
+ "〗",
231
+ "〘",
232
+ "〙",
233
+ "〚",
234
+ "〛",
235
+ "〜",
236
+ "〽",
237
+ "・",
238
+ "・・・",
239
+ "ー",
240
+ "﹅",
241
+ "﹆",
242
+ "!",
243
+ "*",
244
+ "?",
245
+ "⦅",
246
+ "⦆"
247
+ ],
248
+ "phoneme_list": [
249
+ "0",
250
+ "1",
251
+ "ァ",
252
+ "ア",
253
+ "ィ",
254
+ "イ",
255
+ "ゥ",
256
+ "ウ",
257
+ "ェ",
258
+ "エ",
259
+ "ォ",
260
+ "オ",
261
+ "カ",
262
+ "ガ",
263
+ "キ",
264
+ "ギ",
265
+ "ク",
266
+ "グ",
267
+ "ケ",
268
+ "ゲ",
269
+ "コ",
270
+ "ゴ",
271
+ "サ",
272
+ "ザ",
273
+ "シ",
274
+ "ジ",
275
+ "ス",
276
+ "ズ",
277
+ "セ",
278
+ "ゼ",
279
+ "ソ",
280
+ "ゾ",
281
+ "タ",
282
+ "ダ",
283
+ "チ",
284
+ "ヂ",
285
+ "ッ",
286
+ "ツ",
287
+ "ヅ",
288
+ "テ",
289
+ "デ",
290
+ "ト",
291
+ "ド",
292
+ "ナ",
293
+ "ニ",
294
+ "ヌ",
295
+ "ネ",
296
+ "ノ",
297
+ "ハ",
298
+ "バ",
299
+ "パ",
300
+ "ヒ",
301
+ "ビ",
302
+ "ピ",
303
+ "フ",
304
+ "ブ",
305
+ "プ",
306
+ "ヘ",
307
+ "ベ",
308
+ "ペ",
309
+ "ホ",
310
+ "ボ",
311
+ "ポ",
312
+ "マ",
313
+ "ミ",
314
+ "ム",
315
+ "メ",
316
+ "モ",
317
+ "ャ",
318
+ "ヤ",
319
+ "ュ",
320
+ "ユ",
321
+ "ョ",
322
+ "ヨ",
323
+ "ラ",
324
+ "リ",
325
+ "ル",
326
+ "レ",
327
+ "ロ",
328
+ "ヮ",
329
+ "ワ",
330
+ "ヲ",
331
+ "ン",
332
+ "ヴ",
333
+ "ヵ",
334
+ "ヶ",
335
+ "ー"
336
+ ],
337
+ "ascii_letter_list": [
338
+ "A",
339
+ "B",
340
+ "C",
341
+ "D",
342
+ "E",
343
+ "F",
344
+ "G",
345
+ "H",
346
+ "I",
347
+ "J",
348
+ "K",
349
+ "L",
350
+ "M",
351
+ "N",
352
+ "O",
353
+ "P",
354
+ "Q",
355
+ "R",
356
+ "S",
357
+ "T",
358
+ "U",
359
+ "V",
360
+ "W",
361
+ "X",
362
+ "Y",
363
+ "Z"
364
+ ],
365
+ "g2p_cache": {
366
+ "こんにちは。": [
367
+ "0",
368
+ "コ",
369
+ "1",
370
+ "ン",
371
+ "1",
372
+ "ニ",
373
+ "1",
374
+ "チ",
375
+ "1",
376
+ "ワ",
377
+ "。"
378
+ ],
379
+ "コンニチハ。": [
380
+ "0",
381
+ "コ",
382
+ "1",
383
+ "ン",
384
+ "1",
385
+ "ニ",
386
+ "0",
387
+ "チ",
388
+ "0",
389
+ "ハ",
390
+ "。"
391
+ ],
392
+ "世界。": [
393
+ "1",
394
+ "セ",
395
+ "0",
396
+ "カ",
397
+ "0",
398
+ "イ",
399
+ "。"
400
+ ],
401
+ "こんにちはONNX Magpieの世界。": [
402
+ "0",
403
+ "コ",
404
+ "1",
405
+ "ン",
406
+ "1",
407
+ "ニ",
408
+ "1",
409
+ "チ",
410
+ "1",
411
+ "ワ",
412
+ "O",
413
+ "N",
414
+ "N",
415
+ "X",
416
+ " ",
417
+ "M",
418
+ "A",
419
+ "G",
420
+ "P",
421
+ "I",
422
+ "E",
423
+ "0",
424
+ "ノ",
425
+ "1",
426
+ "セ",
427
+ "0",
428
+ "カ",
429
+ "0",
430
+ "イ",
431
+ "。"
432
+ ]
433
+ }
434
+ }
435
+ }
tokenizers/magpie.tokenizers.json ADDED
@@ -0,0 +1,407 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "format": "onnx-magpie-tokenizers-v1",
3
+ "source": {
4
+ "nemo_path": "magpie_tts_multilingual_357m\\magpie_tts_multilingual_357m.nemo",
5
+ "nemo_commit": "9fc8b801177515117be811f6f77f89dbb080ae41",
6
+ "eos_token_id": 2361
7
+ },
8
+ "language_to_tokenizer": {
9
+ "en": "english_phoneme",
10
+ "es": "spanish_phoneme",
11
+ "de": "german_phoneme",
12
+ "fr": "french_chartokenizer",
13
+ "it": "italian_phoneme",
14
+ "vi": "vietnamese_phoneme",
15
+ "zh": "mandarin_phoneme",
16
+ "hi": "hindi_chartokenizer",
17
+ "ja": "japanese_phoneme"
18
+ },
19
+ "tokenizer_files": {
20
+ "english_phoneme": "english_phoneme.json",
21
+ "spanish_phoneme": "spanish_phoneme.json",
22
+ "german_phoneme": "german_phoneme.json",
23
+ "mandarin_phoneme": "mandarin_phoneme.json",
24
+ "japanese_phoneme": "japanese_phoneme.json",
25
+ "french_chartokenizer": "french_chartokenizer.json",
26
+ "hindi_chartokenizer": "hindi_chartokenizer.json",
27
+ "italian_phoneme": "italian_phoneme.json",
28
+ "vietnamese_phoneme": "vietnamese_phoneme.json",
29
+ "text_ce_tokenizer": "text_ce_tokenizer.json"
30
+ },
31
+ "fixtures": [
32
+ {
33
+ "language": "en",
34
+ "text": "Hello world from ONNX Magpie.",
35
+ "sample_text": "Hello world from ONNX Magpie.",
36
+ "chunk_index": 0,
37
+ "tokenizer": "english_phoneme",
38
+ "ids": [
39
+ 55,
40
+ 79,
41
+ 90,
42
+ 59,
43
+ 62,
44
+ 87,
45
+ 93,
46
+ 90,
47
+ 68,
48
+ 82,
49
+ 59,
50
+ 52,
51
+ 93,
52
+ 27,
53
+ 39,
54
+ 36,
55
+ 34,
56
+ 93,
57
+ 36,
58
+ 35,
59
+ 35,
60
+ 45,
61
+ 93,
62
+ 34,
63
+ 22,
64
+ 28,
65
+ 37,
66
+ 30,
67
+ 26,
68
+ 7,
69
+ 2361
70
+ ]
71
+ },
72
+ {
73
+ "language": "es",
74
+ "text": "Hola mundo desde ONNX Magpie.",
75
+ "sample_text": "Hola mundo desde ONNX Magpie.",
76
+ "chunk_index": 0,
77
+ "tokenizer": "spanish_phoneme",
78
+ "ids": [
79
+ 196,
80
+ 187,
81
+ 148,
82
+ 145,
83
+ 136,
84
+ 196,
85
+ 120,
86
+ 128,
87
+ 121,
88
+ 111,
89
+ 122,
90
+ 196,
91
+ 138,
92
+ 187,
93
+ 139,
94
+ 151,
95
+ 173,
96
+ 139,
97
+ 196,
98
+ 122,
99
+ 121,
100
+ 121,
101
+ 131,
102
+ 196,
103
+ 120,
104
+ 108,
105
+ 114,
106
+ 123,
107
+ 116,
108
+ 112,
109
+ 103,
110
+ 196,
111
+ 2361
112
+ ]
113
+ },
114
+ {
115
+ "language": "de",
116
+ "text": "Hallo Welt von ONNX Magpie.",
117
+ "sample_text": "Hallo Welt von ONNX Magpie.",
118
+ "chunk_index": 0,
119
+ "tokenizer": "german_phoneme",
120
+ "ids": [
121
+ 346,
122
+ 290,
123
+ 332,
124
+ 285,
125
+ 294,
126
+ 297,
127
+ 334,
128
+ 346,
129
+ 303,
130
+ 332,
131
+ 322,
132
+ 294,
133
+ 301,
134
+ 346,
135
+ 248,
136
+ 241,
137
+ 240,
138
+ 346,
139
+ 215,
140
+ 214,
141
+ 214,
142
+ 224,
143
+ 346,
144
+ 213,
145
+ 227,
146
+ 233,
147
+ 242,
148
+ 235,
149
+ 231,
150
+ 277,
151
+ 346,
152
+ 2361
153
+ ]
154
+ },
155
+ {
156
+ "language": "fr",
157
+ "text": "Bonjour le monde depuis ONNX Magpie.",
158
+ "sample_text": "Bonjour le monde depuis ONNX Magpie.",
159
+ "chunk_index": 0,
160
+ "tokenizer": "french_chartokenizer",
161
+ "ids": [
162
+ 702,
163
+ 747,
164
+ 746,
165
+ 742,
166
+ 747,
167
+ 753,
168
+ 750,
169
+ 668,
170
+ 744,
171
+ 737,
172
+ 668,
173
+ 745,
174
+ 747,
175
+ 746,
176
+ 736,
177
+ 737,
178
+ 668,
179
+ 736,
180
+ 737,
181
+ 748,
182
+ 753,
183
+ 741,
184
+ 751,
185
+ 668,
186
+ 715,
187
+ 714,
188
+ 714,
189
+ 724,
190
+ 668,
191
+ 713,
192
+ 733,
193
+ 739,
194
+ 748,
195
+ 741,
196
+ 737,
197
+ 682,
198
+ 634,
199
+ 2361
200
+ ]
201
+ },
202
+ {
203
+ "language": "it",
204
+ "text": "Ciao mondo da ONNX Magpie.",
205
+ "sample_text": "Ciao mondo da ONNX Magpie.",
206
+ "chunk_index": 0,
207
+ "tokenizer": "italian_phoneme",
208
+ "ids": [
209
+ 1278,
210
+ 1316,
211
+ 1308,
212
+ 1322,
213
+ 1243,
214
+ 1320,
215
+ 1322,
216
+ 1321,
217
+ 1311,
218
+ 1322,
219
+ 1243,
220
+ 1311,
221
+ 1308,
222
+ 1243,
223
+ 1290,
224
+ 1289,
225
+ 1289,
226
+ 1299,
227
+ 1243,
228
+ 1288,
229
+ 1308,
230
+ 1314,
231
+ 1323,
232
+ 1316,
233
+ 1312,
234
+ 1257,
235
+ 1209,
236
+ 2361
237
+ ]
238
+ },
239
+ {
240
+ "language": "vi",
241
+ "text": "Xin chào thế giới từ ONNX Magpie.",
242
+ "sample_text": "Xin chào thế giới từ ONNX Magpie.",
243
+ "chunk_index": 0,
244
+ "tokenizer": "vietnamese_phoneme",
245
+ "ids": [
246
+ 1683,
247
+ 1700,
248
+ 1705,
249
+ 1627,
250
+ 1694,
251
+ 1699,
252
+ 1790,
253
+ 1755,
254
+ 1706,
255
+ 1627,
256
+ 1711,
257
+ 1699,
258
+ 1820,
259
+ 1781,
260
+ 1786,
261
+ 1627,
262
+ 1698,
263
+ 1700,
264
+ 1820,
265
+ 1782,
266
+ 1750,
267
+ 1700,
268
+ 1627,
269
+ 1711,
270
+ 1820,
271
+ 1782,
272
+ 1766,
273
+ 1627,
274
+ 1674,
275
+ 1673,
276
+ 1673,
277
+ 1683,
278
+ 1627,
279
+ 1672,
280
+ 1692,
281
+ 1698,
282
+ 1707,
283
+ 1700,
284
+ 1696,
285
+ 1641,
286
+ 1593,
287
+ 2361
288
+ ]
289
+ },
290
+ {
291
+ "language": "zh",
292
+ "text": "你好,ONNX Magpie 世界。",
293
+ "sample_text": "你好,ONNX Magpie 世界。",
294
+ "chunk_index": 0,
295
+ "tokenizer": "mandarin_phoneme",
296
+ "ids": [
297
+ 349,
298
+ 362,
299
+ 356,
300
+ 392,
301
+ 376,
302
+ 352,
303
+ 392,
304
+ 422,
305
+ 409,
306
+ 408,
307
+ 408,
308
+ 418,
309
+ 349,
310
+ 407,
311
+ 395,
312
+ 401,
313
+ 410,
314
+ 403,
315
+ 399,
316
+ 349,
317
+ 385,
318
+ 356,
319
+ 393,
320
+ 371,
321
+ 357,
322
+ 353,
323
+ 393,
324
+ 423,
325
+ 349,
326
+ 2361
327
+ ]
328
+ },
329
+ {
330
+ "language": "hi",
331
+ "text": "नमस्ते दुनिया ONNX Magpie से।",
332
+ "sample_text": "नमस्ते दुनिया ONNX Magpie से।",
333
+ "chunk_index": 0,
334
+ "tokenizer": "hindi_chartokenizer",
335
+ "ids": [
336
+ 1017,
337
+ 1124,
338
+ 1129,
339
+ 1136,
340
+ 1157,
341
+ 1120,
342
+ 1148,
343
+ 1017,
344
+ 1122,
345
+ 1144,
346
+ 1124,
347
+ 1142,
348
+ 1130,
349
+ 1141,
350
+ 1017,
351
+ 1164,
352
+ 1170,
353
+ 1179,
354
+ 1172,
355
+ 1168,
356
+ 1017,
357
+ 1136,
358
+ 1148,
359
+ 1163,
360
+ 1017,
361
+ 2361
362
+ ]
363
+ },
364
+ {
365
+ "language": "ja",
366
+ "text": "こんにちはONNX Magpieの世界。",
367
+ "sample_text": "こんにちはONNX Magpieの世界。",
368
+ "chunk_index": 0,
369
+ "tokenizer": "japanese_phoneme",
370
+ "ids": [
371
+ 458,
372
+ 459,
373
+ 479,
374
+ 460,
375
+ 541,
376
+ 460,
377
+ 503,
378
+ 460,
379
+ 493,
380
+ 460,
381
+ 539,
382
+ 560,
383
+ 559,
384
+ 559,
385
+ 569,
386
+ 458,
387
+ 558,
388
+ 546,
389
+ 552,
390
+ 561,
391
+ 554,
392
+ 550,
393
+ 459,
394
+ 506,
395
+ 460,
396
+ 487,
397
+ 459,
398
+ 471,
399
+ 459,
400
+ 464,
401
+ 597,
402
+ 458,
403
+ 2361
404
+ ]
405
+ }
406
+ ]
407
+ }
tokenizers/mandarin_phoneme.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizers/spanish_phoneme.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizers/text_ce_tokenizer.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "kind": "byt5",
3
+ "class_name": "ByT5Tokenizer",
4
+ "offset": 1976,
5
+ "pad_id": 1976,
6
+ "num_tokens": 384,
7
+ "aggregate_offset": 1976,
8
+ "byte_offset": 3,
9
+ "eos_token_id": 1,
10
+ "global_eos_token_id": 2361
11
+ }
tokenizers/vietnamese_phoneme.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "kind": "byt5",
3
+ "class_name": "ByT5Tokenizer",
4
+ "offset": 1592,
5
+ "pad_id": 1592,
6
+ "num_tokens": 384,
7
+ "aggregate_offset": 1592,
8
+ "byte_offset": 3,
9
+ "eos_token_id": 1,
10
+ "global_eos_token_id": 2361
11
+ }