Text-to-Speech
LiteRT
LiteRT
tts
voice-cloning
voice-design
diffusion
on-device
soniqo
speech-cloud
speech-core
Instructions to use soniqo/VoxCPM2-LiteRT-INT8 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- LiteRT
How to use soniqo/VoxCPM2-LiteRT-INT8 with LiteRT:
# No code snippets available yet for this library.
# To use this model, check the repository files and the library's documentation.
# Want to help? PRs adding snippets are welcome at:
# https://github.com/huggingface/huggingface.js
- Notebooks
- Google Colab
- Kaggle
Update VoxCPM2 LiteRT bundle to 512 text tokens
Browse files
- config.json +11 -11
- voxcpm2-text-prefill.tflite +2 -2
- voxcpm2-token-step.tflite +2 -2
config.json
CHANGED
|
@@ -216,7 +216,7 @@
|
|
| 216 |
"sample_rate": 48000,
|
| 217 |
"audio_conditioning_sample_rate": 16000,
|
| 218 |
"text_tokenizer": "tokenizer.json",
|
| 219 |
-
"max_text_tokens":
|
| 220 |
"max_generated_tokens": 2048,
|
| 221 |
"default_inference_timesteps": 10,
|
| 222 |
"default_cfg_value": 2.0,
|
|
@@ -239,7 +239,7 @@
|
|
| 239 |
"rank": 2,
|
| 240 |
"shape": [
|
| 241 |
1,
|
| 242 |
-
|
| 243 |
],
|
| 244 |
"name": "serving_default_args_0"
|
| 245 |
},
|
|
@@ -248,7 +248,7 @@
|
|
| 248 |
"rank": 2,
|
| 249 |
"shape": [
|
| 250 |
1,
|
| 251 |
-
|
| 252 |
],
|
| 253 |
"name": "serving_default_args_1"
|
| 254 |
},
|
|
@@ -257,7 +257,7 @@
|
|
| 257 |
"rank": 4,
|
| 258 |
"shape": [
|
| 259 |
1,
|
| 260 |
-
|
| 261 |
4,
|
| 262 |
64
|
| 263 |
],
|
|
@@ -268,7 +268,7 @@
|
|
| 268 |
"rank": 2,
|
| 269 |
"shape": [
|
| 270 |
1,
|
| 271 |
-
|
| 272 |
],
|
| 273 |
"name": "serving_default_args_3"
|
| 274 |
}
|
|
@@ -310,7 +310,7 @@
|
|
| 310 |
28,
|
| 311 |
1,
|
| 312 |
2,
|
| 313 |
-
|
| 314 |
128
|
| 315 |
],
|
| 316 |
"name": "serving_default_args_3"
|
|
@@ -323,7 +323,7 @@
|
|
| 323 |
8,
|
| 324 |
1,
|
| 325 |
2,
|
| 326 |
-
|
| 327 |
128
|
| 328 |
],
|
| 329 |
"name": "serving_default_args_4"
|
|
@@ -407,7 +407,7 @@
|
|
| 407 |
28,
|
| 408 |
1,
|
| 409 |
2,
|
| 410 |
-
|
| 411 |
128
|
| 412 |
],
|
| 413 |
"name": "serving_default_output_3_output"
|
|
@@ -420,7 +420,7 @@
|
|
| 420 |
8,
|
| 421 |
1,
|
| 422 |
2,
|
| 423 |
-
|
| 424 |
128
|
| 425 |
],
|
| 426 |
"name": "serving_default_output_4_output"
|
|
@@ -472,7 +472,7 @@
|
|
| 472 |
28,
|
| 473 |
1,
|
| 474 |
2,
|
| 475 |
-
|
| 476 |
128
|
| 477 |
],
|
| 478 |
"name": "serving_default_output_4_output"
|
|
@@ -485,7 +485,7 @@
|
|
| 485 |
8,
|
| 486 |
1,
|
| 487 |
2,
|
| 488 |
-
|
| 489 |
128
|
| 490 |
],
|
| 491 |
"name": "serving_default_output_5_output"
|
|
|
|
| 216 |
"sample_rate": 48000,
|
| 217 |
"audio_conditioning_sample_rate": 16000,
|
| 218 |
"text_tokenizer": "tokenizer.json",
|
| 219 |
+
"max_text_tokens": 512,
|
| 220 |
"max_generated_tokens": 2048,
|
| 221 |
"default_inference_timesteps": 10,
|
| 222 |
"default_cfg_value": 2.0,
|
|
|
|
| 239 |
"rank": 2,
|
| 240 |
"shape": [
|
| 241 |
1,
|
| 242 |
+
512
|
| 243 |
],
|
| 244 |
"name": "serving_default_args_0"
|
| 245 |
},
|
|
|
|
| 248 |
"rank": 2,
|
| 249 |
"shape": [
|
| 250 |
1,
|
| 251 |
+
512
|
| 252 |
],
|
| 253 |
"name": "serving_default_args_1"
|
| 254 |
},
|
|
|
|
| 257 |
"rank": 4,
|
| 258 |
"shape": [
|
| 259 |
1,
|
| 260 |
+
512,
|
| 261 |
4,
|
| 262 |
64
|
| 263 |
],
|
|
|
|
| 268 |
"rank": 2,
|
| 269 |
"shape": [
|
| 270 |
1,
|
| 271 |
+
512
|
| 272 |
],
|
| 273 |
"name": "serving_default_args_3"
|
| 274 |
}
|
|
|
|
| 310 |
28,
|
| 311 |
1,
|
| 312 |
2,
|
| 313 |
+
2560,
|
| 314 |
128
|
| 315 |
],
|
| 316 |
"name": "serving_default_args_3"
|
|
|
|
| 323 |
8,
|
| 324 |
1,
|
| 325 |
2,
|
| 326 |
+
2560,
|
| 327 |
128
|
| 328 |
],
|
| 329 |
"name": "serving_default_args_4"
|
|
|
|
| 407 |
28,
|
| 408 |
1,
|
| 409 |
2,
|
| 410 |
+
512,
|
| 411 |
128
|
| 412 |
],
|
| 413 |
"name": "serving_default_output_3_output"
|
|
|
|
| 420 |
8,
|
| 421 |
1,
|
| 422 |
2,
|
| 423 |
+
512,
|
| 424 |
128
|
| 425 |
],
|
| 426 |
"name": "serving_default_output_4_output"
|
|
|
|
| 472 |
28,
|
| 473 |
1,
|
| 474 |
2,
|
| 475 |
+
2560,
|
| 476 |
128
|
| 477 |
],
|
| 478 |
"name": "serving_default_output_4_output"
|
|
|
|
| 485 |
8,
|
| 486 |
1,
|
| 487 |
2,
|
| 488 |
+
2560,
|
| 489 |
128
|
| 490 |
],
|
| 491 |
"name": "serving_default_output_5_output"
|
voxcpm2-text-prefill.tflite
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:de284d10f210bb2f63568f5ec503b11a8d4b6cc8e1843764b0e1181df13060ab
|
| 3 |
+
size 2083492400
|
voxcpm2-token-step.tflite
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:400937203f24c2f0be073398b9e9790da373ec0de5f36d07d14618bb65087d4f
|
| 3 |
+
size 2189489680
|