Text-to-Speech
LiteRT
LiteRT
tts
voice-cloning
voice-design
diffusion
on-device
soniqo
speech-cloud
speech-core
Instructions to use soniqo/VoxCPM2-LiteRT-INT8 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- LiteRT
How to use soniqo/VoxCPM2-LiteRT-INT8 with LiteRT:
# No code snippets available yet for this library.
# To use this model, check the repository files and the library's documentation.
# Want to help? PRs adding snippets are welcome at:
# https://github.com/huggingface/huggingface.js
- Notebooks
- Google Colab
- Kaggle
INT8 prefill: 7.7 GB FP32 → 1.9 GB INT8 weight-only; audio_encoder/decoder stay FP32 (ai_edge_quantizer rejects Conv ops). Total bundle 10 GB → 4.3 GB, runtime working set 13 GiB → ~5-6 GiB.
Browse files
- config.json +12 -12
- voxcpm2-text-prefill.tflite +2 -2
- voxcpm2-text-prefill_recipe.json +1 -0
- voxcpm2-token-step.tflite +2 -2
- voxcpm2-token-step_recipe.json +1 -0
config.json
CHANGED
|
@@ -216,7 +216,7 @@
|
|
| 216 |
"sample_rate": 48000,
|
| 217 |
"audio_conditioning_sample_rate": 16000,
|
| 218 |
"text_tokenizer": "tokenizer.json",
|
| 219 |
-
"max_text_tokens":
|
| 220 |
"max_generated_tokens": 2048,
|
| 221 |
"default_inference_timesteps": 10,
|
| 222 |
"default_cfg_value": 2.0,
|
|
@@ -230,7 +230,7 @@
|
|
| 230 |
"source_repo": "openbmb/VoxCPM2",
|
| 231 |
"voxcpm_src_required": true,
|
| 232 |
"graph_variants": {
|
| 233 |
-
"text_prefill": "
|
| 234 |
"token_step": "int8",
|
| 235 |
"audio_encoder": "fp32",
|
| 236 |
"audio_decoder": "fp32"
|
|
@@ -245,7 +245,7 @@
|
|
| 245 |
"rank": 2,
|
| 246 |
"shape": [
|
| 247 |
1,
|
| 248 |
-
|
| 249 |
],
|
| 250 |
"name": "serving_default_args_0"
|
| 251 |
},
|
|
@@ -254,7 +254,7 @@
|
|
| 254 |
"rank": 2,
|
| 255 |
"shape": [
|
| 256 |
1,
|
| 257 |
-
|
| 258 |
],
|
| 259 |
"name": "serving_default_args_1"
|
| 260 |
},
|
|
@@ -263,7 +263,7 @@
|
|
| 263 |
"rank": 4,
|
| 264 |
"shape": [
|
| 265 |
1,
|
| 266 |
-
|
| 267 |
4,
|
| 268 |
64
|
| 269 |
],
|
|
@@ -274,7 +274,7 @@
|
|
| 274 |
"rank": 2,
|
| 275 |
"shape": [
|
| 276 |
1,
|
| 277 |
-
|
| 278 |
],
|
| 279 |
"name": "serving_default_args_3"
|
| 280 |
},
|
|
@@ -322,7 +322,7 @@
|
|
| 322 |
28,
|
| 323 |
1,
|
| 324 |
2,
|
| 325 |
-
|
| 326 |
128
|
| 327 |
],
|
| 328 |
"name": "serving_default_args_3"
|
|
@@ -335,7 +335,7 @@
|
|
| 335 |
8,
|
| 336 |
1,
|
| 337 |
2,
|
| 338 |
-
|
| 339 |
128
|
| 340 |
],
|
| 341 |
"name": "serving_default_args_4"
|
|
@@ -419,7 +419,7 @@
|
|
| 419 |
28,
|
| 420 |
1,
|
| 421 |
2,
|
| 422 |
-
|
| 423 |
128
|
| 424 |
],
|
| 425 |
"name": "serving_default_output_3_output"
|
|
@@ -432,7 +432,7 @@
|
|
| 432 |
8,
|
| 433 |
1,
|
| 434 |
2,
|
| 435 |
-
|
| 436 |
128
|
| 437 |
],
|
| 438 |
"name": "serving_default_output_4_output"
|
|
@@ -484,7 +484,7 @@
|
|
| 484 |
28,
|
| 485 |
1,
|
| 486 |
2,
|
| 487 |
-
|
| 488 |
128
|
| 489 |
],
|
| 490 |
"name": "serving_default_output_4_output"
|
|
@@ -497,7 +497,7 @@
|
|
| 497 |
8,
|
| 498 |
1,
|
| 499 |
2,
|
| 500 |
-
|
| 501 |
128
|
| 502 |
],
|
| 503 |
"name": "serving_default_output_5_output"
|
|
|
|
| 216 |
"sample_rate": 48000,
|
| 217 |
"audio_conditioning_sample_rate": 16000,
|
| 218 |
"text_tokenizer": "tokenizer.json",
|
| 219 |
+
"max_text_tokens": 256,
|
| 220 |
"max_generated_tokens": 2048,
|
| 221 |
"default_inference_timesteps": 10,
|
| 222 |
"default_cfg_value": 2.0,
|
|
|
|
| 230 |
"source_repo": "openbmb/VoxCPM2",
|
| 231 |
"voxcpm_src_required": true,
|
| 232 |
"graph_variants": {
|
| 233 |
+
"text_prefill": "int8",
|
| 234 |
"token_step": "int8",
|
| 235 |
"audio_encoder": "fp32",
|
| 236 |
"audio_decoder": "fp32"
|
|
|
|
| 245 |
"rank": 2,
|
| 246 |
"shape": [
|
| 247 |
1,
|
| 248 |
+
256
|
| 249 |
],
|
| 250 |
"name": "serving_default_args_0"
|
| 251 |
},
|
|
|
|
| 254 |
"rank": 2,
|
| 255 |
"shape": [
|
| 256 |
1,
|
| 257 |
+
256
|
| 258 |
],
|
| 259 |
"name": "serving_default_args_1"
|
| 260 |
},
|
|
|
|
| 263 |
"rank": 4,
|
| 264 |
"shape": [
|
| 265 |
1,
|
| 266 |
+
256,
|
| 267 |
4,
|
| 268 |
64
|
| 269 |
],
|
|
|
|
| 274 |
"rank": 2,
|
| 275 |
"shape": [
|
| 276 |
1,
|
| 277 |
+
256
|
| 278 |
],
|
| 279 |
"name": "serving_default_args_3"
|
| 280 |
},
|
|
|
|
| 322 |
28,
|
| 323 |
1,
|
| 324 |
2,
|
| 325 |
+
2304,
|
| 326 |
128
|
| 327 |
],
|
| 328 |
"name": "serving_default_args_3"
|
|
|
|
| 335 |
8,
|
| 336 |
1,
|
| 337 |
2,
|
| 338 |
+
2304,
|
| 339 |
128
|
| 340 |
],
|
| 341 |
"name": "serving_default_args_4"
|
|
|
|
| 419 |
28,
|
| 420 |
1,
|
| 421 |
2,
|
| 422 |
+
256,
|
| 423 |
128
|
| 424 |
],
|
| 425 |
"name": "serving_default_output_3_output"
|
|
|
|
| 432 |
8,
|
| 433 |
1,
|
| 434 |
2,
|
| 435 |
+
256,
|
| 436 |
128
|
| 437 |
],
|
| 438 |
"name": "serving_default_output_4_output"
|
|
|
|
| 484 |
28,
|
| 485 |
1,
|
| 486 |
2,
|
| 487 |
+
2304,
|
| 488 |
128
|
| 489 |
],
|
| 490 |
"name": "serving_default_output_4_output"
|
|
|
|
| 497 |
8,
|
| 498 |
1,
|
| 499 |
2,
|
| 500 |
+
2304,
|
| 501 |
128
|
| 502 |
],
|
| 503 |
"name": "serving_default_output_5_output"
|
voxcpm2-text-prefill.tflite
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:da57bdb8aca9bb82bdef0c24dd6b5ecaec567541b132733c6f49a469da1f4b3f
|
| 3 |
+
size 2083074400
|
voxcpm2-text-prefill_recipe.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
[{"regex": ".*", "operation": "*", "algorithm_key": "min_max_uniform_quantize", "op_config": {"weight_tensor_config": {"num_bits": 8, "symmetric": true, "granularity": "CHANNELWISE", "dtype": "INT"}, "compute_precision": "INTEGER", "explicit_dequantize": false, "skip_checks": false, "min_weight_elements": 0}}]
|
voxcpm2-token-step.tflite
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2db76504ac52032616406afe3dd97078c35188e4eeffe00c0eecc788f816559c
|
| 3 |
+
size 2189485584
|
voxcpm2-token-step_recipe.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
[{"regex": ".*", "operation": "*", "algorithm_key": "min_max_uniform_quantize", "op_config": {"weight_tensor_config": {"num_bits": 8, "symmetric": true, "granularity": "CHANNELWISE", "dtype": "INT"}, "compute_precision": "INTEGER", "explicit_dequantize": false, "skip_checks": false, "min_weight_elements": 0}}]
|