aufklarer committed on
Commit
9ec65a0
·
verified ·
1 Parent(s): 2536d07

INT8 prefill: the text-prefill graph shrinks from 7.7 GB (FP32) to 1.9 GB (INT8 weight-only); audio_encoder and audio_decoder stay FP32 because ai_edge_quantizer rejects their Conv ops. Total bundle size drops from 10 GB to 4.3 GB, and the runtime working set from 13 GiB to ~5-6 GiB.

Browse files
config.json CHANGED
@@ -216,7 +216,7 @@
216
  "sample_rate": 48000,
217
  "audio_conditioning_sample_rate": 16000,
218
  "text_tokenizer": "tokenizer.json",
219
- "max_text_tokens": 512,
220
  "max_generated_tokens": 2048,
221
  "default_inference_timesteps": 10,
222
  "default_cfg_value": 2.0,
@@ -230,7 +230,7 @@
230
  "source_repo": "openbmb/VoxCPM2",
231
  "voxcpm_src_required": true,
232
  "graph_variants": {
233
- "text_prefill": "fp32",
234
  "token_step": "int8",
235
  "audio_encoder": "fp32",
236
  "audio_decoder": "fp32"
@@ -245,7 +245,7 @@
245
  "rank": 2,
246
  "shape": [
247
  1,
248
- 512
249
  ],
250
  "name": "serving_default_args_0"
251
  },
@@ -254,7 +254,7 @@
254
  "rank": 2,
255
  "shape": [
256
  1,
257
- 512
258
  ],
259
  "name": "serving_default_args_1"
260
  },
@@ -263,7 +263,7 @@
263
  "rank": 4,
264
  "shape": [
265
  1,
266
- 512,
267
  4,
268
  64
269
  ],
@@ -274,7 +274,7 @@
274
  "rank": 2,
275
  "shape": [
276
  1,
277
- 512
278
  ],
279
  "name": "serving_default_args_3"
280
  },
@@ -322,7 +322,7 @@
322
  28,
323
  1,
324
  2,
325
- 2560,
326
  128
327
  ],
328
  "name": "serving_default_args_3"
@@ -335,7 +335,7 @@
335
  8,
336
  1,
337
  2,
338
- 2560,
339
  128
340
  ],
341
  "name": "serving_default_args_4"
@@ -419,7 +419,7 @@
419
  28,
420
  1,
421
  2,
422
- 512,
423
  128
424
  ],
425
  "name": "serving_default_output_3_output"
@@ -432,7 +432,7 @@
432
  8,
433
  1,
434
  2,
435
- 512,
436
  128
437
  ],
438
  "name": "serving_default_output_4_output"
@@ -484,7 +484,7 @@
484
  28,
485
  1,
486
  2,
487
- 2560,
488
  128
489
  ],
490
  "name": "serving_default_output_4_output"
@@ -497,7 +497,7 @@
497
  8,
498
  1,
499
  2,
500
- 2560,
501
  128
502
  ],
503
  "name": "serving_default_output_5_output"
 
216
  "sample_rate": 48000,
217
  "audio_conditioning_sample_rate": 16000,
218
  "text_tokenizer": "tokenizer.json",
219
+ "max_text_tokens": 256,
220
  "max_generated_tokens": 2048,
221
  "default_inference_timesteps": 10,
222
  "default_cfg_value": 2.0,
 
230
  "source_repo": "openbmb/VoxCPM2",
231
  "voxcpm_src_required": true,
232
  "graph_variants": {
233
+ "text_prefill": "int8",
234
  "token_step": "int8",
235
  "audio_encoder": "fp32",
236
  "audio_decoder": "fp32"
 
245
  "rank": 2,
246
  "shape": [
247
  1,
248
+ 256
249
  ],
250
  "name": "serving_default_args_0"
251
  },
 
254
  "rank": 2,
255
  "shape": [
256
  1,
257
+ 256
258
  ],
259
  "name": "serving_default_args_1"
260
  },
 
263
  "rank": 4,
264
  "shape": [
265
  1,
266
+ 256,
267
  4,
268
  64
269
  ],
 
274
  "rank": 2,
275
  "shape": [
276
  1,
277
+ 256
278
  ],
279
  "name": "serving_default_args_3"
280
  },
 
322
  28,
323
  1,
324
  2,
325
+ 2304,
326
  128
327
  ],
328
  "name": "serving_default_args_3"
 
335
  8,
336
  1,
337
  2,
338
+ 2304,
339
  128
340
  ],
341
  "name": "serving_default_args_4"
 
419
  28,
420
  1,
421
  2,
422
+ 256,
423
  128
424
  ],
425
  "name": "serving_default_output_3_output"
 
432
  8,
433
  1,
434
  2,
435
+ 256,
436
  128
437
  ],
438
  "name": "serving_default_output_4_output"
 
484
  28,
485
  1,
486
  2,
487
+ 2304,
488
  128
489
  ],
490
  "name": "serving_default_output_4_output"
 
497
  8,
498
  1,
499
  2,
500
+ 2304,
501
  128
502
  ],
503
  "name": "serving_default_output_5_output"
voxcpm2-text-prefill.tflite CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb63a91e00816b5ca592d14003febb6715ca7d78d789b9298b0f56d06f692a90
3
- size 8280692912
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da57bdb8aca9bb82bdef0c24dd6b5ecaec567541b132733c6f49a469da1f4b3f
3
+ size 2083074400
voxcpm2-text-prefill_recipe.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [{"regex": ".*", "operation": "*", "algorithm_key": "min_max_uniform_quantize", "op_config": {"weight_tensor_config": {"num_bits": 8, "symmetric": true, "granularity": "CHANNELWISE", "dtype": "INT"}, "compute_precision": "INTEGER", "explicit_dequantize": false, "skip_checks": false, "min_weight_elements": 0}}]
voxcpm2-token-step.tflite CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:400937203f24c2f0be073398b9e9790da373ec0de5f36d07d14618bb65087d4f
3
- size 2189489680
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2db76504ac52032616406afe3dd97078c35188e4eeffe00c0eecc788f816559c
3
+ size 2189485584
voxcpm2-token-step_recipe.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [{"regex": ".*", "operation": "*", "algorithm_key": "min_max_uniform_quantize", "op_config": {"weight_tensor_config": {"num_bits": 8, "symmetric": true, "granularity": "CHANNELWISE", "dtype": "INT"}, "compute_precision": "INTEGER", "explicit_dequantize": false, "skip_checks": false, "min_weight_elements": 0}}]