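INT8 text prefill: quantizing the text-prefill graph to INT8 (weight-only) shrinks it from 7.7 GB (FP32) to 1.9 GB. The audio_encoder and audio_decoder graphs stay FP32 because ai_edge_quantizer rejects their Conv ops. The total bundle drops from 10 GB to 4.3 GB, and the runtime working set from 13 GiB to ~5-6 GiB. This commit also lowers max_text_tokens from 512 to 256 and updates the dependent tensor shapes in config.json accordingly.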

Browse files

Files changed (5) hide show

config.json +12 -12
voxcpm2-text-prefill.tflite +2 -2
voxcpm2-text-prefill_recipe.json +1 -0
voxcpm2-token-step.tflite +2 -2
voxcpm2-token-step_recipe.json +1 -0

config.json CHANGED Viewed

@@ -216,7 +216,7 @@
   "sample_rate": 48000,
   "audio_conditioning_sample_rate": 16000,
   "text_tokenizer": "tokenizer.json",
-  "max_text_tokens": 512,
   "max_generated_tokens": 2048,
   "default_inference_timesteps": 10,
   "default_cfg_value": 2.0,
@@ -230,7 +230,7 @@
     "source_repo": "openbmb/VoxCPM2",
     "voxcpm_src_required": true,
     "graph_variants": {
-      "text_prefill": "fp32",
       "token_step": "int8",
       "audio_encoder": "fp32",
       "audio_decoder": "fp32"
@@ -245,7 +245,7 @@
         "rank": 2,
         "shape": [
           1,
-          512
         ],
         "name": "serving_default_args_0"
       },
@@ -254,7 +254,7 @@
         "rank": 2,
         "shape": [
           1,
-          512
         ],
         "name": "serving_default_args_1"
       },
@@ -263,7 +263,7 @@
         "rank": 4,
         "shape": [
           1,
-          512,
           4,
           64
         ],
@@ -274,7 +274,7 @@
         "rank": 2,
         "shape": [
           1,
-          512
         ],
         "name": "serving_default_args_3"
       },
@@ -322,7 +322,7 @@
           28,
           1,
           2,
-          2560,
           128
         ],
         "name": "serving_default_args_3"
@@ -335,7 +335,7 @@
           8,
           1,
           2,
-          2560,
           128
         ],
         "name": "serving_default_args_4"
@@ -419,7 +419,7 @@
           28,
           1,
           2,
-          512,
           128
         ],
         "name": "serving_default_output_3_output"
@@ -432,7 +432,7 @@
           8,
           1,
           2,
-          512,
           128
         ],
         "name": "serving_default_output_4_output"
@@ -484,7 +484,7 @@
           28,
           1,
           2,
-          2560,
           128
         ],
         "name": "serving_default_output_4_output"
@@ -497,7 +497,7 @@
           8,
           1,
           2,
-          2560,
           128
         ],
         "name": "serving_default_output_5_output"

   "sample_rate": 48000,
   "audio_conditioning_sample_rate": 16000,
   "text_tokenizer": "tokenizer.json",
+  "max_text_tokens": 256,
   "max_generated_tokens": 2048,
   "default_inference_timesteps": 10,
   "default_cfg_value": 2.0,
     "source_repo": "openbmb/VoxCPM2",
     "voxcpm_src_required": true,
     "graph_variants": {
+      "text_prefill": "int8",
       "token_step": "int8",
       "audio_encoder": "fp32",
       "audio_decoder": "fp32"
         "rank": 2,
         "shape": [
           1,
+          256
         ],
         "name": "serving_default_args_0"
       },
         "rank": 2,
         "shape": [
           1,
+          256
         ],
         "name": "serving_default_args_1"
       },
         "rank": 4,
         "shape": [
           1,
+          256,
           4,
           64
         ],
         "rank": 2,
         "shape": [
           1,
+          256
         ],
         "name": "serving_default_args_3"
       },
           28,
           1,
           2,
+          2304,
           128
         ],
         "name": "serving_default_args_3"
           8,
           1,
           2,
+          2304,
           128
         ],
         "name": "serving_default_args_4"
           28,
           1,
           2,
+          256,
           128
         ],
         "name": "serving_default_output_3_output"
           8,
           1,
           2,
+          256,
           128
         ],
         "name": "serving_default_output_4_output"
           28,
           1,
           2,
+          2304,
           128
         ],
         "name": "serving_default_output_4_output"
           8,
           1,
           2,
+          2304,
           128
         ],
         "name": "serving_default_output_5_output"

voxcpm2-text-prefill.tflite CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bb63a91e00816b5ca592d14003febb6715ca7d78d789b9298b0f56d06f692a90
-size 8280692912

 version https://git-lfs.github.com/spec/v1
+oid sha256:da57bdb8aca9bb82bdef0c24dd6b5ecaec567541b132733c6f49a469da1f4b3f
+size 2083074400

voxcpm2-text-prefill_recipe.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ [{"regex": ".", "operation": "", "algorithm_key": "min_max_uniform_quantize", "op_config": {"weight_tensor_config": {"num_bits": 8, "symmetric": true, "granularity": "CHANNELWISE", "dtype": "INT"}, "compute_precision": "INTEGER", "explicit_dequantize": false, "skip_checks": false, "min_weight_elements": 0}}]

voxcpm2-token-step.tflite CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:400937203f24c2f0be073398b9e9790da373ec0de5f36d07d14618bb65087d4f
-size 2189489680

 version https://git-lfs.github.com/spec/v1
+oid sha256:2db76504ac52032616406afe3dd97078c35188e4eeffe00c0eecc788f816559c
+size 2189485584

voxcpm2-token-step_recipe.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ [{"regex": ".", "operation": "", "algorithm_key": "min_max_uniform_quantize", "op_config": {"weight_tensor_config": {"num_bits": 8, "symmetric": true, "granularity": "CHANNELWISE", "dtype": "INT"}, "compute_precision": "INTEGER", "explicit_dequantize": false, "skip_checks": false, "min_weight_elements": 0}}]