{
  "quantization": "4bit_nf4",
  "base_model": "Content/MLModels/quant_fp16_depthformer_base_decoder_step.onnx",
  "block_size": 64,
  "double_quant": true,
  "note": "4-bit quantization requires runtime dequantization"
}