{
  "quantization": "4bit_nf4",
  "base_model": "Content/MLModels/quant_fp16_depthformer_base_decoder_step.onnx",
  "block_size": 64,
  "double_quant": true,
  "note": "4-bit quantization requires runtime dequantization"
}