---
# Quantization recipe (llm-compressor-style QuantizationModifier).
# NOTE(review): the original file was collapsed onto a single line and did not
# parse as YAML; this restores the nested block structure. All keys and values
# are preserved exactly from the flattened source.
default_stage:
  default_modifiers:
    QuantizationModifier:
      config_groups:
        # Attention projection layers: FP8 weights (128x128 block scales)
        # with dynamic per-group FP8 input activations.
        attention:
          targets: ['re:model.*attn.*(wkv|wo_a|wo_b|wq_a|wq_b)$', 're:model.*attn\.compressor.*(wgate|wkv)$']
          weights:
            num_bits: 8
            type: float
            symmetric: true
            group_size: null
            strategy: block
            block_structure: [128, 128]
            dynamic: false
            actorder: null
            scale_dtype: null
            zp_dtype: null
            observer: memoryless_minmax
            observer_kwargs: {}
          input_activations:
            num_bits: 8
            type: float
            symmetric: true
            group_size: 128
            strategy: group
            block_structure: null
            # dynamic: true -> activation scales computed at runtime,
            # so no observer is attached.
            dynamic: true
            actorder: null
            scale_dtype: null
            zp_dtype: null
            observer: null
            observer_kwargs: {}
          output_activations: null
          format: null
        # Expert MLP projections: FP4 weights and activations with
        # tensor-group (group_size 16) scales stored as FP8 (NVFP4-style).
        experts:
          targets: ['re:model.*mlp.*(gate|up|down)_proj$']
          weights:
            num_bits: 4
            type: float
            symmetric: true
            group_size: 16
            strategy: tensor_group
            block_structure: null
            dynamic: false
            actorder: null
            scale_dtype: torch.float8_e4m3fn
            zp_dtype: null
            observer: memoryless_minmax
            observer_kwargs: {}
          input_activations:
            num_bits: 4
            type: float
            symmetric: true
            group_size: 16
            strategy: tensor_group
            block_structure: null
            # "local" is a string mode (not a boolean): per-group scales are
            # dynamic while the global scale is calibrated statically.
            dynamic: local
            actorder: null
            scale_dtype: torch.float8_e4m3fn
            zp_dtype: null
            observer: static_minmax
            observer_kwargs: {}
          output_activations: null
          format: null
      # Module types considered for quantization, minus the ignore list.
      targets: [Linear]
      ignore: [lm_head]
      bypass_divisibility_checks: false