mattbucci's picture
add quantization_config.ignore=['lm_head', 're:.*mlp\\.gate$', 're:.*linear_attn\\.in_proj_a$', 're:.*linear_attn\\.in_proj_b$', 're:.*shared_expert\\..*']
77419df verified