add quantization_config.ignore=['lm_head', 're:.*mlp\\.gate$', 're:.*linear_attn\\.in_proj_a$', 're:.*linear_attn\\.in_proj_b$', 're:.*shared_expert\\..*'] 77419df verified mattbucci committed 9 days ago