add quantization_config.ignore=['lm_head', 're:.*mlp\\.gate$', 're:.*linear_attn\\.in_proj_a$', 're:.*linear_attn\\.in_proj_b$', 're:.*shared_expert\\..*']
Browse files- config.json +7 -0
config.json
CHANGED
|
@@ -114,6 +114,13 @@
|
|
| 114 |
"norm",
|
| 115 |
"k_norm",
|
| 116 |
"q_norm"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
]
|
| 118 |
},
|
| 119 |
"rms_norm_eps": 1e-06,
|
|
|
|
| 114 |
"norm",
|
| 115 |
"k_norm",
|
| 116 |
"q_norm"
|
| 117 |
+
],
|
| 118 |
+
"ignore": [
|
| 119 |
+
"lm_head",
|
| 120 |
+
"re:.*mlp\\.gate$",
|
| 121 |
+
"re:.*linear_attn\\.in_proj_a$",
|
| 122 |
+
"re:.*linear_attn\\.in_proj_b$",
|
| 123 |
+
"re:.*shared_expert\\..*"
|
| 124 |
]
|
| 125 |
},
|
| 126 |
"rms_norm_eps": 1e-06,
|