Upload folder using huggingface_hub
Browse files- config.json +25 -25
config.json
CHANGED
|
@@ -119,8 +119,8 @@
|
|
| 119 |
},
|
| 120 |
"output_activations": null,
|
| 121 |
"targets": [
|
| 122 |
-
"re:
|
| 123 |
-
"re:
|
| 124 |
],
|
| 125 |
"weights": {
|
| 126 |
"actorder": null,
|
|
@@ -158,7 +158,7 @@
|
|
| 158 |
},
|
| 159 |
"output_activations": null,
|
| 160 |
"targets": [
|
| 161 |
-
"re:
|
| 162 |
],
|
| 163 |
"weights": {
|
| 164 |
"actorder": null,
|
|
@@ -179,27 +179,27 @@
|
|
| 179 |
"format": "mixed-precision",
|
| 180 |
"global_compression_ratio": null,
|
| 181 |
"ignore": [
|
| 182 |
-
"
|
| 183 |
-
"
|
| 184 |
-
"
|
| 185 |
-
"
|
| 186 |
-
"
|
| 187 |
-
"
|
| 188 |
-
"
|
| 189 |
-
"
|
| 190 |
-
"
|
| 191 |
-
"
|
| 192 |
-
"
|
| 193 |
-
"
|
| 194 |
-
"
|
| 195 |
-
"
|
| 196 |
-
"
|
| 197 |
-
"
|
| 198 |
-
"
|
| 199 |
-
"
|
| 200 |
-
"
|
| 201 |
-
"
|
| 202 |
-
"
|
| 203 |
"lm_head"
|
| 204 |
],
|
| 205 |
"kv_cache_scheme": null,
|
|
@@ -235,4 +235,4 @@
|
|
| 235 |
"use_cache": true,
|
| 236 |
"v_head_dim": null,
|
| 237 |
"vocab_size": 129280
|
| 238 |
-
}
|
|
|
|
| 119 |
},
|
| 120 |
"output_activations": null,
|
| 121 |
"targets": [
|
| 122 |
+
"re:.*attn.*(wkv|wo_a|wo_b|wq_a|wq_b)$",
|
| 123 |
+
"re:.*attn\\.compressor.*(wgate|wkv)$"
|
| 124 |
],
|
| 125 |
"weights": {
|
| 126 |
"actorder": null,
|
|
|
|
| 158 |
},
|
| 159 |
"output_activations": null,
|
| 160 |
"targets": [
|
| 161 |
+
"re:.*ffn.*(gate|up|down)_proj$"
|
| 162 |
],
|
| 163 |
"weights": {
|
| 164 |
"actorder": null,
|
|
|
|
| 179 |
"format": "mixed-precision",
|
| 180 |
"global_compression_ratio": null,
|
| 181 |
"ignore": [
|
| 182 |
+
"layers.2.attn.indexer.weights_proj",
|
| 183 |
+
"layers.4.attn.indexer.weights_proj",
|
| 184 |
+
"layers.6.attn.indexer.weights_proj",
|
| 185 |
+
"layers.8.attn.indexer.weights_proj",
|
| 186 |
+
"layers.10.attn.indexer.weights_proj",
|
| 187 |
+
"layers.12.attn.indexer.weights_proj",
|
| 188 |
+
"layers.14.attn.indexer.weights_proj",
|
| 189 |
+
"layers.16.attn.indexer.weights_proj",
|
| 190 |
+
"layers.18.attn.indexer.weights_proj",
|
| 191 |
+
"layers.20.attn.indexer.weights_proj",
|
| 192 |
+
"layers.22.attn.indexer.weights_proj",
|
| 193 |
+
"layers.24.attn.indexer.weights_proj",
|
| 194 |
+
"layers.26.attn.indexer.weights_proj",
|
| 195 |
+
"layers.28.attn.indexer.weights_proj",
|
| 196 |
+
"layers.30.attn.indexer.weights_proj",
|
| 197 |
+
"layers.32.attn.indexer.weights_proj",
|
| 198 |
+
"layers.34.attn.indexer.weights_proj",
|
| 199 |
+
"layers.36.attn.indexer.weights_proj",
|
| 200 |
+
"layers.38.attn.indexer.weights_proj",
|
| 201 |
+
"layers.40.attn.indexer.weights_proj",
|
| 202 |
+
"layers.42.attn.indexer.weights_proj",
|
| 203 |
"lm_head"
|
| 204 |
],
|
| 205 |
"kv_cache_scheme": null,
|
|
|
|
| 235 |
"use_cache": true,
|
| 236 |
"v_head_dim": null,
|
| 237 |
"vocab_size": 129280
|
| 238 |
+
}
|