kylesayrs committed
Commit e378b44 (verified) · Parent: 83be997

Upload folder using huggingface_hub

config.json ADDED
@@ -0,0 +1,253 @@
+ {
+   "architectures": [
+     "DeepseekV4ForCausalLM"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "bos_token_id": 0,
+   "compress_ratios": [
+     0,
+     0,
+     4,
+     128,
+     4
+   ],
+   "compress_rope_parameters": {
+     "beta_fast": 32,
+     "beta_slow": 1,
+     "factor": 16,
+     "original_max_position_embeddings": 65536,
+     "partial_rotary_factor": 0.125,
+     "rope_theta": 160000.0,
+     "rope_type": "yarn",
+     "type": "yarn"
+   },
+   "compress_rope_theta": 160000.0,
+   "dtype": "bfloat16",
+   "eos_token_id": 1,
+   "first_k_dense_replace": null,
+   "hc_eps": 1e-06,
+   "hc_mult": 4,
+   "hc_sinkhorn_iters": 20,
+   "head_dim": 512,
+   "hidden_act": "silu",
+   "hidden_size": 4096,
+   "index_head_dim": 128,
+   "index_n_heads": 64,
+   "index_topk": 512,
+   "initializer_range": 0.02,
+   "intermediate_size": 18432,
+   "kv_lora_rank": null,
+   "max_position_embeddings": 1048576,
+   "model_type": "deepseek_v4",
+   "moe_intermediate_size": 2048,
+   "n_group": null,
+   "n_routed_experts": 256,
+   "n_shared_experts": 1,
+   "norm_topk_prob": true,
+   "num_attention_heads": 64,
+   "num_experts_per_tok": 6,
+   "num_hash_layers": 3,
+   "num_hidden_layers": 5,
+   "num_key_value_heads": 1,
+   "num_nextn_predict_layers": 0,
+   "o_groups": 8,
+   "o_lora_rank": 1024,
+   "output_router_logits": false,
+   "pad_token_id": null,
+   "partial_rotary_factor": 0.125,
+   "pretraining_tp": 1,
+   "q_lora_rank": 1024,
+   "qk_nope_head_dim": 448,
+   "qk_rope_head_dim": 64,
+   "quantization": {
+     "bits": 4,
+     "group_size": 32,
+     "mode": "mxfp4",
+     "model.layers.0.ffn.switch_mlp.down_proj": {
+       "bits": 4,
+       "group_size": 32,
+       "mode": "mxfp4"
+     },
+     "model.layers.0.ffn.switch_mlp.gate_proj": {
+       "bits": 4,
+       "group_size": 32,
+       "mode": "mxfp4"
+     },
+     "model.layers.0.ffn.switch_mlp.up_proj": {
+       "bits": 4,
+       "group_size": 32,
+       "mode": "mxfp4"
+     },
+     "model.layers.1.ffn.switch_mlp.down_proj": {
+       "bits": 4,
+       "group_size": 32,
+       "mode": "mxfp4"
+     },
+     "model.layers.1.ffn.switch_mlp.gate_proj": {
+       "bits": 4,
+       "group_size": 32,
+       "mode": "mxfp4"
+     },
+     "model.layers.1.ffn.switch_mlp.up_proj": {
+       "bits": 4,
+       "group_size": 32,
+       "mode": "mxfp4"
+     },
+     "model.layers.2.ffn.switch_mlp.down_proj": {
+       "bits": 4,
+       "group_size": 32,
+       "mode": "mxfp4"
+     },
+     "model.layers.2.ffn.switch_mlp.gate_proj": {
+       "bits": 4,
+       "group_size": 32,
+       "mode": "mxfp4"
+     },
+     "model.layers.2.ffn.switch_mlp.up_proj": {
+       "bits": 4,
+       "group_size": 32,
+       "mode": "mxfp4"
+     },
+     "model.layers.3.ffn.switch_mlp.down_proj": {
+       "bits": 4,
+       "group_size": 32,
+       "mode": "mxfp4"
+     },
+     "model.layers.3.ffn.switch_mlp.gate_proj": {
+       "bits": 4,
+       "group_size": 32,
+       "mode": "mxfp4"
+     },
+     "model.layers.3.ffn.switch_mlp.up_proj": {
+       "bits": 4,
+       "group_size": 32,
+       "mode": "mxfp4"
+     },
+     "model.layers.4.ffn.switch_mlp.down_proj": {
+       "bits": 4,
+       "group_size": 32,
+       "mode": "mxfp4"
+     },
+     "model.layers.4.ffn.switch_mlp.gate_proj": {
+       "bits": 4,
+       "group_size": 32,
+       "mode": "mxfp4"
+     },
+     "model.layers.4.ffn.switch_mlp.up_proj": {
+       "bits": 4,
+       "group_size": 32,
+       "mode": "mxfp4"
+     }
+   },
+   "quantization_config": {
+     "config_groups": {
+       "group_0": {
+         "format": "nvfp4-pack-quantized",
+         "input_activations": {
+           "actorder": null,
+           "block_structure": null,
+           "dynamic": "local",
+           "group_size": 16,
+           "num_bits": 4,
+           "observer": "static_minmax",
+           "observer_kwargs": {},
+           "scale_dtype": "torch.float8_e4m3fn",
+           "strategy": "tensor_group",
+           "symmetric": true,
+           "type": "float",
+           "zp_dtype": null
+         },
+         "output_activations": null,
+         "targets": [
+           "re:model.*mlp.*(gate|up|down)_proj$"
+         ],
+         "weights": {
+           "actorder": null,
+           "block_structure": null,
+           "dynamic": false,
+           "group_size": 16,
+           "num_bits": 4,
+           "observer": "memoryless_minmax",
+           "observer_kwargs": {},
+           "scale_dtype": "torch.float8_e4m3fn",
+           "strategy": "tensor_group",
+           "symmetric": true,
+           "type": "float",
+           "zp_dtype": null
+         }
+       }
+     },
+     "format": "nvfp4-pack-quantized",
+     "global_compression_ratio": null,
+     "ignore": [
+       "model.layers.0.self_attn.wq_a",
+       "model.layers.0.self_attn.wq_b",
+       "model.layers.0.self_attn.wkv",
+       "model.layers.0.self_attn.wo_b",
+       "model.layers.1.self_attn.wq_a",
+       "model.layers.1.self_attn.wq_b",
+       "model.layers.1.self_attn.wkv",
+       "model.layers.1.self_attn.wo_b",
+       "model.layers.2.self_attn.wq_a",
+       "model.layers.2.self_attn.wq_b",
+       "model.layers.2.self_attn.wkv",
+       "model.layers.2.self_attn.wo_b",
+       "model.layers.2.self_attn.compressor.wkv",
+       "model.layers.2.self_attn.compressor.wgate",
+       "model.layers.2.self_attn.compressor.indexer.wkv",
+       "model.layers.2.self_attn.compressor.indexer.wgate",
+       "model.layers.2.self_attn.compressor.indexer.wq_b",
+       "model.layers.2.self_attn.compressor.indexer.weights_proj",
+       "model.layers.3.self_attn.wq_a",
+       "model.layers.3.self_attn.wq_b",
+       "model.layers.3.self_attn.wkv",
+       "model.layers.3.self_attn.wo_b",
+       "model.layers.3.self_attn.compressor.wkv",
+       "model.layers.3.self_attn.compressor.wgate",
+       "model.layers.4.self_attn.wq_a",
+       "model.layers.4.self_attn.wq_b",
+       "model.layers.4.self_attn.wkv",
+       "model.layers.4.self_attn.wo_b",
+       "model.layers.4.self_attn.compressor.wkv",
+       "model.layers.4.self_attn.compressor.wgate",
+       "model.layers.4.self_attn.compressor.indexer.wkv",
+       "model.layers.4.self_attn.compressor.indexer.wgate",
+       "model.layers.4.self_attn.compressor.indexer.wq_b",
+       "model.layers.4.self_attn.compressor.indexer.weights_proj",
+       "lm_head"
+     ],
+     "kv_cache_scheme": null,
+     "quant_method": "compressed-tensors",
+     "quantization_status": "compressed",
+     "sparsity_config": {},
+     "transform_config": {},
+     "version": "0.15.1.dev15+g11daf97.d20260427"
+   },
+   "rms_norm_eps": 1e-06,
+   "rope_interleave": true,
+   "rope_parameters": {
+     "beta_fast": 32,
+     "beta_slow": 1,
+     "factor": 16,
+     "original_max_position_embeddings": 65536,
+     "partial_rotary_factor": 0.125,
+     "rope_theta": 10000.0,
+     "rope_type": "yarn",
+     "type": "yarn"
+   },
+   "rope_theta": 10000.0,
+   "routed_scaling_factor": 1.5,
+   "router_aux_loss_coef": 0.001,
+   "router_jitter_noise": 0.0,
+   "scoring_func": "sqrtsoftplus",
+   "sliding_window": 128,
+   "swiglu_limit": 10.0,
+   "tie_word_embeddings": false,
+   "topk_group": null,
+   "topk_method": "noaux_tc",
+   "transformers_version": "5.7.0.dev0",
+   "use_cache": true,
+   "v_head_dim": null,
+   "vocab_size": 129280
+ }
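
The `"quant_method": "compressed-tensors"` entry means Transformers defers weight decompression to the `compressed-tensors` library at load time, so the checkpoint loads like any other model. A minimal loading sketch, assuming a Transformers build new enough to ship the `deepseek_v4` architecture (the config was written by 5.7.0.dev0) and `./checkpoint` as a hypothetical local path to this uploaded folder:

```python
# Minimal sketch: load this NVFP4 compressed-tensors checkpoint.
# Assumes `pip install transformers compressed-tensors accelerate`
# and that the installed Transformers includes deepseek_v4 support.
from transformers import AutoModelForCausalLM, AutoTokenizer

ckpt = "./checkpoint"  # hypothetical path to this uploaded folder

model = AutoModelForCausalLM.from_pretrained(
    ckpt,
    dtype="auto",       # honors "dtype": "bfloat16" from config.json
    device_map="auto",  # requires accelerate; shards across devices
)
tokenizer = AutoTokenizer.from_pretrained(ckpt)
```

Note that only the MoE expert projections matched by `re:model.*mlp.*(gate|up|down)_proj$` are quantized; the attention projections and `lm_head` listed under `ignore` stay in bfloat16.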
generation_config.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 0,
+   "do_sample": true,
+   "eos_token_id": 1,
+   "temperature": 1.0,
+   "top_p": 1.0,
+   "transformers_version": "5.7.0.dev0"
+ }
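
These sampling defaults are picked up automatically by `generate()` from the checkpoint directory; spelled out explicitly (a sketch, reusing the `model` and `tokenizer` loaded above, with an illustrative prompt):

```python
# Sketch: generation with the defaults from generation_config.json.
# Passing these kwargs is equivalent to relying on the stored config.
inputs = tokenizer("Hello", return_tensors="pt").to(model.device)
output = model.generate(
    **inputs,
    do_sample=True,   # "do_sample": true
    temperature=1.0,  # "temperature": 1.0 (no logit rescaling)
    top_p=1.0,        # "top_p": 1.0 (no nucleus truncation)
    max_new_tokens=64,
)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```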
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3f0959912b15546c6ebf0b48dbe64c040685e160b0c5fd9b3bf8beaa11401fec
+ size 21510156084
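
This is a Git LFS pointer rather than the weights themselves; `oid` is the SHA-256 of the actual ~21.5 GB payload. A short integrity check after download, as a sketch:

```python
# Sketch: verify a downloaded model.safetensors against its LFS pointer.
import hashlib
import os

expected_oid = "3f0959912b15546c6ebf0b48dbe64c040685e160b0c5fd9b3bf8beaa11401fec"
expected_size = 21510156084

path = "model.safetensors"
assert os.path.getsize(path) == expected_size, "size mismatch"

digest = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # 1 MiB chunks
        digest.update(chunk)
assert digest.hexdigest() == expected_oid, "sha256 mismatch"
```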
recipe.yaml ADDED
@@ -0,0 +1,41 @@
+ default_stage:
+   default_modifiers:
+     GPTQModifier:
+       config_groups:
+         experts:
+           targets: ['re:model.*mlp.*(gate|up|down)_proj$']
+           weights:
+             num_bits: 4
+             type: float
+             symmetric: true
+             group_size: 16
+             strategy: tensor_group
+             block_structure: null
+             dynamic: false
+             actorder: null
+             scale_dtype: torch.float8_e4m3fn
+             zp_dtype: null
+             observer: memoryless_minmax
+             observer_kwargs: {}
+           input_activations:
+             num_bits: 4
+             type: float
+             symmetric: true
+             group_size: 16
+             strategy: tensor_group
+             block_structure: null
+             dynamic: local
+             actorder: null
+             scale_dtype: torch.float8_e4m3fn
+             zp_dtype: null
+             observer: static_minmax
+             observer_kwargs: {}
+           output_activations: null
+           format: null
+       targets: [Linear]
+       ignore: [lm_head, 're:model.*self_attn.*', 're:model.*ffn_hc$']
+       bypass_divisibility_checks: false
+       block_size: 128
+       dampening_frac: 0.01
+       actorder: static
+       offload_hessians: false
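
The recipe records the GPTQ settings that produced the `quantization_config` above (note the matching FP4 `tensor_group` scheme and `memoryless_minmax` / `static_minmax` observers). A hedged sketch of how such a recipe is typically applied with llm-compressor's `oneshot` entry point; the source-model path and calibration dataset below are illustrative, not taken from this commit:

```python
# Sketch: apply recipe.yaml with llm-compressor (pip install llmcompressor).
# The model path and calibration settings below are assumptions.
from llmcompressor import oneshot

oneshot(
    model="path/to/bf16-source-model",  # hypothetical unquantized checkpoint
    recipe="recipe.yaml",               # the recipe shown above
    dataset="open_platypus",            # illustrative calibration dataset
    max_seq_length=2048,
    num_calibration_samples=512,
    output_dir="deepseek-v4-nvfp4",
)
```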
tokenizer.json ADDED
The diff for this file is too large to render.
 
tokenizer_config.json ADDED
@@ -0,0 +1,14 @@
+ {
+   "backend": "tokenizers",
+   "bos_token": "<|begin▁of▁sentence|>",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "<|end▁of▁sentence|>",
+   "is_local": true,
+   "legacy": true,
+   "local_files_only": false,
+   "model_max_length": 1048576,
+   "pad_token": "<|end▁of▁sentence|>",
+   "sp_model_kwargs": {},
+   "tokenizer_class": "TokenizersBackend",
+   "unk_token": null
+ }
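
The padding token aliases the EOS token, and `model_max_length` matches `max_position_embeddings` (1,048,576) from config.json. A quick sanity check, as a sketch using the same hypothetical checkpoint path as above:

```python
# Sketch: confirm tokenizer settings line up with config.json.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("./checkpoint")  # hypothetical path
assert tok.pad_token == tok.eos_token == "<|end▁of▁sentence|>"
assert tok.model_max_length == 1_048_576
```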