vpyn commited on
Commit
cf6dc13
·
verified ·
1 Parent(s): acac654

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +2 -0
  2. amax_checkpoint.safetensors +3 -0
  3. config.json +395 -0
  4. generation_config.json +71 -0
  5. hf_quant_config.json +105 -0
  6. merges.txt +0 -0
  7. model-00001-of-00112.safetensors +3 -0
  8. model-00002-of-00112.safetensors +3 -0
  9. model-00003-of-00112.safetensors +3 -0
  10. model-00004-of-00112.safetensors +3 -0
  11. model-00005-of-00112.safetensors +3 -0
  12. model-00006-of-00112.safetensors +3 -0
  13. model-00007-of-00112.safetensors +3 -0
  14. model-00008-of-00112.safetensors +3 -0
  15. model-00009-of-00112.safetensors +3 -0
  16. model-00010-of-00112.safetensors +3 -0
  17. model-00011-of-00112.safetensors +3 -0
  18. model-00012-of-00112.safetensors +3 -0
  19. model-00013-of-00112.safetensors +3 -0
  20. model-00014-of-00112.safetensors +3 -0
  21. model-00015-of-00112.safetensors +3 -0
  22. model-00016-of-00112.safetensors +3 -0
  23. model-00017-of-00112.safetensors +3 -0
  24. model-00018-of-00112.safetensors +3 -0
  25. model-00019-of-00112.safetensors +3 -0
  26. model-00020-of-00112.safetensors +3 -0
  27. model-00021-of-00112.safetensors +3 -0
  28. model-00022-of-00112.safetensors +3 -0
  29. model-00023-of-00112.safetensors +3 -0
  30. model-00024-of-00112.safetensors +3 -0
  31. model-00025-of-00112.safetensors +3 -0
  32. model-00026-of-00112.safetensors +3 -0
  33. model-00027-of-00112.safetensors +3 -0
  34. model-00028-of-00112.safetensors +3 -0
  35. model-00029-of-00112.safetensors +3 -0
  36. model-00030-of-00112.safetensors +3 -0
  37. model-00031-of-00112.safetensors +3 -0
  38. model-00032-of-00112.safetensors +3 -0
  39. model-00033-of-00112.safetensors +3 -0
  40. model-00034-of-00112.safetensors +3 -0
  41. model-00035-of-00112.safetensors +3 -0
  42. model-00036-of-00112.safetensors +3 -0
  43. model-00037-of-00112.safetensors +3 -0
  44. model-00038-of-00112.safetensors +3 -0
  45. model-00039-of-00112.safetensors +3 -0
  46. model-00040-of-00112.safetensors +3 -0
  47. model-00041-of-00112.safetensors +3 -0
  48. model-00042-of-00112.safetensors +3 -0
  49. model-00043-of-00112.safetensors +3 -0
  50. model-00044-of-00112.safetensors +3 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ model.safetensors.index.json filter=lfs diff=lfs merge=lfs -text
37
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
amax_checkpoint.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:665d4558913d18f4a43348d3c2c8c815a0fd91eb1b49cf870c45cf2b907ff8bc
3
+ size 1587908
config.json ADDED
@@ -0,0 +1,395 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "attention_projection_layout": "fused_qkv",
3
+ "vocab_size": 152576,
4
+ "max_position_embeddings": 1048576,
5
+ "hidden_size": 6144,
6
+ "intermediate_size": 16384,
7
+ "num_hidden_layers": 70,
8
+ "num_attention_heads": 128,
9
+ "num_key_value_heads": 8,
10
+ "hidden_act": "silu",
11
+ "initializer_range": 0.02,
12
+ "layernorm_epsilon": 1e-05,
13
+ "use_cache": true,
14
+ "rope_theta": 10000000,
15
+ "rope_parameters": {
16
+ "rope_type": "default",
17
+ "rope_theta": 10000000,
18
+ "partial_rotary_factor": 0.334
19
+ },
20
+ "attention_dropout": 0.0,
21
+ "attention_bias": false,
22
+ "attention_value_scale": 0.612,
23
+ "head_dim": 192,
24
+ "v_head_dim": 128,
25
+ "swa_num_attention_heads": 128,
26
+ "swa_num_key_value_heads": 8,
27
+ "swa_head_dim": 192,
28
+ "swa_v_head_dim": 128,
29
+ "swa_rope_theta": 10000,
30
+ "sliding_window": 128,
31
+ "sliding_window_size": 128,
32
+ "add_full_attention_sink_bias": false,
33
+ "add_swa_attention_sink_bias": true,
34
+ "hybrid_block_size": null,
35
+ "hybrid_layer_pattern": [
36
+ 0,
37
+ 1,
38
+ 1,
39
+ 1,
40
+ 1,
41
+ 1,
42
+ 1,
43
+ 0,
44
+ 1,
45
+ 1,
46
+ 1,
47
+ 1,
48
+ 1,
49
+ 1,
50
+ 1,
51
+ 0,
52
+ 1,
53
+ 1,
54
+ 1,
55
+ 1,
56
+ 1,
57
+ 1,
58
+ 1,
59
+ 0,
60
+ 1,
61
+ 1,
62
+ 1,
63
+ 1,
64
+ 1,
65
+ 1,
66
+ 1,
67
+ 0,
68
+ 1,
69
+ 1,
70
+ 1,
71
+ 1,
72
+ 1,
73
+ 1,
74
+ 1,
75
+ 0,
76
+ 1,
77
+ 1,
78
+ 1,
79
+ 1,
80
+ 1,
81
+ 1,
82
+ 1,
83
+ 0,
84
+ 1,
85
+ 1,
86
+ 1,
87
+ 1,
88
+ 1,
89
+ 1,
90
+ 1,
91
+ 0,
92
+ 1,
93
+ 1,
94
+ 1,
95
+ 1,
96
+ 1,
97
+ 1,
98
+ 0,
99
+ 1,
100
+ 1,
101
+ 1,
102
+ 1,
103
+ 1,
104
+ 1,
105
+ 0
106
+ ],
107
+ "partial_rotary_factor": 0.334,
108
+ "n_routed_experts": 384,
109
+ "moe_intermediate_size": 2048,
110
+ "num_experts_per_tok": 8,
111
+ "routed_scaling_factor": null,
112
+ "scoring_func": "sigmoid",
113
+ "topk_method": "noaux_tc",
114
+ "n_group": 1,
115
+ "topk_group": 1,
116
+ "norm_topk_prob": true,
117
+ "moe_layer_freq": [
118
+ 0,
119
+ 1,
120
+ 1,
121
+ 1,
122
+ 1,
123
+ 1,
124
+ 1,
125
+ 1,
126
+ 1,
127
+ 1,
128
+ 1,
129
+ 1,
130
+ 1,
131
+ 1,
132
+ 1,
133
+ 1,
134
+ 1,
135
+ 1,
136
+ 1,
137
+ 1,
138
+ 1,
139
+ 1,
140
+ 1,
141
+ 1,
142
+ 1,
143
+ 1,
144
+ 1,
145
+ 1,
146
+ 1,
147
+ 1,
148
+ 1,
149
+ 1,
150
+ 1,
151
+ 1,
152
+ 1,
153
+ 1,
154
+ 1,
155
+ 1,
156
+ 1,
157
+ 1,
158
+ 1,
159
+ 1,
160
+ 1,
161
+ 1,
162
+ 1,
163
+ 1,
164
+ 1,
165
+ 1,
166
+ 1,
167
+ 1,
168
+ 1,
169
+ 1,
170
+ 1,
171
+ 1,
172
+ 1,
173
+ 1,
174
+ 1,
175
+ 1,
176
+ 1,
177
+ 1,
178
+ 1,
179
+ 1,
180
+ 1,
181
+ 1,
182
+ 1,
183
+ 1,
184
+ 1,
185
+ 1,
186
+ 1,
187
+ 1
188
+ ],
189
+ "transformers_version": "5.5.3",
190
+ "architectures": [
191
+ "MiMoV2FlashForCausalLM"
192
+ ],
193
+ "output_hidden_states": false,
194
+ "return_dict": true,
195
+ "dtype": "bfloat16",
196
+ "chunk_size_feed_forward": 0,
197
+ "is_encoder_decoder": false,
198
+ "id2label": {
199
+ "0": "LABEL_0",
200
+ "1": "LABEL_1"
201
+ },
202
+ "label2id": {
203
+ "LABEL_0": 0,
204
+ "LABEL_1": 1
205
+ },
206
+ "problem_type": null,
207
+ "_name_or_path": "/models/XiaomiMiMo/MiMo-V2.5-Pro",
208
+ "tie_word_embeddings": false,
209
+ "attention_chunk_size": 128,
210
+ "model_type": "mimo_v2",
211
+ "n_shared_experts": null,
212
+ "layer_types": [
213
+ "full_attention",
214
+ "sliding_attention",
215
+ "sliding_attention",
216
+ "sliding_attention",
217
+ "sliding_attention",
218
+ "sliding_attention",
219
+ "sliding_attention",
220
+ "full_attention",
221
+ "sliding_attention",
222
+ "sliding_attention",
223
+ "sliding_attention",
224
+ "sliding_attention",
225
+ "sliding_attention",
226
+ "sliding_attention",
227
+ "sliding_attention",
228
+ "full_attention",
229
+ "sliding_attention",
230
+ "sliding_attention",
231
+ "sliding_attention",
232
+ "sliding_attention",
233
+ "sliding_attention",
234
+ "sliding_attention",
235
+ "sliding_attention",
236
+ "full_attention",
237
+ "sliding_attention",
238
+ "sliding_attention",
239
+ "sliding_attention",
240
+ "sliding_attention",
241
+ "sliding_attention",
242
+ "sliding_attention",
243
+ "sliding_attention",
244
+ "full_attention",
245
+ "sliding_attention",
246
+ "sliding_attention",
247
+ "sliding_attention",
248
+ "sliding_attention",
249
+ "sliding_attention",
250
+ "sliding_attention",
251
+ "sliding_attention",
252
+ "full_attention",
253
+ "sliding_attention",
254
+ "sliding_attention",
255
+ "sliding_attention",
256
+ "sliding_attention",
257
+ "sliding_attention",
258
+ "sliding_attention",
259
+ "sliding_attention",
260
+ "full_attention",
261
+ "sliding_attention",
262
+ "sliding_attention",
263
+ "sliding_attention",
264
+ "sliding_attention",
265
+ "sliding_attention",
266
+ "sliding_attention",
267
+ "sliding_attention",
268
+ "full_attention",
269
+ "sliding_attention",
270
+ "sliding_attention",
271
+ "sliding_attention",
272
+ "sliding_attention",
273
+ "sliding_attention",
274
+ "sliding_attention",
275
+ "full_attention",
276
+ "sliding_attention",
277
+ "sliding_attention",
278
+ "sliding_attention",
279
+ "sliding_attention",
280
+ "sliding_attention",
281
+ "sliding_attention",
282
+ "full_attention"
283
+ ],
284
+ "output_attentions": false,
285
+ "quantization_config": {
286
+ "config_groups": {
287
+ "group_0": {
288
+ "input_activations": {
289
+ "dynamic": false,
290
+ "num_bits": 4,
291
+ "type": "float",
292
+ "group_size": 16
293
+ },
294
+ "weights": {
295
+ "dynamic": false,
296
+ "num_bits": 4,
297
+ "type": "float",
298
+ "group_size": 16
299
+ },
300
+ "targets": [
301
+ "Linear"
302
+ ]
303
+ }
304
+ },
305
+ "ignore": [
306
+ "lm_head",
307
+ "model.layers.0.self_attn*",
308
+ "model.layers.1.self_attn*",
309
+ "model.layers.10.self_attn*",
310
+ "model.layers.11.self_attn*",
311
+ "model.layers.12.self_attn*",
312
+ "model.layers.13.self_attn*",
313
+ "model.layers.14.self_attn*",
314
+ "model.layers.15.self_attn*",
315
+ "model.layers.16.self_attn*",
316
+ "model.layers.17.self_attn*",
317
+ "model.layers.18.self_attn*",
318
+ "model.layers.19.self_attn*",
319
+ "model.layers.2.self_attn*",
320
+ "model.layers.20.self_attn*",
321
+ "model.layers.21.self_attn*",
322
+ "model.layers.22.self_attn*",
323
+ "model.layers.23.self_attn*",
324
+ "model.layers.24.self_attn*",
325
+ "model.layers.25.self_attn*",
326
+ "model.layers.26.self_attn*",
327
+ "model.layers.27.self_attn*",
328
+ "model.layers.28.self_attn*",
329
+ "model.layers.29.self_attn*",
330
+ "model.layers.3.self_attn*",
331
+ "model.layers.30.self_attn*",
332
+ "model.layers.31.self_attn*",
333
+ "model.layers.32.self_attn*",
334
+ "model.layers.33.self_attn*",
335
+ "model.layers.34.self_attn*",
336
+ "model.layers.35.self_attn*",
337
+ "model.layers.36.self_attn*",
338
+ "model.layers.37.self_attn*",
339
+ "model.layers.38.self_attn*",
340
+ "model.layers.39.self_attn*",
341
+ "model.layers.4.self_attn*",
342
+ "model.layers.40.self_attn*",
343
+ "model.layers.41.self_attn*",
344
+ "model.layers.42.self_attn*",
345
+ "model.layers.43.self_attn*",
346
+ "model.layers.44.self_attn*",
347
+ "model.layers.45.self_attn*",
348
+ "model.layers.46.self_attn*",
349
+ "model.layers.47.self_attn*",
350
+ "model.layers.48.self_attn*",
351
+ "model.layers.49.self_attn*",
352
+ "model.layers.5.self_attn*",
353
+ "model.layers.50.self_attn*",
354
+ "model.layers.51.self_attn*",
355
+ "model.layers.52.self_attn*",
356
+ "model.layers.53.self_attn*",
357
+ "model.layers.54.self_attn*",
358
+ "model.layers.55.self_attn*",
359
+ "model.layers.56.self_attn*",
360
+ "model.layers.57.self_attn*",
361
+ "model.layers.58.self_attn*",
362
+ "model.layers.59.self_attn*",
363
+ "model.layers.6.self_attn*",
364
+ "model.layers.60.self_attn*",
365
+ "model.layers.61.self_attn*",
366
+ "model.layers.62.self_attn*",
367
+ "model.layers.63.self_attn*",
368
+ "model.layers.64.self_attn*",
369
+ "model.layers.65.self_attn*",
370
+ "model.layers.66.self_attn*",
371
+ "model.layers.67.self_attn*",
372
+ "model.layers.68.self_attn*",
373
+ "model.layers.69.self_attn*",
374
+ "model.layers.7.self_attn*",
375
+ "model.layers.8.self_attn*",
376
+ "model.layers.9.self_attn*"
377
+ ],
378
+ "quant_algo": "NVFP4",
379
+ "kv_cache_scheme": {
380
+ "dynamic": false,
381
+ "num_bits": 8,
382
+ "type": "float"
383
+ },
384
+ "producer": {
385
+ "name": "modelopt",
386
+ "version": "0.43.0"
387
+ },
388
+ "quant_method": "modelopt"
389
+ },
390
+ "auto_map": {
391
+ "AutoConfig": "configuration_mimo_v2.MiMoV2Config",
392
+ "AutoModel": "modeling_mimo_v2.MiMoV2Model",
393
+ "AutoModelForCausalLM": "modeling_mimo_v2.MiMoV2ForCausalLM"
394
+ }
395
+ }
generation_config.json ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "max_length": null,
3
+ "max_new_tokens": null,
4
+ "min_length": null,
5
+ "min_new_tokens": null,
6
+ "early_stopping": null,
7
+ "max_time": null,
8
+ "stop_strings": null,
9
+ "do_sample": null,
10
+ "num_beams": null,
11
+ "use_cache": true,
12
+ "cache_implementation": null,
13
+ "cache_config": null,
14
+ "temperature": null,
15
+ "top_k": null,
16
+ "top_p": null,
17
+ "min_p": null,
18
+ "top_h": null,
19
+ "typical_p": null,
20
+ "epsilon_cutoff": null,
21
+ "eta_cutoff": null,
22
+ "repetition_penalty": null,
23
+ "encoder_repetition_penalty": null,
24
+ "length_penalty": null,
25
+ "no_repeat_ngram_size": null,
26
+ "bad_words_ids": null,
27
+ "renormalize_logits": null,
28
+ "forced_bos_token_id": null,
29
+ "forced_eos_token_id": null,
30
+ "remove_invalid_values": null,
31
+ "exponential_decay_length_penalty": null,
32
+ "suppress_tokens": null,
33
+ "begin_suppress_tokens": null,
34
+ "sequence_bias": null,
35
+ "token_healing": null,
36
+ "guidance_scale": null,
37
+ "watermarking_config": null,
38
+ "num_return_sequences": null,
39
+ "output_attentions": false,
40
+ "output_hidden_states": false,
41
+ "output_scores": null,
42
+ "output_logits": null,
43
+ "return_dict_in_generate": null,
44
+ "pad_token_id": null,
45
+ "bos_token_id": null,
46
+ "eos_token_id": null,
47
+ "encoder_no_repeat_ngram_size": null,
48
+ "decoder_start_token_id": null,
49
+ "is_assistant": null,
50
+ "num_assistant_tokens": null,
51
+ "num_assistant_tokens_schedule": null,
52
+ "assistant_confidence_threshold": null,
53
+ "prompt_lookup_num_tokens": null,
54
+ "max_matching_ngram_size": null,
55
+ "assistant_early_exit": null,
56
+ "assistant_lookbehind": null,
57
+ "target_lookbehind": null,
58
+ "compile_config": null,
59
+ "disable_compile": null,
60
+ "continuous_batching_config": null,
61
+ "low_memory": null,
62
+ "penalty_alpha": null,
63
+ "dola_layers": null,
64
+ "diversity_penalty": null,
65
+ "num_beam_groups": null,
66
+ "constraints": null,
67
+ "force_words_ids": null,
68
+ "prefill_chunk_size": null,
69
+ "_from_model_config": true,
70
+ "transformers_version": "5.5.3"
71
+ }
hf_quant_config.json ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config_groups": {
3
+ "group_0": {
4
+ "input_activations": {
5
+ "dynamic": false,
6
+ "num_bits": 4,
7
+ "type": "float",
8
+ "group_size": 16
9
+ },
10
+ "weights": {
11
+ "dynamic": false,
12
+ "num_bits": 4,
13
+ "type": "float",
14
+ "group_size": 16
15
+ },
16
+ "targets": [
17
+ "Linear"
18
+ ]
19
+ }
20
+ },
21
+ "ignore": [
22
+ "lm_head",
23
+ "model.layers.0.self_attn*",
24
+ "model.layers.1.self_attn*",
25
+ "model.layers.10.self_attn*",
26
+ "model.layers.11.self_attn*",
27
+ "model.layers.12.self_attn*",
28
+ "model.layers.13.self_attn*",
29
+ "model.layers.14.self_attn*",
30
+ "model.layers.15.self_attn*",
31
+ "model.layers.16.self_attn*",
32
+ "model.layers.17.self_attn*",
33
+ "model.layers.18.self_attn*",
34
+ "model.layers.19.self_attn*",
35
+ "model.layers.2.self_attn*",
36
+ "model.layers.20.self_attn*",
37
+ "model.layers.21.self_attn*",
38
+ "model.layers.22.self_attn*",
39
+ "model.layers.23.self_attn*",
40
+ "model.layers.24.self_attn*",
41
+ "model.layers.25.self_attn*",
42
+ "model.layers.26.self_attn*",
43
+ "model.layers.27.self_attn*",
44
+ "model.layers.28.self_attn*",
45
+ "model.layers.29.self_attn*",
46
+ "model.layers.3.self_attn*",
47
+ "model.layers.30.self_attn*",
48
+ "model.layers.31.self_attn*",
49
+ "model.layers.32.self_attn*",
50
+ "model.layers.33.self_attn*",
51
+ "model.layers.34.self_attn*",
52
+ "model.layers.35.self_attn*",
53
+ "model.layers.36.self_attn*",
54
+ "model.layers.37.self_attn*",
55
+ "model.layers.38.self_attn*",
56
+ "model.layers.39.self_attn*",
57
+ "model.layers.4.self_attn*",
58
+ "model.layers.40.self_attn*",
59
+ "model.layers.41.self_attn*",
60
+ "model.layers.42.self_attn*",
61
+ "model.layers.43.self_attn*",
62
+ "model.layers.44.self_attn*",
63
+ "model.layers.45.self_attn*",
64
+ "model.layers.46.self_attn*",
65
+ "model.layers.47.self_attn*",
66
+ "model.layers.48.self_attn*",
67
+ "model.layers.49.self_attn*",
68
+ "model.layers.5.self_attn*",
69
+ "model.layers.50.self_attn*",
70
+ "model.layers.51.self_attn*",
71
+ "model.layers.52.self_attn*",
72
+ "model.layers.53.self_attn*",
73
+ "model.layers.54.self_attn*",
74
+ "model.layers.55.self_attn*",
75
+ "model.layers.56.self_attn*",
76
+ "model.layers.57.self_attn*",
77
+ "model.layers.58.self_attn*",
78
+ "model.layers.59.self_attn*",
79
+ "model.layers.6.self_attn*",
80
+ "model.layers.60.self_attn*",
81
+ "model.layers.61.self_attn*",
82
+ "model.layers.62.self_attn*",
83
+ "model.layers.63.self_attn*",
84
+ "model.layers.64.self_attn*",
85
+ "model.layers.65.self_attn*",
86
+ "model.layers.66.self_attn*",
87
+ "model.layers.67.self_attn*",
88
+ "model.layers.68.self_attn*",
89
+ "model.layers.69.self_attn*",
90
+ "model.layers.7.self_attn*",
91
+ "model.layers.8.self_attn*",
92
+ "model.layers.9.self_attn*"
93
+ ],
94
+ "quant_algo": "NVFP4",
95
+ "kv_cache_scheme": {
96
+ "dynamic": false,
97
+ "num_bits": 8,
98
+ "type": "float"
99
+ },
100
+ "producer": {
101
+ "name": "modelopt",
102
+ "version": "0.43.0"
103
+ },
104
+ "quant_method": "modelopt"
105
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model-00001-of-00112.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c4970464e06a3bd0662be9123dd9be40c680203fd36969502bffdb235c00a29
3
+ size 5372359096
model-00002-of-00112.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f34206b252c5ed654271fa74691a72e777ade087c2f6c3833bae7e510c23a079
3
+ size 5373968156
model-00003-of-00112.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b02eca5040df2b5fca1c2e09afa91515bde40fa2a2827200751661304edf1703
3
+ size 5372396308
model-00004-of-00112.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c5a9ff54424b07459cdde92756def9778e928a8d2acb423c1a411502aca8327
3
+ size 5371611128
model-00005-of-00112.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a45f6e9c18f5087dba535c531add73ddcb1fc21913fd55a48407fbc51a174b84
3
+ size 5638234468
model-00006-of-00112.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:096056bd2725482f1e9eb189e147276cbfe730f8e3acf6ab476fc874d40e41d9
3
+ size 5374745024
model-00007-of-00112.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:350f0af0896ad9484d82ca3950f4592fec50d363affc9a5d78a2faa76c889ff3
3
+ size 5371610384
model-00008-of-00112.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5417e4e068e885158fcb810e79c82509ef506fc76837f7511cfcbec30f76db7c
3
+ size 5372396748
model-00009-of-00112.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b62eee84b0e3bd5313e16bd073ba0b1aebf95ad52b15b7a5b5aa00fd04461860
3
+ size 5371612240
model-00010-of-00112.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05f86bf2d491d5a3be8e362c0fe3b246ad8cfcdc2f75cefd5840fc087fe150ce
3
+ size 5371609480
model-00011-of-00112.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b203c6ec56ff811803d494b865445271b70ce2e2d4f7a81a365d334c2438ef7
3
+ size 5372396204
model-00012-of-00112.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26a7df42d1b89cfa00f2984f1eb23574c0bd08e61083791c2ced2b25f23fd9fc
3
+ size 5371610912
model-00013-of-00112.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65d59015f1da205b2582259300a9ed6482203c1f6d8bb4ce6742321b90441030
3
+ size 5372396868
model-00014-of-00112.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c8e97c8ea3fa08d6e4498e858a8e98247959ddb46acedbe745485e1a1eae5cb
3
+ size 5371613104
model-00015-of-00112.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3d474544051f058cf1a0046b3a8faf8d408385d78883bd991378cd7902fea70
3
+ size 5371609624
model-00016-of-00112.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f924e2da8ef3116ab9329b0f7ea90ad606713c5a349c5f8b17528c2367cb2843
3
+ size 5372398804
model-00017-of-00112.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:328389c037689adc8713a0d09aaba1f35744d2ad2148e9d9e301e0a3881aeafe
3
+ size 5371613704
model-00018-of-00112.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f75e833403c58438576beca925ff554ab8c433add4f38d8b1b49539cb45dfab
3
+ size 5371611752
model-00019-of-00112.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2332e6ff521419e052a0376efc8bd39c2dea3b0a9506d8ca3620d596eafaa3ec
3
+ size 5372398196
model-00020-of-00112.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc3edcce8a24a50e1fd56ad50f5edcd041f5b06cbcbdac28d8de78dc6dd646b6
3
+ size 5371612544
model-00021-of-00112.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec7a73ac5d33673685fd5c642cfde3893493c68346379cc90c111e7428ba95ab
3
+ size 5372399132
model-00022-of-00112.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e1c1bc2ec1acf291cb90f9e68be14741f40b461c0e312094a5c5c3716b5de18
3
+ size 5371614568
model-00023-of-00112.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e894b80fe1ea1e4e277491dde8ebee4ffe8e45b77695b0c1af1d7aeda890d901
3
+ size 5371611640
model-00024-of-00112.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:598668fd790eba120f72593c77c3cf7a1fccc3408006bb84c1597dcbd6a2d0e7
3
+ size 5372398588
model-00025-of-00112.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed9b7940e0af6211760d4ca7f2b4298f903322aa219a25011e2c2b6b544cc329
3
+ size 5371613184
model-00026-of-00112.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a2b030b26af2aff7d47cc3b1f0f7f77e69c022f12c7eb8cb2fcf974ddf8986c
3
+ size 5638236724
model-00027-of-00112.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bd641556c051f821a1994a6f16bcb5f5b2adf244a567dd3ef87f92dfe84fc40
3
+ size 5374747216
model-00028-of-00112.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d737686cfd44c39bcdba0e45ab5b17797373b963c03bc6d161d35bc1c72c509
3
+ size 5371612440
model-00029-of-00112.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7ff1b239c32188338288bd584fa80178ed072e99d37ddac7bbb2c72dab76138
3
+ size 5372399028
model-00030-of-00112.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49feb328765f4e30a74cc7cf58b5060a50ba7ba731801e56ce6273c8783589bf
3
+ size 5371614288
model-00031-of-00112.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4fd7d0b310f167cf3b6c2792d8f4d1ffab6f1781c7b50fa24fd1a1aba723bd2
3
+ size 5371611896
model-00032-of-00112.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0714d1153e3073eab02d87b77707fdcc0bd1ec8642e585a8530ca189d40e6a7b
3
+ size 5372398484
model-00033-of-00112.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11cd1f1430541f78a7fc587470d27261407a779774d4a44863a003224b831df0
3
+ size 5371612968
model-00034-of-00112.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5e267e67e456edb3213cd53161d48cdd1d06d03da45249c8af3a617617ccfcb
3
+ size 5372399148
model-00035-of-00112.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38534ab2ed26c8bb329b61e7ab08e763f57d1f7f2790c693ab25aadb276fcad9
3
+ size 5371615152
model-00036-of-00112.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f0794a77c14b5e19133c2c7363fcc3a557bcc731cc7b9a32408f29d5a69026f
3
+ size 5371611856
model-00037-of-00112.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5dbc93fa2412525e3dec0fc53c5e596065c422fc79cd4a20637e55d9eacf88f9
3
+ size 5372398804
model-00038-of-00112.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4dd0f19dc07af3a6e17aac41f32b69f5ffff2a3b6e438caeb7ae9f59db8223d7
3
+ size 5371613704
model-00039-of-00112.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ad5474b977a9b6ccc2c8ef188db939b8d70f8f40f0daab0d9b96d1663f98eab
3
+ size 5371611752
model-00040-of-00112.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9df3c348f83714ca1b9b44818e3d858792ff81e904f0e0d8f06708f07064744a
3
+ size 5372398196
model-00041-of-00112.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f0250760de8c81ccef71c1857091da01f977dcc904b4512cae84914561edfef
3
+ size 5371612544
model-00042-of-00112.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fc6cc10e87788ea0df8c7443b79e8800186443caab030b9e2b52a9f91ba0677
3
+ size 5372399132
model-00043-of-00112.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:945beb91cdb5d9fa7f3f2562ee372cceb56272817c5f3a182a055444a9fa530e
3
+ size 5371614568
model-00044-of-00112.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0db2ef84b694c47dd2d647ab236346401488660281674ad70a163fadeec18aad
3
+ size 5371612008