xinhe commited on
Commit
c626afb
·
verified ·
1 Parent(s): c0a8af2

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. config.json +398 -454
  2. model-00001-of-00098.safetensors +3 -0
  3. model-00002-of-00098.safetensors +3 -0
  4. model-00003-of-00098.safetensors +3 -0
  5. model-00004-of-00098.safetensors +3 -0
  6. model-00005-of-00098.safetensors +3 -0
  7. model-00006-of-00098.safetensors +3 -0
  8. model-00007-of-00098.safetensors +3 -0
  9. model-00008-of-00098.safetensors +3 -0
  10. model-00009-of-00098.safetensors +3 -0
  11. model-00010-of-00098.safetensors +3 -0
  12. model-00011-of-00098.safetensors +3 -0
  13. model-00012-of-00098.safetensors +3 -0
  14. model-00013-of-00098.safetensors +3 -0
  15. model-00014-of-00098.safetensors +3 -0
  16. model-00015-of-00098.safetensors +3 -0
  17. model-00016-of-00098.safetensors +3 -0
  18. model-00017-of-00098.safetensors +3 -0
  19. model-00018-of-00098.safetensors +3 -0
  20. model-00019-of-00098.safetensors +3 -0
  21. model-00020-of-00098.safetensors +3 -0
  22. model-00021-of-00098.safetensors +3 -0
  23. model-00022-of-00098.safetensors +3 -0
  24. model-00023-of-00098.safetensors +3 -0
  25. model-00024-of-00098.safetensors +3 -0
  26. model-00025-of-00098.safetensors +3 -0
  27. model-00026-of-00098.safetensors +3 -0
  28. model-00027-of-00098.safetensors +3 -0
  29. model-00028-of-00098.safetensors +3 -0
  30. model-00029-of-00098.safetensors +3 -0
  31. model-00030-of-00098.safetensors +3 -0
  32. model-00031-of-00098.safetensors +3 -0
  33. model-00032-of-00098.safetensors +3 -0
  34. model-00033-of-00098.safetensors +3 -0
  35. model-00034-of-00098.safetensors +3 -0
  36. model-00035-of-00098.safetensors +3 -0
  37. model-00036-of-00098.safetensors +3 -0
  38. model-00037-of-00098.safetensors +3 -0
  39. model-00038-of-00098.safetensors +3 -0
  40. model-00039-of-00098.safetensors +3 -0
  41. model-00040-of-00098.safetensors +3 -0
  42. model-00041-of-00098.safetensors +3 -0
  43. model-00042-of-00098.safetensors +3 -0
  44. model-00043-of-00098.safetensors +3 -0
  45. model-00044-of-00098.safetensors +3 -0
  46. model-00045-of-00098.safetensors +3 -0
  47. model-00046-of-00098.safetensors +3 -0
  48. model-00047-of-00098.safetensors +3 -0
  49. model-00048-of-00098.safetensors +3 -0
  50. model-00049-of-00098.safetensors +3 -0
config.json CHANGED
@@ -159,76 +159,26 @@
159
  "bits": 16,
160
  "data_type": "float"
161
  },
162
- ".*layers\\.46.*": {
163
- "bits": 8
164
- },
165
- ".*layers\\.47.*": {
166
- "bits": 8
167
- },
168
- ".*mlp.*": {
169
- "bits": 8
170
- },
171
  ".*moe\\.gate.*": {
172
  "bits": 16,
173
  "data_type": "float"
174
  },
175
- ".*self_attn.*": {
176
- "bits": 8
 
177
  },
178
  ".*shared_head.*": {
179
  "bits": 16,
180
  "data_type": "float"
181
  },
182
- "model.layers.0.mlp.down_proj": {
183
- "bits": 8
184
- },
185
- "model.layers.0.mlp.gate_proj": {
186
- "bits": 8
187
- },
188
- "model.layers.0.mlp.up_proj": {
189
- "bits": 8
190
- },
191
  "model.layers.0.self_attn.g_proj": {
192
  "bits": 16,
193
  "data_type": "float"
194
  },
195
- "model.layers.0.self_attn.k_proj": {
196
- "bits": 8
197
- },
198
- "model.layers.0.self_attn.o_proj": {
199
- "bits": 8
200
- },
201
- "model.layers.0.self_attn.q_proj": {
202
- "bits": 8
203
- },
204
- "model.layers.0.self_attn.v_proj": {
205
- "bits": 8
206
- },
207
- "model.layers.1.mlp.down_proj": {
208
- "bits": 8
209
- },
210
- "model.layers.1.mlp.gate_proj": {
211
- "bits": 8
212
- },
213
- "model.layers.1.mlp.up_proj": {
214
- "bits": 8
215
- },
216
  "model.layers.1.self_attn.g_proj": {
217
  "bits": 16,
218
  "data_type": "float"
219
  },
220
- "model.layers.1.self_attn.k_proj": {
221
- "bits": 8
222
- },
223
- "model.layers.1.self_attn.o_proj": {
224
- "bits": 8
225
- },
226
- "model.layers.1.self_attn.q_proj": {
227
- "bits": 8
228
- },
229
- "model.layers.1.self_attn.v_proj": {
230
- "bits": 8
231
- },
232
  "model.layers.10.moe.gate": {
233
  "bits": 16,
234
  "data_type": "float"
@@ -237,17 +187,17 @@
237
  "bits": 16,
238
  "data_type": "float"
239
  },
240
- "model.layers.10.self_attn.k_proj": {
241
- "bits": 8
242
- },
243
- "model.layers.10.self_attn.o_proj": {
244
- "bits": 8
245
  },
246
- "model.layers.10.self_attn.q_proj": {
247
- "bits": 8
 
248
  },
249
- "model.layers.10.self_attn.v_proj": {
250
- "bits": 8
 
251
  },
252
  "model.layers.11.moe.gate": {
253
  "bits": 16,
@@ -257,17 +207,17 @@
257
  "bits": 16,
258
  "data_type": "float"
259
  },
260
- "model.layers.11.self_attn.k_proj": {
261
- "bits": 8
262
- },
263
- "model.layers.11.self_attn.o_proj": {
264
- "bits": 8
265
  },
266
- "model.layers.11.self_attn.q_proj": {
267
- "bits": 8
 
268
  },
269
- "model.layers.11.self_attn.v_proj": {
270
- "bits": 8
 
271
  },
272
  "model.layers.12.moe.gate": {
273
  "bits": 16,
@@ -277,17 +227,17 @@
277
  "bits": 16,
278
  "data_type": "float"
279
  },
280
- "model.layers.12.self_attn.k_proj": {
281
- "bits": 8
282
- },
283
- "model.layers.12.self_attn.o_proj": {
284
- "bits": 8
285
  },
286
- "model.layers.12.self_attn.q_proj": {
287
- "bits": 8
 
288
  },
289
- "model.layers.12.self_attn.v_proj": {
290
- "bits": 8
 
291
  },
292
  "model.layers.13.moe.gate": {
293
  "bits": 16,
@@ -297,17 +247,17 @@
297
  "bits": 16,
298
  "data_type": "float"
299
  },
300
- "model.layers.13.self_attn.k_proj": {
301
- "bits": 8
302
- },
303
- "model.layers.13.self_attn.o_proj": {
304
- "bits": 8
305
  },
306
- "model.layers.13.self_attn.q_proj": {
307
- "bits": 8
 
308
  },
309
- "model.layers.13.self_attn.v_proj": {
310
- "bits": 8
 
311
  },
312
  "model.layers.14.moe.gate": {
313
  "bits": 16,
@@ -317,17 +267,17 @@
317
  "bits": 16,
318
  "data_type": "float"
319
  },
320
- "model.layers.14.self_attn.k_proj": {
321
- "bits": 8
322
- },
323
- "model.layers.14.self_attn.o_proj": {
324
- "bits": 8
325
  },
326
- "model.layers.14.self_attn.q_proj": {
327
- "bits": 8
 
328
  },
329
- "model.layers.14.self_attn.v_proj": {
330
- "bits": 8
 
331
  },
332
  "model.layers.15.moe.gate": {
333
  "bits": 16,
@@ -337,17 +287,17 @@
337
  "bits": 16,
338
  "data_type": "float"
339
  },
340
- "model.layers.15.self_attn.k_proj": {
341
- "bits": 8
342
- },
343
- "model.layers.15.self_attn.o_proj": {
344
- "bits": 8
345
  },
346
- "model.layers.15.self_attn.q_proj": {
347
- "bits": 8
 
348
  },
349
- "model.layers.15.self_attn.v_proj": {
350
- "bits": 8
 
351
  },
352
  "model.layers.16.moe.gate": {
353
  "bits": 16,
@@ -357,17 +307,17 @@
357
  "bits": 16,
358
  "data_type": "float"
359
  },
360
- "model.layers.16.self_attn.k_proj": {
361
- "bits": 8
362
- },
363
- "model.layers.16.self_attn.o_proj": {
364
- "bits": 8
365
  },
366
- "model.layers.16.self_attn.q_proj": {
367
- "bits": 8
 
368
  },
369
- "model.layers.16.self_attn.v_proj": {
370
- "bits": 8
 
371
  },
372
  "model.layers.17.moe.gate": {
373
  "bits": 16,
@@ -377,17 +327,17 @@
377
  "bits": 16,
378
  "data_type": "float"
379
  },
380
- "model.layers.17.self_attn.k_proj": {
381
- "bits": 8
382
- },
383
- "model.layers.17.self_attn.o_proj": {
384
- "bits": 8
385
  },
386
- "model.layers.17.self_attn.q_proj": {
387
- "bits": 8
 
388
  },
389
- "model.layers.17.self_attn.v_proj": {
390
- "bits": 8
 
391
  },
392
  "model.layers.18.moe.gate": {
393
  "bits": 16,
@@ -397,17 +347,17 @@
397
  "bits": 16,
398
  "data_type": "float"
399
  },
400
- "model.layers.18.self_attn.k_proj": {
401
- "bits": 8
402
- },
403
- "model.layers.18.self_attn.o_proj": {
404
- "bits": 8
405
  },
406
- "model.layers.18.self_attn.q_proj": {
407
- "bits": 8
 
408
  },
409
- "model.layers.18.self_attn.v_proj": {
410
- "bits": 8
 
411
  },
412
  "model.layers.19.moe.gate": {
413
  "bits": 16,
@@ -417,43 +367,22 @@
417
  "bits": 16,
418
  "data_type": "float"
419
  },
420
- "model.layers.19.self_attn.k_proj": {
421
- "bits": 8
422
- },
423
- "model.layers.19.self_attn.o_proj": {
424
- "bits": 8
425
- },
426
- "model.layers.19.self_attn.q_proj": {
427
- "bits": 8
428
- },
429
- "model.layers.19.self_attn.v_proj": {
430
- "bits": 8
431
- },
432
- "model.layers.2.mlp.down_proj": {
433
- "bits": 8
434
  },
435
- "model.layers.2.mlp.gate_proj": {
436
- "bits": 8
 
437
  },
438
- "model.layers.2.mlp.up_proj": {
439
- "bits": 8
 
440
  },
441
  "model.layers.2.self_attn.g_proj": {
442
  "bits": 16,
443
  "data_type": "float"
444
  },
445
- "model.layers.2.self_attn.k_proj": {
446
- "bits": 8
447
- },
448
- "model.layers.2.self_attn.o_proj": {
449
- "bits": 8
450
- },
451
- "model.layers.2.self_attn.q_proj": {
452
- "bits": 8
453
- },
454
- "model.layers.2.self_attn.v_proj": {
455
- "bits": 8
456
- },
457
  "model.layers.20.moe.gate": {
458
  "bits": 16,
459
  "data_type": "float"
@@ -462,17 +391,17 @@
462
  "bits": 16,
463
  "data_type": "float"
464
  },
465
- "model.layers.20.self_attn.k_proj": {
466
- "bits": 8
467
- },
468
- "model.layers.20.self_attn.o_proj": {
469
- "bits": 8
470
  },
471
- "model.layers.20.self_attn.q_proj": {
472
- "bits": 8
 
473
  },
474
- "model.layers.20.self_attn.v_proj": {
475
- "bits": 8
 
476
  },
477
  "model.layers.21.moe.gate": {
478
  "bits": 16,
@@ -482,17 +411,17 @@
482
  "bits": 16,
483
  "data_type": "float"
484
  },
485
- "model.layers.21.self_attn.k_proj": {
486
- "bits": 8
487
- },
488
- "model.layers.21.self_attn.o_proj": {
489
- "bits": 8
490
  },
491
- "model.layers.21.self_attn.q_proj": {
492
- "bits": 8
 
493
  },
494
- "model.layers.21.self_attn.v_proj": {
495
- "bits": 8
 
496
  },
497
  "model.layers.22.moe.gate": {
498
  "bits": 16,
@@ -502,17 +431,17 @@
502
  "bits": 16,
503
  "data_type": "float"
504
  },
505
- "model.layers.22.self_attn.k_proj": {
506
- "bits": 8
507
- },
508
- "model.layers.22.self_attn.o_proj": {
509
- "bits": 8
510
  },
511
- "model.layers.22.self_attn.q_proj": {
512
- "bits": 8
 
513
  },
514
- "model.layers.22.self_attn.v_proj": {
515
- "bits": 8
 
516
  },
517
  "model.layers.23.moe.gate": {
518
  "bits": 16,
@@ -522,17 +451,17 @@
522
  "bits": 16,
523
  "data_type": "float"
524
  },
525
- "model.layers.23.self_attn.k_proj": {
526
- "bits": 8
527
- },
528
- "model.layers.23.self_attn.o_proj": {
529
- "bits": 8
530
  },
531
- "model.layers.23.self_attn.q_proj": {
532
- "bits": 8
 
533
  },
534
- "model.layers.23.self_attn.v_proj": {
535
- "bits": 8
 
536
  },
537
  "model.layers.24.moe.gate": {
538
  "bits": 16,
@@ -542,17 +471,17 @@
542
  "bits": 16,
543
  "data_type": "float"
544
  },
545
- "model.layers.24.self_attn.k_proj": {
546
- "bits": 8
547
- },
548
- "model.layers.24.self_attn.o_proj": {
549
- "bits": 8
550
  },
551
- "model.layers.24.self_attn.q_proj": {
552
- "bits": 8
 
553
  },
554
- "model.layers.24.self_attn.v_proj": {
555
- "bits": 8
 
556
  },
557
  "model.layers.25.moe.gate": {
558
  "bits": 16,
@@ -562,17 +491,17 @@
562
  "bits": 16,
563
  "data_type": "float"
564
  },
565
- "model.layers.25.self_attn.k_proj": {
566
- "bits": 8
567
- },
568
- "model.layers.25.self_attn.o_proj": {
569
- "bits": 8
570
  },
571
- "model.layers.25.self_attn.q_proj": {
572
- "bits": 8
 
573
  },
574
- "model.layers.25.self_attn.v_proj": {
575
- "bits": 8
 
576
  },
577
  "model.layers.26.moe.gate": {
578
  "bits": 16,
@@ -582,17 +511,17 @@
582
  "bits": 16,
583
  "data_type": "float"
584
  },
585
- "model.layers.26.self_attn.k_proj": {
586
- "bits": 8
587
- },
588
- "model.layers.26.self_attn.o_proj": {
589
- "bits": 8
590
  },
591
- "model.layers.26.self_attn.q_proj": {
592
- "bits": 8
 
593
  },
594
- "model.layers.26.self_attn.v_proj": {
595
- "bits": 8
 
596
  },
597
  "model.layers.27.moe.gate": {
598
  "bits": 16,
@@ -602,17 +531,17 @@
602
  "bits": 16,
603
  "data_type": "float"
604
  },
605
- "model.layers.27.self_attn.k_proj": {
606
- "bits": 8
607
- },
608
- "model.layers.27.self_attn.o_proj": {
609
- "bits": 8
610
  },
611
- "model.layers.27.self_attn.q_proj": {
612
- "bits": 8
 
613
  },
614
- "model.layers.27.self_attn.v_proj": {
615
- "bits": 8
 
616
  },
617
  "model.layers.28.moe.gate": {
618
  "bits": 16,
@@ -622,17 +551,17 @@
622
  "bits": 16,
623
  "data_type": "float"
624
  },
625
- "model.layers.28.self_attn.k_proj": {
626
- "bits": 8
627
- },
628
- "model.layers.28.self_attn.o_proj": {
629
- "bits": 8
630
  },
631
- "model.layers.28.self_attn.q_proj": {
632
- "bits": 8
 
633
  },
634
- "model.layers.28.self_attn.v_proj": {
635
- "bits": 8
 
636
  },
637
  "model.layers.29.moe.gate": {
638
  "bits": 16,
@@ -642,17 +571,17 @@
642
  "bits": 16,
643
  "data_type": "float"
644
  },
645
- "model.layers.29.self_attn.k_proj": {
646
- "bits": 8
647
- },
648
- "model.layers.29.self_attn.o_proj": {
649
- "bits": 8
650
  },
651
- "model.layers.29.self_attn.q_proj": {
652
- "bits": 8
 
653
  },
654
- "model.layers.29.self_attn.v_proj": {
655
- "bits": 8
 
656
  },
657
  "model.layers.3.moe.gate": {
658
  "bits": 16,
@@ -662,17 +591,17 @@
662
  "bits": 16,
663
  "data_type": "float"
664
  },
665
- "model.layers.3.self_attn.k_proj": {
666
- "bits": 8
667
- },
668
- "model.layers.3.self_attn.o_proj": {
669
- "bits": 8
670
  },
671
- "model.layers.3.self_attn.q_proj": {
672
- "bits": 8
 
673
  },
674
- "model.layers.3.self_attn.v_proj": {
675
- "bits": 8
 
676
  },
677
  "model.layers.30.moe.gate": {
678
  "bits": 16,
@@ -682,17 +611,17 @@
682
  "bits": 16,
683
  "data_type": "float"
684
  },
685
- "model.layers.30.self_attn.k_proj": {
686
- "bits": 8
687
- },
688
- "model.layers.30.self_attn.o_proj": {
689
- "bits": 8
690
  },
691
- "model.layers.30.self_attn.q_proj": {
692
- "bits": 8
 
693
  },
694
- "model.layers.30.self_attn.v_proj": {
695
- "bits": 8
 
696
  },
697
  "model.layers.31.moe.gate": {
698
  "bits": 16,
@@ -702,17 +631,17 @@
702
  "bits": 16,
703
  "data_type": "float"
704
  },
705
- "model.layers.31.self_attn.k_proj": {
706
- "bits": 8
707
- },
708
- "model.layers.31.self_attn.o_proj": {
709
- "bits": 8
710
  },
711
- "model.layers.31.self_attn.q_proj": {
712
- "bits": 8
 
713
  },
714
- "model.layers.31.self_attn.v_proj": {
715
- "bits": 8
 
716
  },
717
  "model.layers.32.moe.gate": {
718
  "bits": 16,
@@ -722,17 +651,17 @@
722
  "bits": 16,
723
  "data_type": "float"
724
  },
725
- "model.layers.32.self_attn.k_proj": {
726
- "bits": 8
727
- },
728
- "model.layers.32.self_attn.o_proj": {
729
- "bits": 8
730
  },
731
- "model.layers.32.self_attn.q_proj": {
732
- "bits": 8
 
733
  },
734
- "model.layers.32.self_attn.v_proj": {
735
- "bits": 8
 
736
  },
737
  "model.layers.33.moe.gate": {
738
  "bits": 16,
@@ -742,17 +671,17 @@
742
  "bits": 16,
743
  "data_type": "float"
744
  },
745
- "model.layers.33.self_attn.k_proj": {
746
- "bits": 8
747
- },
748
- "model.layers.33.self_attn.o_proj": {
749
- "bits": 8
750
  },
751
- "model.layers.33.self_attn.q_proj": {
752
- "bits": 8
 
753
  },
754
- "model.layers.33.self_attn.v_proj": {
755
- "bits": 8
 
756
  },
757
  "model.layers.34.moe.gate": {
758
  "bits": 16,
@@ -762,17 +691,17 @@
762
  "bits": 16,
763
  "data_type": "float"
764
  },
765
- "model.layers.34.self_attn.k_proj": {
766
- "bits": 8
767
- },
768
- "model.layers.34.self_attn.o_proj": {
769
- "bits": 8
770
  },
771
- "model.layers.34.self_attn.q_proj": {
772
- "bits": 8
 
773
  },
774
- "model.layers.34.self_attn.v_proj": {
775
- "bits": 8
 
776
  },
777
  "model.layers.35.moe.gate": {
778
  "bits": 16,
@@ -782,17 +711,17 @@
782
  "bits": 16,
783
  "data_type": "float"
784
  },
785
- "model.layers.35.self_attn.k_proj": {
786
- "bits": 8
787
- },
788
- "model.layers.35.self_attn.o_proj": {
789
- "bits": 8
790
  },
791
- "model.layers.35.self_attn.q_proj": {
792
- "bits": 8
 
793
  },
794
- "model.layers.35.self_attn.v_proj": {
795
- "bits": 8
 
796
  },
797
  "model.layers.36.moe.gate": {
798
  "bits": 16,
@@ -802,17 +731,17 @@
802
  "bits": 16,
803
  "data_type": "float"
804
  },
805
- "model.layers.36.self_attn.k_proj": {
806
- "bits": 8
807
- },
808
- "model.layers.36.self_attn.o_proj": {
809
- "bits": 8
810
  },
811
- "model.layers.36.self_attn.q_proj": {
812
- "bits": 8
 
813
  },
814
- "model.layers.36.self_attn.v_proj": {
815
- "bits": 8
 
816
  },
817
  "model.layers.37.moe.gate": {
818
  "bits": 16,
@@ -822,17 +751,17 @@
822
  "bits": 16,
823
  "data_type": "float"
824
  },
825
- "model.layers.37.self_attn.k_proj": {
826
- "bits": 8
827
- },
828
- "model.layers.37.self_attn.o_proj": {
829
- "bits": 8
830
  },
831
- "model.layers.37.self_attn.q_proj": {
832
- "bits": 8
 
833
  },
834
- "model.layers.37.self_attn.v_proj": {
835
- "bits": 8
 
836
  },
837
  "model.layers.38.moe.gate": {
838
  "bits": 16,
@@ -842,17 +771,17 @@
842
  "bits": 16,
843
  "data_type": "float"
844
  },
845
- "model.layers.38.self_attn.k_proj": {
846
- "bits": 8
847
- },
848
- "model.layers.38.self_attn.o_proj": {
849
- "bits": 8
850
  },
851
- "model.layers.38.self_attn.q_proj": {
852
- "bits": 8
 
853
  },
854
- "model.layers.38.self_attn.v_proj": {
855
- "bits": 8
 
856
  },
857
  "model.layers.39.moe.gate": {
858
  "bits": 16,
@@ -862,17 +791,17 @@
862
  "bits": 16,
863
  "data_type": "float"
864
  },
865
- "model.layers.39.self_attn.k_proj": {
866
- "bits": 8
867
- },
868
- "model.layers.39.self_attn.o_proj": {
869
- "bits": 8
870
  },
871
- "model.layers.39.self_attn.q_proj": {
872
- "bits": 8
 
873
  },
874
- "model.layers.39.self_attn.v_proj": {
875
- "bits": 8
 
876
  },
877
  "model.layers.4.moe.gate": {
878
  "bits": 16,
@@ -882,17 +811,17 @@
882
  "bits": 16,
883
  "data_type": "float"
884
  },
885
- "model.layers.4.self_attn.k_proj": {
886
- "bits": 8
887
- },
888
- "model.layers.4.self_attn.o_proj": {
889
- "bits": 8
890
  },
891
- "model.layers.4.self_attn.q_proj": {
892
- "bits": 8
 
893
  },
894
- "model.layers.4.self_attn.v_proj": {
895
- "bits": 8
 
896
  },
897
  "model.layers.40.moe.gate": {
898
  "bits": 16,
@@ -902,17 +831,17 @@
902
  "bits": 16,
903
  "data_type": "float"
904
  },
905
- "model.layers.40.self_attn.k_proj": {
906
- "bits": 8
907
- },
908
- "model.layers.40.self_attn.o_proj": {
909
- "bits": 8
910
  },
911
- "model.layers.40.self_attn.q_proj": {
912
- "bits": 8
 
913
  },
914
- "model.layers.40.self_attn.v_proj": {
915
- "bits": 8
 
916
  },
917
  "model.layers.41.moe.gate": {
918
  "bits": 16,
@@ -922,17 +851,17 @@
922
  "bits": 16,
923
  "data_type": "float"
924
  },
925
- "model.layers.41.self_attn.k_proj": {
926
- "bits": 8
927
- },
928
- "model.layers.41.self_attn.o_proj": {
929
- "bits": 8
930
  },
931
- "model.layers.41.self_attn.q_proj": {
932
- "bits": 8
 
933
  },
934
- "model.layers.41.self_attn.v_proj": {
935
- "bits": 8
 
936
  },
937
  "model.layers.42.moe.gate": {
938
  "bits": 16,
@@ -942,17 +871,17 @@
942
  "bits": 16,
943
  "data_type": "float"
944
  },
945
- "model.layers.42.self_attn.k_proj": {
946
- "bits": 8
947
- },
948
- "model.layers.42.self_attn.o_proj": {
949
- "bits": 8
950
  },
951
- "model.layers.42.self_attn.q_proj": {
952
- "bits": 8
 
953
  },
954
- "model.layers.42.self_attn.v_proj": {
955
- "bits": 8
 
956
  },
957
  "model.layers.43.moe.gate": {
958
  "bits": 16,
@@ -962,17 +891,17 @@
962
  "bits": 16,
963
  "data_type": "float"
964
  },
965
- "model.layers.43.self_attn.k_proj": {
966
- "bits": 8
967
- },
968
- "model.layers.43.self_attn.o_proj": {
969
- "bits": 8
970
  },
971
- "model.layers.43.self_attn.q_proj": {
972
- "bits": 8
 
973
  },
974
- "model.layers.43.self_attn.v_proj": {
975
- "bits": 8
 
976
  },
977
  "model.layers.44.moe.gate": {
978
  "bits": 16,
@@ -982,17 +911,17 @@
982
  "bits": 16,
983
  "data_type": "float"
984
  },
985
- "model.layers.44.self_attn.k_proj": {
986
- "bits": 8
987
- },
988
- "model.layers.44.self_attn.o_proj": {
989
- "bits": 8
990
  },
991
- "model.layers.44.self_attn.q_proj": {
992
- "bits": 8
 
993
  },
994
- "model.layers.44.self_attn.v_proj": {
995
- "bits": 8
 
996
  },
997
  "model.layers.5.moe.gate": {
998
  "bits": 16,
@@ -1002,17 +931,17 @@
1002
  "bits": 16,
1003
  "data_type": "float"
1004
  },
1005
- "model.layers.5.self_attn.k_proj": {
1006
- "bits": 8
1007
- },
1008
- "model.layers.5.self_attn.o_proj": {
1009
- "bits": 8
1010
  },
1011
- "model.layers.5.self_attn.q_proj": {
1012
- "bits": 8
 
1013
  },
1014
- "model.layers.5.self_attn.v_proj": {
1015
- "bits": 8
 
1016
  },
1017
  "model.layers.6.moe.gate": {
1018
  "bits": 16,
@@ -1022,17 +951,17 @@
1022
  "bits": 16,
1023
  "data_type": "float"
1024
  },
1025
- "model.layers.6.self_attn.k_proj": {
1026
- "bits": 8
1027
- },
1028
- "model.layers.6.self_attn.o_proj": {
1029
- "bits": 8
1030
  },
1031
- "model.layers.6.self_attn.q_proj": {
1032
- "bits": 8
 
1033
  },
1034
- "model.layers.6.self_attn.v_proj": {
1035
- "bits": 8
 
1036
  },
1037
  "model.layers.7.moe.gate": {
1038
  "bits": 16,
@@ -1042,17 +971,17 @@
1042
  "bits": 16,
1043
  "data_type": "float"
1044
  },
1045
- "model.layers.7.self_attn.k_proj": {
1046
- "bits": 8
1047
- },
1048
- "model.layers.7.self_attn.o_proj": {
1049
- "bits": 8
1050
  },
1051
- "model.layers.7.self_attn.q_proj": {
1052
- "bits": 8
 
1053
  },
1054
- "model.layers.7.self_attn.v_proj": {
1055
- "bits": 8
 
1056
  },
1057
  "model.layers.8.moe.gate": {
1058
  "bits": 16,
@@ -1062,17 +991,17 @@
1062
  "bits": 16,
1063
  "data_type": "float"
1064
  },
1065
- "model.layers.8.self_attn.k_proj": {
1066
- "bits": 8
1067
- },
1068
- "model.layers.8.self_attn.o_proj": {
1069
- "bits": 8
1070
  },
1071
- "model.layers.8.self_attn.q_proj": {
1072
- "bits": 8
 
1073
  },
1074
- "model.layers.8.self_attn.v_proj": {
1075
- "bits": 8
 
1076
  },
1077
  "model.layers.9.moe.gate": {
1078
  "bits": 16,
@@ -1082,17 +1011,17 @@
1082
  "bits": 16,
1083
  "data_type": "float"
1084
  },
1085
- "model.layers.9.self_attn.k_proj": {
1086
- "bits": 8
1087
- },
1088
- "model.layers.9.self_attn.o_proj": {
1089
- "bits": 8
1090
  },
1091
- "model.layers.9.self_attn.q_proj": {
1092
- "bits": 8
 
1093
  },
1094
- "model.layers.9.self_attn.v_proj": {
1095
- "bits": 8
 
1096
  },
1097
  "model.layers.45.eh_proj": {
1098
  "bits": 16,
@@ -1134,17 +1063,32 @@
1134
  "bits": 16,
1135
  "data_type": "fp"
1136
  },
 
 
 
 
 
 
 
 
1137
  "model.layers.46.transformer.shared_head.output": {
1138
  "bits": 16,
1139
  "data_type": "fp"
1140
  },
 
 
 
 
 
 
 
 
1141
  "model.layers.47.transformer.shared_head.output": {
1142
  "bits": 16,
1143
  "data_type": "fp"
1144
  }
1145
  },
1146
  "group_size": 128,
1147
- "iters": 0,
1148
  "packing_format": "auto_round:auto_gptq",
1149
  "quant_method": "auto-round",
1150
  "sym": true,
@@ -1332,4 +1276,4 @@
1332
  "full_attention"
1333
  ],
1334
  "zero_centered": true
1335
- }
 
159
  "bits": 16,
160
  "data_type": "float"
161
  },
 
 
 
 
 
 
 
 
 
162
  ".*moe\\.gate.*": {
163
  "bits": 16,
164
  "data_type": "float"
165
  },
166
+ ".*share_expert.*": {
167
+ "bits": 16,
168
+ "data_type": "float"
169
  },
170
  ".*shared_head.*": {
171
  "bits": 16,
172
  "data_type": "float"
173
  },
 
 
 
 
 
 
 
 
 
174
  "model.layers.0.self_attn.g_proj": {
175
  "bits": 16,
176
  "data_type": "float"
177
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
  "model.layers.1.self_attn.g_proj": {
179
  "bits": 16,
180
  "data_type": "float"
181
  },
 
 
 
 
 
 
 
 
 
 
 
 
182
  "model.layers.10.moe.gate": {
183
  "bits": 16,
184
  "data_type": "float"
 
187
  "bits": 16,
188
  "data_type": "float"
189
  },
190
+ "model.layers.10.share_expert.down_proj": {
191
+ "bits": 16,
192
+ "data_type": "float"
 
 
193
  },
194
+ "model.layers.10.share_expert.gate_proj": {
195
+ "bits": 16,
196
+ "data_type": "float"
197
  },
198
+ "model.layers.10.share_expert.up_proj": {
199
+ "bits": 16,
200
+ "data_type": "float"
201
  },
202
  "model.layers.11.moe.gate": {
203
  "bits": 16,
 
207
  "bits": 16,
208
  "data_type": "float"
209
  },
210
+ "model.layers.11.share_expert.down_proj": {
211
+ "bits": 16,
212
+ "data_type": "float"
 
 
213
  },
214
+ "model.layers.11.share_expert.gate_proj": {
215
+ "bits": 16,
216
+ "data_type": "float"
217
  },
218
+ "model.layers.11.share_expert.up_proj": {
219
+ "bits": 16,
220
+ "data_type": "float"
221
  },
222
  "model.layers.12.moe.gate": {
223
  "bits": 16,
 
227
  "bits": 16,
228
  "data_type": "float"
229
  },
230
+ "model.layers.12.share_expert.down_proj": {
231
+ "bits": 16,
232
+ "data_type": "float"
 
 
233
  },
234
+ "model.layers.12.share_expert.gate_proj": {
235
+ "bits": 16,
236
+ "data_type": "float"
237
  },
238
+ "model.layers.12.share_expert.up_proj": {
239
+ "bits": 16,
240
+ "data_type": "float"
241
  },
242
  "model.layers.13.moe.gate": {
243
  "bits": 16,
 
247
  "bits": 16,
248
  "data_type": "float"
249
  },
250
+ "model.layers.13.share_expert.down_proj": {
251
+ "bits": 16,
252
+ "data_type": "float"
 
 
253
  },
254
+ "model.layers.13.share_expert.gate_proj": {
255
+ "bits": 16,
256
+ "data_type": "float"
257
  },
258
+ "model.layers.13.share_expert.up_proj": {
259
+ "bits": 16,
260
+ "data_type": "float"
261
  },
262
  "model.layers.14.moe.gate": {
263
  "bits": 16,
 
267
  "bits": 16,
268
  "data_type": "float"
269
  },
270
+ "model.layers.14.share_expert.down_proj": {
271
+ "bits": 16,
272
+ "data_type": "float"
 
 
273
  },
274
+ "model.layers.14.share_expert.gate_proj": {
275
+ "bits": 16,
276
+ "data_type": "float"
277
  },
278
+ "model.layers.14.share_expert.up_proj": {
279
+ "bits": 16,
280
+ "data_type": "float"
281
  },
282
  "model.layers.15.moe.gate": {
283
  "bits": 16,
 
287
  "bits": 16,
288
  "data_type": "float"
289
  },
290
+ "model.layers.15.share_expert.down_proj": {
291
+ "bits": 16,
292
+ "data_type": "float"
 
 
293
  },
294
+ "model.layers.15.share_expert.gate_proj": {
295
+ "bits": 16,
296
+ "data_type": "float"
297
  },
298
+ "model.layers.15.share_expert.up_proj": {
299
+ "bits": 16,
300
+ "data_type": "float"
301
  },
302
  "model.layers.16.moe.gate": {
303
  "bits": 16,
 
307
  "bits": 16,
308
  "data_type": "float"
309
  },
310
+ "model.layers.16.share_expert.down_proj": {
311
+ "bits": 16,
312
+ "data_type": "float"
 
 
313
  },
314
+ "model.layers.16.share_expert.gate_proj": {
315
+ "bits": 16,
316
+ "data_type": "float"
317
  },
318
+ "model.layers.16.share_expert.up_proj": {
319
+ "bits": 16,
320
+ "data_type": "float"
321
  },
322
  "model.layers.17.moe.gate": {
323
  "bits": 16,
 
327
  "bits": 16,
328
  "data_type": "float"
329
  },
330
+ "model.layers.17.share_expert.down_proj": {
331
+ "bits": 16,
332
+ "data_type": "float"
 
 
333
  },
334
+ "model.layers.17.share_expert.gate_proj": {
335
+ "bits": 16,
336
+ "data_type": "float"
337
  },
338
+ "model.layers.17.share_expert.up_proj": {
339
+ "bits": 16,
340
+ "data_type": "float"
341
  },
342
  "model.layers.18.moe.gate": {
343
  "bits": 16,
 
347
  "bits": 16,
348
  "data_type": "float"
349
  },
350
+ "model.layers.18.share_expert.down_proj": {
351
+ "bits": 16,
352
+ "data_type": "float"
 
 
353
  },
354
+ "model.layers.18.share_expert.gate_proj": {
355
+ "bits": 16,
356
+ "data_type": "float"
357
  },
358
+ "model.layers.18.share_expert.up_proj": {
359
+ "bits": 16,
360
+ "data_type": "float"
361
  },
362
  "model.layers.19.moe.gate": {
363
  "bits": 16,
 
367
  "bits": 16,
368
  "data_type": "float"
369
  },
370
+ "model.layers.19.share_expert.down_proj": {
371
+ "bits": 16,
372
+ "data_type": "float"
 
 
 
 
 
 
 
 
 
 
 
373
  },
374
+ "model.layers.19.share_expert.gate_proj": {
375
+ "bits": 16,
376
+ "data_type": "float"
377
  },
378
+ "model.layers.19.share_expert.up_proj": {
379
+ "bits": 16,
380
+ "data_type": "float"
381
  },
382
  "model.layers.2.self_attn.g_proj": {
383
  "bits": 16,
384
  "data_type": "float"
385
  },
 
 
 
 
 
 
 
 
 
 
 
 
386
  "model.layers.20.moe.gate": {
387
  "bits": 16,
388
  "data_type": "float"
 
391
  "bits": 16,
392
  "data_type": "float"
393
  },
394
+ "model.layers.20.share_expert.down_proj": {
395
+ "bits": 16,
396
+ "data_type": "float"
 
 
397
  },
398
+ "model.layers.20.share_expert.gate_proj": {
399
+ "bits": 16,
400
+ "data_type": "float"
401
  },
402
+ "model.layers.20.share_expert.up_proj": {
403
+ "bits": 16,
404
+ "data_type": "float"
405
  },
406
  "model.layers.21.moe.gate": {
407
  "bits": 16,
 
411
  "bits": 16,
412
  "data_type": "float"
413
  },
414
+ "model.layers.21.share_expert.down_proj": {
415
+ "bits": 16,
416
+ "data_type": "float"
 
 
417
  },
418
+ "model.layers.21.share_expert.gate_proj": {
419
+ "bits": 16,
420
+ "data_type": "float"
421
  },
422
+ "model.layers.21.share_expert.up_proj": {
423
+ "bits": 16,
424
+ "data_type": "float"
425
  },
426
  "model.layers.22.moe.gate": {
427
  "bits": 16,
 
431
  "bits": 16,
432
  "data_type": "float"
433
  },
434
+ "model.layers.22.share_expert.down_proj": {
435
+ "bits": 16,
436
+ "data_type": "float"
 
 
437
  },
438
+ "model.layers.22.share_expert.gate_proj": {
439
+ "bits": 16,
440
+ "data_type": "float"
441
  },
442
+ "model.layers.22.share_expert.up_proj": {
443
+ "bits": 16,
444
+ "data_type": "float"
445
  },
446
  "model.layers.23.moe.gate": {
447
  "bits": 16,
 
451
  "bits": 16,
452
  "data_type": "float"
453
  },
454
+ "model.layers.23.share_expert.down_proj": {
455
+ "bits": 16,
456
+ "data_type": "float"
 
 
457
  },
458
+ "model.layers.23.share_expert.gate_proj": {
459
+ "bits": 16,
460
+ "data_type": "float"
461
  },
462
+ "model.layers.23.share_expert.up_proj": {
463
+ "bits": 16,
464
+ "data_type": "float"
465
  },
466
  "model.layers.24.moe.gate": {
467
  "bits": 16,
 
471
  "bits": 16,
472
  "data_type": "float"
473
  },
474
+ "model.layers.24.share_expert.down_proj": {
475
+ "bits": 16,
476
+ "data_type": "float"
 
 
477
  },
478
+ "model.layers.24.share_expert.gate_proj": {
479
+ "bits": 16,
480
+ "data_type": "float"
481
  },
482
+ "model.layers.24.share_expert.up_proj": {
483
+ "bits": 16,
484
+ "data_type": "float"
485
  },
486
  "model.layers.25.moe.gate": {
487
  "bits": 16,
 
491
  "bits": 16,
492
  "data_type": "float"
493
  },
494
+ "model.layers.25.share_expert.down_proj": {
495
+ "bits": 16,
496
+ "data_type": "float"
 
 
497
  },
498
+ "model.layers.25.share_expert.gate_proj": {
499
+ "bits": 16,
500
+ "data_type": "float"
501
  },
502
+ "model.layers.25.share_expert.up_proj": {
503
+ "bits": 16,
504
+ "data_type": "float"
505
  },
506
  "model.layers.26.moe.gate": {
507
  "bits": 16,
 
511
  "bits": 16,
512
  "data_type": "float"
513
  },
514
+ "model.layers.26.share_expert.down_proj": {
515
+ "bits": 16,
516
+ "data_type": "float"
 
 
517
  },
518
+ "model.layers.26.share_expert.gate_proj": {
519
+ "bits": 16,
520
+ "data_type": "float"
521
  },
522
+ "model.layers.26.share_expert.up_proj": {
523
+ "bits": 16,
524
+ "data_type": "float"
525
  },
526
  "model.layers.27.moe.gate": {
527
  "bits": 16,
 
531
  "bits": 16,
532
  "data_type": "float"
533
  },
534
+ "model.layers.27.share_expert.down_proj": {
535
+ "bits": 16,
536
+ "data_type": "float"
 
 
537
  },
538
+ "model.layers.27.share_expert.gate_proj": {
539
+ "bits": 16,
540
+ "data_type": "float"
541
  },
542
+ "model.layers.27.share_expert.up_proj": {
543
+ "bits": 16,
544
+ "data_type": "float"
545
  },
546
  "model.layers.28.moe.gate": {
547
  "bits": 16,
 
551
  "bits": 16,
552
  "data_type": "float"
553
  },
554
+ "model.layers.28.share_expert.down_proj": {
555
+ "bits": 16,
556
+ "data_type": "float"
 
 
557
  },
558
+ "model.layers.28.share_expert.gate_proj": {
559
+ "bits": 16,
560
+ "data_type": "float"
561
  },
562
+ "model.layers.28.share_expert.up_proj": {
563
+ "bits": 16,
564
+ "data_type": "float"
565
  },
566
  "model.layers.29.moe.gate": {
567
  "bits": 16,
 
571
  "bits": 16,
572
  "data_type": "float"
573
  },
574
+ "model.layers.29.share_expert.down_proj": {
575
+ "bits": 16,
576
+ "data_type": "float"
 
 
577
  },
578
+ "model.layers.29.share_expert.gate_proj": {
579
+ "bits": 16,
580
+ "data_type": "float"
581
  },
582
+ "model.layers.29.share_expert.up_proj": {
583
+ "bits": 16,
584
+ "data_type": "float"
585
  },
586
  "model.layers.3.moe.gate": {
587
  "bits": 16,
 
591
  "bits": 16,
592
  "data_type": "float"
593
  },
594
+ "model.layers.3.share_expert.down_proj": {
595
+ "bits": 16,
596
+ "data_type": "float"
 
 
597
  },
598
+ "model.layers.3.share_expert.gate_proj": {
599
+ "bits": 16,
600
+ "data_type": "float"
601
  },
602
+ "model.layers.3.share_expert.up_proj": {
603
+ "bits": 16,
604
+ "data_type": "float"
605
  },
606
  "model.layers.30.moe.gate": {
607
  "bits": 16,
 
611
  "bits": 16,
612
  "data_type": "float"
613
  },
614
+ "model.layers.30.share_expert.down_proj": {
615
+ "bits": 16,
616
+ "data_type": "float"
 
 
617
  },
618
+ "model.layers.30.share_expert.gate_proj": {
619
+ "bits": 16,
620
+ "data_type": "float"
621
  },
622
+ "model.layers.30.share_expert.up_proj": {
623
+ "bits": 16,
624
+ "data_type": "float"
625
  },
626
  "model.layers.31.moe.gate": {
627
  "bits": 16,
 
631
  "bits": 16,
632
  "data_type": "float"
633
  },
634
+ "model.layers.31.share_expert.down_proj": {
635
+ "bits": 16,
636
+ "data_type": "float"
 
 
637
  },
638
+ "model.layers.31.share_expert.gate_proj": {
639
+ "bits": 16,
640
+ "data_type": "float"
641
  },
642
+ "model.layers.31.share_expert.up_proj": {
643
+ "bits": 16,
644
+ "data_type": "float"
645
  },
646
  "model.layers.32.moe.gate": {
647
  "bits": 16,
 
651
  "bits": 16,
652
  "data_type": "float"
653
  },
654
+ "model.layers.32.share_expert.down_proj": {
655
+ "bits": 16,
656
+ "data_type": "float"
 
 
657
  },
658
+ "model.layers.32.share_expert.gate_proj": {
659
+ "bits": 16,
660
+ "data_type": "float"
661
  },
662
+ "model.layers.32.share_expert.up_proj": {
663
+ "bits": 16,
664
+ "data_type": "float"
665
  },
666
  "model.layers.33.moe.gate": {
667
  "bits": 16,
 
671
  "bits": 16,
672
  "data_type": "float"
673
  },
674
+ "model.layers.33.share_expert.down_proj": {
675
+ "bits": 16,
676
+ "data_type": "float"
 
 
677
  },
678
+ "model.layers.33.share_expert.gate_proj": {
679
+ "bits": 16,
680
+ "data_type": "float"
681
  },
682
+ "model.layers.33.share_expert.up_proj": {
683
+ "bits": 16,
684
+ "data_type": "float"
685
  },
686
  "model.layers.34.moe.gate": {
687
  "bits": 16,
 
691
  "bits": 16,
692
  "data_type": "float"
693
  },
694
+ "model.layers.34.share_expert.down_proj": {
695
+ "bits": 16,
696
+ "data_type": "float"
 
 
697
  },
698
+ "model.layers.34.share_expert.gate_proj": {
699
+ "bits": 16,
700
+ "data_type": "float"
701
  },
702
+ "model.layers.34.share_expert.up_proj": {
703
+ "bits": 16,
704
+ "data_type": "float"
705
  },
706
  "model.layers.35.moe.gate": {
707
  "bits": 16,
 
711
  "bits": 16,
712
  "data_type": "float"
713
  },
714
+ "model.layers.35.share_expert.down_proj": {
715
+ "bits": 16,
716
+ "data_type": "float"
 
 
717
  },
718
+ "model.layers.35.share_expert.gate_proj": {
719
+ "bits": 16,
720
+ "data_type": "float"
721
  },
722
+ "model.layers.35.share_expert.up_proj": {
723
+ "bits": 16,
724
+ "data_type": "float"
725
  },
726
  "model.layers.36.moe.gate": {
727
  "bits": 16,
 
731
  "bits": 16,
732
  "data_type": "float"
733
  },
734
+ "model.layers.36.share_expert.down_proj": {
735
+ "bits": 16,
736
+ "data_type": "float"
 
 
737
  },
738
+ "model.layers.36.share_expert.gate_proj": {
739
+ "bits": 16,
740
+ "data_type": "float"
741
  },
742
+ "model.layers.36.share_expert.up_proj": {
743
+ "bits": 16,
744
+ "data_type": "float"
745
  },
746
  "model.layers.37.moe.gate": {
747
  "bits": 16,
 
751
  "bits": 16,
752
  "data_type": "float"
753
  },
754
+ "model.layers.37.share_expert.down_proj": {
755
+ "bits": 16,
756
+ "data_type": "float"
 
 
757
  },
758
+ "model.layers.37.share_expert.gate_proj": {
759
+ "bits": 16,
760
+ "data_type": "float"
761
  },
762
+ "model.layers.37.share_expert.up_proj": {
763
+ "bits": 16,
764
+ "data_type": "float"
765
  },
766
  "model.layers.38.moe.gate": {
767
  "bits": 16,
 
771
  "bits": 16,
772
  "data_type": "float"
773
  },
774
+ "model.layers.38.share_expert.down_proj": {
775
+ "bits": 16,
776
+ "data_type": "float"
 
 
777
  },
778
+ "model.layers.38.share_expert.gate_proj": {
779
+ "bits": 16,
780
+ "data_type": "float"
781
  },
782
+ "model.layers.38.share_expert.up_proj": {
783
+ "bits": 16,
784
+ "data_type": "float"
785
  },
786
  "model.layers.39.moe.gate": {
787
  "bits": 16,
 
791
  "bits": 16,
792
  "data_type": "float"
793
  },
794
+ "model.layers.39.share_expert.down_proj": {
795
+ "bits": 16,
796
+ "data_type": "float"
 
 
797
  },
798
+ "model.layers.39.share_expert.gate_proj": {
799
+ "bits": 16,
800
+ "data_type": "float"
801
  },
802
+ "model.layers.39.share_expert.up_proj": {
803
+ "bits": 16,
804
+ "data_type": "float"
805
  },
806
  "model.layers.4.moe.gate": {
807
  "bits": 16,
 
811
  "bits": 16,
812
  "data_type": "float"
813
  },
814
+ "model.layers.4.share_expert.down_proj": {
815
+ "bits": 16,
816
+ "data_type": "float"
 
 
817
  },
818
+ "model.layers.4.share_expert.gate_proj": {
819
+ "bits": 16,
820
+ "data_type": "float"
821
  },
822
+ "model.layers.4.share_expert.up_proj": {
823
+ "bits": 16,
824
+ "data_type": "float"
825
  },
826
  "model.layers.40.moe.gate": {
827
  "bits": 16,
 
831
  "bits": 16,
832
  "data_type": "float"
833
  },
834
+ "model.layers.40.share_expert.down_proj": {
835
+ "bits": 16,
836
+ "data_type": "float"
 
 
837
  },
838
+ "model.layers.40.share_expert.gate_proj": {
839
+ "bits": 16,
840
+ "data_type": "float"
841
  },
842
+ "model.layers.40.share_expert.up_proj": {
843
+ "bits": 16,
844
+ "data_type": "float"
845
  },
846
  "model.layers.41.moe.gate": {
847
  "bits": 16,
 
851
  "bits": 16,
852
  "data_type": "float"
853
  },
854
+ "model.layers.41.share_expert.down_proj": {
855
+ "bits": 16,
856
+ "data_type": "float"
 
 
857
  },
858
+ "model.layers.41.share_expert.gate_proj": {
859
+ "bits": 16,
860
+ "data_type": "float"
861
  },
862
+ "model.layers.41.share_expert.up_proj": {
863
+ "bits": 16,
864
+ "data_type": "float"
865
  },
866
  "model.layers.42.moe.gate": {
867
  "bits": 16,
 
871
  "bits": 16,
872
  "data_type": "float"
873
  },
874
+ "model.layers.42.share_expert.down_proj": {
875
+ "bits": 16,
876
+ "data_type": "float"
 
 
877
  },
878
+ "model.layers.42.share_expert.gate_proj": {
879
+ "bits": 16,
880
+ "data_type": "float"
881
  },
882
+ "model.layers.42.share_expert.up_proj": {
883
+ "bits": 16,
884
+ "data_type": "float"
885
  },
886
  "model.layers.43.moe.gate": {
887
  "bits": 16,
 
891
  "bits": 16,
892
  "data_type": "float"
893
  },
894
+ "model.layers.43.share_expert.down_proj": {
895
+ "bits": 16,
896
+ "data_type": "float"
 
 
897
  },
898
+ "model.layers.43.share_expert.gate_proj": {
899
+ "bits": 16,
900
+ "data_type": "float"
901
  },
902
+ "model.layers.43.share_expert.up_proj": {
903
+ "bits": 16,
904
+ "data_type": "float"
905
  },
906
  "model.layers.44.moe.gate": {
907
  "bits": 16,
 
911
  "bits": 16,
912
  "data_type": "float"
913
  },
914
+ "model.layers.44.share_expert.down_proj": {
915
+ "bits": 16,
916
+ "data_type": "float"
 
 
917
  },
918
+ "model.layers.44.share_expert.gate_proj": {
919
+ "bits": 16,
920
+ "data_type": "float"
921
  },
922
+ "model.layers.44.share_expert.up_proj": {
923
+ "bits": 16,
924
+ "data_type": "float"
925
  },
926
  "model.layers.5.moe.gate": {
927
  "bits": 16,
 
931
  "bits": 16,
932
  "data_type": "float"
933
  },
934
+ "model.layers.5.share_expert.down_proj": {
935
+ "bits": 16,
936
+ "data_type": "float"
 
 
937
  },
938
+ "model.layers.5.share_expert.gate_proj": {
939
+ "bits": 16,
940
+ "data_type": "float"
941
  },
942
+ "model.layers.5.share_expert.up_proj": {
943
+ "bits": 16,
944
+ "data_type": "float"
945
  },
946
  "model.layers.6.moe.gate": {
947
  "bits": 16,
 
951
  "bits": 16,
952
  "data_type": "float"
953
  },
954
+ "model.layers.6.share_expert.down_proj": {
955
+ "bits": 16,
956
+ "data_type": "float"
 
 
957
  },
958
+ "model.layers.6.share_expert.gate_proj": {
959
+ "bits": 16,
960
+ "data_type": "float"
961
  },
962
+ "model.layers.6.share_expert.up_proj": {
963
+ "bits": 16,
964
+ "data_type": "float"
965
  },
966
  "model.layers.7.moe.gate": {
967
  "bits": 16,
 
971
  "bits": 16,
972
  "data_type": "float"
973
  },
974
+ "model.layers.7.share_expert.down_proj": {
975
+ "bits": 16,
976
+ "data_type": "float"
 
 
977
  },
978
+ "model.layers.7.share_expert.gate_proj": {
979
+ "bits": 16,
980
+ "data_type": "float"
981
  },
982
+ "model.layers.7.share_expert.up_proj": {
983
+ "bits": 16,
984
+ "data_type": "float"
985
  },
986
  "model.layers.8.moe.gate": {
987
  "bits": 16,
 
991
  "bits": 16,
992
  "data_type": "float"
993
  },
994
+ "model.layers.8.share_expert.down_proj": {
995
+ "bits": 16,
996
+ "data_type": "float"
 
 
997
  },
998
+ "model.layers.8.share_expert.gate_proj": {
999
+ "bits": 16,
1000
+ "data_type": "float"
1001
  },
1002
+ "model.layers.8.share_expert.up_proj": {
1003
+ "bits": 16,
1004
+ "data_type": "float"
1005
  },
1006
  "model.layers.9.moe.gate": {
1007
  "bits": 16,
 
1011
  "bits": 16,
1012
  "data_type": "float"
1013
  },
1014
+ "model.layers.9.share_expert.down_proj": {
1015
+ "bits": 16,
1016
+ "data_type": "float"
 
 
1017
  },
1018
+ "model.layers.9.share_expert.gate_proj": {
1019
+ "bits": 16,
1020
+ "data_type": "float"
1021
  },
1022
+ "model.layers.9.share_expert.up_proj": {
1023
+ "bits": 16,
1024
+ "data_type": "float"
1025
  },
1026
  "model.layers.45.eh_proj": {
1027
  "bits": 16,
 
1063
  "bits": 16,
1064
  "data_type": "fp"
1065
  },
1066
+ "model.layers.46.eh_proj": {
1067
+ "bits": 16,
1068
+ "data_type": "fp"
1069
+ },
1070
+ "model.layers.46.self_attn.g_proj": {
1071
+ "bits": 16,
1072
+ "data_type": "fp"
1073
+ },
1074
  "model.layers.46.transformer.shared_head.output": {
1075
  "bits": 16,
1076
  "data_type": "fp"
1077
  },
1078
+ "model.layers.47.eh_proj": {
1079
+ "bits": 16,
1080
+ "data_type": "fp"
1081
+ },
1082
+ "model.layers.47.self_attn.g_proj": {
1083
+ "bits": 16,
1084
+ "data_type": "fp"
1085
+ },
1086
  "model.layers.47.transformer.shared_head.output": {
1087
  "bits": 16,
1088
  "data_type": "fp"
1089
  }
1090
  },
1091
  "group_size": 128,
 
1092
  "packing_format": "auto_round:auto_gptq",
1093
  "quant_method": "auto-round",
1094
  "sym": true,
 
1276
  "full_attention"
1277
  ],
1278
  "zero_centered": true
1279
+ }
model-00001-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:457cc9d06da2f8d61b13bdb3f85eb437e503dee40a794298786fbfb6a4d8b068
3
+ size 1073136648
model-00002-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7c37ec685b4c32d1c4e55a30463a44cd529ca1173e8677fd7483cb9efaea0e5
3
+ size 1073336096
model-00003-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d188a626df928294cd4fb0553e42d1ff067f9c6260c8a533a386e17486b8369f
3
+ size 1073367032
model-00004-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a4c11b7010cfb5c207d27667d00e870c91b443d8d36df0c8ecc68b3a9182e4c
3
+ size 1073335776
model-00005-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b39daa7b48357a5f136fa1b48df061fd108779360ca68b691c5ffa7700c1f7e3
3
+ size 1071992680
model-00006-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95140bfab86fd8c3442b2286cf8d073c8213f4e0ec556a6e1d278a4b3470714c
3
+ size 1073335432
model-00007-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1352eaaae5bcef0d9263432433f1a4712084914376e1a4ee67947a9e4cc371a
3
+ size 1073336288
model-00008-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db0e281c0b57c461e6c737aef76fdd8a55bec18b0b2a3e5da0fc29ec1fbd1971
3
+ size 1071991824
model-00009-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c7897ab2dc1ce8eff3f57266bc8cbceee5b56afab325975d76810000db8eec6
3
+ size 1073336280
model-00010-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6e0df6cf06c3072ddf345e655aa51c5c6095244698dbe9377732b4dd938c0d0
3
+ size 1071992136
model-00011-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf15ca3a54567b0ad6917608a70fe3dc26ba9a4a82c43b26bb8c9f86b4704e70
3
+ size 1073335976
model-00012-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44510a1a6016672018c2cec333bae7a84619e92b7d4948714504a211304f9308
3
+ size 1073367152
model-00013-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:378b55acae01c14789a4934a8e43d26906a9f5c3c4c150780f8228f46aef2a2e
3
+ size 1073335664
model-00014-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a3ce921f10053dd1a9d6c42b6dabe2c5ba660a4b0e7b98cb0ead625484fcb5e
3
+ size 1055774904
model-00015-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fe037ced57c7b48123364e9d3829bd9e1f09557ad91480c7282328a2aa8f3f6
3
+ size 1073207912
model-00016-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b73408cd9e1a4fd73bdd1ec4d596b3ea6046e9e5491ab23de13761415fc6bbd
3
+ size 1073336288
model-00017-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:842cc47f334a628adb02e84632ce3d39e101b8d1cf7810ad3b40c1fa5c8b279e
3
+ size 1071992744
model-00018-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:927efbcf7ee17310ed9d21343be919d7ea98c4305f9875524ce8119a75478325
3
+ size 1073337344
model-00019-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2844ad4318355ef38d45983e5b3ea650712da970a854af7c03e4e5af75280f9
3
+ size 1071993368
model-00020-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fc955199d6dc8d81930c3ee2c1d393d6f499ede78894085634902dceb101572
3
+ size 1073337024
model-00021-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15c16795083a4eacb847f0641df693022ba62ec40ae4052cff040b2f1af425ed
3
+ size 1073368408
model-00022-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1853c82edc9ea3b4d67458824709b6270d84a4520a1a0c633891910399cdb02c
3
+ size 1073336712
model-00023-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1b1d948868d78b113d8c986617aa506c3431462a93030bedb8f6dcca84eef75
3
+ size 1073337464
model-00024-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80c2a39a8bdc0eab75e78fb6c34476c46c79b780afa8a272a8b1b8bfb2d6b2a1
3
+ size 1071992928
model-00025-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d03ed40879d45a30ea7636672e60f632a233c2116e8286f53ca261ce23b8e37
3
+ size 1073337464
model-00026-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b025761327bd03f09653ff32e45b276683e146986d0a4f9d3f2dff8ff0a4529
3
+ size 1071993160
model-00027-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a88f9c95a27a198a1bc872456f7079137efde4b660d29fa8e23cc774ecea21ce
3
+ size 1073337232
model-00028-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f7e3d00f1195536076970bf423933925251b54e669d62b9f03b90b974f37789
3
+ size 1071993488
model-00029-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95fb0e10dae4fef1d06c889656bee93478840611317c1c5dfc81925fe8dcce4e
3
+ size 1073336904
model-00030-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0e214f8db24f856b6acefeb3e5702e1d43c1a3441e4899f1ab74bd63c6a4a2c
3
+ size 1073368584
model-00031-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad910c3e810da44b5ad96ae6c4e62cdf9dd6bea2762c23aa02b878786dcd18ee
3
+ size 1073336528
model-00032-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0296c5d2aea0e42f0e97e90b5d08cdb9a869b7f19247852e96cb861f667ce6e4
3
+ size 1073337464
model-00033-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5b4e03a0cc9ef15d690944e9f1b0cc744df8fe69ac94091505774dbfdeb9d05
3
+ size 1071992944
model-00034-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f749c75f37b54154c7a431db43b568d54ce8690aebbd5f904ce3215133a6d045
3
+ size 1073337448
model-00035-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a001022c9b949acd5fb3c4fd855b3a5eab3a4def493a66f5f5267ea9fbf8a704
3
+ size 1071993272
model-00036-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c945b19aa2e8a68bf09894477561a6566a13d4314841d2e7f4a132821d7c091a
3
+ size 1073337112
model-00037-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18caada793c593e3816344a4e2844ecd7fb77bd91a1db2293808df7902af6c2d
3
+ size 1071993608
model-00038-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:073733f2f953e0c3d9e72896a8b7acbf57cade556e299d428b8930f262f72e87
3
+ size 1073336792
model-00039-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1c25cea900d9a42bbe1b6e925524e2ed12d66c6a9acb6a1e0c3a5ac5a17b685
3
+ size 1073337464
model-00040-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c05f8cc5682e490c106db6f33fb31f7d1eb6e3a4529349d8a0ebca6a063b183
3
+ size 1073367648
model-00041-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa9dae1871ecd0f73720a6e476c31559cee8641917cb0525c47fdb069822c9eb
3
+ size 1073337464
model-00042-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8fce519bf5e99eb154062b45f23a7c9a770f62b7e8b4eaf1939773e206cd2c8
3
+ size 1071993064
model-00043-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6046d69ae0fbaefc598f430bb3276c1f1501570f0de6b1dcbb4bdffa9ffe4f93
3
+ size 1073337336
model-00044-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a513538bbf993d4178a525997148f587de910a17c1b42da192321faa6851638
3
+ size 1071993392
model-00045-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5615d42dba5cc5c393cc7fa4fff20fd989aa5c9cbd0ece491909c3cc5230381
3
+ size 1073337000
model-00046-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb7163f8322b264d70bf4266f2122dd5248ac1323edb3fc089e01224b260bb3a
3
+ size 1071993720
model-00047-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ca914a72228d4ca16b83bc5031ad50bc254855528bb930ea19adaeec0425d93
3
+ size 1073336672
model-00048-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea2379d554931fb82986208ffc99e72bba401a50845765ad4852c95dad8de659
3
+ size 1073337464
model-00049-of-00098.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fd56e1a51e96e2e4d004f82061860fbee586821a92a451e728d3fc5a49201d1
3
+ size 1073367648