CharyZeng committed on
Commit
b773bea
·
verified ·
1 Parent(s): d9556ce

4-layer extract: hidden layers 0,1,2 + MTP

Browse files
config.json CHANGED
@@ -24,7 +24,7 @@
24
  "norm_topk_prob": true,
25
  "num_attention_heads": 64,
26
  "num_experts_per_tok": 6,
27
- "num_hidden_layers": 4,
28
  "num_hash_layers": 3,
29
  "num_key_value_heads": 1,
30
  "num_nextn_predict_layers": 1,
@@ -66,46 +66,6 @@
66
  0,
67
  0,
68
  4,
69
- 128,
70
- 4,
71
- 128,
72
- 4,
73
- 128,
74
- 4,
75
- 128,
76
- 4,
77
- 128,
78
- 4,
79
- 128,
80
- 4,
81
- 128,
82
- 4,
83
- 128,
84
- 4,
85
- 128,
86
- 4,
87
- 128,
88
- 4,
89
- 128,
90
- 4,
91
- 128,
92
- 4,
93
- 128,
94
- 4,
95
- 128,
96
- 4,
97
- 128,
98
- 4,
99
- 128,
100
- 4,
101
- 128,
102
- 4,
103
- 128,
104
- 4,
105
- 128,
106
- 4,
107
- 128,
108
- 4,
109
  0
110
  ]
111
  }
 
24
  "norm_topk_prob": true,
25
  "num_attention_heads": 64,
26
  "num_experts_per_tok": 6,
27
+ "num_hidden_layers": 3,
28
  "num_hash_layers": 3,
29
  "num_key_value_heads": 1,
30
  "num_nextn_predict_layers": 1,
 
66
  0,
67
  0,
68
  4,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  0
70
  ]
71
  }
model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b49b08cb592a5f481d69113cc7f098437e09e4803addb92f5a1006f065f5d2c1
3
+ size 10734101852
model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f51b7462d4cac23b3e369b88fe7a1d3acfbb4ef6a1ba4ce07de6e6b0d8f6983
3
+ size 5707127668
model.safetensors.index.json CHANGED
The diff for this file is too large to render. See raw diff