IshaanMan123 committed on
Commit
aa97c2b
·
verified ·
1 Parent(s): 21dd141

Initial MeridianAI seed (Base: HuggingFaceTB/SmolLM2-360M, Tokenizer: HuggingFaceTB/SmolLM2-360M)

Browse files
checkpoint/config.json CHANGED
@@ -4,41 +4,31 @@
4
  ],
5
  "attention_bias": false,
6
  "attention_dropout": 0.0,
7
- "bos_token_id": 1,
8
- "capacity_factor_eval": 2.0,
9
- "capacity_factor_train": 1.25,
10
- "drop_tks": true,
11
- "dropout_rate": 0.0,
12
- "dtype": "float32",
13
- "eos_token_id": 2,
14
- "expert_parallel": null,
15
- "gated": true,
16
  "head_dim": 64,
17
  "hidden_act": "silu",
18
- "hidden_size": 768,
19
  "initializer_range": 0.02,
20
- "intermediate_size": 2048,
21
- "layer_norm_epsilon": 1e-06,
22
- "max_position_embeddings": 2048,
23
- "min_capacity": 4,
24
  "mlp_bias": false,
25
  "model_type": "llama",
26
- "moe_layer_interval": 4,
27
- "noisy_policy": null,
28
- "num_attention_heads": 12,
29
- "num_experts": 16,
30
- "num_hidden_layers": 12,
31
- "num_key_value_heads": 12,
32
- "pad_token_id": 0,
33
  "pretraining_tp": 1,
34
- "rms_norm_eps": 1e-06,
 
35
  "rope_parameters": {
36
- "rope_theta": 10000.0,
37
  "rope_type": "default"
38
  },
39
- "tie_word_embeddings": false,
40
- "topk": 2,
41
  "transformers_version": "5.3.0",
42
- "use_cache": false,
43
- "vocab_size": 256384
44
  }
 
4
  ],
5
  "attention_bias": false,
6
  "attention_dropout": 0.0,
7
+ "bos_token_id": 0,
8
+ "dtype": "bfloat16",
9
+ "eos_token_id": 0,
 
 
 
 
 
 
10
  "head_dim": 64,
11
  "hidden_act": "silu",
12
+ "hidden_size": 960,
13
  "initializer_range": 0.02,
14
+ "intermediate_size": 2560,
15
+ "is_llama_config": true,
16
+ "max_position_embeddings": 8192,
 
17
  "mlp_bias": false,
18
  "model_type": "llama",
19
+ "num_attention_heads": 15,
20
+ "num_hidden_layers": 32,
21
+ "num_key_value_heads": 5,
22
+ "pad_token_id": null,
 
 
 
23
  "pretraining_tp": 1,
24
+ "rms_norm_eps": 1e-05,
25
+ "rope_interleaved": false,
26
  "rope_parameters": {
27
+ "rope_theta": 100000,
28
  "rope_type": "default"
29
  },
30
+ "tie_word_embeddings": true,
 
31
  "transformers_version": "5.3.0",
32
+ "use_cache": true,
33
+ "vocab_size": 49152
34
  }
checkpoint/generation_config.json CHANGED
@@ -1,7 +1,6 @@
1
  {
2
  "_from_model_config": true,
3
- "bos_token_id": 1,
4
- "eos_token_id": 2,
5
- "pad_token_id": 0,
6
  "transformers_version": "5.3.0"
7
  }
 
1
  {
2
  "_from_model_config": true,
3
+ "bos_token_id": 0,
4
+ "eos_token_id": 0,
 
5
  "transformers_version": "5.3.0"
6
  }
checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c7906faca1e4ee4fa759182b65591628aa965ea23df3c80108982ed7f81c88c
3
- size 1915051264
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7aaff6661428bed033abba9522bec81938678642cca3181fe752b6ca9e1e540f
3
+ size 723674912
checkpoint/tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:39daf1fcb0db6fdaac7e1a1902b598c2f147e2ca6720614761d20924b631bc60
3
- size 16853111
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf346d64f6f0fbcefb4c1b6928a98241467dff36c6fbae5fe1785c4ff90667f4
3
+ size 3522871
checkpoint/tokenizer_config.json CHANGED
@@ -1,316 +1,33 @@
1
  {
 
2
  "backend": "tokenizers",
3
- "bos_token": "<s>",
4
- "clean_up_tokenization_spaces": true,
5
- "eos_token": "</s>",
6
- "extra_ids": 300,
7
  "extra_special_tokens": [
8
- "<extra_id_0>",
9
- "<extra_id_1>",
10
- "<extra_id_2>",
11
- "<extra_id_3>",
12
- "<extra_id_4>",
13
- "<extra_id_5>",
14
- "<extra_id_6>",
15
- "<extra_id_7>",
16
- "<extra_id_8>",
17
- "<extra_id_9>",
18
- "<extra_id_10>",
19
- "<extra_id_11>",
20
- "<extra_id_12>",
21
- "<extra_id_13>",
22
- "<extra_id_14>",
23
- "<extra_id_15>",
24
- "<extra_id_16>",
25
- "<extra_id_17>",
26
- "<extra_id_18>",
27
- "<extra_id_19>",
28
- "<extra_id_20>",
29
- "<extra_id_21>",
30
- "<extra_id_22>",
31
- "<extra_id_23>",
32
- "<extra_id_24>",
33
- "<extra_id_25>",
34
- "<extra_id_26>",
35
- "<extra_id_27>",
36
- "<extra_id_28>",
37
- "<extra_id_29>",
38
- "<extra_id_30>",
39
- "<extra_id_31>",
40
- "<extra_id_32>",
41
- "<extra_id_33>",
42
- "<extra_id_34>",
43
- "<extra_id_35>",
44
- "<extra_id_36>",
45
- "<extra_id_37>",
46
- "<extra_id_38>",
47
- "<extra_id_39>",
48
- "<extra_id_40>",
49
- "<extra_id_41>",
50
- "<extra_id_42>",
51
- "<extra_id_43>",
52
- "<extra_id_44>",
53
- "<extra_id_45>",
54
- "<extra_id_46>",
55
- "<extra_id_47>",
56
- "<extra_id_48>",
57
- "<extra_id_49>",
58
- "<extra_id_50>",
59
- "<extra_id_51>",
60
- "<extra_id_52>",
61
- "<extra_id_53>",
62
- "<extra_id_54>",
63
- "<extra_id_55>",
64
- "<extra_id_56>",
65
- "<extra_id_57>",
66
- "<extra_id_58>",
67
- "<extra_id_59>",
68
- "<extra_id_60>",
69
- "<extra_id_61>",
70
- "<extra_id_62>",
71
- "<extra_id_63>",
72
- "<extra_id_64>",
73
- "<extra_id_65>",
74
- "<extra_id_66>",
75
- "<extra_id_67>",
76
- "<extra_id_68>",
77
- "<extra_id_69>",
78
- "<extra_id_70>",
79
- "<extra_id_71>",
80
- "<extra_id_72>",
81
- "<extra_id_73>",
82
- "<extra_id_74>",
83
- "<extra_id_75>",
84
- "<extra_id_76>",
85
- "<extra_id_77>",
86
- "<extra_id_78>",
87
- "<extra_id_79>",
88
- "<extra_id_80>",
89
- "<extra_id_81>",
90
- "<extra_id_82>",
91
- "<extra_id_83>",
92
- "<extra_id_84>",
93
- "<extra_id_85>",
94
- "<extra_id_86>",
95
- "<extra_id_87>",
96
- "<extra_id_88>",
97
- "<extra_id_89>",
98
- "<extra_id_90>",
99
- "<extra_id_91>",
100
- "<extra_id_92>",
101
- "<extra_id_93>",
102
- "<extra_id_94>",
103
- "<extra_id_95>",
104
- "<extra_id_96>",
105
- "<extra_id_97>",
106
- "<extra_id_98>",
107
- "<extra_id_99>",
108
- "<extra_id_100>",
109
- "<extra_id_101>",
110
- "<extra_id_102>",
111
- "<extra_id_103>",
112
- "<extra_id_104>",
113
- "<extra_id_105>",
114
- "<extra_id_106>",
115
- "<extra_id_107>",
116
- "<extra_id_108>",
117
- "<extra_id_109>",
118
- "<extra_id_110>",
119
- "<extra_id_111>",
120
- "<extra_id_112>",
121
- "<extra_id_113>",
122
- "<extra_id_114>",
123
- "<extra_id_115>",
124
- "<extra_id_116>",
125
- "<extra_id_117>",
126
- "<extra_id_118>",
127
- "<extra_id_119>",
128
- "<extra_id_120>",
129
- "<extra_id_121>",
130
- "<extra_id_122>",
131
- "<extra_id_123>",
132
- "<extra_id_124>",
133
- "<extra_id_125>",
134
- "<extra_id_126>",
135
- "<extra_id_127>",
136
- "<extra_id_128>",
137
- "<extra_id_129>",
138
- "<extra_id_130>",
139
- "<extra_id_131>",
140
- "<extra_id_132>",
141
- "<extra_id_133>",
142
- "<extra_id_134>",
143
- "<extra_id_135>",
144
- "<extra_id_136>",
145
- "<extra_id_137>",
146
- "<extra_id_138>",
147
- "<extra_id_139>",
148
- "<extra_id_140>",
149
- "<extra_id_141>",
150
- "<extra_id_142>",
151
- "<extra_id_143>",
152
- "<extra_id_144>",
153
- "<extra_id_145>",
154
- "<extra_id_146>",
155
- "<extra_id_147>",
156
- "<extra_id_148>",
157
- "<extra_id_149>",
158
- "<extra_id_150>",
159
- "<extra_id_151>",
160
- "<extra_id_152>",
161
- "<extra_id_153>",
162
- "<extra_id_154>",
163
- "<extra_id_155>",
164
- "<extra_id_156>",
165
- "<extra_id_157>",
166
- "<extra_id_158>",
167
- "<extra_id_159>",
168
- "<extra_id_160>",
169
- "<extra_id_161>",
170
- "<extra_id_162>",
171
- "<extra_id_163>",
172
- "<extra_id_164>",
173
- "<extra_id_165>",
174
- "<extra_id_166>",
175
- "<extra_id_167>",
176
- "<extra_id_168>",
177
- "<extra_id_169>",
178
- "<extra_id_170>",
179
- "<extra_id_171>",
180
- "<extra_id_172>",
181
- "<extra_id_173>",
182
- "<extra_id_174>",
183
- "<extra_id_175>",
184
- "<extra_id_176>",
185
- "<extra_id_177>",
186
- "<extra_id_178>",
187
- "<extra_id_179>",
188
- "<extra_id_180>",
189
- "<extra_id_181>",
190
- "<extra_id_182>",
191
- "<extra_id_183>",
192
- "<extra_id_184>",
193
- "<extra_id_185>",
194
- "<extra_id_186>",
195
- "<extra_id_187>",
196
- "<extra_id_188>",
197
- "<extra_id_189>",
198
- "<extra_id_190>",
199
- "<extra_id_191>",
200
- "<extra_id_192>",
201
- "<extra_id_193>",
202
- "<extra_id_194>",
203
- "<extra_id_195>",
204
- "<extra_id_196>",
205
- "<extra_id_197>",
206
- "<extra_id_198>",
207
- "<extra_id_199>",
208
- "<extra_id_200>",
209
- "<extra_id_201>",
210
- "<extra_id_202>",
211
- "<extra_id_203>",
212
- "<extra_id_204>",
213
- "<extra_id_205>",
214
- "<extra_id_206>",
215
- "<extra_id_207>",
216
- "<extra_id_208>",
217
- "<extra_id_209>",
218
- "<extra_id_210>",
219
- "<extra_id_211>",
220
- "<extra_id_212>",
221
- "<extra_id_213>",
222
- "<extra_id_214>",
223
- "<extra_id_215>",
224
- "<extra_id_216>",
225
- "<extra_id_217>",
226
- "<extra_id_218>",
227
- "<extra_id_219>",
228
- "<extra_id_220>",
229
- "<extra_id_221>",
230
- "<extra_id_222>",
231
- "<extra_id_223>",
232
- "<extra_id_224>",
233
- "<extra_id_225>",
234
- "<extra_id_226>",
235
- "<extra_id_227>",
236
- "<extra_id_228>",
237
- "<extra_id_229>",
238
- "<extra_id_230>",
239
- "<extra_id_231>",
240
- "<extra_id_232>",
241
- "<extra_id_233>",
242
- "<extra_id_234>",
243
- "<extra_id_235>",
244
- "<extra_id_236>",
245
- "<extra_id_237>",
246
- "<extra_id_238>",
247
- "<extra_id_239>",
248
- "<extra_id_240>",
249
- "<extra_id_241>",
250
- "<extra_id_242>",
251
- "<extra_id_243>",
252
- "<extra_id_244>",
253
- "<extra_id_245>",
254
- "<extra_id_246>",
255
- "<extra_id_247>",
256
- "<extra_id_248>",
257
- "<extra_id_249>",
258
- "<extra_id_250>",
259
- "<extra_id_251>",
260
- "<extra_id_252>",
261
- "<extra_id_253>",
262
- "<extra_id_254>",
263
- "<extra_id_255>",
264
- "<extra_id_256>",
265
- "<extra_id_257>",
266
- "<extra_id_258>",
267
- "<extra_id_259>",
268
- "<extra_id_260>",
269
- "<extra_id_261>",
270
- "<extra_id_262>",
271
- "<extra_id_263>",
272
- "<extra_id_264>",
273
- "<extra_id_265>",
274
- "<extra_id_266>",
275
- "<extra_id_267>",
276
- "<extra_id_268>",
277
- "<extra_id_269>",
278
- "<extra_id_270>",
279
- "<extra_id_271>",
280
- "<extra_id_272>",
281
- "<extra_id_273>",
282
- "<extra_id_274>",
283
- "<extra_id_275>",
284
- "<extra_id_276>",
285
- "<extra_id_277>",
286
- "<extra_id_278>",
287
- "<extra_id_279>",
288
- "<extra_id_280>",
289
- "<extra_id_281>",
290
- "<extra_id_282>",
291
- "<extra_id_283>",
292
- "<extra_id_284>",
293
- "<extra_id_285>",
294
- "<extra_id_286>",
295
- "<extra_id_287>",
296
- "<extra_id_288>",
297
- "<extra_id_289>",
298
- "<extra_id_290>",
299
- "<extra_id_291>",
300
- "<extra_id_292>",
301
- "<extra_id_293>",
302
- "<extra_id_294>",
303
- "<extra_id_295>",
304
- "<extra_id_296>",
305
- "<extra_id_297>",
306
- "<extra_id_298>",
307
- "<extra_id_299>"
308
  ],
309
  "is_local": false,
310
- "model_max_length": 1000000000000000019884624838656,
311
- "pad_token": "<pad>",
312
- "sp_model_kwargs": {},
313
- "spaces_between_special_tokens": false,
314
- "tokenizer_class": "T5Tokenizer",
315
- "unk_token": "<unk>"
316
  }
 
1
  {
2
+ "add_prefix_space": false,
3
  "backend": "tokenizers",
4
+ "bos_token": "<|endoftext|>",
5
+ "clean_up_tokenization_spaces": false,
6
+ "eos_token": "<|endoftext|>",
7
+ "errors": "replace",
8
  "extra_special_tokens": [
9
+ "<|endoftext|>",
10
+ "<|im_start|>",
11
+ "<|im_end|>",
12
+ "<repo_name>",
13
+ "<reponame>",
14
+ "<file_sep>",
15
+ "<filename>",
16
+ "<gh_stars>",
17
+ "<issue_start>",
18
+ "<issue_comment>",
19
+ "<issue_closed>",
20
+ "<jupyter_start>",
21
+ "<jupyter_text>",
22
+ "<jupyter_code>",
23
+ "<jupyter_output>",
24
+ "<jupyter_script>",
25
+ "<empty_output>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  ],
27
  "is_local": false,
28
+ "model_max_length": 8192,
29
+ "pad_token": "<|endoftext|>",
30
+ "tokenizer_class": "GPT2Tokenizer",
31
+ "unk_token": "<|endoftext|>",
32
+ "vocab_size": 49152
 
33
  }