ms8080 committed on
Commit
ca7687c
·
verified ·
1 Parent(s): d53e364

Training in progress, step 500

Browse files
Files changed (4) hide show
  1. config.json +2 -2
  2. model.safetensors +2 -2
  3. tokenizer.json +73 -343
  4. training_args.bin +1 -1
config.json CHANGED
@@ -40,7 +40,7 @@
40
  "tie_word_embeddings": true,
41
  "type_vocab_size": 2,
42
  "use_cache": true,
43
- "vocab_size": 107
44
  },
45
  "decoder_start_token_id": 2,
46
  "dtype": "float32",
@@ -82,7 +82,7 @@
82
  "tie_word_embeddings": true,
83
  "type_vocab_size": 2,
84
  "use_cache": true,
85
- "vocab_size": 107
86
  },
87
  "eos_token_id": 0,
88
  "is_encoder_decoder": true,
 
40
  "tie_word_embeddings": true,
41
  "type_vocab_size": 2,
42
  "use_cache": true,
43
+ "vocab_size": 53
44
  },
45
  "decoder_start_token_id": 2,
46
  "dtype": "float32",
 
82
  "tie_word_embeddings": true,
83
  "type_vocab_size": 2,
84
  "use_cache": true,
85
+ "vocab_size": 53
86
  },
87
  "eos_token_id": 0,
88
  "is_encoder_decoder": true,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:529d1cfbc7b99262063946cb2035d96b85e3b878a3eb7b1908378644e612d942
3
- size 31318412
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad50e0ce57251d513deab0ace53dd950ae4137af1b42753fc6521bc9c53511c6
3
+ size 31207604
tokenizer.json CHANGED
@@ -114,457 +114,187 @@
114
  "8": 14,
115
  "9": 15,
116
  "=": 16,
117
- "46": 17,
118
- "22": 18,
119
- "35": 19,
120
- "25": 20,
121
- "16": 21,
122
- "28": 22,
123
- "41": 23,
124
- "12": 24,
125
- "20": 25,
126
  "14": 26,
127
- "15": 27,
128
- "47": 28,
129
- "27": 29,
130
- "29": 30,
131
  "17": 31,
132
- "18": 32,
133
- "26": 33,
134
- "44": 34,
135
- "11": 35,
136
- "30": 36,
137
- "31": 37,
138
- "45": 38,
139
- "40": 39,
140
- "13": 40,
141
- "21": 41,
142
- "48": 42,
143
- "36": 43,
144
- "23": 44,
145
- "19": 45,
146
- "38": 46,
147
- "43": 47,
148
- "37": 48,
149
- "32": 49,
150
- "24": 50,
151
- "49": 51,
152
- "10": 52,
153
- "34": 53,
154
- "33": 54,
155
- "39": 55,
156
- "42": 56,
157
- "56": 57,
158
- "90": 58,
159
- "93": 59,
160
- "59": 60,
161
- "51": 61,
162
- "53": 62,
163
- "69": 63,
164
- "86": 64,
165
- "95": 65,
166
- "71": 66,
167
- "85": 67,
168
- "87": 68,
169
- "75": 69,
170
- "88": 70,
171
- "61": 71,
172
- "70": 72,
173
- "72": 73,
174
- "80": 74,
175
- "84": 75,
176
- "98": 76,
177
- "64": 77,
178
- "74": 78,
179
- "96": 79,
180
- "52": 80,
181
- "63": 81,
182
- "73": 82,
183
- "77": 83,
184
- "92": 84,
185
- "97": 85,
186
- "99": 86,
187
- "65": 87,
188
- "66": 88,
189
- "68": 89,
190
- "79": 90,
191
- "83": 91,
192
- "55": 92,
193
- "76": 93,
194
- "78": 94,
195
- "89": 95,
196
- "67": 96,
197
- "57": 97,
198
- "60": 98,
199
- "81": 99,
200
- "82": 100,
201
- "50": 101,
202
- "58": 102,
203
- "94": 103,
204
- "62": 104,
205
- "54": 105,
206
- "91": 106
207
  },
208
  "merges": [
209
  [
210
- "4",
211
- "6"
212
- ],
213
- [
214
- "2",
215
- "2"
216
- ],
217
- [
218
- "3",
219
- "5"
220
- ],
221
- [
222
- "2",
223
- "5"
224
  ],
225
  [
226
  "1",
227
- "6"
228
  ],
229
  [
230
- "2",
231
  "8"
232
  ],
233
  [
234
- "4",
235
  "1"
236
  ],
 
 
 
 
237
  [
238
  "1",
239
  "2"
240
  ],
241
  [
242
- "2",
243
- "0"
244
  ],
245
  [
246
  "1",
247
- "4"
248
  ],
249
  [
250
- "1",
251
  "5"
252
  ],
253
- [
254
- "4",
255
- "7"
256
- ],
257
- [
258
- "2",
259
- "7"
260
- ],
261
- [
262
- "2",
263
- "9"
264
- ],
265
- [
266
- "1",
267
- "7"
268
- ],
269
  [
270
  "1",
271
- "8"
272
- ],
273
- [
274
- "2",
275
- "6"
276
  ],
277
  [
278
- "4",
279
  "4"
280
  ],
281
  [
282
  "1",
283
- "1"
284
- ],
285
- [
286
- "3",
287
- "0"
288
- ],
289
- [
290
- "3",
291
- "1"
292
- ],
293
- [
294
- "4",
295
  "5"
296
  ],
297
  [
298
- "4",
299
- "0"
300
- ],
301
- [
302
- "1",
303
  "3"
304
  ],
305
  [
306
- "2",
307
- "1"
308
- ],
309
- [
310
- "4",
311
- "8"
312
- ],
313
- [
314
- "3",
315
  "6"
316
  ],
317
- [
318
- "2",
319
- "3"
320
- ],
321
  [
322
  "1",
323
- "9"
324
- ],
325
- [
326
- "3",
327
- "8"
328
- ],
329
- [
330
- "4",
331
- "3"
332
- ],
333
- [
334
- "3",
335
  "7"
336
  ],
337
  [
338
- "3",
339
  "2"
340
  ],
341
- [
342
- "2",
343
- "4"
344
- ],
345
- [
346
- "4",
347
- "9"
348
- ],
349
  [
350
  "1",
351
- "0"
352
- ],
353
- [
354
- "3",
355
- "4"
356
- ],
357
- [
358
- "3",
359
- "3"
360
- ],
361
- [
362
- "3",
363
- "9"
364
- ],
365
- [
366
- "4",
367
- "2"
368
- ],
369
- [
370
- "5",
371
- "6"
372
  ],
373
  [
374
  "9",
375
- "0"
376
  ],
377
  [
378
  "9",
379
- "3"
380
  ],
381
  [
382
- "5",
383
  "9"
384
  ],
385
  [
386
- "5",
387
- "1"
388
- ],
389
- [
390
- "5",
391
- "3"
392
- ],
393
- [
394
- "6",
395
- "9"
396
  ],
397
  [
398
  "8",
399
- "6"
400
- ],
401
- [
402
- "9",
403
- "5"
404
  ],
405
  [
406
- "7",
407
  "1"
408
  ],
409
  [
410
  "8",
411
- "5"
412
  ],
413
  [
414
  "8",
415
  "7"
416
  ],
417
  [
418
- "7",
419
- "5"
420
- ],
421
- [
422
- "8",
423
- "8"
424
- ],
425
- [
426
- "6",
427
- "1"
428
- ],
429
- [
430
- "7",
431
- "0"
432
- ],
433
- [
434
- "7",
435
  "2"
436
  ],
437
  [
438
- "8",
439
- "0"
440
  ],
441
  [
442
  "8",
443
- "4"
444
- ],
445
- [
446
- "9",
447
- "8"
448
- ],
449
- [
450
- "6",
451
- "4"
452
- ],
453
- [
454
- "7",
455
- "4"
456
- ],
457
- [
458
- "9",
459
  "6"
460
  ],
461
  [
462
- "5",
463
- "2"
464
- ],
465
- [
466
- "6",
467
- "3"
468
- ],
469
- [
470
- "7",
471
- "3"
472
- ],
473
- [
474
- "7",
475
- "7"
476
- ],
477
- [
478
- "9",
479
- "2"
480
- ],
481
- [
482
- "9",
483
- "7"
484
- ],
485
- [
486
- "9",
487
- "9"
488
- ],
489
- [
490
- "6",
491
  "5"
492
  ],
493
  [
494
- "6",
495
- "6"
496
  ],
497
  [
498
- "6",
499
- "8"
500
  ],
501
  [
502
- "7",
503
- "9"
504
  ],
505
  [
506
  "8",
507
  "3"
508
  ],
509
  [
510
- "5",
511
- "5"
512
- ],
513
- [
514
- "7",
515
  "6"
516
  ],
517
  [
518
- "7",
519
- "8"
520
- ],
521
- [
522
- "8",
523
- "9"
524
- ],
525
- [
526
- "6",
527
- "7"
528
- ],
529
- [
530
- "5",
531
  "7"
532
  ],
533
- [
534
- "6",
535
- "0"
536
- ],
537
  [
538
  "8",
539
- "1"
540
- ],
541
- [
542
- "8",
543
- "2"
544
- ],
545
- [
546
- "5",
547
- "0"
548
- ],
549
- [
550
- "5",
551
- "8"
552
- ],
553
- [
554
- "9",
555
- "4"
556
- ],
557
- [
558
- "6",
559
  "2"
560
- ],
561
- [
562
- "5",
563
- "4"
564
- ],
565
- [
566
- "9",
567
- "1"
568
  ]
569
  ]
570
  }
 
114
  "8": 14,
115
  "9": 15,
116
  "=": 16,
117
+ "99": 17,
118
+ "10": 18,
119
+ "98": 19,
120
+ "11": 20,
121
+ "97": 21,
122
+ "12": 22,
123
+ "96": 23,
124
+ "13": 24,
125
+ "95": 25,
126
  "14": 26,
127
+ "94": 27,
128
+ "15": 28,
129
+ "93": 29,
130
+ "16": 30,
131
  "17": 31,
132
+ "92": 32,
133
+ "18": 33,
134
+ "91": 34,
135
+ "90": 35,
136
+ "19": 36,
137
+ "20": 37,
138
+ "89": 38,
139
+ "21": 39,
140
+ "88": 40,
141
+ "87": 41,
142
+ "22": 42,
143
+ "23": 43,
144
+ "86": 44,
145
+ "85": 45,
146
+ "24": 46,
147
+ "25": 47,
148
+ "84": 48,
149
+ "83": 49,
150
+ "26": 50,
151
+ "27": 51,
152
+ "82": 52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  },
154
  "merges": [
155
  [
156
+ "9",
157
+ "9"
 
 
 
 
 
 
 
 
 
 
 
 
158
  ],
159
  [
160
  "1",
161
+ "0"
162
  ],
163
  [
164
+ "9",
165
  "8"
166
  ],
167
  [
168
+ "1",
169
  "1"
170
  ],
171
+ [
172
+ "9",
173
+ "7"
174
+ ],
175
  [
176
  "1",
177
  "2"
178
  ],
179
  [
180
+ "9",
181
+ "6"
182
  ],
183
  [
184
  "1",
185
+ "3"
186
  ],
187
  [
188
+ "9",
189
  "5"
190
  ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
  [
192
  "1",
193
+ "4"
 
 
 
 
194
  ],
195
  [
196
+ "9",
197
  "4"
198
  ],
199
  [
200
  "1",
 
 
 
 
 
 
 
 
 
 
 
 
201
  "5"
202
  ],
203
  [
204
+ "9",
 
 
 
 
205
  "3"
206
  ],
207
  [
208
+ "1",
 
 
 
 
 
 
 
 
209
  "6"
210
  ],
 
 
 
 
211
  [
212
  "1",
 
 
 
 
 
 
 
 
 
 
 
 
213
  "7"
214
  ],
215
  [
216
+ "9",
217
  "2"
218
  ],
 
 
 
 
 
 
 
 
219
  [
220
  "1",
221
+ "8"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
  ],
223
  [
224
  "9",
225
+ "1"
226
  ],
227
  [
228
  "9",
229
+ "0"
230
  ],
231
  [
232
+ "1",
233
  "9"
234
  ],
235
  [
236
+ "2",
237
+ "0"
 
 
 
 
 
 
 
 
238
  ],
239
  [
240
  "8",
241
+ "9"
 
 
 
 
242
  ],
243
  [
244
+ "2",
245
  "1"
246
  ],
247
  [
248
  "8",
249
+ "8"
250
  ],
251
  [
252
  "8",
253
  "7"
254
  ],
255
  [
256
+ "2",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
  "2"
258
  ],
259
  [
260
+ "2",
261
+ "3"
262
  ],
263
  [
264
  "8",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
265
  "6"
266
  ],
267
  [
268
+ "8",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
269
  "5"
270
  ],
271
  [
272
+ "2",
273
+ "4"
274
  ],
275
  [
276
+ "2",
277
+ "5"
278
  ],
279
  [
280
+ "8",
281
+ "4"
282
  ],
283
  [
284
  "8",
285
  "3"
286
  ],
287
  [
288
+ "2",
 
 
 
 
289
  "6"
290
  ],
291
  [
292
+ "2",
 
 
 
 
 
 
 
 
 
 
 
 
293
  "7"
294
  ],
 
 
 
 
295
  [
296
  "8",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
297
  "2"
 
 
 
 
 
 
 
 
298
  ]
299
  ]
300
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a492aff492799c2cfcb59338c7d31f03c862fb04432893ba52b02365df3bfca5
3
  size 5329
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bc0c5f388765da2c349e95408bf7877378a694c112513d5dbe615315e619ea0
3
  size 5329