Harley-ml committed on
Commit
1e66ad7
·
verified ·
1 Parent(s): bf6bb2c

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +850 -473
README.md CHANGED
@@ -10,476 +10,853 @@ tags:
10
  - slm
11
  - text-generation
12
  model-index:
13
- - name: Tenete-8M
14
- results:
15
- - task:
16
- type: multiple-choice
17
- name: ANLI R1 (0-Shot)
18
- dataset:
19
- type: anli_r1
20
- name: ANLI R1
21
- metrics:
22
- - name: accuracy
23
- type: acc
24
- value: 0.3690
25
-
26
- - task:
27
- type: multiple-choice
28
- name: ANLI R2 (0-Shot)
29
- dataset:
30
- type: anli_r2
31
- name: ANLI R2
32
- metrics:
33
- - name: accuracy
34
- type: acc
35
- value: 0.3310
36
-
37
- - task:
38
- type: multiple-choice
39
- name: ANLI R3 (0-Shot)
40
- dataset:
41
- type: anli_r3
42
- name: ANLI R3
43
- metrics:
44
- - name: accuracy
45
- type: acc
46
- value: 0.3233
47
-
48
- - task:
49
- type: multiple-choice
50
- name: ARC Challenge (0-Shot)
51
- dataset:
52
- type: arc_challenge
53
- name: ARC Challenge
54
- metrics:
55
- - name: accuracy
56
- type: acc
57
- value: 0.1809
58
- - name: accuracy_norm
59
- type: acc_norm
60
- value: 0.2210
61
-
62
- - task:
63
- type: multiple-choice
64
- name: ARC Easy (0-Shot)
65
- dataset:
66
- type: arc_easy
67
- name: ARC Easy
68
- metrics:
69
- - name: accuracy
70
- type: acc
71
- value: 0.3283
72
- - name: accuracy_norm
73
- type: acc_norm
74
- value: 0.3194
75
-
76
- - task:
77
- type: multiple-choice
78
- name: HellaSwag (0-Shot)
79
- dataset:
80
- type: hellaswag
81
- name: HellaSwag
82
- metrics:
83
- - name: accuracy
84
- type: acc
85
- value: 0.2649
86
- - name: accuracy_norm
87
- type: acc_norm
88
- value: 0.2677
89
-
90
- - task:
91
- type: multiple-choice
92
- name: MMLU (0-Shot)
93
- dataset:
94
- type: mmlu
95
- name: MMLU
96
- metrics:
97
- - name: accuracy
98
- type: acc
99
- value: 0.2300
100
-
101
- - task:
102
- type: multiple-choice
103
- name: MMLU Humanities (0-Shot)
104
- dataset:
105
- type: mmlu
106
- name: MMLU Humanities
107
- metrics:
108
- - name: accuracy
109
- type: acc
110
- value: 0.2429
111
-
112
- - task:
113
- type: multiple-choice
114
- name: MMLU Other (0-Shot)
115
- dataset:
116
- type: mmlu
117
- name: MMLU Other
118
- metrics:
119
- - name: accuracy
120
- type: acc
121
- value: 0.2350
122
-
123
- - task:
124
- type: multiple-choice
125
- name: MMLU Social Sciences (0-Shot)
126
- dataset:
127
- type: mmlu
128
- name: MMLU Social Sciences
129
- metrics:
130
- - name: accuracy
131
- type: acc
132
- value: 0.2168
133
-
134
- - task:
135
- type: multiple-choice
136
- name: MMLU STEM (0-Shot)
137
- dataset:
138
- type: mmlu
139
- name: MMLU STEM
140
- metrics:
141
- - name: accuracy
142
- type: acc
143
- value: 0.2185
144
-
145
- - task:
146
- type: multiple-choice
147
- name: PiQA (0-Shot)
148
- dataset:
149
- type: piqa
150
- name: PiQA
151
- metrics:
152
- - name: accuracy
153
- type: acc
154
- value: 0.5544
155
- - name: accuracy_norm
156
- type: acc_norm
157
- value: 0.5571
158
-
159
- - task:
160
- type: multiple-choice
161
- name: SWAG (0-Shot)
162
- dataset:
163
- type: swag
164
- name: SWAG
165
- metrics:
166
- - name: accuracy
167
- type: acc
168
- value: 0.3024
169
- - name: accuracy_norm
170
- type: acc_norm
171
- value: 0.3297
172
-
173
- - task:
174
- type: multiple-choice
175
- name: TruthfulQA MC1 (0-Shot)
176
- dataset:
177
- type: truthfulqa_mc1
178
- name: TruthfulQA MC1
179
- metrics:
180
- - name: accuracy
181
- type: acc
182
- value: 0.2705
183
-
184
- - task:
185
- type: multiple-choice
186
- name: TruthfulQA MC2 (0-Shot)
187
- dataset:
188
- type: truthfulqa_mc2
189
- name: TruthfulQA MC2
190
- metrics:
191
- - name: accuracy
192
- type: acc
193
- value: 0.4591
194
- - task:
195
- type: text-generation
196
- name: GSM8K (0-Shot)
197
- dataset:
198
- type: gsm8k
199
- name: GSM8K
200
- metrics:
201
- - name: exact_match (flexible-extract)
202
- type: exact_match
203
- value: 0.0114
204
- - name: exact_match (strict-match)
205
- type: exact_match
206
- value: 0.0015
207
-
208
- - task:
209
- type: text-generation
210
- name: TruthfulQA Gen (0-Shot)
211
- dataset:
212
- type: truthfulqa_gen
213
- name: TruthfulQA Gen
214
- metrics:
215
- - name: bleu_acc
216
- type: bleu_acc
217
- value: 0.2399
218
- - name: bleu_diff
219
- type: bleu_diff
220
- value: -1.2697
221
- - name: bleu_max
222
- type: bleu_max
223
- value: 10.7605
224
- - name: rouge1_acc
225
- type: rouge1_acc
226
- value: 0.2864
227
- - name: rouge1_diff
228
- type: rouge1_diff
229
- value: -2.4981
230
- - name: rouge1_max
231
- type: rouge1_max
232
- value: 22.1008
233
- - name: rouge2_acc
234
- type: rouge2_acc
235
- value: 0.0979
236
- - name: rouge2_diff
237
- type: rouge2_diff
238
- value: -1.7592
239
- - name: rouge2_max
240
- type: rouge2_max
241
- value: 11.8332
242
- - name: rougeL_acc
243
- type: rougeL_acc
244
- value: 0.2815
245
- - name: rougeL_diff
246
- type: rougeL_diff
247
- value: -2.2800
248
- - name: rougeL_max
249
- type: rougeL_max
250
- value: 20.7733
251
- - task:
252
- type: multiple-choice
253
- name: ANLI R1 (5-Shot)
254
- dataset:
255
- type: anli_r1
256
- name: ANLI R1
257
- metrics:
258
- - name: accuracy
259
- type: acc
260
- value: 0.3500
261
-
262
- - task:
263
- type: multiple-choice
264
- name: ANLI R2 (5-Shot)
265
- dataset:
266
- type: anli_r2
267
- name: ANLI R2
268
- metrics:
269
- - name: accuracy
270
- type: acc
271
- value: 0.3340
272
-
273
- - task:
274
- type: multiple-choice
275
- name: ANLI R3 (5-Shot)
276
- dataset:
277
- type: anli_r3
278
- name: ANLI R3
279
- metrics:
280
- - name: accuracy
281
- type: acc
282
- value: 0.3250
283
-
284
- - task:
285
- type: multiple-choice
286
- name: ARC Challenge (5-Shot)
287
- dataset:
288
- type: arc_challenge
289
- name: ARC Challenge
290
- metrics:
291
- - name: accuracy
292
- type: acc
293
- value: 0.1843
294
- - name: accuracy_norm
295
- type: acc_norm
296
- value: 0.2184
297
-
298
- - task:
299
- type: multiple-choice
300
- name: ARC Easy (5-Shot)
301
- dataset:
302
- type: arc_easy
303
- name: ARC Easy
304
- metrics:
305
- - name: accuracy
306
- type: acc
307
- value: 0.3380
308
- - name: accuracy_norm
309
- type: acc_norm
310
- value: 0.3215
311
-
312
- - task:
313
- type: multiple-choice
314
- name: HellaSwag (5-Shot)
315
- dataset:
316
- type: hellaswag
317
- name: HellaSwag
318
- metrics:
319
- - name: accuracy
320
- type: acc
321
- value: 0.2644
322
- - name: accuracy_norm
323
- type: acc_norm
324
- value: 0.2657
325
-
326
- - task:
327
- type: multiple-choice
328
- name: MMLU (5-Shot)
329
- dataset:
330
- type: mmlu
331
- name: MMLU
332
- metrics:
333
- - name: accuracy
334
- type: acc
335
- value: 0.2413
336
-
337
- - task:
338
- type: multiple-choice
339
- name: MMLU Humanities (5-Shot)
340
- dataset:
341
- type: mmlu
342
- name: MMLU Humanities
343
- metrics:
344
- - name: accuracy
345
- type: acc
346
- value: 0.2446
347
-
348
- - task:
349
- type: multiple-choice
350
- name: MMLU Other (5-Shot)
351
- dataset:
352
- type: mmlu
353
- name: MMLU Other
354
- metrics:
355
- - name: accuracy
356
- type: acc
357
- value: 0.2288
358
-
359
- - task:
360
- type: multiple-choice
361
- name: MMLU Social Sciences (5-Shot)
362
- dataset:
363
- type: mmlu
364
- name: MMLU Social Sciences
365
- metrics:
366
- - name: accuracy
367
- type: acc
368
- value: 0.2317
369
-
370
- - task:
371
- type: multiple-choice
372
- name: MMLU STEM (5-Shot)
373
- dataset:
374
- type: mmlu
375
- name: MMLU STEM
376
- metrics:
377
- - name: accuracy
378
- type: acc
379
- value: 0.2578
380
-
381
- - task:
382
- type: multiple-choice
383
- name: PiQA (5-Shot)
384
- dataset:
385
- type: piqa
386
- name: PiQA
387
- metrics:
388
- - name: accuracy
389
- type: acc
390
- value: 0.5560
391
- - name: accuracy_norm
392
- type: acc_norm
393
- value: 0.5533
394
-
395
- - task:
396
- type: multiple-choice
397
- name: SWAG (5-Shot)
398
- dataset:
399
- type: swag
400
- name: SWAG
401
- metrics:
402
- - name: accuracy
403
- type: acc
404
- value: 0.2963
405
- - name: accuracy_norm
406
- type: acc_norm
407
- value: 0.3201
408
-
409
- - task:
410
- type: multiple-choice
411
- name: TruthfulQA MC1 (5-Shot)*
412
- dataset:
413
- type: truthfulqa_mc1
414
- name: TruthfulQA MC1
415
- metrics:
416
- - name: accuracy
417
- type: acc
418
- value: 0.2705
419
- - task:
420
- type: multiple-choice
421
- name: TruthfulQA MC2 (5-Shot)*
422
- dataset:
423
- type: truthfulqa_mc2
424
- name: TruthfulQA MC2
425
- metrics:
426
- - name: accuracy
427
- type: acc
428
- value: 0.4591
429
- - task:
430
- type: text-generation
431
- name: GSM8K (5-Shot)
432
- dataset:
433
- type: gsm8k
434
- name: GSM8K
435
- metrics:
436
- - name: exact_match (flexible-extract)
437
- type: exact_match
438
- value: 0.0114
439
- - name: exact_match (strict-match)
440
- type: exact_match
441
- value: 0.0015
442
- - task:
443
- type: text-generation
444
- name: TruthfulQA Gen (5-Shot)*
445
- dataset:
446
- type: truthfulqa_gen
447
- name: TruthfulQA Gen
448
- metrics:
449
- - name: bleu_acc
450
- type: bleu_acc
451
- value: 0.2399
452
- - name: bleu_diff
453
- type: bleu_diff
454
- value: -1.2697
455
- - name: bleu_max
456
- type: bleu_max
457
- value: 10.7605
458
- - name: rouge1_acc
459
- type: rouge1_acc
460
- value: 0.2864
461
- - name: rouge1_diff
462
- type: rouge1_diff
463
- value: -2.4981
464
- - name: rouge1_max
465
- type: rouge1_max
466
- value: 22.1008
467
- - name: rouge2_acc
468
- type: rouge2_acc
469
- value: 0.0979
470
- - name: rouge2_diff
471
- type: rouge2_diff
472
- value: -1.7592
473
- - name: rouge2_max
474
- type: rouge2_max
475
- value: 11.8332
476
- - name: rougeL_acc
477
- type: rougeL_acc
478
- value: 0.2815
479
- - name: rougeL_diff
480
- type: rougeL_diff
481
- value: -2.2800
482
- - name: rougeL_max
483
- type: rougeL_max
484
- value: 20.7733
485
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  - slm
11
  - text-generation
12
  model-index:
13
+ - name: Tenete-8M
14
+ results:
15
+ - task:
16
+ type: multiple-choice
17
+ name: ANLI R1 (0-Shot)
18
+ dataset:
19
+ type: anli_r1
20
+ name: ANLI R1
21
+ metrics:
22
+ - name: accuracy
23
+ type: acc
24
+ value: 0.369
25
+ - task:
26
+ type: multiple-choice
27
+ name: ANLI R2 (0-Shot)
28
+ dataset:
29
+ type: anli_r2
30
+ name: ANLI R2
31
+ metrics:
32
+ - name: accuracy
33
+ type: acc
34
+ value: 0.331
35
+ - task:
36
+ type: multiple-choice
37
+ name: ANLI R3 (0-Shot)
38
+ dataset:
39
+ type: anli_r3
40
+ name: ANLI R3
41
+ metrics:
42
+ - name: accuracy
43
+ type: acc
44
+ value: 0.3233
45
+ - task:
46
+ type: multiple-choice
47
+ name: ARC Challenge (0-Shot)
48
+ dataset:
49
+ type: arc_challenge
50
+ name: ARC Challenge
51
+ metrics:
52
+ - name: accuracy
53
+ type: acc
54
+ value: 0.1809
55
+ - name: accuracy_norm
56
+ type: acc_norm
57
+ value: 0.221
58
+ - task:
59
+ type: multiple-choice
60
+ name: ARC Easy (0-Shot)
61
+ dataset:
62
+ type: arc_easy
63
+ name: ARC Easy
64
+ metrics:
65
+ - name: accuracy
66
+ type: acc
67
+ value: 0.3283
68
+ - name: accuracy_norm
69
+ type: acc_norm
70
+ value: 0.3194
71
+ - task:
72
+ type: multiple-choice
73
+ name: HellaSwag (0-Shot)
74
+ dataset:
75
+ type: hellaswag
76
+ name: HellaSwag
77
+ metrics:
78
+ - name: accuracy
79
+ type: acc
80
+ value: 0.2649
81
+ - name: accuracy_norm
82
+ type: acc_norm
83
+ value: 0.2677
84
+ - task:
85
+ type: multiple-choice
86
+ name: MMLU (0-Shot)
87
+ dataset:
88
+ type: mmlu
89
+ name: MMLU
90
+ metrics:
91
+ - name: accuracy
92
+ type: acc
93
+ value: 0.23
94
+ - task:
95
+ type: multiple-choice
96
+ name: MMLU Humanities (0-Shot)
97
+ dataset:
98
+ type: mmlu
99
+ name: MMLU Humanities
100
+ metrics:
101
+ - name: accuracy
102
+ type: acc
103
+ value: 0.2429
104
+ - task:
105
+ type: multiple-choice
106
+ name: MMLU Other (0-Shot)
107
+ dataset:
108
+ type: mmlu
109
+ name: MMLU Other
110
+ metrics:
111
+ - name: accuracy
112
+ type: acc
113
+ value: 0.235
114
+ - task:
115
+ type: multiple-choice
116
+ name: MMLU Social Sciences (0-Shot)
117
+ dataset:
118
+ type: mmlu
119
+ name: MMLU Social Sciences
120
+ metrics:
121
+ - name: accuracy
122
+ type: acc
123
+ value: 0.2168
124
+ - task:
125
+ type: multiple-choice
126
+ name: MMLU STEM (0-Shot)
127
+ dataset:
128
+ type: mmlu
129
+ name: MMLU STEM
130
+ metrics:
131
+ - name: accuracy
132
+ type: acc
133
+ value: 0.2185
134
+ - task:
135
+ type: multiple-choice
136
+ name: PiQA (0-Shot)
137
+ dataset:
138
+ type: piqa
139
+ name: PiQA
140
+ metrics:
141
+ - name: accuracy
142
+ type: acc
143
+ value: 0.5544
144
+ - name: accuracy_norm
145
+ type: acc_norm
146
+ value: 0.5571
147
+ - task:
148
+ type: multiple-choice
149
+ name: SWAG (0-Shot)
150
+ dataset:
151
+ type: swag
152
+ name: SWAG
153
+ metrics:
154
+ - name: accuracy
155
+ type: acc
156
+ value: 0.3024
157
+ - name: accuracy_norm
158
+ type: acc_norm
159
+ value: 0.3297
160
+ - task:
161
+ type: multiple-choice
162
+ name: TruthfulQA MC1 (0-Shot)
163
+ dataset:
164
+ type: truthfulqa_mc1
165
+ name: TruthfulQA MC1
166
+ metrics:
167
+ - name: accuracy
168
+ type: acc
169
+ value: 0.2705
170
+ - task:
171
+ type: multiple-choice
172
+ name: TruthfulQA MC2 (0-Shot)
173
+ dataset:
174
+ type: truthfulqa_mc2
175
+ name: TruthfulQA MC2
176
+ metrics:
177
+ - name: accuracy
178
+ type: acc
179
+ value: 0.4591
180
+ - task:
181
+ type: text-generation
182
+ name: GSM8K (0-Shot)
183
+ dataset:
184
+ type: gsm8k
185
+ name: GSM8K
186
+ metrics:
187
+ - name: exact_match (flexible-extract)
188
+ type: exact_match
189
+ value: 0.0114
190
+ - name: exact_match (strict-match)
191
+ type: exact_match
192
+ value: 0.0015
193
+ - task:
194
+ type: text-generation
195
+ name: TruthfulQA Gen (0-Shot)
196
+ dataset:
197
+ type: truthfulqa_gen
198
+ name: TruthfulQA Gen
199
+ metrics:
200
+ - name: bleu_acc
201
+ type: bleu_acc
202
+ value: 0.2399
203
+ - name: bleu_diff
204
+ type: bleu_diff
205
+ value: -1.2697
206
+ - name: bleu_max
207
+ type: bleu_max
208
+ value: 10.7605
209
+ - name: rouge1_acc
210
+ type: rouge1_acc
211
+ value: 0.2864
212
+ - name: rouge1_diff
213
+ type: rouge1_diff
214
+ value: -2.4981
215
+ - name: rouge1_max
216
+ type: rouge1_max
217
+ value: 22.1008
218
+ - name: rouge2_acc
219
+ type: rouge2_acc
220
+ value: 0.0979
221
+ - name: rouge2_diff
222
+ type: rouge2_diff
223
+ value: -1.7592
224
+ - name: rouge2_max
225
+ type: rouge2_max
226
+ value: 11.8332
227
+ - name: rougeL_acc
228
+ type: rougeL_acc
229
+ value: 0.2815
230
+ - name: rougeL_diff
231
+ type: rougeL_diff
232
+ value: -2.28
233
+ - name: rougeL_max
234
+ type: rougeL_max
235
+ value: 20.7733
236
+ - task:
237
+ type: multiple-choice
238
+ name: ANLI R1 (5-Shot)
239
+ dataset:
240
+ type: anli_r1
241
+ name: ANLI R1
242
+ metrics:
243
+ - name: accuracy
244
+ type: acc
245
+ value: 0.35
246
+ - task:
247
+ type: multiple-choice
248
+ name: ANLI R2 (5-Shot)
249
+ dataset:
250
+ type: anli_r2
251
+ name: ANLI R2
252
+ metrics:
253
+ - name: accuracy
254
+ type: acc
255
+ value: 0.334
256
+ - task:
257
+ type: multiple-choice
258
+ name: ANLI R3 (5-Shot)
259
+ dataset:
260
+ type: anli_r3
261
+ name: ANLI R3
262
+ metrics:
263
+ - name: accuracy
264
+ type: acc
265
+ value: 0.325
266
+ - task:
267
+ type: multiple-choice
268
+ name: ARC Challenge (5-Shot)
269
+ dataset:
270
+ type: arc_challenge
271
+ name: ARC Challenge
272
+ metrics:
273
+ - name: accuracy
274
+ type: acc
275
+ value: 0.1843
276
+ - name: accuracy_norm
277
+ type: acc_norm
278
+ value: 0.2184
279
+ - task:
280
+ type: multiple-choice
281
+ name: ARC Easy (5-Shot)
282
+ dataset:
283
+ type: arc_easy
284
+ name: ARC Easy
285
+ metrics:
286
+ - name: accuracy
287
+ type: acc
288
+ value: 0.338
289
+ - name: accuracy_norm
290
+ type: acc_norm
291
+ value: 0.3215
292
+ - task:
293
+ type: multiple-choice
294
+ name: HellaSwag (5-Shot)
295
+ dataset:
296
+ type: hellaswag
297
+ name: HellaSwag
298
+ metrics:
299
+ - name: accuracy
300
+ type: acc
301
+ value: 0.2644
302
+ - name: accuracy_norm
303
+ type: acc_norm
304
+ value: 0.2657
305
+ - task:
306
+ type: multiple-choice
307
+ name: MMLU (5-Shot)
308
+ dataset:
309
+ type: mmlu
310
+ name: MMLU
311
+ metrics:
312
+ - name: accuracy
313
+ type: acc
314
+ value: 0.2413
315
+ - task:
316
+ type: multiple-choice
317
+ name: MMLU Humanities (5-Shot)
318
+ dataset:
319
+ type: mmlu
320
+ name: MMLU Humanities
321
+ metrics:
322
+ - name: accuracy
323
+ type: acc
324
+ value: 0.2446
325
+ - task:
326
+ type: multiple-choice
327
+ name: MMLU Other (5-Shot)
328
+ dataset:
329
+ type: mmlu
330
+ name: MMLU Other
331
+ metrics:
332
+ - name: accuracy
333
+ type: acc
334
+ value: 0.2288
335
+ - task:
336
+ type: multiple-choice
337
+ name: MMLU Social Sciences (5-Shot)
338
+ dataset:
339
+ type: mmlu
340
+ name: MMLU Social Sciences
341
+ metrics:
342
+ - name: accuracy
343
+ type: acc
344
+ value: 0.2317
345
+ - task:
346
+ type: multiple-choice
347
+ name: MMLU STEM (5-Shot)
348
+ dataset:
349
+ type: mmlu
350
+ name: MMLU STEM
351
+ metrics:
352
+ - name: accuracy
353
+ type: acc
354
+ value: 0.2578
355
+ - task:
356
+ type: multiple-choice
357
+ name: PiQA (5-Shot)
358
+ dataset:
359
+ type: piqa
360
+ name: PiQA
361
+ metrics:
362
+ - name: accuracy
363
+ type: acc
364
+ value: 0.556
365
+ - name: accuracy_norm
366
+ type: acc_norm
367
+ value: 0.5533
368
+ - task:
369
+ type: multiple-choice
370
+ name: SWAG (5-Shot)
371
+ dataset:
372
+ type: swag
373
+ name: SWAG
374
+ metrics:
375
+ - name: accuracy
376
+ type: acc
377
+ value: 0.2963
378
+ - name: accuracy_norm
379
+ type: acc_norm
380
+ value: 0.3201
381
+ - task:
382
+ type: multiple-choice
383
+ name: TruthfulQA MC1 (5-Shot)*
384
+ dataset:
385
+ type: truthfulqa_mc1
386
+ name: TruthfulQA MC1
387
+ metrics:
388
+ - name: accuracy
389
+ type: acc
390
+ value: 0.2705
391
+ - task:
392
+ type: multiple-choice
393
+ name: TruthfulQA MC2 (5-Shot)*
394
+ dataset:
395
+ type: truthfulqa_mc2
396
+ name: TruthfulQA MC2
397
+ metrics:
398
+ - name: accuracy
399
+ type: acc
400
+ value: 0.4591
401
+ - task:
402
+ type: text-generation
403
+ name: GSM8K (5-Shot)
404
+ dataset:
405
+ type: gsm8k
406
+ name: GSM8K
407
+ metrics:
408
+ - name: exact_match (flexible-extract)
409
+ type: exact_match
410
+ value: 0.0114
411
+ - name: exact_match (strict-match)
412
+ type: exact_match
413
+ value: 0.0015
414
+ - task:
415
+ type: text-generation
416
+ name: TruthfulQA Gen (5-Shot)*
417
+ dataset:
418
+ type: truthfulqa_gen
419
+ name: TruthfulQA Gen
420
+ metrics:
421
+ - name: bleu_acc
422
+ type: bleu_acc
423
+ value: 0.2399
424
+ - name: bleu_diff
425
+ type: bleu_diff
426
+ value: -1.2697
427
+ - name: bleu_max
428
+ type: bleu_max
429
+ value: 10.7605
430
+ - name: rouge1_acc
431
+ type: rouge1_acc
432
+ value: 0.2864
433
+ - name: rouge1_diff
434
+ type: rouge1_diff
435
+ value: -2.4981
436
+ - name: rouge1_max
437
+ type: rouge1_max
438
+ value: 22.1008
439
+ - name: rouge2_acc
440
+ type: rouge2_acc
441
+ value: 0.0979
442
+ - name: rouge2_diff
443
+ type: rouge2_diff
444
+ value: -1.7592
445
+ - name: rouge2_max
446
+ type: rouge2_max
447
+ value: 11.8332
448
+ - name: rougeL_acc
449
+ type: rougeL_acc
450
+ value: 0.2815
451
+ - name: rougeL_diff
452
+ type: rougeL_diff
453
+ value: -2.28
454
+ - name: rougeL_max
455
+ type: rougeL_max
456
+ value: 20.7733
457
+ datasets:
458
+ - kmfoda/booksum
459
+ - nampdn-ai/tiny-textbooks
460
+ - fabiochiu/medium-articles
461
+ ---
462
+
463
+ # Tenete-8M
464
+
465
+ **Tenete-8M** is an **eight-million parameter model** trained on **five hundred and seventy-seven million tokens**.
466
+ While it can't answer "2 + 2" or write a coherent, logically sound essay, it will *surprise* you, and the credit goes to nampdn-ai's [**tiny-textbooks**](https://huggingface.co/datasets/nampdn-ai/tiny-textbooks).
467
+
468
+ ## Why "Tenete"?
469
+
470
+ Tenete means "**Small Canoe**" in **Taushiro**, an endangered language with only **one fluent speaker**. This seemed the most fitting name: "tenete" was the closest word to "small" in Taushiro that had an English translation, and the fact that the language itself has only one fluent speaker reflects the **tiny and limited size** that Tenete-8M represents.
471
+
472
+ ## Architecture
473
+
474
+ Tenete-8M uses the **Qwen3 architecture**.
475
+
476
+ | Parameter | Value |
477
+ |--------------------------|----------------------|
478
+ | NUM_HIDDEN_LAYERS | 4 |
479
+ | MAX_WINDOW_LAYERS | 3 |
480
+ | HIDDEN_SIZE | 256 |
481
+ | NUM_ATTENTION_HEADS | 4 |
482
+ | NUM_KEY_VALUE_HEADS | 4 |
483
+ | VOCAB_SIZE | 16000 |
484
+ | INTERMEDIATE_SIZE | 1024 |
485
+ | ROPE_THETA | 30000.0 |
486
+ | MAX_POSITION_EMBEDDINGS | 1024 |
487
+ | sliding_window | 384 |
488
+ | TIE_WORD_EMBEDDINGS | True |
489
+
490
+ | Embedding parameters | Non-embedding parameters | Total parameters | % of kv heads out of total heads | % of swa layers out of total layers |
491
+ |----------------------|--------------------------|------------------|----------------------------------|--------------------------------------|
492
+ | 4,096,000 | 4,197,888 | 8,293,888 | 100% | 75% |
493
+
494
+ ## Training
495
+
496
+ Tenete-8M was trained on an **RTX 2060 6GB** for one epoch with a batch size of 4 and a gradient accumulation of 18, resulting in an **effective batch size of 72**.
497
+
498
+ ### Dataset
499
+
500
+ The dataset encompasses **577M tokens**, and includes **4 sources**:
501
+
502
+ 1. **Textbooks**: Web data is too noisy, so we decided to use Tiny-Textbooks, a synthetic dataset of LLM-generated, textbook-style documents.
503
+ 2. **Medium Articles**: While web data, especially Medium articles, is noisy, we still need human-written examples.
504
+ 3. **Books**: Although they make up only a small portion of the data, books are still needed to instill creativity in the model.
505
+ 4. **Q&A**: Sprinkled in to add more knowledge and question-answering ability.
506
+
507
+ We chose not to include code, raw web data (e.g., FineWeb, C4), or narrower domains (e.g., arXiv, clinical trials, LessWrong).
508
+
509
+ #### Stats
510
+
511
+ | Metric | Value |
512
+ |----------------------|--------------------------|
513
+ | Tokens | 577M |
514
+ | Words | 384M |
515
+ | Characters | 2.428B |
516
+ | Bits/byte | 1.7054 |
517
+ | Nats/byte | 1.1821 |
518
+ | Nats/token | 5.0926 |
519
+ | Characters/Token | 4.3081509289548 |
520
+
521
+ ### Training Results
522
+
523
+ | Epoch | Train Loss | Eval Loss | Train PPL | Eval PPL | Train BPB | Eval BPB | Train BPW | Eval BPW |
524
+ |-------|------------|-----------|-----------|----------|-----------|----------|-----------|----------|
525
+ | 0.07234 | 6.548 | 4.870 | 698.0 | 130.4 | 2.193 | 1.631 | 14.195 | 10.558 |
526
+ | 0.14470 | 4.297 | 3.816 | 73.5 | 45.4 | 1.439 | 1.278 | 9.313 | 8.273 |
527
+ | 0.21700 | 3.584 | 3.436 | 36.0 | 31.1 | 1.200 | 1.151 | 7.769 | 7.446 |
528
+ | 0.28930 | 3.337 | 3.279 | 28.1 | 26.5 | 1.117 | 1.098 | 7.234 | 7.107 |
529
+ | 0.36170 | 3.217 | 3.184 | 25.0 | 24.1 | 1.077 | 1.066 | 6.974 | 6.903 |
530
+ | 0.43400 | 3.151 | 3.119 | 23.4 | 22.6 | 1.055 | 1.044 | 6.831 | 6.761 |
531
+ | 0.50640 | 3.091 | 3.075 | 22.0 | 21.7 | 1.035 | 1.030 | 6.700 | 6.665 |
532
+ | 0.57870 | 3.045 | 3.036 | 21.0 | 20.8 | 1.020 | 1.017 | 6.599 | 6.580 |
533
+ | 0.65100 | 3.015 | 3.003 | 20.4 | 20.2 | 1.010 | 1.006 | 6.535 | 6.509 |
534
+ | 0.72340 | 2.986 | 2.978 | 19.8 | 19.6 | 1.000 | 0.997 | 6.471 | 6.455 |
535
+ | 0.79570 | 2.963 | 2.958 | 19.4 | 19.3 | 0.992 | 0.991 | 6.422 | 6.411 |
536
+ | 0.86800 | 2.938 | 2.940 | 18.9 | 18.9 | 0.984 | 0.985 | 6.368 | 6.372 |
537
+ | 0.94040 | **2.927** | **2.927** | **18.7** | **18.7** | **0.980** | **0.980** | **6.343** | **6.343** |
538
+
539
+ Note: BPB stands for Bits Per Byte, and BPW stands for Bits Per Word.
540
+ BPB is simply the number of yes-no questions the model needs to ask to predict the next byte accurately (1.0 BPB = 1 yes-no question), and BPW is the same measure at the word level.
541
+
542
+ ---
543
+
544
+ We decided to evaluate the model on each source it trained on to see the difference in perplexity.
545
+
546
+ [omitted temporarily]
547
+
548
+ ---
549
+
550
+ ## Benchmarks
551
+
552
+ | Task | Dataset | Metric | 0-shot | 5-shot |
553
+ |------|---------|--------|--------|--------|
554
+ | ANLI R1 | anli_r1 | acc | 0.369 | 0.35 |
555
+ | ANLI R2 | anli_r2 | acc | 0.331 | 0.334 |
556
+ | ANLI R3 | anli_r3 | acc | 0.3233 | 0.325 |
557
+ | ARC Challenge | arc_challenge | acc_norm | 0.221 | 0.2184 |
558
+ | ARC Easy | arc_easy | acc_norm | 0.3194 | 0.3215 |
559
+ | HellaSwag | hellaswag | acc_norm | 0.2677 | 0.2657 |
560
+ | MMLU | mmlu | acc | 0.23 | 0.2413 |
561
+ | MMLU Humanities | mmlu | acc | 0.2429 | 0.2446 |
562
+ | MMLU Other | mmlu | acc | 0.235 | 0.2288 |
563
+ | MMLU Social Sciences | mmlu | acc | 0.2168 | 0.2317 |
564
+ | MMLU STEM | mmlu | acc | 0.2185 | 0.2578 |
565
+ | PiQA | piqa | acc_norm | **0.5571** | 0.5533 |
566
+ | SWAG | swag | acc_norm | 0.3297 | 0.3201 |
567
+ | TruthfulQA MC1 | truthfulqa_mc1 | acc | 0.2705 | 0.2705 |
568
+ | TruthfulQA MC2 | truthfulqa_mc2 | acc | 0.4591 | 0.4591 |
569
+ | GSM8K | gsm8k | exact_match (flexible) | 0.0114 | 0.0114 |
570
+ | TruthfulQA Gen | truthfulqa_gen | rouge1_acc | 0.2864 | 0.2864 |
571
+
572
+ The model achieves random or near-random performance on most tasks, which is expected. An 8M parameter model cannot store world-level knowledge or reason thoroughly.
573
+
574
+ ### Coherency Benchmark
575
+
576
+ To evaluate the coherency, factuality, and fluency of our (and other) models, we use Qwen3-32B to grade 300 different generations sampled from an unconditional prompt.
577
+
578
+ Example configuration:
579
+ ```
580
+ # --- Inference settings (local or huggingface) ---
581
+ NUM_GENERATIONS = 300
582
+ MAX_NEW_TOKENS = 256
583
+ MIN_NEW_TOKENS = 30
584
+ TEMPERATURE = 0.7
585
+ TOP_K = 30
586
+ TOP_P = 0.9
587
+ REP_PENALTY = 1.2
588
+ DO_SAMPLE = True
589
+ INFERENCE_BATCH = 5
590
+
591
+ # --- Judge settings ---
592
+ JUDGE_MODEL = "qwen/qwen3-32b"
593
+ JUDGE_MAX_TOKENS = 80
594
+ JUDGE_TEMPERATURE = 0.0
595
+ ```
596
+
597
+ | Model | Avg Score | Incoherent | Mostly Coherent | Partially Coherent | Coherent |
598
+ |--------------------------------|-----------|------------|-----------------|--------------------|----------|
599
+ | pythia-31m-deduped | 0.3196 | 207 | 53 | 35 | 5 |
600
+ | Tenete-8M | 0.4862 | 133 | 86 | 53 | 28 |
601
+ | gpt2 | 0.5052 | 108 | 96 | 75 | 21 |
602
+
603
+ Our model outperforms Pythia-31M-deduped while being **3.8 times smaller**, and loses to GPT2 by only **~0.02 points**.
604
+ Note: In our other model cards, the score may be lower or higher for the same model, because `MAX_NEW_TOKENS` was set to **80, not 256**, for those runs. Furthermore, we changed the `judge_prompt` to be more accurate. This doesn't mean the scores from other model cards are invalid; they're just a little bit outdated.
605
+
606
+ ## Generations
607
+
608
+ Prompt: `The`
609
+
610
+ Output:
611
+ ```
612
+ higher with a great taste of the damp and a more vibrant beauty than others. It also has its own unique style. They have been looking for different shades, and they have a wide range of shades to suit their tastes. Some people find themselves in love with a rich variety of styles that are popular among those who feel it's beautiful. For example, many brands like Nielson are known for their bright colors and bold color. Here is a simple recipe:
613
+
614
+ - What is your favorite type of hair?
615
+ - How do you use your skin to make a smooth look?
616
+ - Can I use my hair instead of cotton or wool?
617
+ - Can I use my hair instead of cotton?
618
+
619
+ Section 3: Practical Examples and Real-World Scenarios
620
+ Let's say you want to create an outfit that looks very well on your face. You can choose from a traditional one such as Jyubande, Scikit-learn, Shoeson, Ring and Towl, or Aramis, or Ladies. Your hair should be slightly darker than other options. On the other hand, if you're looking for clothes with a black dress, you may need to paint a white palette (like red or blue) around your skin.
621
+
622
+ Conclusion:
623
+ In this lesson, we learned how to set up a dress for a fashion design by adding color and texture, pairing and tanks, and sewing techniques. By following these guidelines, you will become a savvy designer and be able to create a personal touch of clothing while enjoying a strong look at all things.
624
+ ```
625
+
626
+ Prompt: `Question: What is AI?\nAnswer:`
627
+
628
+ Output:
629
+ ```
630
+ Machine learning is a method of computer programs that allows us to learn from each other and perform tasks more efficiently. This makes it easier for students to find information about the world around them, whether they are interested in learning or not.
631
+ ## III. Why is AI important?
632
+ Answer: AI is essential because it helps us make informed decisions based on data and decision-making. It enables us to identify trends, patterns, and relationships with others, which can help us stay focused and motivated during difficult times. Additionally, AI technology has been used by companies like Amazon, Apple, and Google, helping businesses improve their operations, productivity, and profitability.
633
+ ```
634
+
635
+ Prompt: `'GPT2 is`
636
+
637
+ Output:
638
+ ```
639
+ a type of gympos that uses the OA to produce an image. This technique has many applications in various fields, including:
640
+
641
+ 1. **Interpreting:** An example of this method is a process by which you create an image using the ERGE model to produce images and other images.
642
+
643
+ 2. **Conditioning:** A method can be used to manipulate the image using the ERGE model. For instance, if you have 3 cmd4(3) and 4 cmd5(3).
644
+
645
+ **Glossary:**
646
+ - Image processing: The process of converting images into images.
647
+ - ERPPO: Visual Modeling for images with high quality pixels.
648
+ - Image processing: The process of converting images from image to image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image on image image image image image image image image image image image image image image image image image image? Image image image images image image image image image image image image.
649
+
650
+ ### Section 1: Introduction to Image Image Images
651
+ Artography is a powerful tool that allows us to visualize and visualize images visually. It provides a wide range of tools and techniques that allow us to observe and understand images accurately. In this lesson, we will focus on image image image images image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image images image image image image image image image image image images image image image image image image image image image image image image image image image image image image image image image image image.
652
+
653
+ Example 2: Image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image image or image image image image image images image image image image image image image image image image image image image image.
654
+ ```
655
+
656
+ Prompt: `The color of the sky is`
657
+
658
+ Output:
659
+ ````
660
+ 1.
661
+ 2. It takes four-dimensional space in a triangle, where each inch of its shape has two sides and one or more angles to represent them. This creates a sense of depth and meaning that the sky is beautifully different from other shapes like circles and squares.
662
+ 3. It can be used as a symbol for navigation and appreciation. For example: "The world is just beginning on the earth but still changing itself."
663
+ 4. Color Scheme (rather colors)
664
+ ```
665
+ Here's an example of how I learned about color theory:
666
+ ```
667
+ import matplotlib as pdipis as y = ['Size', 'Size'].last_id(x))
668
+ ```
669
+ Explanation:
670
+ To understand the concept of color theory, let's say you have three sides and one side, with the following equation:
671
+
672
+ Number of sides of length - 1 * 3 = 10^0/9
673
+ Total length - 2 + 5 = 12
674
+ Step 1: Find the top of the left hand
675
+ If there are no sides of length or number, we would be assigned to the bottom of the right hand. For example: "The world is not only darker than traditional, but also lightning."
676
+ ```
677
+ Explanation:
678
+ Now, let's look at some examples of color theory:
679
+
680
+ 1. What is the difference between color theory and composition?
681
+ Answer: Color theory is the study of colors and their properties.
682
+ 2. How do you know which one part of the left hand?
683
+ Answer: To see if it means looking upwards and finding the right pair of shades, they may use geometric patterns or textures.
684
+ 3. How does a color scheme benefit us?
685
+ Answer: A color scheme benefits both sides by providing a visual representation of the same value. Some examples include simplicity, creativity, and minimalism.
686
+ ## Section 4: Practical Examples
687
+ Let's consider an example of how color theory can be applied in everyday life. Suppose you want to add a pair of shoes to your home. You could start by taking a pair of shoes with a black dress, then sketch out the size of the hips. Then, you might use a variety of colors to create a balanced and visually appealing shoe. Once you have chosen the type of shirt, you might choose a pair of shoes that complements the specific design elements. Finally, you might apply the appropriate pair of shoes to create a cohesive experience.
688
+ ````
689
+
690
+ Prompt: `dfuhudfhudishf88383838838EXMAPLEisdu938u39uAIs`
691
+
692
+ Output:
693
+ ```
694
+ Want. We also discussed how to use these techniques and best practices, such as using different strategies for delivering personalized experiences and tracking messages with our loved ones. By understanding the principles of self-reflection and learning from these tips, we can apply these skills to other areas of life.
695
+ ```
696
+
697
+ ---
698
+
699
+ The samples above include both poor generations (such as the third) and good ones (such as the second).
700
+ As you can see, the model is surprisingly coherent for its size, but it isn't as consistent as larger models.
701
+ Some generations are coherent, while others are, well... broken. A larger model would deliver roughly the same quality consistently.
702
+
703
+ # Limitations
704
+
705
+ 1. Cannot reliably reason, chat, code, or follow prompts
706
+ 2. May loop or generate repetitive sequences
707
+ 3. Almost all generations are non-factual, incoherent, or just wrong
708
+
709
+ ## Use Cases
710
+
711
+ 1. Educational use and testing
712
+ 2. Deployment on edge devices
713
+ 3. Fine-tuning for downstream use
714
+ 4. Or more simply, for fun.
715
+
716
+
717
+ # Inference
718
+
719
+ ```python
720
+ # =============================================================================
721
+ # Inference
722
+ # =============================================================================
723
+
724
+
725
+ MODEL_DIR = "Harley-ml/Tenete-8M" # path
726
+ TOKENIZER_PATH = MODEL_DIR
727
+
728
+ # --- Generation settings ---
729
+ PROMPT = "The" # prompt
730
+ MAX_NEW_TOKENS = 256
731
+ TEMPERATURE = 0.7
732
+ TOP_P = 0.95
733
+ TOP_K = 30
734
+ REPETITION_PENALTY = 1.2
735
+ DO_SAMPLE = True
736
+
737
+ # =============================================================================
738
+
739
+ import torch
740
+ from pathlib import Path
741
+ from transformers import (
742
+ AutoModelForCausalLM,
743
+ PreTrainedTokenizerFast,
744
+ AddedToken,
745
+ )
746
+
747
+ # ---------------------------------------------------------------------------
748
+ # Device
749
+ # ---------------------------------------------------------------------------
750
+
751
+ device = (
752
+ "cuda" if torch.cuda.is_available() else
753
+ "mps" if torch.backends.mps.is_available() else
754
+ "cpu"
755
+ )
756
+ print(f"Device : {device}")
757
+
758
+ # ---------------------------------------------------------------------------
759
+ # Tokenizer (mirrors training setup)
760
+ # ---------------------------------------------------------------------------
761
+
762
+ def load_tokenizer(path: str):
763
+ p = Path(path).resolve()
764
+ if not p.exists():
765
+ raise FileNotFoundError(f"Tokenizer not found: {p}")
766
+ tok = PreTrainedTokenizerFast(tokenizer_file=str(p))
767
+ specials = {}
768
+ if tok.bos_token is None: specials["bos_token"] = AddedToken("<|bos|>", special=True)
769
+ if tok.eos_token is None: specials["eos_token"] = AddedToken("<|eos|>", special=True)
770
+ if tok.unk_token is None: specials["unk_token"] = AddedToken("<|unk|>", special=True)
771
+ if tok.pad_token is None:
772
+ if tok.eos_token is not None:
773
+ tok.pad_token = tok.eos_token
774
+ else:
775
+ specials["pad_token"] = AddedToken("<|pad|>", special=True)
776
+ if specials:
777
+ tok.add_special_tokens(specials)
778
+ tok.padding_side = "left"
779
+ return tok
780
+
781
+ print("Loading tokenizer...")
782
+ tokenizer = load_tokenizer(TOKENIZER_PATH)
783
+ print(f" Vocab size : {tokenizer.vocab_size}")
784
+ print(f" BOS : {tokenizer.bos_token!r}")
785
+ print(f" EOS : {tokenizer.eos_token!r}")
786
+ print(f" PAD : {tokenizer.pad_token!r} (id={tokenizer.pad_token_id})")
787
+
788
+ # ---------------------------------------------------------------------------
789
+ # Model
790
+ # ---------------------------------------------------------------------------
791
+
792
+ print(f"\nLoading model from {MODEL_DIR} ...")
793
+ model = AutoModelForCausalLM.from_pretrained(
794
+ MODEL_DIR,
795
+ dtype=torch.float16 if device == "cuda" else torch.float32,
796
+ low_cpu_mem_usage=True,
797
+ )
798
+
799
+ model.eval()
800
+ model.to(device)
801
+
802
+ total_params = sum(p.numel() for p in model.parameters())
803
+ print(f" Parameters : {total_params:,}")
804
+
805
+ # ---------------------------------------------------------------------------
806
+ # Generation helper
807
+ # ---------------------------------------------------------------------------
808
+
809
+ def generate(
810
+ prompt: str = PROMPT,
811
+ max_new_tokens: int = MAX_NEW_TOKENS,
812
+ temperature: float = TEMPERATURE,
813
+ top_p: float = TOP_P,
814
+ top_k: int = TOP_K,
815
+ repetition_penalty: float = REPETITION_PENALTY,
816
+ do_sample: bool = DO_SAMPLE,
817
+ ) -> str:
818
+
819
+ bos = tokenizer.bos_token or ""
820
+ full_prompt = bos + prompt
821
+
822
+ inputs = tokenizer(
823
+ full_prompt,
824
+ return_tensors="pt",
825
+ add_special_tokens=False,
826
+ ).to(device)
827
+ inputs.pop("token_type_ids", None) # Qwen3 doesn't use this
828
+
829
+ gen_kwargs = dict(
830
+ max_new_tokens = max_new_tokens,
831
+ do_sample = do_sample,
832
+ repetition_penalty = repetition_penalty,
833
+ eos_token_id = tokenizer.eos_token_id,
834
+ pad_token_id = tokenizer.pad_token_id,
835
+ )
836
+ if do_sample:
837
+ gen_kwargs["temperature"] = temperature
838
+ gen_kwargs["top_p"] = top_p
839
+ gen_kwargs["top_k"] = top_k
840
+
841
+ with torch.inference_mode():
842
+ output_ids = model.generate(**inputs, **gen_kwargs)
843
+
844
+ # Strip the prompt tokens so we only return what was generated
845
+ prompt_len = inputs["input_ids"].shape[-1]
846
+ new_ids = output_ids[0][prompt_len:]
847
+ return tokenizer.decode(new_ids, skip_special_tokens=True)
848
+
849
+
850
+ # ---------------------------------------------------------------------------
851
+ # Run
852
+ # ---------------------------------------------------------------------------
853
+
854
+ if __name__ == "__main__":
855
+ print(f"\nPrompt : {PROMPT!r}")
856
+ print("-" * 60)
857
+
858
+ output = generate(PROMPT)
859
+
860
+ print("Generated:")
861
+ print(output)
862
+ ```