Lakoc committed on
Commit
3fbf65b
·
verified ·
1 Parent(s): 6a570a1

Upload tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +7 -0
  2. tokenizer.json +2132 -0
  3. tokenizer_config.json +10 -0
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "([bos])",
3
+ "eos_token": "([eos])",
4
+ "mask_token": "([mask])",
5
+ "pad_token": "([pad])",
6
+ "unk_token": "([unk])"
7
+ }
tokenizer.json ADDED
@@ -0,0 +1,2132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 0,
8
+ "content": "([bos])",
9
+ "single_word": false,
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": false,
13
+ "special": true
14
+ },
15
+ {
16
+ "id": 1,
17
+ "content": "([eos])",
18
+ "single_word": false,
19
+ "lstrip": false,
20
+ "rstrip": false,
21
+ "normalized": false,
22
+ "special": true
23
+ },
24
+ {
25
+ "id": 2,
26
+ "content": "([unk])",
27
+ "single_word": false,
28
+ "lstrip": false,
29
+ "rstrip": false,
30
+ "normalized": false,
31
+ "special": true
32
+ },
33
+ {
34
+ "id": 3,
35
+ "content": "([pad])",
36
+ "single_word": false,
37
+ "lstrip": false,
38
+ "rstrip": false,
39
+ "normalized": false,
40
+ "special": true
41
+ },
42
+ {
43
+ "id": 4,
44
+ "content": "([mask])",
45
+ "single_word": false,
46
+ "lstrip": false,
47
+ "rstrip": false,
48
+ "normalized": false,
49
+ "special": true
50
+ }
51
+ ],
52
+ "normalizer": null,
53
+ "pre_tokenizer": {
54
+ "type": "Metaspace",
55
+ "replacement": "▁",
56
+ "add_prefix_space": true
57
+ },
58
+ "post_processor": {
59
+ "type": "TemplateProcessing",
60
+ "single": [
61
+ {
62
+ "Sequence": {
63
+ "id": "A",
64
+ "type_id": 0
65
+ }
66
+ },
67
+ {
68
+ "SpecialToken": {
69
+ "id": "([eos])",
70
+ "type_id": 0
71
+ }
72
+ }
73
+ ],
74
+ "pair": [
75
+ {
76
+ "Sequence": {
77
+ "id": "A",
78
+ "type_id": 0
79
+ }
80
+ },
81
+ {
82
+ "SpecialToken": {
83
+ "id": "([eos])",
84
+ "type_id": 0
85
+ }
86
+ },
87
+ {
88
+ "Sequence": {
89
+ "id": "B",
90
+ "type_id": 1
91
+ }
92
+ },
93
+ {
94
+ "SpecialToken": {
95
+ "id": "([eos])",
96
+ "type_id": 1
97
+ }
98
+ }
99
+ ],
100
+ "special_tokens": {
101
+ "([bos])": {
102
+ "id": "([bos])",
103
+ "ids": [
104
+ 0
105
+ ],
106
+ "tokens": [
107
+ "([bos])"
108
+ ]
109
+ },
110
+ "([eos])": {
111
+ "id": "([eos])",
112
+ "ids": [
113
+ 1
114
+ ],
115
+ "tokens": [
116
+ "([eos])"
117
+ ]
118
+ }
119
+ }
120
+ },
121
+ "decoder": {
122
+ "type": "Metaspace",
123
+ "replacement": "▁",
124
+ "add_prefix_space": true
125
+ },
126
+ "model": {
127
+ "type": "Unigram",
128
+ "unk_id": 2,
129
+ "vocab": [
130
+ [
131
+ "([bos])",
132
+ 0.0
133
+ ],
134
+ [
135
+ "([eos])",
136
+ 0.0
137
+ ],
138
+ [
139
+ "([unk])",
140
+ 0.0
141
+ ],
142
+ [
143
+ "([pad])",
144
+ 0.0
145
+ ],
146
+ [
147
+ "([mask])",
148
+ 0.0
149
+ ],
150
+ [
151
+ "▁",
152
+ -2.59281702408172
153
+ ],
154
+ [
155
+ "s",
156
+ -2.846832096965846
157
+ ],
158
+ [
159
+ "a",
160
+ -3.355027224273531
161
+ ],
162
+ [
163
+ "t",
164
+ -3.393961431042454
165
+ ],
166
+ [
167
+ "i",
168
+ -3.4084006872371155
169
+ ],
170
+ [
171
+ "e",
172
+ -3.4593687402770925
173
+ ],
174
+ [
175
+ "▁the",
176
+ -3.809364677119774
177
+ ],
178
+ [
179
+ "unk",
180
+ -3.850929189246447
181
+ ],
182
+ [
183
+ "]",
184
+ -3.850965409526383
185
+ ],
186
+ [
187
+ ")",
188
+ -3.852990425372683
189
+ ],
190
+ [
191
+ "▁([",
192
+ -3.85299042537269
193
+ ],
194
+ [
195
+ "r",
196
+ -4.094359852161929
197
+ ],
198
+ [
199
+ "d",
200
+ -4.11316992845574
201
+ ],
202
+ [
203
+ "o",
204
+ -4.201950348043347
205
+ ],
206
+ [
207
+ "▁and",
208
+ -4.229721028930884
209
+ ],
210
+ [
211
+ "▁to",
212
+ -4.248849013980928
213
+ ],
214
+ [
215
+ "n",
216
+ -4.348259390081129
217
+ ],
218
+ [
219
+ "▁of",
220
+ -4.418050097301856
221
+ ],
222
+ [
223
+ "ing",
224
+ -4.468362591583357
225
+ ],
226
+ [
227
+ "▁is",
228
+ -4.477151270303802
229
+ ],
230
+ [
231
+ "l",
232
+ -4.494056484027906
233
+ ],
234
+ [
235
+ "▁in",
236
+ -4.575750056745386
237
+ ],
238
+ [
239
+ "p",
240
+ -4.584899128833191
241
+ ],
242
+ [
243
+ "y",
244
+ -4.637799480076254
245
+ ],
246
+ [
247
+ "c",
248
+ -4.644663089525897
249
+ ],
250
+ [
251
+ "▁that",
252
+ -4.6603864147085226
253
+ ],
254
+ [
255
+ "u",
256
+ -4.783309159320428
257
+ ],
258
+ [
259
+ "ed",
260
+ -4.846814524751098
261
+ ],
262
+ [
263
+ "▁we",
264
+ -4.880079560393588
265
+ ],
266
+ [
267
+ "▁it",
268
+ -4.893225004775269
269
+ ],
270
+ [
271
+ "m",
272
+ -4.919627253351287
273
+ ],
274
+ [
275
+ "▁you",
276
+ -4.985103298055368
277
+ ],
278
+ [
279
+ "f",
280
+ -5.027307044393842
281
+ ],
282
+ [
283
+ "g",
284
+ -5.171265598312889
285
+ ],
286
+ [
287
+ "▁be",
288
+ -5.249873992154958
289
+ ],
290
+ [
291
+ "k",
292
+ -5.306694390961949
293
+ ],
294
+ [
295
+ "▁thi",
296
+ -5.332036394347231
297
+ ],
298
+ [
299
+ "al",
300
+ -5.358246528230415
301
+ ],
302
+ [
303
+ "w",
304
+ -5.3757230492858845
305
+ ],
306
+ [
307
+ "h",
308
+ -5.380264963144688
309
+ ],
310
+ [
311
+ "▁so",
312
+ -5.387409655248286
313
+ ],
314
+ [
315
+ "▁are",
316
+ -5.400093862250188
317
+ ],
318
+ [
319
+ "or",
320
+ -5.419532476251575
321
+ ],
322
+ [
323
+ "b",
324
+ -5.432614155894891
325
+ ],
326
+ [
327
+ "ly",
328
+ -5.435895869014663
329
+ ],
330
+ [
331
+ "▁not",
332
+ -5.4665966086609945
333
+ ],
334
+ [
335
+ "v",
336
+ -5.509818024110549
337
+ ],
338
+ [
339
+ "in",
340
+ -5.519912000688162
341
+ ],
342
+ [
343
+ "an",
344
+ -5.595795502816262
345
+ ],
346
+ [
347
+ "▁have",
348
+ -5.60883245933184
349
+ ],
350
+ [
351
+ "on",
352
+ -5.6256645337389575
353
+ ],
354
+ [
355
+ "▁for",
356
+ -5.635595512460842
357
+ ],
358
+ [
359
+ "▁do",
360
+ -5.6374099852425505
361
+ ],
362
+ [
363
+ "▁was",
364
+ -5.723276308134322
365
+ ],
366
+ [
367
+ "▁on",
368
+ -5.743095684018304
369
+ ],
370
+ [
371
+ "le",
372
+ -5.76070836248679
373
+ ],
374
+ [
375
+ "re",
376
+ -5.77655479919024
377
+ ],
378
+ [
379
+ "▁they",
380
+ -5.795448251103968
381
+ ],
382
+ [
383
+ "er",
384
+ -5.7992845792341114
385
+ ],
386
+ [
387
+ "ch",
388
+ -5.81478145540922
389
+ ],
390
+ [
391
+ "▁re",
392
+ -5.843487125026712
393
+ ],
394
+ [
395
+ "▁with",
396
+ -5.8874500113813735
397
+ ],
398
+ [
399
+ "ic",
400
+ -5.918799565838919
401
+ ],
402
+ [
403
+ "▁but",
404
+ -5.930793316234986
405
+ ],
406
+ [
407
+ "▁what",
408
+ -5.932288213721186
409
+ ],
410
+ [
411
+ "▁can",
412
+ -5.953510754321824
413
+ ],
414
+ [
415
+ "ur",
416
+ -5.968178990473417
417
+ ],
418
+ [
419
+ "ter",
420
+ -5.968256487230722
421
+ ],
422
+ [
423
+ "▁as",
424
+ -5.972678693374657
425
+ ],
426
+ [
427
+ "ce",
428
+ -6.029926315071711
429
+ ],
430
+ [
431
+ "▁me",
432
+ -6.046376462967448
433
+ ],
434
+ [
435
+ "ent",
436
+ -6.050262468851077
437
+ ],
438
+ [
439
+ "th",
440
+ -6.067805528127913
441
+ ],
442
+ [
443
+ "▁ma",
444
+ -6.131960747326147
445
+ ],
446
+ [
447
+ "▁at",
448
+ -6.138386657031125
449
+ ],
450
+ [
451
+ "about",
452
+ -6.167158251053305
453
+ ],
454
+ [
455
+ "un",
456
+ -6.181842915931661
457
+ ],
458
+ [
459
+ "▁all",
460
+ -6.20424986580228
461
+ ],
462
+ [
463
+ "▁us",
464
+ -6.217295978939266
465
+ ],
466
+ [
467
+ "ve",
468
+ -6.225380529297736
469
+ ],
470
+ [
471
+ "▁there",
472
+ -6.232464937756388
473
+ ],
474
+ [
475
+ "▁my",
476
+ -6.23614114257248
477
+ ],
478
+ [
479
+ "▁an",
480
+ -6.3069852979408
481
+ ],
482
+ [
483
+ "ation",
484
+ -6.3190710988219045
485
+ ],
486
+ [
487
+ "▁he",
488
+ -6.323972547255821
489
+ ],
490
+ [
491
+ "▁like",
492
+ -6.384587060094013
493
+ ],
494
+ [
495
+ "▁people",
496
+ -6.387702184589152
497
+ ],
498
+ [
499
+ "▁co",
500
+ -6.4197126557699224
501
+ ],
502
+ [
503
+ "▁one",
504
+ -6.427689896795764
505
+ ],
506
+ [
507
+ "▁our",
508
+ -6.438420746463613
509
+ ],
510
+ [
511
+ "▁if",
512
+ -6.463213249855276
513
+ ],
514
+ [
515
+ "ies",
516
+ -6.466810786198042
517
+ ],
518
+ [
519
+ "▁from",
520
+ -6.470647565430909
521
+ ],
522
+ [
523
+ "▁mo",
524
+ -6.477409720708605
525
+ ],
526
+ [
527
+ "▁con",
528
+ -6.505790805736119
529
+ ],
530
+ [
531
+ "▁or",
532
+ -6.550931300395687
533
+ ],
534
+ [
535
+ "0",
536
+ -6.56122661435958
537
+ ],
538
+ [
539
+ "nd",
540
+ -6.5668699457644255
541
+ ],
542
+ [
543
+ "▁out",
544
+ -6.5684221767234785
545
+ ],
546
+ [
547
+ "▁just",
548
+ -6.5686010025050425
549
+ ],
550
+ [
551
+ "ge",
552
+ -6.586605784866393
553
+ ],
554
+ [
555
+ "cause",
556
+ -6.588241811302158
557
+ ],
558
+ [
559
+ "▁see",
560
+ -6.5949530396895994
561
+ ],
562
+ [
563
+ "▁when",
564
+ -6.599974572805861
565
+ ],
566
+ [
567
+ "▁would",
568
+ -6.606921677824124
569
+ ],
570
+ [
571
+ "▁ex",
572
+ -6.61041107046502
573
+ ],
574
+ [
575
+ "▁tr",
576
+ -6.618373395542154
577
+ ],
578
+ [
579
+ "▁these",
580
+ -6.62260645328524
581
+ ],
582
+ [
583
+ "▁now",
584
+ -6.632161104955264
585
+ ],
586
+ [
587
+ "▁who",
588
+ -6.635832866112693
589
+ ],
590
+ [
591
+ "▁thing",
592
+ -6.67135143948857
593
+ ],
594
+ [
595
+ "▁know",
596
+ -6.673675896544896
597
+ ],
598
+ [
599
+ "▁up",
600
+ -6.69640943895293
601
+ ],
602
+ [
603
+ "tion",
604
+ -6.707979332616
605
+ ],
606
+ [
607
+ "me",
608
+ -6.709936696310173
609
+ ],
610
+ [
611
+ "▁go",
612
+ -6.710623324221499
613
+ ],
614
+ [
615
+ "▁think",
616
+ -6.714133588775203
617
+ ],
618
+ [
619
+ "▁pa",
620
+ -6.71714944035719
621
+ ],
622
+ [
623
+ "▁se",
624
+ -6.718680072381458
625
+ ],
626
+ [
627
+ "▁will",
628
+ -6.720109781560021
629
+ ],
630
+ [
631
+ "▁ho",
632
+ -6.720170661330384
633
+ ],
634
+ [
635
+ "▁by",
636
+ -6.721095141355006
637
+ ],
638
+ [
639
+ "▁di",
640
+ -6.724710312743682
641
+ ],
642
+ [
643
+ "▁how",
644
+ -6.7254939284453705
645
+ ],
646
+ [
647
+ "ck",
648
+ -6.7313580469977214
649
+ ],
650
+ [
651
+ "very",
652
+ -6.741095758032021
653
+ ],
654
+ [
655
+ "▁get",
656
+ -6.746454814680545
657
+ ],
658
+ [
659
+ "ll",
660
+ -6.760221826176743
661
+ ],
662
+ [
663
+ "▁pro",
664
+ -6.763809825449721
665
+ ],
666
+ [
667
+ "▁no",
668
+ -6.770926703803614
669
+ ],
670
+ [
671
+ "▁had",
672
+ -6.7796436687805155
673
+ ],
674
+ [
675
+ "▁ba",
676
+ -6.786786701804624
677
+ ],
678
+ [
679
+ "▁am",
680
+ -6.786822442257263
681
+ ],
682
+ [
683
+ "ment",
684
+ -6.792776940927245
685
+ ],
686
+ [
687
+ "▁more",
688
+ -6.8025374347670695
689
+ ],
690
+ [
691
+ "▁year",
692
+ -6.812012015308449
693
+ ],
694
+ [
695
+ "▁them",
696
+ -6.817024115499338
697
+ ],
698
+ [
699
+ "ide",
700
+ -6.820768705286692
701
+ ],
702
+ [
703
+ "▁some",
704
+ -6.8293849455715545
705
+ ],
706
+ [
707
+ "ity",
708
+ -6.830203222215266
709
+ ],
710
+ [
711
+ "▁going",
712
+ -6.830635614280581
713
+ ],
714
+ [
715
+ "▁time",
716
+ -6.831841717883933
717
+ ],
718
+ [
719
+ "1",
720
+ -6.847393562136933
721
+ ],
722
+ [
723
+ "ther",
724
+ -6.857068158548849
725
+ ],
726
+ [
727
+ "▁bo",
728
+ -6.857135856112599
729
+ ],
730
+ [
731
+ "▁were",
732
+ -6.863668924034885
733
+ ],
734
+ [
735
+ "▁their",
736
+ -6.8645372292006765
737
+ ],
738
+ [
739
+ "ive",
740
+ -6.869128207203495
741
+ ],
742
+ [
743
+ "▁mu",
744
+ -6.886402777433895
745
+ ],
746
+ [
747
+ "x",
748
+ -6.904422101224576
749
+ ],
750
+ [
751
+ "▁your",
752
+ -6.911538672946254
753
+ ],
754
+ [
755
+ "▁look",
756
+ -6.918280672105752
757
+ ],
758
+ [
759
+ "▁which",
760
+ -6.933308643308452
761
+ ],
762
+ [
763
+ "▁work",
764
+ -6.9381661812620745
765
+ ],
766
+ [
767
+ "▁2",
768
+ -6.9469384737839395
769
+ ],
770
+ [
771
+ "▁want",
772
+ -6.963014386886217
773
+ ],
774
+ [
775
+ "▁really",
776
+ -6.974232818616819
777
+ ],
778
+ [
779
+ "▁mi",
780
+ -6.9811388765277975
781
+ ],
782
+ [
783
+ "▁po",
784
+ -6.989428952910444
785
+ ],
786
+ [
787
+ "▁has",
788
+ -6.99573133490755
789
+ ],
790
+ [
791
+ "▁world",
792
+ -6.9964067060377655
793
+ ],
794
+ [
795
+ "▁br",
796
+ -6.9987555818015785
797
+ ],
798
+ [
799
+ "▁way",
800
+ -6.999734719128316
801
+ ],
802
+ [
803
+ "▁here",
804
+ -7.019074310175704
805
+ ],
806
+ [
807
+ "▁ca",
808
+ -7.019578211637391
809
+ ],
810
+ [
811
+ "us",
812
+ -7.0243519005443495
813
+ ],
814
+ [
815
+ "age",
816
+ -7.041846360171347
817
+ ],
818
+ [
819
+ "z",
820
+ -7.05079536838384
821
+ ],
822
+ [
823
+ "able",
824
+ -7.062413259930416
825
+ ],
826
+ [
827
+ "▁other",
828
+ -7.069336328826822
829
+ ],
830
+ [
831
+ "ng",
832
+ -7.0740562743779325
833
+ ],
834
+ [
835
+ "▁ha",
836
+ -7.083605870276065
837
+ ],
838
+ [
839
+ "▁could",
840
+ -7.091024151027586
841
+ ],
842
+ [
843
+ "▁make",
844
+ -7.095302988139583
845
+ ],
846
+ [
847
+ "▁la",
848
+ -7.10003792423003
849
+ ],
850
+ [
851
+ "one",
852
+ -7.117391704778116
853
+ ],
854
+ [
855
+ "▁actual",
856
+ -7.128473711477547
857
+ ],
858
+ [
859
+ "ver",
860
+ -7.133692396499471
861
+ ],
862
+ [
863
+ "▁into",
864
+ -7.138511341814535
865
+ ],
866
+ [
867
+ "tic",
868
+ -7.139363297407975
869
+ ],
870
+ [
871
+ "ally",
872
+ -7.144123905557578
873
+ ],
874
+ [
875
+ "ers",
876
+ -7.145039705434364
877
+ ],
878
+ [
879
+ "▁where",
880
+ -7.148431169833696
881
+ ],
882
+ [
883
+ "▁fe",
884
+ -7.162143170057698
885
+ ],
886
+ [
887
+ "▁than",
888
+ -7.168255166847715
889
+ ],
890
+ [
891
+ "am",
892
+ -7.194024878731092
893
+ ],
894
+ [
895
+ "▁did",
896
+ -7.200844896395903
897
+ ],
898
+ [
899
+ "▁she",
900
+ -7.214015726235695
901
+ ],
902
+ [
903
+ "ugh",
904
+ -7.215069305179371
905
+ ],
906
+ [
907
+ "ous",
908
+ -7.225648440582878
909
+ ],
910
+ [
911
+ "les",
912
+ -7.227535434909608
913
+ ],
914
+ [
915
+ "▁j",
916
+ -7.227701515834608
917
+ ],
918
+ [
919
+ "5",
920
+ -7.22838724161401
921
+ ],
922
+ [
923
+ "▁pre",
924
+ -7.228412231025503
925
+ ],
926
+ [
927
+ "▁say",
928
+ -7.238257056896082
929
+ ],
930
+ [
931
+ "▁cl",
932
+ -7.245947147236425
933
+ ],
934
+ [
935
+ "▁vi",
936
+ -7.250369116702121
937
+ ],
938
+ [
939
+ "▁li",
940
+ -7.267006788594745
941
+ ],
942
+ [
943
+ "▁new",
944
+ -7.271103757230721
945
+ ],
946
+ [
947
+ "▁been",
948
+ -7.273765287272704
949
+ ],
950
+ [
951
+ "um",
952
+ -7.289790697831917
953
+ ],
954
+ [
955
+ "▁any",
956
+ -7.291267592684676
957
+ ],
958
+ [
959
+ "▁bi",
960
+ -7.295080757654635
961
+ ],
962
+ [
963
+ "▁his",
964
+ -7.299760201510503
965
+ ],
966
+ [
967
+ "▁sp",
968
+ -7.305303730474352
969
+ ],
970
+ [
971
+ "ical",
972
+ -7.307323351984438
973
+ ],
974
+ [
975
+ "hose",
976
+ -7.3114913033516125
977
+ ],
978
+ [
979
+ "▁need",
980
+ -7.313058253270235
981
+ ],
982
+ [
983
+ "▁right",
984
+ -7.322625855860206
985
+ ],
986
+ [
987
+ "▁comp",
988
+ -7.323471947989754
989
+ ],
990
+ [
991
+ "▁take",
992
+ -7.326942026217436
993
+ ],
994
+ [
995
+ "▁even",
996
+ -7.327454700703431
997
+ ],
998
+ [
999
+ "▁over",
1000
+ -7.329218759587572
1001
+ ],
1002
+ [
1003
+ "▁start",
1004
+ -7.339334478402948
1005
+ ],
1006
+ [
1007
+ "▁3",
1008
+ -7.341330209983386
1009
+ ],
1010
+ [
1011
+ "▁car",
1012
+ -7.354030933819425
1013
+ ],
1014
+ [
1015
+ "▁lo",
1016
+ -7.362442350524409
1017
+ ],
1018
+ [
1019
+ "com",
1020
+ -7.374098833333429
1021
+ ],
1022
+ [
1023
+ "▁well",
1024
+ -7.390161285883304
1025
+ ],
1026
+ [
1027
+ "▁something",
1028
+ -7.390165545491165
1029
+ ],
1030
+ [
1031
+ "▁every",
1032
+ -7.390369002934392
1033
+ ],
1034
+ [
1035
+ "▁ar",
1036
+ -7.413232701641375
1037
+ ],
1038
+ [
1039
+ "ize",
1040
+ -7.426412981313161
1041
+ ],
1042
+ [
1043
+ "▁back",
1044
+ -7.427173568621068
1045
+ ],
1046
+ [
1047
+ "▁10",
1048
+ -7.441486980314046
1049
+ ],
1050
+ [
1051
+ "per",
1052
+ -7.445071842052444
1053
+ ],
1054
+ [
1055
+ "▁cr",
1056
+ -7.456267263418599
1057
+ ],
1058
+ [
1059
+ "war",
1060
+ -7.457041121613809
1061
+ ],
1062
+ [
1063
+ "▁fr",
1064
+ -7.459029556799015
1065
+ ],
1066
+ [
1067
+ "▁call",
1068
+ -7.46112374109263
1069
+ ],
1070
+ [
1071
+ "▁most",
1072
+ -7.462118302109969
1073
+ ],
1074
+ [
1075
+ "▁pr",
1076
+ -7.465632258952185
1077
+ ],
1078
+ [
1079
+ "▁also",
1080
+ -7.465693588901637
1081
+ ],
1082
+ [
1083
+ "1s",
1084
+ -7.4673436351197235
1085
+ ],
1086
+ [
1087
+ "j",
1088
+ -7.470293333180523
1089
+ ],
1090
+ [
1091
+ "▁sai",
1092
+ -7.47782202687231
1093
+ ],
1094
+ [
1095
+ "000",
1096
+ -7.483070644854715
1097
+ ],
1098
+ [
1099
+ "▁life",
1100
+ -7.484842783513717
1101
+ ],
1102
+ [
1103
+ "ful",
1104
+ -7.487346680602727
1105
+ ],
1106
+ [
1107
+ "ance",
1108
+ -7.502415813556965
1109
+ ],
1110
+ [
1111
+ "▁ga",
1112
+ -7.505566986745112
1113
+ ],
1114
+ [
1115
+ "▁come",
1116
+ -7.507829702035803
1117
+ ],
1118
+ [
1119
+ "▁kind",
1120
+ -7.509654685818351
1121
+ ],
1122
+ [
1123
+ "ence",
1124
+ -7.530438073529829
1125
+ ],
1126
+ [
1127
+ "▁little",
1128
+ -7.547114481715971
1129
+ ],
1130
+ [
1131
+ "▁lot",
1132
+ -7.55188953364898
1133
+ ],
1134
+ [
1135
+ "▁talk",
1136
+ -7.5589159495899825
1137
+ ],
1138
+ [
1139
+ "▁part",
1140
+ -7.561863039692147
1141
+ ],
1142
+ [
1143
+ "ction",
1144
+ -7.565679549641631
1145
+ ],
1146
+ [
1147
+ "▁happen",
1148
+ -7.57815173458663
1149
+ ],
1150
+ [
1151
+ "▁many",
1152
+ -7.58130503159693
1153
+ ],
1154
+ [
1155
+ "▁her",
1156
+ -7.596658925219376
1157
+ ],
1158
+ [
1159
+ "4",
1160
+ -7.607147336680983
1161
+ ],
1162
+ [
1163
+ "each",
1164
+ -7.612947571030476
1165
+ ],
1166
+ [
1167
+ "▁mean",
1168
+ -7.619962514213633
1169
+ ],
1170
+ [
1171
+ "▁creat",
1172
+ -7.661761922798204
1173
+ ],
1174
+ [
1175
+ "▁different",
1176
+ -7.67122410069279
1177
+ ],
1178
+ [
1179
+ "▁let",
1180
+ -7.677441531038708
1181
+ ],
1182
+ [
1183
+ "▁human",
1184
+ -7.680394078987549
1185
+ ],
1186
+ [
1187
+ "day",
1188
+ -7.685829986670678
1189
+ ],
1190
+ [
1191
+ "00000",
1192
+ -7.687110569454335
1193
+ ],
1194
+ [
1195
+ "▁change",
1196
+ -7.688074055024863
1197
+ ],
1198
+ [
1199
+ "▁show",
1200
+ -7.705816323646177
1201
+ ],
1202
+ [
1203
+ "around",
1204
+ -7.710025462854299
1205
+ ],
1206
+ [
1207
+ "▁good",
1208
+ -7.714050133917926
1209
+ ],
1210
+ [
1211
+ "▁does",
1212
+ -7.715047479814897
1213
+ ],
1214
+ [
1215
+ "▁through",
1216
+ -7.718038148789979
1217
+ ],
1218
+ [
1219
+ "▁fl",
1220
+ -7.7183657794136735
1221
+ ],
1222
+ [
1223
+ "▁bu",
1224
+ -7.718689852896475
1225
+ ],
1226
+ [
1227
+ "▁real",
1228
+ -7.721158921371002
1229
+ ],
1230
+ [
1231
+ "q",
1232
+ -7.722948646486868
1233
+ ],
1234
+ [
1235
+ "▁down",
1236
+ -7.731549014813332
1237
+ ],
1238
+ [
1239
+ "▁why",
1240
+ -7.749744153065748
1241
+ ],
1242
+ [
1243
+ "▁live",
1244
+ -7.757995308103174
1245
+ ],
1246
+ [
1247
+ "ated",
1248
+ -7.769146541041033
1249
+ ],
1250
+ [
1251
+ "▁tell",
1252
+ -7.772980291921325
1253
+ ],
1254
+ [
1255
+ "▁idea",
1256
+ -7.77749698230712
1257
+ ],
1258
+ [
1259
+ "self",
1260
+ -7.79441466402
1261
+ ],
1262
+ [
1263
+ "▁same",
1264
+ -7.806529851170208
1265
+ ],
1266
+ [
1267
+ "▁give",
1268
+ -7.808789063030774
1269
+ ],
1270
+ [
1271
+ "ture",
1272
+ -7.816124821978905
1273
+ ],
1274
+ [
1275
+ "▁gu",
1276
+ -7.822907720829415
1277
+ ],
1278
+ [
1279
+ "▁dec",
1280
+ -7.827497442862535
1281
+ ],
1282
+ [
1283
+ "▁sha",
1284
+ -7.831994042718744
1285
+ ],
1286
+ [
1287
+ "▁cha",
1288
+ -7.835305423960509
1289
+ ],
1290
+ [
1291
+ "rate",
1292
+ -7.840925431144495
1293
+ ],
1294
+ [
1295
+ "▁problem",
1296
+ -7.859746127968331
1297
+ ],
1298
+ [
1299
+ "log",
1300
+ -7.867267186770574
1301
+ ],
1302
+ [
1303
+ "▁fact",
1304
+ -7.876315750422663
1305
+ ],
1306
+ [
1307
+ "9",
1308
+ -7.8859793746573
1309
+ ],
1310
+ [
1311
+ "▁big",
1312
+ -7.900866055217808
1313
+ ],
1314
+ [
1315
+ "par",
1316
+ -7.902588172614095
1317
+ ],
1318
+ [
1319
+ "▁great",
1320
+ -7.907033482030775
1321
+ ],
1322
+ [
1323
+ "▁app",
1324
+ -7.91427941067953
1325
+ ],
1326
+ [
1327
+ "▁find",
1328
+ -7.916187196447025
1329
+ ],
1330
+ [
1331
+ "▁after",
1332
+ -7.940753458457673
1333
+ ],
1334
+ [
1335
+ "▁system",
1336
+ -7.943278029810768
1337
+ ],
1338
+ [
1339
+ "▁place",
1340
+ -7.94700632655662
1341
+ ],
1342
+ [
1343
+ "▁pu",
1344
+ -7.947172864717192
1345
+ ],
1346
+ [
1347
+ "▁gra",
1348
+ -7.964333957878741
1349
+ ],
1350
+ [
1351
+ "▁countr",
1352
+ -7.970206863296095
1353
+ ],
1354
+ [
1355
+ "▁build",
1356
+ -7.976780126354955
1357
+ ],
1358
+ [
1359
+ "▁20",
1360
+ -7.978013543613265
1361
+ ],
1362
+ [
1363
+ "▁hear",
1364
+ -7.978433112810803
1365
+ ],
1366
+ [
1367
+ "▁again",
1368
+ -7.989773163596327
1369
+ ],
1370
+ [
1371
+ "6",
1372
+ -7.990880109971288
1373
+ ],
1374
+ [
1375
+ "▁imp",
1376
+ -7.991637432829929
1377
+ ],
1378
+ [
1379
+ "▁learn",
1380
+ -7.9959131203456355
1381
+ ],
1382
+ [
1383
+ "form",
1384
+ -7.999593417984542
1385
+ ],
1386
+ [
1387
+ "▁own",
1388
+ -8.017045352055355
1389
+ ],
1390
+ [
1391
+ "▁gene",
1392
+ -8.021482294420894
1393
+ ],
1394
+ [
1395
+ "▁long",
1396
+ -8.024830416011637
1397
+ ],
1398
+ [
1399
+ "▁made",
1400
+ -8.025958739187063
1401
+ ],
1402
+ [
1403
+ "▁qu",
1404
+ -8.029481112958162
1405
+ ],
1406
+ [
1407
+ "light",
1408
+ -8.042944650519123
1409
+ ],
1410
+ [
1411
+ "▁question",
1412
+ -8.04733201088698
1413
+ ],
1414
+ [
1415
+ "line",
1416
+ -8.048098265300471
1417
+ ],
1418
+ [
1419
+ "▁should",
1420
+ -8.05086940090139
1421
+ ],
1422
+ [
1423
+ "▁came",
1424
+ -8.062749375963078
1425
+ ],
1426
+ [
1427
+ "ness",
1428
+ -8.075393218062162
1429
+ ],
1430
+ [
1431
+ "8",
1432
+ -8.080287699177687
1433
+ ],
1434
+ [
1435
+ "▁end",
1436
+ -8.087216166105685
1437
+ ],
1438
+ [
1439
+ "▁feel",
1440
+ -8.089768108480714
1441
+ ],
1442
+ [
1443
+ "▁turn",
1444
+ -8.097471494540764
1445
+ ],
1446
+ [
1447
+ "▁person",
1448
+ -8.098872045094318
1449
+ ],
1450
+ [
1451
+ "▁technolog",
1452
+ -8.100599214400695
1453
+ ],
1454
+ [
1455
+ "%",
1456
+ -8.122602302046138
1457
+ ],
1458
+ [
1459
+ "▁hu",
1460
+ -8.126737314662325
1461
+ ],
1462
+ [
1463
+ "▁design",
1464
+ -8.12674245324745
1465
+ ],
1466
+ [
1467
+ "▁help",
1468
+ -8.130318117974053
1469
+ ],
1470
+ [
1471
+ "▁brain",
1472
+ -8.138463246691146
1473
+ ],
1474
+ [
1475
+ "▁last",
1476
+ -8.142073157234577
1477
+ ],
1478
+ [
1479
+ "▁important",
1480
+ -8.147052636620238
1481
+ ],
1482
+ [
1483
+ "▁before",
1484
+ -8.14837919317405
1485
+ ],
1486
+ [
1487
+ "▁high",
1488
+ -8.15195082403437
1489
+ ],
1490
+ [
1491
+ "▁never",
1492
+ -8.164694320286374
1493
+ ],
1494
+ [
1495
+ "▁thought",
1496
+ -8.164796046272262
1497
+ ],
1498
+ [
1499
+ "▁trans",
1500
+ -8.169287856512064
1501
+ ],
1502
+ [
1503
+ "▁him",
1504
+ -8.171411032689104
1505
+ ],
1506
+ [
1507
+ "▁might",
1508
+ -8.183200086027288
1509
+ ],
1510
+ [
1511
+ "7",
1512
+ -8.190653526557211
1513
+ ],
1514
+ [
1515
+ "▁understand",
1516
+ -8.19065373044177
1517
+ ],
1518
+ [
1519
+ "▁interest",
1520
+ -8.196802373595887
1521
+ ],
1522
+ [
1523
+ "▁power",
1524
+ -8.198573739588852
1525
+ ],
1526
+ [
1527
+ "▁better",
1528
+ -8.19988067880684
1529
+ ],
1530
+ [
1531
+ "land",
1532
+ -8.20388344699932
1533
+ ],
1534
+ [
1535
+ "▁found",
1536
+ -8.206082011890224
1537
+ ],
1538
+ [
1539
+ "▁play",
1540
+ -8.215114406919103
1541
+ ],
1542
+ [
1543
+ "▁still",
1544
+ -8.239390820997944
1545
+ ],
1546
+ [
1547
+ "▁fun",
1548
+ -8.239404594536715
1549
+ ],
1550
+ [
1551
+ "▁cit",
1552
+ -8.245195776069933
1553
+ ],
1554
+ [
1555
+ "▁point",
1556
+ -8.252267399561559
1557
+ ],
1558
+ [
1559
+ "▁school",
1560
+ -8.256383568070238
1561
+ ],
1562
+ [
1563
+ "▁together",
1564
+ -8.25829247621957
1565
+ ],
1566
+ [
1567
+ "▁old",
1568
+ -8.268629977562462
1569
+ ],
1570
+ [
1571
+ "▁example",
1572
+ -8.27323704876234
1573
+ ],
1574
+ [
1575
+ "▁next",
1576
+ -8.281402530476942
1577
+ ],
1578
+ [
1579
+ "wome",
1580
+ -8.282154732814078
1581
+ ],
1582
+ [
1583
+ "▁state",
1584
+ -8.29468188529561
1585
+ ],
1586
+ [
1587
+ "▁under",
1588
+ -8.305898457550091
1589
+ ],
1590
+ [
1591
+ "▁number",
1592
+ -8.310117772682384
1593
+ ],
1594
+ [
1595
+ "▁course",
1596
+ -8.321718517074732
1597
+ ],
1598
+ [
1599
+ "▁ago",
1600
+ -8.327507330243627
1601
+ ],
1602
+ [
1603
+ "▁water",
1604
+ -8.328825111687745
1605
+ ],
1606
+ [
1607
+ "▁data",
1608
+ -8.333203001750057
1609
+ ],
1610
+ [
1611
+ "▁grow",
1612
+ -8.340873398838944
1613
+ ],
1614
+ [
1615
+ "▁simpl",
1616
+ -8.349218229729566
1617
+ ],
1618
+ [
1619
+ "▁famil",
1620
+ -8.369363631544427
1621
+ ],
1622
+ [
1623
+ "tween",
1624
+ -8.37383437525906
1625
+ ],
1626
+ [
1627
+ "▁develop",
1628
+ -8.374643423007722
1629
+ ],
1630
+ [
1631
+ "que",
1632
+ -8.375200998662995
1633
+ ],
1634
+ [
1635
+ "2",
1636
+ -8.376102518707132
1637
+ ],
1638
+ [
1639
+ "▁america",
1640
+ -8.379573798445456
1641
+ ],
1642
+ [
1643
+ "▁believe",
1644
+ -8.394053582941464
1645
+ ],
1646
+ [
1647
+ "▁small",
1648
+ -8.415568961449141
1649
+ ],
1650
+ [
1651
+ "▁maybe",
1652
+ -8.42368192193646
1653
+ ],
1654
+ [
1655
+ "▁become",
1656
+ -8.424145125887913
1657
+ ],
1658
+ [
1659
+ "room",
1660
+ -8.441630177731476
1661
+ ],
1662
+ [
1663
+ "▁far",
1664
+ -8.444549208994184
1665
+ ],
1666
+ [
1667
+ "▁health",
1668
+ -8.467222838689395
1669
+ ],
1670
+ [
1671
+ "▁space",
1672
+ -8.471483420396034
1673
+ ],
1674
+ [
1675
+ "▁word",
1676
+ -8.478065675740668
1677
+ ],
1678
+ [
1679
+ "▁children",
1680
+ -8.499512226612513
1681
+ ],
1682
+ [
1683
+ "alway",
1684
+ -8.520587524981275
1685
+ ],
1686
+ [
1687
+ "▁reason",
1688
+ -8.526841785089601
1689
+ ],
1690
+ [
1691
+ "▁away",
1692
+ -8.544995251715026
1693
+ ],
1694
+ [
1695
+ "abilit",
1696
+ -8.550836871274203
1697
+ ],
1698
+ [
1699
+ "▁econom",
1700
+ -8.56406438391495
1701
+ ],
1702
+ [
1703
+ "▁experience",
1704
+ -8.567033977916246
1705
+ ],
1706
+ [
1707
+ "▁large",
1708
+ -8.57014429948284
1709
+ ],
1710
+ [
1711
+ "▁everything",
1712
+ -8.57959705440446
1713
+ ],
1714
+ [
1715
+ "▁friend",
1716
+ -8.58504421926433
1717
+ ],
1718
+ [
1719
+ "▁open",
1720
+ -8.591645742993963
1721
+ ],
1722
+ [
1723
+ "▁face",
1724
+ -8.607261919389757
1725
+ ],
1726
+ [
1727
+ "▁communit",
1728
+ -8.610078928970474
1729
+ ],
1730
+ [
1731
+ "▁computer",
1732
+ -8.61327278832886
1733
+ ],
1734
+ [
1735
+ "▁money",
1736
+ -8.620019706055368
1737
+ ],
1738
+ [
1739
+ "▁information",
1740
+ -8.623660752747348
1741
+ ],
1742
+ [
1743
+ "▁social",
1744
+ -8.647076269648476
1745
+ ],
1746
+ [
1747
+ "graph",
1748
+ -8.649930279809734
1749
+ ],
1750
+ [
1751
+ "▁writ",
1752
+ -8.660041940377742
1753
+ ],
1754
+ [
1755
+ "▁walk",
1756
+ -8.660537038157099
1757
+ ],
1758
+ [
1759
+ "▁sense",
1760
+ -8.661222653828128
1761
+ ],
1762
+ [
1763
+ "▁africa",
1764
+ -8.664304453839758
1765
+ ],
1766
+ [
1767
+ "▁picture",
1768
+ -8.666496894472793
1769
+ ],
1770
+ [
1771
+ "▁process",
1772
+ -8.674246917973406
1773
+ ],
1774
+ [
1775
+ "ready",
1776
+ -8.687949675721452
1777
+ ],
1778
+ [
1779
+ "▁group",
1780
+ -8.708420511827635
1781
+ ],
1782
+ [
1783
+ "▁future",
1784
+ -8.719915098813502
1785
+ ],
1786
+ [
1787
+ "▁elect",
1788
+ -8.721907126457397
1789
+ ],
1790
+ [
1791
+ "▁probabl",
1792
+ -8.743286352848367
1793
+ ],
1794
+ [
1795
+ "3",
1796
+ -8.748160451209834
1797
+ ],
1798
+ [
1799
+ "▁energ",
1800
+ -8.749211217327893
1801
+ ],
1802
+ [
1803
+ "▁imagine",
1804
+ -8.752781394092509
1805
+ ],
1806
+ [
1807
+ "▁science",
1808
+ -8.75997395003522
1809
+ ],
1810
+ [
1811
+ "▁food",
1812
+ -8.762990753474401
1813
+ ],
1814
+ [
1815
+ "▁connect",
1816
+ -8.77100174324766
1817
+ ],
1818
+ [
1819
+ "quite",
1820
+ -8.773647509239836
1821
+ ],
1822
+ [
1823
+ "▁government",
1824
+ -8.775722423993372
1825
+ ],
1826
+ [
1827
+ "▁answer",
1828
+ -8.78002900412545
1829
+ ],
1830
+ [
1831
+ "▁global",
1832
+ -8.780619828904241
1833
+ ],
1834
+ [
1835
+ "▁stand",
1836
+ -8.785036546626142
1837
+ ],
1838
+ [
1839
+ "▁keep",
1840
+ -8.788654740788271
1841
+ ],
1842
+ [
1843
+ "▁animal",
1844
+ -8.804862510265878
1845
+ ],
1846
+ [
1847
+ "where",
1848
+ -8.811834342327783
1849
+ ],
1850
+ [
1851
+ "▁possibl",
1852
+ -8.820056966136203
1853
+ ],
1854
+ [
1855
+ "across",
1856
+ -8.830962661034436
1857
+ ],
1858
+ [
1859
+ "▁control",
1860
+ -8.838737620869553
1861
+ ],
1862
+ [
1863
+ "▁research",
1864
+ -8.844606219767416
1865
+ ],
1866
+ [
1867
+ "▁product",
1868
+ -8.844911614902509
1869
+ ],
1870
+ [
1871
+ "▁stuff",
1872
+ -8.845260960850675
1873
+ ],
1874
+ [
1875
+ "▁remember",
1876
+ -8.845915483110893
1877
+ ],
1878
+ [
1879
+ "▁while",
1880
+ -8.85051297862519
1881
+ ],
1882
+ [
1883
+ "▁child",
1884
+ -8.853110416617389
1885
+ ],
1886
+ [
1887
+ "▁business",
1888
+ -8.859769930901034
1889
+ ],
1890
+ [
1891
+ "▁physic",
1892
+ -8.861164207607107
1893
+ ],
1894
+ [
1895
+ "▁environment",
1896
+ -8.873814996112682
1897
+ ],
1898
+ [
1899
+ "▁wonder",
1900
+ -8.873819960339489
1901
+ ],
1902
+ [
1903
+ "▁politic",
1904
+ -8.88738762226837
1905
+ ],
1906
+ [
1907
+ "▁student",
1908
+ -8.89767468539825
1909
+ ],
1910
+ [
1911
+ "▁public",
1912
+ -8.905987909835986
1913
+ ],
1914
+ [
1915
+ "▁societ",
1916
+ -8.929935600634728
1917
+ ],
1918
+ [
1919
+ "▁machine",
1920
+ -8.93135882460237
1921
+ ],
1922
+ [
1923
+ "special",
1924
+ -8.932997840222589
1925
+ ],
1926
+ [
1927
+ "▁particular",
1928
+ -8.952259164739559
1929
+ ],
1930
+ [
1931
+ "▁effect",
1932
+ -8.961776030509103
1933
+ ],
1934
+ [
1935
+ "▁disease",
1936
+ -8.961777244037055
1937
+ ],
1938
+ [
1939
+ "▁study",
1940
+ -8.972872315045484
1941
+ ],
1942
+ [
1943
+ "▁minute",
1944
+ -8.977346606956452
1945
+ ],
1946
+ [
1947
+ "▁discover",
1948
+ -9.00922662860049
1949
+ ],
1950
+ [
1951
+ "▁certain",
1952
+ -9.013088908038048
1953
+ ],
1954
+ [
1955
+ "▁figure",
1956
+ -9.021642649757036
1957
+ ],
1958
+ [
1959
+ "▁wrong",
1960
+ -9.023207113478072
1961
+ ],
1962
+ [
1963
+ "▁market",
1964
+ -9.031848417684849
1965
+ ],
1966
+ [
1967
+ "▁organiz",
1968
+ -9.034216957681174
1969
+ ],
1970
+ [
1971
+ "▁program",
1972
+ -9.04775239624157
1973
+ ],
1974
+ [
1975
+ "▁language",
1976
+ -9.063099143498777
1977
+ ],
1978
+ [
1979
+ "ground",
1980
+ -9.082829041793172
1981
+ ],
1982
+ [
1983
+ "▁education",
1984
+ -9.083659845011791
1985
+ ],
1986
+ [
1987
+ "▁incredibl",
1988
+ -9.089492419062424
1989
+ ],
1990
+ [
1991
+ "▁everybody",
1992
+ -9.097885260712856
1993
+ ],
1994
+ [
1995
+ "▁beautiful",
1996
+ -9.12263050089388
1997
+ ],
1998
+ [
1999
+ "▁challenge",
2000
+ -9.13305180006321
2001
+ ],
2002
+ [
2003
+ "▁individual",
2004
+ -9.136549811286418
2005
+ ],
2006
+ [
2007
+ "▁robot",
2008
+ -9.149778414703448
2009
+ ],
2010
+ [
2011
+ "▁difference",
2012
+ -9.166787774331064
2013
+ ],
2014
+ [
2015
+ "▁object",
2016
+ -9.1731284477078
2017
+ ],
2018
+ [
2019
+ "▁follow",
2020
+ -9.189619820239848
2021
+ ],
2022
+ [
2023
+ "▁success",
2024
+ -9.194249482061554
2025
+ ],
2026
+ [
2027
+ "▁somebody",
2028
+ -9.201701855207816
2029
+ ],
2030
+ [
2031
+ "▁collect",
2032
+ -9.205481082183333
2033
+ ],
2034
+ [
2035
+ "▁natural",
2036
+ -9.227272303426044
2037
+ ],
2038
+ [
2039
+ "▁scientist",
2040
+ -9.28756625454666
2041
+ ],
2042
+ [
2043
+ "▁opportunit",
2044
+ -9.292673515244624
2045
+ ],
2046
+ [
2047
+ "▁support",
2048
+ -9.306075784827351
2049
+ ],
2050
+ [
2051
+ "▁difficult",
2052
+ -9.31336747480424
2053
+ ],
2054
+ [
2055
+ "▁network",
2056
+ -9.314413501104752
2057
+ ],
2058
+ [
2059
+ "▁behavior",
2060
+ -9.345234221100345
2061
+ ],
2062
+ [
2063
+ "▁structure",
2064
+ -9.372588216236457
2065
+ ],
2066
+ [
2067
+ "▁relationship",
2068
+ -9.379266035832249
2069
+ ],
2070
+ [
2071
+ "▁communicat",
2072
+ -9.441462488984255
2073
+ ],
2074
+ [
2075
+ "▁absolute",
2076
+ -9.53481354000388
2077
+ ],
2078
+ [
2079
+ "▁neighbor",
2080
+ -9.567973071869298
2081
+ ],
2082
+ [
2083
+ "[",
2084
+ -10.029248769845
2085
+ ],
2086
+ [
2087
+ "+",
2088
+ -13.924195720156202
2089
+ ],
2090
+ [
2091
+ "£",
2092
+ -13.924295720156202
2093
+ ],
2094
+ [
2095
+ "€",
2096
+ -13.924395720156202
2097
+ ],
2098
+ [
2099
+ "$",
2100
+ -13.924495720156202
2101
+ ],
2102
+ [
2103
+ "(",
2104
+ -13.924595720156203
2105
+ ],
2106
+ [
2107
+ "-",
2108
+ -13.924695720156205
2109
+ ],
2110
+ [
2111
+ ".",
2112
+ -13.924795720156204
2113
+ ],
2114
+ [
2115
+ ">",
2116
+ -13.924895720156186
2117
+ ],
2118
+ [
2119
+ "¢",
2120
+ -13.924895720156202
2121
+ ],
2122
+ [
2123
+ "<",
2124
+ -13.924895720156202
2125
+ ],
2126
+ [
2127
+ "/",
2128
+ -13.924895720156202
2129
+ ]
2130
+ ]
2131
+ }
2132
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "([bos])",
3
+ "clean_up_tokenization_spaces": true,
4
+ "eos_token": "([eos])",
5
+ "mask_token": "([mask])",
6
+ "model_max_length": 1000000000000000019884624838656,
7
+ "pad_token": "([pad])",
8
+ "tokenizer_class": "PreTrainedTokenizerFast",
9
+ "unk_token": "([unk])"
10
+ }