| { |
| "version": "1.0", |
| "truncation": null, |
| "padding": null, |
| "added_tokens": [ |
| { |
| "id": 0, |
| "content": "([bos])", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 1, |
| "content": "([eos])", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 2, |
| "content": "([unk])", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 3, |
| "content": "([pad])", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 4, |
| "content": "([mask])", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| } |
| ], |
| "normalizer": null, |
| "pre_tokenizer": { |
| "type": "ByteLevel", |
| "add_prefix_space": true, |
| "trim_offsets": true, |
| "use_regex": true |
| }, |
| "post_processor": { |
| "type": "TemplateProcessing", |
| "single": [ |
| { |
| "Sequence": { |
| "id": "A", |
| "type_id": 0 |
| } |
| }, |
| { |
| "SpecialToken": { |
| "id": "([eos])", |
| "type_id": 0 |
| } |
| } |
| ], |
| "pair": [ |
| { |
| "Sequence": { |
| "id": "A", |
| "type_id": 0 |
| } |
| }, |
| { |
| "SpecialToken": { |
| "id": "([eos])", |
| "type_id": 0 |
| } |
| }, |
| { |
| "Sequence": { |
| "id": "B", |
| "type_id": 1 |
| } |
| }, |
| { |
| "SpecialToken": { |
| "id": "([eos])", |
| "type_id": 1 |
| } |
| } |
| ], |
| "special_tokens": { |
| "([bos])": { |
| "id": "([bos])", |
| "ids": [ |
| 0 |
| ], |
| "tokens": [ |
| "([bos])" |
| ] |
| }, |
| "([eos])": { |
| "id": "([eos])", |
| "ids": [ |
| 1 |
| ], |
| "tokens": [ |
| "([eos])" |
| ] |
| } |
| } |
| }, |
| "decoder": { |
| "type": "ByteLevel", |
| "add_prefix_space": true, |
| "trim_offsets": true, |
| "use_regex": true |
| }, |
| "model": { |
| "type": "BPE", |
| "dropout": null, |
| "unk_token": null, |
| "continuing_subword_prefix": null, |
| "end_of_word_suffix": null, |
| "fuse_unk": false, |
| "byte_fallback": false, |
| "vocab": { |
| "([bos])": 0, |
| "([eos])": 1, |
| "([unk])": 2, |
| "([pad])": 3, |
| "([mask])": 4, |
| "'": 5, |
| "a": 6, |
| "b": 7, |
| "c": 8, |
| "d": 9, |
| "e": 10, |
| "f": 11, |
| "g": 12, |
| "h": 13, |
| "i": 14, |
| "j": 15, |
| "k": 16, |
| "l": 17, |
| "m": 18, |
| "n": 19, |
| "o": 20, |
| "p": 21, |
| "q": 22, |
| "r": 23, |
| "s": 24, |
| "t": 25, |
| "u": 26, |
| "v": 27, |
| "w": 28, |
| "x": 29, |
| "y": 30, |
| "z": 31, |
| "Ġ": 32, |
| "Ġt": 33, |
| "he": 34, |
| "Ġa": 35, |
| "Ġthe": 36, |
| "in": 37, |
| "Ġs": 38, |
| "Ġw": 39, |
| "Ġo": 40, |
| "re": 41, |
| "nd": 42, |
| "Ġb": 43, |
| "Ġh": 44, |
| "er": 45, |
| "Ġm": 46, |
| "Ġi": 47, |
| "ou": 48, |
| "Ġc": 49, |
| "Ġf": 50, |
| "at": 51, |
| "ed": 52, |
| "Ġand": 53, |
| "en": 54, |
| "Ġto": 55, |
| "Ġof": 56, |
| "on": 57, |
| "is": 58, |
| "Ġd": 59, |
| "ing": 60, |
| "Ġth": 61, |
| "Ġp": 62, |
| "Ġhe": 63, |
| "or": 64, |
| "Ġl": 65, |
| "es": 66, |
| "Ġin": 67, |
| "ll": 68, |
| "it": 69, |
| "ar": 70, |
| "as": 71, |
| "an": 72, |
| "Ġn": 73, |
| "Ġg": 74, |
| "om": 75, |
| "Ġbe": 76, |
| "Ġha": 77, |
| "Ġe": 78, |
| "le": 79, |
| "ot": 80, |
| "Ġy": 81, |
| "ut": 82, |
| "ow": 83, |
| "ic": 84, |
| "Ġwh": 85, |
| "Ġit": 86, |
| "ld": 87, |
| "ve": 88, |
| "Ġthat": 89, |
| "ly": 90, |
| "Ġwas": 91, |
| "id": 92, |
| "se": 93, |
| "st": 94, |
| "Ġon": 95, |
| "gh": 96, |
| "ent": 97, |
| "Ġre": 98, |
| "Ġyou": 99 |
| }, |
| "merges": [ |
| "Ġ t", |
| "h e", |
| "Ġ a", |
| "Ġt he", |
| "i n", |
| "Ġ s", |
| "Ġ w", |
| "Ġ o", |
| "r e", |
| "n d", |
| "Ġ b", |
| "Ġ h", |
| "e r", |
| "Ġ m", |
| "Ġ i", |
| "o u", |
| "Ġ c", |
| "Ġ f", |
| "a t", |
| "e d", |
| "Ġa nd", |
| "e n", |
| "Ġt o", |
| "Ġo f", |
| "o n", |
| "i s", |
| "Ġ d", |
| "in g", |
| "Ġt h", |
| "Ġ p", |
| "Ġ he", |
| "o r", |
| "Ġ l", |
| "e s", |
| "Ġ in", |
| "l l", |
| "i t", |
| "a r", |
| "a s", |
| "a n", |
| "Ġ n", |
| "Ġ g", |
| "o m", |
| "Ġb e", |
| "Ġh a", |
| "Ġ e", |
| "l e", |
| "o t", |
| "Ġ y", |
| "u t", |
| "o w", |
| "i c", |
| "Ġw h", |
| "Ġi t", |
| "l d", |
| "v e", |
| "Ġth at", |
| "l y", |
| "Ġw as", |
| "i d", |
| "s e", |
| "s t", |
| "Ġo n", |
| "g h", |
| "en t", |
| "Ġ re", |
| "Ġy ou" |
| ] |
| } |
| } |