{
  "version": "3.0.0",
  "vocab_size": 32773,
  "pre_tokenizer": "Whitespace",
  "normalizer": "NFKC (NO lowercase)",
  "model": "BPE",
  "brand_tokens": [
    "AksaraLLM",
    "aksarallm",
    "AKSARALLM",
    "Indonesia",
    "indonesia",
    "INDONESIA",
    "Pancasila",
    "pancasila",
    "Nusantara",
    "nusantara"
  ],
  "special_tokens": {
    "[PAD]": 0,
    "[EOS]": 1,
    "[BOS]": 2,
    "[UNK]": 3,
    "[SEP]": 4,
    "[MASK]": 5,
    "[INST]": 6,
    "[/INST]": 7,
    "[SYS]": 8,
    "[USER]": 9,
    "[ASST]": 10,
    "[TURN]": 11,
    "[LANG_ID]": 12,
    "[LANG_JV]": 13,
    "[LANG_SU]": 14,
    "[LANG_EN]": 15
  }
}