Ezekiel999 committed on
Commit
faf515e
·
verified ·
1 Parent(s): 859fcfd

Upload tokenizer_config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +37 -0
tokenizer_config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "3.0.0",
3
+ "vocab_size": 32773,
4
+ "pre_tokenizer": "Whitespace",
5
+ "normalizer": "NFKC (NO lowercase)",
6
+ "model": "BPE",
7
+ "brand_tokens": [
8
+ "AksaraLLM",
9
+ "aksarallm",
10
+ "AKSARALLM",
11
+ "Indonesia",
12
+ "indonesia",
13
+ "INDONESIA",
14
+ "Pancasila",
15
+ "pancasila",
16
+ "Nusantara",
17
+ "nusantara"
18
+ ],
19
+ "special_tokens": {
20
+ "[PAD]": 0,
21
+ "[EOS]": 1,
22
+ "[BOS]": 2,
23
+ "[UNK]": 3,
24
+ "[SEP]": 4,
25
+ "[MASK]": 5,
26
+ "[INST]": 6,
27
+ "[/INST]": 7,
28
+ "[SYS]": 8,
29
+ "[USER]": 9,
30
+ "[ASST]": 10,
31
+ "[TURN]": 11,
32
+ "[LANG_ID]": 12,
33
+ "[LANG_JV]": 13,
34
+ "[LANG_SU]": 14,
35
+ "[LANG_EN]": 15
36
+ }
37
+ }