harryrobert committed on
Commit
530c35a
·
verified ·
1 Parent(s): 6530613

Upload tokenizer_config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +22 -0
tokenizer_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "vocab_size": 8192,
3
+ "special_tokens": [
4
+ "<pad>",
5
+ "<unk>",
6
+ "<bos>",
7
+ "<eos>"
8
+ ],
9
+ "pad_token": "<pad>",
10
+ "unk_token": "<unk>",
11
+ "bos_token": "<bos>",
12
+ "eos_token": "<eos>",
13
+ "pad_id": 0,
14
+ "unk_id": 1,
15
+ "bos_id": 2,
16
+ "eos_id": 3,
17
+ "min_frequency": 2,
18
+ "model_max_length": 128,
19
+ "padding_side": "right",
20
+ "truncation_side": "right",
21
+ "tokenizer_class": "PreTrainedTokenizerFast"
22
+ }