HenrySentinel committed on
Commit
22c34d9
·
verified ·
1 Parent(s): 6f5cb36

Update tokenizer with chat template and pad token

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +2 -12
tokenizer_config.json CHANGED
@@ -1,20 +1,10 @@
1
  {
2
  "add_prefix_space": false,
3
- "added_tokens_decoder": {
4
- "50256": {
5
- "content": "<|endoftext|>",
6
- "lstrip": false,
7
- "normalized": true,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": true
11
- }
12
- },
13
  "bos_token": "<|endoftext|>",
 
14
  "clean_up_tokenization_spaces": false,
15
  "eos_token": "<|endoftext|>",
16
- "extra_special_tokens": {},
17
- "model_max_length": 1024,
18
  "pad_token": "<|endoftext|>",
19
  "tokenizer_class": "GPT2Tokenizer",
20
  "unk_token": "<|endoftext|>"
 
1
  {
2
  "add_prefix_space": false,
 
 
 
 
 
 
 
 
 
 
3
  "bos_token": "<|endoftext|>",
4
+ "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '<|user|>\n' + message['content'] + '\n' }}{% elif message['role'] == 'assistant' %}{{ '<|assistant|>\n' + message['content'] + eos_token + '\n' }}{% elif message['role'] == 'system' %}{{ '<|system|>\n' + message['content'] + '\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% endif %}",
5
  "clean_up_tokenization_spaces": false,
6
  "eos_token": "<|endoftext|>",
7
+ "model_max_length": 512,
 
8
  "pad_token": "<|endoftext|>",
9
  "tokenizer_class": "GPT2Tokenizer",
10
  "unk_token": "<|endoftext|>"