saattrupdan committed on
Commit
8938a90
·
verified ·
1 Parent(s): 7556e57

Add chat template to tokenizer config

Browse files

When the tokenizer is loaded with transformers, the chat template is not included, so methods that rely on it (for instance, tokenizer.apply_chat_template) don't work.

This commit adds the chat template to tokenizer_config.json to remedy that. Replacing the current chat_template.json with a chat_template.jinja file that contains only the chat template string might also work, but I'm not completely sure about that.

Files changed (1) hide show
  1. tokenizer_config.json +2 -1
tokenizer_config.json CHANGED
@@ -40,5 +40,6 @@
40
  "model_max_length": 1000000000000000019884624838656,
41
  "pad_token": "[PAD]",
42
  "tokenizer_class": "PreTrainedTokenizer",
43
- "unk_token": "[UNK]"
 
44
  }
 
40
  "model_max_length": 1000000000000000019884624838656,
41
  "pad_token": "[PAD]",
42
  "tokenizer_class": "PreTrainedTokenizer",
43
+ "unk_token": "[UNK]",
44
+ "chat_template": "{% for m in messages %}<|im_start|>{{ m['role'] }}\n{{ m['content'] }}<|im_end|>\n{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n<think>\n{% endif %}"
45
  }