Upload tokenizer
Browse files
- chat_template.jinja +7 -0
- tokenizer.json +0 -0
- tokenizer_config.json +20 -0
chat_template.jinja
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{#-
  Chat template.
  Inputs read: bos_token, messages (items with "role" and "content"), tools (optional),
  add_generation_prompt (optional).
  Output: bos_token, then an optional system turn, then one turn per remaining message, each
  rendered as <|im_start|>ROLE, newline, CONTENT, <|im_end|>, newline.
  - A leading message with role "system" is removed from the list and its content becomes the
    system turn. The check is guarded so an empty or missing messages list renders instead of
    failing on messages[0].
  - When tools is truthy, "List of tools: <|tool_list_start|>[...]<|tool_list_end|>" is appended
    to the system text (after a newline if system text already exists); non-string tools are
    serialised with tojson and joined with ", ".
  - Non-string message content is serialised with tojson; "tool" role content is wrapped in
    <|tool_response_start|> ... <|tool_response_end|>.
  - add_generation_prompt appends an opening assistant turn.
  Every tag trims whitespace on both sides, so the only newlines emitted are the ones inside
  the string literals below; keep those line breaks exactly where they are.
-#}
{{- bos_token -}}{%- set ns = namespace(system_prompt="") -%}{%- if messages and messages[0]["role"] == "system" -%} {%- set ns.system_prompt = messages[0]["content"] -%} {%- set messages = messages[1:] -%}{%- endif -%}{%- if tools -%} {%- set ns.system_prompt = ns.system_prompt + ("
" if ns.system_prompt else "") + "List of tools: <|tool_list_start|>[" -%} {%- for tool in tools -%} {%- if tool is not string -%} {%- set tool = tool | tojson -%} {%- endif -%} {%- set ns.system_prompt = ns.system_prompt + tool -%} {%- if not loop.last -%} {%- set ns.system_prompt = ns.system_prompt + ", " -%} {%- endif -%} {%- endfor -%} {%- set ns.system_prompt = ns.system_prompt + "]<|tool_list_end|>" -%}{%- endif -%}{%- if ns.system_prompt -%} {{- "<|im_start|>system
" + ns.system_prompt + "<|im_end|>
" -}}{%- endif -%}{%- for message in messages -%} {{- "<|im_start|>" + message["role"] + "
" -}} {%- set content = message["content"] -%} {%- if content is not string -%} {%- set content = content | tojson -%} {%- endif -%} {%- if message["role"] == "tool" -%} {%- set content = "<|tool_response_start|>" + content + "<|tool_response_end|>" -%} {%- endif -%} {{- content + "<|im_end|>
" -}}{%- endfor -%}{%- if add_generation_prompt -%} {{- "<|im_start|>assistant
" -}}{%- endif -%}
|
tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
  "additional_special_tokens": null,
  "backend": "tokenizers",
  "bos_token": "<|startoftext|>",
  "clean_up_tokenization_spaces": false,
  "eos_token": "<|im_end|>",
  "is_local": true,
  "legacy": false,
  "model_input_names": [
    "input_ids",
    "attention_mask"
  ],
  "model_max_length": 1000000000000000019884624838656,
  "pad_token": "<|pad|>",
  "sp_model_kwargs": {},
  "spaces_between_special_tokens": false,
  "tokenizer_class": "TokenizersBackend",
  "use_default_system_prompt": false,
  "use_fast": true
}
|