YongganFu committed on
Commit
85eb9d3
·
verified ·
1 Parent(s): f318bfe

Upload tokenizer

Browse files
Files changed (2) hide show
  1. chat_template.jinja +7 -89
  2. tokenizer_config.json +7 -6
chat_template.jinja CHANGED
@@ -1,89 +1,7 @@
1
- {%- if tools %}
2
- {{- '<s>system\n' }}
3
- {%- if messages[0].role == 'system' %}
4
- {{- messages[0].content + '\n\n' }}
5
- {%- endif %}
6
- {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within [AVAILABLE_TOOLS][/AVAILABLE_TOOLS] XML tags:\n[AVAILABLE_TOOLS]" }}
7
- {%- for tool in tools %}
8
- {{- "\n" }}
9
- {{- tool | tojson }}
10
- {%- endfor %}
11
- {{- "\n[/AVAILABLE_TOOLS]\n\nFor each function call, return a json object with function name and arguments within [TOOL_CALLS][SPECIAL_10] XML tags:\n[TOOL_CALLS]\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n[SPECIAL_10]</s>\n" }}
12
- {%- else %}
13
- {%- if messages[0].role == 'system' %}
14
- {{- '<s>system\n' + messages[0].content + '</s>\n' }}
15
- {%- endif %}
16
- {%- endif %}
17
- {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
18
- {%- for message in messages[::-1] %}
19
- {%- set index = (messages|length - 1) - loop.index0 %}
20
- {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('[TOOL_RESULTS]') and message.content.endswith('[/TOOL_RESULTS]')) %}
21
- {%- set ns.multi_step_tool = false %}
22
- {%- set ns.last_query_index = index %}
23
- {%- endif %}
24
- {%- endfor %}
25
- {%- for message in messages %}
26
- {%- if message.content is string %}
27
- {%- set content = message.content %}
28
- {%- else %}
29
- {%- set content = '' %}
30
- {%- endif %}
31
- {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
32
- {{- '<s>' + message.role + '\n' + content + '</s>' + '\n' }}
33
- {%- elif message.role == "assistant" %}
34
- {%- set reasoning_content = '' %}
35
- {%- if message.reasoning_content is string %}
36
- {%- set reasoning_content = message.reasoning_content %}
37
- {%- else %}
38
- {%- if '<SPECIAL_12>' in content %}
39
- {%- set reasoning_content = content.split('<SPECIAL_12>')[0].rstrip('\n').split('<SPECIAL_11>')[-1].lstrip('\n') %}
40
- {%- set content = content.split('<SPECIAL_12>')[-1].lstrip('\n') %}
41
- {%- endif %}
42
- {%- endif %}
43
- {%- if loop.index0 > ns.last_query_index %}
44
- {%- if loop.last or (not loop.last and reasoning_content) %}
45
- {{- '<s>' + message.role + '\n<SPECIAL_11>\n' + reasoning_content.strip('\n') + '\n<SPECIAL_12>\n\n' + content.lstrip('\n') }}
46
- {%- else %}
47
- {{- '<s>' + message.role + '\n' + content }}
48
- {%- endif %}
49
- {%- else %}
50
- {{- '<s>' + message.role + '\n' + content }}
51
- {%- endif %}
52
- {%- if message.tool_calls %}
53
- {%- for tool_call in message.tool_calls %}
54
- {%- if (loop.first and content) or (not loop.first) %}
55
- {{- '\n' }}
56
- {%- endif %}
57
- {%- if tool_call.function %}
58
- {%- set tool_call = tool_call.function %}
59
- {%- endif %}
60
- {{- '[TOOL_CALLS]\n{"name": "' }}
61
- {{- tool_call.name }}
62
- {{- '", "arguments": ' }}
63
- {%- if tool_call.arguments is string %}
64
- {{- tool_call.arguments }}
65
- {%- else %}
66
- {{- tool_call.arguments | tojson }}
67
- {%- endif %}
68
- {{- '}\n[SPECIAL_10]' }}
69
- {%- endfor %}
70
- {%- endif %}
71
- {{- '</s>\n' }}
72
- {%- elif message.role == "tool" %}
73
- {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
74
- {{- '<s>user' }}
75
- {%- endif %}
76
- {{- '\n[TOOL_RESULTS]\n' }}
77
- {{- content }}
78
- {{- '\n[/TOOL_RESULTS]' }}
79
- {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
80
- {{- '</s>\n' }}
81
- {%- endif %}
82
- {%- endif %}
83
- {%- endfor %}
84
- {%- if add_generation_prompt %}
85
- {{- '<s>assistant\n' }}
86
- {%- if enable_thinking is defined and enable_thinking is false %}
87
- {{- '<SPECIAL_11>\n\n<SPECIAL_12>\n\n' }}
88
- {%- endif %}
89
- {%- endif %}
 
1
+ {{'<SPECIAL_10>System'}}{% for message in messages %}{% if message['role'] == 'system' %}{{'
2
+ ' + message['content'].strip()}}{% endif %}{% endfor %}{{'
3
+ '}}{% for message in messages %}{% if message['role'] == 'user' %}{{ '
4
+ <SPECIAL_11>User
5
+ ' + message['content'].strip() + '
6
+ <SPECIAL_11>Assistant
7
+ ' }}{% elif message['role'] == 'assistant' %}{{ message['content'].strip() }}{% endif %}{% endfor %}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tokenizer_config.json CHANGED
@@ -8004,14 +8004,15 @@
8004
  "special": true
8005
  }
8006
  },
8007
- "bos_token": null,
8008
  "clean_up_tokenization_spaces": false,
8009
  "eos_token": "</s>",
8010
- "errors": "replace",
8011
  "extra_special_tokens": {},
8012
- "model_max_length": 131072,
8013
- "pad_token": "<unk>",
8014
- "split_special_tokens": false,
 
 
8015
  "tokenizer_class": "PreTrainedTokenizerFast",
8016
- "unk_token": null
8017
  }
 
8004
  "special": true
8005
  }
8006
  },
8007
+ "bos_token": "<s>",
8008
  "clean_up_tokenization_spaces": false,
8009
  "eos_token": "</s>",
 
8010
  "extra_special_tokens": {},
8011
+ "model_input_names": [
8012
+ "input_ids",
8013
+ "attention_mask"
8014
+ ],
8015
+ "model_max_length": 8192,
8016
  "tokenizer_class": "PreTrainedTokenizerFast",
8017
+ "unk_token": "<unk>"
8018
  }