medimed committed on
Commit
05651d4
·
verified ·
1 Parent(s): e01a4f6

Upload tokenizer

Browse files
chat_template.jinja CHANGED
@@ -1,30 +1,63 @@
 
1
  {%- if tools %}
2
- {{- '<|im_start|>system\n' }}
 
3
  {%- if messages[0].role == 'system' %}
4
- {{- messages[0].content + '\n\n' }}
 
 
5
  {%- endif %}
6
- {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
 
 
 
 
 
7
  {%- for tool in tools %}
8
- {{- "\n" }}
 
9
  {{- tool | tojson }}
10
  {%- endfor %}
11
- {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
 
 
 
 
 
 
 
12
  {%- else %}
13
  {%- if messages[0].role == 'system' %}
14
- {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
 
 
15
  {%- endif %}
16
  {%- endif %}
17
  {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
18
- {%- for message in messages[::-1] %}
19
  {%- set index = (messages|length - 1) - loop.index0 %}
20
- {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
 
 
 
 
 
 
 
 
 
 
 
 
21
  {%- set ns.multi_step_tool = false %}
22
  {%- set ns.last_query_index = index %}
23
  {%- endif %}
24
  {%- endfor %}
25
  {%- for message in messages %}
26
  {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
27
- {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
 
 
28
  {%- elif message.role == "assistant" %}
29
  {%- set content = message.content %}
30
  {%- set reasoning_content = '' %}
@@ -32,28 +65,43 @@
32
  {%- set reasoning_content = message.reasoning_content %}
33
  {%- else %}
34
  {%- if '</think>' in message.content %}
35
- {%- set content = message.content.split('</think>')[-1].lstrip('\n') %}
36
- {%- set reasoning_content = message.content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
 
 
 
 
37
  {%- endif %}
38
  {%- endif %}
39
  {%- if loop.index0 > ns.last_query_index %}
40
  {%- if loop.last or (not loop.last and reasoning_content) %}
41
- {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
 
 
 
 
 
 
 
42
  {%- else %}
43
- {{- '<|im_start|>' + message.role + '\n' + content }}
 
44
  {%- endif %}
45
  {%- else %}
46
- {{- '<|im_start|>' + message.role + '\n' + content }}
 
47
  {%- endif %}
48
  {%- if message.tool_calls %}
49
  {%- for tool_call in message.tool_calls %}
50
  {%- if (loop.first and content) or (not loop.first) %}
51
- {{- '\n' }}
 
52
  {%- endif %}
53
  {%- if tool_call.function %}
54
  {%- set tool_call = tool_call.function %}
55
  {%- endif %}
56
- {{- '<tool_call>\n{"name": "' }}
 
57
  {{- tool_call.name }}
58
  {{- '", "arguments": ' }}
59
  {%- if tool_call.arguments is string %}
@@ -61,25 +109,36 @@
61
  {%- else %}
62
  {{- tool_call.arguments | tojson }}
63
  {%- endif %}
64
- {{- '}\n</tool_call>' }}
 
65
  {%- endfor %}
66
  {%- endif %}
67
- {{- '<|im_end|>\n' }}
 
68
  {%- elif message.role == "tool" %}
69
  {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
70
  {{- '<|im_start|>user' }}
71
  {%- endif %}
72
- {{- '\n<tool_response>\n' }}
 
 
73
  {{- message.content }}
74
- {{- '\n</tool_response>' }}
 
75
  {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
76
- {{- '<|im_end|>\n' }}
 
77
  {%- endif %}
78
  {%- endif %}
79
  {%- endfor %}
80
  {%- if add_generation_prompt %}
81
- {{- '<|im_start|>assistant\n' }}
 
82
  {%- if enable_thinking is defined and enable_thinking is false %}
83
- {{- '<think>\n\n</think>\n\n' }}
 
 
 
 
84
  {%- endif %}
85
- {%- endif %}
 
1
+
2
  {%- if tools %}
3
+ {{- '<|im_start|>system
4
+ ' }}
5
  {%- if messages[0].role == 'system' %}
6
+ {{- messages[0].content + '
7
+
8
+ ' }}
9
  {%- endif %}
10
+ {{- "# Tools
11
+
12
+ You may call one or more functions to assist with the user query.
13
+
14
+ You are provided with function signatures within <tools></tools> XML tags:
15
+ <tools>" }}
16
  {%- for tool in tools %}
17
+ {{- "
18
+ " }}
19
  {{- tool | tojson }}
20
  {%- endfor %}
21
+ {{- "
22
+ </tools>
23
+
24
+ For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
25
+ <tool_call>
26
+ {\"name\": <function-name>, \"arguments\": <args-json-object>}
27
+ </tool_call><|im_end|>
28
+ " }}
29
  {%- else %}
30
  {%- if messages[0].role == 'system' %}
31
+ {{- '<|im_start|>system
32
+ ' + messages[0].content + '<|im_end|>
33
+ ' }}
34
  {%- endif %}
35
  {%- endif %}
36
  {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
37
+ {%- for forward_message in messages %}
38
  {%- set index = (messages|length - 1) - loop.index0 %}
39
+ {%- set message = messages[index] %}
40
+ {%- set current_content = message.content if message.content is not none else '' %}
41
+ {%- set tool_start = '<tool_response>' %}
42
+ {%- set tool_start_length = tool_start|length %}
43
+ {%- set start_of_message = current_content[:tool_start_length] %}
44
+ {%- set tool_end = '</tool_response>' %}
45
+ {%- set tool_end_length = tool_end|length %}
46
+ {%- set start_pos = (current_content|length) - tool_end_length %}
47
+ {%- if start_pos < 0 %}
48
+ {%- set start_pos = 0 %}
49
+ {%- endif %}
50
+ {%- set end_of_message = current_content[start_pos:] %}
51
+ {%- if ns.multi_step_tool and message.role == "user" and not(start_of_message == tool_start and end_of_message == tool_end) %}
52
  {%- set ns.multi_step_tool = false %}
53
  {%- set ns.last_query_index = index %}
54
  {%- endif %}
55
  {%- endfor %}
56
  {%- for message in messages %}
57
  {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
58
+ {{- '<|im_start|>' + message.role + '
59
+ ' + message.content + '<|im_end|>' + '
60
+ ' }}
61
  {%- elif message.role == "assistant" %}
62
  {%- set content = message.content %}
63
  {%- set reasoning_content = '' %}
 
65
  {%- set reasoning_content = message.reasoning_content %}
66
  {%- else %}
67
  {%- if '</think>' in message.content %}
68
+ {%- set content = (message.content.split('</think>')|last).lstrip('
69
+ ') %}
70
+ {%- set reasoning_content = (message.content.split('</think>')|first).rstrip('
71
+ ') %}
72
+ {%- set reasoning_content = (reasoning_content.split('<think>')|last).lstrip('
73
+ ') %}
74
  {%- endif %}
75
  {%- endif %}
76
  {%- if loop.index0 > ns.last_query_index %}
77
  {%- if loop.last or (not loop.last and reasoning_content) %}
78
+ {{- '<|im_start|>' + message.role + '
79
+ <think>
80
+ ' + reasoning_content.strip('
81
+ ') + '
82
+ </think>
83
+
84
+ ' + content.lstrip('
85
+ ') }}
86
  {%- else %}
87
+ {{- '<|im_start|>' + message.role + '
88
+ ' + content }}
89
  {%- endif %}
90
  {%- else %}
91
+ {{- '<|im_start|>' + message.role + '
92
+ ' + content }}
93
  {%- endif %}
94
  {%- if message.tool_calls %}
95
  {%- for tool_call in message.tool_calls %}
96
  {%- if (loop.first and content) or (not loop.first) %}
97
+ {{- '
98
+ ' }}
99
  {%- endif %}
100
  {%- if tool_call.function %}
101
  {%- set tool_call = tool_call.function %}
102
  {%- endif %}
103
+ {{- '<tool_call>
104
+ {"name": "' }}
105
  {{- tool_call.name }}
106
  {{- '", "arguments": ' }}
107
  {%- if tool_call.arguments is string %}
 
109
  {%- else %}
110
  {{- tool_call.arguments | tojson }}
111
  {%- endif %}
112
+ {{- '}
113
+ </tool_call>' }}
114
  {%- endfor %}
115
  {%- endif %}
116
+ {{- '<|im_end|>
117
+ ' }}
118
  {%- elif message.role == "tool" %}
119
  {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
120
  {{- '<|im_start|>user' }}
121
  {%- endif %}
122
+ {{- '
123
+ <tool_response>
124
+ ' }}
125
  {{- message.content }}
126
+ {{- '
127
+ </tool_response>' }}
128
  {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
129
+ {{- '<|im_end|>
130
+ ' }}
131
  {%- endif %}
132
  {%- endif %}
133
  {%- endfor %}
134
  {%- if add_generation_prompt %}
135
+ {{- '<|im_start|>assistant
136
+ ' }}
137
  {%- if enable_thinking is defined and enable_thinking is false %}
138
+ {{- '<think>
139
+
140
+ </think>
141
+
142
+ ' }}
143
  {%- endif %}
144
+ {%- endif %}
special_tokens_map.json CHANGED
@@ -22,7 +22,7 @@
22
  "single_word": false
23
  },
24
  "pad_token": {
25
- "content": "<|endoftext|>",
26
  "lstrip": false,
27
  "normalized": false,
28
  "rstrip": false,
 
22
  "single_word": false
23
  },
24
  "pad_token": {
25
+ "content": "<|vision_pad|>",
26
  "lstrip": false,
27
  "normalized": false,
28
  "rstrip": false,
tokenizer_config.json CHANGED
@@ -231,8 +231,8 @@
231
  "eos_token": "<|endoftext|>",
232
  "errors": "replace",
233
  "extra_special_tokens": {},
234
- "model_max_length": 131072,
235
- "pad_token": "<|endoftext|>",
236
  "padding_side": "left",
237
  "split_special_tokens": false,
238
  "tokenizer_class": "Qwen2Tokenizer",
 
231
  "eos_token": "<|endoftext|>",
232
  "errors": "replace",
233
  "extra_special_tokens": {},
234
+ "model_max_length": 32768,
235
+ "pad_token": "<|vision_pad|>",
236
  "padding_side": "left",
237
  "split_special_tokens": false,
238
  "tokenizer_class": "Qwen2Tokenizer",