khtsly committed on
Commit
ba99d76
·
verified ·
1 Parent(s): 850b133

Upload 3 files

Browse files
Files changed (3) hide show
  1. chat_template.jinja +126 -0
  2. tokenizer.json +0 -0
  3. tokenizer_config.json +23 -0
chat_template.jinja ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- macro render_content(content) %}
2
+ {%- if content is string %}
3
+ {{- content }}
4
+ {%- elif content is iterable and content is not mapping %}
5
+ {%- for item in content %}
6
+ {%- if 'text' in item %}
7
+ {{- item.text }}
8
+ {%- endif %}
9
+ {%- endfor %}
10
+ {%- elif content is none or content is undefined %}
11
+ {{- '' }}
12
+ {%- endif %}
13
+ {%- endmacro %}
14
+ {%- if not messages %}
15
+ {{- raise_exception('No messages provided.') }}
16
+ {%- endif %}
17
+ {%- if tools and tools is iterable and tools is not mapping %}
18
+ {{- '<|im_start|>system\n' }}
19
+ {{- "# Tools\n\nYou have access to the following functions:\n\n<tools>" }}
20
+ {%- for tool in tools %}
21
+ {{- "\n" }}
22
+ {{- tool | tojson }}
23
+ {%- endfor %}
24
+ {{- "\n</tools>" }}
25
+ {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
26
+ {%- if messages[0].role == 'system' %}
27
+ {%- set content = render_content(messages[0].content)|trim %}
28
+ {%- if content %}
29
+ {{- '\n\n' + content }}
30
+ {%- endif %}
31
+ {%- endif %}
32
+ {{- '<|im_end|>\n' }}
33
+ {%- else %}
34
+ {%- if messages[0].role == 'system' %}
35
+ {%- set content = render_content(messages[0].content)|trim %}
36
+ {{- '<|im_start|>system\n' + content + '<|im_end|>\n' }}
37
+ {%- endif %}
38
+ {%- endif %}
39
+ {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
40
+ {%- for message in messages[::-1] %}
41
+ {%- set index = (messages|length - 1) - loop.index0 %}
42
+ {%- if ns.multi_step_tool and message.role == "user" %}
43
+ {%- set content = render_content(message.content)|trim %}
44
+ {%- if not(content.startswith('<tool_response>') and content.endswith('</tool_response>')) %}
45
+ {%- set ns.multi_step_tool = false %}
46
+ {%- set ns.last_query_index = index %}
47
+ {%- endif %}
48
+ {%- endif %}
49
+ {%- endfor %}
50
+ {%- if ns.multi_step_tool %}
51
+ {{- raise_exception('No user query found in messages.') }}
52
+ {%- endif %}
53
+ {%- for message in messages %}
54
+ {%- set content = render_content(message.content)|trim %}
55
+ {%- if message.role == "system" %}
56
+ {%- if not loop.first %}
57
+ {{- raise_exception('System message must be at the beginning.') }}
58
+ {%- endif %}
59
+ {%- elif message.role == "user" %}
60
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
61
+ {%- elif message.role == "assistant" %}
62
+ {%- set reasoning_content = '' %}
63
+ {%- if message.reasoning_content is string %}
64
+ {%- set reasoning_content = message.reasoning_content %}
65
+ {%- else %}
66
+ {%- if '</think>' in content %}
67
+ {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
68
+ {%- set content = content.split('</think>')[-1].lstrip('\n') %}
69
+ {%- endif %}
70
+ {%- endif %}
71
+ {%- set reasoning_content = reasoning_content|trim %}
72
+ {%- if loop.index0 > ns.last_query_index %}
73
+ {{- '<|im_start|>' + message.role + '\n<think>' + reasoning_content + '</think>\n\n' + content }}
74
+ {%- else %}
75
+ {{- '<|im_start|>' + message.role + '\n' + content }}
76
+ {%- endif %}
77
+ {%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping %}
78
+ {%- for tool_call in message.tool_calls %}
79
+ {%- if tool_call.function is defined %}
80
+ {%- set tool_call = tool_call.function %}
81
+ {%- endif %}
82
+ {%- if loop.first %}
83
+ {%- if content|trim %}
84
+ {{- '\n\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
85
+ {%- else %}
86
+ {{- '<tool_call>\n<function=' + tool_call.name + '>\n' }}
87
+ {%- endif %}
88
+ {%- else %}
89
+ {{- '\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
90
+ {%- endif %}
91
+ {%- if tool_call.arguments is defined %}
92
+ {%- for args_name, args_value in tool_call.arguments|items %}
93
+ {{- '<parameter=' + args_name + '>\n' }}
94
+ {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
95
+ {{- args_value }}
96
+ {{- '\n</parameter>\n' }}
97
+ {%- endfor %}
98
+ {%- endif %}
99
+ {{- '</function>\n</tool_call>' }}
100
+ {%- endfor %}
101
+ {%- endif %}
102
+ {{- '<|im_end|>\n' }}
103
+ {%- elif message.role == "tool" %}
104
+ {%- if loop.previtem and loop.previtem.role != "tool" %}
105
+ {{- '<|im_start|>user' }}
106
+ {%- endif %}
107
+ {{- '\n<tool_response>\n' }}
108
+ {{- content }}
109
+ {{- '\n</tool_response>' }}
110
+ {%- if not loop.last and loop.nextitem.role != "tool" %}
111
+ {{- '<|im_end|>\n' }}
112
+ {%- elif loop.last %}
113
+ {{- '<|im_end|>\n' }}
114
+ {%- endif %}
115
+ {%- else %}
116
+ {{- raise_exception('Unexpected message role.') }}
117
+ {%- endif %}
118
+ {%- endfor %}
119
+ {%- if add_generation_prompt %}
120
+ {{- '<|im_start|>assistant\n' }}
121
+ {%- if enable_thinking is defined and enable_thinking is false %}
122
+ {{- '<think></think>\n\n' }}
123
+ {%- else %}
124
+ {{- '<think>' }}
125
+ {%- endif %}
126
+ {%- endif %}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "bos_token": null,
4
+ "eos_token": "<|im_end|>",
5
+ "extra_special_tokens": [
6
+ "<|im_start|>",
7
+ "<|im_end|>",
8
+ "<think>",
9
+ "</think>",
10
+ "<|fim_prefix|>",
11
+ "<|fim_middle|>",
12
+ "<|fim_suffix|>",
13
+ "<tool_call>",
14
+ "</tool_call>",
15
+ "<tool_response>",
16
+ "</tool_response>"
17
+ ],
18
+ "model_max_length": 1000000000000000019884624838656,
19
+ "pad_token": "<|endoftext|>",
20
+ "tokenizer_class": "PreTrainedTokenizerFast",
21
+ "unk_token": null,
22
+ "add_bos_token": false
23
+ }