gss1147 committed on
Commit
224d698
·
verified ·
1 Parent(s): c61135a

Upload tokenizer for dual-GPU DeepSpeed full finetune

Browse files
chat_template.jinja ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if not add_generation_prompt is defined %}
2
+ {%- set add_generation_prompt = false %}
3
+ {%- endif %}
4
+ {%- set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true, is_first_user=true, is_last_user=false) %}
5
+ {%- for message in messages %}
6
+ {%- if message['role'] == 'system' %}
7
+ {#- First system message is taken as-is; later ones are appended after a blank line. #}{%- if ns.is_first_sp %}
8
+ {%- set ns.system_prompt = ns.system_prompt + message['content'] %}
9
+ {%- set ns.is_first_sp = false %}
10
+ {%- else %}
11
+ {%- set ns.system_prompt = ns.system_prompt + '
12
+
13
+ ' + message['content'] %}
14
+ {%- endif %}
15
+ {%- endif %}
16
+ {%- endfor %}
17
+ {{- bos_token }}
18
+ {{- ns.system_prompt }}
19
+ {%- if tools %}
20
+ {%- if ns.system_prompt != '' %}
21
+ {{- '
22
+
23
+ # Tools
24
+
25
+ You may call one or more functions to assist with the user query.' }}
26
+ {%- else %}
27
+ {{- '# Tools
28
+
29
+ You may call one or more functions to assist with the user query.' }}
30
+ {%- endif %}
31
+ {{- '
32
+
33
+ You are provided with function signatures within <tools></tools> XML tags:' }}
34
+ {{- '
35
+ <tools>
36
+ ' }}
37
+ {%- for tool in tools %}
38
+ {#- Emit a newline before every tool signature except the first (index0 is 0 for the first tool). #}{%- if loop.index0 > 0 %}
39
+ {{- '
40
+ ' }}
41
+ {%- endif %}
42
+ {{- tool | tojson }}
43
+ {%- endfor %}
44
+ {{- '
45
+ </tools>
46
+
47
+ ' }}
48
+ {{- 'For function call returns, you should first print <tool_calls>' }}
49
+ {{- 'For each function call, you should return object like:
50
+ ' }}
51
+ {{- '<tool_call>function_name
52
+ ```json
53
+ function_arguments_in_json_format
54
+ ```</tool_call>' }}
55
+ {{- 'At the end of function call returns, you should print </tool_calls>' }}
56
+ {%- endif %}
57
+ {%- if ns.system_prompt != '' or tools %}
58
+ {{- '<|hy_place▁holder▁no▁3|>' }}
59
+ {%- endif %}
60
+ {%- for message in messages %}
61
+ {%- if message['role'] == 'user' %}
62
+ {%- set ns.is_tool = false %}
63
+ {%- set ns.is_first = false %}
64
+ {%- set ns.is_last_user = true %}
65
+ {{- '<|hy_User|>' + message['content'] + '<|hy_Assistant|>' }}
66
+ {%- endif %}
67
+ {%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}
68
+ {%- set ns.is_last_user = false %}
69
+ {%- if ns.is_tool %}
70
+ {{- '</tool_responses>' + '<|hy_Assistant|>' }}
71
+ {%- endif %}
72
+ {%- set ns.is_first = false %}
73
+ {%- set ns.is_tool = false %}
74
+ {%- set ns.is_output_first = true %}
75
+ {%- for tool in message['tool_calls'] %}
76
+ {%- set arguments = tool['function']['arguments'] %}
77
+ {%- if arguments is not string %}
78
+ {%- set arguments = arguments | tojson %}
79
+ {%- endif %}
80
+ {%- if not ns.is_first %}
81
+ {%- if message['content'] is none %}
82
+ {{- '<tool_calls><tool_call>' + tool['function']['name'] + '
83
+ ' + '```json' + '
84
+ ' + arguments + '
85
+ ' + '```' + '</tool_call>' }}
86
+ {%- else %}
87
+ {{- message['content'] + '<tool_calls><tool_call>' + tool['function']['name'] + '
88
+ ' + '```json' + '
89
+ ' + arguments + '
90
+ ' + '```' + '</tool_call>' }}
91
+ {%- endif %}
92
+ {%- set ns.is_first = true %}
93
+ {%- else %}
94
+ {{- '
95
+ ' + '<tool_call>' + tool['function']['name'] + '
96
+ ' + '```json' + '
97
+ ' + arguments + '
98
+ ' + '```' + '</tool_call>' }}
99
+ {%- endif %}
100
+ {%- endfor %}
101
+ {{- '</tool_calls>' + eos_token }}
102
+ {%- endif %}
103
+ {%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none) %}
104
+ {%- set content = message['content'] %}
105
+ {#- For non-final assistant turns keep only the text inside the last answer tag pair. #}{%- if '<answer>' in content and not loop.last %}
106
+ {#- Cut at the closing tag with split; str.strip with a tag argument removes a character set and would eat letters of the answer itself. #}{%- set content = content.split('<answer>')[-1].split('</answer>')[0].strip() %}
107
+ {%- endif %}
108
+ {%- set ns.is_last_user = false %}
109
+ {%- if ns.is_tool %}
110
+ {{- '</tool_responses>' + '<|hy_Assistant|>' + content + eos_token }}
111
+ {%- set ns.is_tool = false %}
112
+ {%- else %}
113
+ {{- content + eos_token }}
114
+ {%- endif %}
115
+ {%- endif %}
116
+ {%- if message['role'] == 'tool' %}
117
+ {%- set ns.is_last_user = false %}
118
+ {%- set ns.is_tool = true %}
119
+ {%- if ns.is_output_first %}
120
+ {{- '<|hy_User|>' + '<tool_responses><tool_response>' + message['content'] + '</tool_response>' }}
121
+ {%- set ns.is_output_first = false %}
122
+ {%- else %}
123
+ {{- '
124
+ <tool_response>' + message['content'] + '</tool_response>' }}
125
+ {%- endif %}
126
+ {%- endif %}
127
+ {%- endfor %}
128
+ {%- if ns.is_tool %}
129
+ {{- '</tool_responses>' + '<|hy_Assistant|>' }}
130
+ {%- endif %}
131
+ {%- if add_generation_prompt and not ns.is_last_user and not ns.is_tool %}
132
+ {{- '<|hy_Assistant|>' }}
133
+ {%- endif %}
134
+ {%- if enable_thinking is defined and not enable_thinking %}
135
+ {{- '<think>
136
+
137
+ </think>
138
+ ' }}
139
+ {%- endif %}
special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|hy_begin▁of▁sentence|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|hy_place▁holder▁no▁2|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<|hy_▁pad▁|>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff