Upload folder using huggingface_hub
Browse files
- chat_template.jinja +5 -5
- config.json +86 -6
- tokenizer_config.json +6 -3
chat_template.jinja
CHANGED
|
@@ -20,7 +20,7 @@ You may call one or more functions to assist with the user query.
|
|
| 20 |
You are provided with function signatures within <tools></tools> XML tags:
|
| 21 |
<tools>
|
| 22 |
{% for tool in tools %}
|
| 23 |
-
{%- if 'function' in tool -%}
|
| 24 |
{%- set tool = tool['function'] -%}
|
| 25 |
{%- endif -%}
|
| 26 |
{% if tool.defer_loading is not defined or not tool.defer_loading %}
|
|
@@ -48,7 +48,7 @@ For each function call, output the function name and arguments within the follow
|
|
| 48 |
{%- endmacro -%}
|
| 49 |
{%- set ns = namespace(last_user_index=-1, thinking_indices='') -%}
|
| 50 |
{%- for m in messages %}
|
| 51 |
-
{%- if m.role == 'user' %}
|
| 52 |
{%- set ns.last_user_index = loop.index0 -%}
|
| 53 |
{%- elif m.role == 'assistant' %}
|
| 54 |
{%- if m.reasoning_content is string %}
|
|
@@ -58,7 +58,7 @@ For each function call, output the function name and arguments within the follow
|
|
| 58 |
{%- endfor %}
|
| 59 |
{%- set ns.has_thinking = false -%}
|
| 60 |
{%- for m in messages -%}
|
| 61 |
-
{%- if m.role == 'user' -%}<|user|>{{ visible_text(m.content) }}{% set ns.has_thinking = (',' ~ loop.index0 ~ ',') in ns.thinking_indices -%}
|
| 62 |
{%- elif m.role == 'assistant' -%}
|
| 63 |
<|assistant|>
|
| 64 |
{%- set content = visible_text(m.content) %}
|
|
@@ -98,10 +98,10 @@ For each function call, output the function name and arguments within the follow
|
|
| 98 |
{{- '<tool_response><tools>\n' -}}
|
| 99 |
{% for tr in m.content %}
|
| 100 |
{%- for tool in tools -%}
|
| 101 |
-
{%- if 'function' in tool -%}
|
| 102 |
{%- set tool = tool['function'] -%}
|
| 103 |
{%- endif -%}
|
| 104 |
-
{%- if tool.name == tr.name -%}
|
| 105 |
{{- tool_to_json(tool) + '\n' -}}
|
| 106 |
{%- endif -%}
|
| 107 |
{%- endfor -%}
|
|
|
|
| 20 |
You are provided with function signatures within <tools></tools> XML tags:
|
| 21 |
<tools>
|
| 22 |
{% for tool in tools %}
|
| 23 |
+
{%- if tool is not none and tool is mapping and 'function' in tool -%}
|
| 24 |
{%- set tool = tool['function'] -%}
|
| 25 |
{%- endif -%}
|
| 26 |
{% if tool.defer_loading is not defined or not tool.defer_loading %}
|
|
|
|
| 48 |
{%- endmacro -%}
|
| 49 |
{%- set ns = namespace(last_user_index=-1, thinking_indices='') -%}
|
| 50 |
{%- for m in messages %}
|
| 51 |
+
{%- if m is not none and m is mapping and m.role == 'user' %}
|
| 52 |
{%- set ns.last_user_index = loop.index0 -%}
|
| 53 |
{%- elif m.role == 'assistant' %}
|
| 54 |
{%- if m.reasoning_content is string %}
|
|
|
|
| 58 |
{%- endfor %}
|
| 59 |
{%- set ns.has_thinking = false -%}
|
| 60 |
{%- for m in messages -%}
|
| 61 |
+
{%- if m is not none and m is mapping and m.role == 'user' -%}<|user|>{{ visible_text(m.content) }}{% set ns.has_thinking = (',' ~ loop.index0 ~ ',') in ns.thinking_indices -%}
|
| 62 |
{%- elif m.role == 'assistant' -%}
|
| 63 |
<|assistant|>
|
| 64 |
{%- set content = visible_text(m.content) %}
|
|
|
|
| 98 |
{{- '<tool_response><tools>\n' -}}
|
| 99 |
{% for tr in m.content %}
|
| 100 |
{%- for tool in tools -%}
|
| 101 |
+
{%- if tool is not none and tool is mapping and 'function' in tool -%}
|
| 102 |
{%- set tool = tool['function'] -%}
|
| 103 |
{%- endif -%}
|
| 104 |
+
{%- if tool is not none and tool is mapping and tool.name == tr.name -%}
|
| 105 |
{{- tool_to_json(tool) + '\n' -}}
|
| 106 |
{%- endif -%}
|
| 107 |
{%- endfor -%}
|
config.json
CHANGED
|
@@ -4,7 +4,7 @@
|
|
| 4 |
],
|
| 5 |
"attention_bias": false,
|
| 6 |
"attention_dropout": 0.0,
|
| 7 |
-
"
|
| 8 |
"eos_token_id": [
|
| 9 |
154820,
|
| 10 |
154827,
|
|
@@ -13,7 +13,6 @@
|
|
| 13 |
"ep_size": 1,
|
| 14 |
"first_k_dense_replace": 3,
|
| 15 |
"hidden_act": "silu",
|
| 16 |
-
"head_dim": 64,
|
| 17 |
"hidden_size": 6144,
|
| 18 |
"index_head_dim": 128,
|
| 19 |
"index_n_heads": 32,
|
|
@@ -23,9 +22,89 @@
|
|
| 23 |
"intermediate_size": 12288,
|
| 24 |
"kv_lora_rank": 512,
|
| 25 |
"max_position_embeddings": 202752,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
"moe_intermediate_size": 2048,
|
| 27 |
"moe_layer_freq": 1,
|
| 28 |
-
"model_type": "glm_moe_dsa",
|
| 29 |
"n_group": 1,
|
| 30 |
"n_routed_experts": 256,
|
| 31 |
"n_shared_experts": 1,
|
|
@@ -35,7 +114,7 @@
|
|
| 35 |
"num_hidden_layers": 78,
|
| 36 |
"num_key_value_heads": 64,
|
| 37 |
"num_nextn_predict_layers": 1,
|
| 38 |
-
"pad_token_id":
|
| 39 |
"pretraining_tp": 1,
|
| 40 |
"q_lora_rank": 2048,
|
| 41 |
"qk_head_dim": 256,
|
|
@@ -52,8 +131,9 @@
|
|
| 52 |
"tie_word_embeddings": false,
|
| 53 |
"topk_group": 1,
|
| 54 |
"topk_method": "noaux_tc",
|
| 55 |
-
"transformers_version": "5.
|
|
|
|
| 56 |
"use_cache": true,
|
| 57 |
"v_head_dim": 256,
|
| 58 |
"vocab_size": 154880
|
| 59 |
-
}
|
|
|
|
| 4 |
],
|
| 5 |
"attention_bias": false,
|
| 6 |
"attention_dropout": 0.0,
|
| 7 |
+
"torch_dtype": "bfloat16",
|
| 8 |
"eos_token_id": [
|
| 9 |
154820,
|
| 10 |
154827,
|
|
|
|
| 13 |
"ep_size": 1,
|
| 14 |
"first_k_dense_replace": 3,
|
| 15 |
"hidden_act": "silu",
|
|
|
|
| 16 |
"hidden_size": 6144,
|
| 17 |
"index_head_dim": 128,
|
| 18 |
"index_n_heads": 32,
|
|
|
|
| 22 |
"intermediate_size": 12288,
|
| 23 |
"kv_lora_rank": 512,
|
| 24 |
"max_position_embeddings": 202752,
|
| 25 |
+
"mlp_layer_types": [
|
| 26 |
+
"dense",
|
| 27 |
+
"dense",
|
| 28 |
+
"dense",
|
| 29 |
+
"sparse",
|
| 30 |
+
"sparse",
|
| 31 |
+
"sparse",
|
| 32 |
+
"sparse",
|
| 33 |
+
"sparse",
|
| 34 |
+
"sparse",
|
| 35 |
+
"sparse",
|
| 36 |
+
"sparse",
|
| 37 |
+
"sparse",
|
| 38 |
+
"sparse",
|
| 39 |
+
"sparse",
|
| 40 |
+
"sparse",
|
| 41 |
+
"sparse",
|
| 42 |
+
"sparse",
|
| 43 |
+
"sparse",
|
| 44 |
+
"sparse",
|
| 45 |
+
"sparse",
|
| 46 |
+
"sparse",
|
| 47 |
+
"sparse",
|
| 48 |
+
"sparse",
|
| 49 |
+
"sparse",
|
| 50 |
+
"sparse",
|
| 51 |
+
"sparse",
|
| 52 |
+
"sparse",
|
| 53 |
+
"sparse",
|
| 54 |
+
"sparse",
|
| 55 |
+
"sparse",
|
| 56 |
+
"sparse",
|
| 57 |
+
"sparse",
|
| 58 |
+
"sparse",
|
| 59 |
+
"sparse",
|
| 60 |
+
"sparse",
|
| 61 |
+
"sparse",
|
| 62 |
+
"sparse",
|
| 63 |
+
"sparse",
|
| 64 |
+
"sparse",
|
| 65 |
+
"sparse",
|
| 66 |
+
"sparse",
|
| 67 |
+
"sparse",
|
| 68 |
+
"sparse",
|
| 69 |
+
"sparse",
|
| 70 |
+
"sparse",
|
| 71 |
+
"sparse",
|
| 72 |
+
"sparse",
|
| 73 |
+
"sparse",
|
| 74 |
+
"sparse",
|
| 75 |
+
"sparse",
|
| 76 |
+
"sparse",
|
| 77 |
+
"sparse",
|
| 78 |
+
"sparse",
|
| 79 |
+
"sparse",
|
| 80 |
+
"sparse",
|
| 81 |
+
"sparse",
|
| 82 |
+
"sparse",
|
| 83 |
+
"sparse",
|
| 84 |
+
"sparse",
|
| 85 |
+
"sparse",
|
| 86 |
+
"sparse",
|
| 87 |
+
"sparse",
|
| 88 |
+
"sparse",
|
| 89 |
+
"sparse",
|
| 90 |
+
"sparse",
|
| 91 |
+
"sparse",
|
| 92 |
+
"sparse",
|
| 93 |
+
"sparse",
|
| 94 |
+
"sparse",
|
| 95 |
+
"sparse",
|
| 96 |
+
"sparse",
|
| 97 |
+
"sparse",
|
| 98 |
+
"sparse",
|
| 99 |
+
"sparse",
|
| 100 |
+
"sparse",
|
| 101 |
+
"sparse",
|
| 102 |
+
"sparse",
|
| 103 |
+
"sparse"
|
| 104 |
+
],
|
| 105 |
+
"model_type": "glm_moe_dsa",
|
| 106 |
"moe_intermediate_size": 2048,
|
| 107 |
"moe_layer_freq": 1,
|
|
|
|
| 108 |
"n_group": 1,
|
| 109 |
"n_routed_experts": 256,
|
| 110 |
"n_shared_experts": 1,
|
|
|
|
| 114 |
"num_hidden_layers": 78,
|
| 115 |
"num_key_value_heads": 64,
|
| 116 |
"num_nextn_predict_layers": 1,
|
| 117 |
+
"pad_token_id": 154821,
|
| 118 |
"pretraining_tp": 1,
|
| 119 |
"q_lora_rank": 2048,
|
| 120 |
"qk_head_dim": 256,
|
|
|
|
| 131 |
"tie_word_embeddings": false,
|
| 132 |
"topk_group": 1,
|
| 133 |
"topk_method": "noaux_tc",
|
| 134 |
+
"transformers_version": "5.6.0.dev0",
|
| 135 |
+
"unsloth_fixed": true,
|
| 136 |
"use_cache": true,
|
| 137 |
"v_head_dim": 256,
|
| 138 |
"vocab_size": 154880
|
| 139 |
+
}
|
tokenizer_config.json
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"backend": "tokenizers",
|
|
|
|
| 3 |
"clean_up_tokenization_spaces": false,
|
| 4 |
"do_lower_case": false,
|
| 5 |
"eos_token": "<|endoftext|>",
|
|
@@ -26,8 +27,10 @@
|
|
| 26 |
"is_local": true,
|
| 27 |
"model_max_length": 202752,
|
| 28 |
"model_specific_special_tokens": {},
|
| 29 |
-
"pad_token": "
|
| 30 |
"padding_side": "left",
|
| 31 |
"remove_space": false,
|
| 32 |
-
"tokenizer_class": "TokenizersBackend"
|
| 33 |
-
|
|
|
|
|
|
|
|
|
| 1 |
{
|
| 2 |
"backend": "tokenizers",
|
| 3 |
+
"bos_token": null,
|
| 4 |
"clean_up_tokenization_spaces": false,
|
| 5 |
"do_lower_case": false,
|
| 6 |
"eos_token": "<|endoftext|>",
|
|
|
|
| 27 |
"is_local": true,
|
| 28 |
"model_max_length": 202752,
|
| 29 |
"model_specific_special_tokens": {},
|
| 30 |
+
"pad_token": "[MASK]",
|
| 31 |
"padding_side": "left",
|
| 32 |
"remove_space": false,
|
| 33 |
+
"tokenizer_class": "TokenizersBackend",
|
| 34 |
+
"unk_token": null,
|
| 35 |
+
"chat_template": "[gMASK]<sop>\n{%- if tools -%}\n{%- macro tool_to_json(tool) -%}\n {%- set ns_tool = namespace(first=true) -%}\n {{ '{' -}}\n {%- for k, v in tool.items() -%}\n {%- if k != 'defer_loading' and k != 'strict' -%}\n {%- if not ns_tool.first -%}{{- ', ' -}}{%- endif -%}\n {%- set ns_tool.first = false -%}\n \"{{ k }}\": {{ v | tojson(ensure_ascii=False) }}\n {%- endif -%}\n {%- endfor -%}\n {{- '}' -}}\n{%- endmacro -%}\n<|system|>\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>\n{% for tool in tools %}\n{%- if tool is not none and tool is mapping and 'function' in tool -%}\n {%- set tool = tool['function'] -%}\n{%- endif -%}\n{% if tool.defer_loading is not defined or not tool.defer_loading %}\n{{ tool_to_json(tool) }}\n{% endif %}\n{% endfor %}\n</tools>\n\nFor each function call, output the function name and arguments within the following XML format:\n<tool_call>{function-name}<arg_key>{arg-key-1}</arg_key><arg_value>{arg-value-1}</arg_value><arg_key>{arg-key-2}</arg_key><arg_value>{arg-value-2}</arg_value>...</tool_call>{%- endif -%}\n{%- macro visible_text(content) -%}\n {%- if content is string -%}\n {{- content }}\n {%- elif content is iterable and content is not mapping -%}\n {%- for item in content -%}\n {%- if item is mapping and item.type == 'text' -%}\n {{- item.text }}\n {%- elif item is string -%}\n {{- item }}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{- content }}\n {%- endif -%}\n{%- endmacro -%}\n{%- set ns = namespace(last_user_index=-1, thinking_indices='') -%}\n{%- for m in messages %}\n {%- if m is not none and m is mapping and m.role == 'user' %}\n {%- set ns.last_user_index = loop.index0 -%}\n {%- elif m.role == 'assistant' %}\n {%- if m.reasoning_content is string %}\n {%- set ns.thinking_indices = ns.thinking_indices ~ ',' ~ ns.last_user_index ~ ',' -%}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- 
set ns.has_thinking = false -%}\n{%- for m in messages -%}\n{%- if m is not none and m is mapping and m.role == 'user' -%}<|user|>{{ visible_text(m.content) }}{% set ns.has_thinking = (',' ~ loop.index0 ~ ',') in ns.thinking_indices -%}\n{%- elif m.role == 'assistant' -%}\n<|assistant|>\n{%- set content = visible_text(m.content) %}\n{%- if m.reasoning_content is string %}\n {%- set reasoning_content = m.reasoning_content %}\n{%- elif '</think>' in content %}\n {%- set reasoning_content = content.split('</think>')[0].split('<think>')[-1] %}\n {%- set content = content.split('</think>')[-1] %}\n{%- elif loop.index0 > ns.last_user_index and not (enable_thinking is defined and not enable_thinking) %}\n {%- set reasoning_content = '' %}\n{%- elif loop.index0 < ns.last_user_index and ns.has_thinking %}\n {%- set reasoning_content = '' %}\n{%- endif %}\n{%- if ((clear_thinking is defined and not clear_thinking) or loop.index0 > ns.last_user_index) and reasoning_content is defined -%}\n{{ '<think>' + reasoning_content + '</think>'}}\n{%- else -%}\n{{ '</think>' }}\n{%- endif -%}\n{%- if content.strip() -%}\n{{ content.strip() }}\n{%- endif -%}\n{% if m.tool_calls %}\n{% for tc in m.tool_calls %}\n{%- if tc.function %}\n {%- set tc = tc.function %}\n{%- endif %}\n{{- '<tool_call>' + tc.name -}}\n{% set _args = tc.arguments %}{% for k, v in _args.items() %}<arg_key>{{ k }}</arg_key><arg_value>{{ v | tojson(ensure_ascii=False) if v is not string else v }}</arg_value>{% endfor %}</tool_call>{% endfor %}\n{% endif %}\n{%- elif m.role == 'tool' -%}\n{%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|observation|>' -}}\n{%- endif %}\n{%- if m.content is string -%}\n {{- '<tool_response>' + m.content + '</tool_response>' -}}\n{%- else -%}\n {{- '<tool_response><tools>\\n' -}}\n {% for tr in m.content %}\n {%- for tool in tools -%}\n {%- if tool is not none and tool is mapping and 'function' in tool -%}\n {%- set tool = tool['function'] -%}\n {%- endif 
-%}\n {%- if tool is not none and tool is mapping and tool.name == tr.name -%}\n {{- tool_to_json(tool) + '\\n' -}}\n {%- endif -%}\n {%- endfor -%}\n {%- endfor -%}\n {{- '</tools></tool_response>' -}}\n{% endif -%}\n{%- elif m.role == 'system' -%}\n<|system|>{{ visible_text(m.content) }}\n{%- endif -%}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n <|assistant|>{{- '</think>' if (enable_thinking is defined and not enable_thinking) else '<think>' -}}\n{%- endif -%}"
|
| 36 |
+
}
|