Upload folder using huggingface_hub
Browse files- .gitattributes +1 -0
- chat_template.jinja +159 -0
- config.json +151 -0
- configuration_apriel2.py +144 -0
- generation_config.json +6 -0
- model.safetensors.index.json +0 -0
- model_0.safetensors +3 -0
- model_1.safetensors +3 -0
- model_2.safetensors +3 -0
- model_3.safetensors +3 -0
- model_4.safetensors +3 -0
- model_5.safetensors +3 -0
- modeling_apriel2.py +0 -0
- ok +0 -0
- preprocessor_config.json +27 -0
- special_tokens_map.json +30 -0
- tokenizer.json +3 -0
- tokenizer_config.json +0 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
chat_template.jinja
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
{# ---------------------------------------------------------------------- #}
{# Default setup and flags #}
{# ---------------------------------------------------------------------- #}
{%- set messages = messages or [] -%}
{%- set tools = tools or [] -%}
{# Respect the caller-supplied add_generation_prompt (default false). A later
   unconditional `set add_generation_prompt = true` used to clobber it, making
   the flag impossible to disable; that override has been removed. #}
{%- set add_generation_prompt = add_generation_prompt or false -%}
{%- set available_tool_string, add_tool_id = '', true -%}
{%- set add_thoughts = false -%} {# whether to include <thinking> reasoning blocks #}
{# Optional token placeholders (safe defaults) #}
{%- set bos_token = bos_token if (bos_token is defined) else '' -%}
{%- set eos_token = eos_token if (eos_token is defined) else '' -%}
{# ---------------------------------------------------------------------- #}
{# Core reasoning prompt and assistant reasoning prefix #}
{# ---------------------------------------------------------------------- #}
{%- set reasoning_prompt =
    'You are a thoughtful, systematic AI assistant from ServiceNow Language Models (SLAM) lab. '
    'Analyze each question carefully, present your reasoning step-by-step, then provide the final '
    'response after the marker [BEGIN FINAL RESPONSE].'
-%}
{%- set reasoning_asst_turn_start = 'Here are my reasoning steps:\n' -%}
{# ---------------------------------------------------------------------- #}
{# Tool list and tool call output format #}
{# ---------------------------------------------------------------------- #}
{%- if tools is not none and tools|length > 0 -%}
{%- set available_tool_string -%}
You are provided with function signatures within <available_tools></available_tools> XML tags.
You may call one or more functions to assist with the user query.
Don't make assumptions about the arguments. You should infer the argument values from previous
user responses and the system message.
Here are the available tools:
<available_tools>
{% for tool in tools %}{{ tool|string }}{% endfor %}

</available_tools>.

Return all function calls as a list of JSON objects within <tool_calls></tool_calls> XML tags.
Each JSON object should contain a function name and arguments as follows:
<tool_calls>[
{"name": <function-name-1>, "arguments": <args-dict-1>},
{"name": <function-name-2>, "arguments": <args-dict-2>},
...
]</tool_calls>
{%- endset -%}
{%- endif -%}
{# ---------------------------------------------------------------------- #}
{# Start system block if first message is not system #}
{# ---------------------------------------------------------------------- #}
{%- if messages|length > 0 and messages[0]['role'] != 'system' -%}
{%- if tools is not none and tools|length > 0 -%}
{{ bos_token + '<|begin_system|>\n' + reasoning_prompt + '\n' + available_tool_string + '\n' }}
{%- else -%}
{{ bos_token + '<|begin_system|>\n' + reasoning_prompt + '\n' }}
{%- endif -%}
{%- endif -%}
{# ---------------------------------------------------------------------- #}
{# Iterate through messages #}
{# ---------------------------------------------------------------------- #}
{%- for message in messages -%}

{# ---------------- USER MESSAGE ---------------- #}
{%- if message['role'] == 'user' -%}
{{ '<|begin_user|>\n' }}
{%- if message['content'] is not string -%}
{%- for chunk in message['content'] -%}
{%- if chunk['type'] == 'text' -%}
{{ chunk['text'] }}
{%- elif chunk['type'] in ['image', 'image_url'] -%}
{{ '[IMG]' }}
{%- else -%}
{{ raise_exception('Unrecognized content type!') }}
{%- endif -%}
{%- endfor -%}
{%- else -%}
{{ message['content'] }}
{%- endif -%}

{# ---------------- SYSTEM MESSAGE ---------------- #}
{%- elif message['role'] == 'system' -%}
{%- if message['content'] is not none and message['content']|length > 0 -%}
{%- if message['content'] is string -%}
{%- set system_message = message['content'] -%}
{%- else -%}
{%- set system_message = message['content'][0]['text'] -%}
{%- endif -%}
{%- else -%}
{%- set system_message = '' -%}
{%- endif -%}

{%- if tools is not none and tools|length > 0 -%}
{{ bos_token + '<|begin_system|>\n' + reasoning_prompt + '\n' + system_message + '\n' + available_tool_string + '\n' }}
{%- else -%}
{{ bos_token + '<|begin_system|>\n' + reasoning_prompt + '\n' + system_message + '\n' }}
{%- endif -%}

{# ---------------- ASSISTANT MESSAGE ---------------- #}
{%- elif message['role'] == 'assistant' -%}
{%- if loop.last -%}
{%- set add_tool_id = false -%}
{%- endif -%}

{{ '\n<|begin_assistant|>\n' }}

{%- if add_thoughts and 'thought' in message and message['thought'] is not none -%}
<thinking>{{ message['thought'] }}</thinking>
{%- endif -%}

{%- if message['content'] is not none and message['content']|length > 0 -%}
{%- if message['content'] is not string -%}
{{ message['content'][0]['text'] }}
{%- else -%}
{{ message['content'] }}
{%- endif -%}
{# Guard the key lookup: a plain assistant message has no 'chosen' key, and
   applying |length to an undefined value raises at render time. #}
{%- elif 'chosen' in message and message['chosen'] is not none and message['chosen']|length > 0 -%}
{{ message['chosen'][0] }}
{%- endif -%}

{# Tool call output — same key-existence guard as 'chosen' above. #}
{%- if 'tool_calls' in message and message['tool_calls'] is not none and message['tool_calls']|length > 0 -%}
{{ '\n<tool_calls>[' }}
{%- for tool_call in message['tool_calls'] -%}
{{ '{"name": "' + tool_call['function']['name'] + '", "arguments": ' + tool_call['function']['arguments']|string }}
{%- if add_tool_id == true and 'id' in tool_call -%}
{{ ', "id": "' + tool_call['id'] + '"' }}
{%- endif -%}
{{ '}' }}
{%- if not loop.last -%}{{ ', ' }}{%- endif -%}
{%- endfor -%}
{{ ']</tool_calls>' }}
{%- endif -%}

{# training_prompt is an optional external variable; check it is defined so
   strict-undefined Jinja environments do not raise. #}
{%- if not loop.last or (training_prompt is defined and training_prompt) -%}
{{ '\n<|end|>\n' }}
{%- endif -%}

{# ---------------- TOOL RESULT MESSAGE ---------------- #}
{%- elif message['role'] == 'tool' -%}
{%- if message['content'] is string -%}
{%- set tool_message = message['content'] -%}
{%- else -%}
{%- set tool_message = message['content'][0]['text'] -%}
{%- endif -%}
{{ '<|begin_tool_result|>\n' + tool_message|string + '\n' }}

{# ---------------- CONTENT MESSAGE ---------------- #}
{%- elif message['role'] == 'content' -%}
{%- if message['content'] is not string -%}
{{ '<|begin_content|>\n' + message['content'][0]['text'] + '\n' }}
{%- else -%}
{{ '<|begin_content|>\n' + message['content'] + '\n' }}
{%- endif -%}
{%- endif -%}

{# ---------------- REASONING PROMPT BEFORE NEXT ASSISTANT ---------------- #}
{%- if loop.last and add_generation_prompt and message['role'] != 'assistant' -%}
{{ '\n<|begin_assistant|>\n' + reasoning_asst_turn_start }}
{%- endif -%}

{%- endfor -%}
|
config.json
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"Apriel2ForConditionalGeneration"
|
| 4 |
+
],
|
| 5 |
+
"auto_map": {
|
| 6 |
+
"AutoConfig": "configuration_apriel2.Apriel2Config",
|
| 7 |
+
"AutoModel": "modeling_apriel2.Apriel2Model",
|
| 8 |
+
"AutoModelForImageTextToText": "modeling_apriel2.Apriel2ForConditionalGeneration"
|
| 9 |
+
},
|
| 10 |
+
"bos_token_id": 1,
|
| 11 |
+
"decoder": {
|
| 12 |
+
"block": {
|
| 13 |
+
"mixer": {
|
| 14 |
+
"main_mixer_name": "attention",
|
| 15 |
+
"mixers": {
|
| 16 |
+
"attention": {
|
| 17 |
+
"add_linear_biases": false,
|
| 18 |
+
"head_groups": 8,
|
| 19 |
+
"head_size": 128,
|
| 20 |
+
"heads": 32,
|
| 21 |
+
"rotary": {
|
| 22 |
+
"theta": 1000000000.0,
|
| 23 |
+
"type": "mistral_1d"
|
| 24 |
+
},
|
| 25 |
+
"type": "attention",
|
| 26 |
+
"window_size": null
|
| 27 |
+
},
|
| 28 |
+
"gdn": {
|
| 29 |
+
"convolution_layer": {
|
| 30 |
+
"kernel_size": 4
|
| 31 |
+
},
|
| 32 |
+
"key_head_dim": 128,
|
| 33 |
+
"key_heads": 8,
|
| 34 |
+
"type": "gdn",
|
| 35 |
+
"value_head_dim": 128,
|
| 36 |
+
"value_heads": 32
|
| 37 |
+
},
|
| 38 |
+
"kda": {
|
| 39 |
+
"convolution_layer": {
|
| 40 |
+
"kernel_size": 4
|
| 41 |
+
},
|
| 42 |
+
"head_dim": 128,
|
| 43 |
+
"heads": 32,
|
| 44 |
+
"normalization": {
|
| 45 |
+
"epsilon": 1e-05
|
| 46 |
+
},
|
| 47 |
+
"type": "kda"
|
| 48 |
+
},
|
| 49 |
+
"sliding_window": {
|
| 50 |
+
"add_linear_biases": false,
|
| 51 |
+
"head_groups": 8,
|
| 52 |
+
"head_size": 128,
|
| 53 |
+
"heads": 32,
|
| 54 |
+
"rotary": {
|
| 55 |
+
"theta": 1000000000.0,
|
| 56 |
+
"type": "mistral_1d"
|
| 57 |
+
},
|
| 58 |
+
"type": "attention",
|
| 59 |
+
"window_size": 4096
|
| 60 |
+
}
|
| 61 |
+
},
|
| 62 |
+
"sampling_strategy": "uniform",
|
| 63 |
+
"type": "stochastic"
|
| 64 |
+
},
|
| 65 |
+
"mlp": {
|
| 66 |
+
"activation": "silu",
|
| 67 |
+
"add_linear_biases": false,
|
| 68 |
+
"gated": true,
|
| 69 |
+
"intermediate_size": 14336,
|
| 70 |
+
"type": "mlp"
|
| 71 |
+
},
|
| 72 |
+
"normalization": {
|
| 73 |
+
"epsilon": 1e-05,
|
| 74 |
+
"type": "rms_norm"
|
| 75 |
+
}
|
| 76 |
+
},
|
| 77 |
+
"num_blocks": 48,
|
| 78 |
+
"type": "fixed"
|
| 79 |
+
},
|
| 80 |
+
"embeddings": {
|
| 81 |
+
"max_position_embeddings": 120000
|
| 82 |
+
},
|
| 83 |
+
"eos_token_id": 2,
|
| 84 |
+
"head": {
|
| 85 |
+
"normalization": {
|
| 86 |
+
"epsilon": 1e-05,
|
| 87 |
+
"type": "rms_norm"
|
| 88 |
+
}
|
| 89 |
+
},
|
| 90 |
+
"hidden_size": 5120,
|
| 91 |
+
"image_token_index": 10,
|
| 92 |
+
"model_type": "apriel2",
|
| 93 |
+
"tie_word_embeddings": false,
|
| 94 |
+
"transformers_version": "4.57.3",
|
| 95 |
+
"use_cache": true,
|
| 96 |
+
"vision_encoder": {
|
| 97 |
+
"adapter": {
|
| 98 |
+
"activation": "gelu_pytorch_tanh",
|
| 99 |
+
"add_linear_biases": true,
|
| 100 |
+
"gated": false,
|
| 101 |
+
"intermediate_size": 5120,
|
| 102 |
+
"type": "mlp"
|
| 103 |
+
},
|
| 104 |
+
"embeddings": {
|
| 105 |
+
"input_channels": 3,
|
| 106 |
+
"normalization": {
|
| 107 |
+
"epsilon": 1e-05,
|
| 108 |
+
"type": "rms_norm"
|
| 109 |
+
},
|
| 110 |
+
"patch_height": 16,
|
| 111 |
+
"patch_width": 16
|
| 112 |
+
},
|
| 113 |
+
"encoder": {
|
| 114 |
+
"block": {
|
| 115 |
+
"mixer": {
|
| 116 |
+
"add_linear_biases": false,
|
| 117 |
+
"causal": false,
|
| 118 |
+
"cross_document_attention": false,
|
| 119 |
+
"head_groups": 16,
|
| 120 |
+
"head_size": 64,
|
| 121 |
+
"heads": 16,
|
| 122 |
+
"rotary": {
|
| 123 |
+
"max_image_size": 1024,
|
| 124 |
+
"patch_size": 16,
|
| 125 |
+
"theta": 10000.0,
|
| 126 |
+
"type": "pixtral_2d"
|
| 127 |
+
},
|
| 128 |
+
"type": "attention"
|
| 129 |
+
},
|
| 130 |
+
"mlp": {
|
| 131 |
+
"activation": "silu",
|
| 132 |
+
"add_linear_biases": false,
|
| 133 |
+
"gated": true,
|
| 134 |
+
"intermediate_size": 4096,
|
| 135 |
+
"type": "mlp"
|
| 136 |
+
},
|
| 137 |
+
"normalization": {
|
| 138 |
+
"epsilon": 1e-05,
|
| 139 |
+
"type": "rms_norm"
|
| 140 |
+
}
|
| 141 |
+
},
|
| 142 |
+
"num_blocks": 24,
|
| 143 |
+
"type": "fixed"
|
| 144 |
+
},
|
| 145 |
+
"hidden_size": 1024,
|
| 146 |
+
"num_channels": 3,
|
| 147 |
+
"num_hidden_layers": 24,
|
| 148 |
+
"patch_size": 16
|
| 149 |
+
},
|
| 150 |
+
"vocab_size": 131072
|
| 151 |
+
}
|
configuration_apriel2.py
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Apriel2 HuggingFace configuration."""
|
| 2 |
+
|
| 3 |
+
import logging
|
| 4 |
+
from typing import Optional
|
| 5 |
+
|
| 6 |
+
from transformers import PretrainedConfig
|
| 7 |
+
|
| 8 |
+
logger = logging.getLogger(__name__)
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class Apriel2TextConfig(PretrainedConfig):
    """Configuration for the Apriel2 text (decoder-only) model.

    The decoder stack is described by a nested ``decoder`` dict with one of
    two layouts:

    * ``"fixed"`` — every layer shares a single ``"block"`` config.
    * ``"pattern"`` — a repeating sequence of named block configs: ``"pattern"``
      lists block names that cycle over the layers, and ``"blocks"`` maps each
      name to its config dict.
    """

    model_type = "apriel2_text"

    def __init__(
        self,
        hidden_size: int = 4096,
        vocab_size: int = 32000,
        decoder: Optional[dict] = None,
        embeddings: Optional[dict] = None,
        head: Optional[dict] = None,
        tie_word_embeddings: bool = False,
        bos_token_id: int = 1,
        eos_token_id: int = 2,
        pad_token_id: Optional[int] = None,
        use_cache: bool = True,
        **kwargs,
    ):
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        self.use_cache = use_cache

        # Fall back to sensible defaults for any omitted sub-config.
        # NOTE: hidden_size must be set first — the decoder default derives
        # head_size and intermediate_size from it.
        self.decoder = decoder or self._default_decoder_config()
        self.embeddings = embeddings or self._default_embeddings_config()
        self.head = head or self._default_head_config()

        super().__init__(
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            pad_token_id=pad_token_id,
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )

    def _default_decoder_config(self) -> dict:
        """Default decoder: 32 identical attention blocks sized from hidden_size."""
        return {
            "type": "fixed",
            "num_blocks": 32,
            "block": {
                "mixer": {
                    "type": "attention",
                    "heads": 32,
                    "head_groups": 32,
                    "head_size": self.hidden_size // 32,
                    "rotary": {"type": "default", "theta": 10000.0},
                    "add_linear_biases": False,
                },
                "mlp": {
                    "type": "mlp",
                    "intermediate_size": self.hidden_size * 4,
                    "activation": "silu",
                    "gated": True,
                    "add_linear_biases": False,
                },
                "normalization": {"type": "rms_norm", "epsilon": 1e-5},
            },
        }

    def _default_embeddings_config(self) -> dict:
        """Default embeddings config (position-embedding capacity only)."""
        return {
            "max_position_embeddings": 2048,
        }

    def _default_head_config(self) -> dict:
        """Default LM-head config (final normalization only)."""
        return {
            "normalization": {"type": "rms_norm", "epsilon": 1e-5},
        }

    def get_text_config(self, decoder: bool = False):
        """Return the text config (this object); HF compatibility hook."""
        return self

    def get_block_name(self, layer_idx: int) -> str:
        """Return the block name used at ``layer_idx``.

        For a "fixed" decoder this is always ``"block"``; for a "pattern"
        decoder the pattern cycles over the layers.

        Raises:
            ValueError: if the pattern is missing or the decoder type is unknown.
        """
        decoder_type = self.decoder.get("type", "fixed")

        if decoder_type == "fixed":
            return "block"
        elif decoder_type == "pattern":
            pattern = self.decoder.get("pattern", [])
            if not pattern:
                raise ValueError("Pattern decoder requires 'pattern' field")
            return pattern[layer_idx % len(pattern)]
        else:
            raise ValueError(f"Unknown decoder type: {decoder_type}")

    def get_block_config(self, layer_idx: int) -> dict:
        """Return the block config dict for ``layer_idx``.

        Raises:
            ValueError: if required fields are missing or the decoder type is unknown.
        """
        decoder_type = self.decoder.get("type", "fixed")

        if decoder_type == "fixed":
            return self.decoder.get("block", {})
        elif decoder_type == "pattern":
            blocks = self.decoder.get("blocks", {})
            pattern = self.decoder.get("pattern", [])
            if not blocks or not pattern:
                raise ValueError("Pattern decoder requires 'blocks' and 'pattern' fields")
            # Reuse get_block_name so name resolution cannot drift between
            # the two lookups.
            return blocks[self.get_block_name(layer_idx)]
        else:
            raise ValueError(f"Unknown decoder type: {decoder_type}")
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
class Apriel2Config(Apriel2TextConfig):
    """Multimodal Apriel2 configuration.

    Extends :class:`Apriel2TextConfig` with an optional vision-encoder
    sub-config and the vocabulary index of the image placeholder token.
    """

    model_type = "apriel2"

    def __init__(
        self,
        hidden_size: int = 4096,
        vocab_size: int = 32000,
        decoder: Optional[dict] = None,
        embeddings: Optional[dict] = None,
        head: Optional[dict] = None,
        vision_encoder: Optional[dict] = None,
        image_token_index: Optional[int] = None,
        tie_word_embeddings: bool = False,
        bos_token_id: int = 1,
        eos_token_id: int = 2,
        pad_token_id: Optional[int] = None,
        use_cache: bool = True,
        **kwargs,
    ):
        # Vision-specific fields live only on this subclass; they are named
        # parameters, so they can never arrive through **kwargs and the base
        # __init__ cannot overwrite them.
        self.vision_encoder = vision_encoder
        self.image_token_index = image_token_index

        # Everything text-related is delegated to Apriel2TextConfig.
        super().__init__(
            hidden_size=hidden_size,
            vocab_size=vocab_size,
            decoder=decoder,
            embeddings=embeddings,
            head=head,
            tie_word_embeddings=tie_word_embeddings,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            pad_token_id=pad_token_id,
            use_cache=use_cache,
            **kwargs,
        )
|
generation_config.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_from_model_config": true,
|
| 3 |
+
"bos_token_id": 1,
|
| 4 |
+
"eos_token_id": 2,
|
| 5 |
+
"transformers_version": "4.49.0"
|
| 6 |
+
}
|
model.safetensors.index.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
model_0.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8d8388e1c0436b65a78bddb703043fa6436b64ab8c16fff8c447e5387137b8de
|
| 3 |
+
size 17425048856
|
model_1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:57e36c17b1e3117255fab5fc00715dfba4ea71b94d2ff4baeae148b489be01eb
|
| 3 |
+
size 17470320128
|
model_2.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0af1648b2f47fa69cc71867d88fb05fc28c949e3a8c84043acd77caa3d4a6a9f
|
| 3 |
+
size 17470320304
|
model_3.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aac25febd9dbc7f067d264c08ad36216501e91c3d4c3a5e223cb32f8fd6e89f7
|
| 3 |
+
size 17470320304
|
model_4.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9ef25d09db12a0c5b9e8cb2d63721ea60b6b9c6c0692df31f19b32286f042095
|
| 3 |
+
size 17470320304
|
model_5.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d7ccbcb5ec330e2ddc9fb19a1efebf610d60940acb47258c9f280951d9ae63ab
|
| 3 |
+
size 12977332728
|
modeling_apriel2.py
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
ok
ADDED
|
File without changes
|
preprocessor_config.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"do_convert_rgb": true,
|
| 3 |
+
"do_normalize": true,
|
| 4 |
+
"do_rescale": true,
|
| 5 |
+
"do_resize": true,
|
| 6 |
+
"image_mean": [
|
| 7 |
+
0.48145466,
|
| 8 |
+
0.4578275,
|
| 9 |
+
0.40821073
|
| 10 |
+
],
|
| 11 |
+
"image_processor_type": "PixtralImageProcessor",
|
| 12 |
+
"image_std": [
|
| 13 |
+
0.26862954,
|
| 14 |
+
0.26130258,
|
| 15 |
+
0.27577711
|
| 16 |
+
],
|
| 17 |
+
"patch_size": {
|
| 18 |
+
"height": 16,
|
| 19 |
+
"width": 16
|
| 20 |
+
},
|
| 21 |
+
"processor_class": "PixtralProcessor",
|
| 22 |
+
"resample": 3,
|
| 23 |
+
"rescale_factor": 0.00392156862745098,
|
| 24 |
+
"size": {
|
| 25 |
+
"longest_edge": 1024
|
| 26 |
+
}
|
| 27 |
+
}
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "<s>",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"eos_token": {
|
| 10 |
+
"content": "</s>",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"pad_token": {
|
| 17 |
+
"content": "<pad>",
|
| 18 |
+
"lstrip": false,
|
| 19 |
+
"normalized": false,
|
| 20 |
+
"rstrip": false,
|
| 21 |
+
"single_word": false
|
| 22 |
+
},
|
| 23 |
+
"unk_token": {
|
| 24 |
+
"content": "<unk>",
|
| 25 |
+
"lstrip": false,
|
| 26 |
+
"normalized": false,
|
| 27 |
+
"rstrip": false,
|
| 28 |
+
"single_word": false
|
| 29 |
+
}
|
| 30 |
+
}
|
tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:50c4196bd3d61abf4a6f9d116435140b8ac0606e15eb4d02e235f9036257dc3e
|
| 3 |
+
size 17077327
|
tokenizer_config.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|