diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..52373fe24473b1aa44333d318f578ae6bf04b49b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..9376bc3207892a52bb2477893f4cd053b5d35e7e --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,249 @@ +{#- Rendering flags resolved from caller-supplied variables. reasoning defaults to true unless reasoning_effort is given as "none" (any case); grounding text is rendered only when grounding equals "enabled" (any case) and tools or documents were passed; the tools section is always rendered. -#}{%- set reasoning = reasoning if reasoning is not undefined else (false if reasoning_effort is defined and reasoning_effort | lower == "none" else true) -%} +{%- set grounding = grounding | default("disabled") | upper %} +{%- set grounding_enabled = grounding == "ENABLED" %} +{%- set tools_or_docs_exist = tools or documents %} +{%- set render_tools_section = true %} +{%- set render_grounding = grounding_enabled and tools_or_docs_exist %} +{%- set render_platform_instruction_override = true if platform_instruction_override else false %} +{%- set has_developer_instruction = developer_instruction or developer_instruction == "" %} +{%- set render_developer_instruction = true if developer_instruction else false %} +{%- set convert_first_system_msg = convert_first_system_msg | default(true) -%} +{%- set skip_thinking = skip_thinking | default(false) -%} +{{ bos_token }} +{%- macro document_turn(documents) -%} +{# Render the documents as a synthetic exchange: a chatbot turn that calls the direct-injected-document tool with tool_call_id "0" (preceded by a canned thinking span unless skip_thinking is set), then a system tool-result turn whose "results" object maps each zero-based document index to doc.data when that is truthy, otherwise to the document itself, serialized with tojson. -#} +<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{%- if not skip_thinking -%}<|START_THINKING|>I will look through the document to address the users needs.<|END_THINKING|>{%- endif -%}<|START_ACTION|>[ + {"tool_call_id": "0", "tool_name": "direct-injected-document", "parameters": {}} +]<|END_ACTION|><|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|><|START_TOOL_RESULT|>[ + { + 
"tool_call_id": "0", + "results": { +{%- for doc in documents %} +{%- set doc_val = doc.data if doc.data else doc %} + + "{{ loop.index0 }}": {{ doc_val|tojson }}{% if not loop.last %}, + {%- endif %} +{%- endfor %} + + }, + "is_error": null + } +]<|END_TOOL_RESULT|><|END_OF_TURN_TOKEN|>{%- endmacro %} +{%- macro tool_call_id_to_int(messages, tool_call_id) %}{#- Emit the id to print for tool_call_id. When regen_tool_call_ids is truthy, every tool call in messages is counted in order starting from 0 and the count at the first call whose id equals tool_call_id is emitted (nothing is emitted when no call matches); otherwise tool_call_id is emitted unchanged. NOTE(review): this count starts at 0 regardless of documents, while the message loop advances tool_idx by 1 after injecting the document turn; confirm call ids and result ids still agree when documents and tools are combined. -#} +{%- if regen_tool_call_ids -%} + {%- set counter = namespace(value=0) %} + {%- set tool_call_id_seen = namespace(value=false) %} + {%- for msg in messages %} + {%- if msg.tool_calls %} + {%- for tool_call in msg.tool_calls %} + {%- if tool_call.id == tool_call_id and not tool_call_id_seen.value -%} + {{ counter.value }} + {%- set tool_call_id_seen.value = true %} + {%- endif %} + {%- set counter.value = counter.value + 1 %} + {%- endfor %} + {%- endif %} + {%- endfor %} +{%- else -%} + {{ tool_call_id }} +{%- endif -%} +{%- endmacro %} +{%- macro format_tool_message(messages, tool_msg) -%} +{#- Emit one tool-result JSON object: the id produced by tool_call_id_to_int plus a "results" map. String content is wrapped in an object with a single "content" key and mapping content is used as-is, both under key "0"; any other content is iterated and each item is printed by print_tool_content under its zero-based index. #}{ + "tool_call_id": "{{ tool_call_id_to_int(messages, tool_msg.tool_call_id) }}", + "results": { + {%- if tool_msg.content is mapping or tool_msg.content is string %} + + {% if tool_msg.content is string -%} + {%- set text_wrapper = {"content": tool_msg.content} -%} + {%- else -%} + {%- set text_wrapper = tool_msg.content -%} + {%- endif %} + "0": {{ text_wrapper|tojson }} + {%- else %} + {%- for content in tool_msg.content %} + + "{{ loop.index0 }}": {{ print_tool_content(content) }}{% if not loop.last %},{% endif %} + {%- endfor %} + {%- endif %} + + }, + "is_error": null + } +{%- endmacro -%} +{%- macro print_tool_content(item) %}{#- Serialize one content item of a tool message with tojson. A "text" item becomes an object with a single "content" key holding item.text, a "document" item that carries document.data becomes that data, and anything else is dumped whole. The type comparison is case-insensitive. -#} +{%- if item.type|lower == "text" -%} +{%- set text_wrapper = {"content": item.text} -%} +{{ text_wrapper|tojson }} +{%- elif item.type|lower == "document" and item.document and "data" in item.document -%} +{{ item.document.data|tojson }} +{%- else -%} +{{ item|tojson }} +{%- endif -%} +{%- endmacro %} +{%- macro print_msg(msg) %}{#- Render a message body. A plain string, or a message whose content is a string, is wrapped in one START_TEXT/END_TEXT pair. For list content, each run of consecutive text parts shares one START_TEXT/END_TEXT pair, and an image part emits content.data when present, otherwise the IMG_PATCH placeholder token. -#} + {%- if msg is string -%} 
+<|START_TEXT|>{{ msg }}<|END_TEXT|> + {%- elif msg.content is string -%} +<|START_TEXT|>{{ msg.content }}<|END_TEXT|> + {%- else %} + {%- set last_was_text = namespace(value=false) %} + {%- for content in msg.content %} + {%- if content.type|lower == "text" -%} + {%- if not last_was_text.value -%} + <|START_TEXT|> + {%- endif -%} + {{ content.text }} + {%- if loop.last -%} + <|END_TEXT|> + {%- endif %} + {%- set last_was_text.value = true -%} + {%- else -%} + {%- if last_was_text.value -%} + <|END_TEXT|> + {%- endif -%} + {%- set last_was_text.value = false -%} + {%- endif -%} + {%- if content.type|lower == "image" -%} + {%- if content.data -%} +{{ content.data }} + {%- else -%} +<|IMG_PATCH|> + {%- endif -%} + {%- endif -%} + {%- endfor %} + {%- endif %} +{%- endmacro %} +{%- macro print_thinking(msg) %}{#- Emit the reasoning text of a message: msg.thinking when truthy, otherwise the thinking field of the first content part when that is truthy. Emits nothing when neither is present. -#} + {%- if msg.thinking -%} +{{ msg.thinking }} + {%- elif msg.content and msg.content[0].thinking -%} +{{ msg.content[0].thinking }} + {%- endif %} +{%- endmacro %} +{#- When the conversation opens with a system message, no developer_instruction was passed (an empty string counts as passed) and convert_first_system_msg is enabled, that first message is used as the developer instruction; initial_instruction_message lets the message loop skip it later. -#}{%- if messages and messages[0]['role']|lower == 'system' and not has_developer_instruction and convert_first_system_msg %}{%- set developer_instruction = messages[0] %}{%- set render_developer_instruction = true %}{%- set initial_instruction_message = true %}{% endif %} +{%- set json_object = true if response_format and response_format.type == "json_object" else false %} +{%- set json_schema = (response_format.json_schema or response_format.schema) if response_format %} +{%- set json_mode = json_object or json_schema %} +{%- set tool_idx = namespace(value=0) %} +{%- set tool_ids_seen = namespace(value=[]) %} +{%- set regen_tool_call_ids = regen_tool_call_ids | default(true) -%} +{%- set sent_documents = namespace(value=false) -%} + +{%- if render_tools_section or render_platform_instruction_override or render_grounding or json_mode -%} +<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|><|START_TEXT|> +{%- elif not render_developer_instruction -%} +<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|> +{%- endif %} + +{%- set 
rendered_platform_turn_chunk = false %} + +{%- if render_platform_instruction_override -%} +{{ platform_instruction_override }} +{% set rendered_platform_turn_chunk = true %} +{%- else %} +{%- endif %} + +{%- if render_grounding -%} +{%- if rendered_platform_turn_chunk %} + +{% endif -%} +Note that both your responses and reflections can be grounded. Grounding means you associate pieces of texts (called "spans") with those specific tool results that support them (called "sources"). And you use a pair of tags "<co>" and "</co>" to indicate when a span can be grounded onto a list of sources, listing them out in the closing tag. Sources from the same tool call are grouped together and listed as "{tool_call_id}:[{list of result indices}]", before they are joined together by ",". E.g., "<co>span</co: 0:[1,2],1:[0]>" means that "span" is supported by result 1 and 2 from "tool_call_id=0" as well as result 0 from "tool_call_id=1". +{% set rendered_platform_turn_chunk = true %} +{%- endif %} + +{%- if render_tools_section %} +{%- if rendered_platform_turn_chunk %} + +{% endif %} +# Available Tools +```json +[ +{% if tools_or_docs_exist %} +{%- if documents %} + {"name": "direct-injected-document", "description": "This is a special tool to directly inject user-uploaded documents into the chat as additional context. 
DO NOT use this tool by yourself!", "parameters": {"type": "object", "properties": {}, "required": []}, "responses": {"200": {"description": "Successfully returned a list of chunked text snippets from the directly uploaded documents.", "content": {"application/json": {"schema": {"type": "array", "items": {"type": "object", "required": ["url", "snippet"], "properties": {"url": {"type": "string", "description": "The url of the uploaded document."}, "snippet": {"type": "string", "description": "The text snippet for the returned document chunk."}}}}}}}}} + {%- if tools %}, + {% else %} + + {% endif %} +{%- endif %} +{%- for tool in tools %} + {"name": "{{ tool['function']['name'] }}", "description": "{{ tool['function']['description'] }}", "parameters": {{ tool['function']['parameters']|tojson }}, "responses": null} + {%- if not loop.last %},{% endif %} + +{% endfor %} +{%- else %} + +{% endif %} +] +``` +{%- set rendered_platform_turn_chunk = true %} +{%- endif -%} + +{%- if json_mode -%} +{%- if rendered_platform_turn_chunk %} + + +{% endif -%} +When generating JSON objects, do not generate block markers. Generate an object directly without prefixing with ```json. Return only the JSON and nothing else. 
+ {%- if json_schema %} + +Your output should adhere to the following json schema: +{{ json_schema }} + {%- endif -%} +{%- set rendered_platform_turn_chunk = true %} +{%- endif %} +{%- if rendered_platform_turn_chunk -%} +<|END_TEXT|><|END_OF_TURN_TOKEN|> +{%- elif not render_developer_instruction -%} +<|END_OF_TURN_TOKEN|> +{%- endif %} +{%- if render_developer_instruction -%} +<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{{ print_msg(developer_instruction) }}<|END_OF_TURN_TOKEN|> +{%- endif %} +{%- for message in messages %}{#- Render each message as one turn chosen by its lower-cased role. A leading system message that was already used as the developer instruction matches no branch and is therefore not rendered again. -#} + {%- set msg_role_downcased = message.role | lower %} + {%- if msg_role_downcased == 'system' and (not (loop.first and initial_instruction_message)) -%} +<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{{ print_msg(message) }}<|END_OF_TURN_TOKEN|> + {%- elif msg_role_downcased == 'user' -%} +<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{{ print_msg(message) }}<|END_OF_TURN_TOKEN|> + {#- Documents are injected once, right after the first user turn. tool_idx is advanced by one at the same time, presumably because the injected call occupies id 0; confirm against tool_call_id_to_int. -#}{%- if documents and not sent_documents.value %}{%- set sent_documents.value = true %}{% set tool_idx.value = tool_idx.value + 1 %}{{ document_turn(documents) }}{% endif %} + {%- elif msg_role_downcased == 'assistant' or msg_role_downcased == 'chatbot' -%} +<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|> + {%- if message.tool_calls %} + {% if not skip_thinking %} + {% if message.tool_plan -%} + <|START_THINKING|>{{ message.tool_plan }}<|END_THINKING|> + {%- elif message.thinking or (message.content and message.content[0].type == "thinking") -%} + <|START_THINKING|>{{ print_thinking(message) }}<|END_THINKING|> + {%- endif %} + {%- endif %}<|START_ACTION|>[ + {%- for tc in message.tool_calls %} + + {"tool_call_id": "{%- if regen_tool_call_ids -%}{{ tool_idx.value }}{%- else -%}{{ tc.id }}{%- endif -%}", "tool_name": "{{ tc['function']['name'] }}", "parameters": {{ tc['function']['arguments']|tojson }}}{% if not loop.last %},{% endif %} + {%- set tool_idx.value = tool_idx.value + 1 %} + {%- endfor %} + +]<|END_ACTION|><|END_OF_TURN_TOKEN|> + {%- else -%} + {% if (message.thinking or 
(message.content and message.content[0].type == "thinking")) and not skip_thinking -%} + <|START_THINKING|>{{ print_thinking(message) }}<|END_THINKING|> + {%- endif -%} + {{ print_msg(message) }}<|END_OF_TURN_TOKEN|> + {%- endif %} + {%- elif msg_role_downcased == 'tool' and message.tool_call_id not in tool_ids_seen.value -%}{#- A run of consecutive tool messages is merged into one tool-result turn. The ids of the trailing messages in the run are recorded in tool_ids_seen so the outer loop does not render them a second time. -#} +<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|><|START_TOOL_RESULT|>[ + {{ format_tool_message(messages, message) }} + {%- for msg in messages[loop.index0 + 1:] %} + + {%- if msg.role | lower == 'tool' %}, + {{ format_tool_message(messages, msg) }} + {%- set tool_ids_seen.value = tool_ids_seen.value + [msg.tool_call_id] %} + {%- else %} + {%- break %} + {%- endif %} + {%- endfor %} + +]<|END_TOOL_RESULT|><|END_OF_TURN_TOKEN|> + {%- endif %} +{%- endfor %}{#- Generation prompt: open a chatbot turn with the thinking span left open when reasoning is on, or opened and immediately closed when it is off. -#}{%- if add_generation_prompt -%}<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{% if reasoning %}<|START_THINKING|>{% else %}<|START_THINKING|><|END_THINKING|>{% endif %}{%- endif %} diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..449de0e32bcd21370030dafaf8be5c5e4597cfa0 --- /dev/null +++ b/config.json @@ -0,0 +1,485 @@ +{ + "adapter_layer_norm_eps": 1e-06, + "alignment_activation_fn": "swiglu", + "alignment_intermediate_size": 12288, + "architectures": [ + "Cohere2VisionForConditionalGeneration" + ], + "downsample_factor": 2, + "dtype": "bfloat16", + "enable_adapter_layer_norm": false, + "image_token": 255031, + "image_token_id": 255031, + "max_splits_per_img": 12, + "model_type": "cohere2_vision", + "quantization_config": { + "config_groups": { + "group_0": { + "format": "float-quantized", + "input_activations": { + "actorder": null, + "block_structure": null, + "dynamic": true, + "group_size": null, + "num_bits": 8, + "observer": null, + "observer_kwargs": {}, + "scale_dtype": null, + "strategy": "token", + "symmetric": true, + "type": "float", + "zp_dtype": null + }, + "output_activations": null, + "targets": [ + "Linear" + ], + "weights": { + 
"actorder": null, + "block_structure": null, + "dynamic": false, + "group_size": null, + "num_bits": 8, + "observer": "memoryless_minmax", + "observer_kwargs": {}, + "scale_dtype": null, + "strategy": "channel", + "symmetric": true, + "type": "float", + "zp_dtype": null + } + } + }, + "format": "float-quantized", + "global_compression_ratio": null, + "ignore": [ + "model.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj", + "model.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj", + "model.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj", + "model.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj", + "model.vision_tower.vision_model.encoder.layers.0.mlp.fc1", + "model.vision_tower.vision_model.encoder.layers.0.mlp.fc2", + "model.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj", + "model.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj", + "model.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj", + "model.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj", + "model.vision_tower.vision_model.encoder.layers.1.mlp.fc1", + "model.vision_tower.vision_model.encoder.layers.1.mlp.fc2", + "model.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj", + "model.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj", + "model.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj", + "model.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj", + "model.vision_tower.vision_model.encoder.layers.2.mlp.fc1", + "model.vision_tower.vision_model.encoder.layers.2.mlp.fc2", + "model.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj", + "model.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj", + "model.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj", + "model.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj", + "model.vision_tower.vision_model.encoder.layers.3.mlp.fc1", + 
"model.vision_tower.vision_model.encoder.layers.3.mlp.fc2", + "model.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj", + "model.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj", + "model.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj", + "model.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj", + "model.vision_tower.vision_model.encoder.layers.4.mlp.fc1", + "model.vision_tower.vision_model.encoder.layers.4.mlp.fc2", + "model.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj", + "model.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj", + "model.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj", + "model.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj", + "model.vision_tower.vision_model.encoder.layers.5.mlp.fc1", + "model.vision_tower.vision_model.encoder.layers.5.mlp.fc2", + "model.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj", + "model.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj", + "model.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj", + "model.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj", + "model.vision_tower.vision_model.encoder.layers.6.mlp.fc1", + "model.vision_tower.vision_model.encoder.layers.6.mlp.fc2", + "model.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj", + "model.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj", + "model.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj", + "model.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj", + "model.vision_tower.vision_model.encoder.layers.7.mlp.fc1", + "model.vision_tower.vision_model.encoder.layers.7.mlp.fc2", + "model.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj", + "model.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj", + "model.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj", + "model.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj", + 
"model.vision_tower.vision_model.encoder.layers.8.mlp.fc1", + "model.vision_tower.vision_model.encoder.layers.8.mlp.fc2", + "model.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj", + "model.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj", + "model.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj", + "model.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj", + "model.vision_tower.vision_model.encoder.layers.9.mlp.fc1", + "model.vision_tower.vision_model.encoder.layers.9.mlp.fc2", + "model.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj", + "model.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj", + "model.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj", + "model.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj", + "model.vision_tower.vision_model.encoder.layers.10.mlp.fc1", + "model.vision_tower.vision_model.encoder.layers.10.mlp.fc2", + "model.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj", + "model.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj", + "model.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj", + "model.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj", + "model.vision_tower.vision_model.encoder.layers.11.mlp.fc1", + "model.vision_tower.vision_model.encoder.layers.11.mlp.fc2", + "model.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj", + "model.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj", + "model.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj", + "model.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj", + "model.vision_tower.vision_model.encoder.layers.12.mlp.fc1", + "model.vision_tower.vision_model.encoder.layers.12.mlp.fc2", + "model.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj", + "model.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj", + "model.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj", + 
"model.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj", + "model.vision_tower.vision_model.encoder.layers.13.mlp.fc1", + "model.vision_tower.vision_model.encoder.layers.13.mlp.fc2", + "model.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj", + "model.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj", + "model.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj", + "model.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj", + "model.vision_tower.vision_model.encoder.layers.14.mlp.fc1", + "model.vision_tower.vision_model.encoder.layers.14.mlp.fc2", + "model.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj", + "model.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj", + "model.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj", + "model.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj", + "model.vision_tower.vision_model.encoder.layers.15.mlp.fc1", + "model.vision_tower.vision_model.encoder.layers.15.mlp.fc2", + "model.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj", + "model.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj", + "model.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj", + "model.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj", + "model.vision_tower.vision_model.encoder.layers.16.mlp.fc1", + "model.vision_tower.vision_model.encoder.layers.16.mlp.fc2", + "model.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj", + "model.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj", + "model.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj", + "model.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj", + "model.vision_tower.vision_model.encoder.layers.17.mlp.fc1", + "model.vision_tower.vision_model.encoder.layers.17.mlp.fc2", + "model.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj", + 
"model.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj", + "model.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj", + "model.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj", + "model.vision_tower.vision_model.encoder.layers.18.mlp.fc1", + "model.vision_tower.vision_model.encoder.layers.18.mlp.fc2", + "model.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj", + "model.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj", + "model.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj", + "model.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj", + "model.vision_tower.vision_model.encoder.layers.19.mlp.fc1", + "model.vision_tower.vision_model.encoder.layers.19.mlp.fc2", + "model.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj", + "model.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj", + "model.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj", + "model.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj", + "model.vision_tower.vision_model.encoder.layers.20.mlp.fc1", + "model.vision_tower.vision_model.encoder.layers.20.mlp.fc2", + "model.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj", + "model.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj", + "model.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj", + "model.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj", + "model.vision_tower.vision_model.encoder.layers.21.mlp.fc1", + "model.vision_tower.vision_model.encoder.layers.21.mlp.fc2", + "model.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj", + "model.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj", + "model.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj", + "model.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj", + "model.vision_tower.vision_model.encoder.layers.22.mlp.fc1", + 
"model.vision_tower.vision_model.encoder.layers.22.mlp.fc2", + "model.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj", + "model.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj", + "model.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj", + "model.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj", + "model.vision_tower.vision_model.encoder.layers.23.mlp.fc1", + "model.vision_tower.vision_model.encoder.layers.23.mlp.fc2", + "model.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj", + "model.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj", + "model.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj", + "model.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj", + "model.vision_tower.vision_model.encoder.layers.24.mlp.fc1", + "model.vision_tower.vision_model.encoder.layers.24.mlp.fc2", + "model.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj", + "model.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj", + "model.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj", + "model.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj", + "model.vision_tower.vision_model.encoder.layers.25.mlp.fc1", + "model.vision_tower.vision_model.encoder.layers.25.mlp.fc2", + "model.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj", + "model.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj", + "model.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj", + "model.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj", + "model.vision_tower.vision_model.encoder.layers.26.mlp.fc1", + "model.vision_tower.vision_model.encoder.layers.26.mlp.fc2", + "model.multi_modal_projector.linear_1", + "model.multi_modal_projector.linear_2", + "model.language_model.layers.0.self_attn.q_proj", + "model.language_model.layers.0.self_attn.k_proj", + "model.language_model.layers.0.self_attn.v_proj", + 
"model.language_model.layers.0.self_attn.o_proj", + "model.language_model.layers.0.mlp.gate", + "model.language_model.layers.1.self_attn.q_proj", + "model.language_model.layers.1.self_attn.k_proj", + "model.language_model.layers.1.self_attn.v_proj", + "model.language_model.layers.1.self_attn.o_proj", + "model.language_model.layers.1.mlp.gate", + "model.language_model.layers.2.self_attn.q_proj", + "model.language_model.layers.2.self_attn.k_proj", + "model.language_model.layers.2.self_attn.v_proj", + "model.language_model.layers.2.self_attn.o_proj", + "model.language_model.layers.2.mlp.gate", + "model.language_model.layers.3.self_attn.q_proj", + "model.language_model.layers.3.self_attn.k_proj", + "model.language_model.layers.3.self_attn.v_proj", + "model.language_model.layers.3.self_attn.o_proj", + "model.language_model.layers.3.mlp.gate", + "model.language_model.layers.4.self_attn.q_proj", + "model.language_model.layers.4.self_attn.k_proj", + "model.language_model.layers.4.self_attn.v_proj", + "model.language_model.layers.4.self_attn.o_proj", + "model.language_model.layers.4.mlp.gate", + "model.language_model.layers.5.self_attn.q_proj", + "model.language_model.layers.5.self_attn.k_proj", + "model.language_model.layers.5.self_attn.v_proj", + "model.language_model.layers.5.self_attn.o_proj", + "model.language_model.layers.5.mlp.gate", + "model.language_model.layers.6.self_attn.q_proj", + "model.language_model.layers.6.self_attn.k_proj", + "model.language_model.layers.6.self_attn.v_proj", + "model.language_model.layers.6.self_attn.o_proj", + "model.language_model.layers.6.mlp.gate", + "model.language_model.layers.7.self_attn.q_proj", + "model.language_model.layers.7.self_attn.k_proj", + "model.language_model.layers.7.self_attn.v_proj", + "model.language_model.layers.7.self_attn.o_proj", + "model.language_model.layers.7.mlp.gate", + "model.language_model.layers.8.self_attn.q_proj", + "model.language_model.layers.8.self_attn.k_proj", + 
"model.language_model.layers.8.self_attn.v_proj", + "model.language_model.layers.8.self_attn.o_proj", + "model.language_model.layers.8.mlp.gate", + "model.language_model.layers.9.self_attn.q_proj", + "model.language_model.layers.9.self_attn.k_proj", + "model.language_model.layers.9.self_attn.v_proj", + "model.language_model.layers.9.self_attn.o_proj", + "model.language_model.layers.9.mlp.gate", + "model.language_model.layers.10.self_attn.q_proj", + "model.language_model.layers.10.self_attn.k_proj", + "model.language_model.layers.10.self_attn.v_proj", + "model.language_model.layers.10.self_attn.o_proj", + "model.language_model.layers.10.mlp.gate", + "model.language_model.layers.11.self_attn.q_proj", + "model.language_model.layers.11.self_attn.k_proj", + "model.language_model.layers.11.self_attn.v_proj", + "model.language_model.layers.11.self_attn.o_proj", + "model.language_model.layers.11.mlp.gate", + "model.language_model.layers.12.self_attn.q_proj", + "model.language_model.layers.12.self_attn.k_proj", + "model.language_model.layers.12.self_attn.v_proj", + "model.language_model.layers.12.self_attn.o_proj", + "model.language_model.layers.12.mlp.gate", + "model.language_model.layers.13.self_attn.q_proj", + "model.language_model.layers.13.self_attn.k_proj", + "model.language_model.layers.13.self_attn.v_proj", + "model.language_model.layers.13.self_attn.o_proj", + "model.language_model.layers.13.mlp.gate", + "model.language_model.layers.14.self_attn.q_proj", + "model.language_model.layers.14.self_attn.k_proj", + "model.language_model.layers.14.self_attn.v_proj", + "model.language_model.layers.14.self_attn.o_proj", + "model.language_model.layers.14.mlp.gate", + "model.language_model.layers.15.self_attn.q_proj", + "model.language_model.layers.15.self_attn.k_proj", + "model.language_model.layers.15.self_attn.v_proj", + "model.language_model.layers.15.self_attn.o_proj", + "model.language_model.layers.15.mlp.gate", + "model.language_model.layers.16.self_attn.q_proj", + 
"model.language_model.layers.16.self_attn.k_proj", + "model.language_model.layers.16.self_attn.v_proj", + "model.language_model.layers.16.self_attn.o_proj", + "model.language_model.layers.16.mlp.gate", + "model.language_model.layers.17.self_attn.q_proj", + "model.language_model.layers.17.self_attn.k_proj", + "model.language_model.layers.17.self_attn.v_proj", + "model.language_model.layers.17.self_attn.o_proj", + "model.language_model.layers.17.mlp.gate", + "model.language_model.layers.18.self_attn.q_proj", + "model.language_model.layers.18.self_attn.k_proj", + "model.language_model.layers.18.self_attn.v_proj", + "model.language_model.layers.18.self_attn.o_proj", + "model.language_model.layers.18.mlp.gate", + "model.language_model.layers.19.self_attn.q_proj", + "model.language_model.layers.19.self_attn.k_proj", + "model.language_model.layers.19.self_attn.v_proj", + "model.language_model.layers.19.self_attn.o_proj", + "model.language_model.layers.19.mlp.gate", + "model.language_model.layers.20.self_attn.q_proj", + "model.language_model.layers.20.self_attn.k_proj", + "model.language_model.layers.20.self_attn.v_proj", + "model.language_model.layers.20.self_attn.o_proj", + "model.language_model.layers.20.mlp.gate", + "model.language_model.layers.21.self_attn.q_proj", + "model.language_model.layers.21.self_attn.k_proj", + "model.language_model.layers.21.self_attn.v_proj", + "model.language_model.layers.21.self_attn.o_proj", + "model.language_model.layers.21.mlp.gate", + "model.language_model.layers.22.self_attn.q_proj", + "model.language_model.layers.22.self_attn.k_proj", + "model.language_model.layers.22.self_attn.v_proj", + "model.language_model.layers.22.self_attn.o_proj", + "model.language_model.layers.22.mlp.gate", + "model.language_model.layers.23.self_attn.q_proj", + "model.language_model.layers.23.self_attn.k_proj", + "model.language_model.layers.23.self_attn.v_proj", + "model.language_model.layers.23.self_attn.o_proj", + 
"model.language_model.layers.23.mlp.gate", + "model.language_model.layers.24.self_attn.q_proj", + "model.language_model.layers.24.self_attn.k_proj", + "model.language_model.layers.24.self_attn.v_proj", + "model.language_model.layers.24.self_attn.o_proj", + "model.language_model.layers.24.mlp.gate", + "model.language_model.layers.25.self_attn.q_proj", + "model.language_model.layers.25.self_attn.k_proj", + "model.language_model.layers.25.self_attn.v_proj", + "model.language_model.layers.25.self_attn.o_proj", + "model.language_model.layers.25.mlp.gate", + "model.language_model.layers.26.self_attn.q_proj", + "model.language_model.layers.26.self_attn.k_proj", + "model.language_model.layers.26.self_attn.v_proj", + "model.language_model.layers.26.self_attn.o_proj", + "model.language_model.layers.26.mlp.gate", + "model.language_model.layers.27.self_attn.q_proj", + "model.language_model.layers.27.self_attn.k_proj", + "model.language_model.layers.27.self_attn.v_proj", + "model.language_model.layers.27.self_attn.o_proj", + "model.language_model.layers.27.mlp.gate", + "model.language_model.layers.28.self_attn.q_proj", + "model.language_model.layers.28.self_attn.k_proj", + "model.language_model.layers.28.self_attn.v_proj", + "model.language_model.layers.28.self_attn.o_proj", + "model.language_model.layers.28.mlp.gate", + "model.language_model.layers.29.self_attn.q_proj", + "model.language_model.layers.29.self_attn.k_proj", + "model.language_model.layers.29.self_attn.v_proj", + "model.language_model.layers.29.self_attn.o_proj", + "model.language_model.layers.29.mlp.gate", + "model.language_model.layers.30.self_attn.q_proj", + "model.language_model.layers.30.self_attn.k_proj", + "model.language_model.layers.30.self_attn.v_proj", + "model.language_model.layers.30.self_attn.o_proj", + "model.language_model.layers.30.mlp.gate", + "model.language_model.layers.31.self_attn.q_proj", + "model.language_model.layers.31.self_attn.k_proj", + 
"model.language_model.layers.31.self_attn.v_proj", + "model.language_model.layers.31.self_attn.o_proj", + "model.language_model.layers.31.mlp.gate", + "lm_head" + ], + "kv_cache_scheme": null, + "quant_method": "compressed-tensors", + "quantization_status": "compressed", + "sparsity_config": {}, + "transform_config": {}, + "version": "0.15.1.dev6+g077e752" + }, + "text_config": { + "_sliding_window_pattern": 4, + "architectures": [ + "Cohere2MoeForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 2, + "expert_selection_fn": "sigmoid", + "first_k_dense_replace": 0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 4096, + "layer_norm_eps": 1e-05, + "layer_switch": 4, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "logit_scale": 1.0, + "max_position_embeddings": 500000, + "model_type": "cohere2_moe", + "norm_topk_prob": true, + "num_attention_heads": 128, + "num_experts": 128, + "num_experts_per_tok": 8, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "num_shared_experts": 4, + "order_of_interleaved_layers": "local_attn_first", + "position_embedding_type": "rope_gptj", + "prefix_dense_intermediate_size": 16384, + "prefix_dense_sliding_window_pattern": 1, + "rms_norm_eps": null, 
+ "rope_scaling": null, + "rope_theta": 50000, + "rotary_pct": 1.0, + "shared_expert_combination_strategy": "average", + "sliding_window": 4096, + "use_cache": true, + "use_embedding_sharing": true, + "use_gated_activation": true, + "use_parallel_block": true, + "use_parallel_embedding": false, + "use_qk_norm": false, + "vocab_size": 262144 + }, + "transformers_version": "4.56.2", + "vision_config": { + "attention_dropout": 0.0, + "hidden_act": "gelu_pytorch_tanh", + "hidden_size": 1152, + "image_size": 512, + "intermediate_size": 4304, + "layer_norm_eps": 1e-06, + "model_type": "siglip_vision_model", + "num_attention_heads": 16, + "num_channels": 3, + "num_hidden_layers": 27, + "patch_size": 16, + "vision_use_head": false + }, + "vision_feature_layer": -1, + "vision_feature_select_strategy": "full" +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..359afc456acafabbc3c67307b73a0ef26805eb82 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 2, + "eos_token_id": 255001, + "pad_token_id": 0, + "transformers_version": "5.8.0" +} diff --git a/model-00001-of-00046.safetensors b/model-00001-of-00046.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..566c2fec7b54cfb89c82c8221671b1f4830b8294 --- /dev/null +++ b/model-00001-of-00046.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f10b022320b8082a648f42535ecb260d5dfa1e246814efa1853a974cde99048e +size 4985574712 diff --git a/model-00003-of-00046.safetensors b/model-00003-of-00046.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..986f1eae47a5f76ae6d44f4b3c151cc73786da93 --- /dev/null +++ b/model-00003-of-00046.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:baf42b4a8027bbd89e140a531c30ade3aef37df64e8f099237dfddc38d44e2f6 +size 4986211288 diff --git 
a/model-00004-of-00046.safetensors b/model-00004-of-00046.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..818ff2a13b76018f5d8dab3052cfbfea935ec008 --- /dev/null +++ b/model-00004-of-00046.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5f33166dc234816589f18c658a901e1af4d4cd6f9f0572d31af05388bd3fcbb +size 4986235208 diff --git a/model-00005-of-00046.safetensors b/model-00005-of-00046.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1d6d1ef2cb0867961005bb0cadf322d6b16faafc --- /dev/null +++ b/model-00005-of-00046.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6364d8dabb37ce0438d286416e3a8ea011601fc940b45e38727c06f0916820a8 +size 4986235200 diff --git a/model-00007-of-00046.safetensors b/model-00007-of-00046.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a3036753e71065953ff0351d34a971a65c155b12 --- /dev/null +++ b/model-00007-of-00046.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db9ed2b662cf5046132ad406c9d2b4254520424b69992259e87a45350962667e +size 4986235136 diff --git a/model-00008-of-00046.safetensors b/model-00008-of-00046.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..151571927fb19a54b632ce47dedc2219a6453f9c --- /dev/null +++ b/model-00008-of-00046.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e292fdfc0a48feae7234b12d491c4eb09ab91cd5252063733759e4cb3bacd958 +size 4986235200 diff --git a/model-00010-of-00046.safetensors b/model-00010-of-00046.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9337736e22b59fa18c3b7cafe6162a71eeaac8fd --- /dev/null +++ b/model-00010-of-00046.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9f224c025646fd5f14c09a5f29eb6fcc3559378d0248c8e8231fb1bce1ebce5 +size 4986270416 diff --git 
a/model-00011-of-00046.safetensors b/model-00011-of-00046.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b85398c05650088d594f1c11a577daeadf965ab7 --- /dev/null +++ b/model-00011-of-00046.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78a1428ab2e5ad4e678c4abb1f0c045f3f1e68cec70213ae143aeae4b5266d31 +size 4986235208 diff --git a/model-00012-of-00046.safetensors b/model-00012-of-00046.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3a3a8ea6404b061744bd1e4b9b4f0fd7e1d58fc5 --- /dev/null +++ b/model-00012-of-00046.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9743318110a46e78be8fcf46046dfc40da7cb04100ec0d2706160387d811d835 +size 4986235200 diff --git a/model-00014-of-00046.safetensors b/model-00014-of-00046.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1963d0549371a1e0708c7579b84d16af4e5e616a --- /dev/null +++ b/model-00014-of-00046.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4464a2398d4d47a5825f0970a0efbd60d73137768b4dc70e6aef8e51912bfd2a +size 4986235120 diff --git a/model-00015-of-00046.safetensors b/model-00015-of-00046.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6281f83a4725318191a8d44ac58357cdf40f14cd --- /dev/null +++ b/model-00015-of-00046.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff2c801d695287d6077c9b0bcf0aa5b744320100b9bf3c3fa1b9ca9f2d8416c6 +size 4986236072 diff --git a/model-00016-of-00046.safetensors b/model-00016-of-00046.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9f577eb04495eca90fb73ef918788d38a76c7f33 --- /dev/null +++ b/model-00016-of-00046.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47ab09071e466ab7f1376992530107201b39ba2052ce26f36af93eaefaa9fb50 +size 4901309656 diff --git 
a/model-00017-of-00046.safetensors b/model-00017-of-00046.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d1fff3cb9c684a49152bc2557ad6c2bf4951d760 --- /dev/null +++ b/model-00017-of-00046.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a655a06ff0ff4de0d5c1f5fed84dad0079a200a16514290cadf52797fe260167 +size 4986346824 diff --git a/model-00018-of-00046.safetensors b/model-00018-of-00046.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..428cca393ba2f556935d2665886d196e714ee54c --- /dev/null +++ b/model-00018-of-00046.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b629f6286302e384005e657df9f193f170a86eb1bedf0e105007e8fec1168e8c +size 4986235760 diff --git a/model-00019-of-00046.safetensors b/model-00019-of-00046.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d65ab10022a8aeb4784f333e3a356999dc35f3ee --- /dev/null +++ b/model-00019-of-00046.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fff6fb479c5e39e79832e5fb60375b771bf7548d907c6a8f83fda7eeaa6932e2 +size 4986235744 diff --git a/model-00021-of-00046.safetensors b/model-00021-of-00046.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4d09399080619234a6979d5801d78bcaff8e29f9 --- /dev/null +++ b/model-00021-of-00046.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9eaa9ecf4a43a62e35d1f85b86b969ef2ebb07b4009d783a4411f928f2cee47 +size 4986235664 diff --git a/model-00022-of-00046.safetensors b/model-00022-of-00046.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3f4840c1efef764671e05a3760fe0e3c9aee2a9b --- /dev/null +++ b/model-00022-of-00046.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80d1ac3a335622811f1a60698436405ea533b66140079ef2d9725fdc0b15e9fc +size 4986235744 diff --git 
a/model-00023-of-00046.safetensors b/model-00023-of-00046.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1e304fc3bc51ca91e6eed7baac5ca71230ed2456 --- /dev/null +++ b/model-00023-of-00046.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2470c933b89fee950af61549328e82c247df8507d4415b1bccff730d6bc37ac5 +size 4986235792 diff --git a/model-00024-of-00046.safetensors b/model-00024-of-00046.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..84196942f64315ac73a78e2f3da758146c3782b1 --- /dev/null +++ b/model-00024-of-00046.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:973a9dc14150bd3377177d8f541660ae2de76037b06f0772972efbe89d575725 +size 4985349128 diff --git a/model-00025-of-00046.safetensors b/model-00025-of-00046.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6d3e06244ac69ce9f4e10514f3c86b4a8b0cbf2a --- /dev/null +++ b/model-00025-of-00046.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22b6912ca5df3455e33232818b0daa136ff545368e3193c6c6c0c305b337cc5f +size 4986235752 diff --git a/model-00026-of-00046.safetensors b/model-00026-of-00046.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..18d76629b0962f091316d3b46188edb1809a1696 --- /dev/null +++ b/model-00026-of-00046.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47ad15496ba433835bc8806d4f4a2148aaf79c88398200051c6bb7fb800ca414 +size 4986235744 diff --git a/model-00027-of-00046.safetensors b/model-00027-of-00046.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cd82aa07c5964e53d897618673dec1b2d931aa10 --- /dev/null +++ b/model-00027-of-00046.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61b7fafcb2755201c0f69af438e139229a88764b38a20ea9f260a1ece83ef7b9 +size 4985349304 diff --git 
a/model-00028-of-00046.safetensors b/model-00028-of-00046.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c0d9b23494cefd72f6102da0f9860c0babcd9747 --- /dev/null +++ b/model-00028-of-00046.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f709ef66505e38e61886e0c7fbbf47607a81408cc82e8257759f13ceae7ae81 +size 4986235640 diff --git a/model-00029-of-00046.safetensors b/model-00029-of-00046.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..981f4fc088ce4a518f90fd0a6ec24d85871b7f79 --- /dev/null +++ b/model-00029-of-00046.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:690d81b9f91cc472e223ce6291ca77762ef6c7a9186ea8b6a71a2f3bc2ee7e9a +size 4986235744 diff --git a/model-00030-of-00046.safetensors b/model-00030-of-00046.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9deae2960ce60b7f6b9a10415789d190eb24f54e --- /dev/null +++ b/model-00030-of-00046.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a54343d4927c261e37121d5f5774a65cc8e8dc46dcc590953eb8120c4ce78e1c +size 4986235760 diff --git a/model-00031-of-00046.safetensors b/model-00031-of-00046.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c7c0ca17c2afc3340a1f8d3a7f4523e36e7082d4 --- /dev/null +++ b/model-00031-of-00046.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7045df425934e1d72c81201767dd174042a98d6cf0e42962615b0c64f0b69570 +size 4985349184 diff --git a/model-00032-of-00046.safetensors b/model-00032-of-00046.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e63489e25c313ee3b36bdf4200f87a9f6e5d6801 --- /dev/null +++ b/model-00032-of-00046.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7638d2a8f4213d894a1ec4e913112d7ac7d08bfc1bda698e941b7b9b2acea2c +size 4986235720 diff --git 
a/model-00033-of-00046.safetensors b/model-00033-of-00046.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6b3b27ebfea0961d43c5f27cb76fd61d8a666919 --- /dev/null +++ b/model-00033-of-00046.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba4c5eb2cac7d764d28430d20efed69c93ee969b61efdb6d98078feb6c27e19c +size 4986235744 diff --git a/model-00034-of-00046.safetensors b/model-00034-of-00046.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..26a4919f0d806ca8b4ffe370e589172b68cdd698 --- /dev/null +++ b/model-00034-of-00046.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31674a68b9dc81ec139f60dfcd32db0e79d086ce822de0eee781be93714d0990 +size 4985349336 diff --git a/model-00035-of-00046.safetensors b/model-00035-of-00046.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e0d5df612f94c833c783713599d696c6436791cb --- /dev/null +++ b/model-00035-of-00046.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:677c33a2c2f90df8a3f9248d03ede7e4306abbbb2b551858c01aeb16a242f857 +size 4986235624 diff --git a/model-00036-of-00046.safetensors b/model-00036-of-00046.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..79fc3ab3d1bc0436f79651e09831f946d0c78d76 --- /dev/null +++ b/model-00036-of-00046.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e18eb0b8b2e2f68024cef6442ca0712a622157c8549288095d53b70f2572f095 +size 4986235752 diff --git a/model-00037-of-00046.safetensors b/model-00037-of-00046.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b4df621aeb322c46d5efeeea65c8b68bc1cc8781 --- /dev/null +++ b/model-00037-of-00046.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a1d8a0c99e3c7e0e2e526cab19c239c8897dbfcff0bdea2e298d70066ebf101 +size 4986235744 diff --git 
a/model-00038-of-00046.safetensors b/model-00038-of-00046.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..20cacf9107d003f6910f42f48fe97a48e141ca8a --- /dev/null +++ b/model-00038-of-00046.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:086ed8648f57f41f7e9eafe0503ff0f0290239645a2cd077304603172776371d +size 4985349232 diff --git a/model-00039-of-00046.safetensors b/model-00039-of-00046.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a98b3a2891c67321e33d273f6c3b3ffb6b908668 --- /dev/null +++ b/model-00039-of-00046.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c2b0e74d2202cbd514ab57818169f5fe6213578b2d03d45eab119cc6b820e34 +size 4986235696 diff --git a/model-00040-of-00046.safetensors b/model-00040-of-00046.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..186c73b6196a47c557889abfe8f896c2b7f4b598 --- /dev/null +++ b/model-00040-of-00046.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f85f4d1977963631e370d66fec60362972f5d05ce667dd28b52aa2e269cf1d71 +size 4986235744 diff --git a/model-00041-of-00046.safetensors b/model-00041-of-00046.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..68dec7753a92cddd86e58190f431eb5c8d989fcf --- /dev/null +++ b/model-00041-of-00046.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf3c7bffa9ec453b3014290203210cb8147d13380142f06228e8fcaa813374e1 +size 4985313936 diff --git a/model-00042-of-00046.safetensors b/model-00042-of-00046.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..77169abc97dc9f2d1304f97b3798f85124c319b5 --- /dev/null +++ b/model-00042-of-00046.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6baab86d8d80204e5f08d53838f3a1eb77406a26338ad6db6f7d0042241b5d1 +size 4986270976 diff --git 
a/model-00043-of-00046.safetensors b/model-00043-of-00046.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c01b21a60987c3f7a7f0fec8e26551ef92a970d5 --- /dev/null +++ b/model-00043-of-00046.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:521768cf614bb40e19fca7d31b8ac81bf6be280f7c821841f6f5b46a20949782 +size 4986235760 diff --git a/model-00044-of-00046.safetensors b/model-00044-of-00046.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..375c5c24cd381b634e8f077a40514187e624d02d --- /dev/null +++ b/model-00044-of-00046.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24aa465053244ccaa5f4dbcbb8181bd907ea75000283b50b9ed9914c80d53907 +size 4986235744 diff --git a/model-00045-of-00046.safetensors b/model-00045-of-00046.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..81cae91fd7fd85535292f0e3283631356c2003be --- /dev/null +++ b/model-00045-of-00046.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd57dac3ea18437bebdbd32479bc7a62baa4e6937f0d17af06a15f9473965568 +size 4985349264 diff --git a/model-00046-of-00046.safetensors b/model-00046-of-00046.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2868677063aca83120af8e51eccbc09676d7a0e9 --- /dev/null +++ b/model-00046-of-00046.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6b68b241cdc907edf572d1ba7a3d7c64ac048c0332ef9537d0f880b3d158873 +size 856042920 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..afa6b31dfc00733c3b061b424ae1379923c5e778 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,25411 @@ +{ + "metadata": { + "total_parameters": 218801789168, + "total_size": 225002697184 + }, + "weight_map": { + "model.language_model.embed_tokens.weight": 
"model-00001-of-00046.safetensors", + "model.language_model.layers.0.input_layernorm.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.0.down_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.0.down_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.0.gate_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.0.gate_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.0.up_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.0.up_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.1.down_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.1.down_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.1.gate_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.1.gate_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.1.up_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.1.up_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.10.down_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.10.down_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.10.gate_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.10.gate_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.10.up_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.10.up_proj.weight_scale": 
"model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.100.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.100.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.100.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.100.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.100.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.100.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.101.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.101.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.101.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.101.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.101.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.101.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.102.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.102.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.102.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.102.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.102.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.102.up_proj.weight_scale": "model-00002-of-00046.safetensors", + 
"model.language_model.layers.0.mlp.experts.103.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.103.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.103.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.103.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.103.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.103.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.104.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.104.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.104.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.104.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.104.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.104.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.105.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.105.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.105.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.105.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.105.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.105.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.106.down_proj.weight": 
"model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.106.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.106.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.106.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.106.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.106.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.107.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.107.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.107.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.107.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.107.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.107.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.108.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.108.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.108.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.108.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.108.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.108.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.109.down_proj.weight": "model-00002-of-00046.safetensors", + 
"model.language_model.layers.0.mlp.experts.109.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.109.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.109.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.109.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.109.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.11.down_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.11.down_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.11.gate_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.11.gate_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.11.up_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.11.up_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.110.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.110.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.110.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.110.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.110.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.110.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.111.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.111.down_proj.weight_scale": 
"model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.111.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.111.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.111.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.111.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.112.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.112.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.112.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.112.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.112.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.112.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.113.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.113.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.113.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.113.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.113.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.113.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.114.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.114.down_proj.weight_scale": "model-00002-of-00046.safetensors", + 
"model.language_model.layers.0.mlp.experts.114.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.114.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.114.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.114.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.115.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.115.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.115.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.115.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.115.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.115.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.116.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.116.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.116.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.116.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.116.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.116.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.117.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.117.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.117.gate_proj.weight": 
"model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.117.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.117.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.117.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.118.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.118.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.118.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.118.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.118.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.118.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.119.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.119.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.119.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.119.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.119.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.119.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.12.down_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.12.down_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.12.gate_proj.weight": "model-00001-of-00046.safetensors", + 
"model.language_model.layers.0.mlp.experts.12.gate_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.12.up_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.12.up_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.120.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.120.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.120.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.120.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.120.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.120.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.121.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.121.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.121.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.121.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.121.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.121.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.122.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.122.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.122.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.122.gate_proj.weight_scale": 
"model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.122.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.122.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.123.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.123.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.123.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.123.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.123.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.123.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.124.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.124.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.124.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.124.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.124.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.124.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.125.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.125.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.125.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.125.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + 
"model.language_model.layers.0.mlp.experts.125.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.125.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.126.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.126.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.126.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.126.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.126.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.126.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.127.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.127.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.127.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.127.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.127.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.127.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.13.down_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.13.down_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.13.gate_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.13.gate_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.13.up_proj.weight": 
"model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.13.up_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.14.down_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.14.down_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.14.gate_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.14.gate_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.14.up_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.14.up_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.15.down_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.15.down_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.15.gate_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.15.gate_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.15.up_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.15.up_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.16.down_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.16.down_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.16.gate_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.16.gate_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.16.up_proj.weight": "model-00001-of-00046.safetensors", + 
"model.language_model.layers.0.mlp.experts.16.up_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.17.down_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.17.down_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.17.gate_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.17.gate_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.17.up_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.17.up_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.18.down_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.18.down_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.18.gate_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.18.gate_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.18.up_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.18.up_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.19.down_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.19.down_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.19.gate_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.19.gate_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.19.up_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.19.up_proj.weight_scale": 
"model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.2.down_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.2.down_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.2.gate_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.2.gate_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.2.up_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.2.up_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.20.down_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.20.down_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.20.gate_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.20.gate_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.20.up_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.20.up_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.21.down_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.21.down_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.21.gate_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.21.gate_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.21.up_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.21.up_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.22.down_proj.weight": 
"model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.22.down_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.22.gate_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.22.gate_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.22.up_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.22.up_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.23.down_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.23.down_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.23.gate_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.23.gate_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.23.up_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.23.up_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.24.down_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.24.down_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.24.gate_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.24.gate_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.24.up_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.24.up_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.25.down_proj.weight": "model-00001-of-00046.safetensors", + 
"model.language_model.layers.0.mlp.experts.25.down_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.25.gate_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.25.gate_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.25.up_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.25.up_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.26.down_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.26.down_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.26.gate_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.26.gate_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.26.up_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.26.up_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.27.down_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.27.down_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.27.gate_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.27.gate_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.27.up_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.27.up_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.28.down_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.28.down_proj.weight_scale": 
"model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.28.gate_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.28.gate_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.28.up_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.28.up_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.29.down_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.29.down_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.29.gate_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.29.gate_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.29.up_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.29.up_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.3.down_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.3.down_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.3.gate_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.3.gate_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.3.up_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.3.up_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.30.down_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.30.down_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.30.gate_proj.weight": 
"model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.30.gate_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.30.up_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.30.up_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.31.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.31.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.31.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.31.gate_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.31.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.31.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.32.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.32.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.32.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.32.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.32.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.32.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.33.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.33.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.33.gate_proj.weight": "model-00002-of-00046.safetensors", + 
"model.language_model.layers.0.mlp.experts.33.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.33.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.33.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.34.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.34.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.34.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.34.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.34.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.34.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.35.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.35.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.35.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.35.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.35.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.35.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.36.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.36.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.36.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.36.gate_proj.weight_scale": 
"model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.36.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.36.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.37.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.37.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.37.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.37.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.37.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.37.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.38.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.38.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.38.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.38.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.38.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.38.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.39.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.39.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.39.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.39.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.39.up_proj.weight": 
"model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.39.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.4.down_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.4.down_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.4.gate_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.4.gate_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.4.up_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.4.up_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.40.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.40.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.40.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.40.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.40.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.40.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.41.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.41.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.41.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.41.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.41.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.41.up_proj.weight_scale": 
"model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.42.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.42.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.42.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.42.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.42.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.42.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.43.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.43.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.43.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.43.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.43.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.43.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.44.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.44.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.44.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.44.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.44.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.44.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.45.down_proj.weight": 
"model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.45.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.45.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.45.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.45.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.45.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.46.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.46.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.46.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.46.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.46.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.46.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.47.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.47.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.47.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.47.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.47.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.47.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.48.down_proj.weight": "model-00002-of-00046.safetensors", + 
"model.language_model.layers.0.mlp.experts.48.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.48.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.48.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.48.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.48.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.49.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.49.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.49.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.49.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.49.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.49.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.5.down_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.5.down_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.5.gate_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.5.gate_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.5.up_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.5.up_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.50.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.50.down_proj.weight_scale": "model-00002-of-00046.safetensors", + 
"model.language_model.layers.0.mlp.experts.50.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.50.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.50.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.50.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.51.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.51.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.51.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.51.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.51.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.51.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.52.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.52.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.52.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.52.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.52.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.52.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.53.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.53.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.53.gate_proj.weight": "model-00002-of-00046.safetensors", + 
"model.language_model.layers.0.mlp.experts.53.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.53.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.53.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.54.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.54.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.54.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.54.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.54.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.54.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.55.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.55.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.55.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.55.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.55.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.55.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.56.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.56.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.56.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.56.gate_proj.weight_scale": 
"model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.56.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.56.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.57.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.57.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.57.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.57.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.57.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.57.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.58.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.58.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.58.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.58.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.58.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.58.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.59.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.59.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.59.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.59.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.59.up_proj.weight": 
"model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.59.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.6.down_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.6.down_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.6.gate_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.6.gate_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.6.up_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.6.up_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.60.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.60.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.60.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.60.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.60.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.60.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.61.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.61.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.61.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.61.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.61.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.61.up_proj.weight_scale": 
"model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.62.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.62.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.62.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.62.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.62.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.62.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.63.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.63.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.63.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.63.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.63.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.63.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.64.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.64.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.64.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.64.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.64.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.64.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.65.down_proj.weight": 
"model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.65.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.65.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.65.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.65.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.65.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.66.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.66.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.66.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.66.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.66.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.66.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.67.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.67.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.67.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.67.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.67.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.67.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.68.down_proj.weight": "model-00002-of-00046.safetensors", + 
"model.language_model.layers.0.mlp.experts.68.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.68.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.68.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.68.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.68.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.69.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.69.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.69.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.69.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.69.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.69.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.7.down_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.7.down_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.7.gate_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.7.gate_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.7.up_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.7.up_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.70.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.70.down_proj.weight_scale": "model-00002-of-00046.safetensors", + 
"model.language_model.layers.0.mlp.experts.70.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.70.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.70.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.70.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.71.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.71.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.71.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.71.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.71.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.71.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.72.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.72.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.72.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.72.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.72.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.72.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.73.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.73.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.73.gate_proj.weight": "model-00002-of-00046.safetensors", + 
"model.language_model.layers.0.mlp.experts.73.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.73.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.73.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.74.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.74.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.74.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.74.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.74.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.74.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.75.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.75.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.75.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.75.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.75.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.75.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.76.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.76.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.76.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.76.gate_proj.weight_scale": 
"model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.76.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.76.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.77.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.77.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.77.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.77.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.77.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.77.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.78.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.78.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.78.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.78.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.78.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.78.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.79.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.79.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.79.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.79.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.79.up_proj.weight": 
"model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.79.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.8.down_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.8.down_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.8.gate_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.8.gate_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.8.up_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.8.up_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.80.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.80.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.80.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.80.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.80.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.80.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.81.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.81.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.81.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.81.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.81.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.81.up_proj.weight_scale": 
"model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.82.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.82.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.82.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.82.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.82.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.82.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.83.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.83.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.83.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.83.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.83.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.83.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.84.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.84.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.84.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.84.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.84.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.84.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.85.down_proj.weight": 
"model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.85.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.85.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.85.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.85.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.85.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.86.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.86.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.86.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.86.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.86.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.86.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.87.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.87.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.87.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.87.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.87.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.87.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.88.down_proj.weight": "model-00002-of-00046.safetensors", + 
"model.language_model.layers.0.mlp.experts.88.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.88.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.88.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.88.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.88.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.89.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.89.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.89.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.89.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.89.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.89.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.9.down_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.9.down_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.9.gate_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.9.gate_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.9.up_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.9.up_proj.weight_scale": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.90.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.90.down_proj.weight_scale": "model-00002-of-00046.safetensors", + 
"model.language_model.layers.0.mlp.experts.90.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.90.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.90.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.90.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.91.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.91.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.91.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.91.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.91.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.91.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.92.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.92.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.92.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.92.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.92.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.92.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.93.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.93.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.93.gate_proj.weight": "model-00002-of-00046.safetensors", + 
"model.language_model.layers.0.mlp.experts.93.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.93.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.93.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.94.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.94.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.94.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.94.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.94.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.94.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.95.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.95.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.95.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.95.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.95.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.95.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.96.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.96.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.96.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.96.gate_proj.weight_scale": 
"model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.96.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.96.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.97.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.97.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.97.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.97.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.97.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.97.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.98.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.98.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.98.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.98.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.98.up_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.98.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.99.down_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.99.down_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.99.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.99.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.99.up_proj.weight": 
"model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.gate.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.mlp.shared_experts.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.0.mlp.shared_experts.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.0.mlp.shared_experts.gate_proj.weight": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.shared_experts.gate_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.mlp.shared_experts.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.0.mlp.shared_experts.up_proj.weight_scale": "model-00002-of-00046.safetensors", + "model.language_model.layers.0.self_attn.k_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.self_attn.o_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.self_attn.q_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.0.self_attn.v_proj.weight": "model-00001-of-00046.safetensors", + "model.language_model.layers.1.input_layernorm.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.0.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.0.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.0.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.0.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.0.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.0.up_proj.weight_scale": "model-00003-of-00046.safetensors", + 
"model.language_model.layers.1.mlp.experts.1.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.1.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.1.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.1.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.1.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.1.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.10.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.10.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.10.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.10.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.10.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.10.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.100.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.100.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.100.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.100.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.100.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.100.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.101.down_proj.weight": "model-00004-of-00046.safetensors", + 
"model.language_model.layers.1.mlp.experts.101.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.101.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.101.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.101.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.101.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.102.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.102.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.102.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.102.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.102.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.102.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.103.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.103.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.103.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.103.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.103.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.103.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.104.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.104.down_proj.weight_scale": 
"model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.104.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.104.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.104.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.104.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.105.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.105.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.105.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.105.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.105.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.105.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.106.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.106.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.106.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.106.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.106.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.106.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.107.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.107.down_proj.weight_scale": "model-00004-of-00046.safetensors", + 
"model.language_model.layers.1.mlp.experts.107.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.107.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.107.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.107.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.108.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.108.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.108.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.108.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.108.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.108.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.109.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.109.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.109.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.109.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.109.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.109.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.11.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.11.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.11.gate_proj.weight": 
"model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.11.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.11.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.11.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.110.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.110.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.110.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.110.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.110.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.110.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.111.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.111.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.111.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.111.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.111.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.111.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.112.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.112.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.112.gate_proj.weight": "model-00004-of-00046.safetensors", + 
"model.language_model.layers.1.mlp.experts.112.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.112.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.112.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.113.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.113.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.113.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.113.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.113.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.113.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.114.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.114.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.114.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.114.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.114.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.114.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.115.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.115.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.115.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.115.gate_proj.weight_scale": 
"model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.115.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.115.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.116.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.116.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.116.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.116.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.116.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.116.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.117.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.117.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.117.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.117.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.117.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.117.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.118.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.118.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.118.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.118.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + 
"model.language_model.layers.1.mlp.experts.118.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.118.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.119.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.119.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.119.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.119.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.119.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.119.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.12.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.12.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.12.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.12.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.12.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.12.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.120.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.120.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.120.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.120.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.120.up_proj.weight": 
"model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.120.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.121.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.121.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.121.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.121.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.121.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.121.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.122.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.122.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.122.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.122.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.122.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.122.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.123.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.123.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.123.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.123.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.123.up_proj.weight": "model-00004-of-00046.safetensors", + 
"model.language_model.layers.1.mlp.experts.123.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.124.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.124.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.124.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.124.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.124.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.124.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.125.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.125.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.125.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.125.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.125.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.125.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.126.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.126.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.126.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.126.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.126.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.126.up_proj.weight_scale": 
"model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.127.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.127.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.127.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.127.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.127.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.127.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.13.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.13.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.13.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.13.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.13.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.13.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.14.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.14.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.14.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.14.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.14.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.14.up_proj.weight_scale": "model-00003-of-00046.safetensors", + 
"model.language_model.layers.1.mlp.experts.15.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.15.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.15.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.15.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.15.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.15.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.16.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.16.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.16.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.16.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.16.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.16.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.17.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.17.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.17.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.17.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.17.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.17.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.18.down_proj.weight": "model-00003-of-00046.safetensors", + 
"model.language_model.layers.1.mlp.experts.18.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.18.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.18.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.18.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.18.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.19.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.19.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.19.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.19.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.19.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.19.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.2.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.2.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.2.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.2.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.2.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.2.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.20.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.20.down_proj.weight_scale": "model-00003-of-00046.safetensors", + 
"model.language_model.layers.1.mlp.experts.20.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.20.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.20.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.20.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.21.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.21.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.21.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.21.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.21.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.21.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.22.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.22.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.22.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.22.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.22.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.22.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.23.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.23.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.23.gate_proj.weight": "model-00003-of-00046.safetensors", + 
"model.language_model.layers.1.mlp.experts.23.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.23.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.23.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.24.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.24.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.24.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.24.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.24.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.24.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.25.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.25.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.25.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.25.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.25.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.25.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.26.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.26.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.26.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.26.gate_proj.weight_scale": 
"model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.26.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.26.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.27.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.27.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.27.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.27.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.27.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.27.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.28.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.28.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.28.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.28.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.28.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.28.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.29.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.29.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.29.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.29.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.29.up_proj.weight": 
"model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.29.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.3.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.3.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.3.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.3.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.3.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.3.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.30.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.30.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.30.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.30.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.30.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.30.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.31.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.31.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.31.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.31.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.31.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.31.up_proj.weight_scale": 
"model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.32.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.32.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.32.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.32.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.32.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.32.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.33.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.33.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.33.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.33.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.33.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.33.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.34.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.34.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.34.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.34.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.34.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.34.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.35.down_proj.weight": 
"model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.35.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.35.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.35.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.35.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.35.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.36.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.36.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.36.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.36.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.36.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.36.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.37.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.37.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.37.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.37.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.37.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.37.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.38.down_proj.weight": "model-00003-of-00046.safetensors", + 
"model.language_model.layers.1.mlp.experts.38.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.38.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.38.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.38.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.38.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.39.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.39.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.39.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.39.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.39.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.39.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.4.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.4.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.4.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.4.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.4.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.4.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.40.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.40.down_proj.weight_scale": "model-00003-of-00046.safetensors", + 
"model.language_model.layers.1.mlp.experts.40.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.40.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.40.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.40.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.41.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.41.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.41.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.41.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.41.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.41.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.42.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.42.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.42.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.42.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.42.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.42.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.43.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.43.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.43.gate_proj.weight": "model-00003-of-00046.safetensors", + 
"model.language_model.layers.1.mlp.experts.43.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.43.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.43.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.44.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.44.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.44.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.44.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.44.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.44.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.45.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.45.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.45.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.45.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.45.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.45.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.46.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.46.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.46.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.46.gate_proj.weight_scale": 
"model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.46.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.46.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.47.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.47.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.47.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.47.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.47.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.47.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.48.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.48.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.48.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.48.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.48.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.48.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.49.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.49.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.49.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.49.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.49.up_proj.weight": 
"model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.49.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.5.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.5.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.5.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.5.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.5.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.5.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.50.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.50.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.50.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.50.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.50.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.50.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.51.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.51.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.51.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.51.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.51.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.51.up_proj.weight_scale": 
"model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.52.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.52.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.52.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.52.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.52.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.52.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.53.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.53.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.53.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.53.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.53.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.53.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.54.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.54.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.54.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.54.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.54.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.54.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.55.down_proj.weight": 
"model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.55.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.55.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.55.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.55.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.55.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.56.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.56.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.56.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.56.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.56.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.56.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.57.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.57.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.57.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.57.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.57.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.57.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.58.down_proj.weight": "model-00003-of-00046.safetensors", + 
"model.language_model.layers.1.mlp.experts.58.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.58.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.58.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.58.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.58.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.59.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.59.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.59.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.59.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.59.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.59.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.6.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.6.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.6.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.6.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.6.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.6.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.60.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.60.down_proj.weight_scale": "model-00003-of-00046.safetensors", + 
"model.language_model.layers.1.mlp.experts.60.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.60.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.60.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.60.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.61.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.61.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.61.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.61.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.61.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.61.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.62.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.62.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.62.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.62.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.62.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.62.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.63.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.63.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.63.gate_proj.weight": "model-00003-of-00046.safetensors", + 
"model.language_model.layers.1.mlp.experts.63.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.63.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.63.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.64.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.64.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.64.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.64.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.64.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.64.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.65.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.65.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.65.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.65.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.65.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.65.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.66.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.66.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.66.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.66.gate_proj.weight_scale": 
"model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.66.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.66.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.67.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.67.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.67.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.67.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.67.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.67.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.68.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.68.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.68.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.68.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.68.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.68.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.69.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.69.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.69.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.69.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.69.up_proj.weight": 
"model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.69.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.7.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.7.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.7.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.7.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.7.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.7.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.70.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.70.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.70.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.70.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.70.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.70.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.71.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.71.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.71.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.71.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.71.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.71.up_proj.weight_scale": 
"model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.72.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.72.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.72.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.72.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.72.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.72.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.73.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.73.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.73.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.73.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.73.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.73.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.74.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.74.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.74.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.74.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.74.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.74.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.75.down_proj.weight": 
"model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.75.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.75.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.75.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.75.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.75.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.76.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.76.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.76.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.76.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.76.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.76.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.77.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.77.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.77.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.77.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.77.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.77.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.78.down_proj.weight": "model-00003-of-00046.safetensors", + 
"model.language_model.layers.1.mlp.experts.78.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.78.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.78.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.78.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.78.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.79.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.79.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.79.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.79.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.79.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.79.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.8.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.8.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.8.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.8.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.8.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.8.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.80.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.80.down_proj.weight_scale": "model-00003-of-00046.safetensors", + 
"model.language_model.layers.1.mlp.experts.80.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.80.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.80.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.80.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.81.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.81.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.81.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.81.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.81.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.81.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.82.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.82.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.82.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.82.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.82.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.82.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.83.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.83.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.83.gate_proj.weight": "model-00003-of-00046.safetensors", + 
"model.language_model.layers.1.mlp.experts.83.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.83.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.83.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.84.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.84.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.84.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.84.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.84.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.84.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.85.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.85.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.85.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.85.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.85.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.85.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.86.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.86.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.86.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.86.gate_proj.weight_scale": 
"model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.86.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.86.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.87.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.87.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.87.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.87.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.87.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.87.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.88.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.88.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.88.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.88.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.88.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.88.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.89.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.89.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.89.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.89.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.89.up_proj.weight": 
"model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.89.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.9.down_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.9.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.9.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.9.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.9.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.9.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.90.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.90.down_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.90.gate_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.90.gate_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.90.up_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.90.up_proj.weight_scale": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.91.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.91.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.91.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.91.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.91.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.91.up_proj.weight_scale": 
"model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.92.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.92.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.92.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.92.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.92.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.92.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.93.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.93.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.93.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.93.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.93.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.93.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.94.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.94.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.94.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.94.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.94.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.94.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.95.down_proj.weight": 
"model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.95.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.95.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.95.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.95.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.95.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.96.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.96.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.96.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.96.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.96.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.96.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.97.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.97.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.97.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.97.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.97.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.97.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.98.down_proj.weight": "model-00004-of-00046.safetensors", + 
"model.language_model.layers.1.mlp.experts.98.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.98.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.98.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.98.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.98.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.99.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.99.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.99.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.99.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.99.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.experts.99.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.gate.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.mlp.shared_experts.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.shared_experts.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.shared_experts.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.shared_experts.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.shared_experts.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.mlp.shared_experts.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.1.self_attn.k_proj.weight": "model-00003-of-00046.safetensors", + 
"model.language_model.layers.1.self_attn.o_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.self_attn.q_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.1.self_attn.v_proj.weight": "model-00003-of-00046.safetensors", + "model.language_model.layers.10.input_layernorm.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.0.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.0.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.0.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.0.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.0.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.0.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.1.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.1.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.1.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.1.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.1.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.1.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.10.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.10.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.10.gate_proj.weight": "model-00015-of-00046.safetensors", + 
"model.language_model.layers.10.mlp.experts.10.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.10.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.10.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.100.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.100.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.100.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.100.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.100.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.100.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.101.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.101.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.101.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.101.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.101.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.101.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.102.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.102.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.102.gate_proj.weight": "model-00016-of-00046.safetensors", + 
"model.language_model.layers.10.mlp.experts.102.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.102.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.102.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.103.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.103.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.103.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.103.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.103.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.103.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.104.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.104.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.104.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.104.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.104.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.104.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.105.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.105.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.105.gate_proj.weight": "model-00016-of-00046.safetensors", + 
"model.language_model.layers.10.mlp.experts.105.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.105.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.105.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.106.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.106.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.106.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.106.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.106.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.106.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.107.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.107.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.107.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.107.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.107.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.107.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.108.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.108.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.108.gate_proj.weight": "model-00016-of-00046.safetensors", + 
"model.language_model.layers.10.mlp.experts.108.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.108.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.108.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.109.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.109.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.109.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.109.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.109.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.109.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.11.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.11.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.11.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.11.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.11.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.11.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.110.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.110.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.110.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.110.gate_proj.weight_scale": 
"model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.110.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.110.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.111.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.111.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.111.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.111.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.111.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.111.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.112.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.112.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.112.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.112.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.112.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.112.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.113.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.113.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.113.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.113.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + 
"model.language_model.layers.10.mlp.experts.113.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.113.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.114.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.114.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.114.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.114.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.114.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.114.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.115.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.115.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.115.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.115.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.115.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.115.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.116.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.116.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.116.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.116.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.116.up_proj.weight": 
"model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.116.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.117.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.117.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.117.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.117.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.117.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.117.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.118.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.118.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.118.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.118.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.118.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.118.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.119.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.119.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.119.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.119.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.119.up_proj.weight": "model-00016-of-00046.safetensors", + 
"model.language_model.layers.10.mlp.experts.119.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.12.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.12.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.12.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.12.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.12.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.12.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.120.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.120.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.120.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.120.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.120.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.120.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.121.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.121.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.121.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.121.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.121.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.121.up_proj.weight_scale": 
"model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.122.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.122.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.122.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.122.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.122.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.122.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.123.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.123.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.123.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.123.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.123.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.123.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.124.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.124.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.124.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.124.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.124.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.124.up_proj.weight_scale": "model-00016-of-00046.safetensors", + 
"model.language_model.layers.10.mlp.experts.125.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.125.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.125.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.125.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.125.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.125.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.126.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.126.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.126.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.126.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.126.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.126.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.127.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.127.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.127.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.127.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.127.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.127.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.13.down_proj.weight": 
"model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.13.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.13.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.13.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.13.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.13.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.14.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.14.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.14.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.14.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.14.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.14.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.15.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.15.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.15.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.15.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.15.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.15.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.16.down_proj.weight": "model-00015-of-00046.safetensors", + 
"model.language_model.layers.10.mlp.experts.16.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.16.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.16.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.16.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.16.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.17.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.17.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.17.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.17.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.17.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.17.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.18.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.18.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.18.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.18.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.18.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.18.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.19.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.19.down_proj.weight_scale": 
"model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.19.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.19.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.19.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.19.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.2.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.2.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.2.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.2.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.2.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.2.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.20.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.20.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.20.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.20.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.20.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.20.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.21.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.21.down_proj.weight_scale": "model-00015-of-00046.safetensors", + 
"model.language_model.layers.10.mlp.experts.21.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.21.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.21.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.21.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.22.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.22.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.22.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.22.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.22.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.22.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.23.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.23.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.23.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.23.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.23.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.23.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.24.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.24.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.24.gate_proj.weight": 
"model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.24.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.24.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.24.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.25.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.25.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.25.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.25.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.25.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.25.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.26.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.26.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.26.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.26.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.26.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.26.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.27.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.27.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.27.gate_proj.weight": "model-00015-of-00046.safetensors", + 
"model.language_model.layers.10.mlp.experts.27.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.27.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.27.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.28.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.28.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.28.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.28.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.28.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.28.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.29.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.29.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.29.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.29.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.29.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.29.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.3.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.3.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.3.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.3.gate_proj.weight_scale": 
"model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.3.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.3.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.30.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.30.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.30.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.30.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.30.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.30.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.31.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.31.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.31.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.31.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.31.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.31.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.32.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.32.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.32.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.32.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + 
"model.language_model.layers.10.mlp.experts.32.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.32.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.33.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.33.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.33.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.33.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.33.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.33.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.34.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.34.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.34.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.34.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.34.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.34.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.35.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.35.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.35.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.35.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.35.up_proj.weight": 
"model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.35.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.36.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.36.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.36.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.36.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.36.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.36.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.37.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.37.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.37.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.37.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.37.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.37.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.38.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.38.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.38.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.38.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.38.up_proj.weight": "model-00016-of-00046.safetensors", + 
"model.language_model.layers.10.mlp.experts.38.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.39.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.39.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.39.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.39.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.39.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.39.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.4.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.4.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.4.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.4.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.4.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.4.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.40.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.40.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.40.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.40.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.40.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.40.up_proj.weight_scale": 
"model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.41.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.41.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.41.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.41.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.41.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.41.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.42.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.42.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.42.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.42.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.42.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.42.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.43.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.43.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.43.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.43.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.43.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.43.up_proj.weight_scale": "model-00016-of-00046.safetensors", + 
"model.language_model.layers.10.mlp.experts.44.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.44.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.44.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.44.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.44.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.44.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.45.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.45.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.45.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.45.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.45.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.45.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.46.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.46.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.46.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.46.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.46.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.46.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.47.down_proj.weight": 
"model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.47.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.47.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.47.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.47.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.47.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.48.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.48.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.48.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.48.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.48.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.48.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.49.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.49.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.49.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.49.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.49.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.49.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.5.down_proj.weight": "model-00015-of-00046.safetensors", + 
"model.language_model.layers.10.mlp.experts.5.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.5.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.5.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.5.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.5.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.50.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.50.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.50.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.50.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.50.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.50.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.51.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.51.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.51.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.51.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.51.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.51.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.52.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.52.down_proj.weight_scale": 
"model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.52.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.52.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.52.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.52.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.53.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.53.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.53.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.53.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.53.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.53.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.54.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.54.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.54.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.54.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.54.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.54.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.55.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.55.down_proj.weight_scale": "model-00016-of-00046.safetensors", + 
"model.language_model.layers.10.mlp.experts.55.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.55.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.55.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.55.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.56.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.56.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.56.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.56.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.56.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.56.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.57.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.57.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.57.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.57.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.57.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.57.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.58.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.58.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.58.gate_proj.weight": 
"model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.58.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.58.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.58.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.59.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.59.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.59.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.59.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.59.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.59.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.6.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.6.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.6.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.6.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.6.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.6.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.60.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.60.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.60.gate_proj.weight": "model-00016-of-00046.safetensors", + 
"model.language_model.layers.10.mlp.experts.60.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.60.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.60.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.61.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.61.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.61.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.61.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.61.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.61.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.62.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.62.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.62.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.62.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.62.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.62.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.63.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.63.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.63.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.63.gate_proj.weight_scale": 
"model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.63.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.63.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.64.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.64.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.64.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.64.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.64.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.64.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.65.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.65.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.65.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.65.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.65.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.65.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.66.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.66.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.66.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.66.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + 
"model.language_model.layers.10.mlp.experts.66.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.66.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.67.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.67.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.67.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.67.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.67.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.67.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.68.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.68.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.68.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.68.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.68.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.68.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.69.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.69.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.69.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.69.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.69.up_proj.weight": 
"model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.69.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.7.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.7.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.7.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.7.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.7.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.7.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.70.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.70.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.70.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.70.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.70.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.70.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.71.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.71.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.71.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.71.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.71.up_proj.weight": "model-00016-of-00046.safetensors", + 
"model.language_model.layers.10.mlp.experts.71.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.72.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.72.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.72.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.72.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.72.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.72.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.73.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.73.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.73.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.73.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.73.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.73.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.74.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.74.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.74.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.74.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.74.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.74.up_proj.weight_scale": 
"model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.75.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.75.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.75.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.75.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.75.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.75.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.76.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.76.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.76.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.76.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.76.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.76.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.77.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.77.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.77.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.77.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.77.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.77.up_proj.weight_scale": "model-00016-of-00046.safetensors", + 
"model.language_model.layers.10.mlp.experts.78.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.78.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.78.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.78.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.78.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.78.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.79.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.79.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.79.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.79.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.79.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.79.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.8.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.8.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.8.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.8.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.8.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.8.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.80.down_proj.weight": 
"model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.80.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.80.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.80.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.80.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.80.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.81.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.81.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.81.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.81.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.81.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.81.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.82.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.82.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.82.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.82.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.82.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.82.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.83.down_proj.weight": "model-00016-of-00046.safetensors", + 
"model.language_model.layers.10.mlp.experts.83.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.83.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.83.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.83.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.83.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.84.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.84.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.84.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.84.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.84.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.84.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.85.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.85.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.85.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.85.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.85.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.85.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.86.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.86.down_proj.weight_scale": 
"model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.86.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.86.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.86.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.86.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.87.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.87.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.87.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.87.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.87.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.87.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.88.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.88.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.88.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.88.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.88.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.88.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.89.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.89.down_proj.weight_scale": "model-00016-of-00046.safetensors", + 
"model.language_model.layers.10.mlp.experts.89.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.89.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.89.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.89.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.9.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.9.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.9.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.9.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.9.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.9.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.90.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.90.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.90.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.90.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.90.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.90.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.91.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.91.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.91.gate_proj.weight": 
"model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.91.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.91.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.91.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.92.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.92.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.92.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.92.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.92.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.92.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.93.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.93.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.93.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.93.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.93.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.93.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.94.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.94.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.94.gate_proj.weight": "model-00016-of-00046.safetensors", + 
"model.language_model.layers.10.mlp.experts.94.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.94.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.94.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.95.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.95.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.95.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.95.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.95.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.95.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.96.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.96.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.96.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.96.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.96.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.96.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.97.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.97.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.97.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.97.gate_proj.weight_scale": 
"model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.97.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.97.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.98.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.98.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.98.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.98.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.98.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.98.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.99.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.99.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.99.gate_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.99.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.99.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.experts.99.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.gate.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.mlp.shared_experts.down_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.shared_experts.down_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.shared_experts.gate_proj.weight": "model-00016-of-00046.safetensors", + 
"model.language_model.layers.10.mlp.shared_experts.gate_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.shared_experts.up_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.mlp.shared_experts.up_proj.weight_scale": "model-00016-of-00046.safetensors", + "model.language_model.layers.10.self_attn.k_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.self_attn.o_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.self_attn.q_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.10.self_attn.v_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.11.input_layernorm.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.0.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.0.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.0.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.0.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.0.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.0.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.1.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.1.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.1.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.1.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.1.up_proj.weight": "model-00017-of-00046.safetensors", + 
"model.language_model.layers.11.mlp.experts.1.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.10.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.10.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.10.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.10.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.10.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.10.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.100.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.100.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.100.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.100.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.100.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.100.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.101.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.101.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.101.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.101.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.101.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.101.up_proj.weight_scale": 
"model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.102.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.102.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.102.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.102.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.102.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.102.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.103.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.103.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.103.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.103.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.103.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.103.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.104.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.104.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.104.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.104.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.104.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.104.up_proj.weight_scale": "model-00018-of-00046.safetensors", + 
"model.language_model.layers.11.mlp.experts.105.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.105.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.105.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.105.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.105.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.105.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.106.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.106.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.106.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.106.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.106.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.106.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.107.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.107.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.107.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.107.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.107.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.107.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.108.down_proj.weight": 
"model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.108.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.108.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.108.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.108.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.108.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.109.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.109.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.109.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.109.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.109.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.109.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.11.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.11.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.11.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.11.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.11.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.11.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.110.down_proj.weight": "model-00018-of-00046.safetensors", + 
"model.language_model.layers.11.mlp.experts.110.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.110.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.110.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.110.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.110.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.111.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.111.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.111.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.111.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.111.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.111.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.112.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.112.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.112.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.112.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.112.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.112.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.113.down_proj.weight": "model-00018-of-00046.safetensors", + 
"model.language_model.layers.11.mlp.experts.113.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.113.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.113.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.113.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.113.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.114.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.114.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.114.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.114.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.114.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.114.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.115.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.115.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.115.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.115.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.115.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.115.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.116.down_proj.weight": "model-00018-of-00046.safetensors", + 
"model.language_model.layers.11.mlp.experts.116.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.116.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.116.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.116.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.116.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.117.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.117.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.117.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.117.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.117.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.117.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.118.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.118.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.118.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.118.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.118.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.118.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.119.down_proj.weight": "model-00018-of-00046.safetensors", + 
"model.language_model.layers.11.mlp.experts.119.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.119.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.119.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.119.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.119.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.12.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.12.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.12.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.12.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.12.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.12.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.120.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.120.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.120.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.120.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.120.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.120.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.121.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.121.down_proj.weight_scale": 
"model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.121.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.121.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.121.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.121.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.122.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.122.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.122.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.122.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.122.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.122.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.123.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.123.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.123.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.123.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.123.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.123.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.124.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.124.down_proj.weight_scale": "model-00018-of-00046.safetensors", + 
"model.language_model.layers.11.mlp.experts.124.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.124.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.124.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.124.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.125.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.125.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.125.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.125.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.125.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.125.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.126.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.126.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.126.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.126.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.126.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.126.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.127.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.127.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.127.gate_proj.weight": 
"model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.127.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.127.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.127.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.13.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.13.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.13.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.13.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.13.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.13.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.14.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.14.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.14.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.14.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.14.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.14.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.15.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.15.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.15.gate_proj.weight": "model-00017-of-00046.safetensors", + 
"model.language_model.layers.11.mlp.experts.15.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.15.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.15.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.16.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.16.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.16.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.16.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.16.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.16.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.17.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.17.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.17.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.17.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.17.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.17.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.18.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.18.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.18.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.18.gate_proj.weight_scale": 
"model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.18.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.18.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.19.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.19.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.19.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.19.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.19.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.19.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.2.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.2.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.2.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.2.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.2.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.2.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.20.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.20.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.20.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.20.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + 
"model.language_model.layers.11.mlp.experts.20.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.20.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.21.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.21.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.21.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.21.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.21.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.21.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.22.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.22.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.22.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.22.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.22.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.22.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.23.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.23.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.23.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.23.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.23.up_proj.weight": 
"model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.23.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.24.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.24.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.24.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.24.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.24.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.24.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.25.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.25.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.25.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.25.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.25.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.25.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.26.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.26.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.26.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.26.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.26.up_proj.weight": "model-00017-of-00046.safetensors", + 
"model.language_model.layers.11.mlp.experts.26.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.27.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.27.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.27.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.27.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.27.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.27.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.28.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.28.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.28.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.28.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.28.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.28.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.29.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.29.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.29.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.29.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.29.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.29.up_proj.weight_scale": 
"model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.3.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.3.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.3.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.3.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.3.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.3.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.30.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.30.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.30.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.30.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.30.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.30.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.31.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.31.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.31.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.31.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.31.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.31.up_proj.weight_scale": "model-00017-of-00046.safetensors", + 
"model.language_model.layers.11.mlp.experts.32.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.32.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.32.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.32.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.32.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.32.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.33.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.33.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.33.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.33.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.33.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.33.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.34.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.34.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.34.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.34.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.34.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.34.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.35.down_proj.weight": 
"model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.35.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.35.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.35.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.35.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.35.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.36.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.36.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.36.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.36.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.36.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.36.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.37.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.37.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.37.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.37.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.37.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.37.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.38.down_proj.weight": "model-00017-of-00046.safetensors", + 
"model.language_model.layers.11.mlp.experts.38.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.38.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.38.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.38.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.38.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.39.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.39.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.39.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.39.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.39.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.39.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.4.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.4.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.4.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.4.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.4.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.4.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.40.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.40.down_proj.weight_scale": 
"model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.40.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.40.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.40.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.40.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.41.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.41.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.41.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.41.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.41.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.41.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.42.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.42.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.42.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.42.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.42.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.42.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.43.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.43.down_proj.weight_scale": "model-00017-of-00046.safetensors", + 
"model.language_model.layers.11.mlp.experts.43.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.43.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.43.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.43.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.44.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.44.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.44.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.44.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.44.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.44.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.45.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.45.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.45.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.45.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.45.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.45.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.46.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.46.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.46.gate_proj.weight": 
"model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.46.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.46.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.46.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.47.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.47.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.47.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.47.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.47.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.47.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.48.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.48.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.48.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.48.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.48.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.48.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.49.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.49.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.49.gate_proj.weight": "model-00017-of-00046.safetensors", + 
"model.language_model.layers.11.mlp.experts.49.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.49.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.49.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.5.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.5.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.5.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.5.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.5.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.5.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.50.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.50.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.50.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.50.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.50.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.50.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.51.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.51.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.51.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.51.gate_proj.weight_scale": 
"model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.51.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.51.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.52.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.52.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.52.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.52.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.52.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.52.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.53.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.53.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.53.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.53.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.53.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.53.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.54.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.54.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.54.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.54.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + 
"model.language_model.layers.11.mlp.experts.54.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.54.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.55.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.55.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.55.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.55.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.55.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.55.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.56.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.56.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.56.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.56.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.56.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.56.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.57.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.57.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.57.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.57.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.57.up_proj.weight": 
"model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.57.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.58.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.58.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.58.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.58.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.58.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.58.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.59.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.59.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.59.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.59.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.59.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.59.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.6.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.6.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.6.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.6.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.6.up_proj.weight": "model-00017-of-00046.safetensors", + 
"model.language_model.layers.11.mlp.experts.6.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.60.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.60.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.60.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.60.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.60.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.60.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.61.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.61.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.61.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.61.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.61.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.61.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.62.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.62.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.62.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.62.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.62.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.62.up_proj.weight_scale": 
"model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.63.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.63.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.63.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.63.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.63.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.63.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.64.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.64.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.64.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.64.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.64.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.64.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.65.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.65.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.65.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.65.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.65.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.65.up_proj.weight_scale": "model-00017-of-00046.safetensors", + 
"model.language_model.layers.11.mlp.experts.66.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.66.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.66.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.66.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.66.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.66.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.67.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.67.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.67.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.67.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.67.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.67.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.68.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.68.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.68.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.68.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.68.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.68.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.69.down_proj.weight": 
"model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.69.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.69.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.69.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.69.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.69.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.7.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.7.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.7.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.7.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.7.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.7.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.70.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.70.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.70.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.70.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.70.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.70.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.71.down_proj.weight": "model-00017-of-00046.safetensors", + 
"model.language_model.layers.11.mlp.experts.71.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.71.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.71.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.71.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.71.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.72.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.72.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.72.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.72.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.72.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.72.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.73.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.73.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.73.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.73.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.73.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.73.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.74.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.74.down_proj.weight_scale": 
"model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.74.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.74.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.74.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.74.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.75.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.75.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.75.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.75.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.75.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.75.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.76.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.76.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.76.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.76.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.76.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.76.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.77.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.77.down_proj.weight_scale": "model-00017-of-00046.safetensors", + 
"model.language_model.layers.11.mlp.experts.77.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.77.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.77.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.77.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.78.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.78.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.78.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.78.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.78.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.78.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.79.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.79.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.79.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.79.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.79.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.79.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.8.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.8.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.8.gate_proj.weight": 
"model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.8.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.8.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.8.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.80.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.80.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.80.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.80.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.80.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.80.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.81.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.81.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.81.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.81.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.81.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.81.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.82.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.82.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.82.gate_proj.weight": "model-00017-of-00046.safetensors", + 
"model.language_model.layers.11.mlp.experts.82.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.82.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.82.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.83.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.83.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.83.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.83.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.83.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.83.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.84.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.84.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.84.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.84.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.84.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.84.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.85.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.85.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.85.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.85.gate_proj.weight_scale": 
"model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.85.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.85.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.86.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.86.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.86.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.86.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.86.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.86.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.87.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.87.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.87.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.87.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.87.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.87.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.88.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.88.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.88.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.88.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + 
"model.language_model.layers.11.mlp.experts.88.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.88.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.89.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.89.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.89.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.89.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.89.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.89.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.9.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.9.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.9.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.9.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.9.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.9.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.90.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.90.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.90.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.90.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.90.up_proj.weight": 
"model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.90.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.91.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.91.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.91.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.91.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.91.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.91.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.92.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.92.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.92.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.92.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.92.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.92.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.93.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.93.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.93.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.93.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.93.up_proj.weight": "model-00017-of-00046.safetensors", + 
"model.language_model.layers.11.mlp.experts.93.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.94.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.94.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.94.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.94.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.94.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.94.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.95.down_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.95.down_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.95.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.95.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.95.up_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.95.up_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.96.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.96.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.96.gate_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.96.gate_proj.weight_scale": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.96.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.96.up_proj.weight_scale": 
"model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.97.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.97.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.97.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.97.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.97.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.97.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.98.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.98.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.98.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.98.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.98.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.98.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.99.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.99.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.99.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.99.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.99.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.experts.99.up_proj.weight_scale": "model-00018-of-00046.safetensors", + 
"model.language_model.layers.11.mlp.gate.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.mlp.shared_experts.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.shared_experts.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.shared_experts.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.shared_experts.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.shared_experts.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.mlp.shared_experts.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.11.self_attn.k_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.11.self_attn.o_proj.weight": "model-00017-of-00046.safetensors", + "model.language_model.layers.11.self_attn.q_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.11.self_attn.v_proj.weight": "model-00016-of-00046.safetensors", + "model.language_model.layers.12.input_layernorm.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.0.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.0.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.0.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.0.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.0.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.0.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.1.down_proj.weight": "model-00018-of-00046.safetensors", + 
"model.language_model.layers.12.mlp.experts.1.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.1.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.1.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.1.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.1.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.10.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.10.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.10.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.10.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.10.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.10.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.100.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.100.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.100.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.100.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.100.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.100.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.101.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.101.down_proj.weight_scale": 
"model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.101.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.101.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.101.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.101.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.102.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.102.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.102.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.102.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.102.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.102.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.103.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.103.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.103.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.103.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.103.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.103.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.104.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.104.down_proj.weight_scale": "model-00019-of-00046.safetensors", + 
"model.language_model.layers.12.mlp.experts.104.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.104.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.104.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.104.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.105.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.105.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.105.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.105.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.105.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.105.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.106.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.106.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.106.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.106.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.106.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.106.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.107.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.107.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.107.gate_proj.weight": 
"model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.107.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.107.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.107.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.108.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.108.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.108.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.108.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.108.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.108.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.109.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.109.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.109.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.109.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.109.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.109.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.11.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.11.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.11.gate_proj.weight": "model-00018-of-00046.safetensors", + 
"model.language_model.layers.12.mlp.experts.11.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.11.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.11.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.110.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.110.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.110.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.110.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.110.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.110.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.111.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.111.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.111.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.111.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.111.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.111.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.112.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.112.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.112.gate_proj.weight": "model-00019-of-00046.safetensors", + 
"model.language_model.layers.12.mlp.experts.112.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.112.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.112.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.113.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.113.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.113.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.113.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.113.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.113.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.114.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.114.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.114.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.114.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.114.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.114.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.115.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.115.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.115.gate_proj.weight": "model-00019-of-00046.safetensors", + 
"model.language_model.layers.12.mlp.experts.115.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.115.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.115.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.116.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.116.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.116.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.116.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.116.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.116.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.117.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.117.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.117.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.117.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.117.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.117.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.118.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.118.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.118.gate_proj.weight": "model-00019-of-00046.safetensors", + 
"model.language_model.layers.12.mlp.experts.118.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.118.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.118.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.119.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.119.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.119.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.119.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.119.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.119.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.12.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.12.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.12.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.12.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.12.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.12.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.120.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.120.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.120.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.120.gate_proj.weight_scale": 
"model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.120.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.120.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.121.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.121.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.121.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.121.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.121.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.121.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.122.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.122.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.122.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.122.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.122.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.122.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.123.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.123.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.123.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.123.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + 
"model.language_model.layers.12.mlp.experts.123.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.123.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.124.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.124.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.124.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.124.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.124.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.124.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.125.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.125.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.125.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.125.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.125.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.125.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.126.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.126.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.126.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.126.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.126.up_proj.weight": 
"model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.126.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.127.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.127.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.127.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.127.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.127.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.127.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.13.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.13.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.13.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.13.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.13.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.13.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.14.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.14.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.14.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.14.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.14.up_proj.weight": "model-00018-of-00046.safetensors", + 
"model.language_model.layers.12.mlp.experts.14.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.15.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.15.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.15.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.15.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.15.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.15.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.16.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.16.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.16.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.16.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.16.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.16.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.17.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.17.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.17.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.17.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.17.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.17.up_proj.weight_scale": 
"model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.18.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.18.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.18.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.18.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.18.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.18.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.19.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.19.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.19.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.19.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.19.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.19.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.2.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.2.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.2.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.2.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.2.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.2.up_proj.weight_scale": "model-00018-of-00046.safetensors", + 
"model.language_model.layers.12.mlp.experts.20.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.20.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.20.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.20.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.20.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.20.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.21.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.21.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.21.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.21.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.21.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.21.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.22.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.22.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.22.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.22.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.22.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.22.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.23.down_proj.weight": 
"model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.23.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.23.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.23.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.23.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.23.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.24.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.24.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.24.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.24.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.24.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.24.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.25.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.25.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.25.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.25.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.25.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.25.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.26.down_proj.weight": "model-00018-of-00046.safetensors", + 
"model.language_model.layers.12.mlp.experts.26.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.26.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.26.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.26.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.26.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.27.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.27.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.27.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.27.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.27.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.27.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.28.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.28.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.28.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.28.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.28.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.28.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.29.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.29.down_proj.weight_scale": 
"model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.29.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.29.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.29.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.29.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.3.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.3.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.3.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.3.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.3.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.3.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.30.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.30.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.30.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.30.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.30.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.30.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.31.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.31.down_proj.weight_scale": "model-00018-of-00046.safetensors", + 
"model.language_model.layers.12.mlp.experts.31.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.31.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.31.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.31.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.32.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.32.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.32.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.32.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.32.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.32.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.33.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.33.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.33.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.33.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.33.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.33.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.34.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.34.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.34.gate_proj.weight": 
"model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.34.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.34.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.34.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.35.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.35.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.35.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.35.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.35.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.35.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.36.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.36.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.36.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.36.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.36.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.36.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.37.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.37.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.37.gate_proj.weight": "model-00018-of-00046.safetensors", + 
"model.language_model.layers.12.mlp.experts.37.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.37.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.37.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.38.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.38.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.38.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.38.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.38.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.38.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.39.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.39.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.39.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.39.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.39.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.39.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.4.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.4.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.4.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.4.gate_proj.weight_scale": 
"model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.4.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.4.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.40.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.40.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.40.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.40.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.40.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.40.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.41.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.41.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.41.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.41.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.41.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.41.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.42.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.42.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.42.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.42.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + 
"model.language_model.layers.12.mlp.experts.42.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.42.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.43.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.43.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.43.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.43.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.43.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.43.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.44.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.44.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.44.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.44.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.44.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.44.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.45.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.45.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.45.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.45.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.45.up_proj.weight": 
"model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.45.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.46.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.46.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.46.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.46.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.46.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.46.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.47.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.47.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.47.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.47.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.47.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.47.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.48.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.48.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.48.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.48.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.48.up_proj.weight": "model-00018-of-00046.safetensors", + 
"model.language_model.layers.12.mlp.experts.48.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.49.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.49.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.49.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.49.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.49.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.49.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.5.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.5.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.5.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.5.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.5.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.5.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.50.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.50.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.50.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.50.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.50.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.50.up_proj.weight_scale": 
"model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.51.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.51.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.51.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.51.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.51.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.51.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.52.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.52.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.52.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.52.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.52.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.52.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.53.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.53.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.53.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.53.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.53.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.53.up_proj.weight_scale": "model-00018-of-00046.safetensors", + 
"model.language_model.layers.12.mlp.experts.54.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.54.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.54.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.54.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.54.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.54.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.55.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.55.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.55.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.55.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.55.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.55.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.56.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.56.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.56.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.56.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.56.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.56.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.57.down_proj.weight": 
"model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.57.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.57.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.57.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.57.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.57.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.58.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.58.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.58.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.58.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.58.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.58.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.59.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.59.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.59.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.59.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.59.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.59.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.6.down_proj.weight": "model-00018-of-00046.safetensors", + 
"model.language_model.layers.12.mlp.experts.6.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.6.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.6.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.6.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.6.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.60.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.60.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.60.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.60.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.60.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.60.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.61.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.61.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.61.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.61.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.61.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.61.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.62.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.62.down_proj.weight_scale": 
"model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.62.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.62.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.62.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.62.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.63.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.63.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.63.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.63.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.63.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.63.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.64.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.64.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.64.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.64.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.64.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.64.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.65.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.65.down_proj.weight_scale": "model-00019-of-00046.safetensors", + 
"model.language_model.layers.12.mlp.experts.65.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.65.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.65.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.65.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.66.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.66.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.66.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.66.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.66.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.66.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.67.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.67.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.67.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.67.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.67.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.67.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.68.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.68.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.68.gate_proj.weight": 
"model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.68.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.68.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.68.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.69.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.69.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.69.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.69.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.69.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.69.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.7.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.7.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.7.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.7.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.7.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.7.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.70.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.70.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.70.gate_proj.weight": "model-00019-of-00046.safetensors", + 
"model.language_model.layers.12.mlp.experts.70.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.70.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.70.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.71.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.71.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.71.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.71.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.71.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.71.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.72.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.72.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.72.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.72.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.72.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.72.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.73.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.73.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.73.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.73.gate_proj.weight_scale": 
"model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.73.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.73.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.74.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.74.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.74.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.74.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.74.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.74.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.75.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.75.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.75.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.75.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.75.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.75.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.76.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.76.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.76.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.76.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + 
"model.language_model.layers.12.mlp.experts.76.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.76.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.77.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.77.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.77.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.77.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.77.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.77.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.78.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.78.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.78.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.78.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.78.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.78.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.79.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.79.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.79.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.79.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.79.up_proj.weight": 
"model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.79.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.8.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.8.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.8.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.8.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.8.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.8.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.80.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.80.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.80.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.80.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.80.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.80.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.81.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.81.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.81.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.81.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.81.up_proj.weight": "model-00019-of-00046.safetensors", + 
"model.language_model.layers.12.mlp.experts.81.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.82.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.82.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.82.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.82.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.82.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.82.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.83.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.83.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.83.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.83.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.83.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.83.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.84.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.84.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.84.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.84.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.84.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.84.up_proj.weight_scale": 
"model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.85.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.85.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.85.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.85.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.85.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.85.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.86.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.86.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.86.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.86.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.86.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.86.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.87.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.87.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.87.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.87.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.87.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.87.up_proj.weight_scale": "model-00019-of-00046.safetensors", + 
"model.language_model.layers.12.mlp.experts.88.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.88.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.88.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.88.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.88.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.88.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.89.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.89.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.89.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.89.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.89.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.89.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.9.down_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.9.down_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.9.gate_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.9.gate_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.9.up_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.9.up_proj.weight_scale": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.90.down_proj.weight": 
"model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.90.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.90.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.90.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.90.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.90.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.91.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.91.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.91.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.91.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.91.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.91.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.92.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.92.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.92.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.92.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.92.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.92.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.93.down_proj.weight": "model-00019-of-00046.safetensors", + 
"model.language_model.layers.12.mlp.experts.93.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.93.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.93.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.93.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.93.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.94.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.94.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.94.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.94.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.94.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.94.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.95.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.95.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.95.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.95.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.95.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.95.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.96.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.96.down_proj.weight_scale": 
"model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.96.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.96.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.96.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.96.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.97.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.97.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.97.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.97.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.97.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.97.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.98.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.98.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.98.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.98.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.98.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.98.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.99.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.99.down_proj.weight_scale": "model-00019-of-00046.safetensors", + 
"model.language_model.layers.12.mlp.experts.99.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.99.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.99.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.experts.99.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.gate.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.mlp.shared_experts.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.shared_experts.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.shared_experts.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.shared_experts.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.shared_experts.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.mlp.shared_experts.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.12.self_attn.k_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.self_attn.o_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.self_attn.q_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.12.self_attn.v_proj.weight": "model-00018-of-00046.safetensors", + "model.language_model.layers.13.input_layernorm.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.0.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.0.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.0.gate_proj.weight": "model-00019-of-00046.safetensors", + 
"model.language_model.layers.13.mlp.experts.0.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.0.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.0.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.1.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.1.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.1.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.1.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.1.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.1.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.10.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.10.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.10.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.10.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.10.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.10.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.100.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.100.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.100.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.100.gate_proj.weight_scale": 
"model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.100.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.100.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.101.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.101.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.101.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.101.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.101.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.101.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.102.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.102.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.102.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.102.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.102.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.102.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.103.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.103.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.103.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.103.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + 
"model.language_model.layers.13.mlp.experts.103.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.103.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.104.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.104.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.104.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.104.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.104.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.104.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.105.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.105.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.105.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.105.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.105.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.105.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.106.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.106.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.106.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.106.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.106.up_proj.weight": 
"model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.106.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.107.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.107.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.107.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.107.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.107.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.107.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.108.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.108.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.108.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.108.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.108.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.108.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.109.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.109.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.109.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.109.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.109.up_proj.weight": "model-00020-of-00046.safetensors", + 
"model.language_model.layers.13.mlp.experts.109.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.11.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.11.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.11.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.11.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.11.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.11.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.110.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.110.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.110.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.110.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.110.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.110.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.111.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.111.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.111.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.111.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.111.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.111.up_proj.weight_scale": 
"model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.112.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.112.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.112.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.112.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.112.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.112.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.113.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.113.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.113.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.113.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.113.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.113.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.114.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.114.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.114.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.114.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.114.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.114.up_proj.weight_scale": "model-00020-of-00046.safetensors", + 
"model.language_model.layers.13.mlp.experts.115.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.115.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.115.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.115.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.115.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.115.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.116.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.116.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.116.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.116.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.116.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.116.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.117.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.117.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.117.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.117.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.117.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.117.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.118.down_proj.weight": 
"model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.118.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.118.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.118.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.118.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.118.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.119.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.119.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.119.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.119.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.119.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.119.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.12.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.12.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.12.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.12.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.12.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.12.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.120.down_proj.weight": "model-00021-of-00046.safetensors", + 
"model.language_model.layers.13.mlp.experts.120.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.120.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.120.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.120.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.120.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.121.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.121.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.121.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.121.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.121.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.121.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.122.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.122.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.122.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.122.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.122.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.122.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.123.down_proj.weight": "model-00021-of-00046.safetensors", + 
"model.language_model.layers.13.mlp.experts.123.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.123.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.123.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.123.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.123.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.124.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.124.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.124.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.124.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.124.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.124.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.125.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.125.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.125.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.125.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.125.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.125.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.126.down_proj.weight": "model-00021-of-00046.safetensors", + 
"model.language_model.layers.13.mlp.experts.126.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.126.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.126.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.126.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.126.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.127.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.127.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.127.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.127.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.127.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.127.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.13.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.13.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.13.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.13.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.13.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.13.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.14.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.14.down_proj.weight_scale": 
"model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.14.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.14.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.14.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.14.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.15.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.15.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.15.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.15.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.15.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.15.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.16.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.16.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.16.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.16.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.16.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.16.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.17.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.17.down_proj.weight_scale": "model-00019-of-00046.safetensors", + 
"model.language_model.layers.13.mlp.experts.17.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.17.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.17.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.17.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.18.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.18.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.18.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.18.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.18.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.18.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.19.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.19.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.19.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.19.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.19.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.19.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.2.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.2.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.2.gate_proj.weight": 
"model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.2.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.2.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.2.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.20.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.20.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.20.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.20.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.20.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.20.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.21.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.21.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.21.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.21.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.21.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.21.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.22.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.22.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.22.gate_proj.weight": "model-00020-of-00046.safetensors", + 
"model.language_model.layers.13.mlp.experts.22.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.22.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.22.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.23.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.23.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.23.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.23.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.23.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.23.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.24.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.24.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.24.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.24.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.24.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.24.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.25.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.25.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.25.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.25.gate_proj.weight_scale": 
"model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.25.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.25.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.26.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.26.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.26.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.26.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.26.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.26.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.27.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.27.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.27.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.27.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.27.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.27.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.28.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.28.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.28.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.28.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + 
"model.language_model.layers.13.mlp.experts.28.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.28.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.29.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.29.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.29.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.29.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.29.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.29.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.3.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.3.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.3.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.3.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.3.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.3.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.30.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.30.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.30.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.30.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.30.up_proj.weight": 
"model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.30.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.31.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.31.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.31.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.31.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.31.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.31.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.32.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.32.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.32.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.32.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.32.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.32.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.33.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.33.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.33.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.33.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.33.up_proj.weight": "model-00020-of-00046.safetensors", + 
"model.language_model.layers.13.mlp.experts.33.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.34.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.34.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.34.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.34.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.34.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.34.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.35.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.35.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.35.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.35.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.35.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.35.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.36.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.36.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.36.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.36.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.36.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.36.up_proj.weight_scale": 
"model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.37.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.37.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.37.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.37.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.37.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.37.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.38.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.38.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.38.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.38.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.38.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.38.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.39.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.39.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.39.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.39.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.39.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.39.up_proj.weight_scale": "model-00020-of-00046.safetensors", + 
"model.language_model.layers.13.mlp.experts.4.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.4.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.4.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.4.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.4.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.4.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.40.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.40.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.40.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.40.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.40.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.40.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.41.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.41.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.41.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.41.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.41.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.41.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.42.down_proj.weight": 
"model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.42.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.42.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.42.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.42.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.42.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.43.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.43.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.43.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.43.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.43.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.43.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.44.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.44.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.44.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.44.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.44.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.44.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.45.down_proj.weight": "model-00020-of-00046.safetensors", + 
"model.language_model.layers.13.mlp.experts.45.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.45.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.45.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.45.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.45.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.46.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.46.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.46.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.46.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.46.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.46.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.47.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.47.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.47.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.47.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.47.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.47.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.48.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.48.down_proj.weight_scale": 
"model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.48.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.48.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.48.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.48.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.49.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.49.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.49.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.49.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.49.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.49.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.5.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.5.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.5.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.5.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.5.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.5.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.50.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.50.down_proj.weight_scale": "model-00020-of-00046.safetensors", + 
"model.language_model.layers.13.mlp.experts.50.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.50.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.50.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.50.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.51.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.51.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.51.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.51.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.51.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.51.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.52.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.52.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.52.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.52.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.52.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.52.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.53.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.53.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.53.gate_proj.weight": 
"model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.53.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.53.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.53.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.54.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.54.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.54.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.54.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.54.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.54.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.55.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.55.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.55.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.55.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.55.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.55.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.56.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.56.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.56.gate_proj.weight": "model-00020-of-00046.safetensors", + 
"model.language_model.layers.13.mlp.experts.56.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.56.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.56.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.57.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.57.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.57.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.57.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.57.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.57.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.58.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.58.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.58.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.58.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.58.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.58.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.59.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.59.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.59.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.59.gate_proj.weight_scale": 
"model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.59.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.59.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.6.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.6.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.6.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.6.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.6.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.6.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.60.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.60.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.60.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.60.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.60.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.60.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.61.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.61.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.61.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.61.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + 
"model.language_model.layers.13.mlp.experts.61.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.61.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.62.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.62.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.62.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.62.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.62.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.62.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.63.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.63.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.63.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.63.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.63.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.63.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.64.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.64.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.64.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.64.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.64.up_proj.weight": 
"model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.64.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.65.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.65.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.65.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.65.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.65.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.65.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.66.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.66.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.66.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.66.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.66.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.66.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.67.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.67.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.67.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.67.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.67.up_proj.weight": "model-00020-of-00046.safetensors", + 
"model.language_model.layers.13.mlp.experts.67.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.68.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.68.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.68.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.68.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.68.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.68.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.69.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.69.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.69.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.69.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.69.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.69.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.7.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.7.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.7.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.7.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.7.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.7.up_proj.weight_scale": 
"model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.70.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.70.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.70.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.70.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.70.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.70.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.71.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.71.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.71.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.71.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.71.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.71.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.72.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.72.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.72.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.72.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.72.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.72.up_proj.weight_scale": "model-00020-of-00046.safetensors", + 
"model.language_model.layers.13.mlp.experts.73.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.73.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.73.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.73.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.73.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.73.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.74.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.74.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.74.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.74.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.74.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.74.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.75.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.75.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.75.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.75.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.75.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.75.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.76.down_proj.weight": 
"model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.76.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.76.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.76.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.76.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.76.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.77.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.77.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.77.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.77.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.77.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.77.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.78.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.78.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.78.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.78.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.78.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.78.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.79.down_proj.weight": "model-00020-of-00046.safetensors", + 
"model.language_model.layers.13.mlp.experts.79.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.79.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.79.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.79.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.79.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.8.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.8.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.8.gate_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.8.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.8.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.8.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.80.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.80.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.80.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.80.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.80.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.80.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.81.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.81.down_proj.weight_scale": 
"model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.81.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.81.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.81.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.81.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.82.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.82.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.82.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.82.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.82.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.82.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.83.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.83.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.83.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.83.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.83.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.83.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.84.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.84.down_proj.weight_scale": "model-00020-of-00046.safetensors", + 
"model.language_model.layers.13.mlp.experts.84.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.84.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.84.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.84.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.85.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.85.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.85.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.85.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.85.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.85.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.86.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.86.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.86.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.86.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.86.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.86.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.87.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.87.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.87.gate_proj.weight": 
"model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.87.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.87.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.87.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.88.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.88.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.88.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.88.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.88.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.88.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.89.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.89.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.89.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.89.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.89.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.89.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.9.down_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.9.down_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.9.gate_proj.weight": "model-00019-of-00046.safetensors", + 
"model.language_model.layers.13.mlp.experts.9.gate_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.9.up_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.9.up_proj.weight_scale": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.90.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.90.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.90.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.90.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.90.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.90.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.91.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.91.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.91.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.91.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.91.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.91.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.92.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.92.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.92.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.92.gate_proj.weight_scale": 
"model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.92.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.92.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.93.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.93.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.93.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.93.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.93.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.93.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.94.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.94.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.94.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.94.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.94.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.94.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.95.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.95.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.95.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.95.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + 
"model.language_model.layers.13.mlp.experts.95.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.95.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.96.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.96.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.96.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.96.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.96.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.96.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.97.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.97.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.97.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.97.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.97.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.97.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.98.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.98.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.98.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.98.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.98.up_proj.weight": 
"model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.98.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.99.down_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.99.down_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.99.gate_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.99.gate_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.99.up_proj.weight": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.experts.99.up_proj.weight_scale": "model-00020-of-00046.safetensors", + "model.language_model.layers.13.mlp.gate.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.mlp.shared_experts.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.shared_experts.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.shared_experts.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.shared_experts.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.shared_experts.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.mlp.shared_experts.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.13.self_attn.k_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.self_attn.o_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.self_attn.q_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.13.self_attn.v_proj.weight": "model-00019-of-00046.safetensors", + "model.language_model.layers.14.input_layernorm.weight": 
"model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.0.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.0.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.0.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.0.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.0.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.0.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.1.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.1.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.1.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.1.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.1.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.1.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.10.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.10.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.10.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.10.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.10.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.10.up_proj.weight_scale": "model-00021-of-00046.safetensors", + 
"model.language_model.layers.14.mlp.experts.100.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.100.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.100.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.100.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.100.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.100.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.101.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.101.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.101.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.101.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.101.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.101.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.102.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.102.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.102.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.102.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.102.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.102.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.103.down_proj.weight": 
"model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.103.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.103.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.103.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.103.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.103.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.104.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.104.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.104.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.104.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.104.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.104.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.105.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.105.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.105.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.105.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.105.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.105.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.106.down_proj.weight": "model-00022-of-00046.safetensors", + 
"model.language_model.layers.14.mlp.experts.106.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.106.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.106.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.106.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.106.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.107.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.107.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.107.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.107.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.107.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.107.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.108.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.108.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.108.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.108.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.108.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.108.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.109.down_proj.weight": "model-00022-of-00046.safetensors", + 
"model.language_model.layers.14.mlp.experts.109.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.109.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.109.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.109.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.109.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.11.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.11.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.11.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.11.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.11.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.11.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.110.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.110.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.110.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.110.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.110.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.110.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.111.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.111.down_proj.weight_scale": 
"model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.111.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.111.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.111.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.111.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.112.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.112.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.112.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.112.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.112.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.112.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.113.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.113.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.113.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.113.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.113.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.113.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.114.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.114.down_proj.weight_scale": "model-00022-of-00046.safetensors", + 
"model.language_model.layers.14.mlp.experts.114.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.114.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.114.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.114.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.115.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.115.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.115.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.115.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.115.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.115.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.116.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.116.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.116.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.116.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.116.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.116.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.117.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.117.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.117.gate_proj.weight": 
"model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.117.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.117.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.117.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.118.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.118.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.118.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.118.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.118.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.118.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.119.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.119.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.119.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.119.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.119.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.119.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.12.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.12.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.12.gate_proj.weight": "model-00021-of-00046.safetensors", + 
"model.language_model.layers.14.mlp.experts.12.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.12.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.12.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.120.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.120.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.120.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.120.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.120.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.120.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.121.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.121.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.121.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.121.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.121.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.121.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.122.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.122.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.122.gate_proj.weight": "model-00022-of-00046.safetensors", + 
"model.language_model.layers.14.mlp.experts.122.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.122.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.122.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.123.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.123.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.123.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.123.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.123.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.123.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.124.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.124.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.124.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.124.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.124.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.124.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.125.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.125.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.125.gate_proj.weight": "model-00022-of-00046.safetensors", + 
"model.language_model.layers.14.mlp.experts.125.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.125.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.125.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.126.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.126.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.126.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.126.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.126.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.126.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.127.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.127.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.127.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.127.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.127.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.127.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.13.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.13.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.13.gate_proj.weight": "model-00021-of-00046.safetensors", + 
"model.language_model.layers.14.mlp.experts.13.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.13.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.13.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.14.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.14.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.14.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.14.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.14.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.14.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.15.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.15.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.15.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.15.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.15.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.15.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.16.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.16.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.16.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.16.gate_proj.weight_scale": 
"model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.16.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.16.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.17.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.17.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.17.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.17.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.17.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.17.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.18.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.18.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.18.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.18.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.18.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.18.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.19.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.19.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.19.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.19.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + 
"model.language_model.layers.14.mlp.experts.19.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.19.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.2.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.2.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.2.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.2.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.2.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.2.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.20.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.20.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.20.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.20.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.20.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.20.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.21.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.21.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.21.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.21.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.21.up_proj.weight": 
"model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.21.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.22.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.22.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.22.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.22.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.22.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.22.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.23.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.23.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.23.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.23.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.23.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.23.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.24.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.24.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.24.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.24.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.24.up_proj.weight": "model-00021-of-00046.safetensors", + 
"model.language_model.layers.14.mlp.experts.24.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.25.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.25.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.25.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.25.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.25.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.25.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.26.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.26.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.26.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.26.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.26.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.26.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.27.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.27.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.27.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.27.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.27.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.27.up_proj.weight_scale": 
"model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.28.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.28.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.28.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.28.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.28.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.28.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.29.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.29.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.29.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.29.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.29.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.29.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.3.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.3.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.3.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.3.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.3.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.3.up_proj.weight_scale": "model-00021-of-00046.safetensors", + 
"model.language_model.layers.14.mlp.experts.30.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.30.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.30.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.30.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.30.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.30.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.31.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.31.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.31.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.31.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.31.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.31.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.32.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.32.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.32.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.32.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.32.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.32.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.33.down_proj.weight": 
"model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.33.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.33.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.33.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.33.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.33.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.34.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.34.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.34.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.34.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.34.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.34.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.35.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.35.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.35.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.35.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.35.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.35.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.36.down_proj.weight": "model-00021-of-00046.safetensors", + 
"model.language_model.layers.14.mlp.experts.36.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.36.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.36.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.36.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.36.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.37.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.37.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.37.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.37.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.37.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.37.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.38.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.38.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.38.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.38.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.38.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.38.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.39.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.39.down_proj.weight_scale": 
"model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.39.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.39.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.39.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.39.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.4.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.4.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.4.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.4.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.4.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.4.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.40.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.40.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.40.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.40.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.40.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.40.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.41.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.41.down_proj.weight_scale": "model-00021-of-00046.safetensors", + 
"model.language_model.layers.14.mlp.experts.41.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.41.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.41.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.41.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.42.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.42.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.42.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.42.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.42.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.42.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.43.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.43.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.43.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.43.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.43.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.43.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.44.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.44.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.44.gate_proj.weight": 
"model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.44.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.44.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.44.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.45.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.45.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.45.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.45.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.45.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.45.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.46.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.46.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.46.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.46.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.46.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.46.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.47.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.47.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.47.gate_proj.weight": "model-00021-of-00046.safetensors", + 
"model.language_model.layers.14.mlp.experts.47.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.47.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.47.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.48.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.48.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.48.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.48.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.48.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.48.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.49.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.49.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.49.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.49.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.49.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.49.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.5.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.5.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.5.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.5.gate_proj.weight_scale": 
"model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.5.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.5.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.50.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.50.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.50.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.50.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.50.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.50.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.51.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.51.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.51.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.51.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.51.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.51.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.52.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.52.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.52.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.52.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + 
"model.language_model.layers.14.mlp.experts.52.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.52.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.53.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.53.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.53.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.53.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.53.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.53.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.54.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.54.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.54.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.54.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.54.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.54.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.55.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.55.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.55.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.55.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.55.up_proj.weight": 
"model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.55.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.56.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.56.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.56.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.56.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.56.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.56.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.57.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.57.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.57.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.57.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.57.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.57.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.58.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.58.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.58.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.58.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.58.up_proj.weight": "model-00021-of-00046.safetensors", + 
"model.language_model.layers.14.mlp.experts.58.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.59.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.59.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.59.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.59.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.59.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.59.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.6.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.6.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.6.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.6.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.6.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.6.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.60.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.60.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.60.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.60.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.60.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.60.up_proj.weight_scale": 
"model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.61.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.61.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.61.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.61.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.61.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.61.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.62.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.62.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.62.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.62.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.62.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.62.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.63.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.63.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.63.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.63.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.63.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.63.up_proj.weight_scale": "model-00021-of-00046.safetensors", + 
"model.language_model.layers.14.mlp.experts.64.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.64.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.64.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.64.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.64.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.64.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.65.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.65.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.65.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.65.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.65.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.65.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.66.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.66.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.66.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.66.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.66.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.66.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.67.down_proj.weight": 
"model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.67.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.67.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.67.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.67.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.67.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.68.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.68.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.68.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.68.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.68.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.68.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.69.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.69.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.69.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.69.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.69.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.69.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.7.down_proj.weight": "model-00021-of-00046.safetensors", + 
"model.language_model.layers.14.mlp.experts.7.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.7.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.7.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.7.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.7.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.70.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.70.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.70.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.70.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.70.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.70.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.71.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.71.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.71.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.71.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.71.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.71.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.72.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.72.down_proj.weight_scale": 
"model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.72.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.72.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.72.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.72.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.73.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.73.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.73.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.73.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.73.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.73.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.74.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.74.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.74.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.74.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.74.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.74.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.75.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.75.down_proj.weight_scale": "model-00021-of-00046.safetensors", + 
"model.language_model.layers.14.mlp.experts.75.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.75.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.75.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.75.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.76.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.76.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.76.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.76.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.76.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.76.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.77.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.77.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.77.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.77.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.77.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.77.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.78.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.78.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.78.gate_proj.weight": 
"model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.78.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.78.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.78.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.79.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.79.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.79.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.79.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.79.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.79.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.8.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.8.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.8.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.8.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.8.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.8.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.80.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.80.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.80.gate_proj.weight": "model-00022-of-00046.safetensors", + 
"model.language_model.layers.14.mlp.experts.80.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.80.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.80.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.81.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.81.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.81.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.81.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.81.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.81.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.82.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.82.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.82.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.82.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.82.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.82.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.83.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.83.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.83.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.83.gate_proj.weight_scale": 
"model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.83.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.83.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.84.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.84.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.84.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.84.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.84.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.84.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.85.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.85.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.85.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.85.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.85.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.85.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.86.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.86.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.86.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.86.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + 
"model.language_model.layers.14.mlp.experts.86.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.86.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.87.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.87.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.87.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.87.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.87.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.87.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.88.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.88.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.88.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.88.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.88.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.88.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.89.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.89.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.89.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.89.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.89.up_proj.weight": 
"model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.89.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.9.down_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.9.down_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.9.gate_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.9.gate_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.9.up_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.9.up_proj.weight_scale": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.90.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.90.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.90.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.90.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.90.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.90.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.91.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.91.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.91.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.91.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.91.up_proj.weight": "model-00022-of-00046.safetensors", + 
"model.language_model.layers.14.mlp.experts.91.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.92.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.92.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.92.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.92.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.92.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.92.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.93.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.93.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.93.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.93.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.93.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.93.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.94.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.94.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.94.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.94.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.94.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.94.up_proj.weight_scale": 
"model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.95.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.95.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.95.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.95.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.95.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.95.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.96.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.96.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.96.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.96.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.96.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.96.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.97.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.97.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.97.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.97.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.97.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.97.up_proj.weight_scale": "model-00022-of-00046.safetensors", + 
"model.language_model.layers.14.mlp.experts.98.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.98.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.98.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.98.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.98.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.98.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.99.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.99.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.99.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.99.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.99.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.experts.99.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.gate.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.mlp.shared_experts.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.shared_experts.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.shared_experts.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.shared_experts.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.shared_experts.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.14.mlp.shared_experts.up_proj.weight_scale": 
"model-00022-of-00046.safetensors", + "model.language_model.layers.14.self_attn.k_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.self_attn.o_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.self_attn.q_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.14.self_attn.v_proj.weight": "model-00021-of-00046.safetensors", + "model.language_model.layers.15.input_layernorm.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.0.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.0.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.0.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.0.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.0.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.0.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.1.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.1.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.1.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.1.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.1.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.1.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.10.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.10.down_proj.weight_scale": "model-00022-of-00046.safetensors", + 
"model.language_model.layers.15.mlp.experts.10.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.10.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.10.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.10.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.100.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.100.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.100.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.100.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.100.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.100.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.101.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.101.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.101.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.101.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.101.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.101.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.102.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.102.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.102.gate_proj.weight": 
"model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.102.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.102.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.102.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.103.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.103.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.103.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.103.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.103.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.103.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.104.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.104.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.104.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.104.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.104.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.104.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.105.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.105.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.105.gate_proj.weight": "model-00023-of-00046.safetensors", + 
"model.language_model.layers.15.mlp.experts.105.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.105.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.105.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.106.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.106.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.106.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.106.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.106.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.106.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.107.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.107.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.107.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.107.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.107.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.107.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.108.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.108.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.108.gate_proj.weight": "model-00023-of-00046.safetensors", + 
"model.language_model.layers.15.mlp.experts.108.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.108.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.108.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.109.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.109.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.109.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.109.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.109.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.109.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.11.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.11.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.11.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.11.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.11.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.11.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.110.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.110.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.110.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.110.gate_proj.weight_scale": 
"model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.110.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.110.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.111.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.111.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.111.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.111.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.111.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.111.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.112.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.112.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.112.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.112.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.112.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.112.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.113.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.113.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.113.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.113.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + 
"model.language_model.layers.15.mlp.experts.113.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.113.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.114.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.114.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.114.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.114.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.114.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.114.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.115.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.115.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.115.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.115.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.115.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.115.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.116.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.116.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.116.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.116.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.116.up_proj.weight": 
"model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.116.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.117.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.117.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.117.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.117.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.117.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.117.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.118.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.118.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.118.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.118.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.118.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.118.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.119.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.119.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.119.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.119.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.119.up_proj.weight": "model-00023-of-00046.safetensors", + 
"model.language_model.layers.15.mlp.experts.119.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.12.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.12.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.12.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.12.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.12.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.12.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.120.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.120.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.120.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.120.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.120.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.120.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.121.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.121.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.121.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.121.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.121.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.121.up_proj.weight_scale": 
"model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.122.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.122.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.122.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.122.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.122.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.122.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.123.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.123.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.123.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.123.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.123.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.123.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.124.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.124.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.124.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.124.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.124.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.124.up_proj.weight_scale": "model-00023-of-00046.safetensors", + 
"model.language_model.layers.15.mlp.experts.125.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.125.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.125.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.125.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.125.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.125.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.126.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.126.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.126.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.126.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.126.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.126.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.127.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.127.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.127.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.127.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.127.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.127.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.13.down_proj.weight": 
"model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.13.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.13.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.13.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.13.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.13.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.14.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.14.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.14.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.14.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.14.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.14.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.15.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.15.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.15.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.15.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.15.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.15.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.16.down_proj.weight": "model-00022-of-00046.safetensors", + 
"model.language_model.layers.15.mlp.experts.16.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.16.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.16.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.16.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.16.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.17.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.17.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.17.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.17.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.17.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.17.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.18.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.18.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.18.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.18.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.18.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.18.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.19.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.19.down_proj.weight_scale": 
"model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.19.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.19.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.19.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.19.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.2.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.2.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.2.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.2.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.2.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.2.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.20.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.20.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.20.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.20.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.20.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.20.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.21.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.21.down_proj.weight_scale": "model-00022-of-00046.safetensors", + 
"model.language_model.layers.15.mlp.experts.21.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.21.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.21.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.21.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.22.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.22.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.22.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.22.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.22.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.22.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.23.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.23.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.23.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.23.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.23.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.23.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.24.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.24.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.24.gate_proj.weight": 
"model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.24.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.24.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.24.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.25.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.25.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.25.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.25.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.25.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.25.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.26.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.26.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.26.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.26.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.26.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.26.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.27.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.27.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.27.gate_proj.weight": "model-00022-of-00046.safetensors", + 
"model.language_model.layers.15.mlp.experts.27.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.27.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.27.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.28.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.28.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.28.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.28.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.28.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.28.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.29.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.29.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.29.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.29.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.29.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.29.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.3.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.3.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.3.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.3.gate_proj.weight_scale": 
"model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.3.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.3.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.30.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.30.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.30.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.30.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.30.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.30.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.31.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.31.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.31.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.31.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.31.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.31.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.32.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.32.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.32.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.32.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + 
"model.language_model.layers.15.mlp.experts.32.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.32.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.33.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.33.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.33.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.33.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.33.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.33.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.34.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.34.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.34.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.34.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.34.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.34.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.35.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.35.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.35.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.35.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.35.up_proj.weight": 
"model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.35.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.36.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.36.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.36.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.36.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.36.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.36.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.37.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.37.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.37.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.37.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.37.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.37.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.38.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.38.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.38.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.38.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.38.up_proj.weight": "model-00022-of-00046.safetensors", + 
"model.language_model.layers.15.mlp.experts.38.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.39.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.39.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.39.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.39.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.39.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.39.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.4.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.4.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.4.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.4.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.4.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.4.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.40.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.40.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.40.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.40.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.40.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.40.up_proj.weight_scale": 
"model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.41.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.41.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.41.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.41.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.41.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.41.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.42.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.42.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.42.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.42.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.42.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.42.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.43.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.43.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.43.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.43.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.43.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.43.up_proj.weight_scale": "model-00023-of-00046.safetensors", + 
"model.language_model.layers.15.mlp.experts.44.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.44.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.44.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.44.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.44.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.44.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.45.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.45.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.45.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.45.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.45.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.45.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.46.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.46.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.46.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.46.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.46.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.46.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.47.down_proj.weight": 
"model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.47.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.47.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.47.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.47.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.47.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.48.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.48.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.48.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.48.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.48.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.48.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.49.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.49.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.49.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.49.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.49.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.49.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.5.down_proj.weight": "model-00022-of-00046.safetensors", + 
"model.language_model.layers.15.mlp.experts.5.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.5.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.5.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.5.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.5.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.50.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.50.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.50.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.50.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.50.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.50.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.51.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.51.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.51.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.51.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.51.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.51.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.52.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.52.down_proj.weight_scale": 
"model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.52.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.52.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.52.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.52.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.53.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.53.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.53.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.53.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.53.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.53.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.54.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.54.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.54.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.54.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.54.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.54.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.55.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.55.down_proj.weight_scale": "model-00023-of-00046.safetensors", + 
"model.language_model.layers.15.mlp.experts.55.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.55.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.55.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.55.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.56.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.56.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.56.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.56.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.56.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.56.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.57.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.57.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.57.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.57.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.57.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.57.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.58.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.58.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.58.gate_proj.weight": 
"model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.58.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.58.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.58.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.59.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.59.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.59.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.59.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.59.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.59.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.6.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.6.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.6.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.6.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.6.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.6.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.60.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.60.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.60.gate_proj.weight": "model-00023-of-00046.safetensors", + 
"model.language_model.layers.15.mlp.experts.60.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.60.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.60.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.61.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.61.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.61.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.61.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.61.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.61.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.62.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.62.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.62.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.62.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.62.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.62.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.63.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.63.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.63.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.63.gate_proj.weight_scale": 
"model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.63.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.63.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.64.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.64.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.64.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.64.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.64.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.64.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.65.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.65.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.65.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.65.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.65.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.65.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.66.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.66.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.66.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.66.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + 
"model.language_model.layers.15.mlp.experts.66.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.66.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.67.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.67.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.67.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.67.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.67.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.67.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.68.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.68.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.68.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.68.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.68.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.68.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.69.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.69.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.69.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.69.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.69.up_proj.weight": 
"model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.69.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.7.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.7.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.7.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.7.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.7.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.7.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.70.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.70.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.70.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.70.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.70.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.70.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.71.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.71.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.71.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.71.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.71.up_proj.weight": "model-00023-of-00046.safetensors", + 
"model.language_model.layers.15.mlp.experts.71.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.72.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.72.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.72.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.72.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.72.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.72.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.73.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.73.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.73.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.73.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.73.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.73.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.74.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.74.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.74.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.74.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.74.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.74.up_proj.weight_scale": 
"model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.75.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.75.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.75.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.75.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.75.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.75.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.76.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.76.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.76.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.76.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.76.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.76.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.77.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.77.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.77.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.77.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.77.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.77.up_proj.weight_scale": "model-00023-of-00046.safetensors", + 
"model.language_model.layers.15.mlp.experts.78.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.78.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.78.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.78.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.78.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.78.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.79.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.79.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.79.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.79.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.79.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.79.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.8.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.8.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.8.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.8.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.8.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.8.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.80.down_proj.weight": 
"model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.80.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.80.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.80.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.80.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.80.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.81.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.81.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.81.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.81.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.81.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.81.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.82.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.82.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.82.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.82.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.82.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.82.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.83.down_proj.weight": "model-00023-of-00046.safetensors", + 
"model.language_model.layers.15.mlp.experts.83.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.83.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.83.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.83.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.83.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.84.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.84.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.84.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.84.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.84.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.84.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.85.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.85.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.85.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.85.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.85.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.85.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.86.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.86.down_proj.weight_scale": 
"model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.86.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.86.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.86.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.86.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.87.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.87.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.87.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.87.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.87.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.87.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.88.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.88.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.88.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.88.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.88.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.88.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.89.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.89.down_proj.weight_scale": "model-00023-of-00046.safetensors", + 
"model.language_model.layers.15.mlp.experts.89.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.89.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.89.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.89.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.9.down_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.9.down_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.9.gate_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.9.gate_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.9.up_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.9.up_proj.weight_scale": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.90.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.90.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.90.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.90.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.90.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.90.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.91.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.91.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.91.gate_proj.weight": 
"model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.91.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.91.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.91.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.92.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.92.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.92.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.92.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.92.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.92.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.93.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.93.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.93.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.93.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.93.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.93.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.94.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.94.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.94.gate_proj.weight": "model-00023-of-00046.safetensors", + 
"model.language_model.layers.15.mlp.experts.94.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.94.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.94.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.95.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.95.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.95.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.95.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.95.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.95.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.96.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.96.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.96.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.96.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.96.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.96.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.97.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.97.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.97.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.97.gate_proj.weight_scale": 
"model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.97.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.97.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.98.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.98.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.98.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.98.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.98.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.98.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.99.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.99.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.99.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.99.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.99.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.experts.99.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.gate.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.mlp.shared_experts.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.shared_experts.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.shared_experts.gate_proj.weight": "model-00023-of-00046.safetensors", + 
"model.language_model.layers.15.mlp.shared_experts.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.shared_experts.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.mlp.shared_experts.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.15.self_attn.k_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.self_attn.o_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.self_attn.q_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.15.self_attn.v_proj.weight": "model-00022-of-00046.safetensors", + "model.language_model.layers.16.input_layernorm.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.0.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.0.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.0.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.0.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.0.up_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.0.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.1.down_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.1.down_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.1.gate_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.1.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.1.up_proj.weight": "model-00023-of-00046.safetensors", + 
"model.language_model.layers.16.mlp.experts.1.up_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.10.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.10.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.10.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.10.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.10.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.10.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.100.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.100.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.100.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.100.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.100.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.100.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.101.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.101.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.101.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.101.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.101.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.101.up_proj.weight_scale": 
"model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.102.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.102.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.102.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.102.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.102.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.102.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.103.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.103.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.103.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.103.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.103.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.103.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.104.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.104.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.104.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.104.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.104.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.104.up_proj.weight_scale": "model-00025-of-00046.safetensors", + 
"model.language_model.layers.16.mlp.experts.105.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.105.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.105.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.105.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.105.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.105.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.106.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.106.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.106.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.106.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.106.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.106.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.107.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.107.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.107.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.107.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.107.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.107.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.108.down_proj.weight": 
"model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.108.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.108.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.108.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.108.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.108.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.109.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.109.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.109.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.109.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.109.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.109.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.11.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.11.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.11.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.11.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.11.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.11.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.110.down_proj.weight": "model-00025-of-00046.safetensors", + 
"model.language_model.layers.16.mlp.experts.110.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.110.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.110.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.110.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.110.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.111.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.111.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.111.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.111.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.111.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.111.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.112.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.112.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.112.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.112.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.112.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.112.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.113.down_proj.weight": "model-00025-of-00046.safetensors", + 
"model.language_model.layers.16.mlp.experts.113.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.113.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.113.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.113.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.113.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.114.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.114.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.114.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.114.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.114.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.114.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.115.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.115.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.115.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.115.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.115.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.115.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.116.down_proj.weight": "model-00025-of-00046.safetensors", + 
"model.language_model.layers.16.mlp.experts.116.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.116.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.116.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.116.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.116.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.117.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.117.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.117.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.117.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.117.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.117.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.118.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.118.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.118.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.118.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.118.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.118.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.119.down_proj.weight": "model-00025-of-00046.safetensors", + 
"model.language_model.layers.16.mlp.experts.119.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.119.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.119.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.119.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.119.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.12.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.12.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.12.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.12.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.12.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.12.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.120.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.120.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.120.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.120.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.120.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.120.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.121.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.121.down_proj.weight_scale": 
"model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.121.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.121.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.121.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.121.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.122.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.122.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.122.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.122.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.122.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.122.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.123.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.123.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.123.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.123.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.123.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.123.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.124.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.124.down_proj.weight_scale": "model-00025-of-00046.safetensors", + 
"model.language_model.layers.16.mlp.experts.124.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.124.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.124.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.124.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.125.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.125.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.125.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.125.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.125.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.125.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.126.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.126.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.126.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.126.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.126.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.126.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.127.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.127.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.127.gate_proj.weight": 
"model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.127.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.127.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.127.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.13.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.13.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.13.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.13.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.13.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.13.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.14.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.14.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.14.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.14.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.14.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.14.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.15.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.15.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.15.gate_proj.weight": "model-00024-of-00046.safetensors", + 
"model.language_model.layers.16.mlp.experts.15.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.15.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.15.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.16.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.16.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.16.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.16.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.16.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.16.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.17.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.17.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.17.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.17.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.17.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.17.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.18.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.18.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.18.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.18.gate_proj.weight_scale": 
"model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.18.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.18.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.19.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.19.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.19.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.19.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.19.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.19.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.2.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.2.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.2.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.2.gate_proj.weight_scale": "model-00023-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.2.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.2.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.20.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.20.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.20.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.20.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + 
"model.language_model.layers.16.mlp.experts.20.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.20.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.21.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.21.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.21.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.21.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.21.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.21.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.22.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.22.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.22.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.22.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.22.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.22.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.23.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.23.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.23.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.23.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.23.up_proj.weight": 
"model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.23.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.24.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.24.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.24.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.24.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.24.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.24.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.25.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.25.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.25.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.25.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.25.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.25.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.26.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.26.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.26.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.26.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.26.up_proj.weight": "model-00024-of-00046.safetensors", + 
"model.language_model.layers.16.mlp.experts.26.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.27.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.27.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.27.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.27.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.27.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.27.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.28.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.28.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.28.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.28.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.28.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.28.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.29.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.29.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.29.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.29.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.29.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.29.up_proj.weight_scale": 
"model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.3.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.3.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.3.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.3.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.3.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.3.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.30.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.30.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.30.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.30.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.30.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.30.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.31.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.31.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.31.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.31.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.31.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.31.up_proj.weight_scale": "model-00024-of-00046.safetensors", + 
"model.language_model.layers.16.mlp.experts.32.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.32.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.32.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.32.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.32.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.32.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.33.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.33.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.33.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.33.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.33.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.33.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.34.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.34.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.34.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.34.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.34.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.34.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.35.down_proj.weight": 
"model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.35.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.35.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.35.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.35.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.35.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.36.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.36.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.36.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.36.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.36.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.36.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.37.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.37.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.37.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.37.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.37.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.37.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.38.down_proj.weight": "model-00024-of-00046.safetensors", + 
"model.language_model.layers.16.mlp.experts.38.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.38.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.38.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.38.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.38.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.39.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.39.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.39.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.39.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.39.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.39.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.4.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.4.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.4.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.4.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.4.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.4.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.40.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.40.down_proj.weight_scale": 
"model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.40.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.40.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.40.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.40.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.41.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.41.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.41.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.41.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.41.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.41.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.42.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.42.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.42.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.42.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.42.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.42.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.43.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.43.down_proj.weight_scale": "model-00024-of-00046.safetensors", + 
"model.language_model.layers.16.mlp.experts.43.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.43.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.43.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.43.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.44.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.44.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.44.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.44.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.44.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.44.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.45.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.45.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.45.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.45.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.45.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.45.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.46.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.46.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.46.gate_proj.weight": 
"model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.46.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.46.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.46.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.47.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.47.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.47.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.47.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.47.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.47.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.48.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.48.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.48.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.48.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.48.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.48.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.49.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.49.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.49.gate_proj.weight": "model-00024-of-00046.safetensors", + 
"model.language_model.layers.16.mlp.experts.49.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.49.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.49.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.5.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.5.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.5.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.5.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.5.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.5.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.50.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.50.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.50.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.50.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.50.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.50.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.51.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.51.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.51.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.51.gate_proj.weight_scale": 
"model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.51.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.51.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.52.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.52.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.52.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.52.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.52.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.52.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.53.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.53.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.53.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.53.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.53.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.53.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.54.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.54.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.54.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.54.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + 
"model.language_model.layers.16.mlp.experts.54.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.54.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.55.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.55.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.55.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.55.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.55.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.55.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.56.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.56.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.56.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.56.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.56.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.56.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.57.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.57.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.57.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.57.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.57.up_proj.weight": 
"model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.57.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.58.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.58.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.58.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.58.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.58.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.58.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.59.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.59.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.59.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.59.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.59.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.59.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.6.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.6.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.6.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.6.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.6.up_proj.weight": "model-00024-of-00046.safetensors", + 
"model.language_model.layers.16.mlp.experts.6.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.60.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.60.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.60.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.60.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.60.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.60.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.61.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.61.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.61.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.61.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.61.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.61.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.62.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.62.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.62.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.62.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.62.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.62.up_proj.weight_scale": 
"model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.63.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.63.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.63.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.63.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.63.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.63.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.64.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.64.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.64.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.64.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.64.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.64.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.65.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.65.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.65.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.65.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.65.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.65.up_proj.weight_scale": "model-00024-of-00046.safetensors", + 
"model.language_model.layers.16.mlp.experts.66.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.66.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.66.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.66.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.66.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.66.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.67.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.67.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.67.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.67.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.67.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.67.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.68.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.68.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.68.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.68.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.68.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.68.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.69.down_proj.weight": 
"model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.69.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.69.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.69.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.69.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.69.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.7.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.7.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.7.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.7.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.7.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.7.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.70.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.70.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.70.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.70.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.70.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.70.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.71.down_proj.weight": "model-00024-of-00046.safetensors", + 
"model.language_model.layers.16.mlp.experts.71.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.71.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.71.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.71.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.71.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.72.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.72.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.72.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.72.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.72.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.72.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.73.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.73.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.73.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.73.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.73.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.73.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.74.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.74.down_proj.weight_scale": 
"model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.74.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.74.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.74.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.74.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.75.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.75.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.75.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.75.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.75.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.75.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.76.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.76.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.76.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.76.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.76.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.76.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.77.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.77.down_proj.weight_scale": "model-00024-of-00046.safetensors", + 
"model.language_model.layers.16.mlp.experts.77.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.77.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.77.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.77.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.78.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.78.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.78.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.78.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.78.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.78.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.79.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.79.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.79.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.79.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.79.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.79.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.8.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.8.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.8.gate_proj.weight": 
"model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.8.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.8.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.8.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.80.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.80.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.80.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.80.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.80.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.80.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.81.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.81.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.81.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.81.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.81.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.81.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.82.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.82.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.82.gate_proj.weight": "model-00024-of-00046.safetensors", + 
"model.language_model.layers.16.mlp.experts.82.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.82.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.82.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.83.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.83.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.83.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.83.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.83.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.83.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.84.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.84.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.84.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.84.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.84.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.84.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.85.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.85.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.85.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.85.gate_proj.weight_scale": 
"model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.85.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.85.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.86.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.86.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.86.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.86.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.86.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.86.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.87.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.87.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.87.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.87.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.87.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.87.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.88.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.88.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.88.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.88.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + 
"model.language_model.layers.16.mlp.experts.88.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.88.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.89.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.89.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.89.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.89.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.89.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.89.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.9.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.9.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.9.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.9.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.9.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.9.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.90.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.90.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.90.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.90.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.90.up_proj.weight": 
"model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.90.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.91.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.91.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.91.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.91.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.91.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.91.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.92.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.92.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.92.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.92.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.92.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.92.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.93.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.93.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.93.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.93.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.93.up_proj.weight": "model-00024-of-00046.safetensors", + 
"model.language_model.layers.16.mlp.experts.93.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.94.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.94.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.94.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.94.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.94.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.94.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.95.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.95.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.95.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.95.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.95.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.95.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.96.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.96.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.96.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.96.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.96.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.96.up_proj.weight_scale": 
"model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.97.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.97.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.97.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.97.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.97.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.97.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.98.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.98.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.98.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.98.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.98.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.98.up_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.99.down_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.99.down_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.99.gate_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.99.gate_proj.weight_scale": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.99.up_proj.weight": "model-00024-of-00046.safetensors", + "model.language_model.layers.16.mlp.experts.99.up_proj.weight_scale": "model-00024-of-00046.safetensors", + 
"model.language_model.layers.16.mlp.gate.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.16.mlp.shared_experts.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.shared_experts.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.shared_experts.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.shared_experts.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.shared_experts.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.mlp.shared_experts.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.16.self_attn.k_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.16.self_attn.o_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.16.self_attn.q_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.16.self_attn.v_proj.weight": "model-00023-of-00046.safetensors", + "model.language_model.layers.17.input_layernorm.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.0.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.0.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.0.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.0.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.0.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.0.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.1.down_proj.weight": "model-00025-of-00046.safetensors", + 
"model.language_model.layers.17.mlp.experts.1.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.1.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.1.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.1.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.1.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.10.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.10.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.10.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.10.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.10.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.10.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.100.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.100.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.100.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.100.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.100.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.100.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.101.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.101.down_proj.weight_scale": 
"model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.101.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.101.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.101.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.101.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.102.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.102.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.102.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.102.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.102.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.102.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.103.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.103.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.103.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.103.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.103.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.103.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.104.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.104.down_proj.weight_scale": "model-00026-of-00046.safetensors", + 
"model.language_model.layers.17.mlp.experts.104.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.104.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.104.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.104.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.105.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.105.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.105.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.105.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.105.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.105.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.106.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.106.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.106.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.106.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.106.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.106.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.107.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.107.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.107.gate_proj.weight": 
"model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.107.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.107.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.107.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.108.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.108.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.108.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.108.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.108.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.108.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.109.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.109.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.109.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.109.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.109.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.109.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.11.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.11.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.11.gate_proj.weight": "model-00025-of-00046.safetensors", + 
"model.language_model.layers.17.mlp.experts.11.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.11.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.11.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.110.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.110.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.110.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.110.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.110.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.110.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.111.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.111.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.111.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.111.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.111.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.111.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.112.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.112.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.112.gate_proj.weight": "model-00026-of-00046.safetensors", + 
"model.language_model.layers.17.mlp.experts.112.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.112.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.112.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.113.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.113.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.113.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.113.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.113.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.113.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.114.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.114.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.114.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.114.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.114.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.114.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.115.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.115.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.115.gate_proj.weight": "model-00026-of-00046.safetensors", + 
"model.language_model.layers.17.mlp.experts.115.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.115.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.115.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.116.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.116.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.116.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.116.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.116.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.116.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.117.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.117.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.117.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.117.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.117.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.117.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.118.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.118.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.118.gate_proj.weight": "model-00026-of-00046.safetensors", + 
"model.language_model.layers.17.mlp.experts.118.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.118.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.118.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.119.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.119.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.119.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.119.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.119.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.119.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.12.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.12.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.12.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.12.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.12.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.12.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.120.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.120.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.120.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.120.gate_proj.weight_scale": 
"model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.120.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.120.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.121.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.121.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.121.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.121.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.121.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.121.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.122.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.122.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.122.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.122.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.122.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.122.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.123.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.123.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.123.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.123.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + 
"model.language_model.layers.17.mlp.experts.123.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.123.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.124.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.124.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.124.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.124.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.124.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.124.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.125.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.125.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.125.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.125.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.125.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.125.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.126.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.126.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.126.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.126.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.126.up_proj.weight": 
"model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.126.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.127.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.127.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.127.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.127.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.127.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.127.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.13.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.13.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.13.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.13.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.13.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.13.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.14.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.14.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.14.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.14.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.14.up_proj.weight": "model-00025-of-00046.safetensors", + 
"model.language_model.layers.17.mlp.experts.14.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.15.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.15.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.15.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.15.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.15.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.15.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.16.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.16.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.16.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.16.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.16.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.16.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.17.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.17.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.17.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.17.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.17.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.17.up_proj.weight_scale": 
"model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.18.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.18.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.18.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.18.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.18.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.18.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.19.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.19.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.19.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.19.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.19.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.19.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.2.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.2.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.2.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.2.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.2.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.2.up_proj.weight_scale": "model-00025-of-00046.safetensors", + 
"model.language_model.layers.17.mlp.experts.20.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.20.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.20.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.20.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.20.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.20.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.21.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.21.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.21.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.21.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.21.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.21.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.22.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.22.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.22.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.22.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.22.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.22.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.23.down_proj.weight": 
"model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.23.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.23.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.23.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.23.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.23.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.24.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.24.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.24.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.24.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.24.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.24.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.25.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.25.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.25.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.25.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.25.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.25.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.26.down_proj.weight": "model-00025-of-00046.safetensors", + 
"model.language_model.layers.17.mlp.experts.26.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.26.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.26.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.26.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.26.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.27.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.27.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.27.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.27.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.27.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.27.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.28.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.28.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.28.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.28.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.28.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.28.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.29.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.29.down_proj.weight_scale": 
"model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.29.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.29.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.29.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.29.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.3.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.3.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.3.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.3.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.3.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.3.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.30.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.30.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.30.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.30.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.30.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.30.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.31.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.31.down_proj.weight_scale": "model-00025-of-00046.safetensors", + 
"model.language_model.layers.17.mlp.experts.31.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.31.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.31.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.31.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.32.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.32.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.32.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.32.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.32.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.32.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.33.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.33.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.33.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.33.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.33.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.33.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.34.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.34.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.34.gate_proj.weight": 
"model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.34.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.34.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.34.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.35.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.35.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.35.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.35.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.35.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.35.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.36.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.36.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.36.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.36.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.36.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.36.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.37.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.37.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.37.gate_proj.weight": "model-00025-of-00046.safetensors", + 
"model.language_model.layers.17.mlp.experts.37.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.37.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.37.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.38.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.38.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.38.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.38.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.38.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.38.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.39.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.39.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.39.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.39.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.39.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.39.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.4.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.4.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.4.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.4.gate_proj.weight_scale": 
"model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.4.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.4.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.40.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.40.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.40.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.40.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.40.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.40.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.41.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.41.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.41.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.41.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.41.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.41.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.42.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.42.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.42.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.42.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + 
"model.language_model.layers.17.mlp.experts.42.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.42.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.43.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.43.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.43.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.43.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.43.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.43.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.44.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.44.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.44.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.44.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.44.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.44.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.45.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.45.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.45.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.45.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.45.up_proj.weight": 
"model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.45.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.46.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.46.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.46.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.46.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.46.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.46.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.47.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.47.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.47.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.47.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.47.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.47.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.48.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.48.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.48.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.48.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.48.up_proj.weight": "model-00025-of-00046.safetensors", + 
"model.language_model.layers.17.mlp.experts.48.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.49.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.49.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.49.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.49.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.49.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.49.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.5.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.5.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.5.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.5.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.5.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.5.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.50.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.50.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.50.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.50.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.50.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.50.up_proj.weight_scale": 
"model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.51.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.51.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.51.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.51.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.51.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.51.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.52.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.52.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.52.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.52.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.52.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.52.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.53.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.53.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.53.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.53.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.53.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.53.up_proj.weight_scale": "model-00025-of-00046.safetensors", + 
"model.language_model.layers.17.mlp.experts.54.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.54.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.54.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.54.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.54.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.54.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.55.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.55.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.55.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.55.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.55.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.55.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.56.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.56.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.56.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.56.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.56.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.56.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.57.down_proj.weight": 
"model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.57.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.57.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.57.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.57.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.57.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.58.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.58.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.58.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.58.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.58.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.58.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.59.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.59.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.59.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.59.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.59.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.59.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.6.down_proj.weight": "model-00025-of-00046.safetensors", + 
"model.language_model.layers.17.mlp.experts.6.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.6.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.6.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.6.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.6.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.60.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.60.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.60.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.60.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.60.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.60.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.61.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.61.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.61.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.61.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.61.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.61.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.62.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.62.down_proj.weight_scale": 
"model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.62.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.62.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.62.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.62.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.63.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.63.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.63.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.63.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.63.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.63.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.64.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.64.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.64.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.64.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.64.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.64.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.65.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.65.down_proj.weight_scale": "model-00026-of-00046.safetensors", + 
"model.language_model.layers.17.mlp.experts.65.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.65.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.65.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.65.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.66.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.66.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.66.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.66.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.66.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.66.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.67.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.67.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.67.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.67.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.67.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.67.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.68.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.68.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.68.gate_proj.weight": 
"model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.68.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.68.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.68.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.69.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.69.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.69.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.69.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.69.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.69.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.7.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.7.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.7.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.7.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.7.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.7.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.70.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.70.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.70.gate_proj.weight": "model-00026-of-00046.safetensors", + 
"model.language_model.layers.17.mlp.experts.70.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.70.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.70.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.71.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.71.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.71.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.71.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.71.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.71.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.72.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.72.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.72.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.72.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.72.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.72.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.73.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.73.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.73.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.73.gate_proj.weight_scale": 
"model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.73.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.73.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.74.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.74.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.74.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.74.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.74.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.74.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.75.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.75.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.75.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.75.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.75.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.75.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.76.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.76.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.76.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.76.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + 
"model.language_model.layers.17.mlp.experts.76.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.76.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.77.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.77.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.77.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.77.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.77.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.77.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.78.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.78.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.78.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.78.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.78.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.78.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.79.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.79.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.79.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.79.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.79.up_proj.weight": 
"model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.79.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.8.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.8.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.8.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.8.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.8.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.8.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.80.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.80.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.80.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.80.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.80.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.80.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.81.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.81.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.81.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.81.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.81.up_proj.weight": "model-00026-of-00046.safetensors", + 
"model.language_model.layers.17.mlp.experts.81.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.82.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.82.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.82.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.82.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.82.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.82.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.83.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.83.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.83.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.83.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.83.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.83.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.84.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.84.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.84.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.84.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.84.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.84.up_proj.weight_scale": 
"model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.85.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.85.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.85.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.85.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.85.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.85.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.86.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.86.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.86.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.86.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.86.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.86.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.87.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.87.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.87.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.87.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.87.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.87.up_proj.weight_scale": "model-00026-of-00046.safetensors", + 
"model.language_model.layers.17.mlp.experts.88.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.88.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.88.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.88.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.88.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.88.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.89.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.89.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.89.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.89.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.89.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.89.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.9.down_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.9.down_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.9.gate_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.9.gate_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.9.up_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.9.up_proj.weight_scale": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.90.down_proj.weight": 
"model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.90.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.90.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.90.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.90.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.90.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.91.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.91.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.91.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.91.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.91.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.91.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.92.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.92.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.92.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.92.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.92.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.92.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.93.down_proj.weight": "model-00026-of-00046.safetensors", + 
"model.language_model.layers.17.mlp.experts.93.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.93.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.93.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.93.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.93.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.94.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.94.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.94.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.94.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.94.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.94.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.95.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.95.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.95.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.95.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.95.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.95.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.96.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.96.down_proj.weight_scale": 
"model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.96.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.96.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.96.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.96.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.97.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.97.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.97.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.97.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.97.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.97.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.98.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.98.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.98.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.98.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.98.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.98.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.99.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.99.down_proj.weight_scale": "model-00026-of-00046.safetensors", + 
"model.language_model.layers.17.mlp.experts.99.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.99.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.99.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.experts.99.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.gate.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.mlp.shared_experts.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.shared_experts.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.shared_experts.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.shared_experts.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.shared_experts.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.mlp.shared_experts.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.17.self_attn.k_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.self_attn.o_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.self_attn.q_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.17.self_attn.v_proj.weight": "model-00025-of-00046.safetensors", + "model.language_model.layers.18.input_layernorm.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.0.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.0.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.0.gate_proj.weight": "model-00026-of-00046.safetensors", + 
"model.language_model.layers.18.mlp.experts.0.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.0.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.0.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.1.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.1.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.1.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.1.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.1.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.1.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.10.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.10.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.10.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.10.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.10.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.10.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.100.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.100.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.100.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.100.gate_proj.weight_scale": 
"model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.100.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.100.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.101.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.101.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.101.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.101.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.101.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.101.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.102.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.102.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.102.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.102.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.102.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.102.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.103.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.103.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.103.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.103.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + 
"model.language_model.layers.18.mlp.experts.103.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.103.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.104.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.104.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.104.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.104.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.104.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.104.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.105.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.105.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.105.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.105.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.105.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.105.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.106.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.106.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.106.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.106.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.106.up_proj.weight": 
"model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.106.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.107.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.107.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.107.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.107.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.107.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.107.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.108.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.108.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.108.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.108.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.108.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.108.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.109.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.109.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.109.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.109.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.109.up_proj.weight": "model-00027-of-00046.safetensors", + 
"model.language_model.layers.18.mlp.experts.109.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.11.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.11.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.11.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.11.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.11.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.11.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.110.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.110.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.110.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.110.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.110.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.110.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.111.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.111.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.111.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.111.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.111.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.111.up_proj.weight_scale": 
"model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.112.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.112.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.112.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.112.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.112.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.112.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.113.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.113.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.113.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.113.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.113.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.113.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.114.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.114.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.114.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.114.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.114.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.114.up_proj.weight_scale": "model-00027-of-00046.safetensors", + 
"model.language_model.layers.18.mlp.experts.115.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.115.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.115.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.115.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.115.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.115.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.116.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.116.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.116.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.116.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.116.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.116.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.117.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.117.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.117.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.117.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.117.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.117.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.118.down_proj.weight": 
"model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.118.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.118.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.118.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.118.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.118.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.119.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.119.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.119.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.119.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.119.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.119.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.12.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.12.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.12.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.12.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.12.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.12.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.120.down_proj.weight": "model-00027-of-00046.safetensors", + 
"model.language_model.layers.18.mlp.experts.120.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.120.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.120.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.120.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.120.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.121.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.121.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.121.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.121.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.121.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.121.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.122.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.122.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.122.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.122.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.122.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.122.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.123.down_proj.weight": "model-00028-of-00046.safetensors", + 
"model.language_model.layers.18.mlp.experts.123.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.123.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.123.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.123.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.123.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.124.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.124.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.124.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.124.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.124.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.124.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.125.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.125.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.125.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.125.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.125.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.125.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.126.down_proj.weight": "model-00028-of-00046.safetensors", + 
"model.language_model.layers.18.mlp.experts.126.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.126.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.126.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.126.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.126.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.127.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.127.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.127.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.127.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.127.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.127.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.13.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.13.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.13.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.13.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.13.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.13.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.14.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.14.down_proj.weight_scale": 
"model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.14.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.14.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.14.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.14.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.15.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.15.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.15.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.15.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.15.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.15.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.16.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.16.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.16.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.16.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.16.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.16.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.17.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.17.down_proj.weight_scale": "model-00026-of-00046.safetensors", + 
"model.language_model.layers.18.mlp.experts.17.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.17.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.17.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.17.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.18.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.18.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.18.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.18.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.18.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.18.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.19.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.19.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.19.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.19.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.19.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.19.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.2.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.2.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.2.gate_proj.weight": 
"model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.2.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.2.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.2.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.20.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.20.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.20.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.20.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.20.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.20.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.21.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.21.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.21.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.21.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.21.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.21.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.22.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.22.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.22.gate_proj.weight": "model-00026-of-00046.safetensors", + 
"model.language_model.layers.18.mlp.experts.22.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.22.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.22.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.23.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.23.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.23.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.23.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.23.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.23.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.24.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.24.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.24.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.24.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.24.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.24.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.25.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.25.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.25.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.25.gate_proj.weight_scale": 
"model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.25.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.25.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.26.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.26.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.26.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.26.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.26.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.26.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.27.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.27.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.27.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.27.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.27.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.27.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.28.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.28.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.28.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.28.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + 
"model.language_model.layers.18.mlp.experts.28.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.28.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.29.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.29.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.29.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.29.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.29.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.29.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.3.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.3.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.3.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.3.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.3.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.3.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.30.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.30.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.30.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.30.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.30.up_proj.weight": 
"model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.30.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.31.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.31.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.31.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.31.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.31.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.31.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.32.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.32.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.32.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.32.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.32.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.32.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.33.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.33.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.33.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.33.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.33.up_proj.weight": "model-00027-of-00046.safetensors", + 
"model.language_model.layers.18.mlp.experts.33.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.34.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.34.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.34.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.34.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.34.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.34.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.35.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.35.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.35.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.35.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.35.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.35.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.36.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.36.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.36.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.36.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.36.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.36.up_proj.weight_scale": 
"model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.37.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.37.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.37.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.37.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.37.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.37.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.38.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.38.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.38.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.38.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.38.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.38.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.39.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.39.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.39.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.39.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.39.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.39.up_proj.weight_scale": "model-00027-of-00046.safetensors", + 
"model.language_model.layers.18.mlp.experts.4.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.4.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.4.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.4.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.4.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.4.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.40.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.40.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.40.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.40.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.40.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.40.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.41.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.41.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.41.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.41.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.41.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.41.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.42.down_proj.weight": 
"model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.42.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.42.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.42.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.42.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.42.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.43.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.43.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.43.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.43.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.43.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.43.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.44.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.44.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.44.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.44.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.44.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.44.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.45.down_proj.weight": "model-00027-of-00046.safetensors", + 
"model.language_model.layers.18.mlp.experts.45.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.45.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.45.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.45.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.45.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.46.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.46.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.46.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.46.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.46.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.46.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.47.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.47.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.47.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.47.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.47.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.47.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.48.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.48.down_proj.weight_scale": 
"model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.48.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.48.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.48.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.48.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.49.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.49.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.49.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.49.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.49.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.49.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.5.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.5.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.5.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.5.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.5.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.5.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.50.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.50.down_proj.weight_scale": "model-00027-of-00046.safetensors", + 
"model.language_model.layers.18.mlp.experts.50.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.50.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.50.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.50.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.51.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.51.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.51.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.51.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.51.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.51.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.52.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.52.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.52.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.52.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.52.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.52.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.53.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.53.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.53.gate_proj.weight": 
"model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.53.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.53.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.53.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.54.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.54.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.54.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.54.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.54.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.54.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.55.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.55.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.55.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.55.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.55.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.55.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.56.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.56.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.56.gate_proj.weight": "model-00027-of-00046.safetensors", + 
"model.language_model.layers.18.mlp.experts.56.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.56.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.56.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.57.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.57.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.57.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.57.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.57.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.57.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.58.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.58.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.58.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.58.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.58.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.58.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.59.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.59.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.59.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.59.gate_proj.weight_scale": 
"model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.59.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.59.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.6.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.6.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.6.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.6.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.6.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.6.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.60.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.60.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.60.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.60.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.60.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.60.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.61.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.61.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.61.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.61.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + 
"model.language_model.layers.18.mlp.experts.61.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.61.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.62.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.62.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.62.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.62.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.62.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.62.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.63.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.63.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.63.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.63.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.63.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.63.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.64.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.64.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.64.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.64.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.64.up_proj.weight": 
"model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.64.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.65.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.65.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.65.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.65.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.65.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.65.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.66.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.66.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.66.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.66.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.66.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.66.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.67.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.67.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.67.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.67.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.67.up_proj.weight": "model-00027-of-00046.safetensors", + 
"model.language_model.layers.18.mlp.experts.67.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.68.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.68.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.68.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.68.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.68.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.68.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.69.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.69.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.69.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.69.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.69.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.69.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.7.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.7.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.7.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.7.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.7.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.7.up_proj.weight_scale": 
"model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.70.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.70.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.70.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.70.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.70.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.70.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.71.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.71.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.71.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.71.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.71.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.71.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.72.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.72.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.72.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.72.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.72.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.72.up_proj.weight_scale": "model-00027-of-00046.safetensors", + 
"model.language_model.layers.18.mlp.experts.73.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.73.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.73.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.73.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.73.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.73.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.74.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.74.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.74.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.74.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.74.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.74.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.75.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.75.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.75.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.75.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.75.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.75.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.76.down_proj.weight": 
"model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.76.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.76.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.76.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.76.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.76.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.77.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.77.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.77.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.77.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.77.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.77.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.78.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.78.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.78.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.78.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.78.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.78.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.79.down_proj.weight": "model-00027-of-00046.safetensors", + 
"model.language_model.layers.18.mlp.experts.79.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.79.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.79.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.79.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.79.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.8.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.8.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.8.gate_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.8.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.8.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.8.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.80.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.80.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.80.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.80.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.80.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.80.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.81.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.81.down_proj.weight_scale": 
"model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.81.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.81.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.81.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.81.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.82.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.82.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.82.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.82.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.82.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.82.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.83.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.83.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.83.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.83.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.83.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.83.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.84.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.84.down_proj.weight_scale": "model-00027-of-00046.safetensors", + 
"model.language_model.layers.18.mlp.experts.84.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.84.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.84.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.84.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.85.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.85.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.85.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.85.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.85.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.85.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.86.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.86.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.86.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.86.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.86.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.86.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.87.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.87.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.87.gate_proj.weight": 
"model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.87.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.87.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.87.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.88.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.88.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.88.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.88.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.88.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.88.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.89.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.89.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.89.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.89.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.89.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.89.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.9.down_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.9.down_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.9.gate_proj.weight": "model-00026-of-00046.safetensors", + 
"model.language_model.layers.18.mlp.experts.9.gate_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.9.up_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.9.up_proj.weight_scale": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.90.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.90.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.90.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.90.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.90.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.90.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.91.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.91.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.91.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.91.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.91.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.91.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.92.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.92.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.92.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.92.gate_proj.weight_scale": 
"model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.92.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.92.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.93.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.93.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.93.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.93.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.93.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.93.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.94.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.94.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.94.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.94.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.94.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.94.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.95.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.95.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.95.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.95.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + 
"model.language_model.layers.18.mlp.experts.95.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.95.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.96.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.96.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.96.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.96.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.96.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.96.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.97.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.97.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.97.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.97.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.97.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.97.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.98.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.98.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.98.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.98.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.98.up_proj.weight": 
"model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.98.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.99.down_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.99.down_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.99.gate_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.99.gate_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.99.up_proj.weight": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.experts.99.up_proj.weight_scale": "model-00027-of-00046.safetensors", + "model.language_model.layers.18.mlp.gate.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.mlp.shared_experts.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.18.mlp.shared_experts.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.18.mlp.shared_experts.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.18.mlp.shared_experts.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.18.mlp.shared_experts.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.18.mlp.shared_experts.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.18.self_attn.k_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.self_attn.o_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.self_attn.q_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.18.self_attn.v_proj.weight": "model-00026-of-00046.safetensors", + "model.language_model.layers.19.input_layernorm.weight": 
"model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.0.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.0.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.0.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.0.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.0.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.0.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.1.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.1.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.1.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.1.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.1.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.1.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.10.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.10.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.10.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.10.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.10.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.10.up_proj.weight_scale": "model-00028-of-00046.safetensors", + 
"model.language_model.layers.19.mlp.experts.100.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.100.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.100.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.100.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.100.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.100.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.101.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.101.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.101.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.101.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.101.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.101.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.102.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.102.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.102.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.102.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.102.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.102.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.103.down_proj.weight": 
"model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.103.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.103.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.103.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.103.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.103.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.104.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.104.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.104.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.104.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.104.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.104.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.105.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.105.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.105.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.105.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.105.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.105.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.106.down_proj.weight": "model-00029-of-00046.safetensors", + 
"model.language_model.layers.19.mlp.experts.106.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.106.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.106.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.106.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.106.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.107.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.107.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.107.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.107.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.107.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.107.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.108.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.108.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.108.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.108.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.108.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.108.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.109.down_proj.weight": "model-00029-of-00046.safetensors", + 
"model.language_model.layers.19.mlp.experts.109.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.109.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.109.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.109.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.109.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.11.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.11.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.11.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.11.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.11.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.11.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.110.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.110.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.110.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.110.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.110.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.110.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.111.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.111.down_proj.weight_scale": 
"model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.111.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.111.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.111.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.111.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.112.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.112.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.112.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.112.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.112.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.112.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.113.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.113.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.113.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.113.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.113.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.113.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.114.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.114.down_proj.weight_scale": "model-00029-of-00046.safetensors", + 
"model.language_model.layers.19.mlp.experts.114.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.114.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.114.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.114.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.115.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.115.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.115.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.115.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.115.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.115.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.116.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.116.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.116.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.116.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.116.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.116.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.117.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.117.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.117.gate_proj.weight": 
"model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.117.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.117.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.117.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.118.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.118.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.118.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.118.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.118.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.118.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.119.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.119.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.119.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.119.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.119.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.119.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.12.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.12.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.12.gate_proj.weight": "model-00028-of-00046.safetensors", + 
"model.language_model.layers.19.mlp.experts.12.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.12.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.12.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.120.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.120.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.120.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.120.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.120.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.120.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.121.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.121.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.121.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.121.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.121.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.121.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.122.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.122.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.122.gate_proj.weight": "model-00029-of-00046.safetensors", + 
"model.language_model.layers.19.mlp.experts.122.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.122.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.122.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.123.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.123.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.123.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.123.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.123.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.123.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.124.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.124.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.124.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.124.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.124.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.124.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.125.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.125.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.125.gate_proj.weight": "model-00029-of-00046.safetensors", + 
"model.language_model.layers.19.mlp.experts.125.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.125.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.125.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.126.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.126.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.126.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.126.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.126.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.126.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.127.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.127.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.127.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.127.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.127.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.127.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.13.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.13.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.13.gate_proj.weight": "model-00028-of-00046.safetensors", + 
"model.language_model.layers.19.mlp.experts.13.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.13.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.13.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.14.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.14.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.14.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.14.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.14.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.14.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.15.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.15.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.15.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.15.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.15.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.15.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.16.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.16.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.16.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.16.gate_proj.weight_scale": 
"model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.16.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.16.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.17.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.17.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.17.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.17.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.17.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.17.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.18.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.18.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.18.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.18.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.18.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.18.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.19.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.19.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.19.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.19.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + 
"model.language_model.layers.19.mlp.experts.19.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.19.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.2.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.2.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.2.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.2.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.2.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.2.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.20.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.20.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.20.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.20.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.20.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.20.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.21.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.21.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.21.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.21.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.21.up_proj.weight": 
"model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.21.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.22.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.22.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.22.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.22.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.22.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.22.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.23.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.23.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.23.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.23.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.23.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.23.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.24.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.24.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.24.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.24.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.24.up_proj.weight": "model-00028-of-00046.safetensors", + 
"model.language_model.layers.19.mlp.experts.24.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.25.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.25.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.25.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.25.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.25.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.25.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.26.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.26.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.26.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.26.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.26.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.26.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.27.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.27.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.27.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.27.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.27.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.27.up_proj.weight_scale": 
"model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.28.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.28.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.28.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.28.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.28.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.28.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.29.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.29.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.29.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.29.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.29.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.29.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.3.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.3.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.3.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.3.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.3.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.3.up_proj.weight_scale": "model-00028-of-00046.safetensors", + 
"model.language_model.layers.19.mlp.experts.30.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.30.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.30.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.30.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.30.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.30.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.31.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.31.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.31.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.31.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.31.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.31.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.32.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.32.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.32.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.32.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.32.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.32.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.33.down_proj.weight": 
"model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.33.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.33.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.33.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.33.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.33.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.34.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.34.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.34.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.34.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.34.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.34.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.35.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.35.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.35.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.35.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.35.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.35.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.36.down_proj.weight": "model-00028-of-00046.safetensors", + 
"model.language_model.layers.19.mlp.experts.36.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.36.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.36.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.36.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.36.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.37.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.37.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.37.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.37.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.37.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.37.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.38.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.38.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.38.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.38.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.38.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.38.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.39.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.39.down_proj.weight_scale": 
"model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.39.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.39.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.39.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.39.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.4.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.4.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.4.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.4.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.4.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.4.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.40.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.40.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.40.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.40.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.40.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.40.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.41.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.41.down_proj.weight_scale": "model-00028-of-00046.safetensors", + 
"model.language_model.layers.19.mlp.experts.41.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.41.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.41.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.41.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.42.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.42.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.42.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.42.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.42.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.42.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.43.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.43.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.43.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.43.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.43.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.43.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.44.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.44.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.44.gate_proj.weight": 
"model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.44.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.44.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.44.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.45.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.45.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.45.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.45.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.45.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.45.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.46.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.46.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.46.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.46.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.46.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.46.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.47.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.47.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.47.gate_proj.weight": "model-00028-of-00046.safetensors", + 
"model.language_model.layers.19.mlp.experts.47.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.47.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.47.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.48.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.48.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.48.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.48.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.48.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.48.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.49.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.49.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.49.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.49.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.49.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.49.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.5.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.5.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.5.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.5.gate_proj.weight_scale": 
"model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.5.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.5.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.50.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.50.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.50.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.50.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.50.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.50.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.51.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.51.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.51.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.51.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.51.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.51.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.52.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.52.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.52.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.52.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + 
"model.language_model.layers.19.mlp.experts.52.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.52.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.53.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.53.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.53.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.53.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.53.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.53.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.54.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.54.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.54.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.54.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.54.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.54.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.55.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.55.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.55.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.55.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.55.up_proj.weight": 
"model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.55.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.56.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.56.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.56.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.56.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.56.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.56.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.57.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.57.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.57.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.57.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.57.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.57.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.58.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.58.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.58.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.58.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.58.up_proj.weight": "model-00028-of-00046.safetensors", + 
"model.language_model.layers.19.mlp.experts.58.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.59.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.59.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.59.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.59.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.59.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.59.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.6.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.6.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.6.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.6.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.6.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.6.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.60.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.60.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.60.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.60.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.60.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.60.up_proj.weight_scale": 
"model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.61.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.61.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.61.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.61.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.61.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.61.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.62.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.62.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.62.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.62.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.62.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.62.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.63.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.63.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.63.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.63.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.63.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.63.up_proj.weight_scale": "model-00028-of-00046.safetensors", + 
"model.language_model.layers.19.mlp.experts.64.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.64.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.64.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.64.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.64.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.64.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.65.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.65.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.65.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.65.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.65.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.65.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.66.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.66.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.66.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.66.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.66.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.66.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.67.down_proj.weight": 
"model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.67.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.67.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.67.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.67.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.67.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.68.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.68.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.68.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.68.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.68.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.68.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.69.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.69.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.69.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.69.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.69.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.69.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.7.down_proj.weight": "model-00028-of-00046.safetensors", + 
"model.language_model.layers.19.mlp.experts.7.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.7.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.7.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.7.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.7.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.70.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.70.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.70.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.70.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.70.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.70.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.71.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.71.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.71.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.71.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.71.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.71.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.72.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.72.down_proj.weight_scale": 
"model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.72.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.72.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.72.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.72.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.73.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.73.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.73.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.73.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.73.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.73.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.74.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.74.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.74.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.74.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.74.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.74.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.75.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.75.down_proj.weight_scale": "model-00028-of-00046.safetensors", + 
"model.language_model.layers.19.mlp.experts.75.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.75.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.75.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.75.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.76.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.76.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.76.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.76.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.76.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.76.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.77.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.77.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.77.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.77.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.77.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.77.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.78.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.78.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.78.gate_proj.weight": 
"model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.78.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.78.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.78.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.79.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.79.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.79.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.79.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.79.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.79.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.8.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.8.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.8.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.8.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.8.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.8.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.80.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.80.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.80.gate_proj.weight": "model-00028-of-00046.safetensors", + 
"model.language_model.layers.19.mlp.experts.80.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.80.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.80.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.81.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.81.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.81.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.81.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.81.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.81.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.82.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.82.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.82.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.82.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.82.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.82.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.83.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.83.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.83.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.83.gate_proj.weight_scale": 
"model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.83.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.83.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.84.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.84.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.84.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.84.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.84.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.84.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.85.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.85.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.85.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.85.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.85.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.85.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.86.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.86.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.86.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.86.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + 
"model.language_model.layers.19.mlp.experts.86.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.86.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.87.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.87.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.87.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.87.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.87.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.87.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.88.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.88.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.88.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.88.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.88.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.88.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.89.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.89.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.89.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.89.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.89.up_proj.weight": 
"model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.89.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.9.down_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.9.down_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.9.gate_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.9.gate_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.9.up_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.9.up_proj.weight_scale": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.90.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.90.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.90.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.90.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.90.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.90.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.91.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.91.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.91.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.91.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.91.up_proj.weight": "model-00029-of-00046.safetensors", + 
"model.language_model.layers.19.mlp.experts.91.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.92.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.92.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.92.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.92.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.92.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.92.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.93.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.93.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.93.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.93.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.93.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.93.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.94.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.94.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.94.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.94.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.94.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.94.up_proj.weight_scale": 
"model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.95.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.95.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.95.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.95.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.95.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.95.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.96.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.96.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.96.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.96.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.96.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.96.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.97.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.97.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.97.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.97.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.97.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.97.up_proj.weight_scale": "model-00029-of-00046.safetensors", + 
"model.language_model.layers.19.mlp.experts.98.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.98.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.98.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.98.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.98.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.98.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.99.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.99.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.99.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.99.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.99.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.experts.99.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.gate.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.mlp.shared_experts.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.shared_experts.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.shared_experts.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.shared_experts.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.shared_experts.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.19.mlp.shared_experts.up_proj.weight_scale": 
"model-00029-of-00046.safetensors", + "model.language_model.layers.19.self_attn.k_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.self_attn.o_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.self_attn.q_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.19.self_attn.v_proj.weight": "model-00028-of-00046.safetensors", + "model.language_model.layers.2.input_layernorm.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.0.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.0.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.0.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.0.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.0.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.0.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.1.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.1.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.1.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.1.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.1.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.1.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.10.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.10.down_proj.weight_scale": "model-00004-of-00046.safetensors", + 
"model.language_model.layers.2.mlp.experts.10.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.10.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.10.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.10.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.100.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.100.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.100.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.100.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.100.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.100.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.101.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.101.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.101.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.101.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.101.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.101.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.102.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.102.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.102.gate_proj.weight": 
"model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.102.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.102.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.102.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.103.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.103.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.103.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.103.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.103.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.103.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.104.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.104.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.104.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.104.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.104.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.104.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.105.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.105.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.105.gate_proj.weight": "model-00005-of-00046.safetensors", + 
"model.language_model.layers.2.mlp.experts.105.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.105.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.105.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.106.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.106.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.106.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.106.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.106.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.106.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.107.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.107.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.107.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.107.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.107.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.107.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.108.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.108.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.108.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.108.gate_proj.weight_scale": 
"model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.108.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.108.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.109.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.109.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.109.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.109.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.109.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.109.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.11.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.11.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.11.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.11.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.11.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.11.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.110.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.110.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.110.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.110.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + 
"model.language_model.layers.2.mlp.experts.110.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.110.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.111.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.111.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.111.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.111.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.111.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.111.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.112.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.112.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.112.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.112.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.112.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.112.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.113.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.113.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.113.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.113.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.113.up_proj.weight": 
"model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.113.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.114.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.114.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.114.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.114.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.114.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.114.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.115.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.115.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.115.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.115.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.115.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.115.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.116.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.116.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.116.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.116.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.116.up_proj.weight": "model-00005-of-00046.safetensors", + 
"model.language_model.layers.2.mlp.experts.116.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.117.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.117.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.117.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.117.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.117.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.117.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.118.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.118.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.118.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.118.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.118.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.118.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.119.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.119.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.119.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.119.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.119.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.119.up_proj.weight_scale": 
"model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.12.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.12.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.12.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.12.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.12.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.12.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.120.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.120.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.120.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.120.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.120.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.120.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.121.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.121.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.121.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.121.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.121.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.121.up_proj.weight_scale": "model-00005-of-00046.safetensors", + 
"model.language_model.layers.2.mlp.experts.122.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.122.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.122.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.122.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.122.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.122.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.123.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.123.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.123.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.123.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.123.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.123.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.124.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.124.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.124.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.124.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.124.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.124.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.125.down_proj.weight": 
"model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.125.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.125.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.125.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.125.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.125.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.126.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.126.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.126.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.126.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.126.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.126.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.127.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.127.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.127.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.127.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.127.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.127.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.13.down_proj.weight": "model-00004-of-00046.safetensors", + 
"model.language_model.layers.2.mlp.experts.13.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.13.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.13.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.13.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.13.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.14.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.14.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.14.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.14.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.14.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.14.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.15.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.15.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.15.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.15.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.15.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.15.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.16.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.16.down_proj.weight_scale": 
"model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.16.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.16.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.16.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.16.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.17.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.17.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.17.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.17.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.17.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.17.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.18.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.18.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.18.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.18.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.18.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.18.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.19.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.19.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.19.gate_proj.weight": 
"model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.19.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.19.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.19.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.2.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.2.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.2.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.2.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.2.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.2.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.20.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.20.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.20.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.20.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.20.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.20.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.21.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.21.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.21.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.21.gate_proj.weight_scale": 
"model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.21.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.21.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.22.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.22.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.22.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.22.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.22.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.22.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.23.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.23.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.23.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.23.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.23.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.23.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.24.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.24.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.24.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.24.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.24.up_proj.weight": 
"model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.24.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.25.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.25.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.25.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.25.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.25.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.25.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.26.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.26.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.26.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.26.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.26.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.26.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.27.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.27.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.27.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.27.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.27.up_proj.weight": "model-00004-of-00046.safetensors", + 
"model.language_model.layers.2.mlp.experts.27.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.28.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.28.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.28.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.28.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.28.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.28.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.29.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.29.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.29.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.29.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.29.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.29.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.3.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.3.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.3.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.3.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.3.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.3.up_proj.weight_scale": "model-00004-of-00046.safetensors", + 
"model.language_model.layers.2.mlp.experts.30.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.30.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.30.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.30.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.30.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.30.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.31.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.31.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.31.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.31.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.31.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.31.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.32.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.32.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.32.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.32.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.32.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.32.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.33.down_proj.weight": "model-00004-of-00046.safetensors", + 
"model.language_model.layers.2.mlp.experts.33.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.33.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.33.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.33.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.33.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.34.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.34.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.34.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.34.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.34.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.34.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.35.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.35.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.35.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.35.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.35.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.35.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.36.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.36.down_proj.weight_scale": 
"model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.36.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.36.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.36.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.36.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.37.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.37.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.37.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.37.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.37.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.37.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.38.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.38.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.38.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.38.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.38.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.38.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.39.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.39.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.39.gate_proj.weight": 
"model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.39.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.39.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.39.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.4.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.4.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.4.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.4.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.4.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.4.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.40.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.40.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.40.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.40.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.40.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.40.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.41.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.41.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.41.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.41.gate_proj.weight_scale": 
"model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.41.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.41.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.42.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.42.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.42.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.42.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.42.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.42.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.43.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.43.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.43.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.43.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.43.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.43.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.44.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.44.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.44.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.44.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.44.up_proj.weight": 
"model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.44.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.45.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.45.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.45.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.45.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.45.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.45.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.46.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.46.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.46.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.46.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.46.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.46.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.47.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.47.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.47.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.47.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.47.up_proj.weight": "model-00004-of-00046.safetensors", + 
"model.language_model.layers.2.mlp.experts.47.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.48.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.48.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.48.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.48.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.48.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.48.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.49.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.49.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.49.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.49.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.49.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.49.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.5.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.5.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.5.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.5.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.5.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.5.up_proj.weight_scale": "model-00004-of-00046.safetensors", + 
"model.language_model.layers.2.mlp.experts.50.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.50.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.50.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.50.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.50.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.50.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.51.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.51.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.51.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.51.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.51.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.51.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.52.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.52.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.52.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.52.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.52.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.52.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.53.down_proj.weight": "model-00005-of-00046.safetensors", + 
"model.language_model.layers.2.mlp.experts.53.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.53.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.53.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.53.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.53.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.54.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.54.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.54.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.54.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.54.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.54.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.55.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.55.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.55.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.55.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.55.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.55.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.56.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.56.down_proj.weight_scale": 
"model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.56.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.56.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.56.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.56.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.57.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.57.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.57.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.57.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.57.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.57.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.58.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.58.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.58.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.58.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.58.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.58.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.59.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.59.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.59.gate_proj.weight": 
"model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.59.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.59.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.59.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.6.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.6.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.6.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.6.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.6.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.6.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.60.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.60.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.60.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.60.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.60.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.60.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.61.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.61.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.61.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.61.gate_proj.weight_scale": 
"model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.61.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.61.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.62.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.62.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.62.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.62.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.62.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.62.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.63.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.63.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.63.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.63.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.63.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.63.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.64.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.64.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.64.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.64.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.64.up_proj.weight": 
"model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.64.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.65.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.65.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.65.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.65.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.65.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.65.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.66.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.66.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.66.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.66.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.66.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.66.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.67.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.67.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.67.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.67.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.67.up_proj.weight": "model-00005-of-00046.safetensors", + 
"model.language_model.layers.2.mlp.experts.67.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.68.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.68.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.68.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.68.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.68.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.68.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.69.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.69.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.69.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.69.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.69.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.69.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.7.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.7.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.7.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.7.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.7.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.7.up_proj.weight_scale": "model-00004-of-00046.safetensors", + 
"model.language_model.layers.2.mlp.experts.70.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.70.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.70.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.70.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.70.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.70.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.71.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.71.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.71.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.71.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.71.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.71.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.72.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.72.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.72.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.72.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.72.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.72.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.73.down_proj.weight": "model-00005-of-00046.safetensors", + 
"model.language_model.layers.2.mlp.experts.73.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.73.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.73.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.73.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.73.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.74.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.74.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.74.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.74.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.74.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.74.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.75.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.75.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.75.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.75.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.75.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.75.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.76.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.76.down_proj.weight_scale": 
"model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.76.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.76.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.76.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.76.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.77.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.77.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.77.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.77.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.77.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.77.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.78.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.78.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.78.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.78.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.78.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.78.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.79.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.79.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.79.gate_proj.weight": 
"model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.79.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.79.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.79.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.8.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.8.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.8.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.8.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.8.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.8.up_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.80.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.80.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.80.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.80.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.80.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.80.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.81.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.81.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.81.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.81.gate_proj.weight_scale": 
"model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.81.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.81.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.82.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.82.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.82.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.82.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.82.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.82.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.83.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.83.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.83.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.83.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.83.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.83.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.84.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.84.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.84.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.84.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.84.up_proj.weight": 
"model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.84.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.85.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.85.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.85.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.85.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.85.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.85.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.86.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.86.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.86.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.86.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.86.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.86.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.87.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.87.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.87.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.87.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.87.up_proj.weight": "model-00005-of-00046.safetensors", + 
"model.language_model.layers.2.mlp.experts.87.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.88.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.88.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.88.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.88.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.88.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.88.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.89.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.89.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.89.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.89.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.89.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.89.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.9.down_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.9.down_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.9.gate_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.9.gate_proj.weight_scale": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.9.up_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.9.up_proj.weight_scale": "model-00004-of-00046.safetensors", + 
"model.language_model.layers.2.mlp.experts.90.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.90.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.90.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.90.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.90.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.90.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.91.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.91.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.91.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.91.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.91.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.91.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.92.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.92.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.92.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.92.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.92.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.92.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.93.down_proj.weight": "model-00005-of-00046.safetensors", + 
"model.language_model.layers.2.mlp.experts.93.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.93.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.93.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.93.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.93.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.94.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.94.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.94.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.94.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.94.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.94.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.95.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.95.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.95.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.95.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.95.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.95.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.96.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.96.down_proj.weight_scale": 
"model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.96.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.96.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.96.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.96.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.97.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.97.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.97.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.97.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.97.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.97.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.98.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.98.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.98.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.98.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.98.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.98.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.99.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.99.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.99.gate_proj.weight": 
"model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.99.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.99.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.experts.99.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.gate.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.mlp.shared_experts.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.shared_experts.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.shared_experts.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.shared_experts.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.shared_experts.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.mlp.shared_experts.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.2.self_attn.k_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.self_attn.o_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.self_attn.q_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.2.self_attn.v_proj.weight": "model-00004-of-00046.safetensors", + "model.language_model.layers.20.input_layernorm.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.0.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.0.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.0.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.0.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + 
"model.language_model.layers.20.mlp.experts.0.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.0.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.1.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.1.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.1.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.1.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.1.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.1.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.10.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.10.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.10.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.10.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.10.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.10.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.100.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.100.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.100.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.100.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.100.up_proj.weight": 
"model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.100.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.101.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.101.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.101.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.101.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.101.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.101.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.102.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.102.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.102.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.102.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.102.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.102.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.103.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.103.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.103.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.103.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.103.up_proj.weight": "model-00030-of-00046.safetensors", + 
"model.language_model.layers.20.mlp.experts.103.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.104.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.104.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.104.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.104.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.104.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.104.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.105.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.105.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.105.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.105.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.105.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.105.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.106.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.106.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.106.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.106.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.106.up_proj.weight": "model-00030-of-00046.safetensors", + 
"model.language_model.layers.20.mlp.experts.106.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.107.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.107.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.107.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.107.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.107.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.107.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.108.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.108.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.108.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.108.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.108.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.108.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.109.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.109.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.109.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.109.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.109.up_proj.weight": "model-00030-of-00046.safetensors", + 
"model.language_model.layers.20.mlp.experts.109.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.11.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.11.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.11.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.11.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.11.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.11.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.110.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.110.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.110.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.110.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.110.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.110.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.111.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.111.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.111.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.111.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.111.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.111.up_proj.weight_scale": 
"model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.112.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.112.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.112.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.112.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.112.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.112.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.113.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.113.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.113.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.113.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.113.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.113.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.114.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.114.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.114.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.114.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.114.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.114.up_proj.weight_scale": "model-00030-of-00046.safetensors", + 
"model.language_model.layers.20.mlp.experts.115.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.115.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.115.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.115.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.115.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.115.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.116.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.116.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.116.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.116.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.116.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.116.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.117.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.117.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.117.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.117.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.117.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.117.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.118.down_proj.weight": 
"model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.118.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.118.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.118.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.118.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.118.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.119.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.119.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.119.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.119.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.119.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.119.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.12.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.12.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.12.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.12.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.12.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.12.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.120.down_proj.weight": "model-00030-of-00046.safetensors", + 
"model.language_model.layers.20.mlp.experts.120.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.120.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.120.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.120.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.120.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.121.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.121.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.121.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.121.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.121.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.121.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.122.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.122.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.122.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.122.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.122.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.122.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.123.down_proj.weight": "model-00030-of-00046.safetensors", + 
"model.language_model.layers.20.mlp.experts.123.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.123.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.123.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.123.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.123.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.124.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.124.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.124.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.124.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.124.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.124.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.125.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.125.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.125.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.125.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.125.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.125.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.126.down_proj.weight": "model-00030-of-00046.safetensors", + 
"model.language_model.layers.20.mlp.experts.126.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.126.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.126.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.126.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.126.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.127.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.127.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.127.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.127.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.127.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.127.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.13.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.13.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.13.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.13.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.13.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.13.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.14.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.14.down_proj.weight_scale": 
"model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.14.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.14.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.14.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.14.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.15.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.15.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.15.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.15.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.15.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.15.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.16.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.16.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.16.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.16.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.16.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.16.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.17.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.17.down_proj.weight_scale": "model-00029-of-00046.safetensors", + 
"model.language_model.layers.20.mlp.experts.17.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.17.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.17.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.17.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.18.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.18.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.18.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.18.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.18.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.18.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.19.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.19.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.19.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.19.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.19.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.19.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.2.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.2.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.2.gate_proj.weight": 
"model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.2.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.2.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.2.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.20.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.20.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.20.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.20.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.20.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.20.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.21.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.21.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.21.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.21.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.21.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.21.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.22.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.22.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.22.gate_proj.weight": "model-00029-of-00046.safetensors", + 
"model.language_model.layers.20.mlp.experts.22.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.22.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.22.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.23.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.23.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.23.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.23.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.23.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.23.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.24.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.24.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.24.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.24.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.24.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.24.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.25.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.25.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.25.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.25.gate_proj.weight_scale": 
"model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.25.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.25.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.26.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.26.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.26.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.26.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.26.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.26.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.27.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.27.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.27.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.27.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.27.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.27.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.28.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.28.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.28.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.28.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + 
"model.language_model.layers.20.mlp.experts.28.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.28.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.29.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.29.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.29.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.29.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.29.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.29.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.3.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.3.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.3.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.3.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.3.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.3.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.30.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.30.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.30.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.30.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.30.up_proj.weight": 
"model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.30.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.31.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.31.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.31.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.31.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.31.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.31.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.32.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.32.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.32.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.32.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.32.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.32.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.33.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.33.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.33.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.33.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.33.up_proj.weight": "model-00029-of-00046.safetensors", + 
"model.language_model.layers.20.mlp.experts.33.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.34.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.34.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.34.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.34.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.34.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.34.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.35.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.35.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.35.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.35.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.35.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.35.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.36.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.36.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.36.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.36.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.36.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.36.up_proj.weight_scale": 
"model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.37.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.37.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.37.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.37.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.37.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.37.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.38.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.38.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.38.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.38.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.38.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.38.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.39.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.39.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.39.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.39.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.39.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.39.up_proj.weight_scale": "model-00029-of-00046.safetensors", + 
"model.language_model.layers.20.mlp.experts.4.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.4.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.4.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.4.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.4.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.4.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.40.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.40.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.40.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.40.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.40.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.40.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.41.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.41.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.41.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.41.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.41.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.41.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.42.down_proj.weight": 
"model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.42.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.42.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.42.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.42.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.42.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.43.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.43.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.43.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.43.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.43.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.43.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.44.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.44.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.44.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.44.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.44.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.44.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.45.down_proj.weight": "model-00030-of-00046.safetensors", + 
"model.language_model.layers.20.mlp.experts.45.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.45.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.45.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.45.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.45.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.46.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.46.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.46.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.46.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.46.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.46.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.47.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.47.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.47.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.47.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.47.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.47.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.48.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.48.down_proj.weight_scale": 
"model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.48.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.48.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.48.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.48.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.49.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.49.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.49.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.49.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.49.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.49.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.5.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.5.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.5.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.5.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.5.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.5.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.50.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.50.down_proj.weight_scale": "model-00030-of-00046.safetensors", + 
"model.language_model.layers.20.mlp.experts.50.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.50.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.50.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.50.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.51.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.51.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.51.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.51.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.51.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.51.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.52.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.52.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.52.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.52.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.52.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.52.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.53.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.53.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.53.gate_proj.weight": 
"model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.53.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.53.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.53.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.54.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.54.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.54.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.54.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.54.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.54.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.55.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.55.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.55.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.55.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.55.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.55.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.56.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.56.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.56.gate_proj.weight": "model-00030-of-00046.safetensors", + 
"model.language_model.layers.20.mlp.experts.56.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.56.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.56.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.57.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.57.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.57.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.57.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.57.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.57.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.58.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.58.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.58.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.58.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.58.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.58.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.59.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.59.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.59.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.59.gate_proj.weight_scale": 
"model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.59.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.59.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.6.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.6.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.6.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.6.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.6.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.6.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.60.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.60.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.60.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.60.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.60.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.60.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.61.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.61.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.61.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.61.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + 
"model.language_model.layers.20.mlp.experts.61.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.61.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.62.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.62.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.62.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.62.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.62.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.62.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.63.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.63.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.63.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.63.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.63.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.63.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.64.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.64.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.64.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.64.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.64.up_proj.weight": 
"model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.64.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.65.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.65.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.65.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.65.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.65.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.65.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.66.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.66.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.66.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.66.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.66.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.66.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.67.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.67.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.67.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.67.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.67.up_proj.weight": "model-00030-of-00046.safetensors", + 
"model.language_model.layers.20.mlp.experts.67.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.68.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.68.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.68.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.68.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.68.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.68.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.69.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.69.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.69.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.69.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.69.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.69.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.7.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.7.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.7.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.7.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.7.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.7.up_proj.weight_scale": 
"model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.70.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.70.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.70.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.70.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.70.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.70.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.71.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.71.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.71.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.71.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.71.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.71.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.72.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.72.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.72.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.72.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.72.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.72.up_proj.weight_scale": "model-00030-of-00046.safetensors", + 
"model.language_model.layers.20.mlp.experts.73.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.73.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.73.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.73.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.73.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.73.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.74.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.74.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.74.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.74.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.74.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.74.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.75.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.75.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.75.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.75.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.75.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.75.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.76.down_proj.weight": 
"model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.76.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.76.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.76.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.76.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.76.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.77.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.77.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.77.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.77.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.77.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.77.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.78.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.78.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.78.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.78.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.78.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.78.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.79.down_proj.weight": "model-00030-of-00046.safetensors", + 
"model.language_model.layers.20.mlp.experts.79.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.79.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.79.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.79.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.79.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.8.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.8.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.8.gate_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.8.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.8.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.8.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.80.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.80.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.80.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.80.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.80.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.80.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.81.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.81.down_proj.weight_scale": 
"model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.81.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.81.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.81.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.81.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.82.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.82.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.82.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.82.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.82.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.82.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.83.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.83.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.83.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.83.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.83.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.83.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.84.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.84.down_proj.weight_scale": "model-00030-of-00046.safetensors", + 
"model.language_model.layers.20.mlp.experts.84.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.84.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.84.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.84.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.85.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.85.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.85.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.85.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.85.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.85.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.86.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.86.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.86.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.86.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.86.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.86.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.87.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.87.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.87.gate_proj.weight": 
"model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.87.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.87.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.87.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.88.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.88.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.88.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.88.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.88.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.88.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.89.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.89.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.89.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.89.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.89.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.89.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.9.down_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.9.down_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.9.gate_proj.weight": "model-00029-of-00046.safetensors", + 
"model.language_model.layers.20.mlp.experts.9.gate_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.9.up_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.9.up_proj.weight_scale": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.90.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.90.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.90.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.90.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.90.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.90.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.91.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.91.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.91.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.91.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.91.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.91.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.92.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.92.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.92.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.92.gate_proj.weight_scale": 
"model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.92.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.92.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.93.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.93.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.93.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.93.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.93.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.93.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.94.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.94.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.94.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.94.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.94.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.94.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.95.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.95.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.95.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.95.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + 
"model.language_model.layers.20.mlp.experts.95.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.95.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.96.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.96.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.96.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.96.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.96.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.96.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.97.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.97.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.97.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.97.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.97.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.97.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.98.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.98.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.98.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.98.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.98.up_proj.weight": 
"model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.98.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.99.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.99.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.99.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.99.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.99.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.experts.99.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.gate.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.mlp.shared_experts.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.shared_experts.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.shared_experts.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.shared_experts.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.shared_experts.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.mlp.shared_experts.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.20.self_attn.k_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.self_attn.o_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.self_attn.q_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.20.self_attn.v_proj.weight": "model-00029-of-00046.safetensors", + "model.language_model.layers.21.input_layernorm.weight": 
"model-00030-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.0.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.0.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.0.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.0.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.0.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.0.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.1.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.1.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.1.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.1.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.1.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.1.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.10.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.10.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.10.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.10.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.10.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.10.up_proj.weight_scale": "model-00031-of-00046.safetensors", + 
"model.language_model.layers.21.mlp.experts.100.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.100.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.100.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.100.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.100.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.100.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.101.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.101.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.101.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.101.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.101.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.101.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.102.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.102.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.102.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.102.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.102.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.102.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.103.down_proj.weight": 
"model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.103.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.103.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.103.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.103.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.103.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.104.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.104.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.104.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.104.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.104.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.104.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.105.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.105.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.105.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.105.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.105.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.105.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.106.down_proj.weight": "model-00032-of-00046.safetensors", + 
"model.language_model.layers.21.mlp.experts.106.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.106.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.106.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.106.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.106.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.107.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.107.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.107.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.107.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.107.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.107.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.108.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.108.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.108.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.108.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.108.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.108.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.109.down_proj.weight": "model-00032-of-00046.safetensors", + 
"model.language_model.layers.21.mlp.experts.109.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.109.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.109.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.109.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.109.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.11.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.11.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.11.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.11.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.11.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.11.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.110.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.110.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.110.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.110.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.110.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.110.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.111.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.111.down_proj.weight_scale": 
"model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.111.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.111.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.111.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.111.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.112.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.112.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.112.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.112.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.112.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.112.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.113.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.113.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.113.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.113.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.113.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.113.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.114.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.114.down_proj.weight_scale": "model-00032-of-00046.safetensors", + 
"model.language_model.layers.21.mlp.experts.114.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.114.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.114.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.114.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.115.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.115.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.115.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.115.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.115.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.115.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.116.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.116.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.116.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.116.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.116.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.116.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.117.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.117.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.117.gate_proj.weight": 
"model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.117.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.117.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.117.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.118.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.118.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.118.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.118.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.118.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.118.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.119.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.119.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.119.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.119.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.119.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.119.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.12.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.12.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.12.gate_proj.weight": "model-00031-of-00046.safetensors", + 
"model.language_model.layers.21.mlp.experts.12.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.12.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.12.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.120.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.120.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.120.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.120.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.120.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.120.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.121.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.121.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.121.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.121.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.121.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.121.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.122.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.122.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.122.gate_proj.weight": "model-00032-of-00046.safetensors", + 
"model.language_model.layers.21.mlp.experts.122.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.122.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.122.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.123.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.123.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.123.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.123.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.123.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.123.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.124.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.124.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.124.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.124.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.124.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.124.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.125.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.125.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.125.gate_proj.weight": "model-00032-of-00046.safetensors", + 
"model.language_model.layers.21.mlp.experts.125.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.125.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.125.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.126.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.126.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.126.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.126.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.126.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.126.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.127.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.127.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.127.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.127.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.127.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.127.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.13.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.13.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.13.gate_proj.weight": "model-00031-of-00046.safetensors", + 
"model.language_model.layers.21.mlp.experts.13.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.13.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.13.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.14.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.14.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.14.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.14.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.14.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.14.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.15.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.15.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.15.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.15.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.15.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.15.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.16.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.16.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.16.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.16.gate_proj.weight_scale": 
"model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.16.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.16.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.17.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.17.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.17.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.17.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.17.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.17.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.18.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.18.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.18.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.18.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.18.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.18.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.19.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.19.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.19.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.19.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + 
"model.language_model.layers.21.mlp.experts.19.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.19.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.2.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.2.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.2.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.2.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.2.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.2.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.20.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.20.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.20.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.20.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.20.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.20.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.21.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.21.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.21.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.21.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.21.up_proj.weight": 
"model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.21.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.22.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.22.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.22.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.22.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.22.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.22.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.23.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.23.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.23.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.23.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.23.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.23.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.24.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.24.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.24.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.24.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.24.up_proj.weight": "model-00031-of-00046.safetensors", + 
"model.language_model.layers.21.mlp.experts.24.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.25.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.25.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.25.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.25.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.25.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.25.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.26.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.26.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.26.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.26.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.26.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.26.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.27.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.27.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.27.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.27.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.27.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.27.up_proj.weight_scale": 
"model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.28.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.28.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.28.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.28.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.28.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.28.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.29.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.29.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.29.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.29.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.29.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.29.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.3.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.3.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.3.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.3.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.3.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.3.up_proj.weight_scale": "model-00030-of-00046.safetensors", + 
"model.language_model.layers.21.mlp.experts.30.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.30.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.30.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.30.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.30.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.30.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.31.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.31.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.31.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.31.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.31.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.31.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.32.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.32.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.32.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.32.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.32.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.32.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.33.down_proj.weight": 
"model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.33.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.33.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.33.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.33.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.33.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.34.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.34.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.34.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.34.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.34.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.34.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.35.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.35.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.35.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.35.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.35.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.35.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.36.down_proj.weight": "model-00031-of-00046.safetensors", + 
"model.language_model.layers.21.mlp.experts.36.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.36.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.36.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.36.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.36.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.37.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.37.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.37.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.37.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.37.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.37.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.38.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.38.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.38.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.38.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.38.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.38.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.39.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.39.down_proj.weight_scale": 
"model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.39.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.39.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.39.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.39.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.4.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.4.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.4.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.4.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.4.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.4.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.40.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.40.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.40.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.40.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.40.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.40.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.41.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.41.down_proj.weight_scale": "model-00031-of-00046.safetensors", + 
"model.language_model.layers.21.mlp.experts.41.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.41.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.41.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.41.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.42.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.42.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.42.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.42.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.42.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.42.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.43.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.43.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.43.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.43.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.43.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.43.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.44.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.44.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.44.gate_proj.weight": 
"model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.44.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.44.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.44.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.45.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.45.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.45.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.45.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.45.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.45.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.46.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.46.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.46.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.46.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.46.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.46.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.47.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.47.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.47.gate_proj.weight": "model-00031-of-00046.safetensors", + 
"model.language_model.layers.21.mlp.experts.47.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.47.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.47.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.48.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.48.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.48.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.48.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.48.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.48.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.49.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.49.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.49.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.49.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.49.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.49.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.5.down_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.5.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.5.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.5.gate_proj.weight_scale": 
"model-00030-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.5.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.5.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.50.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.50.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.50.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.50.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.50.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.50.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.51.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.51.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.51.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.51.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.51.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.51.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.52.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.52.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.52.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.52.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + 
"model.language_model.layers.21.mlp.experts.52.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.52.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.53.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.53.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.53.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.53.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.53.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.53.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.54.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.54.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.54.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.54.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.54.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.54.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.55.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.55.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.55.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.55.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.55.up_proj.weight": 
"model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.55.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.56.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.56.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.56.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.56.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.56.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.56.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.57.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.57.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.57.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.57.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.57.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.57.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.58.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.58.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.58.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.58.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.58.up_proj.weight": "model-00031-of-00046.safetensors", + 
"model.language_model.layers.21.mlp.experts.58.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.59.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.59.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.59.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.59.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.59.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.59.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.6.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.6.down_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.6.gate_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.6.gate_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.6.up_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.6.up_proj.weight_scale": "model-00030-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.60.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.60.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.60.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.60.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.60.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.60.up_proj.weight_scale": 
"model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.61.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.61.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.61.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.61.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.61.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.61.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.62.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.62.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.62.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.62.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.62.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.62.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.63.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.63.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.63.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.63.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.63.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.63.up_proj.weight_scale": "model-00031-of-00046.safetensors", + 
"model.language_model.layers.21.mlp.experts.64.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.64.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.64.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.64.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.64.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.64.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.65.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.65.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.65.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.65.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.65.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.65.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.66.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.66.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.66.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.66.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.66.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.66.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.67.down_proj.weight": 
"model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.67.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.67.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.67.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.67.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.67.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.68.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.68.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.68.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.68.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.68.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.68.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.69.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.69.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.69.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.69.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.69.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.69.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.7.down_proj.weight": "model-00031-of-00046.safetensors", + 
"model.language_model.layers.21.mlp.experts.7.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.7.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.7.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.7.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.7.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.70.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.70.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.70.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.70.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.70.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.70.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.71.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.71.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.71.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.71.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.71.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.71.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.72.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.72.down_proj.weight_scale": 
"model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.72.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.72.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.72.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.72.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.73.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.73.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.73.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.73.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.73.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.73.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.74.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.74.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.74.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.74.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.74.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.74.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.75.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.75.down_proj.weight_scale": "model-00031-of-00046.safetensors", + 
"model.language_model.layers.21.mlp.experts.75.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.75.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.75.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.75.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.76.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.76.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.76.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.76.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.76.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.76.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.77.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.77.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.77.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.77.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.77.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.77.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.78.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.78.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.78.gate_proj.weight": 
"model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.78.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.78.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.78.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.79.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.79.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.79.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.79.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.79.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.79.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.8.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.8.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.8.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.8.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.8.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.8.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.80.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.80.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.80.gate_proj.weight": "model-00031-of-00046.safetensors", + 
"model.language_model.layers.21.mlp.experts.80.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.80.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.80.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.81.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.81.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.81.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.81.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.81.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.81.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.82.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.82.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.82.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.82.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.82.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.82.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.83.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.83.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.83.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.83.gate_proj.weight_scale": 
"model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.83.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.83.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.84.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.84.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.84.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.84.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.84.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.84.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.85.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.85.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.85.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.85.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.85.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.85.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.86.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.86.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.86.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.86.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + 
"model.language_model.layers.21.mlp.experts.86.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.86.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.87.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.87.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.87.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.87.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.87.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.87.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.88.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.88.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.88.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.88.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.88.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.88.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.89.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.89.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.89.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.89.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.89.up_proj.weight": 
"model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.89.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.9.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.9.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.9.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.9.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.9.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.9.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.90.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.90.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.90.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.90.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.90.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.90.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.91.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.91.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.91.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.91.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.91.up_proj.weight": "model-00031-of-00046.safetensors", + 
"model.language_model.layers.21.mlp.experts.91.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.92.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.92.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.92.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.92.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.92.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.92.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.93.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.93.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.93.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.93.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.93.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.93.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.94.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.94.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.94.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.94.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.94.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.94.up_proj.weight_scale": 
"model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.95.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.95.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.95.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.95.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.95.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.95.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.96.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.96.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.96.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.96.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.96.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.96.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.97.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.97.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.97.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.97.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.97.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.97.up_proj.weight_scale": "model-00031-of-00046.safetensors", + 
"model.language_model.layers.21.mlp.experts.98.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.98.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.98.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.98.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.98.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.98.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.99.down_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.99.down_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.99.gate_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.99.gate_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.99.up_proj.weight": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.experts.99.up_proj.weight_scale": "model-00031-of-00046.safetensors", + "model.language_model.layers.21.mlp.gate.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.21.mlp.shared_experts.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.shared_experts.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.shared_experts.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.shared_experts.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.shared_experts.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.21.mlp.shared_experts.up_proj.weight_scale": 
"model-00032-of-00046.safetensors", + "model.language_model.layers.21.self_attn.k_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.21.self_attn.o_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.21.self_attn.q_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.21.self_attn.v_proj.weight": "model-00030-of-00046.safetensors", + "model.language_model.layers.22.input_layernorm.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.0.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.0.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.0.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.0.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.0.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.0.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.1.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.1.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.1.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.1.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.1.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.1.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.10.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.10.down_proj.weight_scale": "model-00032-of-00046.safetensors", + 
"model.language_model.layers.22.mlp.experts.10.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.10.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.10.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.10.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.100.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.100.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.100.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.100.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.100.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.100.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.101.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.101.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.101.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.101.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.101.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.101.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.102.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.102.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.102.gate_proj.weight": 
"model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.102.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.102.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.102.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.103.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.103.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.103.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.103.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.103.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.103.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.104.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.104.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.104.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.104.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.104.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.104.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.105.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.105.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.105.gate_proj.weight": "model-00033-of-00046.safetensors", + 
"model.language_model.layers.22.mlp.experts.105.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.105.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.105.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.106.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.106.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.106.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.106.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.106.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.106.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.107.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.107.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.107.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.107.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.107.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.107.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.108.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.108.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.108.gate_proj.weight": "model-00033-of-00046.safetensors", + 
"model.language_model.layers.22.mlp.experts.108.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.108.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.108.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.109.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.109.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.109.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.109.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.109.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.109.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.11.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.11.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.11.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.11.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.11.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.11.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.110.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.110.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.110.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.110.gate_proj.weight_scale": 
"model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.110.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.110.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.111.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.111.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.111.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.111.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.111.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.111.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.112.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.112.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.112.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.112.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.112.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.112.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.113.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.113.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.113.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.113.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + 
"model.language_model.layers.22.mlp.experts.113.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.113.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.114.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.114.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.114.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.114.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.114.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.114.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.115.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.115.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.115.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.115.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.115.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.115.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.116.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.116.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.116.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.116.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.116.up_proj.weight": 
"model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.116.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.117.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.117.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.117.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.117.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.117.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.117.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.118.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.118.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.118.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.118.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.118.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.118.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.119.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.119.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.119.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.119.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.119.up_proj.weight": "model-00033-of-00046.safetensors", + 
"model.language_model.layers.22.mlp.experts.119.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.12.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.12.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.12.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.12.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.12.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.12.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.120.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.120.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.120.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.120.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.120.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.120.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.121.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.121.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.121.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.121.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.121.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.121.up_proj.weight_scale": 
"model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.122.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.122.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.122.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.122.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.122.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.122.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.123.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.123.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.123.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.123.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.123.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.123.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.124.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.124.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.124.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.124.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.124.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.124.up_proj.weight_scale": "model-00033-of-00046.safetensors", + 
"model.language_model.layers.22.mlp.experts.125.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.125.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.125.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.125.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.125.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.125.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.126.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.126.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.126.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.126.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.126.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.126.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.127.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.127.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.127.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.127.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.127.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.127.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.13.down_proj.weight": 
"model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.13.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.13.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.13.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.13.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.13.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.14.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.14.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.14.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.14.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.14.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.14.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.15.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.15.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.15.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.15.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.15.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.15.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.16.down_proj.weight": "model-00032-of-00046.safetensors", + 
"model.language_model.layers.22.mlp.experts.16.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.16.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.16.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.16.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.16.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.17.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.17.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.17.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.17.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.17.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.17.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.18.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.18.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.18.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.18.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.18.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.18.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.19.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.19.down_proj.weight_scale": 
"model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.19.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.19.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.19.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.19.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.2.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.2.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.2.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.2.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.2.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.2.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.20.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.20.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.20.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.20.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.20.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.20.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.21.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.21.down_proj.weight_scale": "model-00032-of-00046.safetensors", + 
"model.language_model.layers.22.mlp.experts.21.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.21.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.21.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.21.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.22.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.22.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.22.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.22.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.22.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.22.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.23.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.23.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.23.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.23.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.23.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.23.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.24.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.24.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.24.gate_proj.weight": 
"model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.24.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.24.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.24.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.25.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.25.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.25.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.25.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.25.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.25.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.26.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.26.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.26.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.26.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.26.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.26.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.27.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.27.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.27.gate_proj.weight": "model-00032-of-00046.safetensors", + 
"model.language_model.layers.22.mlp.experts.27.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.27.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.27.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.28.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.28.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.28.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.28.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.28.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.28.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.29.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.29.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.29.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.29.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.29.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.29.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.3.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.3.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.3.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.3.gate_proj.weight_scale": 
"model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.3.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.3.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.30.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.30.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.30.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.30.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.30.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.30.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.31.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.31.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.31.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.31.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.31.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.31.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.32.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.32.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.32.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.32.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + 
"model.language_model.layers.22.mlp.experts.32.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.32.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.33.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.33.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.33.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.33.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.33.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.33.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.34.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.34.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.34.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.34.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.34.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.34.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.35.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.35.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.35.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.35.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.35.up_proj.weight": 
"model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.35.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.36.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.36.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.36.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.36.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.36.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.36.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.37.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.37.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.37.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.37.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.37.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.37.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.38.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.38.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.38.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.38.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.38.up_proj.weight": "model-00032-of-00046.safetensors", + 
"model.language_model.layers.22.mlp.experts.38.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.39.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.39.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.39.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.39.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.39.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.39.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.4.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.4.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.4.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.4.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.4.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.4.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.40.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.40.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.40.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.40.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.40.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.40.up_proj.weight_scale": 
"model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.41.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.41.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.41.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.41.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.41.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.41.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.42.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.42.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.42.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.42.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.42.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.42.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.43.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.43.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.43.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.43.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.43.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.43.up_proj.weight_scale": "model-00032-of-00046.safetensors", + 
"model.language_model.layers.22.mlp.experts.44.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.44.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.44.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.44.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.44.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.44.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.45.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.45.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.45.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.45.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.45.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.45.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.46.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.46.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.46.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.46.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.46.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.46.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.47.down_proj.weight": 
"model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.47.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.47.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.47.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.47.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.47.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.48.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.48.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.48.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.48.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.48.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.48.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.49.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.49.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.49.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.49.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.49.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.49.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.5.down_proj.weight": "model-00032-of-00046.safetensors", + 
"model.language_model.layers.22.mlp.experts.5.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.5.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.5.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.5.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.5.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.50.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.50.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.50.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.50.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.50.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.50.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.51.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.51.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.51.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.51.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.51.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.51.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.52.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.52.down_proj.weight_scale": 
"model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.52.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.52.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.52.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.52.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.53.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.53.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.53.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.53.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.53.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.53.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.54.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.54.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.54.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.54.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.54.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.54.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.55.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.55.down_proj.weight_scale": "model-00032-of-00046.safetensors", + 
"model.language_model.layers.22.mlp.experts.55.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.55.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.55.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.55.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.56.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.56.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.56.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.56.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.56.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.56.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.57.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.57.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.57.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.57.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.57.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.57.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.58.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.58.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.58.gate_proj.weight": 
"model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.58.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.58.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.58.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.59.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.59.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.59.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.59.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.59.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.59.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.6.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.6.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.6.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.6.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.6.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.6.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.60.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.60.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.60.gate_proj.weight": "model-00032-of-00046.safetensors", + 
"model.language_model.layers.22.mlp.experts.60.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.60.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.60.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.61.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.61.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.61.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.61.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.61.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.61.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.62.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.62.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.62.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.62.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.62.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.62.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.63.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.63.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.63.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.63.gate_proj.weight_scale": 
"model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.63.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.63.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.64.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.64.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.64.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.64.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.64.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.64.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.65.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.65.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.65.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.65.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.65.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.65.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.66.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.66.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.66.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.66.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + 
"model.language_model.layers.22.mlp.experts.66.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.66.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.67.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.67.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.67.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.67.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.67.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.67.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.68.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.68.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.68.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.68.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.68.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.68.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.69.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.69.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.69.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.69.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.69.up_proj.weight": 
"model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.69.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.7.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.7.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.7.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.7.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.7.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.7.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.70.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.70.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.70.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.70.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.70.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.70.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.71.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.71.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.71.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.71.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.71.up_proj.weight": "model-00033-of-00046.safetensors", + 
"model.language_model.layers.22.mlp.experts.71.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.72.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.72.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.72.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.72.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.72.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.72.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.73.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.73.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.73.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.73.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.73.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.73.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.74.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.74.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.74.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.74.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.74.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.74.up_proj.weight_scale": 
"model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.75.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.75.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.75.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.75.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.75.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.75.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.76.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.76.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.76.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.76.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.76.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.76.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.77.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.77.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.77.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.77.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.77.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.77.up_proj.weight_scale": "model-00033-of-00046.safetensors", + 
"model.language_model.layers.22.mlp.experts.78.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.78.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.78.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.78.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.78.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.78.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.79.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.79.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.79.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.79.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.79.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.79.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.8.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.8.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.8.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.8.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.8.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.8.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.80.down_proj.weight": 
"model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.80.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.80.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.80.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.80.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.80.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.81.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.81.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.81.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.81.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.81.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.81.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.82.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.82.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.82.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.82.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.82.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.82.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.83.down_proj.weight": "model-00033-of-00046.safetensors", + 
"model.language_model.layers.22.mlp.experts.83.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.83.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.83.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.83.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.83.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.84.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.84.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.84.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.84.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.84.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.84.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.85.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.85.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.85.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.85.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.85.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.85.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.86.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.86.down_proj.weight_scale": 
"model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.86.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.86.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.86.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.86.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.87.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.87.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.87.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.87.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.87.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.87.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.88.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.88.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.88.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.88.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.88.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.88.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.89.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.89.down_proj.weight_scale": "model-00033-of-00046.safetensors", + 
"model.language_model.layers.22.mlp.experts.89.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.89.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.89.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.89.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.9.down_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.9.down_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.9.gate_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.9.gate_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.9.up_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.9.up_proj.weight_scale": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.90.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.90.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.90.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.90.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.90.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.90.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.91.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.91.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.91.gate_proj.weight": 
"model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.91.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.91.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.91.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.92.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.92.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.92.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.92.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.92.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.92.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.93.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.93.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.93.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.93.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.93.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.93.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.94.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.94.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.94.gate_proj.weight": "model-00033-of-00046.safetensors", + 
"model.language_model.layers.22.mlp.experts.94.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.94.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.94.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.95.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.95.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.95.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.95.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.95.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.95.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.96.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.96.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.96.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.96.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.96.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.96.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.97.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.97.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.97.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.97.gate_proj.weight_scale": 
"model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.97.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.97.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.98.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.98.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.98.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.98.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.98.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.98.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.99.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.99.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.99.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.99.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.99.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.experts.99.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.gate.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.mlp.shared_experts.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.shared_experts.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.shared_experts.gate_proj.weight": "model-00033-of-00046.safetensors", + 
"model.language_model.layers.22.mlp.shared_experts.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.shared_experts.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.mlp.shared_experts.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.22.self_attn.k_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.self_attn.o_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.self_attn.q_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.22.self_attn.v_proj.weight": "model-00032-of-00046.safetensors", + "model.language_model.layers.23.input_layernorm.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.0.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.0.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.0.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.0.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.0.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.0.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.1.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.1.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.1.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.1.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.1.up_proj.weight": "model-00033-of-00046.safetensors", + 
"model.language_model.layers.23.mlp.experts.1.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.10.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.10.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.10.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.10.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.10.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.10.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.100.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.100.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.100.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.100.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.100.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.100.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.101.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.101.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.101.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.101.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.101.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.101.up_proj.weight_scale": 
"model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.102.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.102.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.102.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.102.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.102.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.102.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.103.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.103.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.103.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.103.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.103.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.103.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.104.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.104.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.104.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.104.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.104.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.104.up_proj.weight_scale": "model-00034-of-00046.safetensors", + 
"model.language_model.layers.23.mlp.experts.105.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.105.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.105.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.105.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.105.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.105.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.106.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.106.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.106.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.106.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.106.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.106.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.107.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.107.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.107.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.107.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.107.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.107.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.108.down_proj.weight": 
"model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.108.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.108.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.108.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.108.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.108.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.109.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.109.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.109.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.109.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.109.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.109.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.11.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.11.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.11.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.11.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.11.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.11.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.110.down_proj.weight": "model-00034-of-00046.safetensors", + 
"model.language_model.layers.23.mlp.experts.110.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.110.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.110.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.110.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.110.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.111.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.111.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.111.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.111.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.111.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.111.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.112.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.112.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.112.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.112.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.112.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.112.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.113.down_proj.weight": "model-00034-of-00046.safetensors", + 
"model.language_model.layers.23.mlp.experts.113.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.113.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.113.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.113.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.113.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.114.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.114.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.114.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.114.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.114.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.114.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.115.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.115.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.115.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.115.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.115.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.115.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.116.down_proj.weight": "model-00034-of-00046.safetensors", + 
"model.language_model.layers.23.mlp.experts.116.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.116.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.116.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.116.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.116.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.117.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.117.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.117.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.117.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.117.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.117.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.118.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.118.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.118.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.118.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.118.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.118.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.119.down_proj.weight": "model-00034-of-00046.safetensors", + 
"model.language_model.layers.23.mlp.experts.119.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.119.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.119.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.119.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.119.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.12.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.12.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.12.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.12.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.12.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.12.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.120.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.120.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.120.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.120.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.120.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.120.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.121.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.121.down_proj.weight_scale": 
"model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.121.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.121.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.121.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.121.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.122.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.122.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.122.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.122.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.122.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.122.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.123.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.123.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.123.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.123.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.123.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.123.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.124.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.124.down_proj.weight_scale": "model-00034-of-00046.safetensors", + 
"model.language_model.layers.23.mlp.experts.124.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.124.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.124.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.124.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.125.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.125.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.125.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.125.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.125.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.125.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.126.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.126.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.126.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.126.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.126.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.126.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.127.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.127.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.127.gate_proj.weight": 
"model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.127.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.127.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.127.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.13.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.13.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.13.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.13.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.13.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.13.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.14.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.14.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.14.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.14.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.14.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.14.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.15.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.15.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.15.gate_proj.weight": "model-00033-of-00046.safetensors", + 
"model.language_model.layers.23.mlp.experts.15.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.15.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.15.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.16.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.16.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.16.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.16.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.16.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.16.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.17.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.17.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.17.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.17.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.17.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.17.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.18.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.18.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.18.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.18.gate_proj.weight_scale": 
"model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.18.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.18.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.19.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.19.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.19.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.19.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.19.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.19.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.2.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.2.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.2.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.2.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.2.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.2.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.20.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.20.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.20.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.20.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + 
"model.language_model.layers.23.mlp.experts.20.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.20.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.21.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.21.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.21.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.21.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.21.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.21.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.22.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.22.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.22.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.22.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.22.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.22.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.23.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.23.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.23.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.23.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.23.up_proj.weight": 
"model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.23.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.24.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.24.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.24.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.24.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.24.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.24.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.25.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.25.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.25.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.25.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.25.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.25.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.26.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.26.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.26.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.26.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.26.up_proj.weight": "model-00033-of-00046.safetensors", + 
"model.language_model.layers.23.mlp.experts.26.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.27.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.27.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.27.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.27.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.27.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.27.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.28.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.28.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.28.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.28.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.28.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.28.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.29.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.29.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.29.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.29.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.29.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.29.up_proj.weight_scale": 
"model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.3.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.3.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.3.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.3.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.3.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.3.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.30.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.30.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.30.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.30.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.30.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.30.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.31.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.31.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.31.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.31.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.31.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.31.up_proj.weight_scale": "model-00034-of-00046.safetensors", + 
"model.language_model.layers.23.mlp.experts.32.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.32.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.32.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.32.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.32.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.32.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.33.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.33.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.33.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.33.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.33.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.33.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.34.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.34.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.34.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.34.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.34.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.34.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.35.down_proj.weight": 
"model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.35.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.35.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.35.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.35.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.35.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.36.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.36.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.36.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.36.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.36.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.36.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.37.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.37.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.37.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.37.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.37.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.37.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.38.down_proj.weight": "model-00034-of-00046.safetensors", + 
"model.language_model.layers.23.mlp.experts.38.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.38.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.38.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.38.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.38.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.39.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.39.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.39.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.39.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.39.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.39.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.4.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.4.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.4.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.4.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.4.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.4.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.40.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.40.down_proj.weight_scale": 
"model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.40.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.40.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.40.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.40.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.41.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.41.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.41.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.41.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.41.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.41.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.42.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.42.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.42.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.42.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.42.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.42.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.43.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.43.down_proj.weight_scale": "model-00034-of-00046.safetensors", + 
"model.language_model.layers.23.mlp.experts.43.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.43.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.43.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.43.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.44.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.44.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.44.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.44.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.44.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.44.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.45.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.45.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.45.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.45.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.45.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.45.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.46.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.46.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.46.gate_proj.weight": 
"model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.46.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.46.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.46.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.47.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.47.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.47.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.47.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.47.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.47.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.48.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.48.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.48.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.48.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.48.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.48.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.49.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.49.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.49.gate_proj.weight": "model-00034-of-00046.safetensors", + 
"model.language_model.layers.23.mlp.experts.49.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.49.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.49.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.5.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.5.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.5.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.5.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.5.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.5.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.50.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.50.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.50.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.50.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.50.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.50.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.51.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.51.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.51.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.51.gate_proj.weight_scale": 
"model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.51.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.51.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.52.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.52.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.52.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.52.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.52.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.52.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.53.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.53.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.53.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.53.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.53.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.53.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.54.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.54.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.54.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.54.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + 
"model.language_model.layers.23.mlp.experts.54.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.54.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.55.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.55.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.55.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.55.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.55.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.55.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.56.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.56.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.56.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.56.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.56.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.56.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.57.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.57.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.57.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.57.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.57.up_proj.weight": 
"model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.57.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.58.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.58.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.58.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.58.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.58.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.58.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.59.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.59.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.59.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.59.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.59.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.59.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.6.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.6.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.6.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.6.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.6.up_proj.weight": "model-00033-of-00046.safetensors", + 
"model.language_model.layers.23.mlp.experts.6.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.60.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.60.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.60.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.60.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.60.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.60.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.61.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.61.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.61.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.61.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.61.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.61.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.62.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.62.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.62.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.62.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.62.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.62.up_proj.weight_scale": 
"model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.63.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.63.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.63.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.63.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.63.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.63.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.64.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.64.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.64.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.64.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.64.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.64.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.65.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.65.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.65.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.65.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.65.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.65.up_proj.weight_scale": "model-00034-of-00046.safetensors", + 
"model.language_model.layers.23.mlp.experts.66.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.66.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.66.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.66.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.66.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.66.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.67.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.67.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.67.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.67.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.67.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.67.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.68.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.68.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.68.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.68.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.68.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.68.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.69.down_proj.weight": 
"model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.69.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.69.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.69.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.69.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.69.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.7.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.7.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.7.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.7.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.7.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.7.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.70.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.70.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.70.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.70.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.70.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.70.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.71.down_proj.weight": "model-00034-of-00046.safetensors", + 
"model.language_model.layers.23.mlp.experts.71.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.71.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.71.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.71.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.71.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.72.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.72.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.72.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.72.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.72.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.72.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.73.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.73.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.73.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.73.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.73.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.73.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.74.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.74.down_proj.weight_scale": 
"model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.74.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.74.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.74.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.74.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.75.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.75.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.75.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.75.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.75.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.75.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.76.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.76.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.76.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.76.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.76.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.76.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.77.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.77.down_proj.weight_scale": "model-00034-of-00046.safetensors", + 
"model.language_model.layers.23.mlp.experts.77.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.77.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.77.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.77.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.78.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.78.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.78.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.78.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.78.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.78.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.79.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.79.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.79.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.79.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.79.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.79.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.8.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.8.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.8.gate_proj.weight": 
"model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.8.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.8.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.8.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.80.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.80.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.80.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.80.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.80.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.80.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.81.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.81.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.81.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.81.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.81.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.81.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.82.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.82.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.82.gate_proj.weight": "model-00034-of-00046.safetensors", + 
"model.language_model.layers.23.mlp.experts.82.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.82.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.82.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.83.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.83.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.83.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.83.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.83.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.83.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.84.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.84.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.84.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.84.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.84.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.84.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.85.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.85.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.85.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.85.gate_proj.weight_scale": 
"model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.85.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.85.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.86.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.86.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.86.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.86.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.86.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.86.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.87.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.87.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.87.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.87.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.87.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.87.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.88.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.88.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.88.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.88.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + 
"model.language_model.layers.23.mlp.experts.88.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.88.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.89.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.89.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.89.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.89.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.89.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.89.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.9.down_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.9.down_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.9.gate_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.9.gate_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.9.up_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.9.up_proj.weight_scale": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.90.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.90.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.90.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.90.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.90.up_proj.weight": 
"model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.90.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.91.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.91.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.91.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.91.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.91.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.91.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.92.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.92.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.92.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.92.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.92.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.92.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.93.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.93.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.93.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.93.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.93.up_proj.weight": "model-00034-of-00046.safetensors", + 
"model.language_model.layers.23.mlp.experts.93.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.94.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.94.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.94.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.94.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.94.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.94.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.95.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.95.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.95.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.95.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.95.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.95.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.96.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.96.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.96.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.96.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.96.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.96.up_proj.weight_scale": 
"model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.97.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.97.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.97.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.97.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.97.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.97.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.98.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.98.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.98.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.98.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.98.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.98.up_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.99.down_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.99.down_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.99.gate_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.99.gate_proj.weight_scale": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.99.up_proj.weight": "model-00034-of-00046.safetensors", + "model.language_model.layers.23.mlp.experts.99.up_proj.weight_scale": "model-00034-of-00046.safetensors", + 
"model.language_model.layers.23.mlp.gate.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.mlp.shared_experts.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.23.mlp.shared_experts.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.23.mlp.shared_experts.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.23.mlp.shared_experts.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.23.mlp.shared_experts.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.23.mlp.shared_experts.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.23.self_attn.k_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.self_attn.o_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.self_attn.q_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.23.self_attn.v_proj.weight": "model-00033-of-00046.safetensors", + "model.language_model.layers.24.input_layernorm.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.0.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.0.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.0.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.0.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.0.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.0.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.1.down_proj.weight": "model-00035-of-00046.safetensors", + 
"model.language_model.layers.24.mlp.experts.1.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.1.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.1.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.1.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.1.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.10.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.10.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.10.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.10.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.10.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.10.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.100.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.100.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.100.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.100.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.100.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.100.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.101.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.101.down_proj.weight_scale": 
"model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.101.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.101.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.101.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.101.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.102.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.102.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.102.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.102.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.102.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.102.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.103.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.103.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.103.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.103.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.103.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.103.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.104.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.104.down_proj.weight_scale": "model-00036-of-00046.safetensors", + 
"model.language_model.layers.24.mlp.experts.104.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.104.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.104.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.104.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.105.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.105.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.105.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.105.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.105.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.105.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.106.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.106.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.106.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.106.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.106.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.106.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.107.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.107.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.107.gate_proj.weight": 
"model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.107.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.107.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.107.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.108.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.108.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.108.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.108.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.108.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.108.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.109.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.109.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.109.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.109.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.109.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.109.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.11.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.11.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.11.gate_proj.weight": "model-00035-of-00046.safetensors", + 
"model.language_model.layers.24.mlp.experts.11.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.11.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.11.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.110.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.110.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.110.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.110.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.110.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.110.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.111.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.111.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.111.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.111.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.111.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.111.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.112.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.112.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.112.gate_proj.weight": "model-00036-of-00046.safetensors", + 
"model.language_model.layers.24.mlp.experts.112.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.112.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.112.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.113.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.113.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.113.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.113.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.113.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.113.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.114.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.114.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.114.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.114.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.114.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.114.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.115.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.115.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.115.gate_proj.weight": "model-00036-of-00046.safetensors", + 
"model.language_model.layers.24.mlp.experts.115.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.115.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.115.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.116.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.116.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.116.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.116.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.116.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.116.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.117.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.117.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.117.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.117.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.117.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.117.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.118.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.118.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.118.gate_proj.weight": "model-00036-of-00046.safetensors", + 
"model.language_model.layers.24.mlp.experts.118.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.118.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.118.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.119.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.119.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.119.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.119.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.119.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.119.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.12.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.12.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.12.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.12.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.12.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.12.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.120.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.120.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.120.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.120.gate_proj.weight_scale": 
"model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.120.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.120.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.121.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.121.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.121.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.121.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.121.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.121.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.122.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.122.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.122.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.122.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.122.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.122.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.123.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.123.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.123.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.123.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + 
"model.language_model.layers.24.mlp.experts.123.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.123.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.124.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.124.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.124.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.124.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.124.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.124.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.125.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.125.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.125.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.125.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.125.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.125.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.126.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.126.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.126.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.126.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.126.up_proj.weight": 
"model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.126.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.127.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.127.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.127.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.127.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.127.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.127.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.13.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.13.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.13.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.13.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.13.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.13.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.14.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.14.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.14.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.14.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.14.up_proj.weight": "model-00035-of-00046.safetensors", + 
"model.language_model.layers.24.mlp.experts.14.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.15.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.15.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.15.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.15.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.15.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.15.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.16.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.16.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.16.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.16.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.16.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.16.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.17.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.17.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.17.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.17.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.17.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.17.up_proj.weight_scale": 
"model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.18.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.18.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.18.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.18.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.18.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.18.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.19.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.19.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.19.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.19.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.19.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.19.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.2.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.2.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.2.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.2.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.2.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.2.up_proj.weight_scale": "model-00035-of-00046.safetensors", + 
"model.language_model.layers.24.mlp.experts.20.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.20.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.20.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.20.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.20.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.20.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.21.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.21.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.21.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.21.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.21.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.21.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.22.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.22.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.22.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.22.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.22.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.22.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.23.down_proj.weight": 
"model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.23.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.23.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.23.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.23.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.23.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.24.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.24.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.24.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.24.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.24.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.24.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.25.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.25.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.25.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.25.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.25.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.25.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.26.down_proj.weight": "model-00035-of-00046.safetensors", + 
"model.language_model.layers.24.mlp.experts.26.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.26.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.26.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.26.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.26.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.27.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.27.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.27.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.27.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.27.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.27.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.28.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.28.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.28.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.28.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.28.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.28.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.29.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.29.down_proj.weight_scale": 
"model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.29.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.29.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.29.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.29.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.3.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.3.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.3.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.3.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.3.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.3.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.30.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.30.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.30.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.30.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.30.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.30.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.31.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.31.down_proj.weight_scale": "model-00035-of-00046.safetensors", + 
"model.language_model.layers.24.mlp.experts.31.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.31.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.31.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.31.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.32.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.32.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.32.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.32.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.32.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.32.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.33.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.33.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.33.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.33.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.33.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.33.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.34.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.34.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.34.gate_proj.weight": 
"model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.34.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.34.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.34.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.35.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.35.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.35.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.35.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.35.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.35.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.36.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.36.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.36.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.36.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.36.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.36.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.37.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.37.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.37.gate_proj.weight": "model-00035-of-00046.safetensors", + 
"model.language_model.layers.24.mlp.experts.37.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.37.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.37.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.38.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.38.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.38.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.38.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.38.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.38.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.39.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.39.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.39.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.39.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.39.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.39.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.4.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.4.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.4.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.4.gate_proj.weight_scale": 
"model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.4.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.4.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.40.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.40.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.40.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.40.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.40.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.40.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.41.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.41.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.41.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.41.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.41.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.41.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.42.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.42.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.42.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.42.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + 
"model.language_model.layers.24.mlp.experts.42.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.42.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.43.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.43.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.43.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.43.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.43.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.43.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.44.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.44.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.44.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.44.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.44.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.44.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.45.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.45.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.45.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.45.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.45.up_proj.weight": 
"model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.45.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.46.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.46.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.46.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.46.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.46.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.46.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.47.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.47.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.47.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.47.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.47.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.47.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.48.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.48.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.48.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.48.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.48.up_proj.weight": "model-00035-of-00046.safetensors", + 
"model.language_model.layers.24.mlp.experts.48.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.49.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.49.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.49.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.49.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.49.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.49.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.5.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.5.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.5.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.5.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.5.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.5.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.50.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.50.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.50.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.50.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.50.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.50.up_proj.weight_scale": 
"model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.51.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.51.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.51.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.51.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.51.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.51.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.52.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.52.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.52.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.52.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.52.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.52.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.53.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.53.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.53.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.53.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.53.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.53.up_proj.weight_scale": "model-00035-of-00046.safetensors", + 
"model.language_model.layers.24.mlp.experts.54.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.54.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.54.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.54.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.54.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.54.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.55.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.55.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.55.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.55.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.55.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.55.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.56.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.56.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.56.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.56.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.56.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.56.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.57.down_proj.weight": 
"model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.57.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.57.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.57.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.57.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.57.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.58.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.58.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.58.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.58.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.58.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.58.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.59.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.59.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.59.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.59.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.59.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.59.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.6.down_proj.weight": "model-00035-of-00046.safetensors", + 
"model.language_model.layers.24.mlp.experts.6.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.6.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.6.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.6.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.6.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.60.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.60.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.60.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.60.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.60.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.60.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.61.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.61.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.61.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.61.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.61.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.61.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.62.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.62.down_proj.weight_scale": 
"model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.62.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.62.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.62.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.62.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.63.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.63.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.63.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.63.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.63.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.63.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.64.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.64.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.64.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.64.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.64.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.64.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.65.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.65.down_proj.weight_scale": "model-00035-of-00046.safetensors", + 
"model.language_model.layers.24.mlp.experts.65.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.65.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.65.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.65.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.66.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.66.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.66.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.66.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.66.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.66.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.67.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.67.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.67.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.67.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.67.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.67.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.68.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.68.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.68.gate_proj.weight": 
"model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.68.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.68.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.68.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.69.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.69.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.69.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.69.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.69.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.69.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.7.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.7.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.7.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.7.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.7.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.7.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.70.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.70.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.70.gate_proj.weight": "model-00035-of-00046.safetensors", + 
"model.language_model.layers.24.mlp.experts.70.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.70.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.70.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.71.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.71.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.71.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.71.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.71.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.71.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.72.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.72.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.72.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.72.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.72.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.72.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.73.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.73.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.73.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.73.gate_proj.weight_scale": 
"model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.73.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.73.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.74.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.74.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.74.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.74.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.74.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.74.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.75.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.75.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.75.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.75.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.75.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.75.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.76.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.76.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.76.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.76.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + 
"model.language_model.layers.24.mlp.experts.76.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.76.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.77.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.77.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.77.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.77.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.77.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.77.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.78.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.78.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.78.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.78.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.78.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.78.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.79.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.79.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.79.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.79.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.79.up_proj.weight": 
"model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.79.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.8.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.8.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.8.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.8.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.8.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.8.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.80.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.80.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.80.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.80.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.80.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.80.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.81.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.81.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.81.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.81.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.81.up_proj.weight": "model-00035-of-00046.safetensors", + 
"model.language_model.layers.24.mlp.experts.81.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.82.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.82.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.82.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.82.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.82.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.82.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.83.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.83.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.83.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.83.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.83.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.83.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.84.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.84.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.84.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.84.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.84.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.84.up_proj.weight_scale": 
"model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.85.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.85.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.85.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.85.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.85.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.85.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.86.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.86.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.86.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.86.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.86.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.86.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.87.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.87.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.87.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.87.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.87.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.87.up_proj.weight_scale": "model-00035-of-00046.safetensors", + 
"model.language_model.layers.24.mlp.experts.88.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.88.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.88.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.88.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.88.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.88.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.89.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.89.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.89.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.89.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.89.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.89.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.9.down_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.9.down_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.9.gate_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.9.gate_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.9.up_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.9.up_proj.weight_scale": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.90.down_proj.weight": 
"model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.90.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.90.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.90.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.90.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.90.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.91.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.91.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.91.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.91.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.91.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.91.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.92.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.92.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.92.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.92.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.92.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.92.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.93.down_proj.weight": "model-00036-of-00046.safetensors", + 
"model.language_model.layers.24.mlp.experts.93.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.93.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.93.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.93.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.93.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.94.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.94.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.94.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.94.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.94.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.94.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.95.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.95.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.95.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.95.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.95.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.95.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.96.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.96.down_proj.weight_scale": 
"model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.96.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.96.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.96.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.96.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.97.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.97.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.97.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.97.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.97.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.97.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.98.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.98.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.98.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.98.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.98.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.98.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.99.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.99.down_proj.weight_scale": "model-00036-of-00046.safetensors", + 
"model.language_model.layers.24.mlp.experts.99.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.99.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.99.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.experts.99.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.gate.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.mlp.shared_experts.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.shared_experts.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.shared_experts.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.shared_experts.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.shared_experts.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.mlp.shared_experts.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.24.self_attn.k_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.self_attn.o_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.self_attn.q_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.24.self_attn.v_proj.weight": "model-00035-of-00046.safetensors", + "model.language_model.layers.25.input_layernorm.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.0.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.0.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.0.gate_proj.weight": "model-00036-of-00046.safetensors", + 
"model.language_model.layers.25.mlp.experts.0.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.0.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.0.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.1.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.1.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.1.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.1.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.1.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.1.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.10.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.10.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.10.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.10.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.10.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.10.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.100.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.100.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.100.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.100.gate_proj.weight_scale": 
"model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.100.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.100.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.101.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.101.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.101.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.101.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.101.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.101.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.102.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.102.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.102.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.102.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.102.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.102.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.103.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.103.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.103.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.103.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + 
"model.language_model.layers.25.mlp.experts.103.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.103.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.104.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.104.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.104.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.104.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.104.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.104.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.105.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.105.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.105.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.105.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.105.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.105.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.106.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.106.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.106.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.106.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.106.up_proj.weight": 
"model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.106.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.107.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.107.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.107.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.107.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.107.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.107.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.108.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.108.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.108.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.108.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.108.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.108.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.109.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.109.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.109.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.109.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.109.up_proj.weight": "model-00037-of-00046.safetensors", + 
"model.language_model.layers.25.mlp.experts.109.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.11.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.11.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.11.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.11.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.11.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.11.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.110.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.110.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.110.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.110.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.110.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.110.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.111.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.111.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.111.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.111.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.111.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.111.up_proj.weight_scale": 
"model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.112.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.112.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.112.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.112.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.112.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.112.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.113.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.113.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.113.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.113.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.113.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.113.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.114.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.114.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.114.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.114.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.114.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.114.up_proj.weight_scale": "model-00037-of-00046.safetensors", + 
"model.language_model.layers.25.mlp.experts.115.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.115.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.115.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.115.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.115.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.115.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.116.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.116.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.116.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.116.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.116.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.116.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.117.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.117.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.117.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.117.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.117.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.117.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.118.down_proj.weight": 
"model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.118.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.118.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.118.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.118.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.118.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.119.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.119.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.119.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.119.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.119.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.119.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.12.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.12.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.12.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.12.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.12.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.12.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.120.down_proj.weight": "model-00037-of-00046.safetensors", + 
"model.language_model.layers.25.mlp.experts.120.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.120.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.120.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.120.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.120.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.121.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.121.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.121.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.121.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.121.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.121.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.122.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.122.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.122.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.122.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.122.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.122.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.123.down_proj.weight": "model-00037-of-00046.safetensors", + 
"model.language_model.layers.25.mlp.experts.123.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.123.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.123.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.123.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.123.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.124.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.124.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.124.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.124.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.124.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.124.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.125.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.125.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.125.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.125.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.125.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.125.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.126.down_proj.weight": "model-00037-of-00046.safetensors", + 
"model.language_model.layers.25.mlp.experts.126.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.126.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.126.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.126.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.126.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.127.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.127.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.127.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.127.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.127.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.127.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.13.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.13.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.13.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.13.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.13.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.13.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.14.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.14.down_proj.weight_scale": 
"model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.14.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.14.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.14.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.14.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.15.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.15.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.15.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.15.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.15.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.15.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.16.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.16.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.16.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.16.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.16.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.16.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.17.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.17.down_proj.weight_scale": "model-00036-of-00046.safetensors", + 
"model.language_model.layers.25.mlp.experts.17.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.17.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.17.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.17.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.18.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.18.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.18.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.18.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.18.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.18.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.19.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.19.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.19.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.19.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.19.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.19.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.2.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.2.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.2.gate_proj.weight": 
"model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.2.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.2.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.2.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.20.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.20.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.20.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.20.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.20.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.20.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.21.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.21.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.21.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.21.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.21.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.21.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.22.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.22.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.22.gate_proj.weight": "model-00036-of-00046.safetensors", + 
"model.language_model.layers.25.mlp.experts.22.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.22.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.22.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.23.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.23.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.23.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.23.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.23.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.23.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.24.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.24.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.24.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.24.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.24.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.24.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.25.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.25.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.25.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.25.gate_proj.weight_scale": 
"model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.25.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.25.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.26.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.26.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.26.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.26.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.26.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.26.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.27.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.27.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.27.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.27.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.27.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.27.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.28.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.28.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.28.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.28.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + 
"model.language_model.layers.25.mlp.experts.28.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.28.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.29.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.29.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.29.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.29.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.29.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.29.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.3.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.3.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.3.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.3.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.3.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.3.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.30.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.30.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.30.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.30.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.30.up_proj.weight": 
"model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.30.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.31.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.31.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.31.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.31.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.31.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.31.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.32.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.32.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.32.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.32.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.32.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.32.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.33.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.33.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.33.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.33.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.33.up_proj.weight": "model-00036-of-00046.safetensors", + 
"model.language_model.layers.25.mlp.experts.33.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.34.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.34.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.34.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.34.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.34.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.34.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.35.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.35.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.35.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.35.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.35.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.35.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.36.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.36.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.36.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.36.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.36.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.36.up_proj.weight_scale": 
"model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.37.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.37.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.37.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.37.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.37.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.37.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.38.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.38.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.38.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.38.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.38.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.38.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.39.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.39.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.39.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.39.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.39.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.39.up_proj.weight_scale": "model-00036-of-00046.safetensors", + 
"model.language_model.layers.25.mlp.experts.4.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.4.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.4.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.4.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.4.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.4.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.40.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.40.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.40.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.40.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.40.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.40.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.41.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.41.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.41.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.41.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.41.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.41.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.42.down_proj.weight": 
"model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.42.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.42.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.42.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.42.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.42.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.43.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.43.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.43.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.43.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.43.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.43.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.44.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.44.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.44.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.44.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.44.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.44.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.45.down_proj.weight": "model-00036-of-00046.safetensors", + 
"model.language_model.layers.25.mlp.experts.45.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.45.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.45.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.45.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.45.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.46.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.46.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.46.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.46.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.46.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.46.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.47.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.47.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.47.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.47.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.47.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.47.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.48.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.48.down_proj.weight_scale": 
"model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.48.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.48.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.48.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.48.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.49.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.49.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.49.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.49.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.49.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.49.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.5.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.5.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.5.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.5.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.5.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.5.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.50.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.50.down_proj.weight_scale": "model-00037-of-00046.safetensors", + 
"model.language_model.layers.25.mlp.experts.50.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.50.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.50.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.50.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.51.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.51.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.51.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.51.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.51.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.51.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.52.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.52.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.52.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.52.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.52.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.52.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.53.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.53.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.53.gate_proj.weight": 
"model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.53.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.53.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.53.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.54.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.54.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.54.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.54.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.54.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.54.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.55.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.55.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.55.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.55.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.55.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.55.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.56.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.56.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.56.gate_proj.weight": "model-00037-of-00046.safetensors", + 
"model.language_model.layers.25.mlp.experts.56.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.56.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.56.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.57.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.57.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.57.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.57.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.57.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.57.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.58.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.58.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.58.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.58.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.58.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.58.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.59.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.59.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.59.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.59.gate_proj.weight_scale": 
"model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.59.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.59.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.6.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.6.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.6.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.6.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.6.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.6.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.60.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.60.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.60.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.60.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.60.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.60.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.61.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.61.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.61.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.61.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + 
"model.language_model.layers.25.mlp.experts.61.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.61.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.62.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.62.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.62.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.62.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.62.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.62.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.63.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.63.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.63.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.63.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.63.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.63.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.64.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.64.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.64.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.64.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.64.up_proj.weight": 
"model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.64.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.65.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.65.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.65.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.65.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.65.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.65.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.66.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.66.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.66.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.66.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.66.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.66.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.67.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.67.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.67.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.67.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.67.up_proj.weight": "model-00037-of-00046.safetensors", + 
"model.language_model.layers.25.mlp.experts.67.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.68.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.68.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.68.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.68.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.68.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.68.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.69.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.69.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.69.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.69.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.69.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.69.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.7.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.7.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.7.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.7.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.7.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.7.up_proj.weight_scale": 
"model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.70.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.70.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.70.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.70.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.70.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.70.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.71.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.71.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.71.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.71.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.71.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.71.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.72.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.72.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.72.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.72.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.72.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.72.up_proj.weight_scale": "model-00037-of-00046.safetensors", + 
"model.language_model.layers.25.mlp.experts.73.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.73.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.73.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.73.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.73.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.73.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.74.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.74.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.74.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.74.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.74.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.74.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.75.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.75.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.75.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.75.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.75.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.75.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.76.down_proj.weight": 
"model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.76.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.76.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.76.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.76.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.76.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.77.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.77.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.77.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.77.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.77.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.77.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.78.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.78.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.78.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.78.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.78.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.78.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.79.down_proj.weight": "model-00037-of-00046.safetensors", + 
"model.language_model.layers.25.mlp.experts.79.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.79.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.79.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.79.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.79.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.8.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.8.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.8.gate_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.8.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.8.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.8.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.80.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.80.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.80.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.80.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.80.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.80.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.81.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.81.down_proj.weight_scale": 
"model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.81.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.81.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.81.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.81.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.82.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.82.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.82.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.82.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.82.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.82.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.83.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.83.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.83.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.83.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.83.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.83.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.84.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.84.down_proj.weight_scale": "model-00037-of-00046.safetensors", + 
"model.language_model.layers.25.mlp.experts.84.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.84.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.84.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.84.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.85.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.85.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.85.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.85.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.85.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.85.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.86.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.86.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.86.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.86.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.86.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.86.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.87.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.87.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.87.gate_proj.weight": 
"model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.87.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.87.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.87.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.88.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.88.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.88.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.88.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.88.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.88.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.89.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.89.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.89.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.89.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.89.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.89.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.9.down_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.9.down_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.9.gate_proj.weight": "model-00036-of-00046.safetensors", + 
"model.language_model.layers.25.mlp.experts.9.gate_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.9.up_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.9.up_proj.weight_scale": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.90.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.90.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.90.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.90.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.90.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.90.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.91.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.91.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.91.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.91.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.91.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.91.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.92.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.92.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.92.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.92.gate_proj.weight_scale": 
"model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.92.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.92.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.93.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.93.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.93.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.93.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.93.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.93.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.94.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.94.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.94.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.94.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.94.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.94.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.95.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.95.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.95.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.95.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + 
"model.language_model.layers.25.mlp.experts.95.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.95.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.96.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.96.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.96.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.96.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.96.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.96.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.97.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.97.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.97.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.97.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.97.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.97.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.98.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.98.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.98.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.98.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.98.up_proj.weight": 
"model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.98.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.99.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.99.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.99.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.99.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.99.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.experts.99.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.gate.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.mlp.shared_experts.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.shared_experts.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.shared_experts.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.shared_experts.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.shared_experts.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.mlp.shared_experts.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.25.self_attn.k_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.self_attn.o_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.self_attn.q_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.25.self_attn.v_proj.weight": "model-00036-of-00046.safetensors", + "model.language_model.layers.26.input_layernorm.weight": 
"model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.0.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.0.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.0.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.0.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.0.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.0.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.1.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.1.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.1.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.1.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.1.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.1.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.10.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.10.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.10.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.10.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.10.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.10.up_proj.weight_scale": "model-00037-of-00046.safetensors", + 
"model.language_model.layers.26.mlp.experts.100.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.100.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.100.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.100.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.100.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.100.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.101.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.101.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.101.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.101.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.101.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.101.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.102.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.102.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.102.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.102.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.102.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.102.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.103.down_proj.weight": 
"model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.103.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.103.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.103.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.103.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.103.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.104.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.104.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.104.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.104.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.104.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.104.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.105.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.105.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.105.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.105.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.105.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.105.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.106.down_proj.weight": "model-00038-of-00046.safetensors", + 
"model.language_model.layers.26.mlp.experts.106.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.106.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.106.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.106.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.106.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.107.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.107.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.107.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.107.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.107.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.107.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.108.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.108.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.108.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.108.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.108.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.108.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.109.down_proj.weight": "model-00038-of-00046.safetensors", + 
"model.language_model.layers.26.mlp.experts.109.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.109.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.109.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.109.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.109.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.11.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.11.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.11.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.11.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.11.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.11.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.110.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.110.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.110.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.110.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.110.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.110.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.111.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.111.down_proj.weight_scale": 
"model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.111.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.111.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.111.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.111.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.112.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.112.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.112.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.112.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.112.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.112.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.113.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.113.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.113.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.113.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.113.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.113.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.114.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.114.down_proj.weight_scale": "model-00039-of-00046.safetensors", + 
"model.language_model.layers.26.mlp.experts.114.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.114.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.114.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.114.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.115.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.115.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.115.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.115.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.115.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.115.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.116.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.116.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.116.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.116.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.116.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.116.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.117.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.117.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.117.gate_proj.weight": 
"model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.117.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.117.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.117.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.118.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.118.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.118.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.118.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.118.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.118.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.119.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.119.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.119.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.119.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.119.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.119.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.12.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.12.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.12.gate_proj.weight": "model-00038-of-00046.safetensors", + 
"model.language_model.layers.26.mlp.experts.12.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.12.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.12.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.120.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.120.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.120.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.120.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.120.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.120.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.121.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.121.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.121.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.121.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.121.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.121.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.122.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.122.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.122.gate_proj.weight": "model-00039-of-00046.safetensors", + 
"model.language_model.layers.26.mlp.experts.122.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.122.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.122.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.123.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.123.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.123.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.123.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.123.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.123.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.124.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.124.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.124.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.124.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.124.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.124.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.125.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.125.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.125.gate_proj.weight": "model-00039-of-00046.safetensors", + 
"model.language_model.layers.26.mlp.experts.125.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.125.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.125.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.126.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.126.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.126.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.126.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.126.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.126.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.127.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.127.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.127.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.127.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.127.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.127.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.13.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.13.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.13.gate_proj.weight": "model-00038-of-00046.safetensors", + 
"model.language_model.layers.26.mlp.experts.13.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.13.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.13.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.14.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.14.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.14.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.14.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.14.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.14.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.15.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.15.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.15.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.15.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.15.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.15.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.16.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.16.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.16.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.16.gate_proj.weight_scale": 
"model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.16.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.16.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.17.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.17.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.17.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.17.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.17.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.17.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.18.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.18.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.18.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.18.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.18.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.18.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.19.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.19.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.19.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.19.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + 
"model.language_model.layers.26.mlp.experts.19.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.19.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.2.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.2.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.2.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.2.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.2.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.2.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.20.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.20.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.20.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.20.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.20.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.20.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.21.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.21.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.21.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.21.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.21.up_proj.weight": 
"model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.21.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.22.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.22.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.22.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.22.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.22.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.22.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.23.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.23.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.23.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.23.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.23.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.23.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.24.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.24.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.24.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.24.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.24.up_proj.weight": "model-00038-of-00046.safetensors", + 
"model.language_model.layers.26.mlp.experts.24.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.25.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.25.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.25.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.25.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.25.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.25.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.26.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.26.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.26.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.26.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.26.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.26.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.27.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.27.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.27.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.27.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.27.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.27.up_proj.weight_scale": 
"model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.28.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.28.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.28.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.28.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.28.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.28.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.29.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.29.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.29.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.29.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.29.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.29.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.3.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.3.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.3.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.3.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.3.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.3.up_proj.weight_scale": "model-00037-of-00046.safetensors", + 
"model.language_model.layers.26.mlp.experts.30.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.30.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.30.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.30.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.30.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.30.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.31.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.31.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.31.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.31.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.31.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.31.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.32.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.32.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.32.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.32.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.32.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.32.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.33.down_proj.weight": 
"model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.33.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.33.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.33.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.33.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.33.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.34.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.34.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.34.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.34.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.34.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.34.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.35.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.35.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.35.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.35.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.35.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.35.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.36.down_proj.weight": "model-00038-of-00046.safetensors", + 
"model.language_model.layers.26.mlp.experts.36.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.36.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.36.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.36.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.36.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.37.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.37.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.37.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.37.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.37.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.37.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.38.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.38.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.38.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.38.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.38.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.38.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.39.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.39.down_proj.weight_scale": 
"model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.39.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.39.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.39.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.39.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.4.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.4.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.4.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.4.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.4.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.4.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.40.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.40.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.40.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.40.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.40.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.40.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.41.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.41.down_proj.weight_scale": "model-00038-of-00046.safetensors", + 
"model.language_model.layers.26.mlp.experts.41.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.41.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.41.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.41.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.42.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.42.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.42.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.42.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.42.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.42.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.43.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.43.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.43.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.43.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.43.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.43.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.44.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.44.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.44.gate_proj.weight": 
"model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.44.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.44.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.44.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.45.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.45.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.45.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.45.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.45.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.45.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.46.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.46.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.46.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.46.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.46.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.46.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.47.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.47.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.47.gate_proj.weight": "model-00038-of-00046.safetensors", + 
"model.language_model.layers.26.mlp.experts.47.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.47.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.47.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.48.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.48.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.48.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.48.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.48.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.48.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.49.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.49.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.49.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.49.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.49.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.49.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.5.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.5.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.5.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.5.gate_proj.weight_scale": 
"model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.5.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.5.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.50.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.50.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.50.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.50.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.50.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.50.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.51.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.51.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.51.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.51.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.51.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.51.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.52.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.52.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.52.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.52.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + 
"model.language_model.layers.26.mlp.experts.52.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.52.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.53.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.53.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.53.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.53.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.53.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.53.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.54.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.54.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.54.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.54.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.54.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.54.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.55.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.55.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.55.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.55.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.55.up_proj.weight": 
"model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.55.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.56.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.56.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.56.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.56.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.56.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.56.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.57.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.57.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.57.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.57.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.57.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.57.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.58.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.58.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.58.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.58.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.58.up_proj.weight": "model-00038-of-00046.safetensors", + 
"model.language_model.layers.26.mlp.experts.58.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.59.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.59.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.59.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.59.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.59.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.59.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.6.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.6.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.6.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.6.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.6.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.6.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.60.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.60.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.60.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.60.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.60.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.60.up_proj.weight_scale": 
"model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.61.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.61.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.61.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.61.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.61.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.61.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.62.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.62.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.62.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.62.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.62.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.62.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.63.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.63.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.63.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.63.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.63.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.63.up_proj.weight_scale": "model-00038-of-00046.safetensors", + 
"model.language_model.layers.26.mlp.experts.64.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.64.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.64.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.64.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.64.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.64.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.65.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.65.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.65.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.65.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.65.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.65.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.66.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.66.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.66.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.66.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.66.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.66.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.67.down_proj.weight": 
"model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.67.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.67.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.67.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.67.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.67.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.68.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.68.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.68.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.68.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.68.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.68.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.69.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.69.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.69.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.69.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.69.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.69.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.7.down_proj.weight": "model-00037-of-00046.safetensors", + 
"model.language_model.layers.26.mlp.experts.7.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.7.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.7.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.7.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.7.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.70.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.70.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.70.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.70.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.70.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.70.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.71.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.71.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.71.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.71.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.71.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.71.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.72.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.72.down_proj.weight_scale": 
"model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.72.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.72.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.72.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.72.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.73.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.73.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.73.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.73.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.73.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.73.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.74.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.74.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.74.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.74.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.74.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.74.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.75.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.75.down_proj.weight_scale": "model-00038-of-00046.safetensors", + 
"model.language_model.layers.26.mlp.experts.75.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.75.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.75.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.75.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.76.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.76.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.76.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.76.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.76.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.76.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.77.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.77.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.77.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.77.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.77.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.77.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.78.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.78.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.78.gate_proj.weight": 
"model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.78.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.78.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.78.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.79.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.79.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.79.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.79.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.79.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.79.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.8.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.8.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.8.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.8.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.8.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.8.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.80.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.80.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.80.gate_proj.weight": "model-00038-of-00046.safetensors", + 
"model.language_model.layers.26.mlp.experts.80.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.80.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.80.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.81.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.81.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.81.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.81.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.81.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.81.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.82.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.82.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.82.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.82.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.82.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.82.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.83.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.83.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.83.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.83.gate_proj.weight_scale": 
"model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.83.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.83.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.84.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.84.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.84.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.84.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.84.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.84.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.85.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.85.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.85.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.85.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.85.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.85.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.86.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.86.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.86.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.86.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + 
"model.language_model.layers.26.mlp.experts.86.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.86.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.87.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.87.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.87.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.87.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.87.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.87.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.88.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.88.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.88.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.88.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.88.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.88.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.89.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.89.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.89.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.89.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.89.up_proj.weight": 
"model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.89.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.9.down_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.9.down_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.9.gate_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.9.gate_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.9.up_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.9.up_proj.weight_scale": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.90.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.90.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.90.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.90.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.90.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.90.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.91.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.91.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.91.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.91.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.91.up_proj.weight": "model-00038-of-00046.safetensors", + 
"model.language_model.layers.26.mlp.experts.91.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.92.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.92.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.92.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.92.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.92.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.92.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.93.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.93.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.93.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.93.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.93.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.93.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.94.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.94.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.94.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.94.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.94.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.94.up_proj.weight_scale": 
"model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.95.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.95.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.95.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.95.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.95.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.95.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.96.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.96.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.96.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.96.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.96.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.96.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.97.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.97.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.97.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.97.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.97.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.97.up_proj.weight_scale": "model-00038-of-00046.safetensors", + 
"model.language_model.layers.26.mlp.experts.98.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.98.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.98.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.98.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.98.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.98.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.99.down_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.99.down_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.99.gate_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.99.gate_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.99.up_proj.weight": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.experts.99.up_proj.weight_scale": "model-00038-of-00046.safetensors", + "model.language_model.layers.26.mlp.gate.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.mlp.shared_experts.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.shared_experts.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.shared_experts.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.shared_experts.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.shared_experts.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.26.mlp.shared_experts.up_proj.weight_scale": 
"model-00039-of-00046.safetensors", + "model.language_model.layers.26.self_attn.k_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.self_attn.o_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.self_attn.q_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.26.self_attn.v_proj.weight": "model-00037-of-00046.safetensors", + "model.language_model.layers.27.input_layernorm.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.0.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.0.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.0.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.0.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.0.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.0.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.1.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.1.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.1.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.1.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.1.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.1.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.10.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.10.down_proj.weight_scale": "model-00039-of-00046.safetensors", + 
"model.language_model.layers.27.mlp.experts.10.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.10.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.10.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.10.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.100.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.100.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.100.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.100.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.100.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.100.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.101.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.101.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.101.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.101.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.101.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.101.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.102.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.102.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.102.gate_proj.weight": 
"model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.102.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.102.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.102.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.103.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.103.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.103.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.103.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.103.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.103.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.104.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.104.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.104.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.104.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.104.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.104.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.105.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.105.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.105.gate_proj.weight": "model-00040-of-00046.safetensors", + 
"model.language_model.layers.27.mlp.experts.105.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.105.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.105.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.106.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.106.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.106.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.106.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.106.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.106.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.107.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.107.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.107.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.107.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.107.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.107.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.108.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.108.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.108.gate_proj.weight": "model-00040-of-00046.safetensors", + 
"model.language_model.layers.27.mlp.experts.108.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.108.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.108.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.109.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.109.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.109.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.109.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.109.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.109.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.11.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.11.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.11.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.11.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.11.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.11.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.110.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.110.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.110.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.110.gate_proj.weight_scale": 
"model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.110.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.110.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.111.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.111.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.111.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.111.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.111.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.111.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.112.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.112.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.112.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.112.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.112.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.112.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.113.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.113.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.113.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.113.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + 
"model.language_model.layers.27.mlp.experts.113.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.113.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.114.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.114.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.114.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.114.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.114.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.114.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.115.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.115.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.115.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.115.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.115.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.115.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.116.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.116.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.116.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.116.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.116.up_proj.weight": 
"model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.116.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.117.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.117.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.117.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.117.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.117.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.117.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.118.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.118.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.118.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.118.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.118.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.118.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.119.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.119.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.119.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.119.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.119.up_proj.weight": "model-00040-of-00046.safetensors", + 
"model.language_model.layers.27.mlp.experts.119.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.12.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.12.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.12.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.12.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.12.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.12.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.120.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.120.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.120.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.120.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.120.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.120.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.121.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.121.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.121.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.121.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.121.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.121.up_proj.weight_scale": 
"model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.122.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.122.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.122.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.122.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.122.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.122.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.123.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.123.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.123.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.123.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.123.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.123.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.124.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.124.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.124.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.124.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.124.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.124.up_proj.weight_scale": "model-00040-of-00046.safetensors", + 
"model.language_model.layers.27.mlp.experts.125.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.125.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.125.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.125.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.125.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.125.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.126.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.126.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.126.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.126.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.126.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.126.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.127.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.127.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.127.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.127.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.127.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.127.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.13.down_proj.weight": 
"model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.13.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.13.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.13.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.13.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.13.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.14.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.14.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.14.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.14.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.14.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.14.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.15.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.15.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.15.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.15.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.15.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.15.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.16.down_proj.weight": "model-00039-of-00046.safetensors", + 
"model.language_model.layers.27.mlp.experts.16.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.16.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.16.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.16.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.16.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.17.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.17.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.17.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.17.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.17.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.17.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.18.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.18.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.18.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.18.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.18.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.18.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.19.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.19.down_proj.weight_scale": 
"model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.19.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.19.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.19.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.19.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.2.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.2.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.2.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.2.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.2.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.2.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.20.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.20.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.20.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.20.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.20.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.20.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.21.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.21.down_proj.weight_scale": "model-00039-of-00046.safetensors", + 
"model.language_model.layers.27.mlp.experts.21.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.21.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.21.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.21.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.22.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.22.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.22.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.22.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.22.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.22.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.23.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.23.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.23.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.23.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.23.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.23.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.24.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.24.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.24.gate_proj.weight": 
"model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.24.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.24.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.24.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.25.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.25.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.25.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.25.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.25.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.25.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.26.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.26.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.26.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.26.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.26.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.26.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.27.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.27.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.27.gate_proj.weight": "model-00039-of-00046.safetensors", + 
"model.language_model.layers.27.mlp.experts.27.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.27.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.27.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.28.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.28.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.28.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.28.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.28.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.28.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.29.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.29.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.29.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.29.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.29.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.29.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.3.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.3.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.3.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.3.gate_proj.weight_scale": 
"model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.3.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.3.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.30.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.30.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.30.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.30.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.30.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.30.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.31.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.31.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.31.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.31.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.31.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.31.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.32.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.32.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.32.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.32.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + 
"model.language_model.layers.27.mlp.experts.32.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.32.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.33.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.33.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.33.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.33.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.33.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.33.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.34.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.34.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.34.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.34.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.34.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.34.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.35.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.35.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.35.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.35.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.35.up_proj.weight": 
"model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.35.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.36.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.36.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.36.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.36.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.36.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.36.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.37.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.37.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.37.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.37.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.37.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.37.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.38.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.38.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.38.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.38.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.38.up_proj.weight": "model-00039-of-00046.safetensors", + 
"model.language_model.layers.27.mlp.experts.38.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.39.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.39.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.39.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.39.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.39.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.39.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.4.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.4.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.4.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.4.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.4.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.4.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.40.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.40.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.40.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.40.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.40.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.40.up_proj.weight_scale": 
"model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.41.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.41.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.41.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.41.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.41.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.41.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.42.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.42.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.42.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.42.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.42.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.42.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.43.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.43.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.43.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.43.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.43.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.43.up_proj.weight_scale": "model-00039-of-00046.safetensors", + 
"model.language_model.layers.27.mlp.experts.44.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.44.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.44.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.44.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.44.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.44.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.45.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.45.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.45.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.45.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.45.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.45.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.46.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.46.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.46.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.46.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.46.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.46.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.47.down_proj.weight": 
"model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.47.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.47.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.47.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.47.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.47.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.48.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.48.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.48.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.48.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.48.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.48.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.49.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.49.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.49.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.49.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.49.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.49.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.5.down_proj.weight": "model-00039-of-00046.safetensors", + 
"model.language_model.layers.27.mlp.experts.5.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.5.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.5.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.5.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.5.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.50.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.50.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.50.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.50.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.50.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.50.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.51.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.51.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.51.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.51.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.51.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.51.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.52.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.52.down_proj.weight_scale": 
"model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.52.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.52.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.52.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.52.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.53.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.53.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.53.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.53.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.53.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.53.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.54.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.54.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.54.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.54.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.54.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.54.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.55.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.55.down_proj.weight_scale": "model-00039-of-00046.safetensors", + 
"model.language_model.layers.27.mlp.experts.55.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.55.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.55.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.55.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.56.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.56.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.56.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.56.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.56.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.56.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.57.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.57.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.57.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.57.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.57.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.57.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.58.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.58.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.58.gate_proj.weight": 
"model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.58.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.58.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.58.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.59.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.59.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.59.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.59.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.59.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.59.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.6.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.6.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.6.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.6.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.6.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.6.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.60.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.60.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.60.gate_proj.weight": "model-00039-of-00046.safetensors", + 
"model.language_model.layers.27.mlp.experts.60.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.60.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.60.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.61.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.61.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.61.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.61.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.61.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.61.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.62.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.62.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.62.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.62.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.62.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.62.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.63.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.63.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.63.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.63.gate_proj.weight_scale": 
"model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.63.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.63.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.64.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.64.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.64.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.64.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.64.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.64.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.65.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.65.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.65.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.65.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.65.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.65.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.66.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.66.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.66.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.66.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + 
"model.language_model.layers.27.mlp.experts.66.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.66.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.67.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.67.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.67.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.67.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.67.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.67.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.68.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.68.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.68.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.68.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.68.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.68.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.69.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.69.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.69.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.69.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.69.up_proj.weight": 
"model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.69.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.7.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.7.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.7.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.7.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.7.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.7.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.70.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.70.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.70.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.70.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.70.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.70.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.71.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.71.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.71.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.71.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.71.up_proj.weight": "model-00039-of-00046.safetensors", + 
"model.language_model.layers.27.mlp.experts.71.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.72.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.72.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.72.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.72.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.72.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.72.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.73.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.73.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.73.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.73.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.73.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.73.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.74.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.74.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.74.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.74.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.74.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.74.up_proj.weight_scale": 
"model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.75.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.75.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.75.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.75.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.75.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.75.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.76.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.76.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.76.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.76.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.76.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.76.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.77.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.77.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.77.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.77.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.77.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.77.up_proj.weight_scale": "model-00040-of-00046.safetensors", + 
"model.language_model.layers.27.mlp.experts.78.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.78.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.78.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.78.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.78.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.78.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.79.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.79.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.79.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.79.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.79.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.79.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.8.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.8.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.8.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.8.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.8.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.8.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.80.down_proj.weight": 
"model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.80.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.80.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.80.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.80.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.80.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.81.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.81.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.81.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.81.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.81.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.81.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.82.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.82.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.82.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.82.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.82.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.82.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.83.down_proj.weight": "model-00040-of-00046.safetensors", + 
"model.language_model.layers.27.mlp.experts.83.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.83.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.83.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.83.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.83.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.84.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.84.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.84.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.84.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.84.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.84.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.85.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.85.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.85.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.85.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.85.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.85.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.86.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.86.down_proj.weight_scale": 
"model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.86.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.86.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.86.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.86.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.87.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.87.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.87.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.87.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.87.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.87.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.88.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.88.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.88.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.88.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.88.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.88.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.89.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.89.down_proj.weight_scale": "model-00040-of-00046.safetensors", + 
"model.language_model.layers.27.mlp.experts.89.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.89.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.89.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.89.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.9.down_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.9.down_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.9.gate_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.9.gate_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.9.up_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.9.up_proj.weight_scale": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.90.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.90.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.90.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.90.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.90.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.90.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.91.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.91.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.91.gate_proj.weight": 
"model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.91.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.91.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.91.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.92.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.92.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.92.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.92.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.92.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.92.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.93.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.93.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.93.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.93.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.93.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.93.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.94.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.94.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.94.gate_proj.weight": "model-00040-of-00046.safetensors", + 
"model.language_model.layers.27.mlp.experts.94.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.94.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.94.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.95.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.95.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.95.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.95.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.95.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.95.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.96.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.96.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.96.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.96.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.96.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.96.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.97.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.97.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.97.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.97.gate_proj.weight_scale": 
"model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.97.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.97.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.98.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.98.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.98.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.98.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.98.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.98.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.99.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.99.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.99.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.99.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.99.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.experts.99.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.gate.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.mlp.shared_experts.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.shared_experts.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.shared_experts.gate_proj.weight": "model-00040-of-00046.safetensors", + 
"model.language_model.layers.27.mlp.shared_experts.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.shared_experts.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.mlp.shared_experts.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.27.self_attn.k_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.self_attn.o_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.self_attn.q_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.27.self_attn.v_proj.weight": "model-00039-of-00046.safetensors", + "model.language_model.layers.28.input_layernorm.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.0.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.0.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.0.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.0.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.0.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.0.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.1.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.1.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.1.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.1.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.1.up_proj.weight": "model-00040-of-00046.safetensors", + 
"model.language_model.layers.28.mlp.experts.1.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.10.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.10.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.10.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.10.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.10.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.10.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.100.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.100.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.100.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.100.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.100.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.100.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.101.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.101.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.101.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.101.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.101.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.101.up_proj.weight_scale": 
"model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.102.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.102.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.102.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.102.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.102.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.102.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.103.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.103.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.103.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.103.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.103.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.103.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.104.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.104.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.104.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.104.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.104.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.104.up_proj.weight_scale": "model-00041-of-00046.safetensors", + 
"model.language_model.layers.28.mlp.experts.105.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.105.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.105.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.105.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.105.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.105.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.106.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.106.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.106.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.106.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.106.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.106.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.107.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.107.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.107.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.107.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.107.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.107.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.108.down_proj.weight": 
"model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.108.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.108.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.108.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.108.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.108.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.109.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.109.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.109.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.109.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.109.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.109.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.11.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.11.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.11.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.11.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.11.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.11.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.110.down_proj.weight": "model-00041-of-00046.safetensors", + 
"model.language_model.layers.28.mlp.experts.110.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.110.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.110.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.110.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.110.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.111.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.111.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.111.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.111.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.111.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.111.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.112.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.112.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.112.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.112.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.112.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.112.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.113.down_proj.weight": "model-00041-of-00046.safetensors", + 
"model.language_model.layers.28.mlp.experts.113.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.113.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.113.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.113.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.113.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.114.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.114.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.114.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.114.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.114.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.114.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.115.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.115.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.115.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.115.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.115.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.115.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.116.down_proj.weight": "model-00041-of-00046.safetensors", + 
"model.language_model.layers.28.mlp.experts.116.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.116.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.116.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.116.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.116.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.117.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.117.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.117.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.117.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.117.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.117.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.118.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.118.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.118.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.118.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.118.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.118.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.119.down_proj.weight": "model-00041-of-00046.safetensors", + 
"model.language_model.layers.28.mlp.experts.119.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.119.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.119.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.119.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.119.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.12.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.12.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.12.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.12.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.12.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.12.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.120.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.120.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.120.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.120.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.120.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.120.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.121.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.121.down_proj.weight_scale": 
"model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.121.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.121.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.121.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.121.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.122.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.122.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.122.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.122.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.122.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.122.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.123.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.123.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.123.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.123.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.123.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.123.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.124.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.124.down_proj.weight_scale": "model-00041-of-00046.safetensors", + 
"model.language_model.layers.28.mlp.experts.124.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.124.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.124.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.124.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.125.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.125.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.125.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.125.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.125.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.125.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.126.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.126.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.126.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.126.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.126.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.126.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.127.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.127.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.127.gate_proj.weight": 
"model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.127.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.127.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.127.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.13.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.13.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.13.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.13.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.13.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.13.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.14.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.14.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.14.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.14.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.14.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.14.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.15.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.15.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.15.gate_proj.weight": "model-00040-of-00046.safetensors", + 
"model.language_model.layers.28.mlp.experts.15.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.15.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.15.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.16.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.16.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.16.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.16.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.16.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.16.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.17.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.17.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.17.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.17.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.17.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.17.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.18.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.18.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.18.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.18.gate_proj.weight_scale": 
"model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.18.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.18.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.19.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.19.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.19.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.19.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.19.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.19.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.2.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.2.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.2.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.2.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.2.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.2.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.20.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.20.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.20.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.20.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + 
"model.language_model.layers.28.mlp.experts.20.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.20.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.21.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.21.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.21.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.21.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.21.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.21.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.22.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.22.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.22.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.22.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.22.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.22.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.23.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.23.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.23.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.23.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.23.up_proj.weight": 
"model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.23.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.24.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.24.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.24.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.24.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.24.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.24.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.25.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.25.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.25.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.25.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.25.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.25.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.26.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.26.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.26.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.26.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.26.up_proj.weight": "model-00040-of-00046.safetensors", + 
"model.language_model.layers.28.mlp.experts.26.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.27.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.27.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.27.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.27.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.27.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.27.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.28.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.28.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.28.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.28.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.28.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.28.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.29.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.29.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.29.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.29.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.29.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.29.up_proj.weight_scale": 
"model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.3.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.3.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.3.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.3.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.3.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.3.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.30.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.30.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.30.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.30.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.30.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.30.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.31.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.31.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.31.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.31.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.31.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.31.up_proj.weight_scale": "model-00040-of-00046.safetensors", + 
"model.language_model.layers.28.mlp.experts.32.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.32.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.32.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.32.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.32.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.32.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.33.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.33.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.33.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.33.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.33.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.33.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.34.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.34.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.34.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.34.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.34.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.34.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.35.down_proj.weight": 
"model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.35.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.35.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.35.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.35.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.35.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.36.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.36.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.36.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.36.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.36.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.36.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.37.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.37.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.37.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.37.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.37.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.37.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.38.down_proj.weight": "model-00041-of-00046.safetensors", + 
"model.language_model.layers.28.mlp.experts.38.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.38.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.38.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.38.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.38.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.39.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.39.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.39.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.39.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.39.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.39.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.4.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.4.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.4.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.4.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.4.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.4.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.40.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.40.down_proj.weight_scale": 
"model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.40.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.40.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.40.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.40.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.41.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.41.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.41.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.41.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.41.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.41.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.42.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.42.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.42.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.42.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.42.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.42.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.43.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.43.down_proj.weight_scale": "model-00041-of-00046.safetensors", + 
"model.language_model.layers.28.mlp.experts.43.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.43.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.43.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.43.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.44.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.44.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.44.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.44.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.44.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.44.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.45.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.45.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.45.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.45.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.45.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.45.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.46.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.46.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.46.gate_proj.weight": 
"model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.46.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.46.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.46.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.47.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.47.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.47.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.47.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.47.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.47.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.48.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.48.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.48.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.48.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.48.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.48.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.49.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.49.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.49.gate_proj.weight": "model-00041-of-00046.safetensors", + 
"model.language_model.layers.28.mlp.experts.49.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.49.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.49.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.5.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.5.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.5.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.5.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.5.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.5.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.50.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.50.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.50.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.50.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.50.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.50.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.51.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.51.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.51.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.51.gate_proj.weight_scale": 
"model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.51.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.51.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.52.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.52.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.52.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.52.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.52.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.52.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.53.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.53.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.53.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.53.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.53.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.53.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.54.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.54.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.54.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.54.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + 
"model.language_model.layers.28.mlp.experts.54.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.54.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.55.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.55.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.55.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.55.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.55.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.55.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.56.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.56.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.56.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.56.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.56.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.56.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.57.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.57.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.57.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.57.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.57.up_proj.weight": 
"model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.57.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.58.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.58.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.58.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.58.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.58.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.58.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.59.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.59.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.59.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.59.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.59.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.59.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.6.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.6.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.6.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.6.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.6.up_proj.weight": "model-00040-of-00046.safetensors", + 
"model.language_model.layers.28.mlp.experts.6.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.60.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.60.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.60.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.60.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.60.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.60.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.61.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.61.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.61.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.61.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.61.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.61.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.62.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.62.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.62.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.62.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.62.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.62.up_proj.weight_scale": 
"model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.63.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.63.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.63.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.63.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.63.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.63.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.64.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.64.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.64.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.64.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.64.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.64.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.65.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.65.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.65.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.65.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.65.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.65.up_proj.weight_scale": "model-00041-of-00046.safetensors", + 
"model.language_model.layers.28.mlp.experts.66.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.66.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.66.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.66.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.66.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.66.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.67.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.67.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.67.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.67.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.67.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.67.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.68.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.68.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.68.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.68.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.68.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.68.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.69.down_proj.weight": 
"model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.69.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.69.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.69.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.69.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.69.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.7.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.7.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.7.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.7.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.7.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.7.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.70.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.70.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.70.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.70.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.70.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.70.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.71.down_proj.weight": "model-00041-of-00046.safetensors", + 
"model.language_model.layers.28.mlp.experts.71.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.71.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.71.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.71.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.71.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.72.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.72.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.72.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.72.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.72.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.72.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.73.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.73.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.73.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.73.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.73.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.73.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.74.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.74.down_proj.weight_scale": 
"model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.74.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.74.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.74.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.74.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.75.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.75.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.75.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.75.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.75.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.75.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.76.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.76.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.76.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.76.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.76.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.76.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.77.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.77.down_proj.weight_scale": "model-00041-of-00046.safetensors", + 
"model.language_model.layers.28.mlp.experts.77.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.77.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.77.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.77.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.78.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.78.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.78.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.78.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.78.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.78.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.79.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.79.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.79.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.79.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.79.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.79.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.8.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.8.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.8.gate_proj.weight": 
"model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.8.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.8.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.8.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.80.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.80.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.80.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.80.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.80.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.80.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.81.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.81.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.81.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.81.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.81.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.81.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.82.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.82.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.82.gate_proj.weight": "model-00041-of-00046.safetensors", + 
"model.language_model.layers.28.mlp.experts.82.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.82.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.82.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.83.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.83.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.83.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.83.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.83.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.83.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.84.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.84.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.84.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.84.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.84.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.84.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.85.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.85.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.85.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.85.gate_proj.weight_scale": 
"model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.85.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.85.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.86.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.86.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.86.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.86.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.86.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.86.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.87.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.87.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.87.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.87.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.87.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.87.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.88.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.88.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.88.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.88.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + 
"model.language_model.layers.28.mlp.experts.88.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.88.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.89.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.89.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.89.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.89.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.89.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.89.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.9.down_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.9.down_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.9.gate_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.9.gate_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.9.up_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.9.up_proj.weight_scale": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.90.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.90.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.90.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.90.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.90.up_proj.weight": 
"model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.90.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.91.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.91.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.91.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.91.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.91.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.91.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.92.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.92.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.92.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.92.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.92.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.92.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.93.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.93.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.93.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.93.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.93.up_proj.weight": "model-00041-of-00046.safetensors", + 
"model.language_model.layers.28.mlp.experts.93.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.94.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.94.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.94.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.94.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.94.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.94.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.95.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.95.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.95.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.95.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.95.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.95.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.96.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.96.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.96.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.96.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.96.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.96.up_proj.weight_scale": 
"model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.97.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.97.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.97.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.97.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.97.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.97.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.98.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.98.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.98.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.98.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.98.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.98.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.99.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.99.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.99.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.99.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.99.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.experts.99.up_proj.weight_scale": "model-00041-of-00046.safetensors", + 
"model.language_model.layers.28.mlp.gate.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.mlp.shared_experts.down_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.shared_experts.down_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.shared_experts.gate_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.shared_experts.gate_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.shared_experts.up_proj.weight": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.mlp.shared_experts.up_proj.weight_scale": "model-00041-of-00046.safetensors", + "model.language_model.layers.28.self_attn.k_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.self_attn.o_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.self_attn.q_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.28.self_attn.v_proj.weight": "model-00040-of-00046.safetensors", + "model.language_model.layers.29.input_layernorm.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.0.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.0.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.0.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.0.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.0.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.0.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.1.down_proj.weight": "model-00042-of-00046.safetensors", + 
"model.language_model.layers.29.mlp.experts.1.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.1.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.1.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.1.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.1.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.10.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.10.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.10.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.10.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.10.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.10.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.100.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.100.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.100.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.100.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.100.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.100.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.101.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.101.down_proj.weight_scale": 
"model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.101.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.101.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.101.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.101.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.102.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.102.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.102.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.102.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.102.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.102.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.103.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.103.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.103.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.103.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.103.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.103.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.104.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.104.down_proj.weight_scale": "model-00043-of-00046.safetensors", + 
"model.language_model.layers.29.mlp.experts.104.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.104.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.104.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.104.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.105.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.105.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.105.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.105.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.105.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.105.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.106.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.106.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.106.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.106.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.106.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.106.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.107.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.107.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.107.gate_proj.weight": 
"model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.107.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.107.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.107.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.108.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.108.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.108.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.108.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.108.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.108.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.109.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.109.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.109.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.109.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.109.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.109.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.11.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.11.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.11.gate_proj.weight": "model-00042-of-00046.safetensors", + 
"model.language_model.layers.29.mlp.experts.11.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.11.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.11.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.110.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.110.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.110.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.110.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.110.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.110.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.111.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.111.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.111.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.111.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.111.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.111.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.112.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.112.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.112.gate_proj.weight": "model-00043-of-00046.safetensors", + 
"model.language_model.layers.29.mlp.experts.112.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.112.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.112.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.113.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.113.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.113.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.113.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.113.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.113.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.114.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.114.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.114.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.114.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.114.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.114.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.115.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.115.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.115.gate_proj.weight": "model-00043-of-00046.safetensors", + 
"model.language_model.layers.29.mlp.experts.115.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.115.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.115.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.116.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.116.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.116.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.116.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.116.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.116.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.117.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.117.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.117.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.117.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.117.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.117.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.118.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.118.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.118.gate_proj.weight": "model-00043-of-00046.safetensors", + 
"model.language_model.layers.29.mlp.experts.118.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.118.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.118.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.119.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.119.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.119.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.119.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.119.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.119.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.12.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.12.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.12.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.12.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.12.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.12.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.120.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.120.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.120.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.120.gate_proj.weight_scale": 
"model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.120.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.120.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.121.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.121.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.121.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.121.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.121.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.121.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.122.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.122.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.122.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.122.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.122.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.122.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.123.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.123.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.123.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.123.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + 
"model.language_model.layers.29.mlp.experts.123.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.123.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.124.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.124.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.124.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.124.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.124.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.124.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.125.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.125.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.125.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.125.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.125.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.125.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.126.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.126.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.126.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.126.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.126.up_proj.weight": 
"model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.126.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.127.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.127.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.127.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.127.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.127.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.127.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.13.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.13.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.13.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.13.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.13.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.13.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.14.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.14.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.14.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.14.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.14.up_proj.weight": "model-00042-of-00046.safetensors", + 
"model.language_model.layers.29.mlp.experts.14.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.15.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.15.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.15.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.15.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.15.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.15.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.16.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.16.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.16.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.16.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.16.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.16.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.17.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.17.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.17.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.17.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.17.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.17.up_proj.weight_scale": 
"model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.18.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.18.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.18.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.18.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.18.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.18.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.19.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.19.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.19.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.19.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.19.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.19.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.2.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.2.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.2.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.2.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.2.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.2.up_proj.weight_scale": "model-00042-of-00046.safetensors", + 
"model.language_model.layers.29.mlp.experts.20.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.20.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.20.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.20.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.20.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.20.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.21.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.21.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.21.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.21.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.21.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.21.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.22.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.22.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.22.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.22.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.22.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.22.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.23.down_proj.weight": 
"model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.23.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.23.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.23.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.23.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.23.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.24.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.24.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.24.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.24.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.24.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.24.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.25.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.25.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.25.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.25.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.25.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.25.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.26.down_proj.weight": "model-00042-of-00046.safetensors", + 
"model.language_model.layers.29.mlp.experts.26.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.26.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.26.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.26.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.26.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.27.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.27.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.27.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.27.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.27.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.27.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.28.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.28.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.28.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.28.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.28.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.28.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.29.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.29.down_proj.weight_scale": 
"model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.29.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.29.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.29.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.29.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.3.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.3.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.3.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.3.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.3.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.3.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.30.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.30.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.30.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.30.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.30.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.30.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.31.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.31.down_proj.weight_scale": "model-00042-of-00046.safetensors", + 
"model.language_model.layers.29.mlp.experts.31.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.31.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.31.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.31.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.32.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.32.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.32.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.32.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.32.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.32.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.33.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.33.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.33.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.33.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.33.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.33.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.34.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.34.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.34.gate_proj.weight": 
"model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.34.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.34.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.34.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.35.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.35.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.35.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.35.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.35.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.35.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.36.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.36.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.36.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.36.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.36.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.36.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.37.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.37.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.37.gate_proj.weight": "model-00042-of-00046.safetensors", + 
"model.language_model.layers.29.mlp.experts.37.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.37.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.37.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.38.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.38.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.38.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.38.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.38.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.38.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.39.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.39.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.39.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.39.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.39.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.39.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.4.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.4.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.4.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.4.gate_proj.weight_scale": 
"model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.4.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.4.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.40.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.40.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.40.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.40.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.40.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.40.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.41.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.41.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.41.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.41.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.41.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.41.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.42.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.42.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.42.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.42.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + 
"model.language_model.layers.29.mlp.experts.42.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.42.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.43.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.43.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.43.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.43.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.43.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.43.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.44.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.44.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.44.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.44.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.44.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.44.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.45.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.45.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.45.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.45.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.45.up_proj.weight": 
"model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.45.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.46.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.46.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.46.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.46.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.46.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.46.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.47.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.47.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.47.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.47.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.47.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.47.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.48.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.48.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.48.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.48.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.48.up_proj.weight": "model-00042-of-00046.safetensors", + 
"model.language_model.layers.29.mlp.experts.48.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.49.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.49.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.49.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.49.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.49.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.49.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.5.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.5.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.5.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.5.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.5.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.5.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.50.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.50.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.50.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.50.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.50.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.50.up_proj.weight_scale": 
"model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.51.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.51.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.51.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.51.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.51.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.51.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.52.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.52.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.52.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.52.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.52.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.52.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.53.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.53.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.53.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.53.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.53.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.53.up_proj.weight_scale": "model-00042-of-00046.safetensors", + 
"model.language_model.layers.29.mlp.experts.54.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.54.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.54.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.54.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.54.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.54.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.55.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.55.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.55.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.55.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.55.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.55.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.56.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.56.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.56.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.56.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.56.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.56.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.57.down_proj.weight": 
"model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.57.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.57.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.57.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.57.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.57.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.58.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.58.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.58.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.58.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.58.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.58.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.59.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.59.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.59.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.59.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.59.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.59.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.6.down_proj.weight": "model-00042-of-00046.safetensors", + 
"model.language_model.layers.29.mlp.experts.6.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.6.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.6.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.6.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.6.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.60.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.60.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.60.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.60.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.60.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.60.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.61.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.61.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.61.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.61.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.61.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.61.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.62.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.62.down_proj.weight_scale": 
"model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.62.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.62.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.62.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.62.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.63.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.63.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.63.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.63.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.63.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.63.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.64.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.64.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.64.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.64.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.64.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.64.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.65.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.65.down_proj.weight_scale": "model-00042-of-00046.safetensors", + 
"model.language_model.layers.29.mlp.experts.65.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.65.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.65.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.65.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.66.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.66.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.66.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.66.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.66.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.66.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.67.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.67.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.67.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.67.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.67.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.67.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.68.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.68.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.68.gate_proj.weight": 
"model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.68.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.68.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.68.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.69.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.69.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.69.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.69.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.69.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.69.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.7.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.7.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.7.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.7.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.7.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.7.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.70.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.70.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.70.gate_proj.weight": "model-00042-of-00046.safetensors", + 
"model.language_model.layers.29.mlp.experts.70.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.70.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.70.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.71.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.71.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.71.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.71.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.71.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.71.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.72.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.72.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.72.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.72.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.72.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.72.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.73.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.73.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.73.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.73.gate_proj.weight_scale": 
"model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.73.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.73.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.74.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.74.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.74.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.74.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.74.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.74.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.75.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.75.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.75.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.75.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.75.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.75.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.76.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.76.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.76.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.76.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + 
"model.language_model.layers.29.mlp.experts.76.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.76.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.77.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.77.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.77.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.77.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.77.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.77.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.78.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.78.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.78.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.78.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.78.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.78.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.79.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.79.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.79.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.79.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.79.up_proj.weight": 
"model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.79.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.8.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.8.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.8.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.8.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.8.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.8.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.80.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.80.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.80.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.80.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.80.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.80.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.81.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.81.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.81.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.81.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.81.up_proj.weight": "model-00042-of-00046.safetensors", + 
"model.language_model.layers.29.mlp.experts.81.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.82.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.82.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.82.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.82.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.82.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.82.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.83.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.83.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.83.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.83.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.83.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.83.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.84.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.84.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.84.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.84.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.84.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.84.up_proj.weight_scale": 
"model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.85.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.85.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.85.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.85.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.85.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.85.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.86.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.86.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.86.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.86.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.86.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.86.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.87.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.87.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.87.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.87.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.87.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.87.up_proj.weight_scale": "model-00042-of-00046.safetensors", + 
"model.language_model.layers.29.mlp.experts.88.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.88.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.88.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.88.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.88.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.88.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.89.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.89.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.89.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.89.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.89.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.89.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.9.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.9.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.9.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.9.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.9.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.9.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.90.down_proj.weight": 
"model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.90.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.90.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.90.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.90.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.90.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.91.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.91.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.91.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.91.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.91.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.91.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.92.down_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.92.down_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.92.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.92.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.92.up_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.92.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.93.down_proj.weight": "model-00043-of-00046.safetensors", + 
"model.language_model.layers.29.mlp.experts.93.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.93.gate_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.93.gate_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.93.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.93.up_proj.weight_scale": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.94.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.94.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.94.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.94.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.94.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.94.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.95.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.95.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.95.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.95.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.95.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.95.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.96.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.96.down_proj.weight_scale": 
"model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.96.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.96.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.96.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.96.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.97.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.97.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.97.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.97.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.97.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.97.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.98.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.98.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.98.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.98.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.98.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.98.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.99.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.99.down_proj.weight_scale": "model-00043-of-00046.safetensors", + 
"model.language_model.layers.29.mlp.experts.99.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.99.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.99.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.experts.99.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.gate.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.mlp.shared_experts.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.shared_experts.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.shared_experts.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.shared_experts.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.shared_experts.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.mlp.shared_experts.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.29.self_attn.k_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.self_attn.o_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.self_attn.q_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.29.self_attn.v_proj.weight": "model-00042-of-00046.safetensors", + "model.language_model.layers.3.input_layernorm.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.0.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.0.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.0.gate_proj.weight": "model-00005-of-00046.safetensors", + 
"model.language_model.layers.3.mlp.experts.0.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.0.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.0.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.1.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.1.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.1.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.1.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.1.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.1.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.10.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.10.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.10.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.10.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.10.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.10.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.100.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.100.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.100.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.100.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + 
"model.language_model.layers.3.mlp.experts.100.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.100.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.101.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.101.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.101.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.101.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.101.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.101.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.102.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.102.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.102.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.102.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.102.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.102.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.103.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.103.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.103.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.103.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.103.up_proj.weight": 
"model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.103.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.104.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.104.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.104.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.104.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.104.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.104.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.105.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.105.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.105.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.105.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.105.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.105.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.106.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.106.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.106.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.106.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.106.up_proj.weight": "model-00006-of-00046.safetensors", + 
"model.language_model.layers.3.mlp.experts.106.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.107.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.107.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.107.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.107.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.107.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.107.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.108.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.108.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.108.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.108.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.108.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.108.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.109.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.109.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.109.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.109.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.109.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.109.up_proj.weight_scale": 
"model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.11.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.11.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.11.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.11.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.11.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.11.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.110.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.110.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.110.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.110.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.110.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.110.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.111.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.111.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.111.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.111.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.111.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.111.up_proj.weight_scale": "model-00006-of-00046.safetensors", + 
"model.language_model.layers.3.mlp.experts.112.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.112.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.112.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.112.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.112.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.112.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.113.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.113.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.113.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.113.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.113.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.113.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.114.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.114.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.114.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.114.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.114.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.114.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.115.down_proj.weight": 
"model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.115.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.115.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.115.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.115.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.115.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.116.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.116.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.116.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.116.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.116.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.116.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.117.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.117.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.117.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.117.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.117.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.117.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.118.down_proj.weight": "model-00007-of-00046.safetensors", + 
"model.language_model.layers.3.mlp.experts.118.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.118.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.118.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.118.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.118.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.119.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.119.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.119.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.119.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.119.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.119.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.12.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.12.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.12.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.12.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.12.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.12.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.120.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.120.down_proj.weight_scale": 
"model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.120.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.120.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.120.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.120.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.121.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.121.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.121.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.121.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.121.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.121.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.122.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.122.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.122.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.122.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.122.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.122.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.123.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.123.down_proj.weight_scale": "model-00007-of-00046.safetensors", + 
"model.language_model.layers.3.mlp.experts.123.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.123.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.123.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.123.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.124.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.124.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.124.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.124.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.124.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.124.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.125.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.125.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.125.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.125.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.125.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.125.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.126.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.126.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.126.gate_proj.weight": 
"model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.126.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.126.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.126.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.127.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.127.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.127.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.127.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.127.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.127.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.13.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.13.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.13.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.13.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.13.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.13.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.14.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.14.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.14.gate_proj.weight": "model-00006-of-00046.safetensors", + 
"model.language_model.layers.3.mlp.experts.14.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.14.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.14.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.15.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.15.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.15.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.15.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.15.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.15.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.16.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.16.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.16.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.16.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.16.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.16.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.17.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.17.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.17.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.17.gate_proj.weight_scale": 
"model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.17.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.17.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.18.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.18.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.18.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.18.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.18.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.18.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.19.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.19.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.19.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.19.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.19.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.19.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.2.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.2.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.2.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.2.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.2.up_proj.weight": 
"model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.2.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.20.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.20.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.20.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.20.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.20.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.20.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.21.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.21.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.21.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.21.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.21.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.21.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.22.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.22.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.22.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.22.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.22.up_proj.weight": "model-00006-of-00046.safetensors", + 
"model.language_model.layers.3.mlp.experts.22.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.23.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.23.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.23.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.23.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.23.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.23.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.24.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.24.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.24.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.24.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.24.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.24.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.25.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.25.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.25.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.25.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.25.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.25.up_proj.weight_scale": 
"model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.26.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.26.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.26.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.26.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.26.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.26.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.27.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.27.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.27.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.27.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.27.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.27.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.28.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.28.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.28.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.28.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.28.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.28.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.29.down_proj.weight": 
"model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.29.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.29.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.29.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.29.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.29.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.3.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.3.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.3.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.3.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.3.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.3.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.30.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.30.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.30.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.30.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.30.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.30.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.31.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.31.down_proj.weight_scale": 
"model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.31.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.31.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.31.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.31.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.32.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.32.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.32.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.32.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.32.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.32.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.33.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.33.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.33.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.33.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.33.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.33.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.34.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.34.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.34.gate_proj.weight": 
"model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.34.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.34.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.34.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.35.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.35.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.35.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.35.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.35.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.35.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.36.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.36.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.36.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.36.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.36.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.36.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.37.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.37.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.37.gate_proj.weight": "model-00006-of-00046.safetensors", + 
"model.language_model.layers.3.mlp.experts.37.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.37.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.37.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.38.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.38.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.38.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.38.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.38.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.38.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.39.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.39.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.39.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.39.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.39.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.39.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.4.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.4.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.4.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.4.gate_proj.weight_scale": "model-00005-of-00046.safetensors", 
+ "model.language_model.layers.3.mlp.experts.4.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.4.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.40.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.40.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.40.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.40.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.40.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.40.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.41.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.41.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.41.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.41.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.41.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.41.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.42.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.42.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.42.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.42.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.42.up_proj.weight": "model-00006-of-00046.safetensors", + 
"model.language_model.layers.3.mlp.experts.42.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.43.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.43.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.43.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.43.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.43.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.43.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.44.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.44.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.44.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.44.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.44.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.44.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.45.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.45.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.45.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.45.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.45.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.45.up_proj.weight_scale": 
"model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.46.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.46.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.46.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.46.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.46.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.46.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.47.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.47.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.47.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.47.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.47.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.47.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.48.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.48.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.48.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.48.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.48.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.48.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.49.down_proj.weight": 
"model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.49.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.49.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.49.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.49.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.49.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.5.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.5.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.5.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.5.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.5.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.5.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.50.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.50.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.50.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.50.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.50.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.50.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.51.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.51.down_proj.weight_scale": 
"model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.51.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.51.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.51.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.51.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.52.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.52.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.52.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.52.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.52.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.52.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.53.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.53.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.53.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.53.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.53.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.53.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.54.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.54.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.54.gate_proj.weight": 
"model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.54.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.54.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.54.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.55.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.55.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.55.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.55.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.55.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.55.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.56.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.56.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.56.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.56.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.56.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.56.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.57.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.57.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.57.gate_proj.weight": "model-00006-of-00046.safetensors", + 
"model.language_model.layers.3.mlp.experts.57.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.57.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.57.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.58.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.58.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.58.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.58.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.58.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.58.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.59.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.59.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.59.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.59.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.59.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.59.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.6.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.6.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.6.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.6.gate_proj.weight_scale": "model-00005-of-00046.safetensors", 
+ "model.language_model.layers.3.mlp.experts.6.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.6.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.60.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.60.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.60.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.60.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.60.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.60.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.61.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.61.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.61.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.61.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.61.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.61.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.62.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.62.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.62.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.62.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.62.up_proj.weight": "model-00006-of-00046.safetensors", + 
"model.language_model.layers.3.mlp.experts.62.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.63.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.63.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.63.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.63.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.63.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.63.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.64.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.64.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.64.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.64.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.64.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.64.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.65.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.65.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.65.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.65.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.65.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.65.up_proj.weight_scale": 
"model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.66.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.66.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.66.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.66.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.66.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.66.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.67.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.67.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.67.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.67.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.67.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.67.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.68.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.68.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.68.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.68.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.68.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.68.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.69.down_proj.weight": 
"model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.69.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.69.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.69.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.69.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.69.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.7.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.7.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.7.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.7.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.7.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.7.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.70.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.70.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.70.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.70.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.70.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.70.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.71.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.71.down_proj.weight_scale": 
"model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.71.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.71.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.71.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.71.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.72.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.72.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.72.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.72.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.72.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.72.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.73.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.73.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.73.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.73.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.73.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.73.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.74.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.74.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.74.gate_proj.weight": 
"model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.74.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.74.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.74.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.75.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.75.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.75.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.75.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.75.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.75.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.76.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.76.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.76.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.76.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.76.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.76.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.77.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.77.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.77.gate_proj.weight": "model-00006-of-00046.safetensors", + 
"model.language_model.layers.3.mlp.experts.77.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.77.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.77.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.78.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.78.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.78.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.78.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.78.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.78.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.79.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.79.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.79.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.79.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.79.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.79.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.8.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.8.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.8.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.8.gate_proj.weight_scale": "model-00005-of-00046.safetensors", 
+ "model.language_model.layers.3.mlp.experts.8.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.8.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.80.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.80.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.80.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.80.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.80.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.80.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.81.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.81.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.81.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.81.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.81.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.81.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.82.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.82.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.82.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.82.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.82.up_proj.weight": "model-00006-of-00046.safetensors", + 
"model.language_model.layers.3.mlp.experts.82.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.83.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.83.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.83.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.83.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.83.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.83.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.84.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.84.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.84.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.84.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.84.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.84.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.85.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.85.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.85.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.85.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.85.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.85.up_proj.weight_scale": 
"model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.86.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.86.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.86.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.86.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.86.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.86.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.87.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.87.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.87.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.87.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.87.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.87.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.88.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.88.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.88.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.88.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.88.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.88.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.89.down_proj.weight": 
"model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.89.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.89.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.89.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.89.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.89.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.9.down_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.9.down_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.9.gate_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.9.gate_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.9.up_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.9.up_proj.weight_scale": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.90.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.90.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.90.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.90.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.90.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.90.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.91.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.91.down_proj.weight_scale": 
"model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.91.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.91.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.91.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.91.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.92.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.92.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.92.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.92.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.92.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.92.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.93.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.93.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.93.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.93.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.93.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.93.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.94.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.94.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.94.gate_proj.weight": 
"model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.94.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.94.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.94.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.95.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.95.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.95.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.95.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.95.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.95.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.96.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.96.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.96.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.96.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.96.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.96.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.97.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.97.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.97.gate_proj.weight": "model-00006-of-00046.safetensors", + 
"model.language_model.layers.3.mlp.experts.97.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.97.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.97.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.98.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.98.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.98.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.98.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.98.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.98.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.99.down_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.99.down_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.99.gate_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.99.gate_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.99.up_proj.weight": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.experts.99.up_proj.weight_scale": "model-00006-of-00046.safetensors", + "model.language_model.layers.3.mlp.gate.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.mlp.shared_experts.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.shared_experts.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.shared_experts.gate_proj.weight": "model-00007-of-00046.safetensors", + 
"model.language_model.layers.3.mlp.shared_experts.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.shared_experts.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.mlp.shared_experts.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.3.self_attn.k_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.self_attn.o_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.self_attn.q_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.3.self_attn.v_proj.weight": "model-00005-of-00046.safetensors", + "model.language_model.layers.30.input_layernorm.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.0.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.0.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.0.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.0.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.0.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.0.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.1.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.1.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.1.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.1.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.1.up_proj.weight": "model-00043-of-00046.safetensors", + 
"model.language_model.layers.30.mlp.experts.1.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.10.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.10.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.10.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.10.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.10.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.10.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.100.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.100.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.100.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.100.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.100.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.100.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.101.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.101.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.101.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.101.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.101.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.101.up_proj.weight_scale": 
"model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.102.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.102.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.102.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.102.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.102.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.102.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.103.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.103.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.103.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.103.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.103.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.103.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.104.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.104.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.104.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.104.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.104.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.104.up_proj.weight_scale": "model-00044-of-00046.safetensors", + 
"model.language_model.layers.30.mlp.experts.105.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.105.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.105.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.105.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.105.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.105.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.106.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.106.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.106.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.106.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.106.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.106.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.107.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.107.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.107.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.107.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.107.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.107.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.108.down_proj.weight": 
"model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.108.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.108.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.108.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.108.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.108.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.109.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.109.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.109.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.109.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.109.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.109.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.11.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.11.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.11.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.11.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.11.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.11.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.110.down_proj.weight": "model-00044-of-00046.safetensors", + 
"model.language_model.layers.30.mlp.experts.110.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.110.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.110.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.110.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.110.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.111.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.111.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.111.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.111.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.111.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.111.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.112.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.112.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.112.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.112.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.112.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.112.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.113.down_proj.weight": "model-00044-of-00046.safetensors", + 
"model.language_model.layers.30.mlp.experts.113.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.113.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.113.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.113.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.113.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.114.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.114.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.114.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.114.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.114.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.114.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.115.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.115.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.115.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.115.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.115.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.115.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.116.down_proj.weight": "model-00044-of-00046.safetensors", + 
"model.language_model.layers.30.mlp.experts.116.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.116.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.116.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.116.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.116.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.117.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.117.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.117.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.117.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.117.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.117.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.118.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.118.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.118.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.118.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.118.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.118.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.119.down_proj.weight": "model-00044-of-00046.safetensors", + 
"model.language_model.layers.30.mlp.experts.119.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.119.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.119.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.119.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.119.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.12.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.12.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.12.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.12.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.12.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.12.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.120.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.120.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.120.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.120.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.120.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.120.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.121.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.121.down_proj.weight_scale": 
"model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.121.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.121.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.121.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.121.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.122.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.122.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.122.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.122.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.122.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.122.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.123.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.123.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.123.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.123.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.123.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.123.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.124.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.124.down_proj.weight_scale": "model-00044-of-00046.safetensors", + 
"model.language_model.layers.30.mlp.experts.124.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.124.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.124.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.124.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.125.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.125.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.125.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.125.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.125.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.125.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.126.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.126.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.126.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.126.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.126.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.126.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.127.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.127.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.127.gate_proj.weight": 
"model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.127.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.127.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.127.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.13.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.13.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.13.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.13.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.13.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.13.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.14.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.14.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.14.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.14.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.14.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.14.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.15.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.15.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.15.gate_proj.weight": "model-00043-of-00046.safetensors", + 
"model.language_model.layers.30.mlp.experts.15.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.15.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.15.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.16.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.16.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.16.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.16.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.16.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.16.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.17.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.17.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.17.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.17.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.17.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.17.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.18.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.18.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.18.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.18.gate_proj.weight_scale": 
"model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.18.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.18.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.19.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.19.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.19.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.19.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.19.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.19.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.2.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.2.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.2.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.2.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.2.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.2.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.20.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.20.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.20.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.20.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + 
"model.language_model.layers.30.mlp.experts.20.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.20.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.21.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.21.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.21.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.21.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.21.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.21.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.22.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.22.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.22.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.22.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.22.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.22.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.23.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.23.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.23.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.23.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.23.up_proj.weight": 
"model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.23.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.24.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.24.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.24.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.24.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.24.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.24.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.25.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.25.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.25.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.25.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.25.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.25.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.26.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.26.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.26.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.26.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.26.up_proj.weight": "model-00043-of-00046.safetensors", + 
"model.language_model.layers.30.mlp.experts.26.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.27.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.27.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.27.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.27.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.27.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.27.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.28.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.28.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.28.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.28.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.28.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.28.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.29.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.29.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.29.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.29.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.29.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.29.up_proj.weight_scale": 
"model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.3.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.3.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.3.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.3.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.3.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.3.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.30.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.30.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.30.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.30.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.30.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.30.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.31.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.31.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.31.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.31.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.31.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.31.up_proj.weight_scale": "model-00043-of-00046.safetensors", + 
"model.language_model.layers.30.mlp.experts.32.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.32.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.32.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.32.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.32.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.32.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.33.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.33.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.33.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.33.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.33.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.33.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.34.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.34.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.34.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.34.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.34.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.34.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.35.down_proj.weight": 
"model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.35.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.35.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.35.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.35.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.35.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.36.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.36.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.36.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.36.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.36.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.36.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.37.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.37.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.37.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.37.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.37.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.37.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.38.down_proj.weight": "model-00043-of-00046.safetensors", + 
"model.language_model.layers.30.mlp.experts.38.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.38.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.38.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.38.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.38.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.39.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.39.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.39.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.39.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.39.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.39.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.4.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.4.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.4.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.4.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.4.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.4.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.40.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.40.down_proj.weight_scale": 
"model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.40.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.40.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.40.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.40.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.41.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.41.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.41.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.41.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.41.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.41.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.42.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.42.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.42.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.42.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.42.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.42.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.43.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.43.down_proj.weight_scale": "model-00043-of-00046.safetensors", + 
"model.language_model.layers.30.mlp.experts.43.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.43.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.43.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.43.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.44.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.44.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.44.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.44.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.44.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.44.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.45.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.45.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.45.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.45.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.45.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.45.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.46.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.46.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.46.gate_proj.weight": 
"model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.46.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.46.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.46.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.47.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.47.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.47.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.47.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.47.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.47.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.48.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.48.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.48.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.48.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.48.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.48.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.49.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.49.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.49.gate_proj.weight": "model-00043-of-00046.safetensors", + 
"model.language_model.layers.30.mlp.experts.49.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.49.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.49.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.5.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.5.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.5.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.5.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.5.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.5.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.50.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.50.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.50.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.50.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.50.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.50.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.51.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.51.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.51.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.51.gate_proj.weight_scale": 
"model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.51.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.51.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.52.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.52.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.52.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.52.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.52.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.52.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.53.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.53.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.53.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.53.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.53.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.53.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.54.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.54.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.54.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.54.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + 
"model.language_model.layers.30.mlp.experts.54.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.54.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.55.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.55.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.55.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.55.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.55.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.55.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.56.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.56.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.56.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.56.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.56.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.56.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.57.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.57.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.57.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.57.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.57.up_proj.weight": 
"model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.57.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.58.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.58.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.58.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.58.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.58.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.58.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.59.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.59.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.59.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.59.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.59.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.59.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.6.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.6.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.6.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.6.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.6.up_proj.weight": "model-00043-of-00046.safetensors", + 
"model.language_model.layers.30.mlp.experts.6.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.60.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.60.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.60.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.60.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.60.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.60.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.61.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.61.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.61.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.61.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.61.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.61.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.62.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.62.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.62.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.62.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.62.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.62.up_proj.weight_scale": 
"model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.63.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.63.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.63.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.63.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.63.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.63.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.64.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.64.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.64.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.64.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.64.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.64.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.65.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.65.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.65.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.65.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.65.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.65.up_proj.weight_scale": "model-00044-of-00046.safetensors", + 
"model.language_model.layers.30.mlp.experts.66.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.66.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.66.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.66.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.66.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.66.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.67.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.67.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.67.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.67.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.67.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.67.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.68.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.68.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.68.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.68.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.68.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.68.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.69.down_proj.weight": 
"model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.69.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.69.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.69.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.69.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.69.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.7.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.7.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.7.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.7.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.7.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.7.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.70.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.70.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.70.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.70.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.70.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.70.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.71.down_proj.weight": "model-00044-of-00046.safetensors", + 
"model.language_model.layers.30.mlp.experts.71.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.71.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.71.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.71.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.71.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.72.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.72.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.72.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.72.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.72.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.72.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.73.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.73.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.73.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.73.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.73.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.73.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.74.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.74.down_proj.weight_scale": 
"model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.74.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.74.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.74.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.74.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.75.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.75.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.75.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.75.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.75.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.75.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.76.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.76.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.76.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.76.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.76.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.76.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.77.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.77.down_proj.weight_scale": "model-00044-of-00046.safetensors", + 
"model.language_model.layers.30.mlp.experts.77.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.77.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.77.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.77.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.78.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.78.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.78.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.78.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.78.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.78.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.79.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.79.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.79.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.79.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.79.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.79.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.8.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.8.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.8.gate_proj.weight": 
"model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.8.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.8.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.8.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.80.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.80.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.80.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.80.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.80.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.80.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.81.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.81.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.81.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.81.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.81.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.81.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.82.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.82.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.82.gate_proj.weight": "model-00044-of-00046.safetensors", + 
"model.language_model.layers.30.mlp.experts.82.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.82.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.82.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.83.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.83.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.83.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.83.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.83.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.83.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.84.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.84.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.84.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.84.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.84.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.84.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.85.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.85.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.85.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.85.gate_proj.weight_scale": 
"model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.85.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.85.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.86.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.86.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.86.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.86.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.86.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.86.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.87.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.87.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.87.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.87.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.87.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.87.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.88.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.88.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.88.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.88.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + 
"model.language_model.layers.30.mlp.experts.88.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.88.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.89.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.89.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.89.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.89.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.89.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.89.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.9.down_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.9.down_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.9.gate_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.9.gate_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.9.up_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.9.up_proj.weight_scale": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.90.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.90.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.90.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.90.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.90.up_proj.weight": 
"model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.90.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.91.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.91.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.91.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.91.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.91.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.91.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.92.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.92.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.92.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.92.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.92.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.92.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.93.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.93.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.93.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.93.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.93.up_proj.weight": "model-00044-of-00046.safetensors", + 
"model.language_model.layers.30.mlp.experts.93.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.94.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.94.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.94.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.94.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.94.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.94.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.95.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.95.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.95.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.95.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.95.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.95.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.96.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.96.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.96.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.96.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.96.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.96.up_proj.weight_scale": 
"model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.97.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.97.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.97.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.97.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.97.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.97.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.98.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.98.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.98.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.98.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.98.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.98.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.99.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.99.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.99.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.99.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.99.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.experts.99.up_proj.weight_scale": "model-00044-of-00046.safetensors", + 
"model.language_model.layers.30.mlp.gate.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.mlp.shared_experts.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.shared_experts.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.shared_experts.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.shared_experts.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.shared_experts.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.mlp.shared_experts.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.30.self_attn.k_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.self_attn.o_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.self_attn.q_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.30.self_attn.v_proj.weight": "model-00043-of-00046.safetensors", + "model.language_model.layers.31.input_layernorm.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.0.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.0.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.0.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.0.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.0.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.0.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.1.down_proj.weight": "model-00044-of-00046.safetensors", + 
"model.language_model.layers.31.mlp.experts.1.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.1.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.1.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.1.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.1.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.10.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.10.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.10.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.10.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.10.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.10.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.100.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.100.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.100.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.100.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.100.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.100.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.101.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.101.down_proj.weight_scale": 
"model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.101.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.101.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.101.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.101.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.102.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.102.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.102.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.102.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.102.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.102.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.103.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.103.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.103.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.103.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.103.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.103.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.104.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.104.down_proj.weight_scale": "model-00045-of-00046.safetensors", + 
"model.language_model.layers.31.mlp.experts.104.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.104.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.104.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.104.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.105.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.105.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.105.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.105.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.105.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.105.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.106.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.106.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.106.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.106.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.106.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.106.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.107.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.107.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.107.gate_proj.weight": 
"model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.107.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.107.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.107.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.108.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.108.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.108.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.108.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.108.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.108.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.109.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.109.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.109.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.109.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.109.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.109.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.11.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.11.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.11.gate_proj.weight": "model-00044-of-00046.safetensors", + 
"model.language_model.layers.31.mlp.experts.11.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.11.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.11.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.110.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.110.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.110.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.110.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.110.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.110.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.111.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.111.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.111.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.111.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.111.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.111.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.112.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.112.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.112.gate_proj.weight": "model-00045-of-00046.safetensors", + 
"model.language_model.layers.31.mlp.experts.112.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.112.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.112.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.113.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.113.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.113.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.113.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.113.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.113.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.114.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.114.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.114.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.114.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.114.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.114.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.115.down_proj.weight": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.115.down_proj.weight_scale": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.115.gate_proj.weight": "model-00046-of-00046.safetensors", + 
"model.language_model.layers.31.mlp.experts.115.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.115.up_proj.weight": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.115.up_proj.weight_scale": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.116.down_proj.weight": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.116.down_proj.weight_scale": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.116.gate_proj.weight": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.116.gate_proj.weight_scale": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.116.up_proj.weight": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.116.up_proj.weight_scale": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.117.down_proj.weight": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.117.down_proj.weight_scale": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.117.gate_proj.weight": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.117.gate_proj.weight_scale": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.117.up_proj.weight": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.117.up_proj.weight_scale": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.118.down_proj.weight": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.118.down_proj.weight_scale": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.118.gate_proj.weight": "model-00046-of-00046.safetensors", + 
"model.language_model.layers.31.mlp.experts.118.gate_proj.weight_scale": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.118.up_proj.weight": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.118.up_proj.weight_scale": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.119.down_proj.weight": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.119.down_proj.weight_scale": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.119.gate_proj.weight": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.119.gate_proj.weight_scale": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.119.up_proj.weight": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.119.up_proj.weight_scale": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.12.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.12.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.12.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.12.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.12.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.12.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.120.down_proj.weight": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.120.down_proj.weight_scale": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.120.gate_proj.weight": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.120.gate_proj.weight_scale": 
"model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.120.up_proj.weight": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.120.up_proj.weight_scale": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.121.down_proj.weight": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.121.down_proj.weight_scale": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.121.gate_proj.weight": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.121.gate_proj.weight_scale": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.121.up_proj.weight": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.121.up_proj.weight_scale": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.122.down_proj.weight": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.122.down_proj.weight_scale": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.122.gate_proj.weight": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.122.gate_proj.weight_scale": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.122.up_proj.weight": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.122.up_proj.weight_scale": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.123.down_proj.weight": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.123.down_proj.weight_scale": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.123.gate_proj.weight": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.123.gate_proj.weight_scale": "model-00046-of-00046.safetensors", + 
"model.language_model.layers.31.mlp.experts.123.up_proj.weight": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.123.up_proj.weight_scale": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.124.down_proj.weight": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.124.down_proj.weight_scale": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.124.gate_proj.weight": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.124.gate_proj.weight_scale": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.124.up_proj.weight": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.124.up_proj.weight_scale": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.125.down_proj.weight": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.125.down_proj.weight_scale": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.125.gate_proj.weight": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.125.gate_proj.weight_scale": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.125.up_proj.weight": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.125.up_proj.weight_scale": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.126.down_proj.weight": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.126.down_proj.weight_scale": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.126.gate_proj.weight": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.126.gate_proj.weight_scale": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.126.up_proj.weight": 
"model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.126.up_proj.weight_scale": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.127.down_proj.weight": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.127.down_proj.weight_scale": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.127.gate_proj.weight": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.127.gate_proj.weight_scale": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.127.up_proj.weight": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.127.up_proj.weight_scale": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.13.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.13.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.13.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.13.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.13.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.13.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.14.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.14.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.14.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.14.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.14.up_proj.weight": "model-00044-of-00046.safetensors", + 
"model.language_model.layers.31.mlp.experts.14.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.15.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.15.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.15.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.15.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.15.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.15.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.16.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.16.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.16.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.16.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.16.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.16.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.17.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.17.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.17.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.17.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.17.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.17.up_proj.weight_scale": 
"model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.18.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.18.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.18.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.18.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.18.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.18.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.19.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.19.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.19.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.19.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.19.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.19.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.2.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.2.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.2.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.2.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.2.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.2.up_proj.weight_scale": "model-00044-of-00046.safetensors", + 
"model.language_model.layers.31.mlp.experts.20.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.20.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.20.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.20.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.20.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.20.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.21.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.21.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.21.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.21.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.21.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.21.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.22.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.22.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.22.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.22.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.22.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.22.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.23.down_proj.weight": 
"model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.23.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.23.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.23.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.23.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.23.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.24.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.24.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.24.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.24.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.24.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.24.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.25.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.25.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.25.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.25.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.25.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.25.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.26.down_proj.weight": "model-00045-of-00046.safetensors", + 
"model.language_model.layers.31.mlp.experts.26.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.26.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.26.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.26.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.26.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.27.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.27.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.27.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.27.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.27.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.27.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.28.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.28.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.28.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.28.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.28.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.28.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.29.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.29.down_proj.weight_scale": 
"model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.29.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.29.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.29.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.29.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.3.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.3.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.3.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.3.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.3.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.3.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.30.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.30.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.30.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.30.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.30.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.30.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.31.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.31.down_proj.weight_scale": "model-00045-of-00046.safetensors", + 
"model.language_model.layers.31.mlp.experts.31.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.31.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.31.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.31.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.32.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.32.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.32.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.32.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.32.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.32.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.33.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.33.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.33.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.33.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.33.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.33.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.34.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.34.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.34.gate_proj.weight": 
"model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.34.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.34.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.34.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.35.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.35.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.35.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.35.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.35.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.35.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.36.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.36.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.36.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.36.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.36.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.36.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.37.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.37.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.37.gate_proj.weight": "model-00045-of-00046.safetensors", + 
"model.language_model.layers.31.mlp.experts.37.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.37.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.37.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.38.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.38.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.38.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.38.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.38.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.38.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.39.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.39.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.39.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.39.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.39.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.39.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.4.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.4.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.4.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.4.gate_proj.weight_scale": 
"model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.4.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.4.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.40.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.40.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.40.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.40.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.40.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.40.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.41.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.41.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.41.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.41.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.41.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.41.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.42.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.42.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.42.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.42.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + 
"model.language_model.layers.31.mlp.experts.42.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.42.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.43.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.43.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.43.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.43.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.43.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.43.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.44.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.44.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.44.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.44.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.44.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.44.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.45.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.45.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.45.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.45.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.45.up_proj.weight": 
"model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.45.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.46.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.46.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.46.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.46.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.46.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.46.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.47.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.47.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.47.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.47.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.47.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.47.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.48.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.48.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.48.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.48.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.48.up_proj.weight": "model-00045-of-00046.safetensors", + 
"model.language_model.layers.31.mlp.experts.48.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.49.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.49.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.49.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.49.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.49.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.49.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.5.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.5.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.5.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.5.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.5.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.5.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.50.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.50.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.50.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.50.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.50.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.50.up_proj.weight_scale": 
"model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.51.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.51.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.51.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.51.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.51.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.51.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.52.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.52.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.52.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.52.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.52.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.52.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.53.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.53.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.53.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.53.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.53.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.53.up_proj.weight_scale": "model-00045-of-00046.safetensors", + 
"model.language_model.layers.31.mlp.experts.54.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.54.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.54.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.54.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.54.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.54.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.55.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.55.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.55.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.55.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.55.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.55.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.56.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.56.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.56.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.56.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.56.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.56.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.57.down_proj.weight": 
"model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.57.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.57.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.57.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.57.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.57.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.58.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.58.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.58.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.58.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.58.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.58.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.59.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.59.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.59.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.59.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.59.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.59.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.6.down_proj.weight": "model-00044-of-00046.safetensors", + 
"model.language_model.layers.31.mlp.experts.6.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.6.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.6.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.6.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.6.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.60.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.60.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.60.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.60.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.60.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.60.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.61.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.61.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.61.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.61.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.61.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.61.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.62.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.62.down_proj.weight_scale": 
"model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.62.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.62.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.62.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.62.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.63.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.63.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.63.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.63.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.63.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.63.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.64.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.64.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.64.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.64.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.64.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.64.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.65.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.65.down_proj.weight_scale": "model-00045-of-00046.safetensors", + 
"model.language_model.layers.31.mlp.experts.65.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.65.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.65.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.65.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.66.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.66.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.66.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.66.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.66.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.66.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.67.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.67.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.67.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.67.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.67.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.67.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.68.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.68.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.68.gate_proj.weight": 
"model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.68.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.68.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.68.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.69.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.69.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.69.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.69.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.69.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.69.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.7.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.7.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.7.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.7.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.7.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.7.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.70.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.70.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.70.gate_proj.weight": "model-00045-of-00046.safetensors", + 
"model.language_model.layers.31.mlp.experts.70.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.70.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.70.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.71.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.71.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.71.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.71.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.71.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.71.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.72.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.72.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.72.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.72.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.72.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.72.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.73.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.73.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.73.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.73.gate_proj.weight_scale": 
"model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.73.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.73.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.74.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.74.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.74.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.74.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.74.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.74.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.75.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.75.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.75.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.75.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.75.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.75.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.76.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.76.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.76.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.76.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + 
"model.language_model.layers.31.mlp.experts.76.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.76.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.77.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.77.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.77.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.77.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.77.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.77.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.78.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.78.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.78.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.78.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.78.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.78.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.79.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.79.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.79.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.79.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.79.up_proj.weight": 
"model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.79.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.8.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.8.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.8.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.8.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.8.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.8.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.80.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.80.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.80.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.80.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.80.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.80.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.81.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.81.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.81.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.81.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.81.up_proj.weight": "model-00045-of-00046.safetensors", + 
"model.language_model.layers.31.mlp.experts.81.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.82.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.82.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.82.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.82.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.82.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.82.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.83.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.83.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.83.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.83.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.83.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.83.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.84.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.84.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.84.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.84.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.84.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.84.up_proj.weight_scale": 
"model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.85.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.85.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.85.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.85.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.85.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.85.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.86.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.86.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.86.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.86.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.86.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.86.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.87.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.87.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.87.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.87.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.87.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.87.up_proj.weight_scale": "model-00045-of-00046.safetensors", + 
"model.language_model.layers.31.mlp.experts.88.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.88.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.88.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.88.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.88.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.88.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.89.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.89.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.89.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.89.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.89.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.89.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.9.down_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.9.down_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.9.gate_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.9.gate_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.9.up_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.9.up_proj.weight_scale": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.90.down_proj.weight": 
"model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.90.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.90.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.90.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.90.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.90.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.91.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.91.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.91.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.91.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.91.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.91.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.92.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.92.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.92.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.92.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.92.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.92.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.93.down_proj.weight": "model-00045-of-00046.safetensors", + 
"model.language_model.layers.31.mlp.experts.93.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.93.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.93.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.93.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.93.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.94.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.94.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.94.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.94.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.94.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.94.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.95.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.95.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.95.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.95.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.95.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.95.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.96.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.96.down_proj.weight_scale": 
"model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.96.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.96.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.96.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.96.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.97.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.97.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.97.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.97.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.97.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.97.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.98.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.98.down_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.98.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.98.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.98.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.98.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.99.down_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.99.down_proj.weight_scale": "model-00045-of-00046.safetensors", + 
"model.language_model.layers.31.mlp.experts.99.gate_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.99.gate_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.99.up_proj.weight": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.experts.99.up_proj.weight_scale": "model-00045-of-00046.safetensors", + "model.language_model.layers.31.mlp.gate.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.mlp.shared_experts.down_proj.weight": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.shared_experts.down_proj.weight_scale": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.shared_experts.gate_proj.weight": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.shared_experts.gate_proj.weight_scale": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.shared_experts.up_proj.weight": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.mlp.shared_experts.up_proj.weight_scale": "model-00046-of-00046.safetensors", + "model.language_model.layers.31.self_attn.k_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.self_attn.o_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.self_attn.q_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.31.self_attn.v_proj.weight": "model-00044-of-00046.safetensors", + "model.language_model.layers.4.input_layernorm.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.0.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.0.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.0.gate_proj.weight": "model-00007-of-00046.safetensors", + 
"model.language_model.layers.4.mlp.experts.0.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.0.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.0.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.1.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.1.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.1.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.1.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.1.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.1.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.10.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.10.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.10.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.10.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.10.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.10.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.100.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.100.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.100.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.100.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + 
"model.language_model.layers.4.mlp.experts.100.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.100.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.101.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.101.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.101.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.101.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.101.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.101.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.102.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.102.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.102.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.102.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.102.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.102.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.103.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.103.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.103.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.103.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.103.up_proj.weight": 
"model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.103.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.104.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.104.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.104.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.104.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.104.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.104.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.105.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.105.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.105.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.105.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.105.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.105.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.106.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.106.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.106.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.106.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.106.up_proj.weight": "model-00008-of-00046.safetensors", + 
"model.language_model.layers.4.mlp.experts.106.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.107.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.107.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.107.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.107.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.107.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.107.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.108.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.108.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.108.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.108.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.108.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.108.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.109.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.109.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.109.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.109.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.109.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.109.up_proj.weight_scale": 
"model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.11.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.11.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.11.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.11.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.11.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.11.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.110.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.110.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.110.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.110.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.110.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.110.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.111.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.111.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.111.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.111.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.111.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.111.up_proj.weight_scale": "model-00008-of-00046.safetensors", + 
"model.language_model.layers.4.mlp.experts.112.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.112.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.112.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.112.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.112.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.112.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.113.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.113.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.113.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.113.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.113.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.113.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.114.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.114.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.114.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.114.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.114.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.114.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.115.down_proj.weight": 
"model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.115.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.115.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.115.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.115.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.115.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.116.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.116.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.116.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.116.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.116.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.116.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.117.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.117.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.117.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.117.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.117.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.117.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.118.down_proj.weight": "model-00008-of-00046.safetensors", + 
"model.language_model.layers.4.mlp.experts.118.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.118.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.118.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.118.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.118.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.119.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.119.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.119.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.119.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.119.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.119.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.12.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.12.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.12.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.12.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.12.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.12.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.120.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.120.down_proj.weight_scale": 
"model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.120.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.120.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.120.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.120.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.121.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.121.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.121.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.121.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.121.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.121.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.122.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.122.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.122.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.122.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.122.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.122.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.123.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.123.down_proj.weight_scale": "model-00008-of-00046.safetensors", + 
"model.language_model.layers.4.mlp.experts.123.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.123.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.123.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.123.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.124.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.124.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.124.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.124.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.124.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.124.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.125.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.125.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.125.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.125.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.125.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.125.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.126.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.126.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.126.gate_proj.weight": 
"model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.126.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.126.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.126.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.127.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.127.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.127.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.127.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.127.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.127.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.13.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.13.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.13.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.13.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.13.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.13.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.14.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.14.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.14.gate_proj.weight": "model-00007-of-00046.safetensors", + 
"model.language_model.layers.4.mlp.experts.14.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.14.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.14.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.15.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.15.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.15.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.15.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.15.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.15.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.16.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.16.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.16.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.16.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.16.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.16.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.17.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.17.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.17.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.17.gate_proj.weight_scale": 
"model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.17.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.17.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.18.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.18.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.18.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.18.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.18.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.18.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.19.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.19.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.19.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.19.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.19.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.19.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.2.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.2.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.2.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.2.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.2.up_proj.weight": 
"model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.2.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.20.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.20.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.20.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.20.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.20.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.20.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.21.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.21.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.21.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.21.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.21.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.21.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.22.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.22.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.22.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.22.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.22.up_proj.weight": "model-00007-of-00046.safetensors", + 
"model.language_model.layers.4.mlp.experts.22.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.23.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.23.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.23.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.23.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.23.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.23.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.24.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.24.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.24.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.24.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.24.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.24.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.25.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.25.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.25.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.25.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.25.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.25.up_proj.weight_scale": 
"model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.26.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.26.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.26.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.26.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.26.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.26.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.27.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.27.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.27.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.27.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.27.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.27.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.28.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.28.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.28.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.28.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.28.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.28.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.29.down_proj.weight": 
"model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.29.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.29.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.29.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.29.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.29.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.3.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.3.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.3.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.3.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.3.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.3.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.30.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.30.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.30.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.30.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.30.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.30.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.31.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.31.down_proj.weight_scale": 
"model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.31.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.31.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.31.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.31.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.32.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.32.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.32.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.32.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.32.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.32.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.33.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.33.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.33.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.33.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.33.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.33.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.34.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.34.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.34.gate_proj.weight": 
"model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.34.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.34.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.34.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.35.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.35.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.35.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.35.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.35.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.35.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.36.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.36.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.36.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.36.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.36.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.36.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.37.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.37.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.37.gate_proj.weight": "model-00007-of-00046.safetensors", + 
"model.language_model.layers.4.mlp.experts.37.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.37.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.37.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.38.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.38.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.38.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.38.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.38.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.38.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.39.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.39.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.39.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.39.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.39.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.39.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.4.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.4.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.4.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.4.gate_proj.weight_scale": "model-00007-of-00046.safetensors", 
+ "model.language_model.layers.4.mlp.experts.4.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.4.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.40.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.40.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.40.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.40.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.40.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.40.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.41.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.41.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.41.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.41.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.41.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.41.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.42.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.42.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.42.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.42.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.42.up_proj.weight": "model-00007-of-00046.safetensors", + 
"model.language_model.layers.4.mlp.experts.42.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.43.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.43.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.43.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.43.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.43.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.43.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.44.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.44.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.44.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.44.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.44.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.44.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.45.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.45.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.45.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.45.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.45.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.45.up_proj.weight_scale": 
"model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.46.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.46.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.46.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.46.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.46.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.46.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.47.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.47.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.47.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.47.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.47.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.47.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.48.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.48.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.48.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.48.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.48.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.48.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.49.down_proj.weight": 
"model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.49.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.49.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.49.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.49.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.49.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.5.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.5.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.5.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.5.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.5.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.5.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.50.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.50.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.50.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.50.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.50.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.50.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.51.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.51.down_proj.weight_scale": 
"model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.51.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.51.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.51.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.51.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.52.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.52.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.52.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.52.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.52.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.52.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.53.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.53.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.53.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.53.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.53.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.53.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.54.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.54.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.54.gate_proj.weight": 
"model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.54.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.54.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.54.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.55.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.55.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.55.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.55.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.55.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.55.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.56.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.56.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.56.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.56.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.56.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.56.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.57.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.57.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.57.gate_proj.weight": "model-00007-of-00046.safetensors", + 
"model.language_model.layers.4.mlp.experts.57.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.57.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.57.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.58.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.58.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.58.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.58.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.58.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.58.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.59.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.59.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.59.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.59.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.59.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.59.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.6.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.6.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.6.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.6.gate_proj.weight_scale": "model-00007-of-00046.safetensors", 
+ "model.language_model.layers.4.mlp.experts.6.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.6.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.60.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.60.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.60.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.60.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.60.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.60.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.61.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.61.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.61.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.61.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.61.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.61.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.62.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.62.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.62.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.62.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.62.up_proj.weight": "model-00007-of-00046.safetensors", + 
"model.language_model.layers.4.mlp.experts.62.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.63.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.63.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.63.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.63.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.63.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.63.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.64.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.64.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.64.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.64.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.64.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.64.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.65.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.65.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.65.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.65.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.65.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.65.up_proj.weight_scale": 
"model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.66.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.66.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.66.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.66.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.66.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.66.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.67.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.67.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.67.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.67.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.67.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.67.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.68.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.68.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.68.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.68.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.68.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.68.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.69.down_proj.weight": 
"model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.69.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.69.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.69.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.69.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.69.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.7.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.7.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.7.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.7.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.7.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.7.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.70.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.70.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.70.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.70.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.70.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.70.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.71.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.71.down_proj.weight_scale": 
"model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.71.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.71.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.71.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.71.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.72.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.72.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.72.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.72.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.72.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.72.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.73.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.73.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.73.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.73.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.73.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.73.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.74.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.74.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.74.gate_proj.weight": 
"model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.74.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.74.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.74.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.75.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.75.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.75.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.75.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.75.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.75.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.76.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.76.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.76.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.76.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.76.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.76.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.77.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.77.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.77.gate_proj.weight": "model-00008-of-00046.safetensors", + 
"model.language_model.layers.4.mlp.experts.77.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.77.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.77.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.78.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.78.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.78.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.78.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.78.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.78.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.79.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.79.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.79.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.79.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.79.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.79.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.8.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.8.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.8.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.8.gate_proj.weight_scale": "model-00007-of-00046.safetensors", 
+ "model.language_model.layers.4.mlp.experts.8.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.8.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.80.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.80.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.80.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.80.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.80.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.80.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.81.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.81.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.81.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.81.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.81.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.81.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.82.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.82.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.82.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.82.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.82.up_proj.weight": "model-00008-of-00046.safetensors", + 
"model.language_model.layers.4.mlp.experts.82.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.83.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.83.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.83.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.83.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.83.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.83.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.84.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.84.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.84.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.84.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.84.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.84.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.85.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.85.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.85.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.85.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.85.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.85.up_proj.weight_scale": 
"model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.86.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.86.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.86.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.86.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.86.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.86.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.87.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.87.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.87.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.87.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.87.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.87.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.88.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.88.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.88.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.88.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.88.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.88.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.89.down_proj.weight": 
"model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.89.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.89.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.89.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.89.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.89.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.9.down_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.9.down_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.9.gate_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.9.gate_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.9.up_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.9.up_proj.weight_scale": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.90.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.90.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.90.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.90.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.90.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.90.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.91.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.91.down_proj.weight_scale": 
"model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.91.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.91.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.91.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.91.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.92.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.92.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.92.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.92.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.92.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.92.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.93.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.93.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.93.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.93.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.93.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.93.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.94.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.94.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.94.gate_proj.weight": 
"model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.94.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.94.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.94.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.95.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.95.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.95.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.95.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.95.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.95.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.96.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.96.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.96.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.96.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.96.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.96.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.97.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.97.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.97.gate_proj.weight": "model-00008-of-00046.safetensors", + 
"model.language_model.layers.4.mlp.experts.97.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.97.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.97.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.98.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.98.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.98.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.98.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.98.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.98.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.99.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.99.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.99.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.99.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.99.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.experts.99.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.gate.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.mlp.shared_experts.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.shared_experts.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.shared_experts.gate_proj.weight": "model-00008-of-00046.safetensors", + 
"model.language_model.layers.4.mlp.shared_experts.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.shared_experts.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.mlp.shared_experts.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.4.self_attn.k_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.self_attn.o_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.self_attn.q_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.4.self_attn.v_proj.weight": "model-00007-of-00046.safetensors", + "model.language_model.layers.5.input_layernorm.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.0.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.0.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.0.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.0.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.0.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.0.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.1.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.1.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.1.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.1.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.1.up_proj.weight": "model-00008-of-00046.safetensors", + 
"model.language_model.layers.5.mlp.experts.1.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.10.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.10.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.10.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.10.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.10.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.10.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.100.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.100.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.100.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.100.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.100.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.100.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.101.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.101.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.101.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.101.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.101.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.101.up_proj.weight_scale": 
"model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.102.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.102.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.102.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.102.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.102.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.102.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.103.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.103.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.103.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.103.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.103.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.103.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.104.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.104.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.104.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.104.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.104.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.104.up_proj.weight_scale": "model-00009-of-00046.safetensors", + 
"model.language_model.layers.5.mlp.experts.105.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.105.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.105.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.105.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.105.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.105.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.106.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.106.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.106.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.106.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.106.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.106.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.107.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.107.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.107.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.107.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.107.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.107.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.108.down_proj.weight": 
"model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.108.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.108.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.108.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.108.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.108.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.109.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.109.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.109.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.109.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.109.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.109.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.11.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.11.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.11.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.11.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.11.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.11.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.110.down_proj.weight": "model-00009-of-00046.safetensors", + 
"model.language_model.layers.5.mlp.experts.110.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.110.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.110.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.110.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.110.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.111.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.111.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.111.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.111.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.111.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.111.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.112.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.112.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.112.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.112.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.112.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.112.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.113.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.113.down_proj.weight_scale": 
"model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.113.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.113.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.113.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.113.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.114.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.114.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.114.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.114.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.114.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.114.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.115.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.115.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.115.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.115.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.115.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.115.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.116.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.116.down_proj.weight_scale": "model-00009-of-00046.safetensors", + 
"model.language_model.layers.5.mlp.experts.116.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.116.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.116.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.116.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.117.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.117.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.117.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.117.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.117.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.117.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.118.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.118.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.118.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.118.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.118.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.118.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.119.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.119.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.119.gate_proj.weight": 
"model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.119.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.119.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.119.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.12.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.12.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.12.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.12.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.12.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.12.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.120.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.120.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.120.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.120.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.120.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.120.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.121.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.121.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.121.gate_proj.weight": "model-00009-of-00046.safetensors", + 
"model.language_model.layers.5.mlp.experts.121.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.121.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.121.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.122.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.122.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.122.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.122.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.122.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.122.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.123.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.123.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.123.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.123.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.123.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.123.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.124.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.124.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.124.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.124.gate_proj.weight_scale": 
"model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.124.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.124.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.125.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.125.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.125.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.125.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.125.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.125.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.126.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.126.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.126.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.126.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.126.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.126.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.127.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.127.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.127.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.127.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + 
"model.language_model.layers.5.mlp.experts.127.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.127.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.13.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.13.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.13.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.13.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.13.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.13.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.14.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.14.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.14.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.14.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.14.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.14.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.15.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.15.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.15.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.15.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.15.up_proj.weight": "model-00008-of-00046.safetensors", + 
"model.language_model.layers.5.mlp.experts.15.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.16.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.16.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.16.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.16.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.16.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.16.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.17.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.17.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.17.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.17.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.17.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.17.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.18.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.18.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.18.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.18.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.18.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.18.up_proj.weight_scale": 
"model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.19.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.19.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.19.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.19.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.19.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.19.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.2.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.2.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.2.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.2.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.2.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.2.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.20.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.20.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.20.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.20.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.20.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.20.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.21.down_proj.weight": 
"model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.21.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.21.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.21.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.21.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.21.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.22.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.22.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.22.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.22.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.22.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.22.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.23.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.23.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.23.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.23.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.23.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.23.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.24.down_proj.weight": "model-00008-of-00046.safetensors", + 
"model.language_model.layers.5.mlp.experts.24.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.24.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.24.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.24.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.24.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.25.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.25.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.25.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.25.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.25.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.25.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.26.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.26.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.26.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.26.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.26.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.26.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.27.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.27.down_proj.weight_scale": 
"model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.27.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.27.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.27.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.27.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.28.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.28.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.28.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.28.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.28.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.28.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.29.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.29.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.29.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.29.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.29.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.29.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.3.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.3.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.3.gate_proj.weight": 
"model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.3.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.3.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.3.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.30.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.30.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.30.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.30.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.30.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.30.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.31.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.31.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.31.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.31.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.31.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.31.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.32.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.32.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.32.gate_proj.weight": "model-00008-of-00046.safetensors", + 
"model.language_model.layers.5.mlp.experts.32.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.32.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.32.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.33.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.33.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.33.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.33.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.33.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.33.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.34.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.34.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.34.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.34.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.34.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.34.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.35.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.35.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.35.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.35.gate_proj.weight_scale": 
"model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.35.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.35.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.36.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.36.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.36.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.36.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.36.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.36.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.37.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.37.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.37.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.37.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.37.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.37.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.38.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.38.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.38.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.38.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.38.up_proj.weight": 
"model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.38.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.39.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.39.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.39.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.39.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.39.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.39.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.4.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.4.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.4.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.4.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.4.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.4.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.40.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.40.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.40.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.40.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.40.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.40.up_proj.weight_scale": 
"model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.41.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.41.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.41.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.41.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.41.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.41.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.42.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.42.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.42.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.42.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.42.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.42.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.43.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.43.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.43.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.43.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.43.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.43.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.44.down_proj.weight": 
"model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.44.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.44.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.44.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.44.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.44.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.45.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.45.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.45.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.45.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.45.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.45.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.46.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.46.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.46.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.46.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.46.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.46.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.47.down_proj.weight": "model-00009-of-00046.safetensors", + 
"model.language_model.layers.5.mlp.experts.47.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.47.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.47.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.47.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.47.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.48.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.48.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.48.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.48.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.48.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.48.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.49.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.49.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.49.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.49.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.49.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.49.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.5.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.5.down_proj.weight_scale": 
"model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.5.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.5.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.5.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.5.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.50.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.50.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.50.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.50.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.50.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.50.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.51.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.51.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.51.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.51.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.51.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.51.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.52.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.52.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.52.gate_proj.weight": 
"model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.52.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.52.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.52.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.53.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.53.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.53.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.53.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.53.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.53.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.54.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.54.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.54.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.54.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.54.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.54.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.55.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.55.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.55.gate_proj.weight": "model-00009-of-00046.safetensors", + 
"model.language_model.layers.5.mlp.experts.55.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.55.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.55.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.56.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.56.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.56.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.56.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.56.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.56.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.57.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.57.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.57.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.57.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.57.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.57.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.58.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.58.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.58.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.58.gate_proj.weight_scale": 
"model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.58.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.58.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.59.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.59.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.59.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.59.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.59.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.59.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.6.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.6.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.6.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.6.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.6.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.6.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.60.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.60.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.60.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.60.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.60.up_proj.weight": 
"model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.60.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.61.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.61.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.61.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.61.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.61.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.61.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.62.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.62.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.62.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.62.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.62.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.62.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.63.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.63.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.63.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.63.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.63.up_proj.weight": "model-00009-of-00046.safetensors", + 
"model.language_model.layers.5.mlp.experts.63.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.64.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.64.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.64.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.64.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.64.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.64.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.65.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.65.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.65.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.65.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.65.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.65.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.66.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.66.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.66.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.66.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.66.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.66.up_proj.weight_scale": 
"model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.67.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.67.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.67.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.67.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.67.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.67.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.68.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.68.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.68.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.68.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.68.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.68.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.69.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.69.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.69.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.69.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.69.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.69.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.7.down_proj.weight": 
"model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.7.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.7.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.7.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.7.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.7.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.70.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.70.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.70.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.70.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.70.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.70.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.71.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.71.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.71.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.71.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.71.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.71.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.72.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.72.down_proj.weight_scale": 
"model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.72.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.72.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.72.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.72.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.73.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.73.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.73.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.73.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.73.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.73.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.74.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.74.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.74.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.74.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.74.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.74.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.75.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.75.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.75.gate_proj.weight": 
"model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.75.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.75.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.75.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.76.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.76.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.76.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.76.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.76.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.76.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.77.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.77.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.77.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.77.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.77.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.77.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.78.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.78.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.78.gate_proj.weight": "model-00009-of-00046.safetensors", + 
"model.language_model.layers.5.mlp.experts.78.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.78.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.78.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.79.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.79.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.79.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.79.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.79.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.79.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.8.down_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.8.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.8.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.8.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.8.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.8.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.80.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.80.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.80.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.80.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + 
"model.language_model.layers.5.mlp.experts.80.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.80.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.81.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.81.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.81.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.81.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.81.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.81.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.82.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.82.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.82.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.82.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.82.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.82.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.83.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.83.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.83.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.83.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.83.up_proj.weight": "model-00009-of-00046.safetensors", + 
"model.language_model.layers.5.mlp.experts.83.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.84.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.84.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.84.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.84.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.84.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.84.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.85.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.85.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.85.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.85.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.85.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.85.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.86.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.86.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.86.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.86.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.86.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.86.up_proj.weight_scale": 
"model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.87.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.87.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.87.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.87.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.87.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.87.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.88.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.88.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.88.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.88.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.88.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.88.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.89.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.89.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.89.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.89.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.89.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.89.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.9.down_proj.weight": 
"model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.9.down_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.9.gate_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.9.gate_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.9.up_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.9.up_proj.weight_scale": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.90.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.90.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.90.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.90.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.90.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.90.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.91.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.91.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.91.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.91.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.91.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.91.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.92.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.92.down_proj.weight_scale": 
"model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.92.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.92.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.92.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.92.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.93.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.93.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.93.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.93.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.93.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.93.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.94.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.94.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.94.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.94.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.94.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.94.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.95.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.95.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.95.gate_proj.weight": 
"model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.95.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.95.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.95.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.96.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.96.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.96.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.96.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.96.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.96.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.97.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.97.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.97.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.97.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.97.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.97.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.98.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.98.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.98.gate_proj.weight": "model-00009-of-00046.safetensors", + 
"model.language_model.layers.5.mlp.experts.98.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.98.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.98.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.99.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.99.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.99.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.99.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.99.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.experts.99.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.gate.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.mlp.shared_experts.down_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.shared_experts.down_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.shared_experts.gate_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.shared_experts.gate_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.shared_experts.up_proj.weight": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.mlp.shared_experts.up_proj.weight_scale": "model-00009-of-00046.safetensors", + "model.language_model.layers.5.self_attn.k_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.self_attn.o_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.5.self_attn.q_proj.weight": "model-00008-of-00046.safetensors", + 
"model.language_model.layers.5.self_attn.v_proj.weight": "model-00008-of-00046.safetensors", + "model.language_model.layers.6.input_layernorm.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.0.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.0.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.0.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.0.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.0.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.0.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.1.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.1.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.1.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.1.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.1.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.1.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.10.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.10.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.10.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.10.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.10.up_proj.weight": "model-00010-of-00046.safetensors", + 
"model.language_model.layers.6.mlp.experts.10.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.100.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.100.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.100.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.100.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.100.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.100.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.101.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.101.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.101.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.101.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.101.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.101.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.102.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.102.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.102.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.102.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.102.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.102.up_proj.weight_scale": 
"model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.103.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.103.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.103.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.103.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.103.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.103.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.104.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.104.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.104.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.104.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.104.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.104.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.105.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.105.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.105.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.105.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.105.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.105.up_proj.weight_scale": "model-00011-of-00046.safetensors", + 
"model.language_model.layers.6.mlp.experts.106.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.106.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.106.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.106.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.106.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.106.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.107.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.107.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.107.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.107.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.107.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.107.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.108.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.108.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.108.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.108.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.108.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.108.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.109.down_proj.weight": 
"model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.109.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.109.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.109.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.109.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.109.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.11.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.11.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.11.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.11.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.11.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.11.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.110.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.110.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.110.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.110.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.110.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.110.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.111.down_proj.weight": "model-00011-of-00046.safetensors", + 
"model.language_model.layers.6.mlp.experts.111.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.111.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.111.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.111.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.111.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.112.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.112.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.112.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.112.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.112.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.112.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.113.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.113.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.113.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.113.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.113.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.113.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.114.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.114.down_proj.weight_scale": 
"model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.114.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.114.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.114.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.114.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.115.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.115.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.115.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.115.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.115.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.115.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.116.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.116.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.116.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.116.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.116.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.116.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.117.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.117.down_proj.weight_scale": "model-00011-of-00046.safetensors", + 
"model.language_model.layers.6.mlp.experts.117.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.117.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.117.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.117.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.118.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.118.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.118.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.118.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.118.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.118.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.119.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.119.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.119.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.119.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.119.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.119.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.12.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.12.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.12.gate_proj.weight": 
"model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.12.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.12.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.12.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.120.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.120.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.120.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.120.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.120.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.120.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.121.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.121.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.121.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.121.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.121.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.121.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.122.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.122.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.122.gate_proj.weight": "model-00011-of-00046.safetensors", + 
"model.language_model.layers.6.mlp.experts.122.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.122.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.122.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.123.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.123.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.123.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.123.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.123.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.123.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.124.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.124.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.124.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.124.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.124.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.124.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.125.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.125.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.125.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.125.gate_proj.weight_scale": 
"model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.125.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.125.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.126.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.126.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.126.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.126.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.126.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.126.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.127.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.127.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.127.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.127.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.127.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.127.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.13.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.13.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.13.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.13.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + 
"model.language_model.layers.6.mlp.experts.13.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.13.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.14.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.14.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.14.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.14.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.14.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.14.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.15.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.15.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.15.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.15.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.15.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.15.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.16.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.16.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.16.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.16.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.16.up_proj.weight": "model-00010-of-00046.safetensors", + 
"model.language_model.layers.6.mlp.experts.16.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.17.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.17.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.17.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.17.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.17.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.17.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.18.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.18.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.18.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.18.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.18.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.18.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.19.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.19.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.19.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.19.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.19.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.19.up_proj.weight_scale": 
"model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.2.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.2.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.2.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.2.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.2.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.2.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.20.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.20.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.20.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.20.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.20.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.20.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.21.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.21.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.21.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.21.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.21.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.21.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.22.down_proj.weight": 
"model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.22.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.22.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.22.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.22.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.22.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.23.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.23.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.23.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.23.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.23.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.23.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.24.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.24.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.24.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.24.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.24.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.24.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.25.down_proj.weight": "model-00010-of-00046.safetensors", + 
"model.language_model.layers.6.mlp.experts.25.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.25.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.25.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.25.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.25.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.26.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.26.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.26.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.26.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.26.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.26.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.27.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.27.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.27.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.27.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.27.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.27.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.28.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.28.down_proj.weight_scale": 
"model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.28.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.28.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.28.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.28.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.29.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.29.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.29.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.29.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.29.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.29.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.3.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.3.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.3.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.3.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.3.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.3.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.30.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.30.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.30.gate_proj.weight": 
"model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.30.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.30.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.30.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.31.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.31.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.31.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.31.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.31.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.31.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.32.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.32.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.32.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.32.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.32.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.32.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.33.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.33.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.33.gate_proj.weight": "model-00010-of-00046.safetensors", + 
"model.language_model.layers.6.mlp.experts.33.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.33.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.33.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.34.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.34.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.34.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.34.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.34.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.34.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.35.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.35.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.35.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.35.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.35.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.35.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.36.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.36.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.36.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.36.gate_proj.weight_scale": 
"model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.36.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.36.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.37.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.37.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.37.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.37.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.37.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.37.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.38.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.38.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.38.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.38.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.38.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.38.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.39.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.39.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.39.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.39.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.39.up_proj.weight": 
"model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.39.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.4.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.4.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.4.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.4.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.4.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.4.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.40.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.40.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.40.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.40.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.40.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.40.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.41.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.41.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.41.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.41.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.41.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.41.up_proj.weight_scale": 
"model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.42.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.42.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.42.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.42.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.42.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.42.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.43.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.43.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.43.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.43.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.43.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.43.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.44.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.44.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.44.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.44.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.44.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.44.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.45.down_proj.weight": 
"model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.45.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.45.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.45.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.45.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.45.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.46.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.46.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.46.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.46.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.46.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.46.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.47.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.47.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.47.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.47.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.47.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.47.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.48.down_proj.weight": "model-00010-of-00046.safetensors", + 
"model.language_model.layers.6.mlp.experts.48.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.48.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.48.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.48.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.48.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.49.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.49.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.49.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.49.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.49.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.49.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.5.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.5.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.5.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.5.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.5.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.5.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.50.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.50.down_proj.weight_scale": "model-00010-of-00046.safetensors", + 
"model.language_model.layers.6.mlp.experts.50.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.50.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.50.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.50.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.51.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.51.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.51.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.51.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.51.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.51.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.52.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.52.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.52.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.52.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.52.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.52.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.53.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.53.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.53.gate_proj.weight": "model-00010-of-00046.safetensors", + 
"model.language_model.layers.6.mlp.experts.53.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.53.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.53.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.54.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.54.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.54.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.54.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.54.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.54.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.55.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.55.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.55.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.55.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.55.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.55.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.56.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.56.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.56.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.56.gate_proj.weight_scale": 
"model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.56.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.56.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.57.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.57.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.57.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.57.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.57.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.57.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.58.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.58.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.58.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.58.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.58.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.58.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.59.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.59.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.59.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.59.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.59.up_proj.weight": 
"model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.59.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.6.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.6.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.6.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.6.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.6.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.6.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.60.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.60.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.60.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.60.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.60.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.60.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.61.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.61.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.61.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.61.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.61.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.61.up_proj.weight_scale": 
"model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.62.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.62.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.62.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.62.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.62.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.62.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.63.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.63.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.63.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.63.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.63.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.63.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.64.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.64.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.64.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.64.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.64.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.64.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.65.down_proj.weight": 
"model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.65.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.65.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.65.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.65.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.65.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.66.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.66.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.66.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.66.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.66.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.66.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.67.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.67.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.67.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.67.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.67.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.67.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.68.down_proj.weight": "model-00010-of-00046.safetensors", + 
"model.language_model.layers.6.mlp.experts.68.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.68.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.68.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.68.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.68.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.69.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.69.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.69.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.69.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.69.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.69.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.7.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.7.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.7.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.7.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.7.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.7.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.70.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.70.down_proj.weight_scale": "model-00010-of-00046.safetensors", + 
"model.language_model.layers.6.mlp.experts.70.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.70.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.70.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.70.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.71.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.71.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.71.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.71.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.71.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.71.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.72.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.72.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.72.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.72.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.72.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.72.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.73.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.73.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.73.gate_proj.weight": "model-00010-of-00046.safetensors", + 
"model.language_model.layers.6.mlp.experts.73.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.73.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.73.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.74.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.74.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.74.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.74.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.74.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.74.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.75.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.75.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.75.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.75.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.75.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.75.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.76.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.76.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.76.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.76.gate_proj.weight_scale": 
"model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.76.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.76.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.77.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.77.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.77.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.77.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.77.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.77.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.78.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.78.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.78.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.78.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.78.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.78.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.79.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.79.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.79.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.79.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.79.up_proj.weight": 
"model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.79.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.8.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.8.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.8.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.8.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.8.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.8.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.80.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.80.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.80.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.80.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.80.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.80.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.81.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.81.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.81.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.81.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.81.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.81.up_proj.weight_scale": 
"model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.82.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.82.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.82.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.82.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.82.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.82.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.83.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.83.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.83.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.83.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.83.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.83.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.84.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.84.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.84.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.84.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.84.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.84.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.85.down_proj.weight": 
"model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.85.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.85.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.85.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.85.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.85.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.86.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.86.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.86.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.86.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.86.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.86.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.87.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.87.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.87.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.87.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.87.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.87.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.88.down_proj.weight": "model-00010-of-00046.safetensors", + 
"model.language_model.layers.6.mlp.experts.88.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.88.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.88.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.88.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.88.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.89.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.89.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.89.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.89.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.89.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.89.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.9.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.9.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.9.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.9.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.9.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.9.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.90.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.90.down_proj.weight_scale": "model-00010-of-00046.safetensors", + 
"model.language_model.layers.6.mlp.experts.90.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.90.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.90.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.90.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.91.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.91.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.91.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.91.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.91.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.91.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.92.down_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.92.down_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.92.gate_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.92.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.92.up_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.92.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.93.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.93.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.93.gate_proj.weight": "model-00010-of-00046.safetensors", + 
"model.language_model.layers.6.mlp.experts.93.gate_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.93.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.93.up_proj.weight_scale": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.94.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.94.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.94.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.94.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.94.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.94.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.95.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.95.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.95.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.95.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.95.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.95.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.96.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.96.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.96.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.96.gate_proj.weight_scale": 
"model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.96.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.96.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.97.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.97.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.97.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.97.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.97.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.97.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.98.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.98.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.98.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.98.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.98.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.98.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.99.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.99.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.99.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.99.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.99.up_proj.weight": 
"model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.experts.99.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.gate.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.mlp.shared_experts.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.shared_experts.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.shared_experts.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.shared_experts.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.shared_experts.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.mlp.shared_experts.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.6.self_attn.k_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.self_attn.o_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.self_attn.q_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.6.self_attn.v_proj.weight": "model-00010-of-00046.safetensors", + "model.language_model.layers.7.input_layernorm.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.0.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.0.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.0.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.0.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.0.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.0.up_proj.weight_scale": "model-00011-of-00046.safetensors", + 
"model.language_model.layers.7.mlp.experts.1.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.1.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.1.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.1.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.1.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.1.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.10.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.10.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.10.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.10.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.10.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.10.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.100.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.100.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.100.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.100.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.100.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.100.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.101.down_proj.weight": "model-00012-of-00046.safetensors", + 
"model.language_model.layers.7.mlp.experts.101.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.101.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.101.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.101.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.101.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.102.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.102.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.102.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.102.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.102.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.102.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.103.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.103.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.103.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.103.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.103.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.103.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.104.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.104.down_proj.weight_scale": 
"model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.104.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.104.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.104.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.104.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.105.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.105.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.105.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.105.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.105.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.105.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.106.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.106.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.106.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.106.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.106.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.106.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.107.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.107.down_proj.weight_scale": "model-00012-of-00046.safetensors", + 
"model.language_model.layers.7.mlp.experts.107.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.107.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.107.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.107.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.108.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.108.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.108.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.108.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.108.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.108.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.109.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.109.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.109.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.109.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.109.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.109.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.11.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.11.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.11.gate_proj.weight": 
"model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.11.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.11.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.11.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.110.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.110.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.110.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.110.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.110.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.110.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.111.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.111.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.111.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.111.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.111.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.111.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.112.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.112.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.112.gate_proj.weight": "model-00012-of-00046.safetensors", + 
"model.language_model.layers.7.mlp.experts.112.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.112.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.112.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.113.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.113.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.113.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.113.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.113.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.113.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.114.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.114.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.114.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.114.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.114.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.114.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.115.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.115.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.115.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.115.gate_proj.weight_scale": 
"model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.115.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.115.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.116.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.116.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.116.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.116.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.116.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.116.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.117.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.117.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.117.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.117.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.117.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.117.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.118.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.118.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.118.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.118.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + 
"model.language_model.layers.7.mlp.experts.118.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.118.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.119.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.119.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.119.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.119.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.119.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.119.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.12.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.12.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.12.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.12.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.12.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.12.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.120.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.120.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.120.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.120.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.120.up_proj.weight": 
"model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.120.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.121.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.121.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.121.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.121.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.121.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.121.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.122.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.122.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.122.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.122.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.122.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.122.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.123.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.123.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.123.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.123.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.123.up_proj.weight": "model-00012-of-00046.safetensors", + 
"model.language_model.layers.7.mlp.experts.123.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.124.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.124.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.124.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.124.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.124.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.124.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.125.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.125.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.125.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.125.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.125.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.125.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.126.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.126.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.126.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.126.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.126.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.126.up_proj.weight_scale": 
"model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.127.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.127.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.127.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.127.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.127.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.127.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.13.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.13.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.13.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.13.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.13.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.13.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.14.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.14.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.14.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.14.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.14.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.14.up_proj.weight_scale": "model-00011-of-00046.safetensors", + 
"model.language_model.layers.7.mlp.experts.15.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.15.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.15.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.15.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.15.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.15.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.16.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.16.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.16.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.16.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.16.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.16.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.17.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.17.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.17.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.17.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.17.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.17.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.18.down_proj.weight": "model-00011-of-00046.safetensors", + 
"model.language_model.layers.7.mlp.experts.18.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.18.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.18.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.18.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.18.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.19.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.19.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.19.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.19.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.19.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.19.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.2.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.2.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.2.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.2.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.2.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.2.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.20.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.20.down_proj.weight_scale": "model-00011-of-00046.safetensors", + 
"model.language_model.layers.7.mlp.experts.20.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.20.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.20.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.20.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.21.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.21.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.21.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.21.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.21.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.21.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.22.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.22.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.22.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.22.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.22.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.22.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.23.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.23.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.23.gate_proj.weight": "model-00011-of-00046.safetensors", + 
"model.language_model.layers.7.mlp.experts.23.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.23.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.23.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.24.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.24.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.24.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.24.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.24.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.24.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.25.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.25.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.25.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.25.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.25.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.25.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.26.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.26.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.26.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.26.gate_proj.weight_scale": 
"model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.26.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.26.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.27.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.27.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.27.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.27.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.27.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.27.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.28.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.28.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.28.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.28.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.28.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.28.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.29.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.29.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.29.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.29.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.29.up_proj.weight": 
"model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.29.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.3.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.3.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.3.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.3.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.3.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.3.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.30.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.30.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.30.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.30.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.30.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.30.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.31.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.31.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.31.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.31.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.31.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.31.up_proj.weight_scale": 
"model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.32.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.32.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.32.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.32.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.32.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.32.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.33.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.33.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.33.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.33.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.33.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.33.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.34.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.34.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.34.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.34.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.34.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.34.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.35.down_proj.weight": 
"model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.35.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.35.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.35.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.35.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.35.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.36.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.36.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.36.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.36.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.36.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.36.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.37.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.37.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.37.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.37.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.37.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.37.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.38.down_proj.weight": "model-00011-of-00046.safetensors", + 
"model.language_model.layers.7.mlp.experts.38.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.38.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.38.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.38.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.38.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.39.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.39.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.39.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.39.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.39.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.39.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.4.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.4.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.4.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.4.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.4.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.4.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.40.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.40.down_proj.weight_scale": "model-00011-of-00046.safetensors", + 
"model.language_model.layers.7.mlp.experts.40.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.40.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.40.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.40.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.41.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.41.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.41.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.41.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.41.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.41.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.42.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.42.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.42.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.42.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.42.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.42.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.43.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.43.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.43.gate_proj.weight": "model-00011-of-00046.safetensors", + 
"model.language_model.layers.7.mlp.experts.43.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.43.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.43.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.44.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.44.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.44.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.44.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.44.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.44.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.45.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.45.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.45.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.45.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.45.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.45.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.46.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.46.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.46.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.46.gate_proj.weight_scale": 
"model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.46.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.46.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.47.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.47.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.47.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.47.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.47.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.47.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.48.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.48.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.48.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.48.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.48.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.48.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.49.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.49.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.49.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.49.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.49.up_proj.weight": 
"model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.49.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.5.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.5.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.5.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.5.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.5.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.5.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.50.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.50.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.50.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.50.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.50.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.50.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.51.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.51.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.51.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.51.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.51.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.51.up_proj.weight_scale": 
"model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.52.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.52.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.52.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.52.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.52.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.52.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.53.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.53.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.53.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.53.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.53.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.53.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.54.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.54.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.54.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.54.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.54.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.54.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.55.down_proj.weight": 
"model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.55.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.55.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.55.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.55.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.55.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.56.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.56.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.56.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.56.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.56.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.56.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.57.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.57.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.57.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.57.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.57.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.57.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.58.down_proj.weight": "model-00012-of-00046.safetensors", + 
"model.language_model.layers.7.mlp.experts.58.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.58.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.58.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.58.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.58.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.59.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.59.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.59.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.59.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.59.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.59.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.6.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.6.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.6.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.6.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.6.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.6.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.60.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.60.down_proj.weight_scale": "model-00012-of-00046.safetensors", + 
"model.language_model.layers.7.mlp.experts.60.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.60.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.60.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.60.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.61.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.61.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.61.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.61.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.61.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.61.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.62.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.62.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.62.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.62.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.62.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.62.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.63.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.63.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.63.gate_proj.weight": "model-00012-of-00046.safetensors", + 
"model.language_model.layers.7.mlp.experts.63.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.63.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.63.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.64.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.64.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.64.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.64.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.64.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.64.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.65.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.65.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.65.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.65.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.65.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.65.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.66.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.66.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.66.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.66.gate_proj.weight_scale": 
"model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.66.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.66.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.67.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.67.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.67.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.67.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.67.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.67.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.68.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.68.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.68.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.68.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.68.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.68.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.69.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.69.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.69.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.69.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.69.up_proj.weight": 
"model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.69.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.7.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.7.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.7.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.7.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.7.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.7.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.70.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.70.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.70.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.70.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.70.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.70.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.71.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.71.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.71.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.71.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.71.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.71.up_proj.weight_scale": 
"model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.72.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.72.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.72.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.72.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.72.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.72.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.73.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.73.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.73.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.73.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.73.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.73.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.74.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.74.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.74.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.74.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.74.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.74.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.75.down_proj.weight": 
"model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.75.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.75.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.75.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.75.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.75.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.76.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.76.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.76.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.76.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.76.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.76.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.77.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.77.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.77.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.77.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.77.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.77.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.78.down_proj.weight": "model-00012-of-00046.safetensors", + 
"model.language_model.layers.7.mlp.experts.78.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.78.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.78.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.78.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.78.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.79.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.79.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.79.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.79.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.79.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.79.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.8.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.8.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.8.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.8.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.8.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.8.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.80.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.80.down_proj.weight_scale": "model-00012-of-00046.safetensors", + 
"model.language_model.layers.7.mlp.experts.80.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.80.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.80.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.80.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.81.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.81.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.81.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.81.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.81.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.81.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.82.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.82.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.82.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.82.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.82.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.82.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.83.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.83.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.83.gate_proj.weight": "model-00012-of-00046.safetensors", + 
"model.language_model.layers.7.mlp.experts.83.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.83.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.83.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.84.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.84.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.84.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.84.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.84.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.84.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.85.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.85.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.85.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.85.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.85.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.85.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.86.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.86.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.86.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.86.gate_proj.weight_scale": 
"model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.86.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.86.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.87.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.87.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.87.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.87.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.87.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.87.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.88.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.88.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.88.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.88.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.88.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.88.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.89.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.89.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.89.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.89.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.89.up_proj.weight": 
"model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.89.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.9.down_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.9.down_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.9.gate_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.9.gate_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.9.up_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.9.up_proj.weight_scale": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.90.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.90.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.90.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.90.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.90.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.90.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.91.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.91.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.91.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.91.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.91.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.91.up_proj.weight_scale": 
"model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.92.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.92.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.92.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.92.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.92.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.92.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.93.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.93.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.93.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.93.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.93.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.93.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.94.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.94.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.94.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.94.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.94.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.94.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.95.down_proj.weight": 
"model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.95.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.95.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.95.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.95.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.95.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.96.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.96.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.96.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.96.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.96.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.96.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.97.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.97.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.97.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.97.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.97.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.97.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.98.down_proj.weight": "model-00012-of-00046.safetensors", + 
"model.language_model.layers.7.mlp.experts.98.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.98.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.98.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.98.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.98.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.99.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.99.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.99.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.99.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.99.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.experts.99.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.gate.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.mlp.shared_experts.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.shared_experts.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.shared_experts.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.shared_experts.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.shared_experts.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.mlp.shared_experts.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.7.self_attn.k_proj.weight": "model-00011-of-00046.safetensors", + 
"model.language_model.layers.7.self_attn.o_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.self_attn.q_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.7.self_attn.v_proj.weight": "model-00011-of-00046.safetensors", + "model.language_model.layers.8.input_layernorm.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.0.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.0.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.0.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.0.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.0.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.0.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.1.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.1.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.1.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.1.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.1.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.1.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.10.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.10.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.10.gate_proj.weight": "model-00012-of-00046.safetensors", + 
"model.language_model.layers.8.mlp.experts.10.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.10.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.10.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.100.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.100.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.100.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.100.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.100.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.100.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.101.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.101.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.101.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.101.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.101.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.101.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.102.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.102.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.102.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.102.gate_proj.weight_scale": 
"model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.102.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.102.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.103.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.103.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.103.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.103.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.103.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.103.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.104.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.104.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.104.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.104.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.104.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.104.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.105.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.105.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.105.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.105.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + 
"model.language_model.layers.8.mlp.experts.105.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.105.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.106.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.106.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.106.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.106.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.106.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.106.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.107.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.107.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.107.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.107.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.107.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.107.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.108.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.108.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.108.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.108.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.108.up_proj.weight": 
"model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.108.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.109.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.109.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.109.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.109.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.109.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.109.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.11.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.11.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.11.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.11.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.11.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.11.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.110.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.110.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.110.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.110.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.110.up_proj.weight": "model-00013-of-00046.safetensors", + 
"model.language_model.layers.8.mlp.experts.110.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.111.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.111.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.111.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.111.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.111.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.111.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.112.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.112.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.112.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.112.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.112.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.112.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.113.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.113.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.113.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.113.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.113.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.113.up_proj.weight_scale": 
"model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.114.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.114.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.114.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.114.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.114.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.114.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.115.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.115.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.115.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.115.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.115.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.115.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.116.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.116.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.116.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.116.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.116.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.116.up_proj.weight_scale": "model-00014-of-00046.safetensors", + 
"model.language_model.layers.8.mlp.experts.117.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.117.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.117.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.117.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.117.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.117.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.118.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.118.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.118.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.118.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.118.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.118.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.119.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.119.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.119.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.119.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.119.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.119.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.12.down_proj.weight": 
"model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.12.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.12.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.12.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.12.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.12.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.120.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.120.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.120.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.120.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.120.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.120.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.121.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.121.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.121.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.121.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.121.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.121.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.122.down_proj.weight": "model-00014-of-00046.safetensors", + 
"model.language_model.layers.8.mlp.experts.122.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.122.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.122.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.122.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.122.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.123.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.123.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.123.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.123.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.123.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.123.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.124.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.124.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.124.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.124.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.124.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.124.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.125.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.125.down_proj.weight_scale": 
"model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.125.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.125.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.125.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.125.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.126.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.126.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.126.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.126.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.126.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.126.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.127.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.127.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.127.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.127.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.127.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.127.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.13.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.13.down_proj.weight_scale": "model-00012-of-00046.safetensors", + 
"model.language_model.layers.8.mlp.experts.13.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.13.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.13.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.13.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.14.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.14.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.14.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.14.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.14.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.14.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.15.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.15.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.15.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.15.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.15.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.15.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.16.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.16.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.16.gate_proj.weight": "model-00013-of-00046.safetensors", + 
"model.language_model.layers.8.mlp.experts.16.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.16.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.16.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.17.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.17.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.17.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.17.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.17.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.17.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.18.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.18.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.18.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.18.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.18.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.18.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.19.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.19.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.19.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.19.gate_proj.weight_scale": 
"model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.19.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.19.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.2.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.2.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.2.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.2.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.2.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.2.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.20.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.20.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.20.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.20.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.20.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.20.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.21.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.21.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.21.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.21.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.21.up_proj.weight": 
"model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.21.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.22.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.22.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.22.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.22.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.22.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.22.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.23.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.23.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.23.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.23.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.23.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.23.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.24.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.24.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.24.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.24.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.24.up_proj.weight": "model-00013-of-00046.safetensors", + 
"model.language_model.layers.8.mlp.experts.24.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.25.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.25.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.25.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.25.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.25.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.25.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.26.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.26.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.26.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.26.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.26.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.26.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.27.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.27.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.27.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.27.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.27.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.27.up_proj.weight_scale": 
"model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.28.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.28.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.28.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.28.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.28.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.28.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.29.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.29.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.29.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.29.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.29.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.29.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.3.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.3.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.3.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.3.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.3.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.3.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.30.down_proj.weight": 
"model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.30.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.30.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.30.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.30.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.30.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.31.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.31.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.31.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.31.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.31.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.31.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.32.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.32.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.32.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.32.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.32.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.32.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.33.down_proj.weight": "model-00013-of-00046.safetensors", + 
"model.language_model.layers.8.mlp.experts.33.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.33.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.33.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.33.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.33.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.34.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.34.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.34.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.34.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.34.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.34.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.35.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.35.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.35.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.35.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.35.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.35.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.36.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.36.down_proj.weight_scale": 
"model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.36.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.36.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.36.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.36.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.37.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.37.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.37.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.37.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.37.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.37.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.38.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.38.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.38.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.38.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.38.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.38.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.39.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.39.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.39.gate_proj.weight": 
"model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.39.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.39.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.39.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.4.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.4.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.4.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.4.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.4.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.4.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.40.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.40.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.40.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.40.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.40.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.40.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.41.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.41.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.41.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.41.gate_proj.weight_scale": 
"model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.41.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.41.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.42.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.42.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.42.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.42.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.42.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.42.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.43.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.43.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.43.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.43.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.43.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.43.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.44.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.44.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.44.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.44.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.44.up_proj.weight": 
"model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.44.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.45.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.45.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.45.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.45.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.45.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.45.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.46.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.46.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.46.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.46.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.46.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.46.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.47.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.47.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.47.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.47.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.47.up_proj.weight": "model-00013-of-00046.safetensors", + 
"model.language_model.layers.8.mlp.experts.47.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.48.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.48.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.48.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.48.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.48.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.48.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.49.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.49.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.49.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.49.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.49.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.49.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.5.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.5.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.5.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.5.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.5.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.5.up_proj.weight_scale": "model-00012-of-00046.safetensors", + 
"model.language_model.layers.8.mlp.experts.50.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.50.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.50.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.50.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.50.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.50.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.51.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.51.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.51.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.51.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.51.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.51.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.52.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.52.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.52.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.52.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.52.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.52.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.53.down_proj.weight": "model-00013-of-00046.safetensors", + 
"model.language_model.layers.8.mlp.experts.53.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.53.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.53.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.53.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.53.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.54.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.54.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.54.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.54.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.54.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.54.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.55.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.55.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.55.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.55.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.55.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.55.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.56.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.56.down_proj.weight_scale": 
"model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.56.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.56.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.56.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.56.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.57.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.57.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.57.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.57.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.57.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.57.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.58.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.58.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.58.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.58.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.58.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.58.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.59.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.59.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.59.gate_proj.weight": 
"model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.59.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.59.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.59.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.6.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.6.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.6.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.6.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.6.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.6.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.60.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.60.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.60.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.60.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.60.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.60.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.61.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.61.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.61.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.61.gate_proj.weight_scale": 
"model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.61.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.61.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.62.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.62.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.62.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.62.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.62.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.62.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.63.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.63.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.63.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.63.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.63.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.63.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.64.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.64.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.64.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.64.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.64.up_proj.weight": 
"model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.64.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.65.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.65.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.65.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.65.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.65.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.65.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.66.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.66.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.66.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.66.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.66.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.66.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.67.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.67.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.67.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.67.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.67.up_proj.weight": "model-00013-of-00046.safetensors", + 
"model.language_model.layers.8.mlp.experts.67.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.68.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.68.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.68.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.68.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.68.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.68.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.69.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.69.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.69.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.69.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.69.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.69.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.7.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.7.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.7.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.7.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.7.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.7.up_proj.weight_scale": "model-00012-of-00046.safetensors", + 
"model.language_model.layers.8.mlp.experts.70.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.70.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.70.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.70.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.70.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.70.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.71.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.71.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.71.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.71.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.71.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.71.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.72.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.72.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.72.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.72.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.72.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.72.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.73.down_proj.weight": "model-00013-of-00046.safetensors", + 
"model.language_model.layers.8.mlp.experts.73.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.73.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.73.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.73.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.73.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.74.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.74.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.74.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.74.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.74.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.74.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.75.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.75.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.75.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.75.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.75.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.75.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.76.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.76.down_proj.weight_scale": 
"model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.76.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.76.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.76.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.76.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.77.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.77.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.77.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.77.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.77.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.77.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.78.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.78.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.78.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.78.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.78.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.78.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.79.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.79.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.79.gate_proj.weight": 
"model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.79.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.79.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.79.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.8.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.8.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.8.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.8.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.8.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.8.up_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.80.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.80.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.80.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.80.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.80.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.80.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.81.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.81.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.81.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.81.gate_proj.weight_scale": 
"model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.81.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.81.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.82.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.82.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.82.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.82.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.82.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.82.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.83.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.83.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.83.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.83.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.83.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.83.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.84.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.84.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.84.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.84.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.84.up_proj.weight": 
"model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.84.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.85.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.85.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.85.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.85.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.85.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.85.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.86.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.86.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.86.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.86.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.86.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.86.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.87.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.87.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.87.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.87.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.87.up_proj.weight": "model-00013-of-00046.safetensors", + 
"model.language_model.layers.8.mlp.experts.87.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.88.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.88.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.88.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.88.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.88.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.88.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.89.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.89.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.89.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.89.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.89.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.89.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.9.down_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.9.down_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.9.gate_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.9.gate_proj.weight_scale": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.9.up_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.9.up_proj.weight_scale": "model-00012-of-00046.safetensors", + 
"model.language_model.layers.8.mlp.experts.90.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.90.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.90.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.90.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.90.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.90.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.91.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.91.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.91.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.91.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.91.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.91.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.92.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.92.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.92.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.92.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.92.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.92.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.93.down_proj.weight": "model-00013-of-00046.safetensors", + 
"model.language_model.layers.8.mlp.experts.93.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.93.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.93.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.93.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.93.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.94.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.94.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.94.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.94.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.94.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.94.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.95.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.95.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.95.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.95.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.95.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.95.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.96.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.96.down_proj.weight_scale": 
"model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.96.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.96.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.96.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.96.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.97.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.97.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.97.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.97.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.97.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.97.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.98.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.98.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.98.gate_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.98.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.98.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.98.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.99.down_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.99.down_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.99.gate_proj.weight": 
"model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.99.gate_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.99.up_proj.weight": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.experts.99.up_proj.weight_scale": "model-00013-of-00046.safetensors", + "model.language_model.layers.8.mlp.gate.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.mlp.shared_experts.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.shared_experts.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.shared_experts.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.shared_experts.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.shared_experts.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.mlp.shared_experts.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.8.self_attn.k_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.self_attn.o_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.self_attn.q_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.8.self_attn.v_proj.weight": "model-00012-of-00046.safetensors", + "model.language_model.layers.9.input_layernorm.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.0.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.0.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.0.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.0.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + 
"model.language_model.layers.9.mlp.experts.0.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.0.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.1.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.1.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.1.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.1.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.1.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.1.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.10.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.10.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.10.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.10.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.10.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.10.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.100.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.100.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.100.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.100.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.100.up_proj.weight": "model-00015-of-00046.safetensors", + 
"model.language_model.layers.9.mlp.experts.100.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.101.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.101.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.101.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.101.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.101.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.101.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.102.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.102.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.102.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.102.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.102.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.102.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.103.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.103.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.103.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.103.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.103.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.103.up_proj.weight_scale": 
"model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.104.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.104.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.104.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.104.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.104.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.104.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.105.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.105.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.105.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.105.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.105.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.105.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.106.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.106.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.106.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.106.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.106.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.106.up_proj.weight_scale": "model-00015-of-00046.safetensors", + 
"model.language_model.layers.9.mlp.experts.107.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.107.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.107.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.107.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.107.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.107.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.108.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.108.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.108.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.108.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.108.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.108.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.109.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.109.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.109.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.109.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.109.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.109.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.11.down_proj.weight": 
"model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.11.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.11.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.11.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.11.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.11.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.110.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.110.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.110.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.110.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.110.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.110.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.111.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.111.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.111.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.111.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.111.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.111.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.112.down_proj.weight": "model-00015-of-00046.safetensors", + 
"model.language_model.layers.9.mlp.experts.112.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.112.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.112.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.112.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.112.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.113.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.113.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.113.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.113.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.113.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.113.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.114.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.114.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.114.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.114.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.114.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.114.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.115.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.115.down_proj.weight_scale": 
"model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.115.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.115.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.115.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.115.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.116.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.116.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.116.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.116.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.116.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.116.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.117.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.117.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.117.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.117.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.117.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.117.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.118.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.118.down_proj.weight_scale": "model-00015-of-00046.safetensors", + 
"model.language_model.layers.9.mlp.experts.118.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.118.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.118.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.118.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.119.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.119.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.119.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.119.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.119.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.119.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.12.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.12.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.12.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.12.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.12.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.12.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.120.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.120.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.120.gate_proj.weight": 
"model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.120.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.120.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.120.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.121.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.121.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.121.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.121.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.121.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.121.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.122.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.122.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.122.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.122.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.122.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.122.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.123.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.123.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.123.gate_proj.weight": "model-00015-of-00046.safetensors", + 
"model.language_model.layers.9.mlp.experts.123.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.123.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.123.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.124.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.124.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.124.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.124.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.124.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.124.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.125.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.125.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.125.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.125.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.125.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.125.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.126.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.126.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.126.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.126.gate_proj.weight_scale": 
"model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.126.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.126.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.127.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.127.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.127.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.127.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.127.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.127.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.13.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.13.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.13.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.13.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.13.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.13.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.14.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.14.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.14.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.14.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + 
"model.language_model.layers.9.mlp.experts.14.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.14.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.15.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.15.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.15.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.15.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.15.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.15.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.16.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.16.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.16.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.16.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.16.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.16.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.17.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.17.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.17.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.17.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.17.up_proj.weight": "model-00014-of-00046.safetensors", + 
"model.language_model.layers.9.mlp.experts.17.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.18.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.18.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.18.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.18.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.18.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.18.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.19.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.19.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.19.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.19.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.19.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.19.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.2.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.2.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.2.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.2.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.2.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.2.up_proj.weight_scale": "model-00014-of-00046.safetensors", + 
"model.language_model.layers.9.mlp.experts.20.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.20.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.20.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.20.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.20.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.20.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.21.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.21.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.21.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.21.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.21.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.21.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.22.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.22.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.22.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.22.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.22.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.22.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.23.down_proj.weight": "model-00014-of-00046.safetensors", + 
"model.language_model.layers.9.mlp.experts.23.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.23.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.23.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.23.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.23.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.24.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.24.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.24.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.24.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.24.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.24.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.25.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.25.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.25.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.25.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.25.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.25.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.26.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.26.down_proj.weight_scale": 
"model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.26.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.26.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.26.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.26.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.27.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.27.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.27.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.27.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.27.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.27.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.28.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.28.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.28.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.28.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.28.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.28.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.29.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.29.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.29.gate_proj.weight": 
"model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.29.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.29.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.29.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.3.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.3.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.3.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.3.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.3.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.3.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.30.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.30.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.30.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.30.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.30.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.30.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.31.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.31.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.31.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.31.gate_proj.weight_scale": 
"model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.31.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.31.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.32.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.32.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.32.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.32.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.32.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.32.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.33.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.33.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.33.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.33.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.33.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.33.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.34.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.34.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.34.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.34.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.34.up_proj.weight": 
"model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.34.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.35.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.35.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.35.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.35.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.35.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.35.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.36.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.36.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.36.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.36.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.36.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.36.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.37.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.37.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.37.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.37.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.37.up_proj.weight": "model-00014-of-00046.safetensors", + 
"model.language_model.layers.9.mlp.experts.37.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.38.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.38.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.38.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.38.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.38.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.38.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.39.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.39.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.39.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.39.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.39.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.39.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.4.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.4.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.4.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.4.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.4.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.4.up_proj.weight_scale": "model-00014-of-00046.safetensors", + 
"model.language_model.layers.9.mlp.experts.40.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.40.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.40.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.40.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.40.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.40.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.41.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.41.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.41.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.41.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.41.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.41.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.42.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.42.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.42.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.42.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.42.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.42.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.43.down_proj.weight": "model-00014-of-00046.safetensors", + 
"model.language_model.layers.9.mlp.experts.43.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.43.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.43.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.43.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.43.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.44.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.44.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.44.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.44.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.44.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.44.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.45.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.45.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.45.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.45.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.45.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.45.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.46.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.46.down_proj.weight_scale": 
"model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.46.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.46.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.46.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.46.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.47.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.47.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.47.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.47.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.47.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.47.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.48.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.48.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.48.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.48.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.48.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.48.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.49.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.49.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.49.gate_proj.weight": 
"model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.49.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.49.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.49.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.5.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.5.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.5.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.5.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.5.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.5.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.50.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.50.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.50.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.50.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.50.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.50.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.51.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.51.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.51.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.51.gate_proj.weight_scale": 
"model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.51.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.51.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.52.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.52.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.52.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.52.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.52.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.52.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.53.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.53.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.53.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.53.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.53.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.53.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.54.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.54.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.54.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.54.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.54.up_proj.weight": 
"model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.54.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.55.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.55.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.55.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.55.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.55.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.55.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.56.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.56.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.56.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.56.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.56.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.56.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.57.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.57.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.57.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.57.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.57.up_proj.weight": "model-00014-of-00046.safetensors", + 
"model.language_model.layers.9.mlp.experts.57.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.58.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.58.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.58.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.58.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.58.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.58.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.59.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.59.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.59.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.59.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.59.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.59.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.6.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.6.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.6.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.6.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.6.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.6.up_proj.weight_scale": "model-00014-of-00046.safetensors", + 
"model.language_model.layers.9.mlp.experts.60.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.60.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.60.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.60.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.60.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.60.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.61.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.61.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.61.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.61.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.61.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.61.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.62.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.62.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.62.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.62.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.62.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.62.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.63.down_proj.weight": "model-00014-of-00046.safetensors", + 
"model.language_model.layers.9.mlp.experts.63.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.63.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.63.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.63.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.63.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.64.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.64.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.64.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.64.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.64.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.64.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.65.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.65.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.65.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.65.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.65.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.65.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.66.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.66.down_proj.weight_scale": 
"model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.66.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.66.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.66.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.66.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.67.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.67.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.67.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.67.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.67.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.67.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.68.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.68.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.68.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.68.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.68.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.68.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.69.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.69.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.69.gate_proj.weight": 
"model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.69.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.69.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.69.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.7.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.7.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.7.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.7.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.7.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.7.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.70.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.70.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.70.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.70.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.70.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.70.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.71.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.71.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.71.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.71.gate_proj.weight_scale": 
"model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.71.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.71.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.72.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.72.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.72.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.72.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.72.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.72.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.73.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.73.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.73.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.73.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.73.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.73.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.74.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.74.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.74.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.74.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.74.up_proj.weight": 
"model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.74.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.75.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.75.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.75.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.75.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.75.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.75.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.76.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.76.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.76.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.76.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.76.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.76.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.77.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.77.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.77.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.77.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.77.up_proj.weight": "model-00015-of-00046.safetensors", + 
"model.language_model.layers.9.mlp.experts.77.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.78.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.78.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.78.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.78.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.78.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.78.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.79.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.79.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.79.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.79.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.79.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.79.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.8.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.8.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.8.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.8.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.8.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.8.up_proj.weight_scale": "model-00014-of-00046.safetensors", + 
"model.language_model.layers.9.mlp.experts.80.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.80.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.80.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.80.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.80.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.80.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.81.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.81.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.81.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.81.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.81.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.81.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.82.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.82.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.82.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.82.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.82.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.82.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.83.down_proj.weight": "model-00015-of-00046.safetensors", + 
"model.language_model.layers.9.mlp.experts.83.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.83.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.83.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.83.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.83.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.84.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.84.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.84.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.84.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.84.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.84.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.85.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.85.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.85.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.85.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.85.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.85.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.86.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.86.down_proj.weight_scale": 
"model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.86.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.86.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.86.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.86.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.87.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.87.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.87.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.87.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.87.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.87.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.88.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.88.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.88.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.88.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.88.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.88.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.89.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.89.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.89.gate_proj.weight": 
"model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.89.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.89.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.89.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.9.down_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.9.down_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.9.gate_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.9.gate_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.9.up_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.9.up_proj.weight_scale": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.90.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.90.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.90.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.90.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.90.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.90.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.91.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.91.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.91.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.91.gate_proj.weight_scale": 
"model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.91.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.91.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.92.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.92.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.92.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.92.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.92.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.92.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.93.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.93.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.93.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.93.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.93.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.93.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.94.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.94.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.94.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.94.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.94.up_proj.weight": 
"model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.94.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.95.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.95.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.95.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.95.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.95.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.95.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.96.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.96.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.96.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.96.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.96.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.96.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.97.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.97.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.97.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.97.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.97.up_proj.weight": "model-00015-of-00046.safetensors", + 
"model.language_model.layers.9.mlp.experts.97.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.98.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.98.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.98.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.98.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.98.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.98.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.99.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.99.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.99.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.99.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.99.up_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.experts.99.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.gate.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.mlp.shared_experts.down_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.shared_experts.down_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.shared_experts.gate_proj.weight": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.shared_experts.gate_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.shared_experts.up_proj.weight": 
"model-00015-of-00046.safetensors", + "model.language_model.layers.9.mlp.shared_experts.up_proj.weight_scale": "model-00015-of-00046.safetensors", + "model.language_model.layers.9.self_attn.k_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.self_attn.o_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.self_attn.q_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.layers.9.self_attn.v_proj.weight": "model-00014-of-00046.safetensors", + "model.language_model.norm.weight": "model-00046-of-00046.safetensors", + "model.multi_modal_projector.linear_1.bias": "model-00001-of-00046.safetensors", + "model.multi_modal_projector.linear_1.weight": "model-00001-of-00046.safetensors", + "model.multi_modal_projector.linear_2.bias": "model-00001-of-00046.safetensors", + "model.multi_modal_projector.linear_2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.embeddings.patch_embedding.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.embeddings.patch_embedding.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.embeddings.position_embedding.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias": 
"model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias": 
"model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias": 
"model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00046.safetensors", + 
"model.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00046.safetensors", + 
"model.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00046.safetensors", + 
"model.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight": "model-00001-of-00046.safetensors", + 
"model.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.15.mlp.fc1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.15.mlp.fc2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight": "model-00001-of-00046.safetensors", + 
"model.vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.16.mlp.fc1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.16.mlp.fc2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.17.mlp.fc1.weight": "model-00001-of-00046.safetensors", + 
"model.vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.17.mlp.fc2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.18.mlp.fc1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.18.mlp.fc2.weight": "model-00001-of-00046.safetensors", + 
"model.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.19.mlp.fc1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.19.mlp.fc2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00046.safetensors", + 
"model.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.2.mlp.fc1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.2.mlp.fc2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00046.safetensors", + 
"model.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.20.mlp.fc1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.20.mlp.fc2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00046.safetensors", + 
"model.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.21.mlp.fc1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.21.mlp.fc2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight": "model-00001-of-00046.safetensors", + 
"model.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.22.mlp.fc1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.22.mlp.fc2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight": "model-00001-of-00046.safetensors", + 
"model.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.23.mlp.fc1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.23.mlp.fc2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.24.layer_norm1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.24.layer_norm1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.24.layer_norm2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.24.layer_norm2.weight": "model-00001-of-00046.safetensors", + 
"model.vision_tower.vision_model.encoder.layers.24.mlp.fc1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.24.mlp.fc1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.24.mlp.fc2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.24.mlp.fc2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.25.layer_norm1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.25.layer_norm1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.25.layer_norm2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.25.layer_norm2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.25.mlp.fc1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.25.mlp.fc1.weight": "model-00001-of-00046.safetensors", + 
"model.vision_tower.vision_model.encoder.layers.25.mlp.fc2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.25.mlp.fc2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.26.layer_norm1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.26.layer_norm1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.26.layer_norm2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.26.layer_norm2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.26.mlp.fc1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.26.mlp.fc1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.26.mlp.fc2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.26.mlp.fc2.weight": "model-00001-of-00046.safetensors", + 
"model.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.3.mlp.fc1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.3.mlp.fc2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00046.safetensors", + 
"model.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.4.mlp.fc1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.4.mlp.fc2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00046.safetensors", + 
"model.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.5.mlp.fc1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.5.mlp.fc2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00046.safetensors", + 
"model.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.6.mlp.fc1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.6.mlp.fc2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00046.safetensors", + 
"model.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.7.mlp.fc1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.7.mlp.fc2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight": "model-00001-of-00046.safetensors", + 
"model.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.8.mlp.fc1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.8.mlp.fc2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight": "model-00001-of-00046.safetensors", + 
"model.vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.9.mlp.fc1.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.9.mlp.fc2.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.post_layernorm.bias": "model-00001-of-00046.safetensors", + "model.vision_tower.vision_model.post_layernorm.weight": "model-00001-of-00046.safetensors" + } +} diff --git a/preprocessor_config.json b/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..246f2ca181c02d307dacbb02ac50dba7fb841c3a --- /dev/null +++ b/preprocessor_config.json @@ -0,0 +1,42 @@ +{ + "crop_size": null, + "crop_to_patches": true, + "data_format": "channels_first", + "default_to_square": true, + "device": null, + "disable_grouping": null, + "do_center_crop": null, + "do_convert_rgb": true, + "do_normalize": true, + 
"do_rescale": true, + "do_resize": true, + "downsample_factor": 2, + "end_of_img_token": "<|END_OF_IMG|>", + "image_mean": [ + 0.5, + 0.5, + 0.5 + ], + "image_processor_type": "Cohere2VisionImageProcessorFast", + "image_std": [ + 0.5, + 0.5, + 0.5 + ], + "img_line_break_token": "<|IMG_LINE_BREAK|>", + "img_patch_token": "<|IMG_PATCH|>", + "img_size": 512, + "input_data_format": null, + "max_patches": 12, + "min_patches": 1, + "patch_size": 16, + "processor_class": "Cohere2VisionProcessor", + "resample": 3, + "rescale_factor": 0.00392156862745098, + "return_tensors": null, + "size": { + "height": 512, + "width": 512 + }, + "start_of_img_token": "<|START_OF_IMG|>" +} \ No newline at end of file diff --git a/recipe.yaml b/recipe.yaml new file mode 100644 index 0000000000000000000000000000000000000000..174fe420bdeda70fe8f5fef22038ca2afa9e1238 --- /dev/null +++ b/recipe.yaml @@ -0,0 +1,8 @@ +default_stage: + default_modifiers: + QuantizationModifier: + targets: [Linear] + ignore: ['re:.*lm_head', 're:model.multi_modal_projector.*', 're:model.vision_tower.*', + 're:.*mlp.gate$', 're:.*self_attn'] + scheme: FP8_DYNAMIC + bypass_divisibility_checks: false diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..2427aa852fcdae3002e977491b2b3200a9d3d2fc --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,34 @@ +{ + "boi_token": "<|START_OF_IMG|>", + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eoi_token": "<|END_OF_IMG|>", + "eos_token": { + "content": "<|END_OF_TURN_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "image_token": "<|IMG_PATCH|>", + "img_line_break_token": "<|IMG_LINE_BREAK|>", + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..a8680c4bce4114a529aaa37430b3af664ce2b7ef --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a17e995a435e5ddc664625bc76b760d8da5301f6e17d6eefdac2d6605685796a +size 28217461 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8dbccfde78f5c3d8079c69a49b8b79d7ba37672 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,326 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "add_prefix_space": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "255000": { + "content": "<|START_OF_TURN_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255001": { + "content": "<|END_OF_TURN_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "255002": { + "content": "<|USER_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255003": { + "content": "<|CHATBOT_TOKEN|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": false + }, + "255004": { + "content": "<|SYSTEM_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255005": { + "content": "<|NEW_FILE|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "255006": { + "content": "<|BEGINNING_OF_PREFIX_FIM_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "255007": { + "content": "<|BEGINNING_OF_MIDDLE_FIM_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "255008": { + "content": "<|BEGINNING_OF_SUFFIX_FIM_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "255009": { + "content": "<|END_OF_MIDDLE_FIM_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "255010": { + "content": "<|START_THINKING|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255011": { + "content": "<|END_THINKING|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255012": { + "content": "<|START_TEXT|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255013": { + "content": "<|END_TEXT|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255014": { + "content": "<|START_ACTION|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255015": { + "content": "<|END_ACTION|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255016": { + 
"content": "<|START_TOOL_RESULT|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255017": { + "content": "<|END_TOOL_RESULT|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255018": { + "content": "<|USER_0_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255019": { + "content": "<|USER_1_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255020": { + "content": "<|USER_2_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255021": { + "content": "<|USER_3_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255022": { + "content": "<|USER_4_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255023": { + "content": "<|USER_5_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255024": { + "content": "<|USER_6_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255025": { + "content": "<|USER_7_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255026": { + "content": "<|USER_8_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255027": { + "content": "<|USER_9_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255028": { + "content": "<|START_OF_IMG|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "255029": { + "content": "<|END_OF_IMG|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "255030": { + "content": "<|IMG_LINE_BREAK|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "255031": { + "content": "<|IMG_PATCH|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "boi_token": "<|START_OF_IMG|>", + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eoi_token": "<|END_OF_IMG|>", + "eos_token": "<|END_OF_TURN_TOKEN|>", + "extra_special_tokens": { + "boi_token": "<|START_OF_IMG|>", + "eoi_token": "<|END_OF_IMG|>", + "image_token": "<|IMG_PATCH|>", + "img_line_break_token": "<|IMG_LINE_BREAK|>" + }, + "image_token": "<|IMG_PATCH|>", + "img_line_break_token": "<|IMG_LINE_BREAK|>", + "legacy": true, + "merges_file": null, + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "CohereTokenizer", + "unk_token": "", + "use_default_system_prompt": false, + "vocab_file": null +}