{ "metadata": { "total_size": 1845802880, "num_shards": 1, "step": 45444, "format": "safetensors_state_dict" }, "weight_map": { "transformer.wte.weight": "model-00001-of-00001.safetensors", "transformer.h.0.attn.c_q.weight": "model-00001-of-00001.safetensors", "transformer.h.0.attn.c_k.weight": "model-00001-of-00001.safetensors", "transformer.h.0.attn.c_v.weight": "model-00001-of-00001.safetensors", "transformer.h.0.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.0.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.0.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.1.attn.c_q.weight": "model-00001-of-00001.safetensors", "transformer.h.1.attn.c_k.weight": "model-00001-of-00001.safetensors", "transformer.h.1.attn.c_v.weight": "model-00001-of-00001.safetensors", "transformer.h.1.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.1.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.1.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.2.attn.c_q.weight": "model-00001-of-00001.safetensors", "transformer.h.2.attn.c_k.weight": "model-00001-of-00001.safetensors", "transformer.h.2.attn.c_v.weight": "model-00001-of-00001.safetensors", "transformer.h.2.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.2.mlp.router_bias": "model-00001-of-00001.safetensors", "transformer.h.2.mlp.uniform_load": "model-00001-of-00001.safetensors", "transformer.h.2.mlp.router.weight": "model-00001-of-00001.safetensors", "transformer.h.2.mlp.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.2.mlp.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.2.mlp.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.2.mlp.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.2.mlp.experts.2.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.2.mlp.experts.2.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.2.mlp.experts.3.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.2.mlp.experts.3.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.2.mlp.experts.4.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.2.mlp.experts.4.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.2.mlp.experts.5.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.2.mlp.experts.5.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.2.mlp.experts.6.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.2.mlp.experts.6.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.2.mlp.experts.7.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.2.mlp.experts.7.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.2.mlp.adjugate_experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.2.mlp.adjugate_experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.2.mlp.adjugate_experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.2.mlp.adjugate_experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.2.mlp.router_context_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.2.mlp.router_context_scale_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.2.mlp.router_context_selection_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.3.attn.c_q.weight": "model-00001-of-00001.safetensors", "transformer.h.3.attn.c_k.weight": "model-00001-of-00001.safetensors", "transformer.h.3.attn.c_v.weight": "model-00001-of-00001.safetensors", "transformer.h.3.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.3.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.3.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.4.attn.c_q.weight": "model-00001-of-00001.safetensors", "transformer.h.4.attn.c_k.weight": "model-00001-of-00001.safetensors", "transformer.h.4.attn.c_v.weight": "model-00001-of-00001.safetensors", "transformer.h.4.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.4.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.4.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.5.attn.c_q.weight": "model-00001-of-00001.safetensors", "transformer.h.5.attn.c_k.weight": "model-00001-of-00001.safetensors", "transformer.h.5.attn.c_v.weight": "model-00001-of-00001.safetensors", "transformer.h.5.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.5.mlp.router_bias": "model-00001-of-00001.safetensors", "transformer.h.5.mlp.uniform_load": "model-00001-of-00001.safetensors", "transformer.h.5.mlp.router.weight": "model-00001-of-00001.safetensors", "transformer.h.5.mlp.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.5.mlp.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.5.mlp.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.5.mlp.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.5.mlp.experts.2.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.5.mlp.experts.2.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.5.mlp.experts.3.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.5.mlp.experts.3.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.5.mlp.experts.4.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.5.mlp.experts.4.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.5.mlp.experts.5.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.5.mlp.experts.5.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.5.mlp.experts.6.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.5.mlp.experts.6.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.5.mlp.experts.7.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.5.mlp.experts.7.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.5.mlp.adjugate_experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.5.mlp.adjugate_experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.5.mlp.adjugate_experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.5.mlp.adjugate_experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.5.mlp.router_context_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.5.mlp.router_context_scale_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.5.mlp.router_context_selection_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.6.attn.c_q.weight": "model-00001-of-00001.safetensors", "transformer.h.6.attn.c_k.weight": "model-00001-of-00001.safetensors", "transformer.h.6.attn.c_v.weight": "model-00001-of-00001.safetensors", "transformer.h.6.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.6.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.6.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.7.attn.c_q.weight": "model-00001-of-00001.safetensors", "transformer.h.7.attn.c_k.weight": "model-00001-of-00001.safetensors", "transformer.h.7.attn.c_v.weight": "model-00001-of-00001.safetensors", "transformer.h.7.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.7.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.7.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.8.attn.c_q.weight": "model-00001-of-00001.safetensors", "transformer.h.8.attn.c_k.weight": "model-00001-of-00001.safetensors", "transformer.h.8.attn.c_v.weight": "model-00001-of-00001.safetensors", "transformer.h.8.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.8.mlp.router_bias": "model-00001-of-00001.safetensors", "transformer.h.8.mlp.uniform_load": "model-00001-of-00001.safetensors", "transformer.h.8.mlp.router.weight": "model-00001-of-00001.safetensors", "transformer.h.8.mlp.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.8.mlp.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.8.mlp.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.8.mlp.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.8.mlp.experts.2.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.8.mlp.experts.2.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.8.mlp.experts.3.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.8.mlp.experts.3.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.8.mlp.experts.4.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.8.mlp.experts.4.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.8.mlp.experts.5.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.8.mlp.experts.5.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.8.mlp.experts.6.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.8.mlp.experts.6.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.8.mlp.experts.7.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.8.mlp.experts.7.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.8.mlp.adjugate_experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.8.mlp.adjugate_experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.8.mlp.adjugate_experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.8.mlp.adjugate_experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.8.mlp.router_context_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.8.mlp.router_context_scale_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.8.mlp.router_context_selection_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.9.attn.c_q.weight": "model-00001-of-00001.safetensors", "transformer.h.9.attn.c_k.weight": "model-00001-of-00001.safetensors", "transformer.h.9.attn.c_v.weight": "model-00001-of-00001.safetensors", "transformer.h.9.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.9.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.9.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.10.attn.c_q.weight": "model-00001-of-00001.safetensors", "transformer.h.10.attn.c_k.weight": "model-00001-of-00001.safetensors", "transformer.h.10.attn.c_v.weight": "model-00001-of-00001.safetensors", "transformer.h.10.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.10.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.10.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.11.attn.c_q.weight": "model-00001-of-00001.safetensors", "transformer.h.11.attn.c_k.weight": "model-00001-of-00001.safetensors", "transformer.h.11.attn.c_v.weight": "model-00001-of-00001.safetensors", "transformer.h.11.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.11.mlp.router_bias": "model-00001-of-00001.safetensors", "transformer.h.11.mlp.uniform_load": "model-00001-of-00001.safetensors", "transformer.h.11.mlp.router.weight": "model-00001-of-00001.safetensors", "transformer.h.11.mlp.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.11.mlp.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.11.mlp.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.11.mlp.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.11.mlp.experts.2.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.11.mlp.experts.2.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.11.mlp.experts.3.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.11.mlp.experts.3.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.11.mlp.experts.4.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.11.mlp.experts.4.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.11.mlp.experts.5.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.11.mlp.experts.5.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.11.mlp.experts.6.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.11.mlp.experts.6.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.11.mlp.experts.7.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.11.mlp.experts.7.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.11.mlp.adjugate_experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.11.mlp.adjugate_experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.11.mlp.adjugate_experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.11.mlp.adjugate_experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.11.mlp.router_context_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.11.mlp.router_context_scale_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.11.mlp.router_context_selection_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.12.attn.c_q.weight": "model-00001-of-00001.safetensors", "transformer.h.12.attn.c_k.weight": "model-00001-of-00001.safetensors", "transformer.h.12.attn.c_v.weight": "model-00001-of-00001.safetensors", "transformer.h.12.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.12.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.12.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.13.attn.c_q.weight": "model-00001-of-00001.safetensors", "transformer.h.13.attn.c_k.weight": "model-00001-of-00001.safetensors", "transformer.h.13.attn.c_v.weight": "model-00001-of-00001.safetensors", "transformer.h.13.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.13.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.13.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.14.attn.c_q.weight": "model-00001-of-00001.safetensors", "transformer.h.14.attn.c_k.weight": "model-00001-of-00001.safetensors", "transformer.h.14.attn.c_v.weight": "model-00001-of-00001.safetensors", "transformer.h.14.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.14.mlp.router_bias": "model-00001-of-00001.safetensors", "transformer.h.14.mlp.uniform_load": "model-00001-of-00001.safetensors", "transformer.h.14.mlp.router.weight": "model-00001-of-00001.safetensors", "transformer.h.14.mlp.experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.14.mlp.experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.14.mlp.experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.14.mlp.experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.14.mlp.experts.2.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.14.mlp.experts.2.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.14.mlp.experts.3.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.14.mlp.experts.3.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.14.mlp.experts.4.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.14.mlp.experts.4.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.14.mlp.experts.5.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.14.mlp.experts.5.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.14.mlp.experts.6.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.14.mlp.experts.6.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.14.mlp.experts.7.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.14.mlp.experts.7.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.14.mlp.adjugate_experts.0.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.14.mlp.adjugate_experts.0.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.14.mlp.adjugate_experts.1.w1.weight": "model-00001-of-00001.safetensors", "transformer.h.14.mlp.adjugate_experts.1.w2.weight": "model-00001-of-00001.safetensors", "transformer.h.14.mlp.router_context_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.14.mlp.router_context_scale_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.14.mlp.router_context_selection_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.15.attn.c_q.weight": "model-00001-of-00001.safetensors", "transformer.h.15.attn.c_k.weight": "model-00001-of-00001.safetensors", "transformer.h.15.attn.c_v.weight": "model-00001-of-00001.safetensors", "transformer.h.15.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.15.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.15.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "lm_head.weight": "model-00001-of-00001.safetensors" } }