{ "metadata": { "total_size": 5979840000 }, "weight_map": { "lm_head.weight": "model-00002-of-00002.safetensors", "mlm_head.weight": "model-00002-of-00002.safetensors", "model.embed_seq_id.weight": "model-00001-of-00002.safetensors", "model.embed_tokens.weight": "model-00001-of-00002.safetensors", "model.layers.0.block_sparse_moe.experts.mlp.v1": "model-00001-of-00002.safetensors", "model.layers.0.block_sparse_moe.experts.mlp.w1": "model-00001-of-00002.safetensors", "model.layers.0.block_sparse_moe.experts.mlp.w2": "model-00001-of-00002.safetensors", "model.layers.0.block_sparse_moe.router.layer.weight": "model-00001-of-00002.safetensors", "model.layers.0.norm_attn_norm.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.0.norm_attn_norm.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.0.norm_attn_norm.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.0.norm_attn_norm.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.0.norm_attn_norm.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.0.norm_attn_norm.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.1.block_sparse_moe.experts.mlp.v1": "model-00001-of-00002.safetensors", "model.layers.1.block_sparse_moe.experts.mlp.w1": "model-00001-of-00002.safetensors", "model.layers.1.block_sparse_moe.experts.mlp.w2": "model-00001-of-00002.safetensors", "model.layers.1.block_sparse_moe.router.layer.weight": "model-00001-of-00002.safetensors", "model.layers.1.norm_attn_norm.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.1.norm_attn_norm.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.1.norm_attn_norm.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.1.norm_attn_norm.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.1.norm_attn_norm.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.1.norm_attn_norm.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.10.block_sparse_moe.experts.mlp.v1": "model-00001-of-00002.safetensors", "model.layers.10.block_sparse_moe.experts.mlp.w1": "model-00001-of-00002.safetensors", "model.layers.10.block_sparse_moe.experts.mlp.w2": "model-00001-of-00002.safetensors", "model.layers.10.block_sparse_moe.router.layer.weight": "model-00001-of-00002.safetensors", "model.layers.10.norm_attn_norm.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.10.norm_attn_norm.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.10.norm_attn_norm.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.10.norm_attn_norm.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.10.norm_attn_norm.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.10.norm_attn_norm.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.11.block_sparse_moe.experts.mlp.v1": "model-00001-of-00002.safetensors", "model.layers.11.block_sparse_moe.experts.mlp.w1": "model-00001-of-00002.safetensors", "model.layers.11.block_sparse_moe.experts.mlp.w2": "model-00001-of-00002.safetensors", "model.layers.11.block_sparse_moe.router.layer.weight": "model-00001-of-00002.safetensors", "model.layers.11.norm_attn_norm.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.11.norm_attn_norm.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.11.norm_attn_norm.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.11.norm_attn_norm.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.11.norm_attn_norm.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.11.norm_attn_norm.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.12.block_sparse_moe.experts.mlp.v1": "model-00001-of-00002.safetensors", "model.layers.12.block_sparse_moe.experts.mlp.w1": "model-00001-of-00002.safetensors", "model.layers.12.block_sparse_moe.experts.mlp.w2": "model-00001-of-00002.safetensors", "model.layers.12.block_sparse_moe.router.layer.weight": "model-00001-of-00002.safetensors", "model.layers.12.norm_attn_norm.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.12.norm_attn_norm.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.12.norm_attn_norm.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.12.norm_attn_norm.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.12.norm_attn_norm.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.12.norm_attn_norm.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.13.block_sparse_moe.experts.mlp.v1": "model-00001-of-00002.safetensors", "model.layers.13.block_sparse_moe.experts.mlp.w1": "model-00001-of-00002.safetensors", "model.layers.13.block_sparse_moe.experts.mlp.w2": "model-00001-of-00002.safetensors", "model.layers.13.block_sparse_moe.router.layer.weight": "model-00001-of-00002.safetensors", "model.layers.13.norm_attn_norm.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.13.norm_attn_norm.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.13.norm_attn_norm.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.13.norm_attn_norm.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.13.norm_attn_norm.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.13.norm_attn_norm.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.14.block_sparse_moe.experts.mlp.v1": "model-00001-of-00002.safetensors", "model.layers.14.block_sparse_moe.experts.mlp.w1": "model-00001-of-00002.safetensors", "model.layers.14.block_sparse_moe.experts.mlp.w2": "model-00001-of-00002.safetensors", "model.layers.14.block_sparse_moe.router.layer.weight": "model-00001-of-00002.safetensors", "model.layers.14.norm_attn_norm.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.14.norm_attn_norm.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.14.norm_attn_norm.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.14.norm_attn_norm.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.14.norm_attn_norm.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.14.norm_attn_norm.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.15.block_sparse_moe.experts.mlp.v1": "model-00001-of-00002.safetensors", "model.layers.15.block_sparse_moe.experts.mlp.w1": "model-00001-of-00002.safetensors", "model.layers.15.block_sparse_moe.experts.mlp.w2": "model-00001-of-00002.safetensors", "model.layers.15.block_sparse_moe.router.layer.weight": "model-00001-of-00002.safetensors", "model.layers.15.norm_attn_norm.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.15.norm_attn_norm.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.15.norm_attn_norm.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.15.norm_attn_norm.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.15.norm_attn_norm.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.15.norm_attn_norm.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.16.block_sparse_moe.experts.mlp.v1": "model-00001-of-00002.safetensors", "model.layers.16.block_sparse_moe.experts.mlp.w1": "model-00001-of-00002.safetensors", "model.layers.16.block_sparse_moe.experts.mlp.w2": "model-00001-of-00002.safetensors", "model.layers.16.block_sparse_moe.router.layer.weight": "model-00001-of-00002.safetensors", "model.layers.16.norm_attn_norm.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.16.norm_attn_norm.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.16.norm_attn_norm.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.16.norm_attn_norm.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.16.norm_attn_norm.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.16.norm_attn_norm.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.17.block_sparse_moe.experts.mlp.v1": "model-00001-of-00002.safetensors", "model.layers.17.block_sparse_moe.experts.mlp.w1": "model-00001-of-00002.safetensors", "model.layers.17.block_sparse_moe.experts.mlp.w2": "model-00001-of-00002.safetensors", "model.layers.17.block_sparse_moe.router.layer.weight": "model-00001-of-00002.safetensors", "model.layers.17.norm_attn_norm.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.17.norm_attn_norm.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.17.norm_attn_norm.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.17.norm_attn_norm.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.17.norm_attn_norm.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.17.norm_attn_norm.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.18.block_sparse_moe.experts.mlp.v1": "model-00001-of-00002.safetensors", "model.layers.18.block_sparse_moe.experts.mlp.w1": "model-00001-of-00002.safetensors", "model.layers.18.block_sparse_moe.experts.mlp.w2": "model-00001-of-00002.safetensors", "model.layers.18.block_sparse_moe.router.layer.weight": "model-00001-of-00002.safetensors", "model.layers.18.norm_attn_norm.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.18.norm_attn_norm.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.18.norm_attn_norm.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.18.norm_attn_norm.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.18.norm_attn_norm.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.18.norm_attn_norm.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.19.block_sparse_moe.experts.mlp.v1": "model-00001-of-00002.safetensors", "model.layers.19.block_sparse_moe.experts.mlp.w1": "model-00001-of-00002.safetensors", "model.layers.19.block_sparse_moe.experts.mlp.w2": "model-00001-of-00002.safetensors", "model.layers.19.block_sparse_moe.router.layer.weight": "model-00001-of-00002.safetensors", "model.layers.19.norm_attn_norm.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.19.norm_attn_norm.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.19.norm_attn_norm.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.19.norm_attn_norm.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.19.norm_attn_norm.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.19.norm_attn_norm.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.2.block_sparse_moe.experts.mlp.v1": "model-00001-of-00002.safetensors", "model.layers.2.block_sparse_moe.experts.mlp.w1": "model-00001-of-00002.safetensors", "model.layers.2.block_sparse_moe.experts.mlp.w2": "model-00001-of-00002.safetensors", "model.layers.2.block_sparse_moe.router.layer.weight": "model-00001-of-00002.safetensors", "model.layers.2.norm_attn_norm.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.2.norm_attn_norm.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.2.norm_attn_norm.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.2.norm_attn_norm.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.2.norm_attn_norm.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.2.norm_attn_norm.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.20.block_sparse_moe.experts.mlp.v1": "model-00002-of-00002.safetensors", "model.layers.20.block_sparse_moe.experts.mlp.w1": "model-00002-of-00002.safetensors", "model.layers.20.block_sparse_moe.experts.mlp.w2": "model-00002-of-00002.safetensors", "model.layers.20.block_sparse_moe.router.layer.weight": "model-00001-of-00002.safetensors", "model.layers.20.norm_attn_norm.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.20.norm_attn_norm.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.20.norm_attn_norm.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.20.norm_attn_norm.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.20.norm_attn_norm.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.20.norm_attn_norm.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.21.block_sparse_moe.experts.mlp.v1": "model-00002-of-00002.safetensors", "model.layers.21.block_sparse_moe.experts.mlp.w1": "model-00002-of-00002.safetensors", "model.layers.21.block_sparse_moe.experts.mlp.w2": "model-00002-of-00002.safetensors", "model.layers.21.block_sparse_moe.router.layer.weight": "model-00002-of-00002.safetensors", "model.layers.21.norm_attn_norm.input_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.21.norm_attn_norm.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.21.norm_attn_norm.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "model.layers.21.norm_attn_norm.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "model.layers.21.norm_attn_norm.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "model.layers.21.norm_attn_norm.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "model.layers.22.block_sparse_moe.experts.mlp.v1": "model-00002-of-00002.safetensors", "model.layers.22.block_sparse_moe.experts.mlp.w1": "model-00002-of-00002.safetensors", "model.layers.22.block_sparse_moe.experts.mlp.w2": "model-00002-of-00002.safetensors", "model.layers.22.block_sparse_moe.router.layer.weight": "model-00002-of-00002.safetensors", "model.layers.22.norm_attn_norm.input_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.22.norm_attn_norm.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.22.norm_attn_norm.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "model.layers.22.norm_attn_norm.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "model.layers.22.norm_attn_norm.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "model.layers.22.norm_attn_norm.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "model.layers.23.block_sparse_moe.experts.mlp.v1": "model-00002-of-00002.safetensors", "model.layers.23.block_sparse_moe.experts.mlp.w1": "model-00002-of-00002.safetensors", "model.layers.23.block_sparse_moe.experts.mlp.w2": "model-00002-of-00002.safetensors", "model.layers.23.block_sparse_moe.router.layer.weight": "model-00002-of-00002.safetensors", "model.layers.23.norm_attn_norm.input_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.23.norm_attn_norm.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.23.norm_attn_norm.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "model.layers.23.norm_attn_norm.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "model.layers.23.norm_attn_norm.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "model.layers.23.norm_attn_norm.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "model.layers.3.block_sparse_moe.experts.mlp.v1": "model-00001-of-00002.safetensors", "model.layers.3.block_sparse_moe.experts.mlp.w1": "model-00001-of-00002.safetensors", "model.layers.3.block_sparse_moe.experts.mlp.w2": "model-00001-of-00002.safetensors", "model.layers.3.block_sparse_moe.router.layer.weight": "model-00001-of-00002.safetensors", "model.layers.3.norm_attn_norm.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.3.norm_attn_norm.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.3.norm_attn_norm.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.3.norm_attn_norm.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.3.norm_attn_norm.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.3.norm_attn_norm.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.4.block_sparse_moe.experts.mlp.v1": "model-00001-of-00002.safetensors", "model.layers.4.block_sparse_moe.experts.mlp.w1": "model-00001-of-00002.safetensors", "model.layers.4.block_sparse_moe.experts.mlp.w2": "model-00001-of-00002.safetensors", "model.layers.4.block_sparse_moe.router.layer.weight": "model-00001-of-00002.safetensors", "model.layers.4.norm_attn_norm.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.4.norm_attn_norm.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.4.norm_attn_norm.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.4.norm_attn_norm.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.4.norm_attn_norm.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.4.norm_attn_norm.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.5.block_sparse_moe.experts.mlp.v1": "model-00001-of-00002.safetensors", "model.layers.5.block_sparse_moe.experts.mlp.w1": "model-00001-of-00002.safetensors", "model.layers.5.block_sparse_moe.experts.mlp.w2": "model-00001-of-00002.safetensors", "model.layers.5.block_sparse_moe.router.layer.weight": "model-00001-of-00002.safetensors", "model.layers.5.norm_attn_norm.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.5.norm_attn_norm.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.5.norm_attn_norm.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.5.norm_attn_norm.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.5.norm_attn_norm.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.5.norm_attn_norm.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.6.block_sparse_moe.experts.mlp.v1": "model-00001-of-00002.safetensors", "model.layers.6.block_sparse_moe.experts.mlp.w1": "model-00001-of-00002.safetensors", "model.layers.6.block_sparse_moe.experts.mlp.w2": "model-00001-of-00002.safetensors", "model.layers.6.block_sparse_moe.router.layer.weight": "model-00001-of-00002.safetensors", "model.layers.6.norm_attn_norm.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.6.norm_attn_norm.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.6.norm_attn_norm.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.6.norm_attn_norm.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.6.norm_attn_norm.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.6.norm_attn_norm.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.7.block_sparse_moe.experts.mlp.v1": "model-00001-of-00002.safetensors", "model.layers.7.block_sparse_moe.experts.mlp.w1": "model-00001-of-00002.safetensors", "model.layers.7.block_sparse_moe.experts.mlp.w2": "model-00001-of-00002.safetensors", "model.layers.7.block_sparse_moe.router.layer.weight": "model-00001-of-00002.safetensors", "model.layers.7.norm_attn_norm.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.7.norm_attn_norm.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.7.norm_attn_norm.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.7.norm_attn_norm.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.7.norm_attn_norm.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.7.norm_attn_norm.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.8.block_sparse_moe.experts.mlp.v1": "model-00001-of-00002.safetensors", "model.layers.8.block_sparse_moe.experts.mlp.w1": "model-00001-of-00002.safetensors", "model.layers.8.block_sparse_moe.experts.mlp.w2": "model-00001-of-00002.safetensors", "model.layers.8.block_sparse_moe.router.layer.weight": "model-00001-of-00002.safetensors", "model.layers.8.norm_attn_norm.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.8.norm_attn_norm.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.8.norm_attn_norm.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.8.norm_attn_norm.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.8.norm_attn_norm.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.8.norm_attn_norm.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.9.block_sparse_moe.experts.mlp.v1": "model-00001-of-00002.safetensors", "model.layers.9.block_sparse_moe.experts.mlp.w1": "model-00001-of-00002.safetensors", "model.layers.9.block_sparse_moe.experts.mlp.w2": "model-00001-of-00002.safetensors", "model.layers.9.block_sparse_moe.router.layer.weight": "model-00001-of-00002.safetensors", "model.layers.9.norm_attn_norm.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.9.norm_attn_norm.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.9.norm_attn_norm.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.9.norm_attn_norm.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.9.norm_attn_norm.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.9.norm_attn_norm.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.norm.weight": "model-00002-of-00002.safetensors" } }