base_Medical_continuous / model.safetensors.index.json
harshad317's picture
Uploading the base model
2ca81ec verified
{
"metadata": {
"total_size": 1845802880,
"num_shards": 1,
"step": 45444,
"format": "safetensors_state_dict"
},
"weight_map": {
"transformer.wte.weight": "model-00001-of-00001.safetensors",
"transformer.h.0.attn.c_q.weight": "model-00001-of-00001.safetensors",
"transformer.h.0.attn.c_k.weight": "model-00001-of-00001.safetensors",
"transformer.h.0.attn.c_v.weight": "model-00001-of-00001.safetensors",
"transformer.h.0.attn.c_proj.weight": "model-00001-of-00001.safetensors",
"transformer.h.0.mlp.c_fc.weight": "model-00001-of-00001.safetensors",
"transformer.h.0.mlp.c_proj.weight": "model-00001-of-00001.safetensors",
"transformer.h.1.attn.c_q.weight": "model-00001-of-00001.safetensors",
"transformer.h.1.attn.c_k.weight": "model-00001-of-00001.safetensors",
"transformer.h.1.attn.c_v.weight": "model-00001-of-00001.safetensors",
"transformer.h.1.attn.c_proj.weight": "model-00001-of-00001.safetensors",
"transformer.h.1.mlp.c_fc.weight": "model-00001-of-00001.safetensors",
"transformer.h.1.mlp.c_proj.weight": "model-00001-of-00001.safetensors",
"transformer.h.2.attn.c_q.weight": "model-00001-of-00001.safetensors",
"transformer.h.2.attn.c_k.weight": "model-00001-of-00001.safetensors",
"transformer.h.2.attn.c_v.weight": "model-00001-of-00001.safetensors",
"transformer.h.2.attn.c_proj.weight": "model-00001-of-00001.safetensors",
"transformer.h.2.mlp.router_bias": "model-00001-of-00001.safetensors",
"transformer.h.2.mlp.uniform_load": "model-00001-of-00001.safetensors",
"transformer.h.2.mlp.router.weight": "model-00001-of-00001.safetensors",
"transformer.h.2.mlp.experts.0.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.2.mlp.experts.0.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.2.mlp.experts.1.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.2.mlp.experts.1.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.2.mlp.experts.2.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.2.mlp.experts.2.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.2.mlp.experts.3.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.2.mlp.experts.3.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.2.mlp.experts.4.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.2.mlp.experts.4.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.2.mlp.experts.5.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.2.mlp.experts.5.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.2.mlp.experts.6.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.2.mlp.experts.6.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.2.mlp.experts.7.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.2.mlp.experts.7.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.2.mlp.adjugate_experts.0.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.2.mlp.adjugate_experts.0.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.2.mlp.adjugate_experts.1.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.2.mlp.adjugate_experts.1.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.2.mlp.router_context_proj.weight": "model-00001-of-00001.safetensors",
"transformer.h.2.mlp.router_context_scale_proj.weight": "model-00001-of-00001.safetensors",
"transformer.h.2.mlp.router_context_selection_proj.weight": "model-00001-of-00001.safetensors",
"transformer.h.3.attn.c_q.weight": "model-00001-of-00001.safetensors",
"transformer.h.3.attn.c_k.weight": "model-00001-of-00001.safetensors",
"transformer.h.3.attn.c_v.weight": "model-00001-of-00001.safetensors",
"transformer.h.3.attn.c_proj.weight": "model-00001-of-00001.safetensors",
"transformer.h.3.mlp.c_fc.weight": "model-00001-of-00001.safetensors",
"transformer.h.3.mlp.c_proj.weight": "model-00001-of-00001.safetensors",
"transformer.h.4.attn.c_q.weight": "model-00001-of-00001.safetensors",
"transformer.h.4.attn.c_k.weight": "model-00001-of-00001.safetensors",
"transformer.h.4.attn.c_v.weight": "model-00001-of-00001.safetensors",
"transformer.h.4.attn.c_proj.weight": "model-00001-of-00001.safetensors",
"transformer.h.4.mlp.c_fc.weight": "model-00001-of-00001.safetensors",
"transformer.h.4.mlp.c_proj.weight": "model-00001-of-00001.safetensors",
"transformer.h.5.attn.c_q.weight": "model-00001-of-00001.safetensors",
"transformer.h.5.attn.c_k.weight": "model-00001-of-00001.safetensors",
"transformer.h.5.attn.c_v.weight": "model-00001-of-00001.safetensors",
"transformer.h.5.attn.c_proj.weight": "model-00001-of-00001.safetensors",
"transformer.h.5.mlp.router_bias": "model-00001-of-00001.safetensors",
"transformer.h.5.mlp.uniform_load": "model-00001-of-00001.safetensors",
"transformer.h.5.mlp.router.weight": "model-00001-of-00001.safetensors",
"transformer.h.5.mlp.experts.0.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.5.mlp.experts.0.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.5.mlp.experts.1.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.5.mlp.experts.1.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.5.mlp.experts.2.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.5.mlp.experts.2.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.5.mlp.experts.3.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.5.mlp.experts.3.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.5.mlp.experts.4.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.5.mlp.experts.4.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.5.mlp.experts.5.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.5.mlp.experts.5.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.5.mlp.experts.6.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.5.mlp.experts.6.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.5.mlp.experts.7.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.5.mlp.experts.7.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.5.mlp.adjugate_experts.0.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.5.mlp.adjugate_experts.0.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.5.mlp.adjugate_experts.1.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.5.mlp.adjugate_experts.1.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.5.mlp.router_context_proj.weight": "model-00001-of-00001.safetensors",
"transformer.h.5.mlp.router_context_scale_proj.weight": "model-00001-of-00001.safetensors",
"transformer.h.5.mlp.router_context_selection_proj.weight": "model-00001-of-00001.safetensors",
"transformer.h.6.attn.c_q.weight": "model-00001-of-00001.safetensors",
"transformer.h.6.attn.c_k.weight": "model-00001-of-00001.safetensors",
"transformer.h.6.attn.c_v.weight": "model-00001-of-00001.safetensors",
"transformer.h.6.attn.c_proj.weight": "model-00001-of-00001.safetensors",
"transformer.h.6.mlp.c_fc.weight": "model-00001-of-00001.safetensors",
"transformer.h.6.mlp.c_proj.weight": "model-00001-of-00001.safetensors",
"transformer.h.7.attn.c_q.weight": "model-00001-of-00001.safetensors",
"transformer.h.7.attn.c_k.weight": "model-00001-of-00001.safetensors",
"transformer.h.7.attn.c_v.weight": "model-00001-of-00001.safetensors",
"transformer.h.7.attn.c_proj.weight": "model-00001-of-00001.safetensors",
"transformer.h.7.mlp.c_fc.weight": "model-00001-of-00001.safetensors",
"transformer.h.7.mlp.c_proj.weight": "model-00001-of-00001.safetensors",
"transformer.h.8.attn.c_q.weight": "model-00001-of-00001.safetensors",
"transformer.h.8.attn.c_k.weight": "model-00001-of-00001.safetensors",
"transformer.h.8.attn.c_v.weight": "model-00001-of-00001.safetensors",
"transformer.h.8.attn.c_proj.weight": "model-00001-of-00001.safetensors",
"transformer.h.8.mlp.router_bias": "model-00001-of-00001.safetensors",
"transformer.h.8.mlp.uniform_load": "model-00001-of-00001.safetensors",
"transformer.h.8.mlp.router.weight": "model-00001-of-00001.safetensors",
"transformer.h.8.mlp.experts.0.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.8.mlp.experts.0.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.8.mlp.experts.1.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.8.mlp.experts.1.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.8.mlp.experts.2.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.8.mlp.experts.2.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.8.mlp.experts.3.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.8.mlp.experts.3.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.8.mlp.experts.4.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.8.mlp.experts.4.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.8.mlp.experts.5.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.8.mlp.experts.5.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.8.mlp.experts.6.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.8.mlp.experts.6.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.8.mlp.experts.7.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.8.mlp.experts.7.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.8.mlp.adjugate_experts.0.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.8.mlp.adjugate_experts.0.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.8.mlp.adjugate_experts.1.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.8.mlp.adjugate_experts.1.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.8.mlp.router_context_proj.weight": "model-00001-of-00001.safetensors",
"transformer.h.8.mlp.router_context_scale_proj.weight": "model-00001-of-00001.safetensors",
"transformer.h.8.mlp.router_context_selection_proj.weight": "model-00001-of-00001.safetensors",
"transformer.h.9.attn.c_q.weight": "model-00001-of-00001.safetensors",
"transformer.h.9.attn.c_k.weight": "model-00001-of-00001.safetensors",
"transformer.h.9.attn.c_v.weight": "model-00001-of-00001.safetensors",
"transformer.h.9.attn.c_proj.weight": "model-00001-of-00001.safetensors",
"transformer.h.9.mlp.c_fc.weight": "model-00001-of-00001.safetensors",
"transformer.h.9.mlp.c_proj.weight": "model-00001-of-00001.safetensors",
"transformer.h.10.attn.c_q.weight": "model-00001-of-00001.safetensors",
"transformer.h.10.attn.c_k.weight": "model-00001-of-00001.safetensors",
"transformer.h.10.attn.c_v.weight": "model-00001-of-00001.safetensors",
"transformer.h.10.attn.c_proj.weight": "model-00001-of-00001.safetensors",
"transformer.h.10.mlp.c_fc.weight": "model-00001-of-00001.safetensors",
"transformer.h.10.mlp.c_proj.weight": "model-00001-of-00001.safetensors",
"transformer.h.11.attn.c_q.weight": "model-00001-of-00001.safetensors",
"transformer.h.11.attn.c_k.weight": "model-00001-of-00001.safetensors",
"transformer.h.11.attn.c_v.weight": "model-00001-of-00001.safetensors",
"transformer.h.11.attn.c_proj.weight": "model-00001-of-00001.safetensors",
"transformer.h.11.mlp.router_bias": "model-00001-of-00001.safetensors",
"transformer.h.11.mlp.uniform_load": "model-00001-of-00001.safetensors",
"transformer.h.11.mlp.router.weight": "model-00001-of-00001.safetensors",
"transformer.h.11.mlp.experts.0.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.11.mlp.experts.0.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.11.mlp.experts.1.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.11.mlp.experts.1.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.11.mlp.experts.2.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.11.mlp.experts.2.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.11.mlp.experts.3.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.11.mlp.experts.3.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.11.mlp.experts.4.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.11.mlp.experts.4.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.11.mlp.experts.5.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.11.mlp.experts.5.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.11.mlp.experts.6.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.11.mlp.experts.6.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.11.mlp.experts.7.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.11.mlp.experts.7.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.11.mlp.adjugate_experts.0.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.11.mlp.adjugate_experts.0.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.11.mlp.adjugate_experts.1.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.11.mlp.adjugate_experts.1.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.11.mlp.router_context_proj.weight": "model-00001-of-00001.safetensors",
"transformer.h.11.mlp.router_context_scale_proj.weight": "model-00001-of-00001.safetensors",
"transformer.h.11.mlp.router_context_selection_proj.weight": "model-00001-of-00001.safetensors",
"transformer.h.12.attn.c_q.weight": "model-00001-of-00001.safetensors",
"transformer.h.12.attn.c_k.weight": "model-00001-of-00001.safetensors",
"transformer.h.12.attn.c_v.weight": "model-00001-of-00001.safetensors",
"transformer.h.12.attn.c_proj.weight": "model-00001-of-00001.safetensors",
"transformer.h.12.mlp.c_fc.weight": "model-00001-of-00001.safetensors",
"transformer.h.12.mlp.c_proj.weight": "model-00001-of-00001.safetensors",
"transformer.h.13.attn.c_q.weight": "model-00001-of-00001.safetensors",
"transformer.h.13.attn.c_k.weight": "model-00001-of-00001.safetensors",
"transformer.h.13.attn.c_v.weight": "model-00001-of-00001.safetensors",
"transformer.h.13.attn.c_proj.weight": "model-00001-of-00001.safetensors",
"transformer.h.13.mlp.c_fc.weight": "model-00001-of-00001.safetensors",
"transformer.h.13.mlp.c_proj.weight": "model-00001-of-00001.safetensors",
"transformer.h.14.attn.c_q.weight": "model-00001-of-00001.safetensors",
"transformer.h.14.attn.c_k.weight": "model-00001-of-00001.safetensors",
"transformer.h.14.attn.c_v.weight": "model-00001-of-00001.safetensors",
"transformer.h.14.attn.c_proj.weight": "model-00001-of-00001.safetensors",
"transformer.h.14.mlp.router_bias": "model-00001-of-00001.safetensors",
"transformer.h.14.mlp.uniform_load": "model-00001-of-00001.safetensors",
"transformer.h.14.mlp.router.weight": "model-00001-of-00001.safetensors",
"transformer.h.14.mlp.experts.0.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.14.mlp.experts.0.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.14.mlp.experts.1.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.14.mlp.experts.1.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.14.mlp.experts.2.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.14.mlp.experts.2.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.14.mlp.experts.3.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.14.mlp.experts.3.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.14.mlp.experts.4.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.14.mlp.experts.4.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.14.mlp.experts.5.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.14.mlp.experts.5.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.14.mlp.experts.6.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.14.mlp.experts.6.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.14.mlp.experts.7.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.14.mlp.experts.7.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.14.mlp.adjugate_experts.0.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.14.mlp.adjugate_experts.0.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.14.mlp.adjugate_experts.1.w1.weight": "model-00001-of-00001.safetensors",
"transformer.h.14.mlp.adjugate_experts.1.w2.weight": "model-00001-of-00001.safetensors",
"transformer.h.14.mlp.router_context_proj.weight": "model-00001-of-00001.safetensors",
"transformer.h.14.mlp.router_context_scale_proj.weight": "model-00001-of-00001.safetensors",
"transformer.h.14.mlp.router_context_selection_proj.weight": "model-00001-of-00001.safetensors",
"transformer.h.15.attn.c_q.weight": "model-00001-of-00001.safetensors",
"transformer.h.15.attn.c_k.weight": "model-00001-of-00001.safetensors",
"transformer.h.15.attn.c_v.weight": "model-00001-of-00001.safetensors",
"transformer.h.15.attn.c_proj.weight": "model-00001-of-00001.safetensors",
"transformer.h.15.mlp.c_fc.weight": "model-00001-of-00001.safetensors",
"transformer.h.15.mlp.c_proj.weight": "model-00001-of-00001.safetensors",
"lm_head.weight": "model-00001-of-00001.safetensors"
}
}