{ "version": 2, "model_id": "deepseek-ai/DeepSeek-V4-Pro", "model_config": { "moe": { "model_type": "deepseek_v4", "num_layers": 61, "hidden_size": 7168, "intermediate_size": 18432, "moe_intermediate_size": 3072, "n_routed_experts": 384, "n_shared_experts": 1, "num_experts_per_tok": 6, "first_k_dense_replace": 0, "torch_dtype": "bfloat16", "quant_method": "fp8", "templates": { "expert_gate_proj": "layers.{layer}.ffn.experts.{e}.w1.weight", "expert_up_proj": "layers.{layer}.ffn.experts.{e}.w3.weight", "expert_down_proj": "layers.{layer}.ffn.experts.{e}.w2.weight", "fused_gate_proj": "layers.{layer}.ffn.experts.w1", "fused_down_proj": "layers.{layer}.ffn.experts.w2", "shared_down_proj": [ "layers.{layer}.ffn.shared_experts.w2.weight" ], "router": [ "layers.{layer}.ffn.gate.weight", "layers.{layer}.ffn.router.weight" ], "dense_down_proj": [ "layers.{layer}.ffn.w2.weight" ] } } }, "num_feats": 64, "dtype": "float32", "gate_vector_semantics": "right_singular_vectors_of_gate_proj", "layers": { "0": { "shape": [ 384, 64, 7168 ], "file_offset": 0 }, "1": { "shape": [ 384, 64, 7168 ], "file_offset": 704643072 }, "2": { "shape": [ 384, 64, 7168 ], "file_offset": 1409286144 }, "3": { "shape": [ 384, 64, 7168 ], "file_offset": 2113929216 }, "4": { "shape": [ 384, 64, 7168 ], "file_offset": 2818572288 }, "5": { "shape": [ 384, 64, 7168 ], "file_offset": 3523215360 }, "6": { "shape": [ 384, 64, 7168 ], "file_offset": 4227858432 }, "7": { "shape": [ 384, 64, 7168 ], "file_offset": 4932501504 }, "8": { "shape": [ 384, 64, 7168 ], "file_offset": 5637144576 }, "9": { "shape": [ 384, 64, 7168 ], "file_offset": 6341787648 }, "10": { "shape": [ 384, 64, 7168 ], "file_offset": 7046430720 }, "11": { "shape": [ 384, 64, 7168 ], "file_offset": 7751073792 }, "12": { "shape": [ 384, 64, 7168 ], "file_offset": 8455716864 }, "13": { "shape": [ 384, 64, 7168 ], "file_offset": 9160359936 }, "14": { "shape": [ 384, 64, 7168 ], "file_offset": 9865003008 }, "15": { "shape": [ 384, 64, 7168 ], "file_offset": 10569646080 }, "16": { "shape": [ 384, 64, 7168 ], "file_offset": 11274289152 }, "17": { "shape": [ 384, 64, 7168 ], "file_offset": 11978932224 }, "18": { "shape": [ 384, 64, 7168 ], "file_offset": 12683575296 }, "19": { "shape": [ 384, 64, 7168 ], "file_offset": 13388218368 }, "20": { "shape": [ 384, 64, 7168 ], "file_offset": 14092861440 }, "21": { "shape": [ 384, 64, 7168 ], "file_offset": 14797504512 }, "22": { "shape": [ 384, 64, 7168 ], "file_offset": 15502147584 }, "23": { "shape": [ 384, 64, 7168 ], "file_offset": 16206790656 }, "24": { "shape": [ 384, 64, 7168 ], "file_offset": 16911433728 }, "25": { "shape": [ 384, 64, 7168 ], "file_offset": 17616076800 }, "26": { "shape": [ 384, 64, 7168 ], "file_offset": 18320719872 }, "27": { "shape": [ 384, 64, 7168 ], "file_offset": 19025362944 }, "28": { "shape": [ 384, 64, 7168 ], "file_offset": 19730006016 }, "29": { "shape": [ 384, 64, 7168 ], "file_offset": 20434649088 }, "30": { "shape": [ 384, 64, 7168 ], "file_offset": 21139292160 }, "31": { "shape": [ 384, 64, 7168 ], "file_offset": 21843935232 }, "32": { "shape": [ 384, 64, 7168 ], "file_offset": 22548578304 }, "33": { "shape": [ 384, 64, 7168 ], "file_offset": 23253221376 }, "34": { "shape": [ 384, 64, 7168 ], "file_offset": 23957864448 }, "35": { "shape": [ 384, 64, 7168 ], "file_offset": 24662507520 }, "36": { "shape": [ 384, 64, 7168 ], "file_offset": 25367150592 }, "37": { "shape": [ 384, 64, 7168 ], "file_offset": 26071793664 }, "38": { "shape": [ 384, 64, 7168 ], "file_offset": 26776436736 }, "39": { "shape": [ 384, 64, 7168 ], "file_offset": 27481079808 }, "40": { "shape": [ 384, 64, 7168 ], "file_offset": 28185722880 }, "41": { "shape": [ 384, 64, 7168 ], "file_offset": 28890365952 }, "42": { "shape": [ 384, 64, 7168 ], "file_offset": 29595009024 }, "43": { "shape": [ 384, 64, 7168 ], "file_offset": 30299652096 }, "44": { "shape": [ 384, 64, 7168 ], "file_offset": 31004295168 }, "45": { "shape": [ 384, 64, 7168 ], "file_offset": 31708938240 }, "46": { "shape": [ 384, 64, 7168 ], "file_offset": 32413581312 }, "47": { "shape": [ 384, 64, 7168 ], "file_offset": 33118224384 }, "48": { "shape": [ 384, 64, 7168 ], "file_offset": 33822867456 }, "49": { "shape": [ 384, 64, 7168 ], "file_offset": 34527510528 }, "50": { "shape": [ 384, 64, 7168 ], "file_offset": 35232153600 }, "51": { "shape": [ 384, 64, 7168 ], "file_offset": 35936796672 }, "52": { "shape": [ 384, 64, 7168 ], "file_offset": 36641439744 }, "53": { "shape": [ 384, 64, 7168 ], "file_offset": 37346082816 }, "54": { "shape": [ 384, 64, 7168 ], "file_offset": 38050725888 }, "55": { "shape": [ 384, 64, 7168 ], "file_offset": 38755368960 }, "56": { "shape": [ 384, 64, 7168 ], "file_offset": 39460012032 }, "57": { "shape": [ 384, 64, 7168 ], "file_offset": 40164655104 }, "58": { "shape": [ 384, 64, 7168 ], "file_offset": 40869298176 }, "59": { "shape": [ 384, 64, 7168 ], "file_offset": 41573941248 }, "60": { "shape": [ 384, 64, 7168 ], "file_offset": 42278584320 } }, "layer_stats": { "0": { "median_var64": 0.0711, "q25_var64": 0.0709, "q75_var64": 0.0714, "n_experts": 384 }, "1": { "median_var64": 0.0666, "q25_var64": 0.0665, "q75_var64": 0.0668, "n_experts": 384 }, "2": { "median_var64": 0.0581, "q25_var64": 0.0581, "q75_var64": 0.0582, "n_experts": 384 }, "3": { "median_var64": 0.0648, "q25_var64": 0.0625, "q75_var64": 0.0674, "n_experts": 384 }, "4": { "median_var64": 0.0656, "q25_var64": 0.0631, "q75_var64": 0.0691, "n_experts": 384 }, "5": { "median_var64": 0.0653, "q25_var64": 0.0633, "q75_var64": 0.0682, "n_experts": 384 }, "6": { "median_var64": 0.0655, "q25_var64": 0.0636, "q75_var64": 0.0685, "n_experts": 384 }, "7": { "median_var64": 0.0652, "q25_var64": 0.063, "q75_var64": 0.0672, "n_experts": 384 }, "8": { "median_var64": 0.0631, "q25_var64": 0.0612, "q75_var64": 0.0667, "n_experts": 384 }, "9": { "median_var64": 0.0632, "q25_var64": 0.0604, "q75_var64": 0.0675, "n_experts": 384 }, "10": { "median_var64": 0.0642, "q25_var64": 0.061, "q75_var64": 0.0678, "n_experts": 384 }, "11": { "median_var64": 0.0642, "q25_var64": 0.061, "q75_var64": 0.0683, "n_experts": 384 }, "12": { "median_var64": 0.0657, "q25_var64": 0.0623, "q75_var64": 0.0703, "n_experts": 384 }, "13": { "median_var64": 0.0649, "q25_var64": 0.0617, "q75_var64": 0.0695, "n_experts": 384 }, "14": { "median_var64": 0.0674, "q25_var64": 0.0626, "q75_var64": 0.076, "n_experts": 384 }, "15": { "median_var64": 0.0683, "q25_var64": 0.062, "q75_var64": 0.0762, "n_experts": 384 }, "16": { "median_var64": 0.0716, "q25_var64": 0.0664, "q75_var64": 0.079, "n_experts": 384 }, "17": { "median_var64": 0.0739, "q25_var64": 0.0665, "q75_var64": 0.0815, "n_experts": 384 }, "18": { "median_var64": 0.0775, "q25_var64": 0.0696, "q75_var64": 0.0874, "n_experts": 384 }, "19": { "median_var64": 0.0804, "q25_var64": 0.0698, "q75_var64": 0.0916, "n_experts": 384 }, "20": { "median_var64": 0.0864, "q25_var64": 0.0748, "q75_var64": 0.1002, "n_experts": 384 }, "21": { "median_var64": 0.0856, "q25_var64": 0.0765, "q75_var64": 0.0979, "n_experts": 384 }, "22": { "median_var64": 0.0932, "q25_var64": 0.0828, "q75_var64": 0.1056, "n_experts": 384 }, "23": { "median_var64": 0.0851, "q25_var64": 0.0771, "q75_var64": 0.0948, "n_experts": 384 }, "24": { "median_var64": 0.0918, "q25_var64": 0.0806, "q75_var64": 0.1027, "n_experts": 384 }, "25": { "median_var64": 0.0724, "q25_var64": 0.0648, "q75_var64": 0.0859, "n_experts": 384 }, "26": { "median_var64": 0.0833, "q25_var64": 0.0699, "q75_var64": 0.1023, "n_experts": 384 }, "27": { "median_var64": 0.0694, "q25_var64": 0.0633, "q75_var64": 0.0796, "n_experts": 384 }, "28": { "median_var64": 0.0731, "q25_var64": 0.0646, "q75_var64": 0.0824, "n_experts": 384 }, "29": { "median_var64": 0.0598, "q25_var64": 0.0574, "q75_var64": 0.0697, "n_experts": 384 }, "30": { "median_var64": 0.0673, "q25_var64": 0.0607, "q75_var64": 0.0812, "n_experts": 384 }, "31": { "median_var64": 0.0571, "q25_var64": 0.056, "q75_var64": 0.0617, "n_experts": 384 }, "32": { "median_var64": 0.0595, "q25_var64": 0.0565, "q75_var64": 0.0713, "n_experts": 384 }, "33": { "median_var64": 0.0623, "q25_var64": 0.0569, "q75_var64": 0.0765, "n_experts": 384 }, "34": { "median_var64": 0.0659, "q25_var64": 0.0607, "q75_var64": 0.082, "n_experts": 384 }, "35": { "median_var64": 0.0595, "q25_var64": 0.0568, "q75_var64": 0.0666, "n_experts": 384 }, "36": { "median_var64": 0.0626, "q25_var64": 0.059, "q75_var64": 0.0725, "n_experts": 384 }, "37": { "median_var64": 0.0585, "q25_var64": 0.0567, "q75_var64": 0.0644, "n_experts": 384 }, "38": { "median_var64": 0.0613, "q25_var64": 0.0582, "q75_var64": 0.0676, "n_experts": 384 }, "39": { "median_var64": 0.0577, "q25_var64": 0.0569, "q75_var64": 0.0615, "n_experts": 384 }, "40": { "median_var64": 0.0603, "q25_var64": 0.0584, "q75_var64": 0.0648, "n_experts": 384 }, "41": { "median_var64": 0.0576, "q25_var64": 0.0566, "q75_var64": 0.0602, "n_experts": 384 }, "42": { "median_var64": 0.0601, "q25_var64": 0.0576, "q75_var64": 0.0658, "n_experts": 384 }, "43": { "median_var64": 0.0567, "q25_var64": 0.0561, "q75_var64": 0.0595, "n_experts": 384 }, "44": { "median_var64": 0.0581, "q25_var64": 0.0569, "q75_var64": 0.0618, "n_experts": 384 }, "45": { "median_var64": 0.0567, "q25_var64": 0.056, "q75_var64": 0.0593, "n_experts": 384 }, "46": { "median_var64": 0.058, "q25_var64": 0.0565, "q75_var64": 0.0621, "n_experts": 384 }, "47": { "median_var64": 0.0569, "q25_var64": 0.0561, "q75_var64": 0.0604, "n_experts": 384 }, "48": { "median_var64": 0.0577, "q25_var64": 0.0563, "q75_var64": 0.0615, "n_experts": 384 }, "49": { "median_var64": 0.0592, "q25_var64": 0.0568, "q75_var64": 0.0655, "n_experts": 384 }, "50": { "median_var64": 0.0587, "q25_var64": 0.0566, "q75_var64": 0.0637, "n_experts": 384 }, "51": { "median_var64": 0.0578, "q25_var64": 0.0565, "q75_var64": 0.0612, "n_experts": 384 }, "52": { "median_var64": 0.0599, "q25_var64": 0.0575, "q75_var64": 0.0663, "n_experts": 384 }, "53": { "median_var64": 0.0699, "q25_var64": 0.063, "q75_var64": 0.0824, "n_experts": 384 }, "54": { "median_var64": 0.0673, "q25_var64": 0.0616, "q75_var64": 0.0763, "n_experts": 384 }, "55": { "median_var64": 0.067, "q25_var64": 0.0614, "q75_var64": 0.0785, "n_experts": 384 }, "56": { "median_var64": 0.0686, "q25_var64": 0.0625, "q75_var64": 0.0782, "n_experts": 384 }, "57": { "median_var64": 0.0814, "q25_var64": 0.071, "q75_var64": 0.0931, "n_experts": 384 }, "58": { "median_var64": 0.0729, "q25_var64": 0.0655, "q75_var64": 0.0858, "n_experts": 384 }, "59": { "median_var64": 0.077, "q25_var64": 0.0689, "q75_var64": 0.0879, "n_experts": 384 }, "60": { "median_var64": 0.0758, "q25_var64": 0.068, "q75_var64": 0.0892, "n_experts": 384 } } }