| { |
| "version": 2, |
| "model_id": "deepseek-ai/DeepSeek-V4-Pro", |
| "model_config": { |
| "moe": { |
| "model_type": "deepseek_v4", |
| "num_layers": 61, |
| "hidden_size": 7168, |
| "intermediate_size": 18432, |
| "moe_intermediate_size": 3072, |
| "n_routed_experts": 384, |
| "n_shared_experts": 1, |
| "num_experts_per_tok": 6, |
| "first_k_dense_replace": 0, |
| "torch_dtype": "bfloat16", |
| "quant_method": "fp8", |
| "templates": { |
| "expert_gate_proj": "layers.{layer}.ffn.experts.{e}.w1.weight", |
| "expert_up_proj": "layers.{layer}.ffn.experts.{e}.w3.weight", |
| "expert_down_proj": "layers.{layer}.ffn.experts.{e}.w2.weight", |
| "fused_gate_proj": "layers.{layer}.ffn.experts.w1", |
| "fused_down_proj": "layers.{layer}.ffn.experts.w2", |
| "shared_down_proj": [ |
| "layers.{layer}.ffn.shared_experts.w2.weight" |
| ], |
| "router": [ |
| "layers.{layer}.ffn.gate.weight", |
| "layers.{layer}.ffn.router.weight" |
| ], |
| "dense_down_proj": [ |
| "layers.{layer}.ffn.w2.weight" |
| ] |
| } |
| } |
| }, |
| "num_feats": 64, |
| "dtype": "float32", |
| "gate_vector_semantics": "right_singular_vectors_of_gate_proj", |
| "layers": { |
| "0": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 0 |
| }, |
| "1": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 704643072 |
| }, |
| "2": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 1409286144 |
| }, |
| "3": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 2113929216 |
| }, |
| "4": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 2818572288 |
| }, |
| "5": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 3523215360 |
| }, |
| "6": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 4227858432 |
| }, |
| "7": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 4932501504 |
| }, |
| "8": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 5637144576 |
| }, |
| "9": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 6341787648 |
| }, |
| "10": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 7046430720 |
| }, |
| "11": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 7751073792 |
| }, |
| "12": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 8455716864 |
| }, |
| "13": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 9160359936 |
| }, |
| "14": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 9865003008 |
| }, |
| "15": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 10569646080 |
| }, |
| "16": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 11274289152 |
| }, |
| "17": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 11978932224 |
| }, |
| "18": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 12683575296 |
| }, |
| "19": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 13388218368 |
| }, |
| "20": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 14092861440 |
| }, |
| "21": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 14797504512 |
| }, |
| "22": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 15502147584 |
| }, |
| "23": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 16206790656 |
| }, |
| "24": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 16911433728 |
| }, |
| "25": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 17616076800 |
| }, |
| "26": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 18320719872 |
| }, |
| "27": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 19025362944 |
| }, |
| "28": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 19730006016 |
| }, |
| "29": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 20434649088 |
| }, |
| "30": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 21139292160 |
| }, |
| "31": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 21843935232 |
| }, |
| "32": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 22548578304 |
| }, |
| "33": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 23253221376 |
| }, |
| "34": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 23957864448 |
| }, |
| "35": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 24662507520 |
| }, |
| "36": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 25367150592 |
| }, |
| "37": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 26071793664 |
| }, |
| "38": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 26776436736 |
| }, |
| "39": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 27481079808 |
| }, |
| "40": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 28185722880 |
| }, |
| "41": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 28890365952 |
| }, |
| "42": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 29595009024 |
| }, |
| "43": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 30299652096 |
| }, |
| "44": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 31004295168 |
| }, |
| "45": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 31708938240 |
| }, |
| "46": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 32413581312 |
| }, |
| "47": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 33118224384 |
| }, |
| "48": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 33822867456 |
| }, |
| "49": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 34527510528 |
| }, |
| "50": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 35232153600 |
| }, |
| "51": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 35936796672 |
| }, |
| "52": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 36641439744 |
| }, |
| "53": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 37346082816 |
| }, |
| "54": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 38050725888 |
| }, |
| "55": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 38755368960 |
| }, |
| "56": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 39460012032 |
| }, |
| "57": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 40164655104 |
| }, |
| "58": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 40869298176 |
| }, |
| "59": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 41573941248 |
| }, |
| "60": { |
| "shape": [ |
| 384, |
| 64, |
| 7168 |
| ], |
| "file_offset": 42278584320 |
| } |
| }, |
| "layer_stats": { |
| "0": { |
| "median_var64": 0.0711, |
| "q25_var64": 0.0709, |
| "q75_var64": 0.0714, |
| "n_experts": 384 |
| }, |
| "1": { |
| "median_var64": 0.0666, |
| "q25_var64": 0.0665, |
| "q75_var64": 0.0668, |
| "n_experts": 384 |
| }, |
| "2": { |
| "median_var64": 0.0581, |
| "q25_var64": 0.0581, |
| "q75_var64": 0.0582, |
| "n_experts": 384 |
| }, |
| "3": { |
| "median_var64": 0.0648, |
| "q25_var64": 0.0625, |
| "q75_var64": 0.0674, |
| "n_experts": 384 |
| }, |
| "4": { |
| "median_var64": 0.0656, |
| "q25_var64": 0.0631, |
| "q75_var64": 0.0691, |
| "n_experts": 384 |
| }, |
| "5": { |
| "median_var64": 0.0653, |
| "q25_var64": 0.0633, |
| "q75_var64": 0.0682, |
| "n_experts": 384 |
| }, |
| "6": { |
| "median_var64": 0.0655, |
| "q25_var64": 0.0636, |
| "q75_var64": 0.0685, |
| "n_experts": 384 |
| }, |
| "7": { |
| "median_var64": 0.0652, |
| "q25_var64": 0.063, |
| "q75_var64": 0.0672, |
| "n_experts": 384 |
| }, |
| "8": { |
| "median_var64": 0.0631, |
| "q25_var64": 0.0612, |
| "q75_var64": 0.0667, |
| "n_experts": 384 |
| }, |
| "9": { |
| "median_var64": 0.0632, |
| "q25_var64": 0.0604, |
| "q75_var64": 0.0675, |
| "n_experts": 384 |
| }, |
| "10": { |
| "median_var64": 0.0642, |
| "q25_var64": 0.061, |
| "q75_var64": 0.0678, |
| "n_experts": 384 |
| }, |
| "11": { |
| "median_var64": 0.0642, |
| "q25_var64": 0.061, |
| "q75_var64": 0.0683, |
| "n_experts": 384 |
| }, |
| "12": { |
| "median_var64": 0.0657, |
| "q25_var64": 0.0623, |
| "q75_var64": 0.0703, |
| "n_experts": 384 |
| }, |
| "13": { |
| "median_var64": 0.0649, |
| "q25_var64": 0.0617, |
| "q75_var64": 0.0695, |
| "n_experts": 384 |
| }, |
| "14": { |
| "median_var64": 0.0674, |
| "q25_var64": 0.0626, |
| "q75_var64": 0.076, |
| "n_experts": 384 |
| }, |
| "15": { |
| "median_var64": 0.0683, |
| "q25_var64": 0.062, |
| "q75_var64": 0.0762, |
| "n_experts": 384 |
| }, |
| "16": { |
| "median_var64": 0.0716, |
| "q25_var64": 0.0664, |
| "q75_var64": 0.079, |
| "n_experts": 384 |
| }, |
| "17": { |
| "median_var64": 0.0739, |
| "q25_var64": 0.0665, |
| "q75_var64": 0.0815, |
| "n_experts": 384 |
| }, |
| "18": { |
| "median_var64": 0.0775, |
| "q25_var64": 0.0696, |
| "q75_var64": 0.0874, |
| "n_experts": 384 |
| }, |
| "19": { |
| "median_var64": 0.0804, |
| "q25_var64": 0.0698, |
| "q75_var64": 0.0916, |
| "n_experts": 384 |
| }, |
| "20": { |
| "median_var64": 0.0864, |
| "q25_var64": 0.0748, |
| "q75_var64": 0.1002, |
| "n_experts": 384 |
| }, |
| "21": { |
| "median_var64": 0.0856, |
| "q25_var64": 0.0765, |
| "q75_var64": 0.0979, |
| "n_experts": 384 |
| }, |
| "22": { |
| "median_var64": 0.0932, |
| "q25_var64": 0.0828, |
| "q75_var64": 0.1056, |
| "n_experts": 384 |
| }, |
| "23": { |
| "median_var64": 0.0851, |
| "q25_var64": 0.0771, |
| "q75_var64": 0.0948, |
| "n_experts": 384 |
| }, |
| "24": { |
| "median_var64": 0.0918, |
| "q25_var64": 0.0806, |
| "q75_var64": 0.1027, |
| "n_experts": 384 |
| }, |
| "25": { |
| "median_var64": 0.0724, |
| "q25_var64": 0.0648, |
| "q75_var64": 0.0859, |
| "n_experts": 384 |
| }, |
| "26": { |
| "median_var64": 0.0833, |
| "q25_var64": 0.0699, |
| "q75_var64": 0.1023, |
| "n_experts": 384 |
| }, |
| "27": { |
| "median_var64": 0.0694, |
| "q25_var64": 0.0633, |
| "q75_var64": 0.0796, |
| "n_experts": 384 |
| }, |
| "28": { |
| "median_var64": 0.0731, |
| "q25_var64": 0.0646, |
| "q75_var64": 0.0824, |
| "n_experts": 384 |
| }, |
| "29": { |
| "median_var64": 0.0598, |
| "q25_var64": 0.0574, |
| "q75_var64": 0.0697, |
| "n_experts": 384 |
| }, |
| "30": { |
| "median_var64": 0.0673, |
| "q25_var64": 0.0607, |
| "q75_var64": 0.0812, |
| "n_experts": 384 |
| }, |
| "31": { |
| "median_var64": 0.0571, |
| "q25_var64": 0.056, |
| "q75_var64": 0.0617, |
| "n_experts": 384 |
| }, |
| "32": { |
| "median_var64": 0.0595, |
| "q25_var64": 0.0565, |
| "q75_var64": 0.0713, |
| "n_experts": 384 |
| }, |
| "33": { |
| "median_var64": 0.0623, |
| "q25_var64": 0.0569, |
| "q75_var64": 0.0765, |
| "n_experts": 384 |
| }, |
| "34": { |
| "median_var64": 0.0659, |
| "q25_var64": 0.0607, |
| "q75_var64": 0.082, |
| "n_experts": 384 |
| }, |
| "35": { |
| "median_var64": 0.0595, |
| "q25_var64": 0.0568, |
| "q75_var64": 0.0666, |
| "n_experts": 384 |
| }, |
| "36": { |
| "median_var64": 0.0626, |
| "q25_var64": 0.059, |
| "q75_var64": 0.0725, |
| "n_experts": 384 |
| }, |
| "37": { |
| "median_var64": 0.0585, |
| "q25_var64": 0.0567, |
| "q75_var64": 0.0644, |
| "n_experts": 384 |
| }, |
| "38": { |
| "median_var64": 0.0613, |
| "q25_var64": 0.0582, |
| "q75_var64": 0.0676, |
| "n_experts": 384 |
| }, |
| "39": { |
| "median_var64": 0.0577, |
| "q25_var64": 0.0569, |
| "q75_var64": 0.0615, |
| "n_experts": 384 |
| }, |
| "40": { |
| "median_var64": 0.0603, |
| "q25_var64": 0.0584, |
| "q75_var64": 0.0648, |
| "n_experts": 384 |
| }, |
| "41": { |
| "median_var64": 0.0576, |
| "q25_var64": 0.0566, |
| "q75_var64": 0.0602, |
| "n_experts": 384 |
| }, |
| "42": { |
| "median_var64": 0.0601, |
| "q25_var64": 0.0576, |
| "q75_var64": 0.0658, |
| "n_experts": 384 |
| }, |
| "43": { |
| "median_var64": 0.0567, |
| "q25_var64": 0.0561, |
| "q75_var64": 0.0595, |
| "n_experts": 384 |
| }, |
| "44": { |
| "median_var64": 0.0581, |
| "q25_var64": 0.0569, |
| "q75_var64": 0.0618, |
| "n_experts": 384 |
| }, |
| "45": { |
| "median_var64": 0.0567, |
| "q25_var64": 0.056, |
| "q75_var64": 0.0593, |
| "n_experts": 384 |
| }, |
| "46": { |
| "median_var64": 0.058, |
| "q25_var64": 0.0565, |
| "q75_var64": 0.0621, |
| "n_experts": 384 |
| }, |
| "47": { |
| "median_var64": 0.0569, |
| "q25_var64": 0.0561, |
| "q75_var64": 0.0604, |
| "n_experts": 384 |
| }, |
| "48": { |
| "median_var64": 0.0577, |
| "q25_var64": 0.0563, |
| "q75_var64": 0.0615, |
| "n_experts": 384 |
| }, |
| "49": { |
| "median_var64": 0.0592, |
| "q25_var64": 0.0568, |
| "q75_var64": 0.0655, |
| "n_experts": 384 |
| }, |
| "50": { |
| "median_var64": 0.0587, |
| "q25_var64": 0.0566, |
| "q75_var64": 0.0637, |
| "n_experts": 384 |
| }, |
| "51": { |
| "median_var64": 0.0578, |
| "q25_var64": 0.0565, |
| "q75_var64": 0.0612, |
| "n_experts": 384 |
| }, |
| "52": { |
| "median_var64": 0.0599, |
| "q25_var64": 0.0575, |
| "q75_var64": 0.0663, |
| "n_experts": 384 |
| }, |
| "53": { |
| "median_var64": 0.0699, |
| "q25_var64": 0.063, |
| "q75_var64": 0.0824, |
| "n_experts": 384 |
| }, |
| "54": { |
| "median_var64": 0.0673, |
| "q25_var64": 0.0616, |
| "q75_var64": 0.0763, |
| "n_experts": 384 |
| }, |
| "55": { |
| "median_var64": 0.067, |
| "q25_var64": 0.0614, |
| "q75_var64": 0.0785, |
| "n_experts": 384 |
| }, |
| "56": { |
| "median_var64": 0.0686, |
| "q25_var64": 0.0625, |
| "q75_var64": 0.0782, |
| "n_experts": 384 |
| }, |
| "57": { |
| "median_var64": 0.0814, |
| "q25_var64": 0.071, |
| "q75_var64": 0.0931, |
| "n_experts": 384 |
| }, |
| "58": { |
| "median_var64": 0.0729, |
| "q25_var64": 0.0655, |
| "q75_var64": 0.0858, |
| "n_experts": 384 |
| }, |
| "59": { |
| "median_var64": 0.077, |
| "q25_var64": 0.0689, |
| "q75_var64": 0.0879, |
| "n_experts": 384 |
| }, |
| "60": { |
| "median_var64": 0.0758, |
| "q25_var64": 0.068, |
| "q75_var64": 0.0892, |
| "n_experts": 384 |
| } |
| } |
| } |