deepseek-v4-pro-vindex / gate_vectors_index.json
mikeumus-divincian's picture
Add gate_vectors_index.json
ca47267 verified
{
"version": 2,
"model_id": "deepseek-ai/DeepSeek-V4-Pro",
"model_config": {
"moe": {
"model_type": "deepseek_v4",
"num_layers": 61,
"hidden_size": 7168,
"intermediate_size": 18432,
"moe_intermediate_size": 3072,
"n_routed_experts": 384,
"n_shared_experts": 1,
"num_experts_per_tok": 6,
"first_k_dense_replace": 0,
"torch_dtype": "bfloat16",
"quant_method": "fp8",
"templates": {
"expert_gate_proj": "layers.{layer}.ffn.experts.{e}.w1.weight",
"expert_up_proj": "layers.{layer}.ffn.experts.{e}.w3.weight",
"expert_down_proj": "layers.{layer}.ffn.experts.{e}.w2.weight",
"fused_gate_proj": "layers.{layer}.ffn.experts.w1",
"fused_down_proj": "layers.{layer}.ffn.experts.w2",
"shared_down_proj": [
"layers.{layer}.ffn.shared_experts.w2.weight"
],
"router": [
"layers.{layer}.ffn.gate.weight",
"layers.{layer}.ffn.router.weight"
],
"dense_down_proj": [
"layers.{layer}.ffn.w2.weight"
]
}
}
},
"num_feats": 64,
"dtype": "float32",
"gate_vector_semantics": "right_singular_vectors_of_gate_proj",
"layers": {
"0": {
"shape": [
384,
64,
7168
],
"file_offset": 0
},
"1": {
"shape": [
384,
64,
7168
],
"file_offset": 704643072
},
"2": {
"shape": [
384,
64,
7168
],
"file_offset": 1409286144
},
"3": {
"shape": [
384,
64,
7168
],
"file_offset": 2113929216
},
"4": {
"shape": [
384,
64,
7168
],
"file_offset": 2818572288
},
"5": {
"shape": [
384,
64,
7168
],
"file_offset": 3523215360
},
"6": {
"shape": [
384,
64,
7168
],
"file_offset": 4227858432
},
"7": {
"shape": [
384,
64,
7168
],
"file_offset": 4932501504
},
"8": {
"shape": [
384,
64,
7168
],
"file_offset": 5637144576
},
"9": {
"shape": [
384,
64,
7168
],
"file_offset": 6341787648
},
"10": {
"shape": [
384,
64,
7168
],
"file_offset": 7046430720
},
"11": {
"shape": [
384,
64,
7168
],
"file_offset": 7751073792
},
"12": {
"shape": [
384,
64,
7168
],
"file_offset": 8455716864
},
"13": {
"shape": [
384,
64,
7168
],
"file_offset": 9160359936
},
"14": {
"shape": [
384,
64,
7168
],
"file_offset": 9865003008
},
"15": {
"shape": [
384,
64,
7168
],
"file_offset": 10569646080
},
"16": {
"shape": [
384,
64,
7168
],
"file_offset": 11274289152
},
"17": {
"shape": [
384,
64,
7168
],
"file_offset": 11978932224
},
"18": {
"shape": [
384,
64,
7168
],
"file_offset": 12683575296
},
"19": {
"shape": [
384,
64,
7168
],
"file_offset": 13388218368
},
"20": {
"shape": [
384,
64,
7168
],
"file_offset": 14092861440
},
"21": {
"shape": [
384,
64,
7168
],
"file_offset": 14797504512
},
"22": {
"shape": [
384,
64,
7168
],
"file_offset": 15502147584
},
"23": {
"shape": [
384,
64,
7168
],
"file_offset": 16206790656
},
"24": {
"shape": [
384,
64,
7168
],
"file_offset": 16911433728
},
"25": {
"shape": [
384,
64,
7168
],
"file_offset": 17616076800
},
"26": {
"shape": [
384,
64,
7168
],
"file_offset": 18320719872
},
"27": {
"shape": [
384,
64,
7168
],
"file_offset": 19025362944
},
"28": {
"shape": [
384,
64,
7168
],
"file_offset": 19730006016
},
"29": {
"shape": [
384,
64,
7168
],
"file_offset": 20434649088
},
"30": {
"shape": [
384,
64,
7168
],
"file_offset": 21139292160
},
"31": {
"shape": [
384,
64,
7168
],
"file_offset": 21843935232
},
"32": {
"shape": [
384,
64,
7168
],
"file_offset": 22548578304
},
"33": {
"shape": [
384,
64,
7168
],
"file_offset": 23253221376
},
"34": {
"shape": [
384,
64,
7168
],
"file_offset": 23957864448
},
"35": {
"shape": [
384,
64,
7168
],
"file_offset": 24662507520
},
"36": {
"shape": [
384,
64,
7168
],
"file_offset": 25367150592
},
"37": {
"shape": [
384,
64,
7168
],
"file_offset": 26071793664
},
"38": {
"shape": [
384,
64,
7168
],
"file_offset": 26776436736
},
"39": {
"shape": [
384,
64,
7168
],
"file_offset": 27481079808
},
"40": {
"shape": [
384,
64,
7168
],
"file_offset": 28185722880
},
"41": {
"shape": [
384,
64,
7168
],
"file_offset": 28890365952
},
"42": {
"shape": [
384,
64,
7168
],
"file_offset": 29595009024
},
"43": {
"shape": [
384,
64,
7168
],
"file_offset": 30299652096
},
"44": {
"shape": [
384,
64,
7168
],
"file_offset": 31004295168
},
"45": {
"shape": [
384,
64,
7168
],
"file_offset": 31708938240
},
"46": {
"shape": [
384,
64,
7168
],
"file_offset": 32413581312
},
"47": {
"shape": [
384,
64,
7168
],
"file_offset": 33118224384
},
"48": {
"shape": [
384,
64,
7168
],
"file_offset": 33822867456
},
"49": {
"shape": [
384,
64,
7168
],
"file_offset": 34527510528
},
"50": {
"shape": [
384,
64,
7168
],
"file_offset": 35232153600
},
"51": {
"shape": [
384,
64,
7168
],
"file_offset": 35936796672
},
"52": {
"shape": [
384,
64,
7168
],
"file_offset": 36641439744
},
"53": {
"shape": [
384,
64,
7168
],
"file_offset": 37346082816
},
"54": {
"shape": [
384,
64,
7168
],
"file_offset": 38050725888
},
"55": {
"shape": [
384,
64,
7168
],
"file_offset": 38755368960
},
"56": {
"shape": [
384,
64,
7168
],
"file_offset": 39460012032
},
"57": {
"shape": [
384,
64,
7168
],
"file_offset": 40164655104
},
"58": {
"shape": [
384,
64,
7168
],
"file_offset": 40869298176
},
"59": {
"shape": [
384,
64,
7168
],
"file_offset": 41573941248
},
"60": {
"shape": [
384,
64,
7168
],
"file_offset": 42278584320
}
},
"layer_stats": {
"0": {
"median_var64": 0.0711,
"q25_var64": 0.0709,
"q75_var64": 0.0714,
"n_experts": 384
},
"1": {
"median_var64": 0.0666,
"q25_var64": 0.0665,
"q75_var64": 0.0668,
"n_experts": 384
},
"2": {
"median_var64": 0.0581,
"q25_var64": 0.0581,
"q75_var64": 0.0582,
"n_experts": 384
},
"3": {
"median_var64": 0.0648,
"q25_var64": 0.0625,
"q75_var64": 0.0674,
"n_experts": 384
},
"4": {
"median_var64": 0.0656,
"q25_var64": 0.0631,
"q75_var64": 0.0691,
"n_experts": 384
},
"5": {
"median_var64": 0.0653,
"q25_var64": 0.0633,
"q75_var64": 0.0682,
"n_experts": 384
},
"6": {
"median_var64": 0.0655,
"q25_var64": 0.0636,
"q75_var64": 0.0685,
"n_experts": 384
},
"7": {
"median_var64": 0.0652,
"q25_var64": 0.063,
"q75_var64": 0.0672,
"n_experts": 384
},
"8": {
"median_var64": 0.0631,
"q25_var64": 0.0612,
"q75_var64": 0.0667,
"n_experts": 384
},
"9": {
"median_var64": 0.0632,
"q25_var64": 0.0604,
"q75_var64": 0.0675,
"n_experts": 384
},
"10": {
"median_var64": 0.0642,
"q25_var64": 0.061,
"q75_var64": 0.0678,
"n_experts": 384
},
"11": {
"median_var64": 0.0642,
"q25_var64": 0.061,
"q75_var64": 0.0683,
"n_experts": 384
},
"12": {
"median_var64": 0.0657,
"q25_var64": 0.0623,
"q75_var64": 0.0703,
"n_experts": 384
},
"13": {
"median_var64": 0.0649,
"q25_var64": 0.0617,
"q75_var64": 0.0695,
"n_experts": 384
},
"14": {
"median_var64": 0.0674,
"q25_var64": 0.0626,
"q75_var64": 0.076,
"n_experts": 384
},
"15": {
"median_var64": 0.0683,
"q25_var64": 0.062,
"q75_var64": 0.0762,
"n_experts": 384
},
"16": {
"median_var64": 0.0716,
"q25_var64": 0.0664,
"q75_var64": 0.079,
"n_experts": 384
},
"17": {
"median_var64": 0.0739,
"q25_var64": 0.0665,
"q75_var64": 0.0815,
"n_experts": 384
},
"18": {
"median_var64": 0.0775,
"q25_var64": 0.0696,
"q75_var64": 0.0874,
"n_experts": 384
},
"19": {
"median_var64": 0.0804,
"q25_var64": 0.0698,
"q75_var64": 0.0916,
"n_experts": 384
},
"20": {
"median_var64": 0.0864,
"q25_var64": 0.0748,
"q75_var64": 0.1002,
"n_experts": 384
},
"21": {
"median_var64": 0.0856,
"q25_var64": 0.0765,
"q75_var64": 0.0979,
"n_experts": 384
},
"22": {
"median_var64": 0.0932,
"q25_var64": 0.0828,
"q75_var64": 0.1056,
"n_experts": 384
},
"23": {
"median_var64": 0.0851,
"q25_var64": 0.0771,
"q75_var64": 0.0948,
"n_experts": 384
},
"24": {
"median_var64": 0.0918,
"q25_var64": 0.0806,
"q75_var64": 0.1027,
"n_experts": 384
},
"25": {
"median_var64": 0.0724,
"q25_var64": 0.0648,
"q75_var64": 0.0859,
"n_experts": 384
},
"26": {
"median_var64": 0.0833,
"q25_var64": 0.0699,
"q75_var64": 0.1023,
"n_experts": 384
},
"27": {
"median_var64": 0.0694,
"q25_var64": 0.0633,
"q75_var64": 0.0796,
"n_experts": 384
},
"28": {
"median_var64": 0.0731,
"q25_var64": 0.0646,
"q75_var64": 0.0824,
"n_experts": 384
},
"29": {
"median_var64": 0.0598,
"q25_var64": 0.0574,
"q75_var64": 0.0697,
"n_experts": 384
},
"30": {
"median_var64": 0.0673,
"q25_var64": 0.0607,
"q75_var64": 0.0812,
"n_experts": 384
},
"31": {
"median_var64": 0.0571,
"q25_var64": 0.056,
"q75_var64": 0.0617,
"n_experts": 384
},
"32": {
"median_var64": 0.0595,
"q25_var64": 0.0565,
"q75_var64": 0.0713,
"n_experts": 384
},
"33": {
"median_var64": 0.0623,
"q25_var64": 0.0569,
"q75_var64": 0.0765,
"n_experts": 384
},
"34": {
"median_var64": 0.0659,
"q25_var64": 0.0607,
"q75_var64": 0.082,
"n_experts": 384
},
"35": {
"median_var64": 0.0595,
"q25_var64": 0.0568,
"q75_var64": 0.0666,
"n_experts": 384
},
"36": {
"median_var64": 0.0626,
"q25_var64": 0.059,
"q75_var64": 0.0725,
"n_experts": 384
},
"37": {
"median_var64": 0.0585,
"q25_var64": 0.0567,
"q75_var64": 0.0644,
"n_experts": 384
},
"38": {
"median_var64": 0.0613,
"q25_var64": 0.0582,
"q75_var64": 0.0676,
"n_experts": 384
},
"39": {
"median_var64": 0.0577,
"q25_var64": 0.0569,
"q75_var64": 0.0615,
"n_experts": 384
},
"40": {
"median_var64": 0.0603,
"q25_var64": 0.0584,
"q75_var64": 0.0648,
"n_experts": 384
},
"41": {
"median_var64": 0.0576,
"q25_var64": 0.0566,
"q75_var64": 0.0602,
"n_experts": 384
},
"42": {
"median_var64": 0.0601,
"q25_var64": 0.0576,
"q75_var64": 0.0658,
"n_experts": 384
},
"43": {
"median_var64": 0.0567,
"q25_var64": 0.0561,
"q75_var64": 0.0595,
"n_experts": 384
},
"44": {
"median_var64": 0.0581,
"q25_var64": 0.0569,
"q75_var64": 0.0618,
"n_experts": 384
},
"45": {
"median_var64": 0.0567,
"q25_var64": 0.056,
"q75_var64": 0.0593,
"n_experts": 384
},
"46": {
"median_var64": 0.058,
"q25_var64": 0.0565,
"q75_var64": 0.0621,
"n_experts": 384
},
"47": {
"median_var64": 0.0569,
"q25_var64": 0.0561,
"q75_var64": 0.0604,
"n_experts": 384
},
"48": {
"median_var64": 0.0577,
"q25_var64": 0.0563,
"q75_var64": 0.0615,
"n_experts": 384
},
"49": {
"median_var64": 0.0592,
"q25_var64": 0.0568,
"q75_var64": 0.0655,
"n_experts": 384
},
"50": {
"median_var64": 0.0587,
"q25_var64": 0.0566,
"q75_var64": 0.0637,
"n_experts": 384
},
"51": {
"median_var64": 0.0578,
"q25_var64": 0.0565,
"q75_var64": 0.0612,
"n_experts": 384
},
"52": {
"median_var64": 0.0599,
"q25_var64": 0.0575,
"q75_var64": 0.0663,
"n_experts": 384
},
"53": {
"median_var64": 0.0699,
"q25_var64": 0.063,
"q75_var64": 0.0824,
"n_experts": 384
},
"54": {
"median_var64": 0.0673,
"q25_var64": 0.0616,
"q75_var64": 0.0763,
"n_experts": 384
},
"55": {
"median_var64": 0.067,
"q25_var64": 0.0614,
"q75_var64": 0.0785,
"n_experts": 384
},
"56": {
"median_var64": 0.0686,
"q25_var64": 0.0625,
"q75_var64": 0.0782,
"n_experts": 384
},
"57": {
"median_var64": 0.0814,
"q25_var64": 0.071,
"q75_var64": 0.0931,
"n_experts": 384
},
"58": {
"median_var64": 0.0729,
"q25_var64": 0.0655,
"q75_var64": 0.0858,
"n_experts": 384
},
"59": {
"median_var64": 0.077,
"q25_var64": 0.0689,
"q75_var64": 0.0879,
"n_experts": 384
},
"60": {
"median_var64": 0.0758,
"q25_var64": 0.068,
"q75_var64": 0.0892,
"n_experts": 384
}
}
}