deepseek-v4-flash-vindex / gate_vectors_index.json
mikeumus-divincian's picture
Add gate_vectors_index.json
5d15eda verified
{
"version": 2,
"model_id": "unsloth/DeepSeek-V4-Flash",
"model_config": {
"moe": {
"model_type": "deepseek_v4",
"num_layers": 43,
"hidden_size": 4096,
"intermediate_size": 18432,
"moe_intermediate_size": 2048,
"n_routed_experts": 256,
"n_shared_experts": 1,
"num_experts_per_tok": 6,
"first_k_dense_replace": 0,
"torch_dtype": "bfloat16",
"quant_method": "fp8",
"templates": {
"expert_gate_proj": "layers.{layer}.ffn.experts.{e}.w1.weight",
"expert_up_proj": "layers.{layer}.ffn.experts.{e}.w3.weight",
"expert_down_proj": "layers.{layer}.ffn.experts.{e}.w2.weight",
"fused_gate_proj": "layers.{layer}.ffn.experts.w1",
"fused_down_proj": "layers.{layer}.ffn.experts.w2",
"shared_down_proj": [
"layers.{layer}.ffn.shared_experts.w2.weight"
],
"router": [
"layers.{layer}.ffn.gate.weight",
"layers.{layer}.ffn.router.weight"
],
"dense_down_proj": [
"layers.{layer}.ffn.w2.weight"
]
}
}
},
"num_feats": 64,
"dtype": "float32",
"gate_vector_semantics": "right_singular_vectors_of_gate_proj",
"layers": {
"0": {
"shape": [
256,
64,
4096
],
"file_offset": 0
},
"1": {
"shape": [
256,
64,
4096
],
"file_offset": 268435456
},
"2": {
"shape": [
256,
64,
4096
],
"file_offset": 536870912
},
"3": {
"shape": [
256,
64,
4096
],
"file_offset": 805306368
},
"4": {
"shape": [
256,
64,
4096
],
"file_offset": 1073741824
},
"5": {
"shape": [
256,
64,
4096
],
"file_offset": 1342177280
},
"6": {
"shape": [
256,
64,
4096
],
"file_offset": 1610612736
},
"7": {
"shape": [
256,
64,
4096
],
"file_offset": 1879048192
},
"8": {
"shape": [
256,
64,
4096
],
"file_offset": 2147483648
},
"9": {
"shape": [
256,
64,
4096
],
"file_offset": 2415919104
},
"10": {
"shape": [
256,
64,
4096
],
"file_offset": 2684354560
},
"11": {
"shape": [
256,
64,
4096
],
"file_offset": 2952790016
},
"12": {
"shape": [
256,
64,
4096
],
"file_offset": 3221225472
},
"13": {
"shape": [
256,
64,
4096
],
"file_offset": 3489660928
},
"14": {
"shape": [
256,
64,
4096
],
"file_offset": 3758096384
},
"15": {
"shape": [
256,
64,
4096
],
"file_offset": 4026531840
},
"16": {
"shape": [
256,
64,
4096
],
"file_offset": 4294967296
},
"17": {
"shape": [
256,
64,
4096
],
"file_offset": 4563402752
},
"18": {
"shape": [
256,
64,
4096
],
"file_offset": 4831838208
},
"19": {
"shape": [
256,
64,
4096
],
"file_offset": 5100273664
},
"20": {
"shape": [
256,
64,
4096
],
"file_offset": 5368709120
},
"21": {
"shape": [
256,
64,
4096
],
"file_offset": 5637144576
},
"22": {
"shape": [
256,
64,
4096
],
"file_offset": 5905580032
},
"23": {
"shape": [
256,
64,
4096
],
"file_offset": 6174015488
},
"24": {
"shape": [
256,
64,
4096
],
"file_offset": 6442450944
},
"25": {
"shape": [
256,
64,
4096
],
"file_offset": 6710886400
},
"26": {
"shape": [
256,
64,
4096
],
"file_offset": 6979321856
},
"27": {
"shape": [
256,
64,
4096
],
"file_offset": 7247757312
},
"28": {
"shape": [
256,
64,
4096
],
"file_offset": 7516192768
},
"29": {
"shape": [
256,
64,
4096
],
"file_offset": 7784628224
},
"30": {
"shape": [
256,
64,
4096
],
"file_offset": 8053063680
},
"31": {
"shape": [
256,
64,
4096
],
"file_offset": 8321499136
},
"32": {
"shape": [
256,
64,
4096
],
"file_offset": 8589934592
},
"33": {
"shape": [
256,
64,
4096
],
"file_offset": 8858370048
},
"34": {
"shape": [
256,
64,
4096
],
"file_offset": 9126805504
},
"35": {
"shape": [
256,
64,
4096
],
"file_offset": 9395240960
},
"36": {
"shape": [
256,
64,
4096
],
"file_offset": 9663676416
},
"37": {
"shape": [
256,
64,
4096
],
"file_offset": 9932111872
},
"38": {
"shape": [
256,
64,
4096
],
"file_offset": 10200547328
},
"39": {
"shape": [
256,
64,
4096
],
"file_offset": 10468982784
},
"40": {
"shape": [
256,
64,
4096
],
"file_offset": 10737418240
},
"41": {
"shape": [
256,
64,
4096
],
"file_offset": 11005853696
},
"42": {
"shape": [
256,
64,
4096
],
"file_offset": 11274289152
}
},
"layer_stats": {
"0": {
"median_var64": 0.1002,
"q25_var64": 0.0998,
"q75_var64": 0.1007,
"n_experts": 256
},
"1": {
"median_var64": 0.0916,
"q25_var64": 0.0914,
"q75_var64": 0.0917,
"n_experts": 256
},
"2": {
"median_var64": 0.0868,
"q25_var64": 0.0868,
"q75_var64": 0.0869,
"n_experts": 256
},
"3": {
"median_var64": 0.104,
"q25_var64": 0.0981,
"q75_var64": 0.1091,
"n_experts": 256
},
"4": {
"median_var64": 0.108,
"q25_var64": 0.1023,
"q75_var64": 0.1174,
"n_experts": 256
},
"5": {
"median_var64": 0.1097,
"q25_var64": 0.1016,
"q75_var64": 0.1185,
"n_experts": 256
},
"6": {
"median_var64": 0.1092,
"q25_var64": 0.1021,
"q75_var64": 0.1159,
"n_experts": 256
},
"7": {
"median_var64": 0.1071,
"q25_var64": 0.0997,
"q75_var64": 0.1169,
"n_experts": 256
},
"8": {
"median_var64": 0.1092,
"q25_var64": 0.1028,
"q75_var64": 0.1163,
"n_experts": 256
},
"9": {
"median_var64": 0.1081,
"q25_var64": 0.1013,
"q75_var64": 0.1201,
"n_experts": 256
},
"10": {
"median_var64": 0.116,
"q25_var64": 0.1083,
"q75_var64": 0.1319,
"n_experts": 256
},
"11": {
"median_var64": 0.1182,
"q25_var64": 0.1054,
"q75_var64": 0.1328,
"n_experts": 256
},
"12": {
"median_var64": 0.1284,
"q25_var64": 0.1123,
"q75_var64": 0.1444,
"n_experts": 256
},
"13": {
"median_var64": 0.1291,
"q25_var64": 0.115,
"q75_var64": 0.1447,
"n_experts": 256
},
"14": {
"median_var64": 0.1378,
"q25_var64": 0.1191,
"q75_var64": 0.154,
"n_experts": 256
},
"15": {
"median_var64": 0.1421,
"q25_var64": 0.1152,
"q75_var64": 0.1615,
"n_experts": 256
},
"16": {
"median_var64": 0.1533,
"q25_var64": 0.134,
"q75_var64": 0.1781,
"n_experts": 256
},
"17": {
"median_var64": 0.1587,
"q25_var64": 0.1385,
"q75_var64": 0.1802,
"n_experts": 256
},
"18": {
"median_var64": 0.1649,
"q25_var64": 0.1433,
"q75_var64": 0.1846,
"n_experts": 256
},
"19": {
"median_var64": 0.1268,
"q25_var64": 0.1112,
"q75_var64": 0.1432,
"n_experts": 256
},
"20": {
"median_var64": 0.1575,
"q25_var64": 0.1407,
"q75_var64": 0.1816,
"n_experts": 256
},
"21": {
"median_var64": 0.1449,
"q25_var64": 0.1164,
"q75_var64": 0.1679,
"n_experts": 256
},
"22": {
"median_var64": 0.1376,
"q25_var64": 0.1083,
"q75_var64": 0.1613,
"n_experts": 256
},
"23": {
"median_var64": 0.0919,
"q25_var64": 0.0881,
"q75_var64": 0.1025,
"n_experts": 256
},
"24": {
"median_var64": 0.1051,
"q25_var64": 0.0942,
"q75_var64": 0.1209,
"n_experts": 256
},
"25": {
"median_var64": 0.0918,
"q25_var64": 0.0877,
"q75_var64": 0.105,
"n_experts": 256
},
"26": {
"median_var64": 0.0965,
"q25_var64": 0.0908,
"q75_var64": 0.1096,
"n_experts": 256
},
"27": {
"median_var64": 0.0869,
"q25_var64": 0.0852,
"q75_var64": 0.0934,
"n_experts": 256
},
"28": {
"median_var64": 0.0939,
"q25_var64": 0.0892,
"q75_var64": 0.1041,
"n_experts": 256
},
"29": {
"median_var64": 0.0931,
"q25_var64": 0.0877,
"q75_var64": 0.109,
"n_experts": 256
},
"30": {
"median_var64": 0.0944,
"q25_var64": 0.0886,
"q75_var64": 0.1132,
"n_experts": 256
},
"31": {
"median_var64": 0.0917,
"q25_var64": 0.0875,
"q75_var64": 0.1096,
"n_experts": 256
},
"32": {
"median_var64": 0.0953,
"q25_var64": 0.0901,
"q75_var64": 0.1042,
"n_experts": 256
},
"33": {
"median_var64": 0.0947,
"q25_var64": 0.0892,
"q75_var64": 0.1062,
"n_experts": 256
},
"34": {
"median_var64": 0.0925,
"q25_var64": 0.0893,
"q75_var64": 0.103,
"n_experts": 256
},
"35": {
"median_var64": 0.0989,
"q25_var64": 0.0919,
"q75_var64": 0.1154,
"n_experts": 256
},
"36": {
"median_var64": 0.0964,
"q25_var64": 0.0902,
"q75_var64": 0.1098,
"n_experts": 256
},
"37": {
"median_var64": 0.0974,
"q25_var64": 0.0916,
"q75_var64": 0.1123,
"n_experts": 256
},
"38": {
"median_var64": 0.1017,
"q25_var64": 0.0939,
"q75_var64": 0.1144,
"n_experts": 256
},
"39": {
"median_var64": 0.1248,
"q25_var64": 0.112,
"q75_var64": 0.1414,
"n_experts": 256
},
"40": {
"median_var64": 0.1186,
"q25_var64": 0.1047,
"q75_var64": 0.1371,
"n_experts": 256
},
"41": {
"median_var64": 0.1214,
"q25_var64": 0.1065,
"q75_var64": 0.1415,
"n_experts": 256
},
"42": {
"median_var64": 0.1191,
"q25_var64": 0.1082,
"q75_var64": 0.1362,
"n_experts": 256
}
}
}