deepseek-v4-flash-vindex / phase1_moe_svd.json
mikeumus-divincian's picture
Add phase1_moe_svd.json
9bcc2b3 verified
{
"0": {
"layer": 0,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0848,
"q25_var64": 0.0844,
"q75_var64": 0.0852,
"mean_s0": 2.88,
"std_s0": 0.21,
"mean_s0_ratio": 1.02,
"n_experts": 256
},
"shared_expert": {
"var64": 0.1268,
"s0": 3.53,
"shape": [
4096,
2048
]
},
"router": {
"var64": 0.5374,
"s0": 7.86,
"s0_s1": 1.15,
"shape": [
256,
4096
]
}
},
"1": {
"layer": 1,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0866,
"q25_var64": 0.0865,
"q75_var64": 0.0868,
"mean_s0": 2.84,
"std_s0": 0.09,
"mean_s0_ratio": 1.01,
"n_experts": 256
},
"shared_expert": {
"var64": 0.1906,
"s0": 3.8,
"shape": [
4096,
2048
]
},
"router": {
"var64": 0.4951,
"s0": 4.92,
"s0_s1": 1.44,
"shape": [
256,
4096
]
}
},
"2": {
"layer": 2,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0865,
"q25_var64": 0.0863,
"q75_var64": 0.0867,
"mean_s0": 2.87,
"std_s0": 0.35,
"mean_s0_ratio": 1.02,
"n_experts": 256
},
"shared_expert": {
"var64": 0.2442,
"s0": 3.8,
"shape": [
4096,
2048
]
},
"router": {
"var64": 0.4392,
"s0": 3.65,
"s0_s1": 1.07,
"shape": [
256,
4096
]
}
},
"3": {
"layer": 3,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0998,
"q25_var64": 0.095,
"q75_var64": 0.1042,
"mean_s0": 3.72,
"std_s0": 0.47,
"mean_s0_ratio": 1.11,
"n_experts": 256
},
"shared_expert": {
"var64": 0.0948,
"s0": 4.65,
"shape": [
4096,
2048
]
},
"router": {
"var64": 0.4697,
"s0": 6.69,
"s0_s1": 1.09,
"shape": [
256,
4096
]
}
},
"4": {
"layer": 4,
"is_moe": true,
"routed_experts": {
"median_var64": 0.1017,
"q25_var64": 0.097,
"q75_var64": 0.1068,
"mean_s0": 3.9,
"std_s0": 0.53,
"mean_s0_ratio": 1.12,
"n_experts": 256
},
"shared_expert": {
"var64": 0.1239,
"s0": 4.34,
"shape": [
4096,
2048
]
},
"router": {
"var64": 0.4817,
"s0": 6.34,
"s0_s1": 1.15,
"shape": [
256,
4096
]
}
},
"5": {
"layer": 5,
"is_moe": true,
"routed_experts": {
"median_var64": 0.1062,
"q25_var64": 0.0981,
"q75_var64": 0.1157,
"mean_s0": 4.2,
"std_s0": 0.65,
"mean_s0_ratio": 1.16,
"n_experts": 256
},
"shared_expert": {
"var64": 0.1554,
"s0": 3.83,
"shape": [
4096,
2048
]
},
"router": {
"var64": 0.4402,
"s0": 6.17,
"s0_s1": 1.23,
"shape": [
256,
4096
]
}
},
"6": {
"layer": 6,
"is_moe": true,
"routed_experts": {
"median_var64": 0.1017,
"q25_var64": 0.0963,
"q75_var64": 0.1079,
"mean_s0": 4.11,
"std_s0": 0.62,
"mean_s0_ratio": 1.16,
"n_experts": 256
},
"shared_expert": {
"var64": 0.1669,
"s0": 4.28,
"shape": [
4096,
2048
]
},
"router": {
"var64": 0.437,
"s0": 5.39,
"s0_s1": 1.12,
"shape": [
256,
4096
]
}
},
"7": {
"layer": 7,
"is_moe": true,
"routed_experts": {
"median_var64": 0.1055,
"q25_var64": 0.0982,
"q75_var64": 0.1137,
"mean_s0": 4.26,
"std_s0": 0.56,
"mean_s0_ratio": 1.19,
"n_experts": 256
},
"shared_expert": {
"var64": 0.1686,
"s0": 4.15,
"shape": [
4096,
2048
]
},
"router": {
"var64": 0.4548,
"s0": 5.14,
"s0_s1": 1.07,
"shape": [
256,
4096
]
}
},
"8": {
"layer": 8,
"is_moe": true,
"routed_experts": {
"median_var64": 0.104,
"q25_var64": 0.0992,
"q75_var64": 0.1135,
"mean_s0": 4.28,
"std_s0": 0.68,
"mean_s0_ratio": 1.19,
"n_experts": 256
},
"shared_expert": {
"var64": 0.194,
"s0": 4.24,
"shape": [
4096,
2048
]
},
"router": {
"var64": 0.4371,
"s0": 5.56,
"s0_s1": 1.14,
"shape": [
256,
4096
]
}
},
"9": {
"layer": 9,
"is_moe": true,
"routed_experts": {
"median_var64": 0.1122,
"q25_var64": 0.105,
"q75_var64": 0.1233,
"mean_s0": 4.62,
"std_s0": 0.8,
"mean_s0_ratio": 1.2,
"n_experts": 256
},
"shared_expert": {
"var64": 0.2007,
"s0": 4.39,
"shape": [
4096,
2048
]
},
"router": {
"var64": 0.4314,
"s0": 6.46,
"s0_s1": 1.24,
"shape": [
256,
4096
]
}
},
"10": {
"layer": 10,
"is_moe": true,
"routed_experts": {
"median_var64": 0.1172,
"q25_var64": 0.1082,
"q75_var64": 0.1314,
"mean_s0": 4.78,
"std_s0": 0.81,
"mean_s0_ratio": 1.21,
"n_experts": 256
},
"shared_expert": {
"var64": 0.1916,
"s0": 4.74,
"shape": [
4096,
2048
]
},
"router": {
"var64": 0.4471,
"s0": 6.66,
"s0_s1": 1.3,
"shape": [
256,
4096
]
}
},
"11": {
"layer": 11,
"is_moe": true,
"routed_experts": {
"median_var64": 0.1153,
"q25_var64": 0.1045,
"q75_var64": 0.1313,
"mean_s0": 4.62,
"std_s0": 1.02,
"mean_s0_ratio": 1.19,
"n_experts": 256
},
"shared_expert": {
"var64": 0.1849,
"s0": 4.12,
"shape": [
4096,
2048
]
},
"router": {
"var64": 0.4657,
"s0": 6.93,
"s0_s1": 1.29,
"shape": [
256,
4096
]
}
},
"12": {
"layer": 12,
"is_moe": true,
"routed_experts": {
"median_var64": 0.1208,
"q25_var64": 0.109,
"q75_var64": 0.1364,
"mean_s0": 4.9,
"std_s0": 0.9,
"mean_s0_ratio": 1.22,
"n_experts": 256
},
"shared_expert": {
"var64": 0.2135,
"s0": 4.62,
"shape": [
4096,
2048
]
},
"router": {
"var64": 0.4562,
"s0": 6.02,
"s0_s1": 1.18,
"shape": [
256,
4096
]
}
},
"13": {
"layer": 13,
"is_moe": true,
"routed_experts": {
"median_var64": 0.1286,
"q25_var64": 0.1127,
"q75_var64": 0.1449,
"mean_s0": 5.15,
"std_s0": 0.97,
"mean_s0_ratio": 1.26,
"n_experts": 256
},
"shared_expert": {
"var64": 0.2054,
"s0": 4.42,
"shape": [
4096,
2048
]
},
"router": {
"var64": 0.4671,
"s0": 6.36,
"s0_s1": 1.25,
"shape": [
256,
4096
]
}
},
"14": {
"layer": 14,
"is_moe": true,
"routed_experts": {
"median_var64": 0.1426,
"q25_var64": 0.1234,
"q75_var64": 0.1575,
"mean_s0": 5.46,
"std_s0": 1.03,
"mean_s0_ratio": 1.25,
"n_experts": 256
},
"shared_expert": {
"var64": 0.2041,
"s0": 4.94,
"shape": [
4096,
2048
]
},
"router": {
"var64": 0.4733,
"s0": 6.23,
"s0_s1": 1.15,
"shape": [
256,
4096
]
}
},
"15": {
"layer": 15,
"is_moe": true,
"routed_experts": {
"median_var64": 0.1413,
"q25_var64": 0.1179,
"q75_var64": 0.1633,
"mean_s0": 5.49,
"std_s0": 1.18,
"mean_s0_ratio": 1.24,
"n_experts": 256
},
"shared_expert": {
"var64": 0.2034,
"s0": 4.99,
"shape": [
4096,
2048
]
},
"router": {
"var64": 0.4797,
"s0": 6.75,
"s0_s1": 1.22,
"shape": [
256,
4096
]
}
},
"16": {
"layer": 16,
"is_moe": true,
"routed_experts": {
"median_var64": 0.1471,
"q25_var64": 0.1232,
"q75_var64": 0.1748,
"mean_s0": 5.65,
"std_s0": 1.25,
"mean_s0_ratio": 1.2,
"n_experts": 256
},
"shared_expert": {
"var64": 0.1954,
"s0": 5.52,
"shape": [
4096,
2048
]
},
"router": {
"var64": 0.4886,
"s0": 6.54,
"s0_s1": 1.22,
"shape": [
256,
4096
]
}
},
"17": {
"layer": 17,
"is_moe": true,
"routed_experts": {
"median_var64": 0.1523,
"q25_var64": 0.1307,
"q75_var64": 0.1797,
"mean_s0": 5.99,
"std_s0": 1.35,
"mean_s0_ratio": 1.22,
"n_experts": 256
},
"shared_expert": {
"var64": 0.1902,
"s0": 4.94,
"shape": [
4096,
2048
]
},
"router": {
"var64": 0.4963,
"s0": 6.21,
"s0_s1": 1.26,
"shape": [
256,
4096
]
}
},
"18": {
"layer": 18,
"is_moe": true,
"routed_experts": {
"median_var64": 0.1487,
"q25_var64": 0.1313,
"q75_var64": 0.17,
"mean_s0": 5.9,
"std_s0": 1.36,
"mean_s0_ratio": 1.19,
"n_experts": 256
},
"shared_expert": {
"var64": 0.1949,
"s0": 6.07,
"shape": [
4096,
2048
]
},
"router": {
"var64": 0.4994,
"s0": 5.86,
"s0_s1": 1.17,
"shape": [
256,
4096
]
}
},
"19": {
"layer": 19,
"is_moe": true,
"routed_experts": {
"median_var64": 0.1157,
"q25_var64": 0.1058,
"q75_var64": 0.1274,
"mean_s0": 4.61,
"std_s0": 0.89,
"mean_s0_ratio": 1.13,
"n_experts": 256
},
"shared_expert": {
"var64": 0.1279,
"s0": 4.84,
"shape": [
4096,
2048
]
},
"router": {
"var64": 0.4813,
"s0": 8.17,
"s0_s1": 1.25,
"shape": [
256,
4096
]
}
},
"20": {
"layer": 20,
"is_moe": true,
"routed_experts": {
"median_var64": 0.1415,
"q25_var64": 0.1242,
"q75_var64": 0.1635,
"mean_s0": 5.69,
"std_s0": 1.37,
"mean_s0_ratio": 1.17,
"n_experts": 256
},
"shared_expert": {
"var64": 0.215,
"s0": 6.22,
"shape": [
4096,
2048
]
},
"router": {
"var64": 0.5111,
"s0": 6.1,
"s0_s1": 1.25,
"shape": [
256,
4096
]
}
},
"21": {
"layer": 21,
"is_moe": true,
"routed_experts": {
"median_var64": 0.1292,
"q25_var64": 0.1037,
"q75_var64": 0.1529,
"mean_s0": 5.3,
"std_s0": 1.5,
"mean_s0_ratio": 1.17,
"n_experts": 256
},
"shared_expert": {
"var64": 0.2144,
"s0": 5.84,
"shape": [
4096,
2048
]
},
"router": {
"var64": 0.4869,
"s0": 6.86,
"s0_s1": 1.1,
"shape": [
256,
4096
]
}
},
"22": {
"layer": 22,
"is_moe": true,
"routed_experts": {
"median_var64": 0.1209,
"q25_var64": 0.0982,
"q75_var64": 0.1496,
"mean_s0": 5.0,
"std_s0": 1.21,
"mean_s0_ratio": 1.15,
"n_experts": 256
},
"shared_expert": {
"var64": 0.2101,
"s0": 4.86,
"shape": [
4096,
2048
]
},
"router": {
"var64": 0.4821,
"s0": 7.01,
"s0_s1": 1.23,
"shape": [
256,
4096
]
}
},
"23": {
"layer": 23,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0888,
"q25_var64": 0.0873,
"q75_var64": 0.0948,
"mean_s0": 3.42,
"std_s0": 0.76,
"mean_s0_ratio": 1.08,
"n_experts": 256
},
"shared_expert": {
"var64": 0.1911,
"s0": 5.09,
"shape": [
4096,
2048
]
},
"router": {
"var64": 0.484,
"s0": 9.02,
"s0_s1": 1.1,
"shape": [
256,
4096
]
}
},
"24": {
"layer": 24,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0916,
"q25_var64": 0.0878,
"q75_var64": 0.1033,
"mean_s0": 3.65,
"std_s0": 0.9,
"mean_s0_ratio": 1.1,
"n_experts": 256
},
"shared_expert": {
"var64": 0.1699,
"s0": 4.09,
"shape": [
4096,
2048
]
},
"router": {
"var64": 0.4696,
"s0": 7.31,
"s0_s1": 1.13,
"shape": [
256,
4096
]
}
},
"25": {
"layer": 25,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0953,
"q25_var64": 0.0903,
"q75_var64": 0.109,
"mean_s0": 3.96,
"std_s0": 1.16,
"mean_s0_ratio": 1.14,
"n_experts": 256
},
"shared_expert": {
"var64": 0.2279,
"s0": 4.9,
"shape": [
4096,
2048
]
},
"router": {
"var64": 0.4568,
"s0": 7.36,
"s0_s1": 1.05,
"shape": [
256,
4096
]
}
},
"26": {
"layer": 26,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0954,
"q25_var64": 0.0915,
"q75_var64": 0.1058,
"mean_s0": 3.86,
"std_s0": 0.99,
"mean_s0_ratio": 1.11,
"n_experts": 256
},
"shared_expert": {
"var64": 0.199,
"s0": 4.62,
"shape": [
4096,
2048
]
},
"router": {
"var64": 0.4361,
"s0": 6.16,
"s0_s1": 1.18,
"shape": [
256,
4096
]
}
},
"27": {
"layer": 27,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0981,
"q25_var64": 0.0943,
"q75_var64": 0.1058,
"mean_s0": 3.96,
"std_s0": 1.23,
"mean_s0_ratio": 1.13,
"n_experts": 256
},
"shared_expert": {
"var64": 0.3269,
"s0": 7.17,
"shape": [
4096,
2048
]
},
"router": {
"var64": 0.444,
"s0": 7.06,
"s0_s1": 1.13,
"shape": [
256,
4096
]
}
},
"28": {
"layer": 28,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0938,
"q25_var64": 0.09,
"q75_var64": 0.1045,
"mean_s0": 3.95,
"std_s0": 1.19,
"mean_s0_ratio": 1.14,
"n_experts": 256
},
"shared_expert": {
"var64": 0.2398,
"s0": 4.73,
"shape": [
4096,
2048
]
},
"router": {
"var64": 0.4454,
"s0": 6.37,
"s0_s1": 1.19,
"shape": [
256,
4096
]
}
},
"29": {
"layer": 29,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0937,
"q25_var64": 0.089,
"q75_var64": 0.11,
"mean_s0": 3.95,
"std_s0": 1.2,
"mean_s0_ratio": 1.12,
"n_experts": 256
},
"shared_expert": {
"var64": 0.2463,
"s0": 5.85,
"shape": [
4096,
2048
]
},
"router": {
"var64": 0.4549,
"s0": 6.69,
"s0_s1": 1.14,
"shape": [
256,
4096
]
}
},
"30": {
"layer": 30,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0968,
"q25_var64": 0.0905,
"q75_var64": 0.1132,
"mean_s0": 4.16,
"std_s0": 1.32,
"mean_s0_ratio": 1.14,
"n_experts": 256
},
"shared_expert": {
"var64": 0.2351,
"s0": 6.71,
"shape": [
4096,
2048
]
},
"router": {
"var64": 0.4442,
"s0": 6.39,
"s0_s1": 1.17,
"shape": [
256,
4096
]
}
},
"31": {
"layer": 31,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0947,
"q25_var64": 0.0906,
"q75_var64": 0.1092,
"mean_s0": 4.03,
"std_s0": 1.18,
"mean_s0_ratio": 1.14,
"n_experts": 256
},
"shared_expert": {
"var64": 0.2778,
"s0": 7.04,
"shape": [
4096,
2048
]
},
"router": {
"var64": 0.4478,
"s0": 6.75,
"s0_s1": 1.14,
"shape": [
256,
4096
]
}
},
"32": {
"layer": 32,
"is_moe": true,
"routed_experts": {
"median_var64": 0.092,
"q25_var64": 0.0892,
"q75_var64": 0.101,
"mean_s0": 3.83,
"std_s0": 1.11,
"mean_s0_ratio": 1.11,
"n_experts": 256
},
"shared_expert": {
"var64": 0.2878,
"s0": 6.94,
"shape": [
4096,
2048
]
},
"router": {
"var64": 0.4465,
"s0": 5.95,
"s0_s1": 1.14,
"shape": [
256,
4096
]
}
},
"33": {
"layer": 33,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0925,
"q25_var64": 0.0898,
"q75_var64": 0.1035,
"mean_s0": 3.79,
"std_s0": 1.06,
"mean_s0_ratio": 1.11,
"n_experts": 256
},
"shared_expert": {
"var64": 0.2721,
"s0": 5.62,
"shape": [
4096,
2048
]
},
"router": {
"var64": 0.4406,
"s0": 5.59,
"s0_s1": 1.05,
"shape": [
256,
4096
]
}
},
"34": {
"layer": 34,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0922,
"q25_var64": 0.09,
"q75_var64": 0.1012,
"mean_s0": 3.71,
"std_s0": 1.17,
"mean_s0_ratio": 1.09,
"n_experts": 256
},
"shared_expert": {
"var64": 0.2554,
"s0": 6.2,
"shape": [
4096,
2048
]
},
"router": {
"var64": 0.4352,
"s0": 5.0,
"s0_s1": 1.08,
"shape": [
256,
4096
]
}
},
"35": {
"layer": 35,
"is_moe": true,
"routed_experts": {
"median_var64": 0.1022,
"q25_var64": 0.0955,
"q75_var64": 0.1166,
"mean_s0": 4.32,
"std_s0": 1.84,
"mean_s0_ratio": 1.13,
"n_experts": 256
},
"shared_expert": {
"var64": 0.2325,
"s0": 6.58,
"shape": [
4096,
2048
]
},
"router": {
"var64": 0.4464,
"s0": 5.85,
"s0_s1": 1.13,
"shape": [
256,
4096
]
}
},
"36": {
"layer": 36,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0958,
"q25_var64": 0.0918,
"q75_var64": 0.1045,
"mean_s0": 3.78,
"std_s0": 1.0,
"mean_s0_ratio": 1.1,
"n_experts": 256
},
"shared_expert": {
"var64": 0.1882,
"s0": 7.17,
"shape": [
4096,
2048
]
},
"router": {
"var64": 0.452,
"s0": 4.96,
"s0_s1": 1.2,
"shape": [
256,
4096
]
}
},
"37": {
"layer": 37,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0949,
"q25_var64": 0.0904,
"q75_var64": 0.1048,
"mean_s0": 4.02,
"std_s0": 1.65,
"mean_s0_ratio": 1.13,
"n_experts": 256
},
"shared_expert": {
"var64": 0.2981,
"s0": 8.46,
"shape": [
4096,
2048
]
},
"router": {
"var64": 0.4593,
"s0": 4.93,
"s0_s1": 1.09,
"shape": [
256,
4096
]
}
},
"38": {
"layer": 38,
"is_moe": true,
"routed_experts": {
"median_var64": 0.098,
"q25_var64": 0.0917,
"q75_var64": 0.1083,
"mean_s0": 4.0,
"std_s0": 1.14,
"mean_s0_ratio": 1.1,
"n_experts": 256
},
"shared_expert": {
"var64": 0.2166,
"s0": 8.46,
"shape": [
4096,
2048
]
},
"router": {
"var64": 0.4608,
"s0": 4.84,
"s0_s1": 1.09,
"shape": [
256,
4096
]
}
},
"39": {
"layer": 39,
"is_moe": true,
"routed_experts": {
"median_var64": 0.1116,
"q25_var64": 0.1027,
"q75_var64": 0.1299,
"mean_s0": 5.53,
"std_s0": 2.34,
"mean_s0_ratio": 1.25,
"n_experts": 256
},
"shared_expert": {
"var64": 0.2764,
"s0": 8.83,
"shape": [
4096,
2048
]
},
"router": {
"var64": 0.4941,
"s0": 4.11,
"s0_s1": 1.06,
"shape": [
256,
4096
]
}
},
"40": {
"layer": 40,
"is_moe": true,
"routed_experts": {
"median_var64": 0.1027,
"q25_var64": 0.0943,
"q75_var64": 0.1168,
"mean_s0": 5.01,
"std_s0": 1.97,
"mean_s0_ratio": 1.19,
"n_experts": 256
},
"shared_expert": {
"var64": 0.3241,
"s0": 9.94,
"shape": [
4096,
2048
]
},
"router": {
"var64": 0.4982,
"s0": 4.8,
"s0_s1": 1.15,
"shape": [
256,
4096
]
}
},
"41": {
"layer": 41,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0988,
"q25_var64": 0.0921,
"q75_var64": 0.1127,
"mean_s0": 5.16,
"std_s0": 2.69,
"mean_s0_ratio": 1.16,
"n_experts": 256
},
"shared_expert": {
"var64": 0.2952,
"s0": 8.3,
"shape": [
4096,
2048
]
},
"router": {
"var64": 0.49,
"s0": 5.52,
"s0_s1": 1.22,
"shape": [
256,
4096
]
}
},
"42": {
"layer": 42,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0997,
"q25_var64": 0.0938,
"q75_var64": 0.1085,
"mean_s0": 5.85,
"std_s0": 4.75,
"mean_s0_ratio": 1.18,
"n_experts": 256
},
"shared_expert": {
"var64": 0.2863,
"s0": 16.44,
"shape": [
4096,
2048
]
},
"router": {
"var64": 0.5181,
"s0": 6.69,
"s0_s1": 1.35,
"shape": [
256,
4096
]
}
}
}