kimi-k2-instruct-vindex / phase1_moe_svd.json
mikeumus-divincian's picture
Add phase1_moe_svd.json
c2b5063 verified
{
"0": {
"layer": 0,
"is_moe": false,
"dense": {
"var64": 0.0373,
"s0": 10.7,
"shape": [
7168,
18432
]
}
},
"1": {
"layer": 1,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0824,
"q25_var64": 0.0758,
"q75_var64": 0.0866,
"mean_s0": 4.69,
"std_s0": 0.95,
"mean_s0_ratio": 1.11,
"n_experts": 384
},
"shared_expert": {
"var64": 0.1127,
"s0": 8.05,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.5682,
"s0": 26.89,
"s0_s1": 1.25,
"shape": [
384,
7168
]
}
},
"2": {
"layer": 2,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0883,
"q25_var64": 0.0827,
"q75_var64": 0.0918,
"mean_s0": 5.09,
"std_s0": 0.81,
"mean_s0_ratio": 1.14,
"n_experts": 384
},
"shared_expert": {
"var64": 0.1041,
"s0": 7.36,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.393,
"s0": 14.9,
"s0_s1": 1.0,
"shape": [
384,
7168
]
}
},
"3": {
"layer": 3,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0901,
"q25_var64": 0.0868,
"q75_var64": 0.0934,
"mean_s0": 5.05,
"std_s0": 0.61,
"mean_s0_ratio": 1.12,
"n_experts": 384
},
"shared_expert": {
"var64": 0.127,
"s0": 7.47,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3403,
"s0": 15.09,
"s0_s1": 1.06,
"shape": [
384,
7168
]
}
},
"4": {
"layer": 4,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0906,
"q25_var64": 0.0869,
"q75_var64": 0.0966,
"mean_s0": 5.3,
"std_s0": 0.68,
"mean_s0_ratio": 1.15,
"n_experts": 384
},
"shared_expert": {
"var64": 0.1212,
"s0": 6.56,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.35,
"s0": 15.06,
"s0_s1": 1.08,
"shape": [
384,
7168
]
}
},
"5": {
"layer": 5,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0888,
"q25_var64": 0.0849,
"q75_var64": 0.0935,
"mean_s0": 5.25,
"std_s0": 0.71,
"mean_s0_ratio": 1.15,
"n_experts": 384
},
"shared_expert": {
"var64": 0.1513,
"s0": 7.33,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3367,
"s0": 15.28,
"s0_s1": 1.21,
"shape": [
384,
7168
]
}
},
"6": {
"layer": 6,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0865,
"q25_var64": 0.083,
"q75_var64": 0.0911,
"mean_s0": 5.0,
"std_s0": 0.67,
"mean_s0_ratio": 1.15,
"n_experts": 384
},
"shared_expert": {
"var64": 0.1625,
"s0": 7.12,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3258,
"s0": 13.32,
"s0_s1": 1.08,
"shape": [
384,
7168
]
}
},
"7": {
"layer": 7,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0885,
"q25_var64": 0.0845,
"q75_var64": 0.0929,
"mean_s0": 5.11,
"std_s0": 0.66,
"mean_s0_ratio": 1.15,
"n_experts": 384
},
"shared_expert": {
"var64": 0.1665,
"s0": 6.61,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3163,
"s0": 12.09,
"s0_s1": 1.05,
"shape": [
384,
7168
]
}
},
"8": {
"layer": 8,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0898,
"q25_var64": 0.085,
"q75_var64": 0.0943,
"mean_s0": 5.17,
"std_s0": 0.66,
"mean_s0_ratio": 1.16,
"n_experts": 384
},
"shared_expert": {
"var64": 0.1639,
"s0": 6.76,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3089,
"s0": 12.21,
"s0_s1": 1.1,
"shape": [
384,
7168
]
}
},
"9": {
"layer": 9,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0912,
"q25_var64": 0.0867,
"q75_var64": 0.0967,
"mean_s0": 5.28,
"std_s0": 0.71,
"mean_s0_ratio": 1.17,
"n_experts": 384
},
"shared_expert": {
"var64": 0.1638,
"s0": 6.9,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3083,
"s0": 12.38,
"s0_s1": 1.1,
"shape": [
384,
7168
]
}
},
"10": {
"layer": 10,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0919,
"q25_var64": 0.0873,
"q75_var64": 0.0985,
"mean_s0": 5.42,
"std_s0": 0.8,
"mean_s0_ratio": 1.18,
"n_experts": 384
},
"shared_expert": {
"var64": 0.1611,
"s0": 6.26,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3106,
"s0": 11.63,
"s0_s1": 1.08,
"shape": [
384,
7168
]
}
},
"11": {
"layer": 11,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0946,
"q25_var64": 0.0885,
"q75_var64": 0.1006,
"mean_s0": 5.56,
"std_s0": 0.89,
"mean_s0_ratio": 1.19,
"n_experts": 384
},
"shared_expert": {
"var64": 0.1712,
"s0": 6.68,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3061,
"s0": 11.2,
"s0_s1": 1.04,
"shape": [
384,
7168
]
}
},
"12": {
"layer": 12,
"is_moe": true,
"routed_experts": {
"median_var64": 0.098,
"q25_var64": 0.0915,
"q75_var64": 0.1053,
"mean_s0": 5.8,
"std_s0": 0.91,
"mean_s0_ratio": 1.2,
"n_experts": 384
},
"shared_expert": {
"var64": 0.1517,
"s0": 6.36,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3161,
"s0": 10.69,
"s0_s1": 1.04,
"shape": [
384,
7168
]
}
},
"13": {
"layer": 13,
"is_moe": true,
"routed_experts": {
"median_var64": 0.1008,
"q25_var64": 0.0926,
"q75_var64": 0.1085,
"mean_s0": 6.03,
"std_s0": 1.01,
"mean_s0_ratio": 1.23,
"n_experts": 384
},
"shared_expert": {
"var64": 0.1701,
"s0": 7.06,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3245,
"s0": 10.72,
"s0_s1": 1.06,
"shape": [
384,
7168
]
}
},
"14": {
"layer": 14,
"is_moe": true,
"routed_experts": {
"median_var64": 0.1007,
"q25_var64": 0.0912,
"q75_var64": 0.109,
"mean_s0": 6.11,
"std_s0": 1.04,
"mean_s0_ratio": 1.24,
"n_experts": 384
},
"shared_expert": {
"var64": 0.1624,
"s0": 6.23,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3245,
"s0": 10.31,
"s0_s1": 1.04,
"shape": [
384,
7168
]
}
},
"15": {
"layer": 15,
"is_moe": true,
"routed_experts": {
"median_var64": 0.1006,
"q25_var64": 0.0902,
"q75_var64": 0.1102,
"mean_s0": 6.21,
"std_s0": 1.14,
"mean_s0_ratio": 1.24,
"n_experts": 384
},
"shared_expert": {
"var64": 0.1904,
"s0": 7.38,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3269,
"s0": 10.12,
"s0_s1": 1.02,
"shape": [
384,
7168
]
}
},
"16": {
"layer": 16,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0989,
"q25_var64": 0.0897,
"q75_var64": 0.1117,
"mean_s0": 6.12,
"std_s0": 1.13,
"mean_s0_ratio": 1.23,
"n_experts": 384
},
"shared_expert": {
"var64": 0.1764,
"s0": 7.03,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.328,
"s0": 10.11,
"s0_s1": 1.08,
"shape": [
384,
7168
]
}
},
"17": {
"layer": 17,
"is_moe": true,
"routed_experts": {
"median_var64": 0.1036,
"q25_var64": 0.0912,
"q75_var64": 0.1152,
"mean_s0": 6.38,
"std_s0": 1.24,
"mean_s0_ratio": 1.24,
"n_experts": 384
},
"shared_expert": {
"var64": 0.1793,
"s0": 7.02,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3172,
"s0": 9.52,
"s0_s1": 1.07,
"shape": [
384,
7168
]
}
},
"18": {
"layer": 18,
"is_moe": true,
"routed_experts": {
"median_var64": 0.1039,
"q25_var64": 0.0895,
"q75_var64": 0.1176,
"mean_s0": 6.36,
"std_s0": 1.3,
"mean_s0_ratio": 1.23,
"n_experts": 384
},
"shared_expert": {
"var64": 0.2005,
"s0": 7.53,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3238,
"s0": 9.68,
"s0_s1": 1.1,
"shape": [
384,
7168
]
}
},
"19": {
"layer": 19,
"is_moe": true,
"routed_experts": {
"median_var64": 0.1056,
"q25_var64": 0.0925,
"q75_var64": 0.121,
"mean_s0": 6.44,
"std_s0": 1.32,
"mean_s0_ratio": 1.22,
"n_experts": 384
},
"shared_expert": {
"var64": 0.1877,
"s0": 7.54,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.317,
"s0": 9.43,
"s0_s1": 1.12,
"shape": [
384,
7168
]
}
},
"20": {
"layer": 20,
"is_moe": true,
"routed_experts": {
"median_var64": 0.1053,
"q25_var64": 0.0901,
"q75_var64": 0.122,
"mean_s0": 6.56,
"std_s0": 1.42,
"mean_s0_ratio": 1.25,
"n_experts": 384
},
"shared_expert": {
"var64": 0.1874,
"s0": 7.33,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3196,
"s0": 9.25,
"s0_s1": 1.14,
"shape": [
384,
7168
]
}
},
"21": {
"layer": 21,
"is_moe": true,
"routed_experts": {
"median_var64": 0.107,
"q25_var64": 0.0927,
"q75_var64": 0.124,
"mean_s0": 6.66,
"std_s0": 1.46,
"mean_s0_ratio": 1.24,
"n_experts": 384
},
"shared_expert": {
"var64": 0.1809,
"s0": 8.29,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3178,
"s0": 8.62,
"s0_s1": 1.08,
"shape": [
384,
7168
]
}
},
"22": {
"layer": 22,
"is_moe": true,
"routed_experts": {
"median_var64": 0.1082,
"q25_var64": 0.0912,
"q75_var64": 0.1254,
"mean_s0": 6.84,
"std_s0": 1.6,
"mean_s0_ratio": 1.24,
"n_experts": 384
},
"shared_expert": {
"var64": 0.1963,
"s0": 8.41,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3277,
"s0": 9.11,
"s0_s1": 1.2,
"shape": [
384,
7168
]
}
},
"23": {
"layer": 23,
"is_moe": true,
"routed_experts": {
"median_var64": 0.105,
"q25_var64": 0.0857,
"q75_var64": 0.1226,
"mean_s0": 6.57,
"std_s0": 1.63,
"mean_s0_ratio": 1.23,
"n_experts": 384
},
"shared_expert": {
"var64": 0.1836,
"s0": 7.33,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3275,
"s0": 9.25,
"s0_s1": 1.25,
"shape": [
384,
7168
]
}
},
"24": {
"layer": 24,
"is_moe": true,
"routed_experts": {
"median_var64": 0.1066,
"q25_var64": 0.0887,
"q75_var64": 0.1278,
"mean_s0": 6.81,
"std_s0": 1.67,
"mean_s0_ratio": 1.23,
"n_experts": 384
},
"shared_expert": {
"var64": 0.201,
"s0": 8.72,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.331,
"s0": 8.97,
"s0_s1": 1.15,
"shape": [
384,
7168
]
}
},
"25": {
"layer": 25,
"is_moe": true,
"routed_experts": {
"median_var64": 0.1016,
"q25_var64": 0.0864,
"q75_var64": 0.1222,
"mean_s0": 6.68,
"std_s0": 1.76,
"mean_s0_ratio": 1.23,
"n_experts": 384
},
"shared_expert": {
"var64": 0.2033,
"s0": 8.59,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3307,
"s0": 8.75,
"s0_s1": 1.09,
"shape": [
384,
7168
]
}
},
"26": {
"layer": 26,
"is_moe": true,
"routed_experts": {
"median_var64": 0.1003,
"q25_var64": 0.0852,
"q75_var64": 0.1226,
"mean_s0": 6.65,
"std_s0": 1.71,
"mean_s0_ratio": 1.24,
"n_experts": 384
},
"shared_expert": {
"var64": 0.1798,
"s0": 7.47,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3257,
"s0": 8.48,
"s0_s1": 1.11,
"shape": [
384,
7168
]
}
},
"27": {
"layer": 27,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0973,
"q25_var64": 0.0849,
"q75_var64": 0.1213,
"mean_s0": 6.56,
"std_s0": 1.75,
"mean_s0_ratio": 1.21,
"n_experts": 384
},
"shared_expert": {
"var64": 0.2018,
"s0": 9.15,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3246,
"s0": 8.54,
"s0_s1": 1.09,
"shape": [
384,
7168
]
}
},
"28": {
"layer": 28,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0938,
"q25_var64": 0.084,
"q75_var64": 0.1134,
"mean_s0": 6.38,
"std_s0": 1.69,
"mean_s0_ratio": 1.22,
"n_experts": 384
},
"shared_expert": {
"var64": 0.1913,
"s0": 8.05,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3196,
"s0": 8.99,
"s0_s1": 1.21,
"shape": [
384,
7168
]
}
},
"29": {
"layer": 29,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0923,
"q25_var64": 0.0816,
"q75_var64": 0.1099,
"mean_s0": 6.17,
"std_s0": 1.64,
"mean_s0_ratio": 1.2,
"n_experts": 384
},
"shared_expert": {
"var64": 0.2067,
"s0": 9.68,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3241,
"s0": 8.27,
"s0_s1": 1.11,
"shape": [
384,
7168
]
}
},
"30": {
"layer": 30,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0887,
"q25_var64": 0.0808,
"q75_var64": 0.1053,
"mean_s0": 5.97,
"std_s0": 1.51,
"mean_s0_ratio": 1.19,
"n_experts": 384
},
"shared_expert": {
"var64": 0.198,
"s0": 8.47,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3189,
"s0": 7.42,
"s0_s1": 1.03,
"shape": [
384,
7168
]
}
},
"31": {
"layer": 31,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0882,
"q25_var64": 0.0805,
"q75_var64": 0.1005,
"mean_s0": 5.87,
"std_s0": 1.46,
"mean_s0_ratio": 1.2,
"n_experts": 384
},
"shared_expert": {
"var64": 0.1928,
"s0": 7.81,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3221,
"s0": 7.39,
"s0_s1": 1.04,
"shape": [
384,
7168
]
}
},
"32": {
"layer": 32,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0854,
"q25_var64": 0.08,
"q75_var64": 0.099,
"mean_s0": 5.76,
"std_s0": 1.42,
"mean_s0_ratio": 1.2,
"n_experts": 384
},
"shared_expert": {
"var64": 0.1734,
"s0": 6.74,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3212,
"s0": 7.2,
"s0_s1": 1.01,
"shape": [
384,
7168
]
}
},
"33": {
"layer": 33,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0863,
"q25_var64": 0.0797,
"q75_var64": 0.0977,
"mean_s0": 5.69,
"std_s0": 1.39,
"mean_s0_ratio": 1.18,
"n_experts": 384
},
"shared_expert": {
"var64": 0.1844,
"s0": 7.76,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3126,
"s0": 6.95,
"s0_s1": 1.02,
"shape": [
384,
7168
]
}
},
"34": {
"layer": 34,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0855,
"q25_var64": 0.0797,
"q75_var64": 0.0937,
"mean_s0": 5.47,
"std_s0": 1.27,
"mean_s0_ratio": 1.16,
"n_experts": 384
},
"shared_expert": {
"var64": 0.1759,
"s0": 7.64,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3174,
"s0": 7.07,
"s0_s1": 1.04,
"shape": [
384,
7168
]
}
},
"35": {
"layer": 35,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0835,
"q25_var64": 0.0798,
"q75_var64": 0.0937,
"mean_s0": 5.36,
"std_s0": 1.24,
"mean_s0_ratio": 1.15,
"n_experts": 384
},
"shared_expert": {
"var64": 0.1869,
"s0": 8.0,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3224,
"s0": 7.25,
"s0_s1": 1.08,
"shape": [
384,
7168
]
}
},
"36": {
"layer": 36,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0838,
"q25_var64": 0.0797,
"q75_var64": 0.0944,
"mean_s0": 5.35,
"std_s0": 1.29,
"mean_s0_ratio": 1.15,
"n_experts": 384
},
"shared_expert": {
"var64": 0.2021,
"s0": 8.27,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3192,
"s0": 7.19,
"s0_s1": 1.08,
"shape": [
384,
7168
]
}
},
"37": {
"layer": 37,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0834,
"q25_var64": 0.0794,
"q75_var64": 0.0939,
"mean_s0": 5.24,
"std_s0": 1.25,
"mean_s0_ratio": 1.13,
"n_experts": 384
},
"shared_expert": {
"var64": 0.1936,
"s0": 8.42,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3227,
"s0": 6.63,
"s0_s1": 1.02,
"shape": [
384,
7168
]
}
},
"38": {
"layer": 38,
"is_moe": true,
"routed_experts": {
"median_var64": 0.083,
"q25_var64": 0.0789,
"q75_var64": 0.0904,
"mean_s0": 5.13,
"std_s0": 1.17,
"mean_s0_ratio": 1.13,
"n_experts": 384
},
"shared_expert": {
"var64": 0.1899,
"s0": 7.06,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3145,
"s0": 6.21,
"s0_s1": 1.06,
"shape": [
384,
7168
]
}
},
"39": {
"layer": 39,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0823,
"q25_var64": 0.079,
"q75_var64": 0.0911,
"mean_s0": 5.02,
"std_s0": 1.16,
"mean_s0_ratio": 1.11,
"n_experts": 384
},
"shared_expert": {
"var64": 0.1876,
"s0": 6.88,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3198,
"s0": 6.36,
"s0_s1": 1.07,
"shape": [
384,
7168
]
}
},
"40": {
"layer": 40,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0823,
"q25_var64": 0.0796,
"q75_var64": 0.0878,
"mean_s0": 4.89,
"std_s0": 1.02,
"mean_s0_ratio": 1.1,
"n_experts": 384
},
"shared_expert": {
"var64": 0.1709,
"s0": 6.94,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3158,
"s0": 6.25,
"s0_s1": 1.07,
"shape": [
384,
7168
]
}
},
"41": {
"layer": 41,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0829,
"q25_var64": 0.0795,
"q75_var64": 0.0903,
"mean_s0": 4.91,
"std_s0": 1.04,
"mean_s0_ratio": 1.1,
"n_experts": 384
},
"shared_expert": {
"var64": 0.1735,
"s0": 7.57,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3159,
"s0": 6.03,
"s0_s1": 1.1,
"shape": [
384,
7168
]
}
},
"42": {
"layer": 42,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0835,
"q25_var64": 0.0802,
"q75_var64": 0.089,
"mean_s0": 4.93,
"std_s0": 1.05,
"mean_s0_ratio": 1.1,
"n_experts": 384
},
"shared_expert": {
"var64": 0.1951,
"s0": 7.85,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3156,
"s0": 5.87,
"s0_s1": 1.12,
"shape": [
384,
7168
]
}
},
"43": {
"layer": 43,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0824,
"q25_var64": 0.0786,
"q75_var64": 0.0907,
"mean_s0": 4.92,
"std_s0": 1.05,
"mean_s0_ratio": 1.1,
"n_experts": 384
},
"shared_expert": {
"var64": 0.1947,
"s0": 7.69,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3135,
"s0": 5.64,
"s0_s1": 1.09,
"shape": [
384,
7168
]
}
},
"44": {
"layer": 44,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0827,
"q25_var64": 0.0791,
"q75_var64": 0.0905,
"mean_s0": 5.0,
"std_s0": 1.14,
"mean_s0_ratio": 1.11,
"n_experts": 384
},
"shared_expert": {
"var64": 0.2133,
"s0": 8.59,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3076,
"s0": 5.35,
"s0_s1": 1.08,
"shape": [
384,
7168
]
}
},
"45": {
"layer": 45,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0826,
"q25_var64": 0.0792,
"q75_var64": 0.0883,
"mean_s0": 4.85,
"std_s0": 1.01,
"mean_s0_ratio": 1.1,
"n_experts": 384
},
"shared_expert": {
"var64": 0.2006,
"s0": 7.54,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.312,
"s0": 5.27,
"s0_s1": 1.09,
"shape": [
384,
7168
]
}
},
"46": {
"layer": 46,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0833,
"q25_var64": 0.0795,
"q75_var64": 0.0896,
"mean_s0": 4.89,
"std_s0": 1.05,
"mean_s0_ratio": 1.09,
"n_experts": 384
},
"shared_expert": {
"var64": 0.1878,
"s0": 7.28,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3125,
"s0": 5.16,
"s0_s1": 1.1,
"shape": [
384,
7168
]
}
},
"47": {
"layer": 47,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0833,
"q25_var64": 0.0799,
"q75_var64": 0.0893,
"mean_s0": 4.79,
"std_s0": 0.91,
"mean_s0_ratio": 1.09,
"n_experts": 384
},
"shared_expert": {
"var64": 0.1801,
"s0": 7.55,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3063,
"s0": 4.89,
"s0_s1": 1.07,
"shape": [
384,
7168
]
}
},
"48": {
"layer": 48,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0831,
"q25_var64": 0.0806,
"q75_var64": 0.0897,
"mean_s0": 4.78,
"std_s0": 0.94,
"mean_s0_ratio": 1.08,
"n_experts": 384
},
"shared_expert": {
"var64": 0.181,
"s0": 8.07,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3137,
"s0": 5.08,
"s0_s1": 1.08,
"shape": [
384,
7168
]
}
},
"49": {
"layer": 49,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0851,
"q25_var64": 0.0812,
"q75_var64": 0.0917,
"mean_s0": 4.93,
"std_s0": 1.03,
"mean_s0_ratio": 1.09,
"n_experts": 384
},
"shared_expert": {
"var64": 0.1804,
"s0": 7.38,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3127,
"s0": 4.81,
"s0_s1": 1.07,
"shape": [
384,
7168
]
}
},
"50": {
"layer": 50,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0848,
"q25_var64": 0.0818,
"q75_var64": 0.0917,
"mean_s0": 5.01,
"std_s0": 1.19,
"mean_s0_ratio": 1.09,
"n_experts": 384
},
"shared_expert": {
"var64": 0.1817,
"s0": 7.56,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3103,
"s0": 4.57,
"s0_s1": 1.07,
"shape": [
384,
7168
]
}
},
"51": {
"layer": 51,
"is_moe": true,
"routed_experts": {
"median_var64": 0.085,
"q25_var64": 0.0818,
"q75_var64": 0.0929,
"mean_s0": 5.01,
"std_s0": 1.18,
"mean_s0_ratio": 1.1,
"n_experts": 384
},
"shared_expert": {
"var64": 0.1766,
"s0": 7.63,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.308,
"s0": 4.49,
"s0_s1": 1.12,
"shape": [
384,
7168
]
}
},
"52": {
"layer": 52,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0876,
"q25_var64": 0.0833,
"q75_var64": 0.0938,
"mean_s0": 5.25,
"std_s0": 1.65,
"mean_s0_ratio": 1.12,
"n_experts": 384
},
"shared_expert": {
"var64": 0.1841,
"s0": 7.68,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3099,
"s0": 4.48,
"s0_s1": 1.11,
"shape": [
384,
7168
]
}
},
"53": {
"layer": 53,
"is_moe": true,
"routed_experts": {
"median_var64": 0.087,
"q25_var64": 0.0833,
"q75_var64": 0.0947,
"mean_s0": 5.26,
"std_s0": 1.46,
"mean_s0_ratio": 1.11,
"n_experts": 384
},
"shared_expert": {
"var64": 0.1912,
"s0": 8.26,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3049,
"s0": 4.14,
"s0_s1": 1.08,
"shape": [
384,
7168
]
}
},
"54": {
"layer": 54,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0887,
"q25_var64": 0.0843,
"q75_var64": 0.0965,
"mean_s0": 5.51,
"std_s0": 1.41,
"mean_s0_ratio": 1.14,
"n_experts": 384
},
"shared_expert": {
"var64": 0.2034,
"s0": 8.45,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3038,
"s0": 4.05,
"s0_s1": 1.05,
"shape": [
384,
7168
]
}
},
"55": {
"layer": 55,
"is_moe": true,
"routed_experts": {
"median_var64": 0.086,
"q25_var64": 0.0811,
"q75_var64": 0.0957,
"mean_s0": 5.6,
"std_s0": 1.82,
"mean_s0_ratio": 1.15,
"n_experts": 384
},
"shared_expert": {
"var64": 0.2073,
"s0": 7.98,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3091,
"s0": 4.33,
"s0_s1": 1.15,
"shape": [
384,
7168
]
}
},
"56": {
"layer": 56,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0863,
"q25_var64": 0.0815,
"q75_var64": 0.0965,
"mean_s0": 5.69,
"std_s0": 1.93,
"mean_s0_ratio": 1.16,
"n_experts": 384
},
"shared_expert": {
"var64": 0.2141,
"s0": 8.61,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3167,
"s0": 4.19,
"s0_s1": 1.11,
"shape": [
384,
7168
]
}
},
"57": {
"layer": 57,
"is_moe": true,
"routed_experts": {
"median_var64": 0.086,
"q25_var64": 0.0797,
"q75_var64": 0.0987,
"mean_s0": 5.79,
"std_s0": 2.19,
"mean_s0_ratio": 1.16,
"n_experts": 384
},
"shared_expert": {
"var64": 0.2212,
"s0": 10.11,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3266,
"s0": 4.64,
"s0_s1": 1.2,
"shape": [
384,
7168
]
}
},
"58": {
"layer": 58,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0853,
"q25_var64": 0.0797,
"q75_var64": 0.0955,
"mean_s0": 5.75,
"std_s0": 2.08,
"mean_s0_ratio": 1.15,
"n_experts": 384
},
"shared_expert": {
"var64": 0.1874,
"s0": 7.05,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3379,
"s0": 4.87,
"s0_s1": 1.13,
"shape": [
384,
7168
]
}
},
"59": {
"layer": 59,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0846,
"q25_var64": 0.0797,
"q75_var64": 0.0928,
"mean_s0": 5.58,
"std_s0": 1.81,
"mean_s0_ratio": 1.13,
"n_experts": 384
},
"shared_expert": {
"var64": 0.1663,
"s0": 5.87,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3446,
"s0": 5.4,
"s0_s1": 1.12,
"shape": [
384,
7168
]
}
},
"60": {
"layer": 60,
"is_moe": true,
"routed_experts": {
"median_var64": 0.0863,
"q25_var64": 0.0811,
"q75_var64": 0.0944,
"mean_s0": 5.52,
"std_s0": 1.19,
"mean_s0_ratio": 1.1,
"n_experts": 384
},
"shared_expert": {
"var64": 0.2663,
"s0": 6.47,
"shape": [
7168,
2048
]
},
"router": {
"var64": 0.3667,
"s0": 5.54,
"s0_s1": 1.06,
"shape": [
384,
7168
]
}
}
}