{ "0": { "layer": 0, "is_moe": true, "routed_experts": { "median_var64": 0.0848, "q25_var64": 0.0844, "q75_var64": 0.0852, "mean_s0": 2.88, "std_s0": 0.21, "mean_s0_ratio": 1.02, "n_experts": 256 }, "shared_expert": { "var64": 0.1268, "s0": 3.53, "shape": [ 4096, 2048 ] }, "router": { "var64": 0.5374, "s0": 7.86, "s0_s1": 1.15, "shape": [ 256, 4096 ] } }, "1": { "layer": 1, "is_moe": true, "routed_experts": { "median_var64": 0.0866, "q25_var64": 0.0865, "q75_var64": 0.0868, "mean_s0": 2.84, "std_s0": 0.09, "mean_s0_ratio": 1.01, "n_experts": 256 }, "shared_expert": { "var64": 0.1906, "s0": 3.8, "shape": [ 4096, 2048 ] }, "router": { "var64": 0.4951, "s0": 4.92, "s0_s1": 1.44, "shape": [ 256, 4096 ] } }, "2": { "layer": 2, "is_moe": true, "routed_experts": { "median_var64": 0.0865, "q25_var64": 0.0863, "q75_var64": 0.0867, "mean_s0": 2.87, "std_s0": 0.35, "mean_s0_ratio": 1.02, "n_experts": 256 }, "shared_expert": { "var64": 0.2442, "s0": 3.8, "shape": [ 4096, 2048 ] }, "router": { "var64": 0.4392, "s0": 3.65, "s0_s1": 1.07, "shape": [ 256, 4096 ] } }, "3": { "layer": 3, "is_moe": true, "routed_experts": { "median_var64": 0.0998, "q25_var64": 0.095, "q75_var64": 0.1042, "mean_s0": 3.72, "std_s0": 0.47, "mean_s0_ratio": 1.11, "n_experts": 256 }, "shared_expert": { "var64": 0.0948, "s0": 4.65, "shape": [ 4096, 2048 ] }, "router": { "var64": 0.4697, "s0": 6.69, "s0_s1": 1.09, "shape": [ 256, 4096 ] } }, "4": { "layer": 4, "is_moe": true, "routed_experts": { "median_var64": 0.1017, "q25_var64": 0.097, "q75_var64": 0.1068, "mean_s0": 3.9, "std_s0": 0.53, "mean_s0_ratio": 1.12, "n_experts": 256 }, "shared_expert": { "var64": 0.1239, "s0": 4.34, "shape": [ 4096, 2048 ] }, "router": { "var64": 0.4817, "s0": 6.34, "s0_s1": 1.15, "shape": [ 256, 4096 ] } }, "5": { "layer": 5, "is_moe": true, "routed_experts": { "median_var64": 0.1062, "q25_var64": 0.0981, "q75_var64": 0.1157, "mean_s0": 4.2, "std_s0": 0.65, "mean_s0_ratio": 1.16, "n_experts": 256 }, "shared_expert": { "var64": 0.1554, "s0": 3.83, "shape": [ 4096, 2048 ] }, "router": { "var64": 0.4402, "s0": 6.17, "s0_s1": 1.23, "shape": [ 256, 4096 ] } }, "6": { "layer": 6, "is_moe": true, "routed_experts": { "median_var64": 0.1017, "q25_var64": 0.0963, "q75_var64": 0.1079, "mean_s0": 4.11, "std_s0": 0.62, "mean_s0_ratio": 1.16, "n_experts": 256 }, "shared_expert": { "var64": 0.1669, "s0": 4.28, "shape": [ 4096, 2048 ] }, "router": { "var64": 0.437, "s0": 5.39, "s0_s1": 1.12, "shape": [ 256, 4096 ] } }, "7": { "layer": 7, "is_moe": true, "routed_experts": { "median_var64": 0.1055, "q25_var64": 0.0982, "q75_var64": 0.1137, "mean_s0": 4.26, "std_s0": 0.56, "mean_s0_ratio": 1.19, "n_experts": 256 }, "shared_expert": { "var64": 0.1686, "s0": 4.15, "shape": [ 4096, 2048 ] }, "router": { "var64": 0.4548, "s0": 5.14, "s0_s1": 1.07, "shape": [ 256, 4096 ] } }, "8": { "layer": 8, "is_moe": true, "routed_experts": { "median_var64": 0.104, "q25_var64": 0.0992, "q75_var64": 0.1135, "mean_s0": 4.28, "std_s0": 0.68, "mean_s0_ratio": 1.19, "n_experts": 256 }, "shared_expert": { "var64": 0.194, "s0": 4.24, "shape": [ 4096, 2048 ] }, "router": { "var64": 0.4371, "s0": 5.56, "s0_s1": 1.14, "shape": [ 256, 4096 ] } }, "9": { "layer": 9, "is_moe": true, "routed_experts": { "median_var64": 0.1122, "q25_var64": 0.105, "q75_var64": 0.1233, "mean_s0": 4.62, "std_s0": 0.8, "mean_s0_ratio": 1.2, "n_experts": 256 }, "shared_expert": { "var64": 0.2007, "s0": 4.39, "shape": [ 4096, 2048 ] }, "router": { "var64": 0.4314, "s0": 6.46, "s0_s1": 1.24, "shape": [ 256, 4096 ] } }, "10": { "layer": 10, "is_moe": true, "routed_experts": { "median_var64": 0.1172, "q25_var64": 0.1082, "q75_var64": 0.1314, "mean_s0": 4.78, "std_s0": 0.81, "mean_s0_ratio": 1.21, "n_experts": 256 }, "shared_expert": { "var64": 0.1916, "s0": 4.74, "shape": [ 4096, 2048 ] }, "router": { "var64": 0.4471, "s0": 6.66, "s0_s1": 1.3, "shape": [ 256, 4096 ] } }, "11": { "layer": 11, "is_moe": true, "routed_experts": { "median_var64": 0.1153, "q25_var64": 0.1045, "q75_var64": 0.1313, "mean_s0": 4.62, "std_s0": 1.02, "mean_s0_ratio": 1.19, "n_experts": 256 }, "shared_expert": { "var64": 0.1849, "s0": 4.12, "shape": [ 4096, 2048 ] }, "router": { "var64": 0.4657, "s0": 6.93, "s0_s1": 1.29, "shape": [ 256, 4096 ] } }, "12": { "layer": 12, "is_moe": true, "routed_experts": { "median_var64": 0.1208, "q25_var64": 0.109, "q75_var64": 0.1364, "mean_s0": 4.9, "std_s0": 0.9, "mean_s0_ratio": 1.22, "n_experts": 256 }, "shared_expert": { "var64": 0.2135, "s0": 4.62, "shape": [ 4096, 2048 ] }, "router": { "var64": 0.4562, "s0": 6.02, "s0_s1": 1.18, "shape": [ 256, 4096 ] } }, "13": { "layer": 13, "is_moe": true, "routed_experts": { "median_var64": 0.1286, "q25_var64": 0.1127, "q75_var64": 0.1449, "mean_s0": 5.15, "std_s0": 0.97, "mean_s0_ratio": 1.26, "n_experts": 256 }, "shared_expert": { "var64": 0.2054, "s0": 4.42, "shape": [ 4096, 2048 ] }, "router": { "var64": 0.4671, "s0": 6.36, "s0_s1": 1.25, "shape": [ 256, 4096 ] } }, "14": { "layer": 14, "is_moe": true, "routed_experts": { "median_var64": 0.1426, "q25_var64": 0.1234, "q75_var64": 0.1575, "mean_s0": 5.46, "std_s0": 1.03, "mean_s0_ratio": 1.25, "n_experts": 256 }, "shared_expert": { "var64": 0.2041, "s0": 4.94, "shape": [ 4096, 2048 ] }, "router": { "var64": 0.4733, "s0": 6.23, "s0_s1": 1.15, "shape": [ 256, 4096 ] } }, "15": { "layer": 15, "is_moe": true, "routed_experts": { "median_var64": 0.1413, "q25_var64": 0.1179, "q75_var64": 0.1633, "mean_s0": 5.49, "std_s0": 1.18, "mean_s0_ratio": 1.24, "n_experts": 256 }, "shared_expert": { "var64": 0.2034, "s0": 4.99, "shape": [ 4096, 2048 ] }, "router": { "var64": 0.4797, "s0": 6.75, "s0_s1": 1.22, "shape": [ 256, 4096 ] } }, "16": { "layer": 16, "is_moe": true, "routed_experts": { "median_var64": 0.1471, "q25_var64": 0.1232, "q75_var64": 0.1748, "mean_s0": 5.65, "std_s0": 1.25, "mean_s0_ratio": 1.2, "n_experts": 256 }, "shared_expert": { "var64": 0.1954, "s0": 5.52, "shape": [ 4096, 2048 ] }, "router": { "var64": 0.4886, "s0": 6.54, "s0_s1": 1.22, "shape": [ 256, 4096 ] } }, "17": { "layer": 17, "is_moe": true, "routed_experts": { "median_var64": 0.1523, "q25_var64": 0.1307, "q75_var64": 0.1797, "mean_s0": 5.99, "std_s0": 1.35, "mean_s0_ratio": 1.22, "n_experts": 256 }, "shared_expert": { "var64": 0.1902, "s0": 4.94, "shape": [ 4096, 2048 ] }, "router": { "var64": 0.4963, "s0": 6.21, "s0_s1": 1.26, "shape": [ 256, 4096 ] } }, "18": { "layer": 18, "is_moe": true, "routed_experts": { "median_var64": 0.1487, "q25_var64": 0.1313, "q75_var64": 0.17, "mean_s0": 5.9, "std_s0": 1.36, "mean_s0_ratio": 1.19, "n_experts": 256 }, "shared_expert": { "var64": 0.1949, "s0": 6.07, "shape": [ 4096, 2048 ] }, "router": { "var64": 0.4994, "s0": 5.86, "s0_s1": 1.17, "shape": [ 256, 4096 ] } }, "19": { "layer": 19, "is_moe": true, "routed_experts": { "median_var64": 0.1157, "q25_var64": 0.1058, "q75_var64": 0.1274, "mean_s0": 4.61, "std_s0": 0.89, "mean_s0_ratio": 1.13, "n_experts": 256 }, "shared_expert": { "var64": 0.1279, "s0": 4.84, "shape": [ 4096, 2048 ] }, "router": { "var64": 0.4813, "s0": 8.17, "s0_s1": 1.25, "shape": [ 256, 4096 ] } }, "20": { "layer": 20, "is_moe": true, "routed_experts": { "median_var64": 0.1415, "q25_var64": 0.1242, "q75_var64": 0.1635, "mean_s0": 5.69, "std_s0": 1.37, "mean_s0_ratio": 1.17, "n_experts": 256 }, "shared_expert": { "var64": 0.215, "s0": 6.22, "shape": [ 4096, 2048 ] }, "router": { "var64": 0.5111, "s0": 6.1, "s0_s1": 1.25, "shape": [ 256, 4096 ] } }, "21": { "layer": 21, "is_moe": true, "routed_experts": { "median_var64": 0.1292, "q25_var64": 0.1037, "q75_var64": 0.1529, "mean_s0": 5.3, "std_s0": 1.5, "mean_s0_ratio": 1.17, "n_experts": 256 }, "shared_expert": { "var64": 0.2144, "s0": 5.84, "shape": [ 4096, 2048 ] }, "router": { "var64": 0.4869, "s0": 6.86, "s0_s1": 1.1, "shape": [ 256, 4096 ] } }, "22": { "layer": 22, "is_moe": true, "routed_experts": { "median_var64": 0.1209, "q25_var64": 0.0982, "q75_var64": 0.1496, "mean_s0": 5.0, "std_s0": 1.21, "mean_s0_ratio": 1.15, "n_experts": 256 }, "shared_expert": { "var64": 0.2101, "s0": 4.86, "shape": [ 4096, 2048 ] }, "router": { "var64": 0.4821, "s0": 7.01, "s0_s1": 1.23, "shape": [ 256, 4096 ] } }, "23": { "layer": 23, "is_moe": true, "routed_experts": { "median_var64": 0.0888, "q25_var64": 0.0873, "q75_var64": 0.0948, "mean_s0": 3.42, "std_s0": 0.76, "mean_s0_ratio": 1.08, "n_experts": 256 }, "shared_expert": { "var64": 0.1911, "s0": 5.09, "shape": [ 4096, 2048 ] }, "router": { "var64": 0.484, "s0": 9.02, "s0_s1": 1.1, "shape": [ 256, 4096 ] } }, "24": { "layer": 24, "is_moe": true, "routed_experts": { "median_var64": 0.0916, "q25_var64": 0.0878, "q75_var64": 0.1033, "mean_s0": 3.65, "std_s0": 0.9, "mean_s0_ratio": 1.1, "n_experts": 256 }, "shared_expert": { "var64": 0.1699, "s0": 4.09, "shape": [ 4096, 2048 ] }, "router": { "var64": 0.4696, "s0": 7.31, "s0_s1": 1.13, "shape": [ 256, 4096 ] } }, "25": { "layer": 25, "is_moe": true, "routed_experts": { "median_var64": 0.0953, "q25_var64": 0.0903, "q75_var64": 0.109, "mean_s0": 3.96, "std_s0": 1.16, "mean_s0_ratio": 1.14, "n_experts": 256 }, "shared_expert": { "var64": 0.2279, "s0": 4.9, "shape": [ 4096, 2048 ] }, "router": { "var64": 0.4568, "s0": 7.36, "s0_s1": 1.05, "shape": [ 256, 4096 ] } }, "26": { "layer": 26, "is_moe": true, "routed_experts": { "median_var64": 0.0954, "q25_var64": 0.0915, "q75_var64": 0.1058, "mean_s0": 3.86, "std_s0": 0.99, "mean_s0_ratio": 1.11, "n_experts": 256 }, "shared_expert": { "var64": 0.199, "s0": 4.62, "shape": [ 4096, 2048 ] }, "router": { "var64": 0.4361, "s0": 6.16, "s0_s1": 1.18, "shape": [ 256, 4096 ] } }, "27": { "layer": 27, "is_moe": true, "routed_experts": { "median_var64": 0.0981, "q25_var64": 0.0943, "q75_var64": 0.1058, "mean_s0": 3.96, "std_s0": 1.23, "mean_s0_ratio": 1.13, "n_experts": 256 }, "shared_expert": { "var64": 0.3269, "s0": 7.17, "shape": [ 4096, 2048 ] }, "router": { "var64": 0.444, "s0": 7.06, "s0_s1": 1.13, "shape": [ 256, 4096 ] } }, "28": { "layer": 28, "is_moe": true, "routed_experts": { "median_var64": 0.0938, "q25_var64": 0.09, "q75_var64": 0.1045, "mean_s0": 3.95, "std_s0": 1.19, "mean_s0_ratio": 1.14, "n_experts": 256 }, "shared_expert": { "var64": 0.2398, "s0": 4.73, "shape": [ 4096, 2048 ] }, "router": { "var64": 0.4454, "s0": 6.37, "s0_s1": 1.19, "shape": [ 256, 4096 ] } }, "29": { "layer": 29, "is_moe": true, "routed_experts": { "median_var64": 0.0937, "q25_var64": 0.089, "q75_var64": 0.11, "mean_s0": 3.95, "std_s0": 1.2, "mean_s0_ratio": 1.12, "n_experts": 256 }, "shared_expert": { "var64": 0.2463, "s0": 5.85, "shape": [ 4096, 2048 ] }, "router": { "var64": 0.4549, "s0": 6.69, "s0_s1": 1.14, "shape": [ 256, 4096 ] } }, "30": { "layer": 30, "is_moe": true, "routed_experts": { "median_var64": 0.0968, "q25_var64": 0.0905, "q75_var64": 0.1132, "mean_s0": 4.16, "std_s0": 1.32, "mean_s0_ratio": 1.14, "n_experts": 256 }, "shared_expert": { "var64": 0.2351, "s0": 6.71, "shape": [ 4096, 2048 ] }, "router": { "var64": 0.4442, "s0": 6.39, "s0_s1": 1.17, "shape": [ 256, 4096 ] } }, "31": { "layer": 31, "is_moe": true, "routed_experts": { "median_var64": 0.0947, "q25_var64": 0.0906, "q75_var64": 0.1092, "mean_s0": 4.03, "std_s0": 1.18, "mean_s0_ratio": 1.14, "n_experts": 256 }, "shared_expert": { "var64": 0.2778, "s0": 7.04, "shape": [ 4096, 2048 ] }, "router": { "var64": 0.4478, "s0": 6.75, "s0_s1": 1.14, "shape": [ 256, 4096 ] } }, "32": { "layer": 32, "is_moe": true, "routed_experts": { "median_var64": 0.092, "q25_var64": 0.0892, "q75_var64": 0.101, "mean_s0": 3.83, "std_s0": 1.11, "mean_s0_ratio": 1.11, "n_experts": 256 }, "shared_expert": { "var64": 0.2878, "s0": 6.94, "shape": [ 4096, 2048 ] }, "router": { "var64": 0.4465, "s0": 5.95, "s0_s1": 1.14, "shape": [ 256, 4096 ] } }, "33": { "layer": 33, "is_moe": true, "routed_experts": { "median_var64": 0.0925, "q25_var64": 0.0898, "q75_var64": 0.1035, "mean_s0": 3.79, "std_s0": 1.06, "mean_s0_ratio": 1.11, "n_experts": 256 }, "shared_expert": { "var64": 0.2721, "s0": 5.62, "shape": [ 4096, 2048 ] }, "router": { "var64": 0.4406, "s0": 5.59, "s0_s1": 1.05, "shape": [ 256, 4096 ] } }, "34": { "layer": 34, "is_moe": true, "routed_experts": { "median_var64": 0.0922, "q25_var64": 0.09, "q75_var64": 0.1012, "mean_s0": 3.71, "std_s0": 1.17, "mean_s0_ratio": 1.09, "n_experts": 256 }, "shared_expert": { "var64": 0.2554, "s0": 6.2, "shape": [ 4096, 2048 ] }, "router": { "var64": 0.4352, "s0": 5.0, "s0_s1": 1.08, "shape": [ 256, 4096 ] } }, "35": { "layer": 35, "is_moe": true, "routed_experts": { "median_var64": 0.1022, "q25_var64": 0.0955, "q75_var64": 0.1166, "mean_s0": 4.32, "std_s0": 1.84, "mean_s0_ratio": 1.13, "n_experts": 256 }, "shared_expert": { "var64": 0.2325, "s0": 6.58, "shape": [ 4096, 2048 ] }, "router": { "var64": 0.4464, "s0": 5.85, "s0_s1": 1.13, "shape": [ 256, 4096 ] } }, "36": { "layer": 36, "is_moe": true, "routed_experts": { "median_var64": 0.0958, "q25_var64": 0.0918, "q75_var64": 0.1045, "mean_s0": 3.78, "std_s0": 1.0, "mean_s0_ratio": 1.1, "n_experts": 256 }, "shared_expert": { "var64": 0.1882, "s0": 7.17, "shape": [ 4096, 2048 ] }, "router": { "var64": 0.452, "s0": 4.96, "s0_s1": 1.2, "shape": [ 256, 4096 ] } }, "37": { "layer": 37, "is_moe": true, "routed_experts": { "median_var64": 0.0949, "q25_var64": 0.0904, "q75_var64": 0.1048, "mean_s0": 4.02, "std_s0": 1.65, "mean_s0_ratio": 1.13, "n_experts": 256 }, "shared_expert": { "var64": 0.2981, "s0": 8.46, "shape": [ 4096, 2048 ] }, "router": { "var64": 0.4593, "s0": 4.93, "s0_s1": 1.09, "shape": [ 256, 4096 ] } }, "38": { "layer": 38, "is_moe": true, "routed_experts": { "median_var64": 0.098, "q25_var64": 0.0917, "q75_var64": 0.1083, "mean_s0": 4.0, "std_s0": 1.14, "mean_s0_ratio": 1.1, "n_experts": 256 }, "shared_expert": { "var64": 0.2166, "s0": 8.46, "shape": [ 4096, 2048 ] }, "router": { "var64": 0.4608, "s0": 4.84, "s0_s1": 1.09, "shape": [ 256, 4096 ] } }, "39": { "layer": 39, "is_moe": true, "routed_experts": { "median_var64": 0.1116, "q25_var64": 0.1027, "q75_var64": 0.1299, "mean_s0": 5.53, "std_s0": 2.34, "mean_s0_ratio": 1.25, "n_experts": 256 }, "shared_expert": { "var64": 0.2764, "s0": 8.83, "shape": [ 4096, 2048 ] }, "router": { "var64": 0.4941, "s0": 4.11, "s0_s1": 1.06, "shape": [ 256, 4096 ] } }, "40": { "layer": 40, "is_moe": true, "routed_experts": { "median_var64": 0.1027, "q25_var64": 0.0943, "q75_var64": 0.1168, "mean_s0": 5.01, "std_s0": 1.97, "mean_s0_ratio": 1.19, "n_experts": 256 }, "shared_expert": { "var64": 0.3241, "s0": 9.94, "shape": [ 4096, 2048 ] }, "router": { "var64": 0.4982, "s0": 4.8, "s0_s1": 1.15, "shape": [ 256, 4096 ] } }, "41": { "layer": 41, "is_moe": true, "routed_experts": { "median_var64": 0.0988, "q25_var64": 0.0921, "q75_var64": 0.1127, "mean_s0": 5.16, "std_s0": 2.69, "mean_s0_ratio": 1.16, "n_experts": 256 }, "shared_expert": { "var64": 0.2952, "s0": 8.3, "shape": [ 4096, 2048 ] }, "router": { "var64": 0.49, "s0": 5.52, "s0_s1": 1.22, "shape": [ 256, 4096 ] } }, "42": { "layer": 42, "is_moe": true, "routed_experts": { "median_var64": 0.0997, "q25_var64": 0.0938, "q75_var64": 0.1085, "mean_s0": 5.85, "std_s0": 4.75, "mean_s0_ratio": 1.18, "n_experts": 256 }, "shared_expert": { "var64": 0.2863, "s0": 16.44, "shape": [ 4096, 2048 ] }, "router": { "var64": 0.5181, "s0": 6.69, "s0_s1": 1.35, "shape": [ 256, 4096 ] } } }