{ "0": { "layer": 0, "is_moe": false, "dense": { "var64": 0.0373, "s0": 10.7, "shape": [ 7168, 18432 ] } }, "1": { "layer": 1, "is_moe": true, "routed_experts": { "median_var64": 0.0824, "q25_var64": 0.0758, "q75_var64": 0.0866, "mean_s0": 4.69, "std_s0": 0.95, "mean_s0_ratio": 1.11, "n_experts": 384 }, "shared_expert": { "var64": 0.1127, "s0": 8.05, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.5682, "s0": 26.89, "s0_s1": 1.25, "shape": [ 384, 7168 ] } }, "2": { "layer": 2, "is_moe": true, "routed_experts": { "median_var64": 0.0883, "q25_var64": 0.0827, "q75_var64": 0.0918, "mean_s0": 5.09, "std_s0": 0.81, "mean_s0_ratio": 1.14, "n_experts": 384 }, "shared_expert": { "var64": 0.1041, "s0": 7.36, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.393, "s0": 14.9, "s0_s1": 1.0, "shape": [ 384, 7168 ] } }, "3": { "layer": 3, "is_moe": true, "routed_experts": { "median_var64": 0.0901, "q25_var64": 0.0868, "q75_var64": 0.0934, "mean_s0": 5.05, "std_s0": 0.61, "mean_s0_ratio": 1.12, "n_experts": 384 }, "shared_expert": { "var64": 0.127, "s0": 7.47, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3403, "s0": 15.09, "s0_s1": 1.06, "shape": [ 384, 7168 ] } }, "4": { "layer": 4, "is_moe": true, "routed_experts": { "median_var64": 0.0906, "q25_var64": 0.0869, "q75_var64": 0.0966, "mean_s0": 5.3, "std_s0": 0.68, "mean_s0_ratio": 1.15, "n_experts": 384 }, "shared_expert": { "var64": 0.1212, "s0": 6.56, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.35, "s0": 15.06, "s0_s1": 1.08, "shape": [ 384, 7168 ] } }, "5": { "layer": 5, "is_moe": true, "routed_experts": { "median_var64": 0.0888, "q25_var64": 0.0849, "q75_var64": 0.0935, "mean_s0": 5.25, "std_s0": 0.71, "mean_s0_ratio": 1.15, "n_experts": 384 }, "shared_expert": { "var64": 0.1513, "s0": 7.33, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3367, "s0": 15.28, "s0_s1": 1.21, "shape": [ 384, 7168 ] } }, "6": { "layer": 6, "is_moe": true, "routed_experts": { "median_var64": 0.0865, "q25_var64": 0.083, "q75_var64": 0.0911, "mean_s0": 5.0, "std_s0": 0.67, "mean_s0_ratio": 1.15, "n_experts": 384 }, "shared_expert": { "var64": 0.1625, "s0": 7.12, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3258, "s0": 13.32, "s0_s1": 1.08, "shape": [ 384, 7168 ] } }, "7": { "layer": 7, "is_moe": true, "routed_experts": { "median_var64": 0.0885, "q25_var64": 0.0845, "q75_var64": 0.0929, "mean_s0": 5.11, "std_s0": 0.66, "mean_s0_ratio": 1.15, "n_experts": 384 }, "shared_expert": { "var64": 0.1665, "s0": 6.61, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3163, "s0": 12.09, "s0_s1": 1.05, "shape": [ 384, 7168 ] } }, "8": { "layer": 8, "is_moe": true, "routed_experts": { "median_var64": 0.0898, "q25_var64": 0.085, "q75_var64": 0.0943, "mean_s0": 5.17, "std_s0": 0.66, "mean_s0_ratio": 1.16, "n_experts": 384 }, "shared_expert": { "var64": 0.1639, "s0": 6.76, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3089, "s0": 12.21, "s0_s1": 1.1, "shape": [ 384, 7168 ] } }, "9": { "layer": 9, "is_moe": true, "routed_experts": { "median_var64": 0.0912, "q25_var64": 0.0867, "q75_var64": 0.0967, "mean_s0": 5.28, "std_s0": 0.71, "mean_s0_ratio": 1.17, "n_experts": 384 }, "shared_expert": { "var64": 0.1638, "s0": 6.9, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3083, "s0": 12.38, "s0_s1": 1.1, "shape": [ 384, 7168 ] } }, "10": { "layer": 10, "is_moe": true, "routed_experts": { "median_var64": 0.0919, "q25_var64": 0.0873, "q75_var64": 0.0985, "mean_s0": 5.42, "std_s0": 0.8, "mean_s0_ratio": 1.18, "n_experts": 384 }, "shared_expert": { "var64": 0.1611, "s0": 6.26, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3106, "s0": 11.63, "s0_s1": 1.08, "shape": [ 384, 7168 ] } }, "11": { "layer": 11, "is_moe": true, "routed_experts": { "median_var64": 0.0946, "q25_var64": 0.0885, "q75_var64": 0.1006, "mean_s0": 5.56, "std_s0": 0.89, "mean_s0_ratio": 1.19, "n_experts": 384 }, "shared_expert": { "var64": 0.1712, "s0": 6.68, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3061, "s0": 11.2, "s0_s1": 1.04, "shape": [ 384, 7168 ] } }, "12": { "layer": 12, "is_moe": true, "routed_experts": { "median_var64": 0.098, "q25_var64": 0.0915, "q75_var64": 0.1053, "mean_s0": 5.8, "std_s0": 0.91, "mean_s0_ratio": 1.2, "n_experts": 384 }, "shared_expert": { "var64": 0.1517, "s0": 6.36, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3161, "s0": 10.69, "s0_s1": 1.04, "shape": [ 384, 7168 ] } }, "13": { "layer": 13, "is_moe": true, "routed_experts": { "median_var64": 0.1008, "q25_var64": 0.0926, "q75_var64": 0.1085, "mean_s0": 6.03, "std_s0": 1.01, "mean_s0_ratio": 1.23, "n_experts": 384 }, "shared_expert": { "var64": 0.1701, "s0": 7.06, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3245, "s0": 10.72, "s0_s1": 1.06, "shape": [ 384, 7168 ] } }, "14": { "layer": 14, "is_moe": true, "routed_experts": { "median_var64": 0.1007, "q25_var64": 0.0912, "q75_var64": 0.109, "mean_s0": 6.11, "std_s0": 1.04, "mean_s0_ratio": 1.24, "n_experts": 384 }, "shared_expert": { "var64": 0.1624, "s0": 6.23, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3245, "s0": 10.31, "s0_s1": 1.04, "shape": [ 384, 7168 ] } }, "15": { "layer": 15, "is_moe": true, "routed_experts": { "median_var64": 0.1006, "q25_var64": 0.0902, "q75_var64": 0.1102, "mean_s0": 6.21, "std_s0": 1.14, "mean_s0_ratio": 1.24, "n_experts": 384 }, "shared_expert": { "var64": 0.1904, "s0": 7.38, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3269, "s0": 10.12, "s0_s1": 1.02, "shape": [ 384, 7168 ] } }, "16": { "layer": 16, "is_moe": true, "routed_experts": { "median_var64": 0.0989, "q25_var64": 0.0897, "q75_var64": 0.1117, "mean_s0": 6.12, "std_s0": 1.13, "mean_s0_ratio": 1.23, "n_experts": 384 }, "shared_expert": { "var64": 0.1764, "s0": 7.03, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.328, "s0": 10.11, "s0_s1": 1.08, "shape": [ 384, 7168 ] } }, "17": { "layer": 17, "is_moe": true, "routed_experts": { "median_var64": 0.1036, "q25_var64": 0.0912, "q75_var64": 0.1152, "mean_s0": 6.38, "std_s0": 1.24, "mean_s0_ratio": 1.24, "n_experts": 384 }, "shared_expert": { "var64": 0.1793, "s0": 7.02, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3172, "s0": 9.52, "s0_s1": 1.07, "shape": [ 384, 7168 ] } }, "18": { "layer": 18, "is_moe": true, "routed_experts": { "median_var64": 0.1039, "q25_var64": 0.0895, "q75_var64": 0.1176, "mean_s0": 6.36, "std_s0": 1.3, "mean_s0_ratio": 1.23, "n_experts": 384 }, "shared_expert": { "var64": 0.2005, "s0": 7.53, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3238, "s0": 9.68, "s0_s1": 1.1, "shape": [ 384, 7168 ] } }, "19": { "layer": 19, "is_moe": true, "routed_experts": { "median_var64": 0.1056, "q25_var64": 0.0925, "q75_var64": 0.121, "mean_s0": 6.44, "std_s0": 1.32, "mean_s0_ratio": 1.22, "n_experts": 384 }, "shared_expert": { "var64": 0.1877, "s0": 7.54, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.317, "s0": 9.43, "s0_s1": 1.12, "shape": [ 384, 7168 ] } }, "20": { "layer": 20, "is_moe": true, "routed_experts": { "median_var64": 0.1053, "q25_var64": 0.0901, "q75_var64": 0.122, "mean_s0": 6.56, "std_s0": 1.42, "mean_s0_ratio": 1.25, "n_experts": 384 }, "shared_expert": { "var64": 0.1874, "s0": 7.33, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3196, "s0": 9.25, "s0_s1": 1.14, "shape": [ 384, 7168 ] } }, "21": { "layer": 21, "is_moe": true, "routed_experts": { "median_var64": 0.107, "q25_var64": 0.0927, "q75_var64": 0.124, "mean_s0": 6.66, "std_s0": 1.46, "mean_s0_ratio": 1.24, "n_experts": 384 }, "shared_expert": { "var64": 0.1809, "s0": 8.29, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3178, "s0": 8.62, "s0_s1": 1.08, "shape": [ 384, 7168 ] } }, "22": { "layer": 22, "is_moe": true, "routed_experts": { "median_var64": 0.1082, "q25_var64": 0.0912, "q75_var64": 0.1254, "mean_s0": 6.84, "std_s0": 1.6, "mean_s0_ratio": 1.24, "n_experts": 384 }, "shared_expert": { "var64": 0.1963, "s0": 8.41, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3277, "s0": 9.11, "s0_s1": 1.2, "shape": [ 384, 7168 ] } }, "23": { "layer": 23, "is_moe": true, "routed_experts": { "median_var64": 0.105, "q25_var64": 0.0857, "q75_var64": 0.1226, "mean_s0": 6.57, "std_s0": 1.63, "mean_s0_ratio": 1.23, "n_experts": 384 }, "shared_expert": { "var64": 0.1836, "s0": 7.33, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3275, "s0": 9.25, "s0_s1": 1.25, "shape": [ 384, 7168 ] } }, "24": { "layer": 24, "is_moe": true, "routed_experts": { "median_var64": 0.1066, "q25_var64": 0.0887, "q75_var64": 0.1278, "mean_s0": 6.81, "std_s0": 1.67, "mean_s0_ratio": 1.23, "n_experts": 384 }, "shared_expert": { "var64": 0.201, "s0": 8.72, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.331, "s0": 8.97, "s0_s1": 1.15, "shape": [ 384, 7168 ] } }, "25": { "layer": 25, "is_moe": true, "routed_experts": { "median_var64": 0.1016, "q25_var64": 0.0864, "q75_var64": 0.1222, "mean_s0": 6.68, "std_s0": 1.76, "mean_s0_ratio": 1.23, "n_experts": 384 }, "shared_expert": { "var64": 0.2033, "s0": 8.59, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3307, "s0": 8.75, "s0_s1": 1.09, "shape": [ 384, 7168 ] } }, "26": { "layer": 26, "is_moe": true, "routed_experts": { "median_var64": 0.1003, "q25_var64": 0.0852, "q75_var64": 0.1226, "mean_s0": 6.65, "std_s0": 1.71, "mean_s0_ratio": 1.24, "n_experts": 384 }, "shared_expert": { "var64": 0.1798, "s0": 7.47, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3257, "s0": 8.48, "s0_s1": 1.11, "shape": [ 384, 7168 ] } }, "27": { "layer": 27, "is_moe": true, "routed_experts": { "median_var64": 0.0973, "q25_var64": 0.0849, "q75_var64": 0.1213, "mean_s0": 6.56, "std_s0": 1.75, "mean_s0_ratio": 1.21, "n_experts": 384 }, "shared_expert": { "var64": 0.2018, "s0": 9.15, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3246, "s0": 8.54, "s0_s1": 1.09, "shape": [ 384, 7168 ] } }, "28": { "layer": 28, "is_moe": true, "routed_experts": { "median_var64": 0.0938, "q25_var64": 0.084, "q75_var64": 0.1134, "mean_s0": 6.38, "std_s0": 1.69, "mean_s0_ratio": 1.22, "n_experts": 384 }, "shared_expert": { "var64": 0.1913, "s0": 8.05, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3196, "s0": 8.99, "s0_s1": 1.21, "shape": [ 384, 7168 ] } }, "29": { "layer": 29, "is_moe": true, "routed_experts": { "median_var64": 0.0923, "q25_var64": 0.0816, "q75_var64": 0.1099, "mean_s0": 6.17, "std_s0": 1.64, "mean_s0_ratio": 1.2, "n_experts": 384 }, "shared_expert": { "var64": 0.2067, "s0": 9.68, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3241, "s0": 8.27, "s0_s1": 1.11, "shape": [ 384, 7168 ] } }, "30": { "layer": 30, "is_moe": true, "routed_experts": { "median_var64": 0.0887, "q25_var64": 0.0808, "q75_var64": 0.1053, "mean_s0": 5.97, "std_s0": 1.51, "mean_s0_ratio": 1.19, "n_experts": 384 }, "shared_expert": { "var64": 0.198, "s0": 8.47, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3189, "s0": 7.42, "s0_s1": 1.03, "shape": [ 384, 7168 ] } }, "31": { "layer": 31, "is_moe": true, "routed_experts": { "median_var64": 0.0882, "q25_var64": 0.0805, "q75_var64": 0.1005, "mean_s0": 5.87, "std_s0": 1.46, "mean_s0_ratio": 1.2, "n_experts": 384 }, "shared_expert": { "var64": 0.1928, "s0": 7.81, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3221, "s0": 7.39, "s0_s1": 1.04, "shape": [ 384, 7168 ] } }, "32": { "layer": 32, "is_moe": true, "routed_experts": { "median_var64": 0.0854, "q25_var64": 0.08, "q75_var64": 0.099, "mean_s0": 5.76, "std_s0": 1.42, "mean_s0_ratio": 1.2, "n_experts": 384 }, "shared_expert": { "var64": 0.1734, "s0": 6.74, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3212, "s0": 7.2, "s0_s1": 1.01, "shape": [ 384, 7168 ] } }, "33": { "layer": 33, "is_moe": true, "routed_experts": { "median_var64": 0.0863, "q25_var64": 0.0797, "q75_var64": 0.0977, "mean_s0": 5.69, "std_s0": 1.39, "mean_s0_ratio": 1.18, "n_experts": 384 }, "shared_expert": { "var64": 0.1844, "s0": 7.76, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3126, "s0": 6.95, "s0_s1": 1.02, "shape": [ 384, 7168 ] } }, "34": { "layer": 34, "is_moe": true, "routed_experts": { "median_var64": 0.0855, "q25_var64": 0.0797, "q75_var64": 0.0937, "mean_s0": 5.47, "std_s0": 1.27, "mean_s0_ratio": 1.16, "n_experts": 384 }, "shared_expert": { "var64": 0.1759, "s0": 7.64, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3174, "s0": 7.07, "s0_s1": 1.04, "shape": [ 384, 7168 ] } }, "35": { "layer": 35, "is_moe": true, "routed_experts": { "median_var64": 0.0835, "q25_var64": 0.0798, "q75_var64": 0.0937, "mean_s0": 5.36, "std_s0": 1.24, "mean_s0_ratio": 1.15, "n_experts": 384 }, "shared_expert": { "var64": 0.1869, "s0": 8.0, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3224, "s0": 7.25, "s0_s1": 1.08, "shape": [ 384, 7168 ] } }, "36": { "layer": 36, "is_moe": true, "routed_experts": { "median_var64": 0.0838, "q25_var64": 0.0797, "q75_var64": 0.0944, "mean_s0": 5.35, "std_s0": 1.29, "mean_s0_ratio": 1.15, "n_experts": 384 }, "shared_expert": { "var64": 0.2021, "s0": 8.27, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3192, "s0": 7.19, "s0_s1": 1.08, "shape": [ 384, 7168 ] } }, "37": { "layer": 37, "is_moe": true, "routed_experts": { "median_var64": 0.0834, "q25_var64": 0.0794, "q75_var64": 0.0939, "mean_s0": 5.24, "std_s0": 1.25, "mean_s0_ratio": 1.13, "n_experts": 384 }, "shared_expert": { "var64": 0.1936, "s0": 8.42, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3227, "s0": 6.63, "s0_s1": 1.02, "shape": [ 384, 7168 ] } }, "38": { "layer": 38, "is_moe": true, "routed_experts": { "median_var64": 0.083, "q25_var64": 0.0789, "q75_var64": 0.0904, "mean_s0": 5.13, "std_s0": 1.17, "mean_s0_ratio": 1.13, "n_experts": 384 }, "shared_expert": { "var64": 0.1899, "s0": 7.06, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3145, "s0": 6.21, "s0_s1": 1.06, "shape": [ 384, 7168 ] } }, "39": { "layer": 39, "is_moe": true, "routed_experts": { "median_var64": 0.0823, "q25_var64": 0.079, "q75_var64": 0.0911, "mean_s0": 5.02, "std_s0": 1.16, "mean_s0_ratio": 1.11, "n_experts": 384 }, "shared_expert": { "var64": 0.1876, "s0": 6.88, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3198, "s0": 6.36, "s0_s1": 1.07, "shape": [ 384, 7168 ] } }, "40": { "layer": 40, "is_moe": true, "routed_experts": { "median_var64": 0.0823, "q25_var64": 0.0796, "q75_var64": 0.0878, "mean_s0": 4.89, "std_s0": 1.02, "mean_s0_ratio": 1.1, "n_experts": 384 }, "shared_expert": { "var64": 0.1709, "s0": 6.94, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3158, "s0": 6.25, "s0_s1": 1.07, "shape": [ 384, 7168 ] } }, "41": { "layer": 41, "is_moe": true, "routed_experts": { "median_var64": 0.0829, "q25_var64": 0.0795, "q75_var64": 0.0903, "mean_s0": 4.91, "std_s0": 1.04, "mean_s0_ratio": 1.1, "n_experts": 384 }, "shared_expert": { "var64": 0.1735, "s0": 7.57, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3159, "s0": 6.03, "s0_s1": 1.1, "shape": [ 384, 7168 ] } }, "42": { "layer": 42, "is_moe": true, "routed_experts": { "median_var64": 0.0835, "q25_var64": 0.0802, "q75_var64": 0.089, "mean_s0": 4.93, "std_s0": 1.05, "mean_s0_ratio": 1.1, "n_experts": 384 }, "shared_expert": { "var64": 0.1951, "s0": 7.85, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3156, "s0": 5.87, "s0_s1": 1.12, "shape": [ 384, 7168 ] } }, "43": { "layer": 43, "is_moe": true, "routed_experts": { "median_var64": 0.0824, "q25_var64": 0.0786, "q75_var64": 0.0907, "mean_s0": 4.92, "std_s0": 1.05, "mean_s0_ratio": 1.1, "n_experts": 384 }, "shared_expert": { "var64": 0.1947, "s0": 7.69, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3135, "s0": 5.64, "s0_s1": 1.09, "shape": [ 384, 7168 ] } }, "44": { "layer": 44, "is_moe": true, "routed_experts": { "median_var64": 0.0827, "q25_var64": 0.0791, "q75_var64": 0.0905, "mean_s0": 5.0, "std_s0": 1.14, "mean_s0_ratio": 1.11, "n_experts": 384 }, "shared_expert": { "var64": 0.2133, "s0": 8.59, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3076, "s0": 5.35, "s0_s1": 1.08, "shape": [ 384, 7168 ] } }, "45": { "layer": 45, "is_moe": true, "routed_experts": { "median_var64": 0.0826, "q25_var64": 0.0792, "q75_var64": 0.0883, "mean_s0": 4.85, "std_s0": 1.01, "mean_s0_ratio": 1.1, "n_experts": 384 }, "shared_expert": { "var64": 0.2006, "s0": 7.54, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.312, "s0": 5.27, "s0_s1": 1.09, "shape": [ 384, 7168 ] } }, "46": { "layer": 46, "is_moe": true, "routed_experts": { "median_var64": 0.0833, "q25_var64": 0.0795, "q75_var64": 0.0896, "mean_s0": 4.89, "std_s0": 1.05, "mean_s0_ratio": 1.09, "n_experts": 384 }, "shared_expert": { "var64": 0.1878, "s0": 7.28, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3125, "s0": 5.16, "s0_s1": 1.1, "shape": [ 384, 7168 ] } }, "47": { "layer": 47, "is_moe": true, "routed_experts": { "median_var64": 0.0833, "q25_var64": 0.0799, "q75_var64": 0.0893, "mean_s0": 4.79, "std_s0": 0.91, "mean_s0_ratio": 1.09, "n_experts": 384 }, "shared_expert": { "var64": 0.1801, "s0": 7.55, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3063, "s0": 4.89, "s0_s1": 1.07, "shape": [ 384, 7168 ] } }, "48": { "layer": 48, "is_moe": true, "routed_experts": { "median_var64": 0.0831, "q25_var64": 0.0806, "q75_var64": 0.0897, "mean_s0": 4.78, "std_s0": 0.94, "mean_s0_ratio": 1.08, "n_experts": 384 }, "shared_expert": { "var64": 0.181, "s0": 8.07, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3137, "s0": 5.08, "s0_s1": 1.08, "shape": [ 384, 7168 ] } }, "49": { "layer": 49, "is_moe": true, "routed_experts": { "median_var64": 0.0851, "q25_var64": 0.0812, "q75_var64": 0.0917, "mean_s0": 4.93, "std_s0": 1.03, "mean_s0_ratio": 1.09, "n_experts": 384 }, "shared_expert": { "var64": 0.1804, "s0": 7.38, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3127, "s0": 4.81, "s0_s1": 1.07, "shape": [ 384, 7168 ] } }, "50": { "layer": 50, "is_moe": true, "routed_experts": { "median_var64": 0.0848, "q25_var64": 0.0818, "q75_var64": 0.0917, "mean_s0": 5.01, "std_s0": 1.19, "mean_s0_ratio": 1.09, "n_experts": 384 }, "shared_expert": { "var64": 0.1817, "s0": 7.56, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3103, "s0": 4.57, "s0_s1": 1.07, "shape": [ 384, 7168 ] } }, "51": { "layer": 51, "is_moe": true, "routed_experts": { "median_var64": 0.085, "q25_var64": 0.0818, "q75_var64": 0.0929, "mean_s0": 5.01, "std_s0": 1.18, "mean_s0_ratio": 1.1, "n_experts": 384 }, "shared_expert": { "var64": 0.1766, "s0": 7.63, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.308, "s0": 4.49, "s0_s1": 1.12, "shape": [ 384, 7168 ] } }, "52": { "layer": 52, "is_moe": true, "routed_experts": { "median_var64": 0.0876, "q25_var64": 0.0833, "q75_var64": 0.0938, "mean_s0": 5.25, "std_s0": 1.65, "mean_s0_ratio": 1.12, "n_experts": 384 }, "shared_expert": { "var64": 0.1841, "s0": 7.68, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3099, "s0": 4.48, "s0_s1": 1.11, "shape": [ 384, 7168 ] } }, "53": { "layer": 53, "is_moe": true, "routed_experts": { "median_var64": 0.087, "q25_var64": 0.0833, "q75_var64": 0.0947, "mean_s0": 5.26, "std_s0": 1.46, "mean_s0_ratio": 1.11, "n_experts": 384 }, "shared_expert": { "var64": 0.1912, "s0": 8.26, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3049, "s0": 4.14, "s0_s1": 1.08, "shape": [ 384, 7168 ] } }, "54": { "layer": 54, "is_moe": true, "routed_experts": { "median_var64": 0.0887, "q25_var64": 0.0843, "q75_var64": 0.0965, "mean_s0": 5.51, "std_s0": 1.41, "mean_s0_ratio": 1.14, "n_experts": 384 }, "shared_expert": { "var64": 0.2034, "s0": 8.45, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3038, "s0": 4.05, "s0_s1": 1.05, "shape": [ 384, 7168 ] } }, "55": { "layer": 55, "is_moe": true, "routed_experts": { "median_var64": 0.086, "q25_var64": 0.0811, "q75_var64": 0.0957, "mean_s0": 5.6, "std_s0": 1.82, "mean_s0_ratio": 1.15, "n_experts": 384 }, "shared_expert": { "var64": 0.2073, "s0": 7.98, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3091, "s0": 4.33, "s0_s1": 1.15, "shape": [ 384, 7168 ] } }, "56": { "layer": 56, "is_moe": true, "routed_experts": { "median_var64": 0.0863, "q25_var64": 0.0815, "q75_var64": 0.0965, "mean_s0": 5.69, "std_s0": 1.93, "mean_s0_ratio": 1.16, "n_experts": 384 }, "shared_expert": { "var64": 0.2141, "s0": 8.61, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3167, "s0": 4.19, "s0_s1": 1.11, "shape": [ 384, 7168 ] } }, "57": { "layer": 57, "is_moe": true, "routed_experts": { "median_var64": 0.086, "q25_var64": 0.0797, "q75_var64": 0.0987, "mean_s0": 5.79, "std_s0": 2.19, "mean_s0_ratio": 1.16, "n_experts": 384 }, "shared_expert": { "var64": 0.2212, "s0": 10.11, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3266, "s0": 4.64, "s0_s1": 1.2, "shape": [ 384, 7168 ] } }, "58": { "layer": 58, "is_moe": true, "routed_experts": { "median_var64": 0.0853, "q25_var64": 0.0797, "q75_var64": 0.0955, "mean_s0": 5.75, "std_s0": 2.08, "mean_s0_ratio": 1.15, "n_experts": 384 }, "shared_expert": { "var64": 0.1874, "s0": 7.05, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3379, "s0": 4.87, "s0_s1": 1.13, "shape": [ 384, 7168 ] } }, "59": { "layer": 59, "is_moe": true, "routed_experts": { "median_var64": 0.0846, "q25_var64": 0.0797, "q75_var64": 0.0928, "mean_s0": 5.58, "std_s0": 1.81, "mean_s0_ratio": 1.13, "n_experts": 384 }, "shared_expert": { "var64": 0.1663, "s0": 5.87, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3446, "s0": 5.4, "s0_s1": 1.12, "shape": [ 384, 7168 ] } }, "60": { "layer": 60, "is_moe": true, "routed_experts": { "median_var64": 0.0863, "q25_var64": 0.0811, "q75_var64": 0.0944, "mean_s0": 5.52, "std_s0": 1.19, "mean_s0_ratio": 1.1, "n_experts": 384 }, "shared_expert": { "var64": 0.2663, "s0": 6.47, "shape": [ 7168, 2048 ] }, "router": { "var64": 0.3667, "s0": 5.54, "s0_s1": 1.06, "shape": [ 384, 7168 ] } } }