{ "version": 2, "model_id": "unsloth/DeepSeek-V4-Flash", "model_config": { "moe": { "model_type": "deepseek_v4", "num_layers": 43, "hidden_size": 4096, "intermediate_size": 18432, "moe_intermediate_size": 2048, "n_routed_experts": 256, "n_shared_experts": 1, "num_experts_per_tok": 6, "first_k_dense_replace": 0, "torch_dtype": "bfloat16", "quant_method": "fp8", "templates": { "expert_gate_proj": "layers.{layer}.ffn.experts.{e}.w1.weight", "expert_up_proj": "layers.{layer}.ffn.experts.{e}.w3.weight", "expert_down_proj": "layers.{layer}.ffn.experts.{e}.w2.weight", "fused_gate_proj": "layers.{layer}.ffn.experts.w1", "fused_down_proj": "layers.{layer}.ffn.experts.w2", "shared_down_proj": [ "layers.{layer}.ffn.shared_experts.w2.weight" ], "router": [ "layers.{layer}.ffn.gate.weight", "layers.{layer}.ffn.router.weight" ], "dense_down_proj": [ "layers.{layer}.ffn.w2.weight" ] } } }, "num_feats": 64, "dtype": "float32", "gate_vector_semantics": "right_singular_vectors_of_gate_proj", "layers": { "0": { "shape": [ 256, 64, 4096 ], "file_offset": 0 }, "1": { "shape": [ 256, 64, 4096 ], "file_offset": 268435456 }, "2": { "shape": [ 256, 64, 4096 ], "file_offset": 536870912 }, "3": { "shape": [ 256, 64, 4096 ], "file_offset": 805306368 }, "4": { "shape": [ 256, 64, 4096 ], "file_offset": 1073741824 }, "5": { "shape": [ 256, 64, 4096 ], "file_offset": 1342177280 }, "6": { "shape": [ 256, 64, 4096 ], "file_offset": 1610612736 }, "7": { "shape": [ 256, 64, 4096 ], "file_offset": 1879048192 }, "8": { "shape": [ 256, 64, 4096 ], "file_offset": 2147483648 }, "9": { "shape": [ 256, 64, 4096 ], "file_offset": 2415919104 }, "10": { "shape": [ 256, 64, 4096 ], "file_offset": 2684354560 }, "11": { "shape": [ 256, 64, 4096 ], "file_offset": 2952790016 }, "12": { "shape": [ 256, 64, 4096 ], "file_offset": 3221225472 }, "13": { "shape": [ 256, 64, 4096 ], "file_offset": 3489660928 }, "14": { "shape": [ 256, 64, 4096 ], "file_offset": 3758096384 }, "15": { "shape": [ 256, 64, 4096 ], "file_offset": 4026531840 }, "16": { "shape": [ 256, 64, 4096 ], "file_offset": 4294967296 }, "17": { "shape": [ 256, 64, 4096 ], "file_offset": 4563402752 }, "18": { "shape": [ 256, 64, 4096 ], "file_offset": 4831838208 }, "19": { "shape": [ 256, 64, 4096 ], "file_offset": 5100273664 }, "20": { "shape": [ 256, 64, 4096 ], "file_offset": 5368709120 }, "21": { "shape": [ 256, 64, 4096 ], "file_offset": 5637144576 }, "22": { "shape": [ 256, 64, 4096 ], "file_offset": 5905580032 }, "23": { "shape": [ 256, 64, 4096 ], "file_offset": 6174015488 }, "24": { "shape": [ 256, 64, 4096 ], "file_offset": 6442450944 }, "25": { "shape": [ 256, 64, 4096 ], "file_offset": 6710886400 }, "26": { "shape": [ 256, 64, 4096 ], "file_offset": 6979321856 }, "27": { "shape": [ 256, 64, 4096 ], "file_offset": 7247757312 }, "28": { "shape": [ 256, 64, 4096 ], "file_offset": 7516192768 }, "29": { "shape": [ 256, 64, 4096 ], "file_offset": 7784628224 }, "30": { "shape": [ 256, 64, 4096 ], "file_offset": 8053063680 }, "31": { "shape": [ 256, 64, 4096 ], "file_offset": 8321499136 }, "32": { "shape": [ 256, 64, 4096 ], "file_offset": 8589934592 }, "33": { "shape": [ 256, 64, 4096 ], "file_offset": 8858370048 }, "34": { "shape": [ 256, 64, 4096 ], "file_offset": 9126805504 }, "35": { "shape": [ 256, 64, 4096 ], "file_offset": 9395240960 }, "36": { "shape": [ 256, 64, 4096 ], "file_offset": 9663676416 }, "37": { "shape": [ 256, 64, 4096 ], "file_offset": 9932111872 }, "38": { "shape": [ 256, 64, 4096 ], "file_offset": 10200547328 }, "39": { "shape": [ 256, 64, 4096 ], "file_offset": 10468982784 }, "40": { "shape": [ 256, 64, 4096 ], "file_offset": 10737418240 }, "41": { "shape": [ 256, 64, 4096 ], "file_offset": 11005853696 }, "42": { "shape": [ 256, 64, 4096 ], "file_offset": 11274289152 } }, "layer_stats": { "0": { "median_var64": 0.1002, "q25_var64": 0.0998, "q75_var64": 0.1007, "n_experts": 256 }, "1": { "median_var64": 0.0916, "q25_var64": 0.0914, "q75_var64": 0.0917, "n_experts": 256 }, "2": { "median_var64": 0.0868, "q25_var64": 0.0868, "q75_var64": 0.0869, "n_experts": 256 }, "3": { "median_var64": 0.104, "q25_var64": 0.0981, "q75_var64": 0.1091, "n_experts": 256 }, "4": { "median_var64": 0.108, "q25_var64": 0.1023, "q75_var64": 0.1174, "n_experts": 256 }, "5": { "median_var64": 0.1097, "q25_var64": 0.1016, "q75_var64": 0.1185, "n_experts": 256 }, "6": { "median_var64": 0.1092, "q25_var64": 0.1021, "q75_var64": 0.1159, "n_experts": 256 }, "7": { "median_var64": 0.1071, "q25_var64": 0.0997, "q75_var64": 0.1169, "n_experts": 256 }, "8": { "median_var64": 0.1092, "q25_var64": 0.1028, "q75_var64": 0.1163, "n_experts": 256 }, "9": { "median_var64": 0.1081, "q25_var64": 0.1013, "q75_var64": 0.1201, "n_experts": 256 }, "10": { "median_var64": 0.116, "q25_var64": 0.1083, "q75_var64": 0.1319, "n_experts": 256 }, "11": { "median_var64": 0.1182, "q25_var64": 0.1054, "q75_var64": 0.1328, "n_experts": 256 }, "12": { "median_var64": 0.1284, "q25_var64": 0.1123, "q75_var64": 0.1444, "n_experts": 256 }, "13": { "median_var64": 0.1291, "q25_var64": 0.115, "q75_var64": 0.1447, "n_experts": 256 }, "14": { "median_var64": 0.1378, "q25_var64": 0.1191, "q75_var64": 0.154, "n_experts": 256 }, "15": { "median_var64": 0.1421, "q25_var64": 0.1152, "q75_var64": 0.1615, "n_experts": 256 }, "16": { "median_var64": 0.1533, "q25_var64": 0.134, "q75_var64": 0.1781, "n_experts": 256 }, "17": { "median_var64": 0.1587, "q25_var64": 0.1385, "q75_var64": 0.1802, "n_experts": 256 }, "18": { "median_var64": 0.1649, "q25_var64": 0.1433, "q75_var64": 0.1846, "n_experts": 256 }, "19": { "median_var64": 0.1268, "q25_var64": 0.1112, "q75_var64": 0.1432, "n_experts": 256 }, "20": { "median_var64": 0.1575, "q25_var64": 0.1407, "q75_var64": 0.1816, "n_experts": 256 }, "21": { "median_var64": 0.1449, "q25_var64": 0.1164, "q75_var64": 0.1679, "n_experts": 256 }, "22": { "median_var64": 0.1376, "q25_var64": 0.1083, "q75_var64": 0.1613, "n_experts": 256 }, "23": { "median_var64": 0.0919, "q25_var64": 0.0881, "q75_var64": 0.1025, "n_experts": 256 }, "24": { "median_var64": 0.1051, "q25_var64": 0.0942, "q75_var64": 0.1209, "n_experts": 256 }, "25": { "median_var64": 0.0918, "q25_var64": 0.0877, "q75_var64": 0.105, "n_experts": 256 }, "26": { "median_var64": 0.0965, "q25_var64": 0.0908, "q75_var64": 0.1096, "n_experts": 256 }, "27": { "median_var64": 0.0869, "q25_var64": 0.0852, "q75_var64": 0.0934, "n_experts": 256 }, "28": { "median_var64": 0.0939, "q25_var64": 0.0892, "q75_var64": 0.1041, "n_experts": 256 }, "29": { "median_var64": 0.0931, "q25_var64": 0.0877, "q75_var64": 0.109, "n_experts": 256 }, "30": { "median_var64": 0.0944, "q25_var64": 0.0886, "q75_var64": 0.1132, "n_experts": 256 }, "31": { "median_var64": 0.0917, "q25_var64": 0.0875, "q75_var64": 0.1096, "n_experts": 256 }, "32": { "median_var64": 0.0953, "q25_var64": 0.0901, "q75_var64": 0.1042, "n_experts": 256 }, "33": { "median_var64": 0.0947, "q25_var64": 0.0892, "q75_var64": 0.1062, "n_experts": 256 }, "34": { "median_var64": 0.0925, "q25_var64": 0.0893, "q75_var64": 0.103, "n_experts": 256 }, "35": { "median_var64": 0.0989, "q25_var64": 0.0919, "q75_var64": 0.1154, "n_experts": 256 }, "36": { "median_var64": 0.0964, "q25_var64": 0.0902, "q75_var64": 0.1098, "n_experts": 256 }, "37": { "median_var64": 0.0974, "q25_var64": 0.0916, "q75_var64": 0.1123, "n_experts": 256 }, "38": { "median_var64": 0.1017, "q25_var64": 0.0939, "q75_var64": 0.1144, "n_experts": 256 }, "39": { "median_var64": 0.1248, "q25_var64": 0.112, "q75_var64": 0.1414, "n_experts": 256 }, "40": { "median_var64": 0.1186, "q25_var64": 0.1047, "q75_var64": 0.1371, "n_experts": 256 }, "41": { "median_var64": 0.1214, "q25_var64": 0.1065, "q75_var64": 0.1415, "n_experts": 256 }, "42": { "median_var64": 0.1191, "q25_var64": 0.1082, "q75_var64": 0.1362, "n_experts": 256 } } }