| { |
| "version": 2, |
| "model_id": "unsloth/DeepSeek-V4-Flash", |
| "model_config": { |
| "moe": { |
| "model_type": "deepseek_v4", |
| "num_layers": 43, |
| "hidden_size": 4096, |
| "intermediate_size": 18432, |
| "moe_intermediate_size": 2048, |
| "n_routed_experts": 256, |
| "n_shared_experts": 1, |
| "num_experts_per_tok": 6, |
| "first_k_dense_replace": 0, |
| "torch_dtype": "bfloat16", |
| "quant_method": "fp8", |
| "templates": { |
| "expert_gate_proj": "layers.{layer}.ffn.experts.{e}.w1.weight", |
| "expert_up_proj": "layers.{layer}.ffn.experts.{e}.w3.weight", |
| "expert_down_proj": "layers.{layer}.ffn.experts.{e}.w2.weight", |
| "fused_gate_proj": "layers.{layer}.ffn.experts.w1", |
| "fused_down_proj": "layers.{layer}.ffn.experts.w2", |
| "shared_down_proj": [ |
| "layers.{layer}.ffn.shared_experts.w2.weight" |
| ], |
| "router": [ |
| "layers.{layer}.ffn.gate.weight", |
| "layers.{layer}.ffn.router.weight" |
| ], |
| "dense_down_proj": [ |
| "layers.{layer}.ffn.w2.weight" |
| ] |
| } |
| } |
| }, |
| "num_feats": 64, |
| "dtype": "float32", |
| "gate_vector_semantics": "right_singular_vectors_of_gate_proj", |
| "layers": { |
| "0": { |
| "shape": [ |
| 256, |
| 64, |
| 4096 |
| ], |
| "file_offset": 0 |
| }, |
| "1": { |
| "shape": [ |
| 256, |
| 64, |
| 4096 |
| ], |
| "file_offset": 268435456 |
| }, |
| "2": { |
| "shape": [ |
| 256, |
| 64, |
| 4096 |
| ], |
| "file_offset": 536870912 |
| }, |
| "3": { |
| "shape": [ |
| 256, |
| 64, |
| 4096 |
| ], |
| "file_offset": 805306368 |
| }, |
| "4": { |
| "shape": [ |
| 256, |
| 64, |
| 4096 |
| ], |
| "file_offset": 1073741824 |
| }, |
| "5": { |
| "shape": [ |
| 256, |
| 64, |
| 4096 |
| ], |
| "file_offset": 1342177280 |
| }, |
| "6": { |
| "shape": [ |
| 256, |
| 64, |
| 4096 |
| ], |
| "file_offset": 1610612736 |
| }, |
| "7": { |
| "shape": [ |
| 256, |
| 64, |
| 4096 |
| ], |
| "file_offset": 1879048192 |
| }, |
| "8": { |
| "shape": [ |
| 256, |
| 64, |
| 4096 |
| ], |
| "file_offset": 2147483648 |
| }, |
| "9": { |
| "shape": [ |
| 256, |
| 64, |
| 4096 |
| ], |
| "file_offset": 2415919104 |
| }, |
| "10": { |
| "shape": [ |
| 256, |
| 64, |
| 4096 |
| ], |
| "file_offset": 2684354560 |
| }, |
| "11": { |
| "shape": [ |
| 256, |
| 64, |
| 4096 |
| ], |
| "file_offset": 2952790016 |
| }, |
| "12": { |
| "shape": [ |
| 256, |
| 64, |
| 4096 |
| ], |
| "file_offset": 3221225472 |
| }, |
| "13": { |
| "shape": [ |
| 256, |
| 64, |
| 4096 |
| ], |
| "file_offset": 3489660928 |
| }, |
| "14": { |
| "shape": [ |
| 256, |
| 64, |
| 4096 |
| ], |
| "file_offset": 3758096384 |
| }, |
| "15": { |
| "shape": [ |
| 256, |
| 64, |
| 4096 |
| ], |
| "file_offset": 4026531840 |
| }, |
| "16": { |
| "shape": [ |
| 256, |
| 64, |
| 4096 |
| ], |
| "file_offset": 4294967296 |
| }, |
| "17": { |
| "shape": [ |
| 256, |
| 64, |
| 4096 |
| ], |
| "file_offset": 4563402752 |
| }, |
| "18": { |
| "shape": [ |
| 256, |
| 64, |
| 4096 |
| ], |
| "file_offset": 4831838208 |
| }, |
| "19": { |
| "shape": [ |
| 256, |
| 64, |
| 4096 |
| ], |
| "file_offset": 5100273664 |
| }, |
| "20": { |
| "shape": [ |
| 256, |
| 64, |
| 4096 |
| ], |
| "file_offset": 5368709120 |
| }, |
| "21": { |
| "shape": [ |
| 256, |
| 64, |
| 4096 |
| ], |
| "file_offset": 5637144576 |
| }, |
| "22": { |
| "shape": [ |
| 256, |
| 64, |
| 4096 |
| ], |
| "file_offset": 5905580032 |
| }, |
| "23": { |
| "shape": [ |
| 256, |
| 64, |
| 4096 |
| ], |
| "file_offset": 6174015488 |
| }, |
| "24": { |
| "shape": [ |
| 256, |
| 64, |
| 4096 |
| ], |
| "file_offset": 6442450944 |
| }, |
| "25": { |
| "shape": [ |
| 256, |
| 64, |
| 4096 |
| ], |
| "file_offset": 6710886400 |
| }, |
| "26": { |
| "shape": [ |
| 256, |
| 64, |
| 4096 |
| ], |
| "file_offset": 6979321856 |
| }, |
| "27": { |
| "shape": [ |
| 256, |
| 64, |
| 4096 |
| ], |
| "file_offset": 7247757312 |
| }, |
| "28": { |
| "shape": [ |
| 256, |
| 64, |
| 4096 |
| ], |
| "file_offset": 7516192768 |
| }, |
| "29": { |
| "shape": [ |
| 256, |
| 64, |
| 4096 |
| ], |
| "file_offset": 7784628224 |
| }, |
| "30": { |
| "shape": [ |
| 256, |
| 64, |
| 4096 |
| ], |
| "file_offset": 8053063680 |
| }, |
| "31": { |
| "shape": [ |
| 256, |
| 64, |
| 4096 |
| ], |
| "file_offset": 8321499136 |
| }, |
| "32": { |
| "shape": [ |
| 256, |
| 64, |
| 4096 |
| ], |
| "file_offset": 8589934592 |
| }, |
| "33": { |
| "shape": [ |
| 256, |
| 64, |
| 4096 |
| ], |
| "file_offset": 8858370048 |
| }, |
| "34": { |
| "shape": [ |
| 256, |
| 64, |
| 4096 |
| ], |
| "file_offset": 9126805504 |
| }, |
| "35": { |
| "shape": [ |
| 256, |
| 64, |
| 4096 |
| ], |
| "file_offset": 9395240960 |
| }, |
| "36": { |
| "shape": [ |
| 256, |
| 64, |
| 4096 |
| ], |
| "file_offset": 9663676416 |
| }, |
| "37": { |
| "shape": [ |
| 256, |
| 64, |
| 4096 |
| ], |
| "file_offset": 9932111872 |
| }, |
| "38": { |
| "shape": [ |
| 256, |
| 64, |
| 4096 |
| ], |
| "file_offset": 10200547328 |
| }, |
| "39": { |
| "shape": [ |
| 256, |
| 64, |
| 4096 |
| ], |
| "file_offset": 10468982784 |
| }, |
| "40": { |
| "shape": [ |
| 256, |
| 64, |
| 4096 |
| ], |
| "file_offset": 10737418240 |
| }, |
| "41": { |
| "shape": [ |
| 256, |
| 64, |
| 4096 |
| ], |
| "file_offset": 11005853696 |
| }, |
| "42": { |
| "shape": [ |
| 256, |
| 64, |
| 4096 |
| ], |
| "file_offset": 11274289152 |
| } |
| }, |
| "layer_stats": { |
| "0": { |
| "median_var64": 0.1002, |
| "q25_var64": 0.0998, |
| "q75_var64": 0.1007, |
| "n_experts": 256 |
| }, |
| "1": { |
| "median_var64": 0.0916, |
| "q25_var64": 0.0914, |
| "q75_var64": 0.0917, |
| "n_experts": 256 |
| }, |
| "2": { |
| "median_var64": 0.0868, |
| "q25_var64": 0.0868, |
| "q75_var64": 0.0869, |
| "n_experts": 256 |
| }, |
| "3": { |
| "median_var64": 0.104, |
| "q25_var64": 0.0981, |
| "q75_var64": 0.1091, |
| "n_experts": 256 |
| }, |
| "4": { |
| "median_var64": 0.108, |
| "q25_var64": 0.1023, |
| "q75_var64": 0.1174, |
| "n_experts": 256 |
| }, |
| "5": { |
| "median_var64": 0.1097, |
| "q25_var64": 0.1016, |
| "q75_var64": 0.1185, |
| "n_experts": 256 |
| }, |
| "6": { |
| "median_var64": 0.1092, |
| "q25_var64": 0.1021, |
| "q75_var64": 0.1159, |
| "n_experts": 256 |
| }, |
| "7": { |
| "median_var64": 0.1071, |
| "q25_var64": 0.0997, |
| "q75_var64": 0.1169, |
| "n_experts": 256 |
| }, |
| "8": { |
| "median_var64": 0.1092, |
| "q25_var64": 0.1028, |
| "q75_var64": 0.1163, |
| "n_experts": 256 |
| }, |
| "9": { |
| "median_var64": 0.1081, |
| "q25_var64": 0.1013, |
| "q75_var64": 0.1201, |
| "n_experts": 256 |
| }, |
| "10": { |
| "median_var64": 0.116, |
| "q25_var64": 0.1083, |
| "q75_var64": 0.1319, |
| "n_experts": 256 |
| }, |
| "11": { |
| "median_var64": 0.1182, |
| "q25_var64": 0.1054, |
| "q75_var64": 0.1328, |
| "n_experts": 256 |
| }, |
| "12": { |
| "median_var64": 0.1284, |
| "q25_var64": 0.1123, |
| "q75_var64": 0.1444, |
| "n_experts": 256 |
| }, |
| "13": { |
| "median_var64": 0.1291, |
| "q25_var64": 0.115, |
| "q75_var64": 0.1447, |
| "n_experts": 256 |
| }, |
| "14": { |
| "median_var64": 0.1378, |
| "q25_var64": 0.1191, |
| "q75_var64": 0.154, |
| "n_experts": 256 |
| }, |
| "15": { |
| "median_var64": 0.1421, |
| "q25_var64": 0.1152, |
| "q75_var64": 0.1615, |
| "n_experts": 256 |
| }, |
| "16": { |
| "median_var64": 0.1533, |
| "q25_var64": 0.134, |
| "q75_var64": 0.1781, |
| "n_experts": 256 |
| }, |
| "17": { |
| "median_var64": 0.1587, |
| "q25_var64": 0.1385, |
| "q75_var64": 0.1802, |
| "n_experts": 256 |
| }, |
| "18": { |
| "median_var64": 0.1649, |
| "q25_var64": 0.1433, |
| "q75_var64": 0.1846, |
| "n_experts": 256 |
| }, |
| "19": { |
| "median_var64": 0.1268, |
| "q25_var64": 0.1112, |
| "q75_var64": 0.1432, |
| "n_experts": 256 |
| }, |
| "20": { |
| "median_var64": 0.1575, |
| "q25_var64": 0.1407, |
| "q75_var64": 0.1816, |
| "n_experts": 256 |
| }, |
| "21": { |
| "median_var64": 0.1449, |
| "q25_var64": 0.1164, |
| "q75_var64": 0.1679, |
| "n_experts": 256 |
| }, |
| "22": { |
| "median_var64": 0.1376, |
| "q25_var64": 0.1083, |
| "q75_var64": 0.1613, |
| "n_experts": 256 |
| }, |
| "23": { |
| "median_var64": 0.0919, |
| "q25_var64": 0.0881, |
| "q75_var64": 0.1025, |
| "n_experts": 256 |
| }, |
| "24": { |
| "median_var64": 0.1051, |
| "q25_var64": 0.0942, |
| "q75_var64": 0.1209, |
| "n_experts": 256 |
| }, |
| "25": { |
| "median_var64": 0.0918, |
| "q25_var64": 0.0877, |
| "q75_var64": 0.105, |
| "n_experts": 256 |
| }, |
| "26": { |
| "median_var64": 0.0965, |
| "q25_var64": 0.0908, |
| "q75_var64": 0.1096, |
| "n_experts": 256 |
| }, |
| "27": { |
| "median_var64": 0.0869, |
| "q25_var64": 0.0852, |
| "q75_var64": 0.0934, |
| "n_experts": 256 |
| }, |
| "28": { |
| "median_var64": 0.0939, |
| "q25_var64": 0.0892, |
| "q75_var64": 0.1041, |
| "n_experts": 256 |
| }, |
| "29": { |
| "median_var64": 0.0931, |
| "q25_var64": 0.0877, |
| "q75_var64": 0.109, |
| "n_experts": 256 |
| }, |
| "30": { |
| "median_var64": 0.0944, |
| "q25_var64": 0.0886, |
| "q75_var64": 0.1132, |
| "n_experts": 256 |
| }, |
| "31": { |
| "median_var64": 0.0917, |
| "q25_var64": 0.0875, |
| "q75_var64": 0.1096, |
| "n_experts": 256 |
| }, |
| "32": { |
| "median_var64": 0.0953, |
| "q25_var64": 0.0901, |
| "q75_var64": 0.1042, |
| "n_experts": 256 |
| }, |
| "33": { |
| "median_var64": 0.0947, |
| "q25_var64": 0.0892, |
| "q75_var64": 0.1062, |
| "n_experts": 256 |
| }, |
| "34": { |
| "median_var64": 0.0925, |
| "q25_var64": 0.0893, |
| "q75_var64": 0.103, |
| "n_experts": 256 |
| }, |
| "35": { |
| "median_var64": 0.0989, |
| "q25_var64": 0.0919, |
| "q75_var64": 0.1154, |
| "n_experts": 256 |
| }, |
| "36": { |
| "median_var64": 0.0964, |
| "q25_var64": 0.0902, |
| "q75_var64": 0.1098, |
| "n_experts": 256 |
| }, |
| "37": { |
| "median_var64": 0.0974, |
| "q25_var64": 0.0916, |
| "q75_var64": 0.1123, |
| "n_experts": 256 |
| }, |
| "38": { |
| "median_var64": 0.1017, |
| "q25_var64": 0.0939, |
| "q75_var64": 0.1144, |
| "n_experts": 256 |
| }, |
| "39": { |
| "median_var64": 0.1248, |
| "q25_var64": 0.112, |
| "q75_var64": 0.1414, |
| "n_experts": 256 |
| }, |
| "40": { |
| "median_var64": 0.1186, |
| "q25_var64": 0.1047, |
| "q75_var64": 0.1371, |
| "n_experts": 256 |
| }, |
| "41": { |
| "median_var64": 0.1214, |
| "q25_var64": 0.1065, |
| "q75_var64": 0.1415, |
| "n_experts": 256 |
| }, |
| "42": { |
| "median_var64": 0.1191, |
| "q25_var64": 0.1082, |
| "q75_var64": 0.1362, |
| "n_experts": 256 |
| } |
| } |
| } |