mikeumus-divincian's picture
Add meta.json
b5d3986 verified
{
"model_id": "unsloth/DeepSeek-V4-Flash",
"moe_config": {
"model_type": "deepseek_v4",
"num_layers": 43,
"hidden_size": 4096,
"intermediate_size": 18432,
"moe_intermediate_size": 2048,
"n_routed_experts": 256,
"n_shared_experts": 1,
"num_experts_per_tok": 6,
"first_k_dense_replace": 0,
"torch_dtype": "bfloat16",
"quant_method": "fp8",
"templates": {
"expert_gate_proj": "layers.{layer}.ffn.experts.{e}.w1.weight",
"expert_up_proj": "layers.{layer}.ffn.experts.{e}.w3.weight",
"expert_down_proj": "layers.{layer}.ffn.experts.{e}.w2.weight",
"fused_gate_proj": "layers.{layer}.ffn.experts.w1",
"fused_down_proj": "layers.{layer}.ffn.experts.w2",
"shared_down_proj": [
"layers.{layer}.ffn.shared_experts.w2.weight"
],
"router": [
"layers.{layer}.ffn.gate.weight",
"layers.{layer}.ffn.router.weight"
],
"dense_down_proj": [
"layers.{layer}.ffn.w2.weight"
]
}
},
"num_feats": 64,
"provenance": {
"aggregator_type": "moe",
"quant_format": "fp8",
"probe_mode": "weight_svd_per_expert",
"fp8_handling": "cast_to_bfloat16_for_svd"
}
}