{ "model_id": "deepseek-ai/DeepSeek-V4-Pro", "moe_config": { "model_type": "deepseek_v4", "num_layers": 61, "hidden_size": 7168, "intermediate_size": 18432, "moe_intermediate_size": 3072, "n_routed_experts": 384, "n_shared_experts": 1, "num_experts_per_tok": 6, "first_k_dense_replace": 0, "torch_dtype": "bfloat16", "quant_method": "fp8", "templates": { "expert_gate_proj": "layers.{layer}.ffn.experts.{e}.w1.weight", "expert_up_proj": "layers.{layer}.ffn.experts.{e}.w3.weight", "expert_down_proj": "layers.{layer}.ffn.experts.{e}.w2.weight", "fused_gate_proj": "layers.{layer}.ffn.experts.w1", "fused_down_proj": "layers.{layer}.ffn.experts.w2", "shared_down_proj": [ "layers.{layer}.ffn.shared_experts.w2.weight" ], "router": [ "layers.{layer}.ffn.gate.weight", "layers.{layer}.ffn.router.weight" ], "dense_down_proj": [ "layers.{layer}.ffn.w2.weight" ] } }, "num_feats": 64, "provenance": { "aggregator_type": "moe", "quant_format": "fp8", "probe_mode": "weight_svd_per_expert", "fp8_handling": "cast_to_bfloat16_for_svd" } }