mikeumus-divincian committed on
Commit
af4e1c7
·
verified ·
1 Parent(s): ca47267

Add meta.json

Browse files
Files changed (1) hide show
  1. meta.json +40 -0
meta.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_id": "deepseek-ai/DeepSeek-V4-Pro",
3
+ "moe_config": {
4
+ "model_type": "deepseek_v4",
5
+ "num_layers": 61,
6
+ "hidden_size": 7168,
7
+ "intermediate_size": 18432,
8
+ "moe_intermediate_size": 3072,
9
+ "n_routed_experts": 384,
10
+ "n_shared_experts": 1,
11
+ "num_experts_per_tok": 6,
12
+ "first_k_dense_replace": 0,
13
+ "torch_dtype": "bfloat16",
14
+ "quant_method": "fp8",
15
+ "templates": {
16
+ "expert_gate_proj": "layers.{layer}.ffn.experts.{e}.w1.weight",
17
+ "expert_up_proj": "layers.{layer}.ffn.experts.{e}.w3.weight",
18
+ "expert_down_proj": "layers.{layer}.ffn.experts.{e}.w2.weight",
19
+ "fused_gate_proj": "layers.{layer}.ffn.experts.w1",
20
+ "fused_down_proj": "layers.{layer}.ffn.experts.w2",
21
+ "shared_down_proj": [
22
+ "layers.{layer}.ffn.shared_experts.w2.weight"
23
+ ],
24
+ "router": [
25
+ "layers.{layer}.ffn.gate.weight",
26
+ "layers.{layer}.ffn.router.weight"
27
+ ],
28
+ "dense_down_proj": [
29
+ "layers.{layer}.ffn.w2.weight"
30
+ ]
31
+ }
32
+ },
33
+ "num_feats": 64,
34
+ "provenance": {
35
+ "aggregator_type": "moe",
36
+ "quant_format": "fp8",
37
+ "probe_mode": "weight_svd_per_expert",
38
+ "fp8_handling": "cast_to_bfloat16_for_svd"
39
+ }
40
+ }