pastapaul's picture
Add files using upload-large-folder tool
7b436d4 verified
{
"architectures": [
"DeepseekV4ForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"bos_token_id": 0,
"compress_ratios": [
0,
0,
4,
128,
4,
128,
4,
128,
4,
128,
4,
128,
4,
128,
4,
128,
4,
128,
4,
128,
4,
128,
4,
128,
4,
128,
4,
128,
4,
128,
4,
128,
4,
128,
4,
128,
4,
128,
4,
128,
4,
128,
4,
0
],
"compress_rope_theta": 160000,
"eos_token_id": 1,
"expert_dtype": "fp4",
"hc_eps": 1e-06,
"hc_mult": 4,
"hc_sinkhorn_iters": 20,
"head_dim": 512,
"hidden_act": "silu",
"hidden_size": 4096,
"index_head_dim": 128,
"index_n_heads": 64,
"index_topk": 512,
"initializer_range": 0.02,
"max_position_embeddings": 1048576,
"model_type": "deepseek_v4",
"moe_intermediate_size": 2048,
"n_routed_experts": 256,
"n_shared_experts": 1,
"norm_topk_prob": true,
"num_attention_heads": 64,
"num_experts_per_tok": 6,
"num_hash_layers": 3,
"num_hidden_layers": 43,
"num_key_value_heads": 1,
"num_nextn_predict_layers": 1,
"o_groups": 8,
"o_lora_rank": 1024,
"q_lora_rank": 1024,
"qk_rope_head_dim": 64,
"quantization_config": {
"config_groups": {
"group_0": {
"format": "float-quantized",
"input_activations": {
"actorder": null,
"block_structure": null,
"dynamic": true,
"group_size": 128,
"num_bits": 8,
"observer": null,
"observer_kwargs": {},
"scale_dtype": null,
"strategy": "group",
"symmetric": true,
"type": "float",
"zp_dtype": null
},
"output_activations": null,
"targets": [
"re:.*\\.attn\\.(fused_wqa_wkv|compressor\\.fused_wkv_wgate|wq_b|wo_a|wo_b)$"
],
"weights": {
"actorder": null,
"block_structure": [
128,
128
],
"dynamic": false,
"group_size": null,
"num_bits": 8,
"observer": "memoryless_minmax",
"observer_kwargs": {},
"scale_dtype": null,
"strategy": "block",
"symmetric": true,
"type": "float",
"zp_dtype": null
}
},
"group_1": {
"format": "nvfp4-pack-quantized",
"input_activations": {
"actorder": null,
"block_structure": null,
"dynamic": "local",
"group_size": 16,
"num_bits": 4,
"observer": "static_minmax",
"observer_kwargs": {},
"scale_dtype": "torch.float8_e4m3fn",
"strategy": "tensor_group",
"symmetric": true,
"type": "float",
"zp_dtype": null
},
"output_activations": null,
"targets": [
"re:.*\\.ffn\\.experts\\.\\d+\\.(gate_proj|up_proj|down_proj)$"
],
"weights": {
"actorder": null,
"block_structure": null,
"dynamic": false,
"group_size": 16,
"num_bits": 4,
"observer": "memoryless_minmax",
"observer_kwargs": {},
"scale_dtype": "torch.float8_e4m3fn",
"strategy": "tensor_group",
"symmetric": true,
"type": "float",
"zp_dtype": null
}
}
},
"format": "mixed-precision",
"global_compression_ratio": null,
"ignore": [
"model.transformer.layers.0.ffn.shared_experts.w1",
"model.transformer.layers.0.ffn.shared_experts.w2",
"model.transformer.layers.0.ffn.shared_experts.w3",
"model.transformer.layers.1.ffn.shared_experts.w1",
"model.transformer.layers.1.ffn.shared_experts.w2",
"model.transformer.layers.1.ffn.shared_experts.w3",
"model.transformer.layers.2.attn.compressor.wkv",
"model.transformer.layers.2.attn.compressor.wgate",
"model.transformer.layers.2.attn.indexer.wq_b",
"model.transformer.layers.2.attn.indexer.weights_proj",
"model.transformer.layers.2.attn.indexer.compressor.wkv",
"model.transformer.layers.2.attn.indexer.compressor.wgate",
"model.transformer.layers.2.ffn.shared_experts.w1",
"model.transformer.layers.2.ffn.shared_experts.w2",
"model.transformer.layers.2.ffn.shared_experts.w3",
"model.transformer.layers.3.attn.compressor.wkv",
"model.transformer.layers.3.attn.compressor.wgate",
"model.transformer.layers.3.ffn.shared_experts.w1",
"model.transformer.layers.3.ffn.shared_experts.w2",
"model.transformer.layers.3.ffn.shared_experts.w3",
"model.transformer.layers.4.attn.compressor.wkv",
"model.transformer.layers.4.attn.compressor.wgate",
"model.transformer.layers.4.attn.indexer.wq_b",
"model.transformer.layers.4.attn.indexer.weights_proj",
"model.transformer.layers.4.attn.indexer.compressor.wkv",
"model.transformer.layers.4.attn.indexer.compressor.wgate",
"model.transformer.layers.4.ffn.shared_experts.w1",
"model.transformer.layers.4.ffn.shared_experts.w2",
"model.transformer.layers.4.ffn.shared_experts.w3",
"model.transformer.layers.5.attn.compressor.wkv",
"model.transformer.layers.5.attn.compressor.wgate",
"model.transformer.layers.5.ffn.shared_experts.w1",
"model.transformer.layers.5.ffn.shared_experts.w2",
"model.transformer.layers.5.ffn.shared_experts.w3",
"model.transformer.layers.6.attn.compressor.wkv",
"model.transformer.layers.6.attn.compressor.wgate",
"model.transformer.layers.6.attn.indexer.wq_b",
"model.transformer.layers.6.attn.indexer.weights_proj",
"model.transformer.layers.6.attn.indexer.compressor.wkv",
"model.transformer.layers.6.attn.indexer.compressor.wgate",
"model.transformer.layers.6.ffn.shared_experts.w1",
"model.transformer.layers.6.ffn.shared_experts.w2",
"model.transformer.layers.6.ffn.shared_experts.w3",
"model.transformer.layers.7.attn.compressor.wkv",
"model.transformer.layers.7.attn.compressor.wgate",
"model.transformer.layers.7.ffn.shared_experts.w1",
"model.transformer.layers.7.ffn.shared_experts.w2",
"model.transformer.layers.7.ffn.shared_experts.w3",
"model.transformer.layers.8.attn.compressor.wkv",
"model.transformer.layers.8.attn.compressor.wgate",
"model.transformer.layers.8.attn.indexer.wq_b",
"model.transformer.layers.8.attn.indexer.weights_proj",
"model.transformer.layers.8.attn.indexer.compressor.wkv",
"model.transformer.layers.8.attn.indexer.compressor.wgate",
"model.transformer.layers.8.ffn.shared_experts.w1",
"model.transformer.layers.8.ffn.shared_experts.w2",
"model.transformer.layers.8.ffn.shared_experts.w3",
"model.transformer.layers.9.attn.compressor.wkv",
"model.transformer.layers.9.attn.compressor.wgate",
"model.transformer.layers.9.ffn.shared_experts.w1",
"model.transformer.layers.9.ffn.shared_experts.w2",
"model.transformer.layers.9.ffn.shared_experts.w3",
"model.transformer.layers.10.attn.compressor.wkv",
"model.transformer.layers.10.attn.compressor.wgate",
"model.transformer.layers.10.attn.indexer.wq_b",
"model.transformer.layers.10.attn.indexer.weights_proj",
"model.transformer.layers.10.attn.indexer.compressor.wkv",
"model.transformer.layers.10.attn.indexer.compressor.wgate",
"model.transformer.layers.10.ffn.shared_experts.w1",
"model.transformer.layers.10.ffn.shared_experts.w2",
"model.transformer.layers.10.ffn.shared_experts.w3",
"model.transformer.layers.11.attn.compressor.wkv",
"model.transformer.layers.11.attn.compressor.wgate",
"model.transformer.layers.11.ffn.shared_experts.w1",
"model.transformer.layers.11.ffn.shared_experts.w2",
"model.transformer.layers.11.ffn.shared_experts.w3",
"model.transformer.layers.12.attn.compressor.wkv",
"model.transformer.layers.12.attn.compressor.wgate",
"model.transformer.layers.12.attn.indexer.wq_b",
"model.transformer.layers.12.attn.indexer.weights_proj",
"model.transformer.layers.12.attn.indexer.compressor.wkv",
"model.transformer.layers.12.attn.indexer.compressor.wgate",
"model.transformer.layers.12.ffn.shared_experts.w1",
"model.transformer.layers.12.ffn.shared_experts.w2",
"model.transformer.layers.12.ffn.shared_experts.w3",
"model.transformer.layers.13.attn.compressor.wkv",
"model.transformer.layers.13.attn.compressor.wgate",
"model.transformer.layers.13.ffn.shared_experts.w1",
"model.transformer.layers.13.ffn.shared_experts.w2",
"model.transformer.layers.13.ffn.shared_experts.w3",
"model.transformer.layers.14.attn.compressor.wkv",
"model.transformer.layers.14.attn.compressor.wgate",
"model.transformer.layers.14.attn.indexer.wq_b",
"model.transformer.layers.14.attn.indexer.weights_proj",
"model.transformer.layers.14.attn.indexer.compressor.wkv",
"model.transformer.layers.14.attn.indexer.compressor.wgate",
"model.transformer.layers.14.ffn.shared_experts.w1",
"model.transformer.layers.14.ffn.shared_experts.w2",
"model.transformer.layers.14.ffn.shared_experts.w3",
"model.transformer.layers.15.attn.compressor.wkv",
"model.transformer.layers.15.attn.compressor.wgate",
"model.transformer.layers.15.ffn.shared_experts.w1",
"model.transformer.layers.15.ffn.shared_experts.w2",
"model.transformer.layers.15.ffn.shared_experts.w3",
"model.transformer.layers.16.attn.compressor.wkv",
"model.transformer.layers.16.attn.compressor.wgate",
"model.transformer.layers.16.attn.indexer.wq_b",
"model.transformer.layers.16.attn.indexer.weights_proj",
"model.transformer.layers.16.attn.indexer.compressor.wkv",
"model.transformer.layers.16.attn.indexer.compressor.wgate",
"model.transformer.layers.16.ffn.shared_experts.w1",
"model.transformer.layers.16.ffn.shared_experts.w2",
"model.transformer.layers.16.ffn.shared_experts.w3",
"model.transformer.layers.17.attn.compressor.wkv",
"model.transformer.layers.17.attn.compressor.wgate",
"model.transformer.layers.17.ffn.shared_experts.w1",
"model.transformer.layers.17.ffn.shared_experts.w2",
"model.transformer.layers.17.ffn.shared_experts.w3",
"model.transformer.layers.18.attn.compressor.wkv",
"model.transformer.layers.18.attn.compressor.wgate",
"model.transformer.layers.18.attn.indexer.wq_b",
"model.transformer.layers.18.attn.indexer.weights_proj",
"model.transformer.layers.18.attn.indexer.compressor.wkv",
"model.transformer.layers.18.attn.indexer.compressor.wgate",
"model.transformer.layers.18.ffn.shared_experts.w1",
"model.transformer.layers.18.ffn.shared_experts.w2",
"model.transformer.layers.18.ffn.shared_experts.w3",
"model.transformer.layers.19.attn.compressor.wkv",
"model.transformer.layers.19.attn.compressor.wgate",
"model.transformer.layers.19.ffn.shared_experts.w1",
"model.transformer.layers.19.ffn.shared_experts.w2",
"model.transformer.layers.19.ffn.shared_experts.w3",
"model.transformer.layers.20.attn.compressor.wkv",
"model.transformer.layers.20.attn.compressor.wgate",
"model.transformer.layers.20.attn.indexer.wq_b",
"model.transformer.layers.20.attn.indexer.weights_proj",
"model.transformer.layers.20.attn.indexer.compressor.wkv",
"model.transformer.layers.20.attn.indexer.compressor.wgate",
"model.transformer.layers.20.ffn.shared_experts.w1",
"model.transformer.layers.20.ffn.shared_experts.w2",
"model.transformer.layers.20.ffn.shared_experts.w3",
"model.transformer.layers.21.attn.compressor.wkv",
"model.transformer.layers.21.attn.compressor.wgate",
"model.transformer.layers.21.ffn.shared_experts.w1",
"model.transformer.layers.21.ffn.shared_experts.w2",
"model.transformer.layers.21.ffn.shared_experts.w3",
"model.transformer.layers.22.attn.compressor.wkv",
"model.transformer.layers.22.attn.compressor.wgate",
"model.transformer.layers.22.attn.indexer.wq_b",
"model.transformer.layers.22.attn.indexer.weights_proj",
"model.transformer.layers.22.attn.indexer.compressor.wkv",
"model.transformer.layers.22.attn.indexer.compressor.wgate",
"model.transformer.layers.22.ffn.shared_experts.w1",
"model.transformer.layers.22.ffn.shared_experts.w2",
"model.transformer.layers.22.ffn.shared_experts.w3",
"model.transformer.layers.23.attn.compressor.wkv",
"model.transformer.layers.23.attn.compressor.wgate",
"model.transformer.layers.23.ffn.shared_experts.w1",
"model.transformer.layers.23.ffn.shared_experts.w2",
"model.transformer.layers.23.ffn.shared_experts.w3",
"model.transformer.layers.24.attn.compressor.wkv",
"model.transformer.layers.24.attn.compressor.wgate",
"model.transformer.layers.24.attn.indexer.wq_b",
"model.transformer.layers.24.attn.indexer.weights_proj",
"model.transformer.layers.24.attn.indexer.compressor.wkv",
"model.transformer.layers.24.attn.indexer.compressor.wgate",
"model.transformer.layers.24.ffn.shared_experts.w1",
"model.transformer.layers.24.ffn.shared_experts.w2",
"model.transformer.layers.24.ffn.shared_experts.w3",
"model.transformer.layers.25.attn.compressor.wkv",
"model.transformer.layers.25.attn.compressor.wgate",
"model.transformer.layers.25.ffn.shared_experts.w1",
"model.transformer.layers.25.ffn.shared_experts.w2",
"model.transformer.layers.25.ffn.shared_experts.w3",
"model.transformer.layers.26.attn.compressor.wkv",
"model.transformer.layers.26.attn.compressor.wgate",
"model.transformer.layers.26.attn.indexer.wq_b",
"model.transformer.layers.26.attn.indexer.weights_proj",
"model.transformer.layers.26.attn.indexer.compressor.wkv",
"model.transformer.layers.26.attn.indexer.compressor.wgate",
"model.transformer.layers.26.ffn.shared_experts.w1",
"model.transformer.layers.26.ffn.shared_experts.w2",
"model.transformer.layers.26.ffn.shared_experts.w3",
"model.transformer.layers.27.attn.compressor.wkv",
"model.transformer.layers.27.attn.compressor.wgate",
"model.transformer.layers.27.ffn.shared_experts.w1",
"model.transformer.layers.27.ffn.shared_experts.w2",
"model.transformer.layers.27.ffn.shared_experts.w3",
"model.transformer.layers.28.attn.compressor.wkv",
"model.transformer.layers.28.attn.compressor.wgate",
"model.transformer.layers.28.attn.indexer.wq_b",
"model.transformer.layers.28.attn.indexer.weights_proj",
"model.transformer.layers.28.attn.indexer.compressor.wkv",
"model.transformer.layers.28.attn.indexer.compressor.wgate",
"model.transformer.layers.28.ffn.shared_experts.w1",
"model.transformer.layers.28.ffn.shared_experts.w2",
"model.transformer.layers.28.ffn.shared_experts.w3",
"model.transformer.layers.29.attn.compressor.wkv",
"model.transformer.layers.29.attn.compressor.wgate",
"model.transformer.layers.29.ffn.shared_experts.w1",
"model.transformer.layers.29.ffn.shared_experts.w2",
"model.transformer.layers.29.ffn.shared_experts.w3",
"model.transformer.layers.30.attn.compressor.wkv",
"model.transformer.layers.30.attn.compressor.wgate",
"model.transformer.layers.30.attn.indexer.wq_b",
"model.transformer.layers.30.attn.indexer.weights_proj",
"model.transformer.layers.30.attn.indexer.compressor.wkv",
"model.transformer.layers.30.attn.indexer.compressor.wgate",
"model.transformer.layers.30.ffn.shared_experts.w1",
"model.transformer.layers.30.ffn.shared_experts.w2",
"model.transformer.layers.30.ffn.shared_experts.w3",
"model.transformer.layers.31.attn.compressor.wkv",
"model.transformer.layers.31.attn.compressor.wgate",
"model.transformer.layers.31.ffn.shared_experts.w1",
"model.transformer.layers.31.ffn.shared_experts.w2",
"model.transformer.layers.31.ffn.shared_experts.w3",
"model.transformer.layers.32.attn.compressor.wkv",
"model.transformer.layers.32.attn.compressor.wgate",
"model.transformer.layers.32.attn.indexer.wq_b",
"model.transformer.layers.32.attn.indexer.weights_proj",
"model.transformer.layers.32.attn.indexer.compressor.wkv",
"model.transformer.layers.32.attn.indexer.compressor.wgate",
"model.transformer.layers.32.ffn.shared_experts.w1",
"model.transformer.layers.32.ffn.shared_experts.w2",
"model.transformer.layers.32.ffn.shared_experts.w3",
"model.transformer.layers.33.attn.compressor.wkv",
"model.transformer.layers.33.attn.compressor.wgate",
"model.transformer.layers.33.ffn.shared_experts.w1",
"model.transformer.layers.33.ffn.shared_experts.w2",
"model.transformer.layers.33.ffn.shared_experts.w3",
"model.transformer.layers.34.attn.compressor.wkv",
"model.transformer.layers.34.attn.compressor.wgate",
"model.transformer.layers.34.attn.indexer.wq_b",
"model.transformer.layers.34.attn.indexer.weights_proj",
"model.transformer.layers.34.attn.indexer.compressor.wkv",
"model.transformer.layers.34.attn.indexer.compressor.wgate",
"model.transformer.layers.34.ffn.shared_experts.w1",
"model.transformer.layers.34.ffn.shared_experts.w2",
"model.transformer.layers.34.ffn.shared_experts.w3",
"model.transformer.layers.35.attn.compressor.wkv",
"model.transformer.layers.35.attn.compressor.wgate",
"model.transformer.layers.35.ffn.shared_experts.w1",
"model.transformer.layers.35.ffn.shared_experts.w2",
"model.transformer.layers.35.ffn.shared_experts.w3",
"model.transformer.layers.36.attn.compressor.wkv",
"model.transformer.layers.36.attn.compressor.wgate",
"model.transformer.layers.36.attn.indexer.wq_b",
"model.transformer.layers.36.attn.indexer.weights_proj",
"model.transformer.layers.36.attn.indexer.compressor.wkv",
"model.transformer.layers.36.attn.indexer.compressor.wgate",
"model.transformer.layers.36.ffn.shared_experts.w1",
"model.transformer.layers.36.ffn.shared_experts.w2",
"model.transformer.layers.36.ffn.shared_experts.w3",
"model.transformer.layers.37.attn.compressor.wkv",
"model.transformer.layers.37.attn.compressor.wgate",
"model.transformer.layers.37.ffn.shared_experts.w1",
"model.transformer.layers.37.ffn.shared_experts.w2",
"model.transformer.layers.37.ffn.shared_experts.w3",
"model.transformer.layers.38.attn.compressor.wkv",
"model.transformer.layers.38.attn.compressor.wgate",
"model.transformer.layers.38.attn.indexer.wq_b",
"model.transformer.layers.38.attn.indexer.weights_proj",
"model.transformer.layers.38.attn.indexer.compressor.wkv",
"model.transformer.layers.38.attn.indexer.compressor.wgate",
"model.transformer.layers.38.ffn.shared_experts.w1",
"model.transformer.layers.38.ffn.shared_experts.w2",
"model.transformer.layers.38.ffn.shared_experts.w3",
"model.transformer.layers.39.attn.compressor.wkv",
"model.transformer.layers.39.attn.compressor.wgate",
"model.transformer.layers.39.ffn.shared_experts.w1",
"model.transformer.layers.39.ffn.shared_experts.w2",
"model.transformer.layers.39.ffn.shared_experts.w3",
"model.transformer.layers.40.attn.compressor.wkv",
"model.transformer.layers.40.attn.compressor.wgate",
"model.transformer.layers.40.attn.indexer.wq_b",
"model.transformer.layers.40.attn.indexer.weights_proj",
"model.transformer.layers.40.attn.indexer.compressor.wkv",
"model.transformer.layers.40.attn.indexer.compressor.wgate",
"model.transformer.layers.40.ffn.shared_experts.w1",
"model.transformer.layers.40.ffn.shared_experts.w2",
"model.transformer.layers.40.ffn.shared_experts.w3",
"model.transformer.layers.41.attn.compressor.wkv",
"model.transformer.layers.41.attn.compressor.wgate",
"model.transformer.layers.41.ffn.shared_experts.w1",
"model.transformer.layers.41.ffn.shared_experts.w2",
"model.transformer.layers.41.ffn.shared_experts.w3",
"model.transformer.layers.42.attn.compressor.wkv",
"model.transformer.layers.42.attn.compressor.wgate",
"model.transformer.layers.42.attn.indexer.wq_b",
"model.transformer.layers.42.attn.indexer.weights_proj",
"model.transformer.layers.42.attn.indexer.compressor.wkv",
"model.transformer.layers.42.attn.indexer.compressor.wgate",
"model.transformer.layers.42.ffn.shared_experts.w1",
"model.transformer.layers.42.ffn.shared_experts.w2",
"model.transformer.layers.42.ffn.shared_experts.w3",
"model.transformer.mtp.0.attn.wq_a",
"model.transformer.mtp.0.attn.wq_b",
"model.transformer.mtp.0.attn.wkv",
"model.transformer.mtp.0.attn.wo_a",
"model.transformer.mtp.0.attn.wo_b",
"model.transformer.mtp.0.ffn.experts.0.w1",
"model.transformer.mtp.0.ffn.experts.0.w2",
"model.transformer.mtp.0.ffn.experts.0.w3",
"model.transformer.mtp.0.ffn.experts.1.w1",
"model.transformer.mtp.0.ffn.experts.1.w2",
"model.transformer.mtp.0.ffn.experts.1.w3",
"model.transformer.mtp.0.ffn.experts.2.w1",
"model.transformer.mtp.0.ffn.experts.2.w2",
"model.transformer.mtp.0.ffn.experts.2.w3",
"model.transformer.mtp.0.ffn.experts.3.w1",
"model.transformer.mtp.0.ffn.experts.3.w2",
"model.transformer.mtp.0.ffn.experts.3.w3",
"model.transformer.mtp.0.ffn.experts.4.w1",
"model.transformer.mtp.0.ffn.experts.4.w2",
"model.transformer.mtp.0.ffn.experts.4.w3",
"model.transformer.mtp.0.ffn.experts.5.w1",
"model.transformer.mtp.0.ffn.experts.5.w2",
"model.transformer.mtp.0.ffn.experts.5.w3",
"model.transformer.mtp.0.ffn.experts.6.w1",
"model.transformer.mtp.0.ffn.experts.6.w2",
"model.transformer.mtp.0.ffn.experts.6.w3",
"model.transformer.mtp.0.ffn.experts.7.w1",
"model.transformer.mtp.0.ffn.experts.7.w2",
"model.transformer.mtp.0.ffn.experts.7.w3",
"model.transformer.mtp.0.ffn.experts.8.w1",
"model.transformer.mtp.0.ffn.experts.8.w2",
"model.transformer.mtp.0.ffn.experts.8.w3",
"model.transformer.mtp.0.ffn.experts.9.w1",
"model.transformer.mtp.0.ffn.experts.9.w2",
"model.transformer.mtp.0.ffn.experts.9.w3",
"model.transformer.mtp.0.ffn.experts.10.w1",
"model.transformer.mtp.0.ffn.experts.10.w2",
"model.transformer.mtp.0.ffn.experts.10.w3",
"model.transformer.mtp.0.ffn.experts.11.w1",
"model.transformer.mtp.0.ffn.experts.11.w2",
"model.transformer.mtp.0.ffn.experts.11.w3",
"model.transformer.mtp.0.ffn.experts.12.w1",
"model.transformer.mtp.0.ffn.experts.12.w2",
"model.transformer.mtp.0.ffn.experts.12.w3",
"model.transformer.mtp.0.ffn.experts.13.w1",
"model.transformer.mtp.0.ffn.experts.13.w2",
"model.transformer.mtp.0.ffn.experts.13.w3",
"model.transformer.mtp.0.ffn.experts.14.w1",
"model.transformer.mtp.0.ffn.experts.14.w2",
"model.transformer.mtp.0.ffn.experts.14.w3",
"model.transformer.mtp.0.ffn.experts.15.w1",
"model.transformer.mtp.0.ffn.experts.15.w2",
"model.transformer.mtp.0.ffn.experts.15.w3",
"model.transformer.mtp.0.ffn.experts.16.w1",
"model.transformer.mtp.0.ffn.experts.16.w2",
"model.transformer.mtp.0.ffn.experts.16.w3",
"model.transformer.mtp.0.ffn.experts.17.w1",
"model.transformer.mtp.0.ffn.experts.17.w2",
"model.transformer.mtp.0.ffn.experts.17.w3",
"model.transformer.mtp.0.ffn.experts.18.w1",
"model.transformer.mtp.0.ffn.experts.18.w2",
"model.transformer.mtp.0.ffn.experts.18.w3",
"model.transformer.mtp.0.ffn.experts.19.w1",
"model.transformer.mtp.0.ffn.experts.19.w2",
"model.transformer.mtp.0.ffn.experts.19.w3",
"model.transformer.mtp.0.ffn.experts.20.w1",
"model.transformer.mtp.0.ffn.experts.20.w2",
"model.transformer.mtp.0.ffn.experts.20.w3",
"model.transformer.mtp.0.ffn.experts.21.w1",
"model.transformer.mtp.0.ffn.experts.21.w2",
"model.transformer.mtp.0.ffn.experts.21.w3",
"model.transformer.mtp.0.ffn.experts.22.w1",
"model.transformer.mtp.0.ffn.experts.22.w2",
"model.transformer.mtp.0.ffn.experts.22.w3",
"model.transformer.mtp.0.ffn.experts.23.w1",
"model.transformer.mtp.0.ffn.experts.23.w2",
"model.transformer.mtp.0.ffn.experts.23.w3",
"model.transformer.mtp.0.ffn.experts.24.w1",
"model.transformer.mtp.0.ffn.experts.24.w2",
"model.transformer.mtp.0.ffn.experts.24.w3",
"model.transformer.mtp.0.ffn.experts.25.w1",
"model.transformer.mtp.0.ffn.experts.25.w2",
"model.transformer.mtp.0.ffn.experts.25.w3",
"model.transformer.mtp.0.ffn.experts.26.w1",
"model.transformer.mtp.0.ffn.experts.26.w2",
"model.transformer.mtp.0.ffn.experts.26.w3",
"model.transformer.mtp.0.ffn.experts.27.w1",
"model.transformer.mtp.0.ffn.experts.27.w2",
"model.transformer.mtp.0.ffn.experts.27.w3",
"model.transformer.mtp.0.ffn.experts.28.w1",
"model.transformer.mtp.0.ffn.experts.28.w2",
"model.transformer.mtp.0.ffn.experts.28.w3",
"model.transformer.mtp.0.ffn.experts.29.w1",
"model.transformer.mtp.0.ffn.experts.29.w2",
"model.transformer.mtp.0.ffn.experts.29.w3",
"model.transformer.mtp.0.ffn.experts.30.w1",
"model.transformer.mtp.0.ffn.experts.30.w2",
"model.transformer.mtp.0.ffn.experts.30.w3",
"model.transformer.mtp.0.ffn.experts.31.w1",
"model.transformer.mtp.0.ffn.experts.31.w2",
"model.transformer.mtp.0.ffn.experts.31.w3",
"model.transformer.mtp.0.ffn.experts.32.w1",
"model.transformer.mtp.0.ffn.experts.32.w2",
"model.transformer.mtp.0.ffn.experts.32.w3",
"model.transformer.mtp.0.ffn.experts.33.w1",
"model.transformer.mtp.0.ffn.experts.33.w2",
"model.transformer.mtp.0.ffn.experts.33.w3",
"model.transformer.mtp.0.ffn.experts.34.w1",
"model.transformer.mtp.0.ffn.experts.34.w2",
"model.transformer.mtp.0.ffn.experts.34.w3",
"model.transformer.mtp.0.ffn.experts.35.w1",
"model.transformer.mtp.0.ffn.experts.35.w2",
"model.transformer.mtp.0.ffn.experts.35.w3",
"model.transformer.mtp.0.ffn.experts.36.w1",
"model.transformer.mtp.0.ffn.experts.36.w2",
"model.transformer.mtp.0.ffn.experts.36.w3",
"model.transformer.mtp.0.ffn.experts.37.w1",
"model.transformer.mtp.0.ffn.experts.37.w2",
"model.transformer.mtp.0.ffn.experts.37.w3",
"model.transformer.mtp.0.ffn.experts.38.w1",
"model.transformer.mtp.0.ffn.experts.38.w2",
"model.transformer.mtp.0.ffn.experts.38.w3",
"model.transformer.mtp.0.ffn.experts.39.w1",
"model.transformer.mtp.0.ffn.experts.39.w2",
"model.transformer.mtp.0.ffn.experts.39.w3",
"model.transformer.mtp.0.ffn.experts.40.w1",
"model.transformer.mtp.0.ffn.experts.40.w2",
"model.transformer.mtp.0.ffn.experts.40.w3",
"model.transformer.mtp.0.ffn.experts.41.w1",
"model.transformer.mtp.0.ffn.experts.41.w2",
"model.transformer.mtp.0.ffn.experts.41.w3",
"model.transformer.mtp.0.ffn.experts.42.w1",
"model.transformer.mtp.0.ffn.experts.42.w2",
"model.transformer.mtp.0.ffn.experts.42.w3",
"model.transformer.mtp.0.ffn.experts.43.w1",
"model.transformer.mtp.0.ffn.experts.43.w2",
"model.transformer.mtp.0.ffn.experts.43.w3",
"model.transformer.mtp.0.ffn.experts.44.w1",
"model.transformer.mtp.0.ffn.experts.44.w2",
"model.transformer.mtp.0.ffn.experts.44.w3",
"model.transformer.mtp.0.ffn.experts.45.w1",
"model.transformer.mtp.0.ffn.experts.45.w2",
"model.transformer.mtp.0.ffn.experts.45.w3",
"model.transformer.mtp.0.ffn.experts.46.w1",
"model.transformer.mtp.0.ffn.experts.46.w2",
"model.transformer.mtp.0.ffn.experts.46.w3",
"model.transformer.mtp.0.ffn.experts.47.w1",
"model.transformer.mtp.0.ffn.experts.47.w2",
"model.transformer.mtp.0.ffn.experts.47.w3",
"model.transformer.mtp.0.ffn.experts.48.w1",
"model.transformer.mtp.0.ffn.experts.48.w2",
"model.transformer.mtp.0.ffn.experts.48.w3",
"model.transformer.mtp.0.ffn.experts.49.w1",
"model.transformer.mtp.0.ffn.experts.49.w2",
"model.transformer.mtp.0.ffn.experts.49.w3",
"model.transformer.mtp.0.ffn.experts.50.w1",
"model.transformer.mtp.0.ffn.experts.50.w2",
"model.transformer.mtp.0.ffn.experts.50.w3",
"model.transformer.mtp.0.ffn.experts.51.w1",
"model.transformer.mtp.0.ffn.experts.51.w2",
"model.transformer.mtp.0.ffn.experts.51.w3",
"model.transformer.mtp.0.ffn.experts.52.w1",
"model.transformer.mtp.0.ffn.experts.52.w2",
"model.transformer.mtp.0.ffn.experts.52.w3",
"model.transformer.mtp.0.ffn.experts.53.w1",
"model.transformer.mtp.0.ffn.experts.53.w2",
"model.transformer.mtp.0.ffn.experts.53.w3",
"model.transformer.mtp.0.ffn.experts.54.w1",
"model.transformer.mtp.0.ffn.experts.54.w2",
"model.transformer.mtp.0.ffn.experts.54.w3",
"model.transformer.mtp.0.ffn.experts.55.w1",
"model.transformer.mtp.0.ffn.experts.55.w2",
"model.transformer.mtp.0.ffn.experts.55.w3",
"model.transformer.mtp.0.ffn.experts.56.w1",
"model.transformer.mtp.0.ffn.experts.56.w2",
"model.transformer.mtp.0.ffn.experts.56.w3",
"model.transformer.mtp.0.ffn.experts.57.w1",
"model.transformer.mtp.0.ffn.experts.57.w2",
"model.transformer.mtp.0.ffn.experts.57.w3",
"model.transformer.mtp.0.ffn.experts.58.w1",
"model.transformer.mtp.0.ffn.experts.58.w2",
"model.transformer.mtp.0.ffn.experts.58.w3",
"model.transformer.mtp.0.ffn.experts.59.w1",
"model.transformer.mtp.0.ffn.experts.59.w2",
"model.transformer.mtp.0.ffn.experts.59.w3",
"model.transformer.mtp.0.ffn.experts.60.w1",
"model.transformer.mtp.0.ffn.experts.60.w2",
"model.transformer.mtp.0.ffn.experts.60.w3",
"model.transformer.mtp.0.ffn.experts.61.w1",
"model.transformer.mtp.0.ffn.experts.61.w2",
"model.transformer.mtp.0.ffn.experts.61.w3",
"model.transformer.mtp.0.ffn.experts.62.w1",
"model.transformer.mtp.0.ffn.experts.62.w2",
"model.transformer.mtp.0.ffn.experts.62.w3",
"model.transformer.mtp.0.ffn.experts.63.w1",
"model.transformer.mtp.0.ffn.experts.63.w2",
"model.transformer.mtp.0.ffn.experts.63.w3",
"model.transformer.mtp.0.ffn.experts.64.w1",
"model.transformer.mtp.0.ffn.experts.64.w2",
"model.transformer.mtp.0.ffn.experts.64.w3",
"model.transformer.mtp.0.ffn.experts.65.w1",
"model.transformer.mtp.0.ffn.experts.65.w2",
"model.transformer.mtp.0.ffn.experts.65.w3",
"model.transformer.mtp.0.ffn.experts.66.w1",
"model.transformer.mtp.0.ffn.experts.66.w2",
"model.transformer.mtp.0.ffn.experts.66.w3",
"model.transformer.mtp.0.ffn.experts.67.w1",
"model.transformer.mtp.0.ffn.experts.67.w2",
"model.transformer.mtp.0.ffn.experts.67.w3",
"model.transformer.mtp.0.ffn.experts.68.w1",
"model.transformer.mtp.0.ffn.experts.68.w2",
"model.transformer.mtp.0.ffn.experts.68.w3",
"model.transformer.mtp.0.ffn.experts.69.w1",
"model.transformer.mtp.0.ffn.experts.69.w2",
"model.transformer.mtp.0.ffn.experts.69.w3",
"model.transformer.mtp.0.ffn.experts.70.w1",
"model.transformer.mtp.0.ffn.experts.70.w2",
"model.transformer.mtp.0.ffn.experts.70.w3",
"model.transformer.mtp.0.ffn.experts.71.w1",
"model.transformer.mtp.0.ffn.experts.71.w2",
"model.transformer.mtp.0.ffn.experts.71.w3",
"model.transformer.mtp.0.ffn.experts.72.w1",
"model.transformer.mtp.0.ffn.experts.72.w2",
"model.transformer.mtp.0.ffn.experts.72.w3",
"model.transformer.mtp.0.ffn.experts.73.w1",
"model.transformer.mtp.0.ffn.experts.73.w2",
"model.transformer.mtp.0.ffn.experts.73.w3",
"model.transformer.mtp.0.ffn.experts.74.w1",
"model.transformer.mtp.0.ffn.experts.74.w2",
"model.transformer.mtp.0.ffn.experts.74.w3",
"model.transformer.mtp.0.ffn.experts.75.w1",
"model.transformer.mtp.0.ffn.experts.75.w2",
"model.transformer.mtp.0.ffn.experts.75.w3",
"model.transformer.mtp.0.ffn.experts.76.w1",
"model.transformer.mtp.0.ffn.experts.76.w2",
"model.transformer.mtp.0.ffn.experts.76.w3",
"model.transformer.mtp.0.ffn.experts.77.w1",
"model.transformer.mtp.0.ffn.experts.77.w2",
"model.transformer.mtp.0.ffn.experts.77.w3",
"model.transformer.mtp.0.ffn.experts.78.w1",
"model.transformer.mtp.0.ffn.experts.78.w2",
"model.transformer.mtp.0.ffn.experts.78.w3",
"model.transformer.mtp.0.ffn.experts.79.w1",
"model.transformer.mtp.0.ffn.experts.79.w2",
"model.transformer.mtp.0.ffn.experts.79.w3",
"model.transformer.mtp.0.ffn.experts.80.w1",
"model.transformer.mtp.0.ffn.experts.80.w2",
"model.transformer.mtp.0.ffn.experts.80.w3",
"model.transformer.mtp.0.ffn.experts.81.w1",
"model.transformer.mtp.0.ffn.experts.81.w2",
"model.transformer.mtp.0.ffn.experts.81.w3",
"model.transformer.mtp.0.ffn.experts.82.w1",
"model.transformer.mtp.0.ffn.experts.82.w2",
"model.transformer.mtp.0.ffn.experts.82.w3",
"model.transformer.mtp.0.ffn.experts.83.w1",
"model.transformer.mtp.0.ffn.experts.83.w2",
"model.transformer.mtp.0.ffn.experts.83.w3",
"model.transformer.mtp.0.ffn.experts.84.w1",
"model.transformer.mtp.0.ffn.experts.84.w2",
"model.transformer.mtp.0.ffn.experts.84.w3",
"model.transformer.mtp.0.ffn.experts.85.w1",
"model.transformer.mtp.0.ffn.experts.85.w2",
"model.transformer.mtp.0.ffn.experts.85.w3",
"model.transformer.mtp.0.ffn.experts.86.w1",
"model.transformer.mtp.0.ffn.experts.86.w2",
"model.transformer.mtp.0.ffn.experts.86.w3",
"model.transformer.mtp.0.ffn.experts.87.w1",
"model.transformer.mtp.0.ffn.experts.87.w2",
"model.transformer.mtp.0.ffn.experts.87.w3",
"model.transformer.mtp.0.ffn.experts.88.w1",
"model.transformer.mtp.0.ffn.experts.88.w2",
"model.transformer.mtp.0.ffn.experts.88.w3",
"model.transformer.mtp.0.ffn.experts.89.w1",
"model.transformer.mtp.0.ffn.experts.89.w2",
"model.transformer.mtp.0.ffn.experts.89.w3",
"model.transformer.mtp.0.ffn.experts.90.w1",
"model.transformer.mtp.0.ffn.experts.90.w2",
"model.transformer.mtp.0.ffn.experts.90.w3",
"model.transformer.mtp.0.ffn.experts.91.w1",
"model.transformer.mtp.0.ffn.experts.91.w2",
"model.transformer.mtp.0.ffn.experts.91.w3",
"model.transformer.mtp.0.ffn.experts.92.w1",
"model.transformer.mtp.0.ffn.experts.92.w2",
"model.transformer.mtp.0.ffn.experts.92.w3",
"model.transformer.mtp.0.ffn.experts.93.w1",
"model.transformer.mtp.0.ffn.experts.93.w2",
"model.transformer.mtp.0.ffn.experts.93.w3",
"model.transformer.mtp.0.ffn.experts.94.w1",
"model.transformer.mtp.0.ffn.experts.94.w2",
"model.transformer.mtp.0.ffn.experts.94.w3",
"model.transformer.mtp.0.ffn.experts.95.w1",
"model.transformer.mtp.0.ffn.experts.95.w2",
"model.transformer.mtp.0.ffn.experts.95.w3",
"model.transformer.mtp.0.ffn.experts.96.w1",
"model.transformer.mtp.0.ffn.experts.96.w2",
"model.transformer.mtp.0.ffn.experts.96.w3",
"model.transformer.mtp.0.ffn.experts.97.w1",
"model.transformer.mtp.0.ffn.experts.97.w2",
"model.transformer.mtp.0.ffn.experts.97.w3",
"model.transformer.mtp.0.ffn.experts.98.w1",
"model.transformer.mtp.0.ffn.experts.98.w2",
"model.transformer.mtp.0.ffn.experts.98.w3",
"model.transformer.mtp.0.ffn.experts.99.w1",
"model.transformer.mtp.0.ffn.experts.99.w2",
"model.transformer.mtp.0.ffn.experts.99.w3",
"model.transformer.mtp.0.ffn.experts.100.w1",
"model.transformer.mtp.0.ffn.experts.100.w2",
"model.transformer.mtp.0.ffn.experts.100.w3",
"model.transformer.mtp.0.ffn.experts.101.w1",
"model.transformer.mtp.0.ffn.experts.101.w2",
"model.transformer.mtp.0.ffn.experts.101.w3",
"model.transformer.mtp.0.ffn.experts.102.w1",
"model.transformer.mtp.0.ffn.experts.102.w2",
"model.transformer.mtp.0.ffn.experts.102.w3",
"model.transformer.mtp.0.ffn.experts.103.w1",
"model.transformer.mtp.0.ffn.experts.103.w2",
"model.transformer.mtp.0.ffn.experts.103.w3",
"model.transformer.mtp.0.ffn.experts.104.w1",
"model.transformer.mtp.0.ffn.experts.104.w2",
"model.transformer.mtp.0.ffn.experts.104.w3",
"model.transformer.mtp.0.ffn.experts.105.w1",
"model.transformer.mtp.0.ffn.experts.105.w2",
"model.transformer.mtp.0.ffn.experts.105.w3",
"model.transformer.mtp.0.ffn.experts.106.w1",
"model.transformer.mtp.0.ffn.experts.106.w2",
"model.transformer.mtp.0.ffn.experts.106.w3",
"model.transformer.mtp.0.ffn.experts.107.w1",
"model.transformer.mtp.0.ffn.experts.107.w2",
"model.transformer.mtp.0.ffn.experts.107.w3",
"model.transformer.mtp.0.ffn.experts.108.w1",
"model.transformer.mtp.0.ffn.experts.108.w2",
"model.transformer.mtp.0.ffn.experts.108.w3",
"model.transformer.mtp.0.ffn.experts.109.w1",
"model.transformer.mtp.0.ffn.experts.109.w2",
"model.transformer.mtp.0.ffn.experts.109.w3",
"model.transformer.mtp.0.ffn.experts.110.w1",
"model.transformer.mtp.0.ffn.experts.110.w2",
"model.transformer.mtp.0.ffn.experts.110.w3",
"model.transformer.mtp.0.ffn.experts.111.w1",
"model.transformer.mtp.0.ffn.experts.111.w2",
"model.transformer.mtp.0.ffn.experts.111.w3",
"model.transformer.mtp.0.ffn.experts.112.w1",
"model.transformer.mtp.0.ffn.experts.112.w2",
"model.transformer.mtp.0.ffn.experts.112.w3",
"model.transformer.mtp.0.ffn.experts.113.w1",
"model.transformer.mtp.0.ffn.experts.113.w2",
"model.transformer.mtp.0.ffn.experts.113.w3",
"model.transformer.mtp.0.ffn.experts.114.w1",
"model.transformer.mtp.0.ffn.experts.114.w2",
"model.transformer.mtp.0.ffn.experts.114.w3",
"model.transformer.mtp.0.ffn.experts.115.w1",
"model.transformer.mtp.0.ffn.experts.115.w2",
"model.transformer.mtp.0.ffn.experts.115.w3",
"model.transformer.mtp.0.ffn.experts.116.w1",
"model.transformer.mtp.0.ffn.experts.116.w2",
"model.transformer.mtp.0.ffn.experts.116.w3",
"model.transformer.mtp.0.ffn.experts.117.w1",
"model.transformer.mtp.0.ffn.experts.117.w2",
"model.transformer.mtp.0.ffn.experts.117.w3",
"model.transformer.mtp.0.ffn.experts.118.w1",
"model.transformer.mtp.0.ffn.experts.118.w2",
"model.transformer.mtp.0.ffn.experts.118.w3",
"model.transformer.mtp.0.ffn.experts.119.w1",
"model.transformer.mtp.0.ffn.experts.119.w2",
"model.transformer.mtp.0.ffn.experts.119.w3",
"model.transformer.mtp.0.ffn.experts.120.w1",
"model.transformer.mtp.0.ffn.experts.120.w2",
"model.transformer.mtp.0.ffn.experts.120.w3",
"model.transformer.mtp.0.ffn.experts.121.w1",
"model.transformer.mtp.0.ffn.experts.121.w2",
"model.transformer.mtp.0.ffn.experts.121.w3",
"model.transformer.mtp.0.ffn.experts.122.w1",
"model.transformer.mtp.0.ffn.experts.122.w2",
"model.transformer.mtp.0.ffn.experts.122.w3",
"model.transformer.mtp.0.ffn.experts.123.w1",
"model.transformer.mtp.0.ffn.experts.123.w2",
"model.transformer.mtp.0.ffn.experts.123.w3",
"model.transformer.mtp.0.ffn.experts.124.w1",
"model.transformer.mtp.0.ffn.experts.124.w2",
"model.transformer.mtp.0.ffn.experts.124.w3",
"model.transformer.mtp.0.ffn.experts.125.w1",
"model.transformer.mtp.0.ffn.experts.125.w2",
"model.transformer.mtp.0.ffn.experts.125.w3",
"model.transformer.mtp.0.ffn.experts.126.w1",
"model.transformer.mtp.0.ffn.experts.126.w2",
"model.transformer.mtp.0.ffn.experts.126.w3",
"model.transformer.mtp.0.ffn.experts.127.w1",
"model.transformer.mtp.0.ffn.experts.127.w2",
"model.transformer.mtp.0.ffn.experts.127.w3",
"model.transformer.mtp.0.ffn.experts.128.w1",
"model.transformer.mtp.0.ffn.experts.128.w2",
"model.transformer.mtp.0.ffn.experts.128.w3",
"model.transformer.mtp.0.ffn.experts.129.w1",
"model.transformer.mtp.0.ffn.experts.129.w2",
"model.transformer.mtp.0.ffn.experts.129.w3",
"model.transformer.mtp.0.ffn.experts.130.w1",
"model.transformer.mtp.0.ffn.experts.130.w2",
"model.transformer.mtp.0.ffn.experts.130.w3",
"model.transformer.mtp.0.ffn.experts.131.w1",
"model.transformer.mtp.0.ffn.experts.131.w2",
"model.transformer.mtp.0.ffn.experts.131.w3",
"model.transformer.mtp.0.ffn.experts.132.w1",
"model.transformer.mtp.0.ffn.experts.132.w2",
"model.transformer.mtp.0.ffn.experts.132.w3",
"model.transformer.mtp.0.ffn.experts.133.w1",
"model.transformer.mtp.0.ffn.experts.133.w2",
"model.transformer.mtp.0.ffn.experts.133.w3",
"model.transformer.mtp.0.ffn.experts.134.w1",
"model.transformer.mtp.0.ffn.experts.134.w2",
"model.transformer.mtp.0.ffn.experts.134.w3",
"model.transformer.mtp.0.ffn.experts.135.w1",
"model.transformer.mtp.0.ffn.experts.135.w2",
"model.transformer.mtp.0.ffn.experts.135.w3",
"model.transformer.mtp.0.ffn.experts.136.w1",
"model.transformer.mtp.0.ffn.experts.136.w2",
"model.transformer.mtp.0.ffn.experts.136.w3",
"model.transformer.mtp.0.ffn.experts.137.w1",
"model.transformer.mtp.0.ffn.experts.137.w2",
"model.transformer.mtp.0.ffn.experts.137.w3",
"model.transformer.mtp.0.ffn.experts.138.w1",
"model.transformer.mtp.0.ffn.experts.138.w2",
"model.transformer.mtp.0.ffn.experts.138.w3",
"model.transformer.mtp.0.ffn.experts.139.w1",
"model.transformer.mtp.0.ffn.experts.139.w2",
"model.transformer.mtp.0.ffn.experts.139.w3",
"model.transformer.mtp.0.ffn.experts.140.w1",
"model.transformer.mtp.0.ffn.experts.140.w2",
"model.transformer.mtp.0.ffn.experts.140.w3",
"model.transformer.mtp.0.ffn.experts.141.w1",
"model.transformer.mtp.0.ffn.experts.141.w2",
"model.transformer.mtp.0.ffn.experts.141.w3",
"model.transformer.mtp.0.ffn.experts.142.w1",
"model.transformer.mtp.0.ffn.experts.142.w2",
"model.transformer.mtp.0.ffn.experts.142.w3",
"model.transformer.mtp.0.ffn.experts.143.w1",
"model.transformer.mtp.0.ffn.experts.143.w2",
"model.transformer.mtp.0.ffn.experts.143.w3",
"model.transformer.mtp.0.ffn.experts.144.w1",
"model.transformer.mtp.0.ffn.experts.144.w2",
"model.transformer.mtp.0.ffn.experts.144.w3",
"model.transformer.mtp.0.ffn.experts.145.w1",
"model.transformer.mtp.0.ffn.experts.145.w2",
"model.transformer.mtp.0.ffn.experts.145.w3",
"model.transformer.mtp.0.ffn.experts.146.w1",
"model.transformer.mtp.0.ffn.experts.146.w2",
"model.transformer.mtp.0.ffn.experts.146.w3",
"model.transformer.mtp.0.ffn.experts.147.w1",
"model.transformer.mtp.0.ffn.experts.147.w2",
"model.transformer.mtp.0.ffn.experts.147.w3",
"model.transformer.mtp.0.ffn.experts.148.w1",
"model.transformer.mtp.0.ffn.experts.148.w2",
"model.transformer.mtp.0.ffn.experts.148.w3",
"model.transformer.mtp.0.ffn.experts.149.w1",
"model.transformer.mtp.0.ffn.experts.149.w2",
"model.transformer.mtp.0.ffn.experts.149.w3",
"model.transformer.mtp.0.ffn.experts.150.w1",
"model.transformer.mtp.0.ffn.experts.150.w2",
"model.transformer.mtp.0.ffn.experts.150.w3",
"model.transformer.mtp.0.ffn.experts.151.w1",
"model.transformer.mtp.0.ffn.experts.151.w2",
"model.transformer.mtp.0.ffn.experts.151.w3",
"model.transformer.mtp.0.ffn.experts.152.w1",
"model.transformer.mtp.0.ffn.experts.152.w2",
"model.transformer.mtp.0.ffn.experts.152.w3",
"model.transformer.mtp.0.ffn.experts.153.w1",
"model.transformer.mtp.0.ffn.experts.153.w2",
"model.transformer.mtp.0.ffn.experts.153.w3",
"model.transformer.mtp.0.ffn.experts.154.w1",
"model.transformer.mtp.0.ffn.experts.154.w2",
"model.transformer.mtp.0.ffn.experts.154.w3",
"model.transformer.mtp.0.ffn.experts.155.w1",
"model.transformer.mtp.0.ffn.experts.155.w2",
"model.transformer.mtp.0.ffn.experts.155.w3",
"model.transformer.mtp.0.ffn.experts.156.w1",
"model.transformer.mtp.0.ffn.experts.156.w2",
"model.transformer.mtp.0.ffn.experts.156.w3",
"model.transformer.mtp.0.ffn.experts.157.w1",
"model.transformer.mtp.0.ffn.experts.157.w2",
"model.transformer.mtp.0.ffn.experts.157.w3",
"model.transformer.mtp.0.ffn.experts.158.w1",
"model.transformer.mtp.0.ffn.experts.158.w2",
"model.transformer.mtp.0.ffn.experts.158.w3",
"model.transformer.mtp.0.ffn.experts.159.w1",
"model.transformer.mtp.0.ffn.experts.159.w2",
"model.transformer.mtp.0.ffn.experts.159.w3",
"model.transformer.mtp.0.ffn.experts.160.w1",
"model.transformer.mtp.0.ffn.experts.160.w2",
"model.transformer.mtp.0.ffn.experts.160.w3",
"model.transformer.mtp.0.ffn.experts.161.w1",
"model.transformer.mtp.0.ffn.experts.161.w2",
"model.transformer.mtp.0.ffn.experts.161.w3",
"model.transformer.mtp.0.ffn.experts.162.w1",
"model.transformer.mtp.0.ffn.experts.162.w2",
"model.transformer.mtp.0.ffn.experts.162.w3",
"model.transformer.mtp.0.ffn.experts.163.w1",
"model.transformer.mtp.0.ffn.experts.163.w2",
"model.transformer.mtp.0.ffn.experts.163.w3",
"model.transformer.mtp.0.ffn.experts.164.w1",
"model.transformer.mtp.0.ffn.experts.164.w2",
"model.transformer.mtp.0.ffn.experts.164.w3",
"model.transformer.mtp.0.ffn.experts.165.w1",
"model.transformer.mtp.0.ffn.experts.165.w2",
"model.transformer.mtp.0.ffn.experts.165.w3",
"model.transformer.mtp.0.ffn.experts.166.w1",
"model.transformer.mtp.0.ffn.experts.166.w2",
"model.transformer.mtp.0.ffn.experts.166.w3",
"model.transformer.mtp.0.ffn.experts.167.w1",
"model.transformer.mtp.0.ffn.experts.167.w2",
"model.transformer.mtp.0.ffn.experts.167.w3",
"model.transformer.mtp.0.ffn.experts.168.w1",
"model.transformer.mtp.0.ffn.experts.168.w2",
"model.transformer.mtp.0.ffn.experts.168.w3",
"model.transformer.mtp.0.ffn.experts.169.w1",
"model.transformer.mtp.0.ffn.experts.169.w2",
"model.transformer.mtp.0.ffn.experts.169.w3",
"model.transformer.mtp.0.ffn.experts.170.w1",
"model.transformer.mtp.0.ffn.experts.170.w2",
"model.transformer.mtp.0.ffn.experts.170.w3",
"model.transformer.mtp.0.ffn.experts.171.w1",
"model.transformer.mtp.0.ffn.experts.171.w2",
"model.transformer.mtp.0.ffn.experts.171.w3",
"model.transformer.mtp.0.ffn.experts.172.w1",
"model.transformer.mtp.0.ffn.experts.172.w2",
"model.transformer.mtp.0.ffn.experts.172.w3",
"model.transformer.mtp.0.ffn.experts.173.w1",
"model.transformer.mtp.0.ffn.experts.173.w2",
"model.transformer.mtp.0.ffn.experts.173.w3",
"model.transformer.mtp.0.ffn.experts.174.w1",
"model.transformer.mtp.0.ffn.experts.174.w2",
"model.transformer.mtp.0.ffn.experts.174.w3",
"model.transformer.mtp.0.ffn.experts.175.w1",
"model.transformer.mtp.0.ffn.experts.175.w2",
"model.transformer.mtp.0.ffn.experts.175.w3",
"model.transformer.mtp.0.ffn.experts.176.w1",
"model.transformer.mtp.0.ffn.experts.176.w2",
"model.transformer.mtp.0.ffn.experts.176.w3",
"model.transformer.mtp.0.ffn.experts.177.w1",
"model.transformer.mtp.0.ffn.experts.177.w2",
"model.transformer.mtp.0.ffn.experts.177.w3",
"model.transformer.mtp.0.ffn.experts.178.w1",
"model.transformer.mtp.0.ffn.experts.178.w2",
"model.transformer.mtp.0.ffn.experts.178.w3",
"model.transformer.mtp.0.ffn.experts.179.w1",
"model.transformer.mtp.0.ffn.experts.179.w2",
"model.transformer.mtp.0.ffn.experts.179.w3",
"model.transformer.mtp.0.ffn.experts.180.w1",
"model.transformer.mtp.0.ffn.experts.180.w2",
"model.transformer.mtp.0.ffn.experts.180.w3",
"model.transformer.mtp.0.ffn.experts.181.w1",
"model.transformer.mtp.0.ffn.experts.181.w2",
"model.transformer.mtp.0.ffn.experts.181.w3",
"model.transformer.mtp.0.ffn.experts.182.w1",
"model.transformer.mtp.0.ffn.experts.182.w2",
"model.transformer.mtp.0.ffn.experts.182.w3",
"model.transformer.mtp.0.ffn.experts.183.w1",
"model.transformer.mtp.0.ffn.experts.183.w2",
"model.transformer.mtp.0.ffn.experts.183.w3",
"model.transformer.mtp.0.ffn.experts.184.w1",
"model.transformer.mtp.0.ffn.experts.184.w2",
"model.transformer.mtp.0.ffn.experts.184.w3",
"model.transformer.mtp.0.ffn.experts.185.w1",
"model.transformer.mtp.0.ffn.experts.185.w2",
"model.transformer.mtp.0.ffn.experts.185.w3",
"model.transformer.mtp.0.ffn.experts.186.w1",
"model.transformer.mtp.0.ffn.experts.186.w2",
"model.transformer.mtp.0.ffn.experts.186.w3",
"model.transformer.mtp.0.ffn.experts.187.w1",
"model.transformer.mtp.0.ffn.experts.187.w2",
"model.transformer.mtp.0.ffn.experts.187.w3",
"model.transformer.mtp.0.ffn.experts.188.w1",
"model.transformer.mtp.0.ffn.experts.188.w2",
"model.transformer.mtp.0.ffn.experts.188.w3",
"model.transformer.mtp.0.ffn.experts.189.w1",
"model.transformer.mtp.0.ffn.experts.189.w2",
"model.transformer.mtp.0.ffn.experts.189.w3",
"model.transformer.mtp.0.ffn.experts.190.w1",
"model.transformer.mtp.0.ffn.experts.190.w2",
"model.transformer.mtp.0.ffn.experts.190.w3",
"model.transformer.mtp.0.ffn.experts.191.w1",
"model.transformer.mtp.0.ffn.experts.191.w2",
"model.transformer.mtp.0.ffn.experts.191.w3",
"model.transformer.mtp.0.ffn.experts.192.w1",
"model.transformer.mtp.0.ffn.experts.192.w2",
"model.transformer.mtp.0.ffn.experts.192.w3",
"model.transformer.mtp.0.ffn.experts.193.w1",
"model.transformer.mtp.0.ffn.experts.193.w2",
"model.transformer.mtp.0.ffn.experts.193.w3",
"model.transformer.mtp.0.ffn.experts.194.w1",
"model.transformer.mtp.0.ffn.experts.194.w2",
"model.transformer.mtp.0.ffn.experts.194.w3",
"model.transformer.mtp.0.ffn.experts.195.w1",
"model.transformer.mtp.0.ffn.experts.195.w2",
"model.transformer.mtp.0.ffn.experts.195.w3",
"model.transformer.mtp.0.ffn.experts.196.w1",
"model.transformer.mtp.0.ffn.experts.196.w2",
"model.transformer.mtp.0.ffn.experts.196.w3",
"model.transformer.mtp.0.ffn.experts.197.w1",
"model.transformer.mtp.0.ffn.experts.197.w2",
"model.transformer.mtp.0.ffn.experts.197.w3",
"model.transformer.mtp.0.ffn.experts.198.w1",
"model.transformer.mtp.0.ffn.experts.198.w2",
"model.transformer.mtp.0.ffn.experts.198.w3",
"model.transformer.mtp.0.ffn.experts.199.w1",
"model.transformer.mtp.0.ffn.experts.199.w2",
"model.transformer.mtp.0.ffn.experts.199.w3",
"model.transformer.mtp.0.ffn.experts.200.w1",
"model.transformer.mtp.0.ffn.experts.200.w2",
"model.transformer.mtp.0.ffn.experts.200.w3",
"model.transformer.mtp.0.ffn.experts.201.w1",
"model.transformer.mtp.0.ffn.experts.201.w2",
"model.transformer.mtp.0.ffn.experts.201.w3",
"model.transformer.mtp.0.ffn.experts.202.w1",
"model.transformer.mtp.0.ffn.experts.202.w2",
"model.transformer.mtp.0.ffn.experts.202.w3",
"model.transformer.mtp.0.ffn.experts.203.w1",
"model.transformer.mtp.0.ffn.experts.203.w2",
"model.transformer.mtp.0.ffn.experts.203.w3",
"model.transformer.mtp.0.ffn.experts.204.w1",
"model.transformer.mtp.0.ffn.experts.204.w2",
"model.transformer.mtp.0.ffn.experts.204.w3",
"model.transformer.mtp.0.ffn.experts.205.w1",
"model.transformer.mtp.0.ffn.experts.205.w2",
"model.transformer.mtp.0.ffn.experts.205.w3",
"model.transformer.mtp.0.ffn.experts.206.w1",
"model.transformer.mtp.0.ffn.experts.206.w2",
"model.transformer.mtp.0.ffn.experts.206.w3",
"model.transformer.mtp.0.ffn.experts.207.w1",
"model.transformer.mtp.0.ffn.experts.207.w2",
"model.transformer.mtp.0.ffn.experts.207.w3",
"model.transformer.mtp.0.ffn.experts.208.w1",
"model.transformer.mtp.0.ffn.experts.208.w2",
"model.transformer.mtp.0.ffn.experts.208.w3",
"model.transformer.mtp.0.ffn.experts.209.w1",
"model.transformer.mtp.0.ffn.experts.209.w2",
"model.transformer.mtp.0.ffn.experts.209.w3",
"model.transformer.mtp.0.ffn.experts.210.w1",
"model.transformer.mtp.0.ffn.experts.210.w2",
"model.transformer.mtp.0.ffn.experts.210.w3",
"model.transformer.mtp.0.ffn.experts.211.w1",
"model.transformer.mtp.0.ffn.experts.211.w2",
"model.transformer.mtp.0.ffn.experts.211.w3",
"model.transformer.mtp.0.ffn.experts.212.w1",
"model.transformer.mtp.0.ffn.experts.212.w2",
"model.transformer.mtp.0.ffn.experts.212.w3",
"model.transformer.mtp.0.ffn.experts.213.w1",
"model.transformer.mtp.0.ffn.experts.213.w2",
"model.transformer.mtp.0.ffn.experts.213.w3",
"model.transformer.mtp.0.ffn.experts.214.w1",
"model.transformer.mtp.0.ffn.experts.214.w2",
"model.transformer.mtp.0.ffn.experts.214.w3",
"model.transformer.mtp.0.ffn.experts.215.w1",
"model.transformer.mtp.0.ffn.experts.215.w2",
"model.transformer.mtp.0.ffn.experts.215.w3",
"model.transformer.mtp.0.ffn.experts.216.w1",
"model.transformer.mtp.0.ffn.experts.216.w2",
"model.transformer.mtp.0.ffn.experts.216.w3",
"model.transformer.mtp.0.ffn.experts.217.w1",
"model.transformer.mtp.0.ffn.experts.217.w2",
"model.transformer.mtp.0.ffn.experts.217.w3",
"model.transformer.mtp.0.ffn.experts.218.w1",
"model.transformer.mtp.0.ffn.experts.218.w2",
"model.transformer.mtp.0.ffn.experts.218.w3",
"model.transformer.mtp.0.ffn.experts.219.w1",
"model.transformer.mtp.0.ffn.experts.219.w2",
"model.transformer.mtp.0.ffn.experts.219.w3",
"model.transformer.mtp.0.ffn.experts.220.w1",
"model.transformer.mtp.0.ffn.experts.220.w2",
"model.transformer.mtp.0.ffn.experts.220.w3",
"model.transformer.mtp.0.ffn.experts.221.w1",
"model.transformer.mtp.0.ffn.experts.221.w2",
"model.transformer.mtp.0.ffn.experts.221.w3",
"model.transformer.mtp.0.ffn.experts.222.w1",
"model.transformer.mtp.0.ffn.experts.222.w2",
"model.transformer.mtp.0.ffn.experts.222.w3",
"model.transformer.mtp.0.ffn.experts.223.w1",
"model.transformer.mtp.0.ffn.experts.223.w2",
"model.transformer.mtp.0.ffn.experts.223.w3",
"model.transformer.mtp.0.ffn.experts.224.w1",
"model.transformer.mtp.0.ffn.experts.224.w2",
"model.transformer.mtp.0.ffn.experts.224.w3",
"model.transformer.mtp.0.ffn.experts.225.w1",
"model.transformer.mtp.0.ffn.experts.225.w2",
"model.transformer.mtp.0.ffn.experts.225.w3",
"model.transformer.mtp.0.ffn.experts.226.w1",
"model.transformer.mtp.0.ffn.experts.226.w2",
"model.transformer.mtp.0.ffn.experts.226.w3",
"model.transformer.mtp.0.ffn.experts.227.w1",
"model.transformer.mtp.0.ffn.experts.227.w2",
"model.transformer.mtp.0.ffn.experts.227.w3",
"model.transformer.mtp.0.ffn.experts.228.w1",
"model.transformer.mtp.0.ffn.experts.228.w2",
"model.transformer.mtp.0.ffn.experts.228.w3",
"model.transformer.mtp.0.ffn.experts.229.w1",
"model.transformer.mtp.0.ffn.experts.229.w2",
"model.transformer.mtp.0.ffn.experts.229.w3",
"model.transformer.mtp.0.ffn.experts.230.w1",
"model.transformer.mtp.0.ffn.experts.230.w2",
"model.transformer.mtp.0.ffn.experts.230.w3",
"model.transformer.mtp.0.ffn.experts.231.w1",
"model.transformer.mtp.0.ffn.experts.231.w2",
"model.transformer.mtp.0.ffn.experts.231.w3",
"model.transformer.mtp.0.ffn.experts.232.w1",
"model.transformer.mtp.0.ffn.experts.232.w2",
"model.transformer.mtp.0.ffn.experts.232.w3",
"model.transformer.mtp.0.ffn.experts.233.w1",
"model.transformer.mtp.0.ffn.experts.233.w2",
"model.transformer.mtp.0.ffn.experts.233.w3",
"model.transformer.mtp.0.ffn.experts.234.w1",
"model.transformer.mtp.0.ffn.experts.234.w2",
"model.transformer.mtp.0.ffn.experts.234.w3",
"model.transformer.mtp.0.ffn.experts.235.w1",
"model.transformer.mtp.0.ffn.experts.235.w2",
"model.transformer.mtp.0.ffn.experts.235.w3",
"model.transformer.mtp.0.ffn.experts.236.w1",
"model.transformer.mtp.0.ffn.experts.236.w2",
"model.transformer.mtp.0.ffn.experts.236.w3",
"model.transformer.mtp.0.ffn.experts.237.w1",
"model.transformer.mtp.0.ffn.experts.237.w2",
"model.transformer.mtp.0.ffn.experts.237.w3",
"model.transformer.mtp.0.ffn.experts.238.w1",
"model.transformer.mtp.0.ffn.experts.238.w2",
"model.transformer.mtp.0.ffn.experts.238.w3",
"model.transformer.mtp.0.ffn.experts.239.w1",
"model.transformer.mtp.0.ffn.experts.239.w2",
"model.transformer.mtp.0.ffn.experts.239.w3",
"model.transformer.mtp.0.ffn.experts.240.w1",
"model.transformer.mtp.0.ffn.experts.240.w2",
"model.transformer.mtp.0.ffn.experts.240.w3",
"model.transformer.mtp.0.ffn.experts.241.w1",
"model.transformer.mtp.0.ffn.experts.241.w2",
"model.transformer.mtp.0.ffn.experts.241.w3",
"model.transformer.mtp.0.ffn.experts.242.w1",
"model.transformer.mtp.0.ffn.experts.242.w2",
"model.transformer.mtp.0.ffn.experts.242.w3",
"model.transformer.mtp.0.ffn.experts.243.w1",
"model.transformer.mtp.0.ffn.experts.243.w2",
"model.transformer.mtp.0.ffn.experts.243.w3",
"model.transformer.mtp.0.ffn.experts.244.w1",
"model.transformer.mtp.0.ffn.experts.244.w2",
"model.transformer.mtp.0.ffn.experts.244.w3",
"model.transformer.mtp.0.ffn.experts.245.w1",
"model.transformer.mtp.0.ffn.experts.245.w2",
"model.transformer.mtp.0.ffn.experts.245.w3",
"model.transformer.mtp.0.ffn.experts.246.w1",
"model.transformer.mtp.0.ffn.experts.246.w2",
"model.transformer.mtp.0.ffn.experts.246.w3",
"model.transformer.mtp.0.ffn.experts.247.w1",
"model.transformer.mtp.0.ffn.experts.247.w2",
"model.transformer.mtp.0.ffn.experts.247.w3",
"model.transformer.mtp.0.ffn.experts.248.w1",
"model.transformer.mtp.0.ffn.experts.248.w2",
"model.transformer.mtp.0.ffn.experts.248.w3",
"model.transformer.mtp.0.ffn.experts.249.w1",
"model.transformer.mtp.0.ffn.experts.249.w2",
"model.transformer.mtp.0.ffn.experts.249.w3",
"model.transformer.mtp.0.ffn.experts.250.w1",
"model.transformer.mtp.0.ffn.experts.250.w2",
"model.transformer.mtp.0.ffn.experts.250.w3",
"model.transformer.mtp.0.ffn.experts.251.w1",
"model.transformer.mtp.0.ffn.experts.251.w2",
"model.transformer.mtp.0.ffn.experts.251.w3",
"model.transformer.mtp.0.ffn.experts.252.w1",
"model.transformer.mtp.0.ffn.experts.252.w2",
"model.transformer.mtp.0.ffn.experts.252.w3",
"model.transformer.mtp.0.ffn.experts.253.w1",
"model.transformer.mtp.0.ffn.experts.253.w2",
"model.transformer.mtp.0.ffn.experts.253.w3",
"model.transformer.mtp.0.ffn.experts.254.w1",
"model.transformer.mtp.0.ffn.experts.254.w2",
"model.transformer.mtp.0.ffn.experts.254.w3",
"model.transformer.mtp.0.ffn.experts.255.w1",
"model.transformer.mtp.0.ffn.experts.255.w2",
"model.transformer.mtp.0.ffn.experts.255.w3",
"model.transformer.mtp.0.ffn.shared_experts.w1",
"model.transformer.mtp.0.ffn.shared_experts.w2",
"model.transformer.mtp.0.ffn.shared_experts.w3",
"model.transformer.mtp.0.e_proj",
"model.transformer.mtp.0.h_proj",
"re:^mtp\\..*",
"re:.*\\.mtp_block\\..*",
"re:.*\\.mtp\\..*",
"re:.*\\.layers\\.43\\..*",
"re:.*\\.layers\\.43$"
],
"kv_cache_scheme": null,
"quant_method": "compressed-tensors",
"quantization_status": "compressed",
"sparsity_config": {},
"transform_config": {},
"version": "0.15.1.a20260515",
"scale_fmt": "ue8m0"
},
"rms_norm_eps": 1e-06,
"rope_scaling": {
"beta_fast": 32,
"beta_slow": 1,
"factor": 16,
"original_max_position_embeddings": 65536,
"type": "yarn"
},
"rope_theta": 10000,
"routed_scaling_factor": 1.5,
"scoring_func": "sqrtsoftplus",
"sliding_window": 128,
"swiglu_limit": 10.0,
"tie_word_embeddings": true,
"topk_method": "noaux_tc",
"torch_dtype": "bfloat16",
"transformers_version": "4.57.1",
"use_cache": true,
"vocab_size": 129280
}