| { |
| "architectures": [ |
| "DeepseekV4ForCausalLM" |
| ], |
| "attention_bias": false, |
| "attention_dropout": 0.0, |
| "bos_token_id": 0, |
| "compress_ratios": [ |
| 0, |
| 0, |
| 4, |
| 128, |
| 4, |
| 128, |
| 4, |
| 128, |
| 4, |
| 128, |
| 4, |
| 128, |
| 4, |
| 128, |
| 4, |
| 128, |
| 4, |
| 128, |
| 4, |
| 128, |
| 4, |
| 128, |
| 4, |
| 128, |
| 4, |
| 128, |
| 4, |
| 128, |
| 4, |
| 128, |
| 4, |
| 128, |
| 4, |
| 128, |
| 4, |
| 128, |
| 4, |
| 128, |
| 4, |
| 128, |
| 4, |
| 128, |
| 4, |
| 0 |
| ], |
| "compress_rope_theta": 160000, |
| "eos_token_id": 1, |
| "expert_dtype": "fp4", |
| "hc_eps": 1e-06, |
| "hc_mult": 4, |
| "hc_sinkhorn_iters": 20, |
| "head_dim": 512, |
| "hidden_act": "silu", |
| "hidden_size": 4096, |
| "index_head_dim": 128, |
| "index_n_heads": 64, |
| "index_topk": 512, |
| "initializer_range": 0.02, |
| "max_position_embeddings": 1048576, |
| "model_type": "deepseek_v4", |
| "moe_intermediate_size": 2048, |
| "n_routed_experts": 256, |
| "n_shared_experts": 1, |
| "norm_topk_prob": true, |
| "num_attention_heads": 64, |
| "num_experts_per_tok": 6, |
| "num_hash_layers": 3, |
| "num_hidden_layers": 43, |
| "num_key_value_heads": 1, |
| "num_nextn_predict_layers": 1, |
| "o_groups": 8, |
| "o_lora_rank": 1024, |
| "q_lora_rank": 1024, |
| "qk_rope_head_dim": 64, |
| "quantization_config": { |
| "config_groups": { |
| "group_0": { |
| "format": "float-quantized", |
| "input_activations": { |
| "actorder": null, |
| "block_structure": null, |
| "dynamic": true, |
| "group_size": 128, |
| "num_bits": 8, |
| "observer": null, |
| "observer_kwargs": {}, |
| "scale_dtype": null, |
| "strategy": "group", |
| "symmetric": true, |
| "type": "float", |
| "zp_dtype": null |
| }, |
| "output_activations": null, |
| "targets": [ |
| "re:.*\\.attn\\.(fused_wqa_wkv|compressor\\.fused_wkv_wgate|wq_b|wo_a|wo_b)$" |
| ], |
| "weights": { |
| "actorder": null, |
| "block_structure": [ |
| 128, |
| 128 |
| ], |
| "dynamic": false, |
| "group_size": null, |
| "num_bits": 8, |
| "observer": "memoryless_minmax", |
| "observer_kwargs": {}, |
| "scale_dtype": null, |
| "strategy": "block", |
| "symmetric": true, |
| "type": "float", |
| "zp_dtype": null |
| } |
| }, |
| "group_1": { |
| "format": "nvfp4-pack-quantized", |
| "input_activations": { |
| "actorder": null, |
| "block_structure": null, |
| "dynamic": "local", |
| "group_size": 16, |
| "num_bits": 4, |
| "observer": "static_minmax", |
| "observer_kwargs": {}, |
| "scale_dtype": "torch.float8_e4m3fn", |
| "strategy": "tensor_group", |
| "symmetric": true, |
| "type": "float", |
| "zp_dtype": null |
| }, |
| "output_activations": null, |
| "targets": [ |
| "re:.*\\.ffn\\.experts\\.\\d+\\.(gate_proj|up_proj|down_proj)$" |
| ], |
| "weights": { |
| "actorder": null, |
| "block_structure": null, |
| "dynamic": false, |
| "group_size": 16, |
| "num_bits": 4, |
| "observer": "memoryless_minmax", |
| "observer_kwargs": {}, |
| "scale_dtype": "torch.float8_e4m3fn", |
| "strategy": "tensor_group", |
| "symmetric": true, |
| "type": "float", |
| "zp_dtype": null |
| } |
| } |
| }, |
| "format": "mixed-precision", |
| "global_compression_ratio": null, |
| "ignore": [ |
| "model.transformer.layers.0.ffn.shared_experts.w1", |
| "model.transformer.layers.0.ffn.shared_experts.w2", |
| "model.transformer.layers.0.ffn.shared_experts.w3", |
| "model.transformer.layers.1.ffn.shared_experts.w1", |
| "model.transformer.layers.1.ffn.shared_experts.w2", |
| "model.transformer.layers.1.ffn.shared_experts.w3", |
| "model.transformer.layers.2.attn.compressor.wkv", |
| "model.transformer.layers.2.attn.compressor.wgate", |
| "model.transformer.layers.2.attn.indexer.wq_b", |
| "model.transformer.layers.2.attn.indexer.weights_proj", |
| "model.transformer.layers.2.attn.indexer.compressor.wkv", |
| "model.transformer.layers.2.attn.indexer.compressor.wgate", |
| "model.transformer.layers.2.ffn.shared_experts.w1", |
| "model.transformer.layers.2.ffn.shared_experts.w2", |
| "model.transformer.layers.2.ffn.shared_experts.w3", |
| "model.transformer.layers.3.attn.compressor.wkv", |
| "model.transformer.layers.3.attn.compressor.wgate", |
| "model.transformer.layers.3.ffn.shared_experts.w1", |
| "model.transformer.layers.3.ffn.shared_experts.w2", |
| "model.transformer.layers.3.ffn.shared_experts.w3", |
| "model.transformer.layers.4.attn.compressor.wkv", |
| "model.transformer.layers.4.attn.compressor.wgate", |
| "model.transformer.layers.4.attn.indexer.wq_b", |
| "model.transformer.layers.4.attn.indexer.weights_proj", |
| "model.transformer.layers.4.attn.indexer.compressor.wkv", |
| "model.transformer.layers.4.attn.indexer.compressor.wgate", |
| "model.transformer.layers.4.ffn.shared_experts.w1", |
| "model.transformer.layers.4.ffn.shared_experts.w2", |
| "model.transformer.layers.4.ffn.shared_experts.w3", |
| "model.transformer.layers.5.attn.compressor.wkv", |
| "model.transformer.layers.5.attn.compressor.wgate", |
| "model.transformer.layers.5.ffn.shared_experts.w1", |
| "model.transformer.layers.5.ffn.shared_experts.w2", |
| "model.transformer.layers.5.ffn.shared_experts.w3", |
| "model.transformer.layers.6.attn.compressor.wkv", |
| "model.transformer.layers.6.attn.compressor.wgate", |
| "model.transformer.layers.6.attn.indexer.wq_b", |
| "model.transformer.layers.6.attn.indexer.weights_proj", |
| "model.transformer.layers.6.attn.indexer.compressor.wkv", |
| "model.transformer.layers.6.attn.indexer.compressor.wgate", |
| "model.transformer.layers.6.ffn.shared_experts.w1", |
| "model.transformer.layers.6.ffn.shared_experts.w2", |
| "model.transformer.layers.6.ffn.shared_experts.w3", |
| "model.transformer.layers.7.attn.compressor.wkv", |
| "model.transformer.layers.7.attn.compressor.wgate", |
| "model.transformer.layers.7.ffn.shared_experts.w1", |
| "model.transformer.layers.7.ffn.shared_experts.w2", |
| "model.transformer.layers.7.ffn.shared_experts.w3", |
| "model.transformer.layers.8.attn.compressor.wkv", |
| "model.transformer.layers.8.attn.compressor.wgate", |
| "model.transformer.layers.8.attn.indexer.wq_b", |
| "model.transformer.layers.8.attn.indexer.weights_proj", |
| "model.transformer.layers.8.attn.indexer.compressor.wkv", |
| "model.transformer.layers.8.attn.indexer.compressor.wgate", |
| "model.transformer.layers.8.ffn.shared_experts.w1", |
| "model.transformer.layers.8.ffn.shared_experts.w2", |
| "model.transformer.layers.8.ffn.shared_experts.w3", |
| "model.transformer.layers.9.attn.compressor.wkv", |
| "model.transformer.layers.9.attn.compressor.wgate", |
| "model.transformer.layers.9.ffn.shared_experts.w1", |
| "model.transformer.layers.9.ffn.shared_experts.w2", |
| "model.transformer.layers.9.ffn.shared_experts.w3", |
| "model.transformer.layers.10.attn.compressor.wkv", |
| "model.transformer.layers.10.attn.compressor.wgate", |
| "model.transformer.layers.10.attn.indexer.wq_b", |
| "model.transformer.layers.10.attn.indexer.weights_proj", |
| "model.transformer.layers.10.attn.indexer.compressor.wkv", |
| "model.transformer.layers.10.attn.indexer.compressor.wgate", |
| "model.transformer.layers.10.ffn.shared_experts.w1", |
| "model.transformer.layers.10.ffn.shared_experts.w2", |
| "model.transformer.layers.10.ffn.shared_experts.w3", |
| "model.transformer.layers.11.attn.compressor.wkv", |
| "model.transformer.layers.11.attn.compressor.wgate", |
| "model.transformer.layers.11.ffn.shared_experts.w1", |
| "model.transformer.layers.11.ffn.shared_experts.w2", |
| "model.transformer.layers.11.ffn.shared_experts.w3", |
| "model.transformer.layers.12.attn.compressor.wkv", |
| "model.transformer.layers.12.attn.compressor.wgate", |
| "model.transformer.layers.12.attn.indexer.wq_b", |
| "model.transformer.layers.12.attn.indexer.weights_proj", |
| "model.transformer.layers.12.attn.indexer.compressor.wkv", |
| "model.transformer.layers.12.attn.indexer.compressor.wgate", |
| "model.transformer.layers.12.ffn.shared_experts.w1", |
| "model.transformer.layers.12.ffn.shared_experts.w2", |
| "model.transformer.layers.12.ffn.shared_experts.w3", |
| "model.transformer.layers.13.attn.compressor.wkv", |
| "model.transformer.layers.13.attn.compressor.wgate", |
| "model.transformer.layers.13.ffn.shared_experts.w1", |
| "model.transformer.layers.13.ffn.shared_experts.w2", |
| "model.transformer.layers.13.ffn.shared_experts.w3", |
| "model.transformer.layers.14.attn.compressor.wkv", |
| "model.transformer.layers.14.attn.compressor.wgate", |
| "model.transformer.layers.14.attn.indexer.wq_b", |
| "model.transformer.layers.14.attn.indexer.weights_proj", |
| "model.transformer.layers.14.attn.indexer.compressor.wkv", |
| "model.transformer.layers.14.attn.indexer.compressor.wgate", |
| "model.transformer.layers.14.ffn.shared_experts.w1", |
| "model.transformer.layers.14.ffn.shared_experts.w2", |
| "model.transformer.layers.14.ffn.shared_experts.w3", |
| "model.transformer.layers.15.attn.compressor.wkv", |
| "model.transformer.layers.15.attn.compressor.wgate", |
| "model.transformer.layers.15.ffn.shared_experts.w1", |
| "model.transformer.layers.15.ffn.shared_experts.w2", |
| "model.transformer.layers.15.ffn.shared_experts.w3", |
| "model.transformer.layers.16.attn.compressor.wkv", |
| "model.transformer.layers.16.attn.compressor.wgate", |
| "model.transformer.layers.16.attn.indexer.wq_b", |
| "model.transformer.layers.16.attn.indexer.weights_proj", |
| "model.transformer.layers.16.attn.indexer.compressor.wkv", |
| "model.transformer.layers.16.attn.indexer.compressor.wgate", |
| "model.transformer.layers.16.ffn.shared_experts.w1", |
| "model.transformer.layers.16.ffn.shared_experts.w2", |
| "model.transformer.layers.16.ffn.shared_experts.w3", |
| "model.transformer.layers.17.attn.compressor.wkv", |
| "model.transformer.layers.17.attn.compressor.wgate", |
| "model.transformer.layers.17.ffn.shared_experts.w1", |
| "model.transformer.layers.17.ffn.shared_experts.w2", |
| "model.transformer.layers.17.ffn.shared_experts.w3", |
| "model.transformer.layers.18.attn.compressor.wkv", |
| "model.transformer.layers.18.attn.compressor.wgate", |
| "model.transformer.layers.18.attn.indexer.wq_b", |
| "model.transformer.layers.18.attn.indexer.weights_proj", |
| "model.transformer.layers.18.attn.indexer.compressor.wkv", |
| "model.transformer.layers.18.attn.indexer.compressor.wgate", |
| "model.transformer.layers.18.ffn.shared_experts.w1", |
| "model.transformer.layers.18.ffn.shared_experts.w2", |
| "model.transformer.layers.18.ffn.shared_experts.w3", |
| "model.transformer.layers.19.attn.compressor.wkv", |
| "model.transformer.layers.19.attn.compressor.wgate", |
| "model.transformer.layers.19.ffn.shared_experts.w1", |
| "model.transformer.layers.19.ffn.shared_experts.w2", |
| "model.transformer.layers.19.ffn.shared_experts.w3", |
| "model.transformer.layers.20.attn.compressor.wkv", |
| "model.transformer.layers.20.attn.compressor.wgate", |
| "model.transformer.layers.20.attn.indexer.wq_b", |
| "model.transformer.layers.20.attn.indexer.weights_proj", |
| "model.transformer.layers.20.attn.indexer.compressor.wkv", |
| "model.transformer.layers.20.attn.indexer.compressor.wgate", |
| "model.transformer.layers.20.ffn.shared_experts.w1", |
| "model.transformer.layers.20.ffn.shared_experts.w2", |
| "model.transformer.layers.20.ffn.shared_experts.w3", |
| "model.transformer.layers.21.attn.compressor.wkv", |
| "model.transformer.layers.21.attn.compressor.wgate", |
| "model.transformer.layers.21.ffn.shared_experts.w1", |
| "model.transformer.layers.21.ffn.shared_experts.w2", |
| "model.transformer.layers.21.ffn.shared_experts.w3", |
| "model.transformer.layers.22.attn.compressor.wkv", |
| "model.transformer.layers.22.attn.compressor.wgate", |
| "model.transformer.layers.22.attn.indexer.wq_b", |
| "model.transformer.layers.22.attn.indexer.weights_proj", |
| "model.transformer.layers.22.attn.indexer.compressor.wkv", |
| "model.transformer.layers.22.attn.indexer.compressor.wgate", |
| "model.transformer.layers.22.ffn.shared_experts.w1", |
| "model.transformer.layers.22.ffn.shared_experts.w2", |
| "model.transformer.layers.22.ffn.shared_experts.w3", |
| "model.transformer.layers.23.attn.compressor.wkv", |
| "model.transformer.layers.23.attn.compressor.wgate", |
| "model.transformer.layers.23.ffn.shared_experts.w1", |
| "model.transformer.layers.23.ffn.shared_experts.w2", |
| "model.transformer.layers.23.ffn.shared_experts.w3", |
| "model.transformer.layers.24.attn.compressor.wkv", |
| "model.transformer.layers.24.attn.compressor.wgate", |
| "model.transformer.layers.24.attn.indexer.wq_b", |
| "model.transformer.layers.24.attn.indexer.weights_proj", |
| "model.transformer.layers.24.attn.indexer.compressor.wkv", |
| "model.transformer.layers.24.attn.indexer.compressor.wgate", |
| "model.transformer.layers.24.ffn.shared_experts.w1", |
| "model.transformer.layers.24.ffn.shared_experts.w2", |
| "model.transformer.layers.24.ffn.shared_experts.w3", |
| "model.transformer.layers.25.attn.compressor.wkv", |
| "model.transformer.layers.25.attn.compressor.wgate", |
| "model.transformer.layers.25.ffn.shared_experts.w1", |
| "model.transformer.layers.25.ffn.shared_experts.w2", |
| "model.transformer.layers.25.ffn.shared_experts.w3", |
| "model.transformer.layers.26.attn.compressor.wkv", |
| "model.transformer.layers.26.attn.compressor.wgate", |
| "model.transformer.layers.26.attn.indexer.wq_b", |
| "model.transformer.layers.26.attn.indexer.weights_proj", |
| "model.transformer.layers.26.attn.indexer.compressor.wkv", |
| "model.transformer.layers.26.attn.indexer.compressor.wgate", |
| "model.transformer.layers.26.ffn.shared_experts.w1", |
| "model.transformer.layers.26.ffn.shared_experts.w2", |
| "model.transformer.layers.26.ffn.shared_experts.w3", |
| "model.transformer.layers.27.attn.compressor.wkv", |
| "model.transformer.layers.27.attn.compressor.wgate", |
| "model.transformer.layers.27.ffn.shared_experts.w1", |
| "model.transformer.layers.27.ffn.shared_experts.w2", |
| "model.transformer.layers.27.ffn.shared_experts.w3", |
| "model.transformer.layers.28.attn.compressor.wkv", |
| "model.transformer.layers.28.attn.compressor.wgate", |
| "model.transformer.layers.28.attn.indexer.wq_b", |
| "model.transformer.layers.28.attn.indexer.weights_proj", |
| "model.transformer.layers.28.attn.indexer.compressor.wkv", |
| "model.transformer.layers.28.attn.indexer.compressor.wgate", |
| "model.transformer.layers.28.ffn.shared_experts.w1", |
| "model.transformer.layers.28.ffn.shared_experts.w2", |
| "model.transformer.layers.28.ffn.shared_experts.w3", |
| "model.transformer.layers.29.attn.compressor.wkv", |
| "model.transformer.layers.29.attn.compressor.wgate", |
| "model.transformer.layers.29.ffn.shared_experts.w1", |
| "model.transformer.layers.29.ffn.shared_experts.w2", |
| "model.transformer.layers.29.ffn.shared_experts.w3", |
| "model.transformer.layers.30.attn.compressor.wkv", |
| "model.transformer.layers.30.attn.compressor.wgate", |
| "model.transformer.layers.30.attn.indexer.wq_b", |
| "model.transformer.layers.30.attn.indexer.weights_proj", |
| "model.transformer.layers.30.attn.indexer.compressor.wkv", |
| "model.transformer.layers.30.attn.indexer.compressor.wgate", |
| "model.transformer.layers.30.ffn.shared_experts.w1", |
| "model.transformer.layers.30.ffn.shared_experts.w2", |
| "model.transformer.layers.30.ffn.shared_experts.w3", |
| "model.transformer.layers.31.attn.compressor.wkv", |
| "model.transformer.layers.31.attn.compressor.wgate", |
| "model.transformer.layers.31.ffn.shared_experts.w1", |
| "model.transformer.layers.31.ffn.shared_experts.w2", |
| "model.transformer.layers.31.ffn.shared_experts.w3", |
| "model.transformer.layers.32.attn.compressor.wkv", |
| "model.transformer.layers.32.attn.compressor.wgate", |
| "model.transformer.layers.32.attn.indexer.wq_b", |
| "model.transformer.layers.32.attn.indexer.weights_proj", |
| "model.transformer.layers.32.attn.indexer.compressor.wkv", |
| "model.transformer.layers.32.attn.indexer.compressor.wgate", |
| "model.transformer.layers.32.ffn.shared_experts.w1", |
| "model.transformer.layers.32.ffn.shared_experts.w2", |
| "model.transformer.layers.32.ffn.shared_experts.w3", |
| "model.transformer.layers.33.attn.compressor.wkv", |
| "model.transformer.layers.33.attn.compressor.wgate", |
| "model.transformer.layers.33.ffn.shared_experts.w1", |
| "model.transformer.layers.33.ffn.shared_experts.w2", |
| "model.transformer.layers.33.ffn.shared_experts.w3", |
| "model.transformer.layers.34.attn.compressor.wkv", |
| "model.transformer.layers.34.attn.compressor.wgate", |
| "model.transformer.layers.34.attn.indexer.wq_b", |
| "model.transformer.layers.34.attn.indexer.weights_proj", |
| "model.transformer.layers.34.attn.indexer.compressor.wkv", |
| "model.transformer.layers.34.attn.indexer.compressor.wgate", |
| "model.transformer.layers.34.ffn.shared_experts.w1", |
| "model.transformer.layers.34.ffn.shared_experts.w2", |
| "model.transformer.layers.34.ffn.shared_experts.w3", |
| "model.transformer.layers.35.attn.compressor.wkv", |
| "model.transformer.layers.35.attn.compressor.wgate", |
| "model.transformer.layers.35.ffn.shared_experts.w1", |
| "model.transformer.layers.35.ffn.shared_experts.w2", |
| "model.transformer.layers.35.ffn.shared_experts.w3", |
| "model.transformer.layers.36.attn.compressor.wkv", |
| "model.transformer.layers.36.attn.compressor.wgate", |
| "model.transformer.layers.36.attn.indexer.wq_b", |
| "model.transformer.layers.36.attn.indexer.weights_proj", |
| "model.transformer.layers.36.attn.indexer.compressor.wkv", |
| "model.transformer.layers.36.attn.indexer.compressor.wgate", |
| "model.transformer.layers.36.ffn.shared_experts.w1", |
| "model.transformer.layers.36.ffn.shared_experts.w2", |
| "model.transformer.layers.36.ffn.shared_experts.w3", |
| "model.transformer.layers.37.attn.compressor.wkv", |
| "model.transformer.layers.37.attn.compressor.wgate", |
| "model.transformer.layers.37.ffn.shared_experts.w1", |
| "model.transformer.layers.37.ffn.shared_experts.w2", |
| "model.transformer.layers.37.ffn.shared_experts.w3", |
| "model.transformer.layers.38.attn.compressor.wkv", |
| "model.transformer.layers.38.attn.compressor.wgate", |
| "model.transformer.layers.38.attn.indexer.wq_b", |
| "model.transformer.layers.38.attn.indexer.weights_proj", |
| "model.transformer.layers.38.attn.indexer.compressor.wkv", |
| "model.transformer.layers.38.attn.indexer.compressor.wgate", |
| "model.transformer.layers.38.ffn.shared_experts.w1", |
| "model.transformer.layers.38.ffn.shared_experts.w2", |
| "model.transformer.layers.38.ffn.shared_experts.w3", |
| "model.transformer.layers.39.attn.compressor.wkv", |
| "model.transformer.layers.39.attn.compressor.wgate", |
| "model.transformer.layers.39.ffn.shared_experts.w1", |
| "model.transformer.layers.39.ffn.shared_experts.w2", |
| "model.transformer.layers.39.ffn.shared_experts.w3", |
| "model.transformer.layers.40.attn.compressor.wkv", |
| "model.transformer.layers.40.attn.compressor.wgate", |
| "model.transformer.layers.40.attn.indexer.wq_b", |
| "model.transformer.layers.40.attn.indexer.weights_proj", |
| "model.transformer.layers.40.attn.indexer.compressor.wkv", |
| "model.transformer.layers.40.attn.indexer.compressor.wgate", |
| "model.transformer.layers.40.ffn.shared_experts.w1", |
| "model.transformer.layers.40.ffn.shared_experts.w2", |
| "model.transformer.layers.40.ffn.shared_experts.w3", |
| "model.transformer.layers.41.attn.compressor.wkv", |
| "model.transformer.layers.41.attn.compressor.wgate", |
| "model.transformer.layers.41.ffn.shared_experts.w1", |
| "model.transformer.layers.41.ffn.shared_experts.w2", |
| "model.transformer.layers.41.ffn.shared_experts.w3", |
| "model.transformer.layers.42.attn.compressor.wkv", |
| "model.transformer.layers.42.attn.compressor.wgate", |
| "model.transformer.layers.42.attn.indexer.wq_b", |
| "model.transformer.layers.42.attn.indexer.weights_proj", |
| "model.transformer.layers.42.attn.indexer.compressor.wkv", |
| "model.transformer.layers.42.attn.indexer.compressor.wgate", |
| "model.transformer.layers.42.ffn.shared_experts.w1", |
| "model.transformer.layers.42.ffn.shared_experts.w2", |
| "model.transformer.layers.42.ffn.shared_experts.w3", |
| "model.transformer.mtp.0.attn.wq_a", |
| "model.transformer.mtp.0.attn.wq_b", |
| "model.transformer.mtp.0.attn.wkv", |
| "model.transformer.mtp.0.attn.wo_a", |
| "model.transformer.mtp.0.attn.wo_b", |
| "model.transformer.mtp.0.ffn.experts.0.w1", |
| "model.transformer.mtp.0.ffn.experts.0.w2", |
| "model.transformer.mtp.0.ffn.experts.0.w3", |
| "model.transformer.mtp.0.ffn.experts.1.w1", |
| "model.transformer.mtp.0.ffn.experts.1.w2", |
| "model.transformer.mtp.0.ffn.experts.1.w3", |
| "model.transformer.mtp.0.ffn.experts.2.w1", |
| "model.transformer.mtp.0.ffn.experts.2.w2", |
| "model.transformer.mtp.0.ffn.experts.2.w3", |
| "model.transformer.mtp.0.ffn.experts.3.w1", |
| "model.transformer.mtp.0.ffn.experts.3.w2", |
| "model.transformer.mtp.0.ffn.experts.3.w3", |
| "model.transformer.mtp.0.ffn.experts.4.w1", |
| "model.transformer.mtp.0.ffn.experts.4.w2", |
| "model.transformer.mtp.0.ffn.experts.4.w3", |
| "model.transformer.mtp.0.ffn.experts.5.w1", |
| "model.transformer.mtp.0.ffn.experts.5.w2", |
| "model.transformer.mtp.0.ffn.experts.5.w3", |
| "model.transformer.mtp.0.ffn.experts.6.w1", |
| "model.transformer.mtp.0.ffn.experts.6.w2", |
| "model.transformer.mtp.0.ffn.experts.6.w3", |
| "model.transformer.mtp.0.ffn.experts.7.w1", |
| "model.transformer.mtp.0.ffn.experts.7.w2", |
| "model.transformer.mtp.0.ffn.experts.7.w3", |
| "model.transformer.mtp.0.ffn.experts.8.w1", |
| "model.transformer.mtp.0.ffn.experts.8.w2", |
| "model.transformer.mtp.0.ffn.experts.8.w3", |
| "model.transformer.mtp.0.ffn.experts.9.w1", |
| "model.transformer.mtp.0.ffn.experts.9.w2", |
| "model.transformer.mtp.0.ffn.experts.9.w3", |
| "model.transformer.mtp.0.ffn.experts.10.w1", |
| "model.transformer.mtp.0.ffn.experts.10.w2", |
| "model.transformer.mtp.0.ffn.experts.10.w3", |
| "model.transformer.mtp.0.ffn.experts.11.w1", |
| "model.transformer.mtp.0.ffn.experts.11.w2", |
| "model.transformer.mtp.0.ffn.experts.11.w3", |
| "model.transformer.mtp.0.ffn.experts.12.w1", |
| "model.transformer.mtp.0.ffn.experts.12.w2", |
| "model.transformer.mtp.0.ffn.experts.12.w3", |
| "model.transformer.mtp.0.ffn.experts.13.w1", |
| "model.transformer.mtp.0.ffn.experts.13.w2", |
| "model.transformer.mtp.0.ffn.experts.13.w3", |
| "model.transformer.mtp.0.ffn.experts.14.w1", |
| "model.transformer.mtp.0.ffn.experts.14.w2", |
| "model.transformer.mtp.0.ffn.experts.14.w3", |
| "model.transformer.mtp.0.ffn.experts.15.w1", |
| "model.transformer.mtp.0.ffn.experts.15.w2", |
| "model.transformer.mtp.0.ffn.experts.15.w3", |
| "model.transformer.mtp.0.ffn.experts.16.w1", |
| "model.transformer.mtp.0.ffn.experts.16.w2", |
| "model.transformer.mtp.0.ffn.experts.16.w3", |
| "model.transformer.mtp.0.ffn.experts.17.w1", |
| "model.transformer.mtp.0.ffn.experts.17.w2", |
| "model.transformer.mtp.0.ffn.experts.17.w3", |
| "model.transformer.mtp.0.ffn.experts.18.w1", |
| "model.transformer.mtp.0.ffn.experts.18.w2", |
| "model.transformer.mtp.0.ffn.experts.18.w3", |
| "model.transformer.mtp.0.ffn.experts.19.w1", |
| "model.transformer.mtp.0.ffn.experts.19.w2", |
| "model.transformer.mtp.0.ffn.experts.19.w3", |
| "model.transformer.mtp.0.ffn.experts.20.w1", |
| "model.transformer.mtp.0.ffn.experts.20.w2", |
| "model.transformer.mtp.0.ffn.experts.20.w3", |
| "model.transformer.mtp.0.ffn.experts.21.w1", |
| "model.transformer.mtp.0.ffn.experts.21.w2", |
| "model.transformer.mtp.0.ffn.experts.21.w3", |
| "model.transformer.mtp.0.ffn.experts.22.w1", |
| "model.transformer.mtp.0.ffn.experts.22.w2", |
| "model.transformer.mtp.0.ffn.experts.22.w3", |
| "model.transformer.mtp.0.ffn.experts.23.w1", |
| "model.transformer.mtp.0.ffn.experts.23.w2", |
| "model.transformer.mtp.0.ffn.experts.23.w3", |
| "model.transformer.mtp.0.ffn.experts.24.w1", |
| "model.transformer.mtp.0.ffn.experts.24.w2", |
| "model.transformer.mtp.0.ffn.experts.24.w3", |
| "model.transformer.mtp.0.ffn.experts.25.w1", |
| "model.transformer.mtp.0.ffn.experts.25.w2", |
| "model.transformer.mtp.0.ffn.experts.25.w3", |
| "model.transformer.mtp.0.ffn.experts.26.w1", |
| "model.transformer.mtp.0.ffn.experts.26.w2", |
| "model.transformer.mtp.0.ffn.experts.26.w3", |
| "model.transformer.mtp.0.ffn.experts.27.w1", |
| "model.transformer.mtp.0.ffn.experts.27.w2", |
| "model.transformer.mtp.0.ffn.experts.27.w3", |
| "model.transformer.mtp.0.ffn.experts.28.w1", |
| "model.transformer.mtp.0.ffn.experts.28.w2", |
| "model.transformer.mtp.0.ffn.experts.28.w3", |
| "model.transformer.mtp.0.ffn.experts.29.w1", |
| "model.transformer.mtp.0.ffn.experts.29.w2", |
| "model.transformer.mtp.0.ffn.experts.29.w3", |
| "model.transformer.mtp.0.ffn.experts.30.w1", |
| "model.transformer.mtp.0.ffn.experts.30.w2", |
| "model.transformer.mtp.0.ffn.experts.30.w3", |
| "model.transformer.mtp.0.ffn.experts.31.w1", |
| "model.transformer.mtp.0.ffn.experts.31.w2", |
| "model.transformer.mtp.0.ffn.experts.31.w3", |
| "model.transformer.mtp.0.ffn.experts.32.w1", |
| "model.transformer.mtp.0.ffn.experts.32.w2", |
| "model.transformer.mtp.0.ffn.experts.32.w3", |
| "model.transformer.mtp.0.ffn.experts.33.w1", |
| "model.transformer.mtp.0.ffn.experts.33.w2", |
| "model.transformer.mtp.0.ffn.experts.33.w3", |
| "model.transformer.mtp.0.ffn.experts.34.w1", |
| "model.transformer.mtp.0.ffn.experts.34.w2", |
| "model.transformer.mtp.0.ffn.experts.34.w3", |
| "model.transformer.mtp.0.ffn.experts.35.w1", |
| "model.transformer.mtp.0.ffn.experts.35.w2", |
| "model.transformer.mtp.0.ffn.experts.35.w3", |
| "model.transformer.mtp.0.ffn.experts.36.w1", |
| "model.transformer.mtp.0.ffn.experts.36.w2", |
| "model.transformer.mtp.0.ffn.experts.36.w3", |
| "model.transformer.mtp.0.ffn.experts.37.w1", |
| "model.transformer.mtp.0.ffn.experts.37.w2", |
| "model.transformer.mtp.0.ffn.experts.37.w3", |
| "model.transformer.mtp.0.ffn.experts.38.w1", |
| "model.transformer.mtp.0.ffn.experts.38.w2", |
| "model.transformer.mtp.0.ffn.experts.38.w3", |
| "model.transformer.mtp.0.ffn.experts.39.w1", |
| "model.transformer.mtp.0.ffn.experts.39.w2", |
| "model.transformer.mtp.0.ffn.experts.39.w3", |
| "model.transformer.mtp.0.ffn.experts.40.w1", |
| "model.transformer.mtp.0.ffn.experts.40.w2", |
| "model.transformer.mtp.0.ffn.experts.40.w3", |
| "model.transformer.mtp.0.ffn.experts.41.w1", |
| "model.transformer.mtp.0.ffn.experts.41.w2", |
| "model.transformer.mtp.0.ffn.experts.41.w3", |
| "model.transformer.mtp.0.ffn.experts.42.w1", |
| "model.transformer.mtp.0.ffn.experts.42.w2", |
| "model.transformer.mtp.0.ffn.experts.42.w3", |
| "model.transformer.mtp.0.ffn.experts.43.w1", |
| "model.transformer.mtp.0.ffn.experts.43.w2", |
| "model.transformer.mtp.0.ffn.experts.43.w3", |
| "model.transformer.mtp.0.ffn.experts.44.w1", |
| "model.transformer.mtp.0.ffn.experts.44.w2", |
| "model.transformer.mtp.0.ffn.experts.44.w3", |
| "model.transformer.mtp.0.ffn.experts.45.w1", |
| "model.transformer.mtp.0.ffn.experts.45.w2", |
| "model.transformer.mtp.0.ffn.experts.45.w3", |
| "model.transformer.mtp.0.ffn.experts.46.w1", |
| "model.transformer.mtp.0.ffn.experts.46.w2", |
| "model.transformer.mtp.0.ffn.experts.46.w3", |
| "model.transformer.mtp.0.ffn.experts.47.w1", |
| "model.transformer.mtp.0.ffn.experts.47.w2", |
| "model.transformer.mtp.0.ffn.experts.47.w3", |
| "model.transformer.mtp.0.ffn.experts.48.w1", |
| "model.transformer.mtp.0.ffn.experts.48.w2", |
| "model.transformer.mtp.0.ffn.experts.48.w3", |
| "model.transformer.mtp.0.ffn.experts.49.w1", |
| "model.transformer.mtp.0.ffn.experts.49.w2", |
| "model.transformer.mtp.0.ffn.experts.49.w3", |
| "model.transformer.mtp.0.ffn.experts.50.w1", |
| "model.transformer.mtp.0.ffn.experts.50.w2", |
| "model.transformer.mtp.0.ffn.experts.50.w3", |
| "model.transformer.mtp.0.ffn.experts.51.w1", |
| "model.transformer.mtp.0.ffn.experts.51.w2", |
| "model.transformer.mtp.0.ffn.experts.51.w3", |
| "model.transformer.mtp.0.ffn.experts.52.w1", |
| "model.transformer.mtp.0.ffn.experts.52.w2", |
| "model.transformer.mtp.0.ffn.experts.52.w3", |
| "model.transformer.mtp.0.ffn.experts.53.w1", |
| "model.transformer.mtp.0.ffn.experts.53.w2", |
| "model.transformer.mtp.0.ffn.experts.53.w3", |
| "model.transformer.mtp.0.ffn.experts.54.w1", |
| "model.transformer.mtp.0.ffn.experts.54.w2", |
| "model.transformer.mtp.0.ffn.experts.54.w3", |
| "model.transformer.mtp.0.ffn.experts.55.w1", |
| "model.transformer.mtp.0.ffn.experts.55.w2", |
| "model.transformer.mtp.0.ffn.experts.55.w3", |
| "model.transformer.mtp.0.ffn.experts.56.w1", |
| "model.transformer.mtp.0.ffn.experts.56.w2", |
| "model.transformer.mtp.0.ffn.experts.56.w3", |
| "model.transformer.mtp.0.ffn.experts.57.w1", |
| "model.transformer.mtp.0.ffn.experts.57.w2", |
| "model.transformer.mtp.0.ffn.experts.57.w3", |
| "model.transformer.mtp.0.ffn.experts.58.w1", |
| "model.transformer.mtp.0.ffn.experts.58.w2", |
| "model.transformer.mtp.0.ffn.experts.58.w3", |
| "model.transformer.mtp.0.ffn.experts.59.w1", |
| "model.transformer.mtp.0.ffn.experts.59.w2", |
| "model.transformer.mtp.0.ffn.experts.59.w3", |
| "model.transformer.mtp.0.ffn.experts.60.w1", |
| "model.transformer.mtp.0.ffn.experts.60.w2", |
| "model.transformer.mtp.0.ffn.experts.60.w3", |
| "model.transformer.mtp.0.ffn.experts.61.w1", |
| "model.transformer.mtp.0.ffn.experts.61.w2", |
| "model.transformer.mtp.0.ffn.experts.61.w3", |
| "model.transformer.mtp.0.ffn.experts.62.w1", |
| "model.transformer.mtp.0.ffn.experts.62.w2", |
| "model.transformer.mtp.0.ffn.experts.62.w3", |
| "model.transformer.mtp.0.ffn.experts.63.w1", |
| "model.transformer.mtp.0.ffn.experts.63.w2", |
| "model.transformer.mtp.0.ffn.experts.63.w3", |
| "model.transformer.mtp.0.ffn.experts.64.w1", |
| "model.transformer.mtp.0.ffn.experts.64.w2", |
| "model.transformer.mtp.0.ffn.experts.64.w3", |
| "model.transformer.mtp.0.ffn.experts.65.w1", |
| "model.transformer.mtp.0.ffn.experts.65.w2", |
| "model.transformer.mtp.0.ffn.experts.65.w3", |
| "model.transformer.mtp.0.ffn.experts.66.w1", |
| "model.transformer.mtp.0.ffn.experts.66.w2", |
| "model.transformer.mtp.0.ffn.experts.66.w3", |
| "model.transformer.mtp.0.ffn.experts.67.w1", |
| "model.transformer.mtp.0.ffn.experts.67.w2", |
| "model.transformer.mtp.0.ffn.experts.67.w3", |
| "model.transformer.mtp.0.ffn.experts.68.w1", |
| "model.transformer.mtp.0.ffn.experts.68.w2", |
| "model.transformer.mtp.0.ffn.experts.68.w3", |
| "model.transformer.mtp.0.ffn.experts.69.w1", |
| "model.transformer.mtp.0.ffn.experts.69.w2", |
| "model.transformer.mtp.0.ffn.experts.69.w3", |
| "model.transformer.mtp.0.ffn.experts.70.w1", |
| "model.transformer.mtp.0.ffn.experts.70.w2", |
| "model.transformer.mtp.0.ffn.experts.70.w3", |
| "model.transformer.mtp.0.ffn.experts.71.w1", |
| "model.transformer.mtp.0.ffn.experts.71.w2", |
| "model.transformer.mtp.0.ffn.experts.71.w3", |
| "model.transformer.mtp.0.ffn.experts.72.w1", |
| "model.transformer.mtp.0.ffn.experts.72.w2", |
| "model.transformer.mtp.0.ffn.experts.72.w3", |
| "model.transformer.mtp.0.ffn.experts.73.w1", |
| "model.transformer.mtp.0.ffn.experts.73.w2", |
| "model.transformer.mtp.0.ffn.experts.73.w3", |
| "model.transformer.mtp.0.ffn.experts.74.w1", |
| "model.transformer.mtp.0.ffn.experts.74.w2", |
| "model.transformer.mtp.0.ffn.experts.74.w3", |
| "model.transformer.mtp.0.ffn.experts.75.w1", |
| "model.transformer.mtp.0.ffn.experts.75.w2", |
| "model.transformer.mtp.0.ffn.experts.75.w3", |
| "model.transformer.mtp.0.ffn.experts.76.w1", |
| "model.transformer.mtp.0.ffn.experts.76.w2", |
| "model.transformer.mtp.0.ffn.experts.76.w3", |
| "model.transformer.mtp.0.ffn.experts.77.w1", |
| "model.transformer.mtp.0.ffn.experts.77.w2", |
| "model.transformer.mtp.0.ffn.experts.77.w3", |
| "model.transformer.mtp.0.ffn.experts.78.w1", |
| "model.transformer.mtp.0.ffn.experts.78.w2", |
| "model.transformer.mtp.0.ffn.experts.78.w3", |
| "model.transformer.mtp.0.ffn.experts.79.w1", |
| "model.transformer.mtp.0.ffn.experts.79.w2", |
| "model.transformer.mtp.0.ffn.experts.79.w3", |
| "model.transformer.mtp.0.ffn.experts.80.w1", |
| "model.transformer.mtp.0.ffn.experts.80.w2", |
| "model.transformer.mtp.0.ffn.experts.80.w3", |
| "model.transformer.mtp.0.ffn.experts.81.w1", |
| "model.transformer.mtp.0.ffn.experts.81.w2", |
| "model.transformer.mtp.0.ffn.experts.81.w3", |
| "model.transformer.mtp.0.ffn.experts.82.w1", |
| "model.transformer.mtp.0.ffn.experts.82.w2", |
| "model.transformer.mtp.0.ffn.experts.82.w3", |
| "model.transformer.mtp.0.ffn.experts.83.w1", |
| "model.transformer.mtp.0.ffn.experts.83.w2", |
| "model.transformer.mtp.0.ffn.experts.83.w3", |
| "model.transformer.mtp.0.ffn.experts.84.w1", |
| "model.transformer.mtp.0.ffn.experts.84.w2", |
| "model.transformer.mtp.0.ffn.experts.84.w3", |
| "model.transformer.mtp.0.ffn.experts.85.w1", |
| "model.transformer.mtp.0.ffn.experts.85.w2", |
| "model.transformer.mtp.0.ffn.experts.85.w3", |
| "model.transformer.mtp.0.ffn.experts.86.w1", |
| "model.transformer.mtp.0.ffn.experts.86.w2", |
| "model.transformer.mtp.0.ffn.experts.86.w3", |
| "model.transformer.mtp.0.ffn.experts.87.w1", |
| "model.transformer.mtp.0.ffn.experts.87.w2", |
| "model.transformer.mtp.0.ffn.experts.87.w3", |
| "model.transformer.mtp.0.ffn.experts.88.w1", |
| "model.transformer.mtp.0.ffn.experts.88.w2", |
| "model.transformer.mtp.0.ffn.experts.88.w3", |
| "model.transformer.mtp.0.ffn.experts.89.w1", |
| "model.transformer.mtp.0.ffn.experts.89.w2", |
| "model.transformer.mtp.0.ffn.experts.89.w3", |
| "model.transformer.mtp.0.ffn.experts.90.w1", |
| "model.transformer.mtp.0.ffn.experts.90.w2", |
| "model.transformer.mtp.0.ffn.experts.90.w3", |
| "model.transformer.mtp.0.ffn.experts.91.w1", |
| "model.transformer.mtp.0.ffn.experts.91.w2", |
| "model.transformer.mtp.0.ffn.experts.91.w3", |
| "model.transformer.mtp.0.ffn.experts.92.w1", |
| "model.transformer.mtp.0.ffn.experts.92.w2", |
| "model.transformer.mtp.0.ffn.experts.92.w3", |
| "model.transformer.mtp.0.ffn.experts.93.w1", |
| "model.transformer.mtp.0.ffn.experts.93.w2", |
| "model.transformer.mtp.0.ffn.experts.93.w3", |
| "model.transformer.mtp.0.ffn.experts.94.w1", |
| "model.transformer.mtp.0.ffn.experts.94.w2", |
| "model.transformer.mtp.0.ffn.experts.94.w3", |
| "model.transformer.mtp.0.ffn.experts.95.w1", |
| "model.transformer.mtp.0.ffn.experts.95.w2", |
| "model.transformer.mtp.0.ffn.experts.95.w3", |
| "model.transformer.mtp.0.ffn.experts.96.w1", |
| "model.transformer.mtp.0.ffn.experts.96.w2", |
| "model.transformer.mtp.0.ffn.experts.96.w3", |
| "model.transformer.mtp.0.ffn.experts.97.w1", |
| "model.transformer.mtp.0.ffn.experts.97.w2", |
| "model.transformer.mtp.0.ffn.experts.97.w3", |
| "model.transformer.mtp.0.ffn.experts.98.w1", |
| "model.transformer.mtp.0.ffn.experts.98.w2", |
| "model.transformer.mtp.0.ffn.experts.98.w3", |
| "model.transformer.mtp.0.ffn.experts.99.w1", |
| "model.transformer.mtp.0.ffn.experts.99.w2", |
| "model.transformer.mtp.0.ffn.experts.99.w3", |
| "model.transformer.mtp.0.ffn.experts.100.w1", |
| "model.transformer.mtp.0.ffn.experts.100.w2", |
| "model.transformer.mtp.0.ffn.experts.100.w3", |
| "model.transformer.mtp.0.ffn.experts.101.w1", |
| "model.transformer.mtp.0.ffn.experts.101.w2", |
| "model.transformer.mtp.0.ffn.experts.101.w3", |
| "model.transformer.mtp.0.ffn.experts.102.w1", |
| "model.transformer.mtp.0.ffn.experts.102.w2", |
| "model.transformer.mtp.0.ffn.experts.102.w3", |
| "model.transformer.mtp.0.ffn.experts.103.w1", |
| "model.transformer.mtp.0.ffn.experts.103.w2", |
| "model.transformer.mtp.0.ffn.experts.103.w3", |
| "model.transformer.mtp.0.ffn.experts.104.w1", |
| "model.transformer.mtp.0.ffn.experts.104.w2", |
| "model.transformer.mtp.0.ffn.experts.104.w3", |
| "model.transformer.mtp.0.ffn.experts.105.w1", |
| "model.transformer.mtp.0.ffn.experts.105.w2", |
| "model.transformer.mtp.0.ffn.experts.105.w3", |
| "model.transformer.mtp.0.ffn.experts.106.w1", |
| "model.transformer.mtp.0.ffn.experts.106.w2", |
| "model.transformer.mtp.0.ffn.experts.106.w3", |
| "model.transformer.mtp.0.ffn.experts.107.w1", |
| "model.transformer.mtp.0.ffn.experts.107.w2", |
| "model.transformer.mtp.0.ffn.experts.107.w3", |
| "model.transformer.mtp.0.ffn.experts.108.w1", |
| "model.transformer.mtp.0.ffn.experts.108.w2", |
| "model.transformer.mtp.0.ffn.experts.108.w3", |
| "model.transformer.mtp.0.ffn.experts.109.w1", |
| "model.transformer.mtp.0.ffn.experts.109.w2", |
| "model.transformer.mtp.0.ffn.experts.109.w3", |
| "model.transformer.mtp.0.ffn.experts.110.w1", |
| "model.transformer.mtp.0.ffn.experts.110.w2", |
| "model.transformer.mtp.0.ffn.experts.110.w3", |
| "model.transformer.mtp.0.ffn.experts.111.w1", |
| "model.transformer.mtp.0.ffn.experts.111.w2", |
| "model.transformer.mtp.0.ffn.experts.111.w3", |
| "model.transformer.mtp.0.ffn.experts.112.w1", |
| "model.transformer.mtp.0.ffn.experts.112.w2", |
| "model.transformer.mtp.0.ffn.experts.112.w3", |
| "model.transformer.mtp.0.ffn.experts.113.w1", |
| "model.transformer.mtp.0.ffn.experts.113.w2", |
| "model.transformer.mtp.0.ffn.experts.113.w3", |
| "model.transformer.mtp.0.ffn.experts.114.w1", |
| "model.transformer.mtp.0.ffn.experts.114.w2", |
| "model.transformer.mtp.0.ffn.experts.114.w3", |
| "model.transformer.mtp.0.ffn.experts.115.w1", |
| "model.transformer.mtp.0.ffn.experts.115.w2", |
| "model.transformer.mtp.0.ffn.experts.115.w3", |
| "model.transformer.mtp.0.ffn.experts.116.w1", |
| "model.transformer.mtp.0.ffn.experts.116.w2", |
| "model.transformer.mtp.0.ffn.experts.116.w3", |
| "model.transformer.mtp.0.ffn.experts.117.w1", |
| "model.transformer.mtp.0.ffn.experts.117.w2", |
| "model.transformer.mtp.0.ffn.experts.117.w3", |
| "model.transformer.mtp.0.ffn.experts.118.w1", |
| "model.transformer.mtp.0.ffn.experts.118.w2", |
| "model.transformer.mtp.0.ffn.experts.118.w3", |
| "model.transformer.mtp.0.ffn.experts.119.w1", |
| "model.transformer.mtp.0.ffn.experts.119.w2", |
| "model.transformer.mtp.0.ffn.experts.119.w3", |
| "model.transformer.mtp.0.ffn.experts.120.w1", |
| "model.transformer.mtp.0.ffn.experts.120.w2", |
| "model.transformer.mtp.0.ffn.experts.120.w3", |
| "model.transformer.mtp.0.ffn.experts.121.w1", |
| "model.transformer.mtp.0.ffn.experts.121.w2", |
| "model.transformer.mtp.0.ffn.experts.121.w3", |
| "model.transformer.mtp.0.ffn.experts.122.w1", |
| "model.transformer.mtp.0.ffn.experts.122.w2", |
| "model.transformer.mtp.0.ffn.experts.122.w3", |
| "model.transformer.mtp.0.ffn.experts.123.w1", |
| "model.transformer.mtp.0.ffn.experts.123.w2", |
| "model.transformer.mtp.0.ffn.experts.123.w3", |
| "model.transformer.mtp.0.ffn.experts.124.w1", |
| "model.transformer.mtp.0.ffn.experts.124.w2", |
| "model.transformer.mtp.0.ffn.experts.124.w3", |
| "model.transformer.mtp.0.ffn.experts.125.w1", |
| "model.transformer.mtp.0.ffn.experts.125.w2", |
| "model.transformer.mtp.0.ffn.experts.125.w3", |
| "model.transformer.mtp.0.ffn.experts.126.w1", |
| "model.transformer.mtp.0.ffn.experts.126.w2", |
| "model.transformer.mtp.0.ffn.experts.126.w3", |
| "model.transformer.mtp.0.ffn.experts.127.w1", |
| "model.transformer.mtp.0.ffn.experts.127.w2", |
| "model.transformer.mtp.0.ffn.experts.127.w3", |
| "model.transformer.mtp.0.ffn.experts.128.w1", |
| "model.transformer.mtp.0.ffn.experts.128.w2", |
| "model.transformer.mtp.0.ffn.experts.128.w3", |
| "model.transformer.mtp.0.ffn.experts.129.w1", |
| "model.transformer.mtp.0.ffn.experts.129.w2", |
| "model.transformer.mtp.0.ffn.experts.129.w3", |
| "model.transformer.mtp.0.ffn.experts.130.w1", |
| "model.transformer.mtp.0.ffn.experts.130.w2", |
| "model.transformer.mtp.0.ffn.experts.130.w3", |
| "model.transformer.mtp.0.ffn.experts.131.w1", |
| "model.transformer.mtp.0.ffn.experts.131.w2", |
| "model.transformer.mtp.0.ffn.experts.131.w3", |
| "model.transformer.mtp.0.ffn.experts.132.w1", |
| "model.transformer.mtp.0.ffn.experts.132.w2", |
| "model.transformer.mtp.0.ffn.experts.132.w3", |
| "model.transformer.mtp.0.ffn.experts.133.w1", |
| "model.transformer.mtp.0.ffn.experts.133.w2", |
| "model.transformer.mtp.0.ffn.experts.133.w3", |
| "model.transformer.mtp.0.ffn.experts.134.w1", |
| "model.transformer.mtp.0.ffn.experts.134.w2", |
| "model.transformer.mtp.0.ffn.experts.134.w3", |
| "model.transformer.mtp.0.ffn.experts.135.w1", |
| "model.transformer.mtp.0.ffn.experts.135.w2", |
| "model.transformer.mtp.0.ffn.experts.135.w3", |
| "model.transformer.mtp.0.ffn.experts.136.w1", |
| "model.transformer.mtp.0.ffn.experts.136.w2", |
| "model.transformer.mtp.0.ffn.experts.136.w3", |
| "model.transformer.mtp.0.ffn.experts.137.w1", |
| "model.transformer.mtp.0.ffn.experts.137.w2", |
| "model.transformer.mtp.0.ffn.experts.137.w3", |
| "model.transformer.mtp.0.ffn.experts.138.w1", |
| "model.transformer.mtp.0.ffn.experts.138.w2", |
| "model.transformer.mtp.0.ffn.experts.138.w3", |
| "model.transformer.mtp.0.ffn.experts.139.w1", |
| "model.transformer.mtp.0.ffn.experts.139.w2", |
| "model.transformer.mtp.0.ffn.experts.139.w3", |
| "model.transformer.mtp.0.ffn.experts.140.w1", |
| "model.transformer.mtp.0.ffn.experts.140.w2", |
| "model.transformer.mtp.0.ffn.experts.140.w3", |
| "model.transformer.mtp.0.ffn.experts.141.w1", |
| "model.transformer.mtp.0.ffn.experts.141.w2", |
| "model.transformer.mtp.0.ffn.experts.141.w3", |
| "model.transformer.mtp.0.ffn.experts.142.w1", |
| "model.transformer.mtp.0.ffn.experts.142.w2", |
| "model.transformer.mtp.0.ffn.experts.142.w3", |
| "model.transformer.mtp.0.ffn.experts.143.w1", |
| "model.transformer.mtp.0.ffn.experts.143.w2", |
| "model.transformer.mtp.0.ffn.experts.143.w3", |
| "model.transformer.mtp.0.ffn.experts.144.w1", |
| "model.transformer.mtp.0.ffn.experts.144.w2", |
| "model.transformer.mtp.0.ffn.experts.144.w3", |
| "model.transformer.mtp.0.ffn.experts.145.w1", |
| "model.transformer.mtp.0.ffn.experts.145.w2", |
| "model.transformer.mtp.0.ffn.experts.145.w3", |
| "model.transformer.mtp.0.ffn.experts.146.w1", |
| "model.transformer.mtp.0.ffn.experts.146.w2", |
| "model.transformer.mtp.0.ffn.experts.146.w3", |
| "model.transformer.mtp.0.ffn.experts.147.w1", |
| "model.transformer.mtp.0.ffn.experts.147.w2", |
| "model.transformer.mtp.0.ffn.experts.147.w3", |
| "model.transformer.mtp.0.ffn.experts.148.w1", |
| "model.transformer.mtp.0.ffn.experts.148.w2", |
| "model.transformer.mtp.0.ffn.experts.148.w3", |
| "model.transformer.mtp.0.ffn.experts.149.w1", |
| "model.transformer.mtp.0.ffn.experts.149.w2", |
| "model.transformer.mtp.0.ffn.experts.149.w3", |
| "model.transformer.mtp.0.ffn.experts.150.w1", |
| "model.transformer.mtp.0.ffn.experts.150.w2", |
| "model.transformer.mtp.0.ffn.experts.150.w3", |
| "model.transformer.mtp.0.ffn.experts.151.w1", |
| "model.transformer.mtp.0.ffn.experts.151.w2", |
| "model.transformer.mtp.0.ffn.experts.151.w3", |
| "model.transformer.mtp.0.ffn.experts.152.w1", |
| "model.transformer.mtp.0.ffn.experts.152.w2", |
| "model.transformer.mtp.0.ffn.experts.152.w3", |
| "model.transformer.mtp.0.ffn.experts.153.w1", |
| "model.transformer.mtp.0.ffn.experts.153.w2", |
| "model.transformer.mtp.0.ffn.experts.153.w3", |
| "model.transformer.mtp.0.ffn.experts.154.w1", |
| "model.transformer.mtp.0.ffn.experts.154.w2", |
| "model.transformer.mtp.0.ffn.experts.154.w3", |
| "model.transformer.mtp.0.ffn.experts.155.w1", |
| "model.transformer.mtp.0.ffn.experts.155.w2", |
| "model.transformer.mtp.0.ffn.experts.155.w3", |
| "model.transformer.mtp.0.ffn.experts.156.w1", |
| "model.transformer.mtp.0.ffn.experts.156.w2", |
| "model.transformer.mtp.0.ffn.experts.156.w3", |
| "model.transformer.mtp.0.ffn.experts.157.w1", |
| "model.transformer.mtp.0.ffn.experts.157.w2", |
| "model.transformer.mtp.0.ffn.experts.157.w3", |
| "model.transformer.mtp.0.ffn.experts.158.w1", |
| "model.transformer.mtp.0.ffn.experts.158.w2", |
| "model.transformer.mtp.0.ffn.experts.158.w3", |
| "model.transformer.mtp.0.ffn.experts.159.w1", |
| "model.transformer.mtp.0.ffn.experts.159.w2", |
| "model.transformer.mtp.0.ffn.experts.159.w3", |
| "model.transformer.mtp.0.ffn.experts.160.w1", |
| "model.transformer.mtp.0.ffn.experts.160.w2", |
| "model.transformer.mtp.0.ffn.experts.160.w3", |
| "model.transformer.mtp.0.ffn.experts.161.w1", |
| "model.transformer.mtp.0.ffn.experts.161.w2", |
| "model.transformer.mtp.0.ffn.experts.161.w3", |
| "model.transformer.mtp.0.ffn.experts.162.w1", |
| "model.transformer.mtp.0.ffn.experts.162.w2", |
| "model.transformer.mtp.0.ffn.experts.162.w3", |
| "model.transformer.mtp.0.ffn.experts.163.w1", |
| "model.transformer.mtp.0.ffn.experts.163.w2", |
| "model.transformer.mtp.0.ffn.experts.163.w3", |
| "model.transformer.mtp.0.ffn.experts.164.w1", |
| "model.transformer.mtp.0.ffn.experts.164.w2", |
| "model.transformer.mtp.0.ffn.experts.164.w3", |
| "model.transformer.mtp.0.ffn.experts.165.w1", |
| "model.transformer.mtp.0.ffn.experts.165.w2", |
| "model.transformer.mtp.0.ffn.experts.165.w3", |
| "model.transformer.mtp.0.ffn.experts.166.w1", |
| "model.transformer.mtp.0.ffn.experts.166.w2", |
| "model.transformer.mtp.0.ffn.experts.166.w3", |
| "model.transformer.mtp.0.ffn.experts.167.w1", |
| "model.transformer.mtp.0.ffn.experts.167.w2", |
| "model.transformer.mtp.0.ffn.experts.167.w3", |
| "model.transformer.mtp.0.ffn.experts.168.w1", |
| "model.transformer.mtp.0.ffn.experts.168.w2", |
| "model.transformer.mtp.0.ffn.experts.168.w3", |
| "model.transformer.mtp.0.ffn.experts.169.w1", |
| "model.transformer.mtp.0.ffn.experts.169.w2", |
| "model.transformer.mtp.0.ffn.experts.169.w3", |
| "model.transformer.mtp.0.ffn.experts.170.w1", |
| "model.transformer.mtp.0.ffn.experts.170.w2", |
| "model.transformer.mtp.0.ffn.experts.170.w3", |
| "model.transformer.mtp.0.ffn.experts.171.w1", |
| "model.transformer.mtp.0.ffn.experts.171.w2", |
| "model.transformer.mtp.0.ffn.experts.171.w3", |
| "model.transformer.mtp.0.ffn.experts.172.w1", |
| "model.transformer.mtp.0.ffn.experts.172.w2", |
| "model.transformer.mtp.0.ffn.experts.172.w3", |
| "model.transformer.mtp.0.ffn.experts.173.w1", |
| "model.transformer.mtp.0.ffn.experts.173.w2", |
| "model.transformer.mtp.0.ffn.experts.173.w3", |
| "model.transformer.mtp.0.ffn.experts.174.w1", |
| "model.transformer.mtp.0.ffn.experts.174.w2", |
| "model.transformer.mtp.0.ffn.experts.174.w3", |
| "model.transformer.mtp.0.ffn.experts.175.w1", |
| "model.transformer.mtp.0.ffn.experts.175.w2", |
| "model.transformer.mtp.0.ffn.experts.175.w3", |
| "model.transformer.mtp.0.ffn.experts.176.w1", |
| "model.transformer.mtp.0.ffn.experts.176.w2", |
| "model.transformer.mtp.0.ffn.experts.176.w3", |
| "model.transformer.mtp.0.ffn.experts.177.w1", |
| "model.transformer.mtp.0.ffn.experts.177.w2", |
| "model.transformer.mtp.0.ffn.experts.177.w3", |
| "model.transformer.mtp.0.ffn.experts.178.w1", |
| "model.transformer.mtp.0.ffn.experts.178.w2", |
| "model.transformer.mtp.0.ffn.experts.178.w3", |
| "model.transformer.mtp.0.ffn.experts.179.w1", |
| "model.transformer.mtp.0.ffn.experts.179.w2", |
| "model.transformer.mtp.0.ffn.experts.179.w3", |
| "model.transformer.mtp.0.ffn.experts.180.w1", |
| "model.transformer.mtp.0.ffn.experts.180.w2", |
| "model.transformer.mtp.0.ffn.experts.180.w3", |
| "model.transformer.mtp.0.ffn.experts.181.w1", |
| "model.transformer.mtp.0.ffn.experts.181.w2", |
| "model.transformer.mtp.0.ffn.experts.181.w3", |
| "model.transformer.mtp.0.ffn.experts.182.w1", |
| "model.transformer.mtp.0.ffn.experts.182.w2", |
| "model.transformer.mtp.0.ffn.experts.182.w3", |
| "model.transformer.mtp.0.ffn.experts.183.w1", |
| "model.transformer.mtp.0.ffn.experts.183.w2", |
| "model.transformer.mtp.0.ffn.experts.183.w3", |
| "model.transformer.mtp.0.ffn.experts.184.w1", |
| "model.transformer.mtp.0.ffn.experts.184.w2", |
| "model.transformer.mtp.0.ffn.experts.184.w3", |
| "model.transformer.mtp.0.ffn.experts.185.w1", |
| "model.transformer.mtp.0.ffn.experts.185.w2", |
| "model.transformer.mtp.0.ffn.experts.185.w3", |
| "model.transformer.mtp.0.ffn.experts.186.w1", |
| "model.transformer.mtp.0.ffn.experts.186.w2", |
| "model.transformer.mtp.0.ffn.experts.186.w3", |
| "model.transformer.mtp.0.ffn.experts.187.w1", |
| "model.transformer.mtp.0.ffn.experts.187.w2", |
| "model.transformer.mtp.0.ffn.experts.187.w3", |
| "model.transformer.mtp.0.ffn.experts.188.w1", |
| "model.transformer.mtp.0.ffn.experts.188.w2", |
| "model.transformer.mtp.0.ffn.experts.188.w3", |
| "model.transformer.mtp.0.ffn.experts.189.w1", |
| "model.transformer.mtp.0.ffn.experts.189.w2", |
| "model.transformer.mtp.0.ffn.experts.189.w3", |
| "model.transformer.mtp.0.ffn.experts.190.w1", |
| "model.transformer.mtp.0.ffn.experts.190.w2", |
| "model.transformer.mtp.0.ffn.experts.190.w3", |
| "model.transformer.mtp.0.ffn.experts.191.w1", |
| "model.transformer.mtp.0.ffn.experts.191.w2", |
| "model.transformer.mtp.0.ffn.experts.191.w3", |
| "model.transformer.mtp.0.ffn.experts.192.w1", |
| "model.transformer.mtp.0.ffn.experts.192.w2", |
| "model.transformer.mtp.0.ffn.experts.192.w3", |
| "model.transformer.mtp.0.ffn.experts.193.w1", |
| "model.transformer.mtp.0.ffn.experts.193.w2", |
| "model.transformer.mtp.0.ffn.experts.193.w3", |
| "model.transformer.mtp.0.ffn.experts.194.w1", |
| "model.transformer.mtp.0.ffn.experts.194.w2", |
| "model.transformer.mtp.0.ffn.experts.194.w3", |
| "model.transformer.mtp.0.ffn.experts.195.w1", |
| "model.transformer.mtp.0.ffn.experts.195.w2", |
| "model.transformer.mtp.0.ffn.experts.195.w3", |
| "model.transformer.mtp.0.ffn.experts.196.w1", |
| "model.transformer.mtp.0.ffn.experts.196.w2", |
| "model.transformer.mtp.0.ffn.experts.196.w3", |
| "model.transformer.mtp.0.ffn.experts.197.w1", |
| "model.transformer.mtp.0.ffn.experts.197.w2", |
| "model.transformer.mtp.0.ffn.experts.197.w3", |
| "model.transformer.mtp.0.ffn.experts.198.w1", |
| "model.transformer.mtp.0.ffn.experts.198.w2", |
| "model.transformer.mtp.0.ffn.experts.198.w3", |
| "model.transformer.mtp.0.ffn.experts.199.w1", |
| "model.transformer.mtp.0.ffn.experts.199.w2", |
| "model.transformer.mtp.0.ffn.experts.199.w3", |
| "model.transformer.mtp.0.ffn.experts.200.w1", |
| "model.transformer.mtp.0.ffn.experts.200.w2", |
| "model.transformer.mtp.0.ffn.experts.200.w3", |
| "model.transformer.mtp.0.ffn.experts.201.w1", |
| "model.transformer.mtp.0.ffn.experts.201.w2", |
| "model.transformer.mtp.0.ffn.experts.201.w3", |
| "model.transformer.mtp.0.ffn.experts.202.w1", |
| "model.transformer.mtp.0.ffn.experts.202.w2", |
| "model.transformer.mtp.0.ffn.experts.202.w3", |
| "model.transformer.mtp.0.ffn.experts.203.w1", |
| "model.transformer.mtp.0.ffn.experts.203.w2", |
| "model.transformer.mtp.0.ffn.experts.203.w3", |
| "model.transformer.mtp.0.ffn.experts.204.w1", |
| "model.transformer.mtp.0.ffn.experts.204.w2", |
| "model.transformer.mtp.0.ffn.experts.204.w3", |
| "model.transformer.mtp.0.ffn.experts.205.w1", |
| "model.transformer.mtp.0.ffn.experts.205.w2", |
| "model.transformer.mtp.0.ffn.experts.205.w3", |
| "model.transformer.mtp.0.ffn.experts.206.w1", |
| "model.transformer.mtp.0.ffn.experts.206.w2", |
| "model.transformer.mtp.0.ffn.experts.206.w3", |
| "model.transformer.mtp.0.ffn.experts.207.w1", |
| "model.transformer.mtp.0.ffn.experts.207.w2", |
| "model.transformer.mtp.0.ffn.experts.207.w3", |
| "model.transformer.mtp.0.ffn.experts.208.w1", |
| "model.transformer.mtp.0.ffn.experts.208.w2", |
| "model.transformer.mtp.0.ffn.experts.208.w3", |
| "model.transformer.mtp.0.ffn.experts.209.w1", |
| "model.transformer.mtp.0.ffn.experts.209.w2", |
| "model.transformer.mtp.0.ffn.experts.209.w3", |
| "model.transformer.mtp.0.ffn.experts.210.w1", |
| "model.transformer.mtp.0.ffn.experts.210.w2", |
| "model.transformer.mtp.0.ffn.experts.210.w3", |
| "model.transformer.mtp.0.ffn.experts.211.w1", |
| "model.transformer.mtp.0.ffn.experts.211.w2", |
| "model.transformer.mtp.0.ffn.experts.211.w3", |
| "model.transformer.mtp.0.ffn.experts.212.w1", |
| "model.transformer.mtp.0.ffn.experts.212.w2", |
| "model.transformer.mtp.0.ffn.experts.212.w3", |
| "model.transformer.mtp.0.ffn.experts.213.w1", |
| "model.transformer.mtp.0.ffn.experts.213.w2", |
| "model.transformer.mtp.0.ffn.experts.213.w3", |
| "model.transformer.mtp.0.ffn.experts.214.w1", |
| "model.transformer.mtp.0.ffn.experts.214.w2", |
| "model.transformer.mtp.0.ffn.experts.214.w3", |
| "model.transformer.mtp.0.ffn.experts.215.w1", |
| "model.transformer.mtp.0.ffn.experts.215.w2", |
| "model.transformer.mtp.0.ffn.experts.215.w3", |
| "model.transformer.mtp.0.ffn.experts.216.w1", |
| "model.transformer.mtp.0.ffn.experts.216.w2", |
| "model.transformer.mtp.0.ffn.experts.216.w3", |
| "model.transformer.mtp.0.ffn.experts.217.w1", |
| "model.transformer.mtp.0.ffn.experts.217.w2", |
| "model.transformer.mtp.0.ffn.experts.217.w3", |
| "model.transformer.mtp.0.ffn.experts.218.w1", |
| "model.transformer.mtp.0.ffn.experts.218.w2", |
| "model.transformer.mtp.0.ffn.experts.218.w3", |
| "model.transformer.mtp.0.ffn.experts.219.w1", |
| "model.transformer.mtp.0.ffn.experts.219.w2", |
| "model.transformer.mtp.0.ffn.experts.219.w3", |
| "model.transformer.mtp.0.ffn.experts.220.w1", |
| "model.transformer.mtp.0.ffn.experts.220.w2", |
| "model.transformer.mtp.0.ffn.experts.220.w3", |
| "model.transformer.mtp.0.ffn.experts.221.w1", |
| "model.transformer.mtp.0.ffn.experts.221.w2", |
| "model.transformer.mtp.0.ffn.experts.221.w3", |
| "model.transformer.mtp.0.ffn.experts.222.w1", |
| "model.transformer.mtp.0.ffn.experts.222.w2", |
| "model.transformer.mtp.0.ffn.experts.222.w3", |
| "model.transformer.mtp.0.ffn.experts.223.w1", |
| "model.transformer.mtp.0.ffn.experts.223.w2", |
| "model.transformer.mtp.0.ffn.experts.223.w3", |
| "model.transformer.mtp.0.ffn.experts.224.w1", |
| "model.transformer.mtp.0.ffn.experts.224.w2", |
| "model.transformer.mtp.0.ffn.experts.224.w3", |
| "model.transformer.mtp.0.ffn.experts.225.w1", |
| "model.transformer.mtp.0.ffn.experts.225.w2", |
| "model.transformer.mtp.0.ffn.experts.225.w3", |
| "model.transformer.mtp.0.ffn.experts.226.w1", |
| "model.transformer.mtp.0.ffn.experts.226.w2", |
| "model.transformer.mtp.0.ffn.experts.226.w3", |
| "model.transformer.mtp.0.ffn.experts.227.w1", |
| "model.transformer.mtp.0.ffn.experts.227.w2", |
| "model.transformer.mtp.0.ffn.experts.227.w3", |
| "model.transformer.mtp.0.ffn.experts.228.w1", |
| "model.transformer.mtp.0.ffn.experts.228.w2", |
| "model.transformer.mtp.0.ffn.experts.228.w3", |
| "model.transformer.mtp.0.ffn.experts.229.w1", |
| "model.transformer.mtp.0.ffn.experts.229.w2", |
| "model.transformer.mtp.0.ffn.experts.229.w3", |
| "model.transformer.mtp.0.ffn.experts.230.w1", |
| "model.transformer.mtp.0.ffn.experts.230.w2", |
| "model.transformer.mtp.0.ffn.experts.230.w3", |
| "model.transformer.mtp.0.ffn.experts.231.w1", |
| "model.transformer.mtp.0.ffn.experts.231.w2", |
| "model.transformer.mtp.0.ffn.experts.231.w3", |
| "model.transformer.mtp.0.ffn.experts.232.w1", |
| "model.transformer.mtp.0.ffn.experts.232.w2", |
| "model.transformer.mtp.0.ffn.experts.232.w3", |
| "model.transformer.mtp.0.ffn.experts.233.w1", |
| "model.transformer.mtp.0.ffn.experts.233.w2", |
| "model.transformer.mtp.0.ffn.experts.233.w3", |
| "model.transformer.mtp.0.ffn.experts.234.w1", |
| "model.transformer.mtp.0.ffn.experts.234.w2", |
| "model.transformer.mtp.0.ffn.experts.234.w3", |
| "model.transformer.mtp.0.ffn.experts.235.w1", |
| "model.transformer.mtp.0.ffn.experts.235.w2", |
| "model.transformer.mtp.0.ffn.experts.235.w3", |
| "model.transformer.mtp.0.ffn.experts.236.w1", |
| "model.transformer.mtp.0.ffn.experts.236.w2", |
| "model.transformer.mtp.0.ffn.experts.236.w3", |
| "model.transformer.mtp.0.ffn.experts.237.w1", |
| "model.transformer.mtp.0.ffn.experts.237.w2", |
| "model.transformer.mtp.0.ffn.experts.237.w3", |
| "model.transformer.mtp.0.ffn.experts.238.w1", |
| "model.transformer.mtp.0.ffn.experts.238.w2", |
| "model.transformer.mtp.0.ffn.experts.238.w3", |
| "model.transformer.mtp.0.ffn.experts.239.w1", |
| "model.transformer.mtp.0.ffn.experts.239.w2", |
| "model.transformer.mtp.0.ffn.experts.239.w3", |
| "model.transformer.mtp.0.ffn.experts.240.w1", |
| "model.transformer.mtp.0.ffn.experts.240.w2", |
| "model.transformer.mtp.0.ffn.experts.240.w3", |
| "model.transformer.mtp.0.ffn.experts.241.w1", |
| "model.transformer.mtp.0.ffn.experts.241.w2", |
| "model.transformer.mtp.0.ffn.experts.241.w3", |
| "model.transformer.mtp.0.ffn.experts.242.w1", |
| "model.transformer.mtp.0.ffn.experts.242.w2", |
| "model.transformer.mtp.0.ffn.experts.242.w3", |
| "model.transformer.mtp.0.ffn.experts.243.w1", |
| "model.transformer.mtp.0.ffn.experts.243.w2", |
| "model.transformer.mtp.0.ffn.experts.243.w3", |
| "model.transformer.mtp.0.ffn.experts.244.w1", |
| "model.transformer.mtp.0.ffn.experts.244.w2", |
| "model.transformer.mtp.0.ffn.experts.244.w3", |
| "model.transformer.mtp.0.ffn.experts.245.w1", |
| "model.transformer.mtp.0.ffn.experts.245.w2", |
| "model.transformer.mtp.0.ffn.experts.245.w3", |
| "model.transformer.mtp.0.ffn.experts.246.w1", |
| "model.transformer.mtp.0.ffn.experts.246.w2", |
| "model.transformer.mtp.0.ffn.experts.246.w3", |
| "model.transformer.mtp.0.ffn.experts.247.w1", |
| "model.transformer.mtp.0.ffn.experts.247.w2", |
| "model.transformer.mtp.0.ffn.experts.247.w3", |
| "model.transformer.mtp.0.ffn.experts.248.w1", |
| "model.transformer.mtp.0.ffn.experts.248.w2", |
| "model.transformer.mtp.0.ffn.experts.248.w3", |
| "model.transformer.mtp.0.ffn.experts.249.w1", |
| "model.transformer.mtp.0.ffn.experts.249.w2", |
| "model.transformer.mtp.0.ffn.experts.249.w3", |
| "model.transformer.mtp.0.ffn.experts.250.w1", |
| "model.transformer.mtp.0.ffn.experts.250.w2", |
| "model.transformer.mtp.0.ffn.experts.250.w3", |
| "model.transformer.mtp.0.ffn.experts.251.w1", |
| "model.transformer.mtp.0.ffn.experts.251.w2", |
| "model.transformer.mtp.0.ffn.experts.251.w3", |
| "model.transformer.mtp.0.ffn.experts.252.w1", |
| "model.transformer.mtp.0.ffn.experts.252.w2", |
| "model.transformer.mtp.0.ffn.experts.252.w3", |
| "model.transformer.mtp.0.ffn.experts.253.w1", |
| "model.transformer.mtp.0.ffn.experts.253.w2", |
| "model.transformer.mtp.0.ffn.experts.253.w3", |
| "model.transformer.mtp.0.ffn.experts.254.w1", |
| "model.transformer.mtp.0.ffn.experts.254.w2", |
| "model.transformer.mtp.0.ffn.experts.254.w3", |
| "model.transformer.mtp.0.ffn.experts.255.w1", |
| "model.transformer.mtp.0.ffn.experts.255.w2", |
| "model.transformer.mtp.0.ffn.experts.255.w3", |
| "model.transformer.mtp.0.ffn.shared_experts.w1", |
| "model.transformer.mtp.0.ffn.shared_experts.w2", |
| "model.transformer.mtp.0.ffn.shared_experts.w3", |
| "model.transformer.mtp.0.e_proj", |
| "model.transformer.mtp.0.h_proj", |
| "re:^mtp\\..*", |
| "re:.*\\.mtp_block\\..*", |
| "re:.*\\.mtp\\..*", |
| "re:.*\\.layers\\.43\\..*", |
| "re:.*\\.layers\\.43$" |
| ], |
| "kv_cache_scheme": null, |
| "quant_method": "compressed-tensors", |
| "quantization_status": "compressed", |
| "sparsity_config": {}, |
| "transform_config": {}, |
| "version": "0.15.1.a20260515", |
| "scale_fmt": "ue8m0" |
| }, |
| "rms_norm_eps": 1e-06, |
| "rope_scaling": { |
| "beta_fast": 32, |
| "beta_slow": 1, |
| "factor": 16, |
| "original_max_position_embeddings": 65536, |
| "type": "yarn" |
| }, |
| "rope_theta": 10000, |
| "routed_scaling_factor": 1.5, |
| "scoring_func": "sqrtsoftplus", |
| "sliding_window": 128, |
| "swiglu_limit": 10.0, |
| "tie_word_embeddings": true, |
| "topk_method": "noaux_tc", |
| "torch_dtype": "bfloat16", |
| "transformers_version": "4.57.1", |
| "use_cache": true, |
| "vocab_size": 129280 |
| } |