kylesayrs committed
Commit e378b44 (verified) · Parent: 83be997

Upload folder using huggingface_hub

config.json ADDED
@@ -0,0 +1,253 @@
+ {
+   "architectures": [
+     "DeepseekV4ForCausalLM"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "bos_token_id": 0,
+   "compress_ratios": [
+     0,
+     0,
+     4,
+     128,
+     4
+   ],
+   "compress_rope_parameters": {
+     "beta_fast": 32,
+     "beta_slow": 1,
+     "factor": 16,
+     "original_max_position_embeddings": 65536,
+     "partial_rotary_factor": 0.125,
+     "rope_theta": 160000.0,
+     "rope_type": "yarn",
+     "type": "yarn"
+   },
+   "compress_rope_theta": 160000.0,
+   "dtype": "bfloat16",
+   "eos_token_id": 1,
+   "first_k_dense_replace": null,
+   "hc_eps": 1e-06,
+   "hc_mult": 4,
+   "hc_sinkhorn_iters": 20,
+   "head_dim": 512,
+   "hidden_act": "silu",
+   "hidden_size": 4096,
+   "index_head_dim": 128,
+   "index_n_heads": 64,
+   "index_topk": 512,
+   "initializer_range": 0.02,
+   "intermediate_size": 18432,
+   "kv_lora_rank": null,
+   "max_position_embeddings": 1048576,
+   "model_type": "deepseek_v4",
+   "moe_intermediate_size": 2048,
+   "n_group": null,
+   "n_routed_experts": 256,
+   "n_shared_experts": 1,
+   "norm_topk_prob": true,
+   "num_attention_heads": 64,
+   "num_experts_per_tok": 6,
+   "num_hash_layers": 3,
+   "num_hidden_layers": 5,
+   "num_key_value_heads": 1,
+   "num_nextn_predict_layers": 0,
+   "o_groups": 8,
+   "o_lora_rank": 1024,
+   "output_router_logits": false,
+   "pad_token_id": null,
+   "partial_rotary_factor": 0.125,
+   "pretraining_tp": 1,
+   "q_lora_rank": 1024,
+   "qk_nope_head_dim": 448,
+   "qk_rope_head_dim": 64,
+   "quantization": {
+     "bits": 4,
+     "group_size": 32,
+     "mode": "mxfp4",
+     "model.layers.0.ffn.switch_mlp.down_proj": {
+       "bits": 4,
+       "group_size": 32,
+       "mode": "mxfp4"
+     },
+     "model.layers.0.ffn.switch_mlp.gate_proj": {
+       "bits": 4,
+       "group_size": 32,
+       "mode": "mxfp4"
+     },
+     "model.layers.0.ffn.switch_mlp.up_proj": {
+       "bits": 4,
+       "group_size": 32,
+       "mode": "mxfp4"
+     },
+     "model.layers.1.ffn.switch_mlp.down_proj": {
+       "bits": 4,
+       "group_size": 32,
+       "mode": "mxfp4"
+     },
+     "model.layers.1.ffn.switch_mlp.gate_proj": {
+       "bits": 4,
+       "group_size": 32,
+       "mode": "mxfp4"
+     },
+     "model.layers.1.ffn.switch_mlp.up_proj": {
+       "bits": 4,
+       "group_size": 32,
+       "mode": "mxfp4"
+     },
+     "model.layers.2.ffn.switch_mlp.down_proj": {
+       "bits": 4,
+       "group_size": 32,
+       "mode": "mxfp4"
+     },
+     "model.layers.2.ffn.switch_mlp.gate_proj": {
+       "bits": 4,
+       "group_size": 32,
+       "mode": "mxfp4"
+     },
+     "model.layers.2.ffn.switch_mlp.up_proj": {
+       "bits": 4,
+       "group_size": 32,
+       "mode": "mxfp4"
+     },
+     "model.layers.3.ffn.switch_mlp.down_proj": {
+       "bits": 4,
+       "group_size": 32,
+       "mode": "mxfp4"
+     },
+     "model.layers.3.ffn.switch_mlp.gate_proj": {
+       "bits": 4,
+       "group_size": 32,
+       "mode": "mxfp4"
+     },
+     "model.layers.3.ffn.switch_mlp.up_proj": {
+       "bits": 4,
+       "group_size": 32,
+       "mode": "mxfp4"
+     },
+     "model.layers.4.ffn.switch_mlp.down_proj": {
+       "bits": 4,
+       "group_size": 32,
+       "mode": "mxfp4"
+     },
+     "model.layers.4.ffn.switch_mlp.gate_proj": {
+       "bits": 4,
+       "group_size": 32,
+       "mode": "mxfp4"
+     },
+     "model.layers.4.ffn.switch_mlp.up_proj": {
+       "bits": 4,
+       "group_size": 32,
+       "mode": "mxfp4"
+     }
+   },
+   "quantization_config": {
+     "config_groups": {
+       "group_0": {
+         "format": "nvfp4-pack-quantized",
+         "input_activations": {
+           "actorder": null,
+           "block_structure": null,
+           "dynamic": "local",
+           "group_size": 16,
+           "num_bits": 4,
+           "observer": "static_minmax",
+           "observer_kwargs": {},
+           "scale_dtype": "torch.float8_e4m3fn",
+           "strategy": "tensor_group",
+           "symmetric": true,
+           "type": "float",
+           "zp_dtype": null
+         },
+         "output_activations": null,
+         "targets": [
+           "re:model.*mlp.*(gate|up|down)_proj$"
+         ],
+         "weights": {
+           "actorder": null,
+           "block_structure": null,
+           "dynamic": false,
+           "group_size": 16,
+           "num_bits": 4,
+           "observer": "memoryless_minmax",
+           "observer_kwargs": {},
+           "scale_dtype": "torch.float8_e4m3fn",
+           "strategy": "tensor_group",
+           "symmetric": true,
+           "type": "float",
+           "zp_dtype": null
+         }
+       }
+     },
+     "format": "nvfp4-pack-quantized",
+     "global_compression_ratio": null,
+     "ignore": [
+       "model.layers.0.self_attn.wq_a",
+       "model.layers.0.self_attn.wq_b",
+       "model.layers.0.self_attn.wkv",
+       "model.layers.0.self_attn.wo_b",
+       "model.layers.1.self_attn.wq_a",
+       "model.layers.1.self_attn.wq_b",
+       "model.layers.1.self_attn.wkv",
+       "model.layers.1.self_attn.wo_b",
+       "model.layers.2.self_attn.wq_a",
+       "model.layers.2.self_attn.wq_b",
+       "model.layers.2.self_attn.wkv",
+       "model.layers.2.self_attn.wo_b",
+       "model.layers.2.self_attn.compressor.wkv",
+       "model.layers.2.self_attn.compressor.wgate",
+       "model.layers.2.self_attn.compressor.indexer.wkv",
+       "model.layers.2.self_attn.compressor.indexer.wgate",
+       "model.layers.2.self_attn.compressor.indexer.wq_b",
+       "model.layers.2.self_attn.compressor.indexer.weights_proj",
+       "model.layers.3.self_attn.wq_a",
+       "model.layers.3.self_attn.wq_b",
+       "model.layers.3.self_attn.wkv",
+       "model.layers.3.self_attn.wo_b",
+       "model.layers.3.self_attn.compressor.wkv",
+       "model.layers.3.self_attn.compressor.wgate",
+       "model.layers.4.self_attn.wq_a",
+       "model.layers.4.self_attn.wq_b",
+       "model.layers.4.self_attn.wkv",
+       "model.layers.4.self_attn.wo_b",
+       "model.layers.4.self_attn.compressor.wkv",
+       "model.layers.4.self_attn.compressor.wgate",
+       "model.layers.4.self_attn.compressor.indexer.wkv",
+       "model.layers.4.self_attn.compressor.indexer.wgate",
+       "model.layers.4.self_attn.compressor.indexer.wq_b",
+       "model.layers.4.self_attn.compressor.indexer.weights_proj",
+       "lm_head"
+     ],
+     "kv_cache_scheme": null,
+     "quant_method": "compressed-tensors",
+     "quantization_status": "compressed",
+     "sparsity_config": {},
+     "transform_config": {},
+     "version": "0.15.1.dev15+g11daf97.d20260427"
+   },
+   "rms_norm_eps": 1e-06,
+   "rope_interleave": true,
+   "rope_parameters": {
+     "beta_fast": 32,
+     "beta_slow": 1,
+     "factor": 16,
+     "original_max_position_embeddings": 65536,
+     "partial_rotary_factor": 0.125,
+     "rope_theta": 10000.0,
+     "rope_type": "yarn",
+     "type": "yarn"
+   },
+   "rope_theta": 10000.0,
+   "routed_scaling_factor": 1.5,
+   "router_aux_loss_coef": 0.001,
+   "router_jitter_noise": 0.0,
+   "scoring_func": "sqrtsoftplus",
+   "sliding_window": 128,
+   "swiglu_limit": 10.0,
+   "tie_word_embeddings": false,
+   "topk_group": null,
+   "topk_method": "noaux_tc",
+   "transformers_version": "5.7.0.dev0",
+   "use_cache": true,
+   "v_head_dim": null,
+   "vocab_size": 129280
+ }
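
The `"quant_method": "compressed-tensors"` entry means Transformers defers weight decompression to the `compressed-tensors` library at load time, so the checkpoint loads like any other model. A minimal loading sketch, assuming a Transformers build new enough to ship the `deepseek_v4` architecture (the config was written by 5.7.0.dev0) and `./checkpoint` as a hypothetical local path to this uploaded folder:

```python
# Minimal sketch: load this NVFP4 compressed-tensors checkpoint.
# Assumes `pip install transformers compressed-tensors accelerate`
# and that the installed Transformers includes deepseek_v4 support.
from transformers import AutoModelForCausalLM, AutoTokenizer

ckpt = "./checkpoint"  # hypothetical path to this uploaded folder

model = AutoModelForCausalLM.from_pretrained(
    ckpt,
    dtype="auto",       # honors "dtype": "bfloat16" from config.json
    device_map="auto",  # requires accelerate; shards across devices
)
tokenizer = AutoTokenizer.from_pretrained(ckpt)
```

Note that only the MoE expert projections matched by `re:model.*mlp.*(gate|up|down)_proj$` are quantized; the attention projections and `lm_head` listed under `ignore` stay in bfloat16.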
generation_config.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 0,
+   "do_sample": true,
+   "eos_token_id": 1,
+   "temperature": 1.0,
+   "top_p": 1.0,
+   "transformers_version": "5.7.0.dev0"
+ }
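
These sampling defaults are picked up automatically by `generate()` from the checkpoint directory; spelled out explicitly (a sketch, reusing the `model` and `tokenizer` loaded above, with an illustrative prompt):

```python
# Sketch: generation with the defaults from generation_config.json.
# Passing these kwargs is equivalent to relying on the stored config.
inputs = tokenizer("Hello", return_tensors="pt").to(model.device)
output = model.generate(
    **inputs,
    do_sample=True,   # "do_sample": true
    temperature=1.0,  # "temperature": 1.0 (no logit rescaling)
    top_p=1.0,        # "top_p": 1.0 (no nucleus truncation)
    max_new_tokens=64,
)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```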
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3f0959912b15546c6ebf0b48dbe64c040685e160b0c5fd9b3bf8beaa11401fec
+ size 21510156084
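
This is a Git LFS pointer rather than the weights themselves; `oid` is the SHA-256 of the actual ~21.5 GB payload. A short integrity check after download, as a sketch:

```python
# Sketch: verify a downloaded model.safetensors against its LFS pointer.
import hashlib
import os

expected_oid = "3f0959912b15546c6ebf0b48dbe64c040685e160b0c5fd9b3bf8beaa11401fec"
expected_size = 21510156084

path = "model.safetensors"
assert os.path.getsize(path) == expected_size, "size mismatch"

digest = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # 1 MiB chunks
        digest.update(chunk)
assert digest.hexdigest() == expected_oid, "sha256 mismatch"
```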
recipe.yaml ADDED
@@ -0,0 +1,41 @@
+ default_stage:
+   default_modifiers:
+     GPTQModifier:
+       config_groups:
+         experts:
+           targets: ['re:model.*mlp.*(gate|up|down)_proj$']
+           weights:
+             num_bits: 4
+             type: float
+             symmetric: true
+             group_size: 16
+             strategy: tensor_group
+             block_structure: null
+             dynamic: false
+             actorder: null
+             scale_dtype: torch.float8_e4m3fn
+             zp_dtype: null
+             observer: memoryless_minmax
+             observer_kwargs: {}
+           input_activations:
+             num_bits: 4
+             type: float
+             symmetric: true
+             group_size: 16
+             strategy: tensor_group
+             block_structure: null
+             dynamic: local
+             actorder: null
+             scale_dtype: torch.float8_e4m3fn
+             zp_dtype: null
+             observer: static_minmax
+             observer_kwargs: {}
+           output_activations: null
+           format: null
+       targets: [Linear]
+       ignore: [lm_head, 're:model.*self_attn.*', 're:model.*ffn_hc$']
+       bypass_divisibility_checks: false
+       block_size: 128
+       dampening_frac: 0.01
+       actorder: static
+       offload_hessians: false
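
The recipe records the GPTQ settings that produced the `quantization_config` above (note the matching FP4 `tensor_group` scheme and `memoryless_minmax` / `static_minmax` observers). A hedged sketch of how such a recipe is typically applied with llm-compressor's `oneshot` entry point; the source-model path and calibration dataset below are illustrative, not taken from this commit:

```python
# Sketch: apply recipe.yaml with llm-compressor (pip install llmcompressor).
# The model path and calibration settings below are assumptions.
from llmcompressor import oneshot

oneshot(
    model="path/to/bf16-source-model",  # hypothetical unquantized checkpoint
    recipe="recipe.yaml",               # the recipe shown above
    dataset="open_platypus",            # illustrative calibration dataset
    max_seq_length=2048,
    num_calibration_samples=512,
    output_dir="deepseek-v4-nvfp4",
)
```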
tokenizer.json ADDED
The diff for this file is too large to render.
 
tokenizer_config.json ADDED
@@ -0,0 +1,14 @@
+ {
+   "backend": "tokenizers",
+   "bos_token": "<|begin▁of▁sentence|>",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "<|end▁of▁sentence|>",
+   "is_local": true,
+   "legacy": true,
+   "local_files_only": false,
+   "model_max_length": 1048576,
+   "pad_token": "<|end▁of▁sentence|>",
+   "sp_model_kwargs": {},
+   "tokenizer_class": "TokenizersBackend",
+   "unk_token": null
+ }
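
The padding token aliases the EOS token, and `model_max_length` matches `max_position_embeddings` (1,048,576) from config.json. A quick sanity check, as a sketch using the same hypothetical checkpoint path as above:

```python
# Sketch: confirm tokenizer settings line up with config.json.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("./checkpoint")  # hypothetical path
assert tok.pad_token == tok.eos_token == "<|end▁of▁sentence|>"
assert tok.model_max_length == 1_048_576
```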