kylesayrs committed on
Commit
fcd4ae8
·
verified ·
1 Parent(s): 528095d

Upload folder using huggingface_hub

Browse files
config.json ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DeepseekV4ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 0,
8
+ "compress_ratios": [
9
+ 0,
10
+ 0,
11
+ 4,
12
+ 128,
13
+ 4,
14
+ 128,
15
+ 4,
16
+ 128,
17
+ 4,
18
+ 128,
19
+ 4,
20
+ 128,
21
+ 4,
22
+ 128,
23
+ 4,
24
+ 128,
25
+ 4,
26
+ 128,
27
+ 4,
28
+ 128,
29
+ 4,
30
+ 128,
31
+ 4,
32
+ 128,
33
+ 4,
34
+ 128,
35
+ 4,
36
+ 128,
37
+ 4,
38
+ 128,
39
+ 4,
40
+ 128,
41
+ 4,
42
+ 128,
43
+ 4,
44
+ 128,
45
+ 4,
46
+ 128,
47
+ 4,
48
+ 128,
49
+ 4,
50
+ 128,
51
+ 4
52
+ ],
53
+ "compress_rope_parameters": {
54
+ "beta_fast": 32,
55
+ "beta_slow": 1,
56
+ "factor": 16,
57
+ "original_max_position_embeddings": 65536,
58
+ "partial_rotary_factor": 0.125,
59
+ "rope_theta": 160000,
60
+ "rope_type": "yarn",
61
+ "type": "yarn"
62
+ },
63
+ "compress_rope_theta": 160000,
64
+ "dtype": "bfloat16",
65
+ "eos_token_id": 1,
66
+ "expert_dtype": "fp4",
67
+ "first_k_dense_replace": null,
68
+ "hc_eps": 1e-06,
69
+ "hc_mult": 4,
70
+ "hc_sinkhorn_iters": 20,
71
+ "head_dim": 512,
72
+ "hidden_act": "silu",
73
+ "hidden_size": 4096,
74
+ "index_head_dim": 128,
75
+ "index_n_heads": 64,
76
+ "index_topk": 512,
77
+ "initializer_range": 0.02,
78
+ "intermediate_size": 18432,
79
+ "kv_lora_rank": null,
80
+ "max_position_embeddings": 1048576,
81
+ "model_type": "deepseek_v4",
82
+ "moe_intermediate_size": 2048,
83
+ "n_group": null,
84
+ "n_routed_experts": 256,
85
+ "n_shared_experts": 1,
86
+ "norm_topk_prob": true,
87
+ "num_attention_heads": 64,
88
+ "num_experts_per_tok": 6,
89
+ "num_hash_layers": 3,
90
+ "num_hidden_layers": 43,
91
+ "num_key_value_heads": 1,
92
+ "num_nextn_predict_layers": 1,
93
+ "o_groups": 8,
94
+ "o_lora_rank": 1024,
95
+ "output_router_logits": false,
96
+ "pad_token_id": null,
97
+ "partial_rotary_factor": 0.125,
98
+ "pretraining_tp": 1,
99
+ "q_lora_rank": 1024,
100
+ "qk_nope_head_dim": 448,
101
+ "qk_rope_head_dim": 64,
102
+ "rms_norm_eps": 1e-06,
103
+ "rope_interleave": true,
104
+ "rope_parameters": {
105
+ "beta_fast": 32,
106
+ "beta_slow": 1,
107
+ "factor": 16,
108
+ "original_max_position_embeddings": 65536,
109
+ "partial_rotary_factor": 0.125,
110
+ "rope_theta": 10000,
111
+ "rope_type": "yarn",
112
+ "type": "yarn"
113
+ },
114
+ "rope_theta": 10000,
115
+ "routed_scaling_factor": 1.5,
116
+ "router_aux_loss_coef": 0.001,
117
+ "router_jitter_noise": 0.0,
118
+ "scoring_func": "sqrtsoftplus",
119
+ "sliding_window": 128,
120
+ "swiglu_limit": 10.0,
121
+ "tie_word_embeddings": false,
122
+ "topk_group": null,
123
+ "topk_method": "noaux_tc",
124
+ "transformers_version": "5.7.0.dev0",
125
+ "use_cache": true,
126
+ "v_head_dim": null,
127
+ "vocab_size": 129280
128
+ }
generation_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 0,
4
+ "do_sample": true,
5
+ "eos_token_id": 1,
6
+ "temperature": 1.0,
7
+ "top_p": 1.0,
8
+ "transformers_version": "5.7.0.dev0"
9
+ }
model-00001-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ee3585fc6955d3f3f7f865030a0b01b3f28804d02e1e6acf07cbe3fa34bfafa
3
+ size 45936952700
model-00002-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0776f7334d09bffc815ce393090a0be803179002f3b74a0d0d28ad5c08e294c
3
+ size 48416904112
model-00003-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54038e68aedb08ffdb5345e0b6c8b26bc0bc95f0e0b64f97276a7b52d21ebde2
3
+ size 43814102880
model-00004-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72f053ff91d654ed2594c9fcf3b2894ea5aac6e26119718fc6677f199db78d3a
3
+ size 48416907032
model-00005-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a9b99fe702a0002314640dd823438cbb4af3ad34893a42861d1a5d9389f883e
3
+ size 43843749064
model-00006-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c051347addd7f223476e4e539adbeba38791dc4747fed53813a78ab676e5ce8b
3
+ size 48416907040
model-00007-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38c820e3e1d9dafa1ce1226054c0f633531e92618f361f6b5996a4edc4e2f0bf
3
+ size 43814105256
model-00008-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b171d9f85d46b634dd00683a3e1683fdc12acd4398f09a4be7fa65bd289a548
3
+ size 48416907032
model-00009-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2962dabdc2469f09674542d512bbf7554669db0493f7e5cad4b66008419840f
3
+ size 43843749064
model-00010-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae6a55e88a011970c8b3a3fd133823748fef09914014fc98612bec59fef6d35f
3
+ size 48416907040
model-00011-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79665068dafdf611b7b0f4f4ac0fcbf044c4293ddae931877193f73c8893df6c
3
+ size 43814105256
model-00012-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03a78e7e48c2de92cd844cb52c633e11dbd03741f581c5e56f7945fdaa581ac9
3
+ size 48416907032
model-00013-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:180c9e000684fa00a7e9ba77b47f2176db57f12a6174b9a647a7cfa998747a84
3
+ size 13189650784
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "bos_token": "<|begin▁of▁sentence|>",
4
+ "clean_up_tokenization_spaces": false,
5
+ "eos_token": "<|end▁of▁sentence|>",
6
+ "is_local": false,
7
+ "legacy": true,
8
+ "local_files_only": false,
9
+ "model_max_length": 1048576,
10
+ "pad_token": "<|end▁of▁sentence|>",
11
+ "sp_model_kwargs": {},
12
+ "tokenizer_class": "TokenizersBackend",
13
+ "unk_token": null
14
+ }