RyuichiLT committed on
Commit
59cbd20
·
verified ·
1 Parent(s): 1cc96b3

Add files using upload-large-folder tool

Browse files
README.md CHANGED
@@ -1,3 +1,7 @@
1
- ---
2
- license: mit
3
- ---
 
 
 
 
 
1
+ ---
2
+ language: en
3
+ tags:
4
+ - mlx
5
+ pipeline_tag: text-generation
6
+ library_name: mlx
7
+ ---
config.json ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DeepseekV4ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 0,
8
+ "compress_ratios": [
9
+ 0,
10
+ 0,
11
+ 4,
12
+ 128,
13
+ 4,
14
+ 128,
15
+ 4,
16
+ 128,
17
+ 4,
18
+ 128,
19
+ 4,
20
+ 128,
21
+ 4,
22
+ 128,
23
+ 4,
24
+ 128,
25
+ 4,
26
+ 128,
27
+ 4,
28
+ 128,
29
+ 4,
30
+ 128,
31
+ 4,
32
+ 128,
33
+ 4,
34
+ 128,
35
+ 4,
36
+ 128,
37
+ 4,
38
+ 128,
39
+ 4,
40
+ 128,
41
+ 4,
42
+ 128,
43
+ 4,
44
+ 128,
45
+ 4,
46
+ 128,
47
+ 4,
48
+ 128,
49
+ 4,
50
+ 128,
51
+ 4,
52
+ 0
53
+ ],
54
+ "compress_rope_theta": 160000,
55
+ "eos_token_id": 1,
56
+ "hc_eps": 1e-06,
57
+ "hc_mult": 4,
58
+ "hc_sinkhorn_iters": 20,
59
+ "head_dim": 512,
60
+ "hidden_act": "silu",
61
+ "hidden_size": 4096,
62
+ "index_head_dim": 128,
63
+ "index_n_heads": 64,
64
+ "index_topk": 512,
65
+ "initializer_range": 0.02,
66
+ "max_position_embeddings": 1048576,
67
+ "model_type": "deepseek_v4",
68
+ "moe_intermediate_size": 2048,
69
+ "n_routed_experts": 256,
70
+ "n_shared_experts": 1,
71
+ "norm_topk_prob": true,
72
+ "num_attention_heads": 64,
73
+ "num_experts_per_tok": 6,
74
+ "num_hash_layers": 3,
75
+ "num_hidden_layers": 43,
76
+ "num_key_value_heads": 1,
77
+ "num_nextn_predict_layers": 1,
78
+ "o_groups": 8,
79
+ "o_lora_rank": 1024,
80
+ "q_lora_rank": 1024,
81
+ "qk_rope_head_dim": 64,
82
+ "quantization": {
83
+ "group_size": 64,
84
+ "bits": 8,
85
+ "mode": "affine"
86
+ },
87
+ "quantization_config": {
88
+ "group_size": 64,
89
+ "bits": 8,
90
+ "mode": "affine"
91
+ },
92
+ "rms_norm_eps": 1e-06,
93
+ "rope_scaling": {
94
+ "beta_fast": 32,
95
+ "beta_slow": 1,
96
+ "factor": 16,
97
+ "original_max_position_embeddings": 65536,
98
+ "type": "yarn"
99
+ },
100
+ "rope_theta": 10000,
101
+ "routed_scaling_factor": 1.5,
102
+ "scoring_func": "sqrtsoftplus",
103
+ "sliding_window": 128,
104
+ "swiglu_limit": 10.0,
105
+ "tie_word_embeddings": false,
106
+ "topk_method": "noaux_tc",
107
+ "torch_dtype": "bfloat16",
108
+ "transformers_version": "4.57.1",
109
+ "use_cache": true,
110
+ "vocab_size": 129280
111
+ }
generation_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 0,
4
+ "eos_token_id": 1,
5
+ "do_sample": true,
6
+ "temperature": 1.0,
7
+ "top_p": 1.0,
8
+ "transformers_version": "4.46.3"
9
+ }
model-00001-of-00033.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db0fb40ffd6c2670634efcfe8d8c428361693442636d397a3184b7c480bcbf34
3
+ size 5326006680
model-00002-of-00033.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94d899cfb4429af55743b156584b47682a82a5b48fa39c812f90db9ce5305e4e
3
+ size 4802709901
model-00003-of-00033.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7b5275c4b78a1ee053945d8578d68497f533209cc3fa2c4af4476167dbe9ebf
3
+ size 4885836255
model-00004-of-00033.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48b2b8af2932a1d7fc72c70620df185a3d954887345f9b097cae097159663101
3
+ size 4713639238
model-00005-of-00033.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:273d07340b5f42f020ff2cac16395c4cb3a5ea281fcfc5e66255358ba6f4a624
3
+ size 4729396550
model-00006-of-00033.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84101e3814161b50899faa1d06a1157660d9265e22c1f3b7dd36ebfcfa786591
3
+ size 4879631924
model-00007-of-00033.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99d3fedd71be2cc7283529a55c659eb049c7109db99d52c44156c846f1bc1d67
3
+ size 4713639320
model-00008-of-00033.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9131bfb2ea7772fc8bdfe5aa6a7cfb85ad0ef8e7050e9b745accc5a9c58e917
3
+ size 4729396465
model-00009-of-00033.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e898ccdaa9e6c425453508af7c65e488d8826fdbaf2961b7ad25e0ab0a01fc8f
3
+ size 4879632020
model-00010-of-00033.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a11ecaa6dc687c2bdc37e7429c2db57d69cd5b3029d7868166ca2a511313d6f0
3
+ size 4713639373
model-00011-of-00033.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ecd6f6ddb1516fcf4f3e8cfe2e84310e1fd0e110ded60fdcb5c6e98efcf1a1c
3
+ size 4729396621
model-00012-of-00033.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:190274daad0220f5af6a140483f23bdcec6dfce2ba2d285c78dbb42edb78c2c8
3
+ size 4879631968
model-00013-of-00033.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41769f57625e271c15e98abfc75cf89bb8bd7c4ae6cb9541cc6746fec0e0d690
3
+ size 4713639369
model-00014-of-00033.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a5daab18433845a0c45b896a7471dd2e52b2612fac24e0f29c5e7fb031b0166
3
+ size 4729396617
model-00015-of-00033.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4114d4db50b8a89bed050305cec6de15775add0e3c263df0cfdb54025d0e9ab4
3
+ size 4879632006
model-00016-of-00033.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17fd0e1047e177b6abdec0b6ecc4d921953c266b7f0a45b1e635cc74a95f9a29
3
+ size 4713639369
model-00017-of-00033.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aef05c3bbf29b860bd105ecc3bb7e6f5f9f8b1301a54d3afa5538f4d6a2d7251
3
+ size 4729396545
model-00018-of-00033.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:726afbcaf2a662b2bf47fb6bfa885a66c111590cd99354c6fcd2d203d3b13eb5
3
+ size 4879632024
model-00019-of-00033.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d12aec4eb71ea6582d963309f1488256832f44bb0da570a1420986960476e5bb
3
+ size 4713639369
model-00020-of-00033.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8cf17ae8e2632ca17973619b4f5495a39fdfc3b268cc493d78da14db650fa25
3
+ size 4729396543
model-00021-of-00033.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:702fc71922564bd1023abc9c6148479e7707079ea94fcf4f0a359c056661b7ec
3
+ size 4879632060
model-00022-of-00033.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5ea22ac32a0ac6093bbefb005a7f7ab3fd760d3fe3e2961dcb06d0707a37f67
3
+ size 4713639373
model-00023-of-00033.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de428f0c20d8e9f9960dccc8317ceea7fed7fe6ad3c56f594b29efe38837d9a4
3
+ size 4729396617
model-00024-of-00033.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd37469dc18490f804c2e4da256c499ee7948352ae949b20a3b77defb505719c
3
+ size 4879632008
model-00025-of-00033.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:152a290deb23d702815fe6e9ca88be66f6cafd24e9fc11c0a44654f9e9fecccb
3
+ size 4713639241
model-00026-of-00033.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db6567c856ae807029d5d4eb09d748b765934e15c48c2f80f5f878b4de2b5e14
3
+ size 4729396615
model-00027-of-00033.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af2e82d9b2a02d891bdcd21fedcf73b97a82081848dfba05f96a6507d409d3b6
3
+ size 4879632022
model-00028-of-00033.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06c03ad09199ef1dbb263e19f10e346c4e780e34abd26ee1ec5b93066556cae1
3
+ size 4713639369
model-00029-of-00033.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66c88d57f02e53b2564bbd5663dba134bbf396ef0877ba3a8a341b738f968807
3
+ size 4729396617
model-00030-of-00033.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e2885fabb9cc9488fd915822ffe6691332759fe5b7d73118d3adf2bf8f5df2e
3
+ size 4879632022
model-00031-of-00033.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d6c9396c4fdcb6043aa8f0ea36d5710012c990c2682cc5277142cb0e14e72d3
3
+ size 4713639299
model-00032-of-00033.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51d781122ca30e0f947f10b5a99d7edd61ed18582e72de48dafaea43e89ea163
3
+ size 4729396621
model-00033-of-00033.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3828314ee7f865532c467ae2a9a642794d7258a2eb39a19de9870304fb3beba4
3
+ size 1734160009
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "bos_token": "<|begin▁of▁sentence|>",
4
+ "clean_up_tokenization_spaces": false,
5
+ "eos_token": "<|end▁of▁sentence|>",
6
+ "is_local": true,
7
+ "legacy": true,
8
+ "local_files_only": false,
9
+ "model_max_length": 1048576,
10
+ "pad_token": "<|end▁of▁sentence|>",
11
+ "sp_model_kwargs": {},
12
+ "tokenizer_class": "TokenizersBackend",
13
+ "unk_token": null
14
+ }