RyuichiLT committed on
Commit
b15fba8
·
verified ·
1 Parent(s): 4e12f66

Add files using upload-large-folder tool

Browse files
Files changed (50) hide show
  1. README.md +7 -0
  2. config.json +129 -0
  3. generation_config.json +9 -0
  4. model-00008-of-00185.safetensors +3 -0
  5. model-00010-of-00185.safetensors +3 -0
  6. model-00013-of-00185.safetensors +3 -0
  7. model-00015-of-00185.safetensors +3 -0
  8. model-00016-of-00185.safetensors +3 -0
  9. model-00019-of-00185.safetensors +3 -0
  10. model-00021-of-00185.safetensors +3 -0
  11. model-00022-of-00185.safetensors +3 -0
  12. model-00027-of-00185.safetensors +3 -0
  13. model-00028-of-00185.safetensors +3 -0
  14. model-00035-of-00185.safetensors +3 -0
  15. model-00051-of-00185.safetensors +3 -0
  16. model-00052-of-00185.safetensors +3 -0
  17. model-00054-of-00185.safetensors +3 -0
  18. model-00057-of-00185.safetensors +3 -0
  19. model-00058-of-00185.safetensors +3 -0
  20. model-00063-of-00185.safetensors +3 -0
  21. model-00066-of-00185.safetensors +3 -0
  22. model-00069-of-00185.safetensors +3 -0
  23. model-00071-of-00185.safetensors +3 -0
  24. model-00074-of-00185.safetensors +3 -0
  25. model-00087-of-00185.safetensors +3 -0
  26. model-00090-of-00185.safetensors +3 -0
  27. model-00095-of-00185.safetensors +3 -0
  28. model-00099-of-00185.safetensors +3 -0
  29. model-00100-of-00185.safetensors +3 -0
  30. model-00103-of-00185.safetensors +3 -0
  31. model-00109-of-00185.safetensors +3 -0
  32. model-00114-of-00185.safetensors +3 -0
  33. model-00131-of-00185.safetensors +3 -0
  34. model-00132-of-00185.safetensors +3 -0
  35. model-00134-of-00185.safetensors +3 -0
  36. model-00137-of-00185.safetensors +3 -0
  37. model-00142-of-00185.safetensors +3 -0
  38. model-00147-of-00185.safetensors +3 -0
  39. model-00148-of-00185.safetensors +3 -0
  40. model-00155-of-00185.safetensors +3 -0
  41. model-00170-of-00185.safetensors +3 -0
  42. model-00173-of-00185.safetensors +3 -0
  43. model-00175-of-00185.safetensors +3 -0
  44. model-00176-of-00185.safetensors +3 -0
  45. model-00179-of-00185.safetensors +3 -0
  46. model-00180-of-00185.safetensors +3 -0
  47. model-00183-of-00185.safetensors +3 -0
  48. model.safetensors.index.json +0 -0
  49. tokenizer.json +0 -0
  50. tokenizer_config.json +14 -0
README.md ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ ---
2
+ language: en
3
+ tags:
4
+ - mlx
5
+ pipeline_tag: text-generation
6
+ library_name: mlx
7
+ ---
config.json ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DeepseekV4ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 0,
8
+ "compress_ratios": [
9
+ 128,
10
+ 128,
11
+ 4,
12
+ 128,
13
+ 4,
14
+ 128,
15
+ 4,
16
+ 128,
17
+ 4,
18
+ 128,
19
+ 4,
20
+ 128,
21
+ 4,
22
+ 128,
23
+ 4,
24
+ 128,
25
+ 4,
26
+ 128,
27
+ 4,
28
+ 128,
29
+ 4,
30
+ 128,
31
+ 4,
32
+ 128,
33
+ 4,
34
+ 128,
35
+ 4,
36
+ 128,
37
+ 4,
38
+ 128,
39
+ 4,
40
+ 128,
41
+ 4,
42
+ 128,
43
+ 4,
44
+ 128,
45
+ 4,
46
+ 128,
47
+ 4,
48
+ 128,
49
+ 4,
50
+ 128,
51
+ 4,
52
+ 128,
53
+ 4,
54
+ 128,
55
+ 4,
56
+ 128,
57
+ 4,
58
+ 128,
59
+ 4,
60
+ 128,
61
+ 4,
62
+ 128,
63
+ 4,
64
+ 128,
65
+ 4,
66
+ 128,
67
+ 4,
68
+ 128,
69
+ 4,
70
+ 0
71
+ ],
72
+ "compress_rope_theta": 160000,
73
+ "eos_token_id": 1,
74
+ "hc_eps": 1e-06,
75
+ "hc_mult": 4,
76
+ "hc_sinkhorn_iters": 20,
77
+ "head_dim": 512,
78
+ "hidden_act": "silu",
79
+ "hidden_size": 7168,
80
+ "index_head_dim": 128,
81
+ "index_n_heads": 64,
82
+ "index_topk": 1024,
83
+ "initializer_range": 0.02,
84
+ "max_position_embeddings": 1048576,
85
+ "model_type": "deepseek_v4",
86
+ "moe_intermediate_size": 3072,
87
+ "n_routed_experts": 384,
88
+ "n_shared_experts": 1,
89
+ "norm_topk_prob": true,
90
+ "num_attention_heads": 128,
91
+ "num_experts_per_tok": 6,
92
+ "num_hash_layers": 3,
93
+ "num_hidden_layers": 61,
94
+ "num_key_value_heads": 1,
95
+ "num_nextn_predict_layers": 1,
96
+ "o_groups": 16,
97
+ "o_lora_rank": 1024,
98
+ "q_lora_rank": 1536,
99
+ "qk_rope_head_dim": 64,
100
+ "quantization": {
101
+ "group_size": 64,
102
+ "bits": 8,
103
+ "mode": "affine"
104
+ },
105
+ "quantization_config": {
106
+ "group_size": 64,
107
+ "bits": 8,
108
+ "mode": "affine"
109
+ },
110
+ "rms_norm_eps": 1e-06,
111
+ "rope_scaling": {
112
+ "beta_fast": 32,
113
+ "beta_slow": 1,
114
+ "factor": 16,
115
+ "original_max_position_embeddings": 65536,
116
+ "type": "yarn"
117
+ },
118
+ "rope_theta": 10000,
119
+ "routed_scaling_factor": 2.5,
120
+ "scoring_func": "sqrtsoftplus",
121
+ "sliding_window": 128,
122
+ "swiglu_limit": 10.0,
123
+ "tie_word_embeddings": false,
124
+ "topk_method": "noaux_tc",
125
+ "torch_dtype": "bfloat16",
126
+ "transformers_version": "4.57.1",
127
+ "use_cache": true,
128
+ "vocab_size": 129280
129
+ }
generation_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 0,
4
+ "eos_token_id": 1,
5
+ "do_sample": true,
6
+ "temperature": 1.0,
7
+ "top_p": 1.0,
8
+ "transformers_version": "4.46.3"
9
+ }
model-00008-of-00185.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9013d7094de9e6e4ea636383e3f465fcbe974473ea33ed60688dc453e3c332b3
3
+ size 4492099867
model-00010-of-00185.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:047c2bd9b7988d83e65a4ab0d4fcd34259d17567e106c39de1ca09db77d20799
3
+ size 4906109758
model-00013-of-00185.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a83e1d1b210035ebc26b925f7d1cb3fc1af33c5038d443c354a3505c577ef931
3
+ size 4925341901
model-00015-of-00185.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8b5047d9e57a8aef8b5f6534607b72b1e5572fddea4a489c48866dfe66baae0
3
+ size 4492099863
model-00016-of-00185.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c8e8fa14d568d91cfe178c8da790eb31862dae05f19de9ca1f3cbb07cc46d88
3
+ size 4899905857
model-00019-of-00185.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3481cb52570563d9cf5ed25d007f428af825f3e8333b5d92cb894070847f57b9
3
+ size 4925341893
model-00021-of-00185.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2cf1099d14a133b96f4a51dcc441baf43347a99f4132854370b5ec2b76cbbbfd
3
+ size 4492099863
model-00022-of-00185.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:505997b128f6aae8d09847e5cd9df2a9bb9524cd94f3387eb600f7f4be2fef4b
3
+ size 4899905877
model-00027-of-00185.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f77c3c8f939ff48668a5a84fb77c3fd4d7cd0bacc0fbbc6dd5edff9bf83d456
3
+ size 4492099863
model-00028-of-00185.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7607231d46225d238874351e2b8a7551d009f79e17d5f80b5b715d8faeeaff1
3
+ size 4899905847
model-00035-of-00185.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a262e28babbdb42ca03af7ce857d8752686dc0e15849dd2b19f26bf4ed6c109a
3
+ size 4492099869
model-00051-of-00185.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:922bd597be656d84e909f55ea7a7cfdc5be8c8cca355ebc5d09d89be96abd480
3
+ size 4492099865
model-00052-of-00185.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e7e43594de1eadd6cea09ebc1995cd603c134fcf9e638604b10ea77bc93d7ce
3
+ size 4899905852
model-00054-of-00185.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a4488715b749bd1eb290bae36642f38e8e7f14fd3b3766fb48418478880fb09
3
+ size 4492099865
model-00057-of-00185.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22f3e2512f1642989ee20cd41bb2957d5b79b15e6333bee70ae1d42d80852c4d
3
+ size 4492099865
model-00058-of-00185.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8ccecf61ae3a828cc33af6de131c0a74c7d5087ed82e425a57cc3ec28a77f89
3
+ size 4899905906
model-00063-of-00185.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3fb510d6a5722a7a32578609f7fd83a83b5a856b6eb1eef8b5f3bd3d814ae27
3
+ size 4492099865
model-00066-of-00185.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee4ab939a7ec22aac093042c0905432d228189aefc6275a1091cd9be24c3cdac
3
+ size 4492099865
model-00069-of-00185.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d12fa02fe13ff4402efab5a92f774d6b2d3ae56fc4f8c1a307c45b26fcfc9739
3
+ size 4492099865
model-00071-of-00185.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26a272b0c3ddcfd1bb9616a0703dc54bf73478897370f090ba015ec28bff15b3
3
+ size 4492099869
model-00074-of-00185.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8213e337af40a99bd985383386d3778b3f67d218ca690002f60b634f7d5aba2c
3
+ size 4492099869
model-00087-of-00185.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc8aedd23482c81bebc70e00bdaf38d8e2e0c79fcfb50fb7f02253c8f2be58e7
3
+ size 4492099865
model-00090-of-00185.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3e379b011c37de394e7846d23cd5aaaf34826879c7fc817dba3a0c09744291e
3
+ size 4492099865
model-00095-of-00185.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1da9d4b06c5906071120147b01a57d08c28b647e14a00eaac5650d0caea0ecd9
3
+ size 4492099869
model-00099-of-00185.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2eb8aa765a2563fa1c93709790529797765fddd9391aa4096c65d2802a66b6df
3
+ size 4492099865
model-00100-of-00185.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7de5754441387d3f19087ae3cb2edcdfeb2e43a317765ed0e8caf95fb754b6c7
3
+ size 4899905862
model-00103-of-00185.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f622b599692bfbec33cbd623590f8bc264c6c3d6e2bfe27932402b1b29d5a40
3
+ size 4925341966
model-00109-of-00185.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ecbdd1baaacdbcb7e6a91f34dd5b0fcd83e1d5b1c5b30cf988f6cf196162331
3
+ size 4925341936
model-00114-of-00185.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c66d0f52965c75215ba3b5c4cc280cd45f610a8d0ea0826a52f0fe8c4a62dee8
3
+ size 4492099865
model-00131-of-00185.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1efdfbefc4a5b54cce3e4e5b15b6e22ccae8263f225d479c5ee95d34e2fc4967
3
+ size 4492099869
model-00132-of-00185.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f08a8b185921c827ab4f633e48c76ce891e24842766e49d73d00e0b8c504c2e5
3
+ size 4492099865
model-00134-of-00185.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f5bfdef1062d7d41323ffc7e570fe68a03d1c62d44f6f23c48f7921acc8c46a
3
+ size 4492099869
model-00137-of-00185.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62e841ac394df4a776e12c14c5e018eba84454872cde6c995b349f589fcfe044
3
+ size 4492099869
model-00142-of-00185.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c86e40d477fa0edbf6141fb9460474ae56fcaf392f86b193a47d568cc191517
3
+ size 4899905896
model-00147-of-00185.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f229ce116d7f9c74393471ab28384cc609b69dab1a2fe59f29ce56820f5c4a21
3
+ size 4492099865
model-00148-of-00185.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ede1c85cb48e08b5b8825bd5b8ca846b8bd3bbd4c6ea069b40bbaca1af329b5
3
+ size 4899905904
model-00155-of-00185.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7afc9f4f72adc97434410f68342322f5d1c89750b27f12c6bd93d70bdbd2a538
3
+ size 4492099869
model-00170-of-00185.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38ad5809b49f4a6ec68ea091515b29da4f07920d0c8ba199cca7400eee61f590
3
+ size 4492099869
model-00173-of-00185.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8b51435faf1d780ca9db942c304eca99bfc455b187154c2a1843f29b0dff721
3
+ size 4492099869
model-00175-of-00185.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9abc1092f8595b6a6c7a36b352c1ea95a98c3753b2bcb97fa993d107c931ee7a
3
+ size 4925341996
model-00176-of-00185.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57ab6e29ed270d24cdd2e0cbe96cfde61f24ddd6b1cfd697f764b5d12f3d0d1d
3
+ size 4492099869
model-00179-of-00185.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14f4049e0573aa7b51f730e2633f79487ba843d1b6e7782fed4cd0ec9b6cb5b0
3
+ size 4492099869
model-00180-of-00185.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f2406ccb9076ab128e993448e3ebe74ee4230ea5267fc027e5fd7ac8752c479
3
+ size 4492099865
model-00183-of-00185.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45400443aa8faf7a3205ddbbc27f267ac02bfb2b66f4faf795685c32dd8703f4
3
+ size 4492099865
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "bos_token": "<|begin▁of▁sentence|>",
4
+ "clean_up_tokenization_spaces": false,
5
+ "eos_token": "<|end▁of▁sentence|>",
6
+ "is_local": true,
7
+ "legacy": true,
8
+ "local_files_only": false,
9
+ "model_max_length": 1048576,
10
+ "pad_token": "<|end▁of▁sentence|>",
11
+ "sp_model_kwargs": {},
12
+ "tokenizer_class": "TokenizersBackend",
13
+ "unk_token": null
14
+ }