Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- .gitattributes +59 -0
- model/params/model/layers/76/mlp/experts/up_proj/kernel/.zarray +1 -0
- model/params/model/layers/76/mlp/gate/e_score_correction_bias/.zarray +1 -0
- model/params/model/layers/76/mlp/gate/e_score_correction_bias/0 +0 -0
- model/params/model/layers/76/mlp/gate/kernel/.zarray +1 -0
- model/params/model/layers/76/mlp/shared_experts/down_proj/kernel/.zarray +1 -0
- model/params/model/layers/76/mlp/shared_experts/gate_proj/kernel/.zarray +1 -0
- model/params/model/layers/76/mlp/shared_experts/up_proj/kernel/.zarray +1 -0
- model/params/model/layers/76/post_attention_layernorm/kernel/.zarray +1 -0
- model/params/model/layers/76/post_attention_layernorm/kernel/0 +0 -0
- model/params/model/layers/76/self_attn/k_norm/kernel/.zarray +1 -0
- model/params/model/layers/76/self_attn/k_norm/kernel/0 +0 -0
- model/params/model/layers/76/self_attn/k_proj/bias/.zarray +1 -0
- model/params/model/layers/76/self_attn/k_proj/bias/0 +0 -0
- model/params/model/layers/76/self_attn/k_proj/kernel/.zarray +1 -0
- model/params/model/layers/76/self_attn/o_proj/kernel/.zarray +1 -0
- model/params/model/layers/76/self_attn/q_norm/kernel/.zarray +1 -0
- model/params/model/layers/76/self_attn/q_norm/kernel/0 +0 -0
- model/params/model/layers/76/self_attn/q_proj/bias/.zarray +1 -0
- model/params/model/layers/76/self_attn/q_proj/bias/0 +0 -0
- model/params/model/layers/76/self_attn/q_proj/kernel/.zarray +1 -0
- model/params/model/layers/76/self_attn/v_proj/bias/.zarray +1 -0
- model/params/model/layers/76/self_attn/v_proj/bias/0 +0 -0
- model/params/model/layers/76/self_attn/v_proj/kernel/.zarray +1 -0
- model/params/model/layers/77/input_layernorm/kernel/.zarray +1 -0
- model/params/model/layers/77/input_layernorm/kernel/0 +0 -0
- model/params/model/layers/77/mlp/experts/down_proj/kernel/.zarray +1 -0
- model/params/model/layers/77/mlp/experts/gate_proj/kernel/.zarray +1 -0
- model/params/model/layers/77/mlp/experts/up_proj/kernel/.zarray +1 -0
- model/params/model/layers/77/mlp/gate/e_score_correction_bias/.zarray +1 -0
- model/params/model/layers/77/mlp/gate/e_score_correction_bias/0 +0 -0
- model/params/model/layers/77/mlp/gate/kernel/.zarray +1 -0
- model/params/model/layers/77/mlp/shared_experts/down_proj/kernel/.zarray +1 -0
- model/params/model/layers/77/mlp/shared_experts/gate_proj/kernel/.zarray +1 -0
- model/params/model/layers/77/mlp/shared_experts/up_proj/kernel/.zarray +1 -0
- model/params/model/layers/77/post_attention_layernorm/kernel/.zarray +1 -0
- model/params/model/layers/77/post_attention_layernorm/kernel/0 +0 -0
- model/params/model/layers/77/self_attn/k_norm/kernel/.zarray +1 -0
- model/params/model/layers/77/self_attn/k_norm/kernel/0 +0 -0
- model/params/model/layers/77/self_attn/k_proj/bias/.zarray +1 -0
- model/params/model/layers/77/self_attn/k_proj/bias/0 +0 -0
- model/params/model/layers/77/self_attn/k_proj/kernel/.zarray +1 -0
- model/params/model/layers/77/self_attn/o_proj/kernel/.zarray +1 -0
- model/params/model/layers/77/self_attn/q_norm/kernel/.zarray +1 -0
- model/params/model/layers/77/self_attn/q_norm/kernel/0 +0 -0
- model/params/model/layers/77/self_attn/q_proj/bias/.zarray +1 -0
- model/params/model/layers/77/self_attn/q_proj/bias/0 +0 -0
- model/params/model/layers/77/self_attn/q_proj/kernel/.zarray +1 -0
- model/params/model/layers/77/self_attn/v_proj/bias/.zarray +1 -0
- model/params/model/layers/77/self_attn/v_proj/bias/0 +0 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,62 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
model/params/model/layers/91/self_attn/k_proj/kernel/2.0 filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
model/params/model/layers/91/self_attn/k_proj/kernel/3.0 filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
model/params/model/layers/91/mlp/shared_experts/up_proj/kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
model/params/model/layers/91/mlp/shared_experts/up_proj/kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
model/params/model/layers/91/mlp/shared_experts/gate_proj/kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
model/params/model/layers/91/self_attn/v_proj/kernel/2.0 filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
model/params/model/layers/91/mlp/shared_experts/down_proj/kernel/0.2 filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
model/params/model/layers/91/mlp/shared_experts/up_proj/kernel/3.0 filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
model/params/model/layers/91/mlp/gate/kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
model/params/model/layers/91/self_attn/v_proj/kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
model/params/model/layers/91/self_attn/k_proj/kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
model/params/model/layers/91/mlp/shared_experts/gate_proj/kernel/3.0 filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
model/params/model/layers/91/mlp/shared_experts/up_proj/kernel/2.0 filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
model/params/model/layers/91/mlp/gate/kernel/2.0 filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
model/params/model/layers/91/mlp/shared_experts/gate_proj/kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
model/params/model/layers/91/mlp/shared_experts/gate_proj/kernel/2.0 filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
model/params/model/layers/91/mlp/shared_experts/down_proj/kernel/0.3 filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
model/params/model/layers/91/self_attn/k_proj/kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
model/params/model/layers/91/self_attn/v_proj/kernel/3.0 filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
model/params/model/layers/91/mlp/shared_experts/down_proj/kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
model/params/model/layers/91/mlp/gate/kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
model/params/model/layers/91/mlp/shared_experts/down_proj/kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
model/params/model/layers/91/mlp/experts/up_proj/kernel/41.0.0 filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
model/params/model/layers/91/mlp/gate/kernel/3.0 filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
model/params/model/layers/91/mlp/experts/up_proj/kernel/27.0.0 filter=lfs diff=lfs merge=lfs -text
|
| 61 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 62 |
+
model/params/model/layers/91/self_attn/v_proj/kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
| 63 |
+
model/params/model/layers/91/mlp/experts/up_proj/kernel/3.0.0 filter=lfs diff=lfs merge=lfs -text
|
| 64 |
+
model/params/model/layers/91/self_attn/q_proj/kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
| 65 |
+
model/params/model/layers/91/self_attn/o_proj/kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
| 66 |
+
model/params/model/layers/91/mlp/experts/up_proj/kernel/52.0.0 filter=lfs diff=lfs merge=lfs -text
|
| 67 |
+
model/params/model/layers/91/self_attn/o_proj/kernel/0.3 filter=lfs diff=lfs merge=lfs -text
|
| 68 |
+
model/params/model/layers/91/mlp/experts/up_proj/kernel/36.0.0 filter=lfs diff=lfs merge=lfs -text
|
| 69 |
+
model/params/model/layers/91/self_attn/o_proj/kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
| 70 |
+
model/params/model/layers/91/self_attn/q_proj/kernel/3.0 filter=lfs diff=lfs merge=lfs -text
|
| 71 |
+
model/params/model/layers/91/mlp/experts/up_proj/kernel/44.0.0 filter=lfs diff=lfs merge=lfs -text
|
| 72 |
+
model/params/model/layers/91/mlp/experts/up_proj/kernel/80.0.0 filter=lfs diff=lfs merge=lfs -text
|
| 73 |
+
model/params/model/layers/91/self_attn/q_proj/kernel/2.0 filter=lfs diff=lfs merge=lfs -text
|
| 74 |
+
model/params/model/layers/91/mlp/experts/gate_proj/kernel/10.0.0 filter=lfs diff=lfs merge=lfs -text
|
| 75 |
+
model/params/model/layers/91/self_attn/o_proj/kernel/0.2 filter=lfs diff=lfs merge=lfs -text
|
| 76 |
+
model/params/model/layers/91/mlp/experts/up_proj/kernel/8.0.0 filter=lfs diff=lfs merge=lfs -text
|
| 77 |
+
model/params/model/layers/91/self_attn/q_proj/kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
| 78 |
+
model/params/model/layers/91/mlp/experts/gate_proj/kernel/100.0.0 filter=lfs diff=lfs merge=lfs -text
|
| 79 |
+
model/params/model/layers/91/mlp/experts/gate_proj/kernel/112.0.0 filter=lfs diff=lfs merge=lfs -text
|
| 80 |
+
model/params/model/layers/91/mlp/experts/gate_proj/kernel/110.0.0 filter=lfs diff=lfs merge=lfs -text
|
| 81 |
+
model/params/model/layers/91/mlp/experts/up_proj/kernel/67.0.0 filter=lfs diff=lfs merge=lfs -text
|
| 82 |
+
model/params/model/layers/91/mlp/experts/gate_proj/kernel/104.0.0 filter=lfs diff=lfs merge=lfs -text
|
| 83 |
+
model/params/model/layers/91/mlp/experts/up_proj/kernel/38.0.0 filter=lfs diff=lfs merge=lfs -text
|
| 84 |
+
model/params/model/layers/91/mlp/experts/up_proj/kernel/33.0.0 filter=lfs diff=lfs merge=lfs -text
|
| 85 |
+
model/params/model/layers/91/mlp/experts/gate_proj/kernel/105.0.0 filter=lfs diff=lfs merge=lfs -text
|
| 86 |
+
model/params/model/layers/91/mlp/experts/gate_proj/kernel/101.0.0 filter=lfs diff=lfs merge=lfs -text
|
| 87 |
+
model/params/model/layers/91/mlp/experts/up_proj/kernel/43.0.0 filter=lfs diff=lfs merge=lfs -text
|
| 88 |
+
model/params/model/layers/91/mlp/experts/up_proj/kernel/50.0.0 filter=lfs diff=lfs merge=lfs -text
|
| 89 |
+
model/params/model/layers/91/mlp/experts/gate_proj/kernel/1.0.0 filter=lfs diff=lfs merge=lfs -text
|
| 90 |
+
model/params/model/layers/91/mlp/experts/up_proj/kernel/22.0.0 filter=lfs diff=lfs merge=lfs -text
|
| 91 |
+
model/params/model/layers/91/mlp/experts/gate_proj/kernel/102.0.0 filter=lfs diff=lfs merge=lfs -text
|
| 92 |
+
model/params/model/layers/91/mlp/experts/up_proj/kernel/81.0.0 filter=lfs diff=lfs merge=lfs -text
|
| 93 |
+
model/params/model/layers/91/mlp/experts/gate_proj/kernel/120.0.0 filter=lfs diff=lfs merge=lfs -text
|
| 94 |
+
model/params/model/layers/91/mlp/experts/gate_proj/kernel/108.0.0 filter=lfs diff=lfs merge=lfs -text
|
model/params/model/layers/76/mlp/experts/up_proj/kernel/.zarray
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"chunks":[1,5120,1536],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[160,5120,1536],"zarr_format":2}
|
model/params/model/layers/76/mlp/gate/e_score_correction_bias/.zarray
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"chunks":[160],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[160],"zarr_format":2}
|
model/params/model/layers/76/mlp/gate/e_score_correction_bias/0
ADDED
|
Binary file (154 Bytes). View file
|
|
|
model/params/model/layers/76/mlp/gate/kernel/.zarray
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"chunks":[1280,160],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[5120,160],"zarr_format":2}
|
model/params/model/layers/76/mlp/shared_experts/down_proj/kernel/.zarray
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"chunks":[1536,1280],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[1536,5120],"zarr_format":2}
|
model/params/model/layers/76/mlp/shared_experts/gate_proj/kernel/.zarray
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"chunks":[1280,1536],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[5120,1536],"zarr_format":2}
|
model/params/model/layers/76/mlp/shared_experts/up_proj/kernel/.zarray
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"chunks":[1280,1536],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[5120,1536],"zarr_format":2}
|
model/params/model/layers/76/post_attention_layernorm/kernel/.zarray
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"chunks":[5120],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[5120],"zarr_format":2}
|
model/params/model/layers/76/post_attention_layernorm/kernel/0
ADDED
|
Binary file (3.59 kB). View file
|
|
|
model/params/model/layers/76/self_attn/k_norm/kernel/.zarray
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"chunks":[128],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[128],"zarr_format":2}
|
model/params/model/layers/76/self_attn/k_norm/kernel/0
ADDED
|
Binary file (198 Bytes). View file
|
|
|
model/params/model/layers/76/self_attn/k_proj/bias/.zarray
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"chunks":[1024],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
model/params/model/layers/76/self_attn/k_proj/bias/0
ADDED
|
Binary file (1.67 kB). View file
|
|
|
model/params/model/layers/76/self_attn/k_proj/kernel/.zarray
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"chunks":[1280,1024],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[5120,1024],"zarr_format":2}
|
model/params/model/layers/76/self_attn/o_proj/kernel/.zarray
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"chunks":[12288,1280],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[12288,5120],"zarr_format":2}
|
model/params/model/layers/76/self_attn/q_norm/kernel/.zarray
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"chunks":[128],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[128],"zarr_format":2}
|
model/params/model/layers/76/self_attn/q_norm/kernel/0
ADDED
|
Binary file (190 Bytes). View file
|
|
|
model/params/model/layers/76/self_attn/q_proj/bias/.zarray
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"chunks":[12288],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[12288],"zarr_format":2}
|
model/params/model/layers/76/self_attn/q_proj/bias/0
ADDED
|
Binary file (19.3 kB). View file
|
|
|
model/params/model/layers/76/self_attn/q_proj/kernel/.zarray
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"chunks":[1280,12288],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[5120,12288],"zarr_format":2}
|
model/params/model/layers/76/self_attn/v_proj/bias/.zarray
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"chunks":[1024],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
model/params/model/layers/76/self_attn/v_proj/bias/0
ADDED
|
Binary file (1.66 kB). View file
|
|
|
model/params/model/layers/76/self_attn/v_proj/kernel/.zarray
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"chunks":[1280,1024],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[5120,1024],"zarr_format":2}
|
model/params/model/layers/77/input_layernorm/kernel/.zarray
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"chunks":[5120],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[5120],"zarr_format":2}
|
model/params/model/layers/77/input_layernorm/kernel/0
ADDED
|
Binary file (5.15 kB). View file
|
|
|
model/params/model/layers/77/mlp/experts/down_proj/kernel/.zarray
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"chunks":[1,1536,5120],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[160,1536,5120],"zarr_format":2}
|
model/params/model/layers/77/mlp/experts/gate_proj/kernel/.zarray
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"chunks":[1,5120,1536],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[160,5120,1536],"zarr_format":2}
|
model/params/model/layers/77/mlp/experts/up_proj/kernel/.zarray
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"chunks":[1,5120,1536],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[160,5120,1536],"zarr_format":2}
|
model/params/model/layers/77/mlp/gate/e_score_correction_bias/.zarray
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"chunks":[160],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[160],"zarr_format":2}
|
model/params/model/layers/77/mlp/gate/e_score_correction_bias/0
ADDED
|
Binary file (177 Bytes). View file
|
|
|
model/params/model/layers/77/mlp/gate/kernel/.zarray
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"chunks":[1280,160],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[5120,160],"zarr_format":2}
|
model/params/model/layers/77/mlp/shared_experts/down_proj/kernel/.zarray
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"chunks":[1536,1280],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[1536,5120],"zarr_format":2}
|
model/params/model/layers/77/mlp/shared_experts/gate_proj/kernel/.zarray
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"chunks":[1280,1536],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[5120,1536],"zarr_format":2}
|
model/params/model/layers/77/mlp/shared_experts/up_proj/kernel/.zarray
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"chunks":[1280,1536],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[5120,1536],"zarr_format":2}
|
model/params/model/layers/77/post_attention_layernorm/kernel/.zarray
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"chunks":[5120],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[5120],"zarr_format":2}
|
model/params/model/layers/77/post_attention_layernorm/kernel/0
ADDED
|
Binary file (3.4 kB). View file
|
|
|
model/params/model/layers/77/self_attn/k_norm/kernel/.zarray
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"chunks":[128],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[128],"zarr_format":2}
|
model/params/model/layers/77/self_attn/k_norm/kernel/0
ADDED
|
Binary file (220 Bytes). View file
|
|
|
model/params/model/layers/77/self_attn/k_proj/bias/.zarray
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"chunks":[1024],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
model/params/model/layers/77/self_attn/k_proj/bias/0
ADDED
|
Binary file (1.68 kB). View file
|
|
|
model/params/model/layers/77/self_attn/k_proj/kernel/.zarray
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"chunks":[1280,1024],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[5120,1024],"zarr_format":2}
|
model/params/model/layers/77/self_attn/o_proj/kernel/.zarray
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"chunks":[12288,1280],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[12288,5120],"zarr_format":2}
|
model/params/model/layers/77/self_attn/q_norm/kernel/.zarray
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"chunks":[128],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[128],"zarr_format":2}
|
model/params/model/layers/77/self_attn/q_norm/kernel/0
ADDED
|
Binary file (212 Bytes). View file
|
|
|
model/params/model/layers/77/self_attn/q_proj/bias/.zarray
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"chunks":[12288],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[12288],"zarr_format":2}
|
model/params/model/layers/77/self_attn/q_proj/bias/0
ADDED
|
Binary file (19.5 kB). View file
|
|
|
model/params/model/layers/77/self_attn/q_proj/kernel/.zarray
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"chunks":[1280,12288],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[5120,12288],"zarr_format":2}
|
model/params/model/layers/77/self_attn/v_proj/bias/.zarray
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"chunks":[1024],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
model/params/model/layers/77/self_attn/v_proj/bias/0
ADDED
|
Binary file (1.67 kB). View file
|
|
|