hexmage committed on
Upload folder using huggingface_hub
Browse files
- epoch_0.pt +3 -0
- epoch_1.pt +3 -0
- epoch_10.pt +3 -0
- epoch_11.pt +3 -0
- epoch_12.pt +3 -0
- epoch_13.pt +3 -0
- epoch_14.pt +3 -0
- epoch_15.pt +3 -0
- epoch_16.pt +3 -0
- epoch_17.pt +3 -0
- epoch_18.pt +3 -0
- epoch_19.pt +3 -0
- epoch_2.pt +3 -0
- epoch_3.pt +3 -0
- epoch_4.pt +3 -0
- epoch_5.pt +3 -0
- epoch_6.pt +3 -0
- epoch_7.pt +3 -0
- epoch_8.pt +3 -0
- epoch_9.pt +3 -0
- model_config.json +28 -0
epoch_0.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f2cba8d3a36bf59fb44e83a77c430c2430cfb86a5a9fd09415cb645b4d16dcea
|
| 3 |
+
size 50381351
|
epoch_1.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0dbdcb55deb86f51e11fcebb5ab504e65b64346f1b360f2da69d2fd09dcf8029
|
| 3 |
+
size 50381351
|
epoch_10.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:50bd5d7c5ca36620ce726c8e1bda31552253cd659866e7e9f3fc15566147a971
|
| 3 |
+
size 50381953
|
epoch_11.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:189927de47cb520308d802e64686bfdb40f00190e23b7d56f30bb8bd5181de13
|
| 3 |
+
size 50381953
|
epoch_12.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:104be834b5af45f093438487277980c8e72dbd3100836bb25f6e85c3833434a6
|
| 3 |
+
size 50381953
|
epoch_13.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9cd155dfba35a9dfdb6143c2ff9565baf599f079438fd6dea48f21b340f539d4
|
| 3 |
+
size 50381953
|
epoch_14.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9bad528dd0e6de629e3de1a8e949ae8cd1179dbac59069efaab65abb0c72d2a5
|
| 3 |
+
size 50381953
|
epoch_15.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:915ef2a7684e43198584898c2edc09094d91b001fd48faedaac5f49b52adef60
|
| 3 |
+
size 50381953
|
epoch_16.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:12189c8ed34baee1e31b6ea0f458821f7e27f2b8c0267e3253facf230ce7b2bf
|
| 3 |
+
size 50381953
|
epoch_17.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:20554c6d901c5edc4abd9886836610768db9c6b5d58abd3eee4ef54b234351cb
|
| 3 |
+
size 50381953
|
epoch_18.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0c549e662394e9c9e19c55959b571d52b705e1c93257b9031b181ced7accdf6e
|
| 3 |
+
size 50381953
|
epoch_19.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3a535185e4c0b4fcd03ba5c7241198047ed19b5bb5825af09f416ebc49816115
|
| 3 |
+
size 50381953
|
epoch_2.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f39a7c7373a4f97081985bbb0dd1c64bc49cc3622627679d1ce605acfbfc22fe
|
| 3 |
+
size 50381351
|
epoch_3.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3b7d9821430c6ec3fcbd7b7435f369dfbc86203c23c3116647fa143908965a8c
|
| 3 |
+
size 50381351
|
epoch_4.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:14c127fa74f3323487820fd0244c54e36d8aabb227cda46bd4ed893b8dc8a1ba
|
| 3 |
+
size 50381351
|
epoch_5.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:364b81cc7e70bdce5eeb673666131dd548fb3c978b7c4372dd547e05f0a08ddf
|
| 3 |
+
size 50381351
|
epoch_6.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0641bde5702ec4e4ac71c0caa259e5c20a7e02775e6a72dc271e0384a55cfdbc
|
| 3 |
+
size 50381351
|
epoch_7.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:50a9a8c08949b99cd3345144e63084551b0ab7e8a932c6087242d913da717876
|
| 3 |
+
size 50381351
|
epoch_8.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d758bc52f074c30de95474b0ca343f4a7c03e2690cc0d06d4a8d1f73f8e06bf4
|
| 3 |
+
size 50381351
|
epoch_9.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:450502b50e8aa98a90855b6bfbdc4b2abdf113ff6d8a48c6ba008e1a13af9929
|
| 3 |
+
size 50381351
|
model_config.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
arch:
|
| 2 |
+
H_cycles: 2
|
| 3 |
+
L_cycles: 6
|
| 4 |
+
bptt: true
|
| 5 |
+
forward_dtype: bfloat16
|
| 6 |
+
head_dim: 64
|
| 7 |
+
hidden_size: 512
|
| 8 |
+
intermediate_size: 2048
|
| 9 |
+
name: hrm@HRM
|
| 10 |
+
norm_eps: 1.0e-06
|
| 11 |
+
num_layers: 2
|
| 12 |
+
rope_theta: 10000.0
|
| 13 |
+
beta1: 0.9
|
| 14 |
+
beta2: 0.95
|
| 15 |
+
cycles_per_data: 16
|
| 16 |
+
data:
|
| 17 |
+
augment: true
|
| 18 |
+
dataset_name: /sg-pretrain/datasets/sudoku-extreme-1k
|
| 19 |
+
name: sudoku
|
| 20 |
+
repeat: 200
|
| 21 |
+
ema: 0.999
|
| 22 |
+
epochs: 20
|
| 23 |
+
local_batch_size: 96
|
| 24 |
+
log_interval: 5
|
| 25 |
+
lr: 0.0001
|
| 26 |
+
lr_min_ratio: 1.0
|
| 27 |
+
lr_warmup_steps: 2000
|
| 28 |
+
weight_decay: 1.0
|