hexmage committed on
Commit
4cd2f3f
·
verified ·
1 Parent(s): 40bcbb3

Upload folder using huggingface_hub

Browse files
Files changed (21) hide show
  1. epoch_0.pt +3 -0
  2. epoch_1.pt +3 -0
  3. epoch_10.pt +3 -0
  4. epoch_11.pt +3 -0
  5. epoch_12.pt +3 -0
  6. epoch_13.pt +3 -0
  7. epoch_14.pt +3 -0
  8. epoch_15.pt +3 -0
  9. epoch_16.pt +3 -0
  10. epoch_17.pt +3 -0
  11. epoch_18.pt +3 -0
  12. epoch_19.pt +3 -0
  13. epoch_2.pt +3 -0
  14. epoch_3.pt +3 -0
  15. epoch_4.pt +3 -0
  16. epoch_5.pt +3 -0
  17. epoch_6.pt +3 -0
  18. epoch_7.pt +3 -0
  19. epoch_8.pt +3 -0
  20. epoch_9.pt +3 -0
  21. model_config.json +28 -0
epoch_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2cba8d3a36bf59fb44e83a77c430c2430cfb86a5a9fd09415cb645b4d16dcea
3
+ size 50381351
epoch_1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0dbdcb55deb86f51e11fcebb5ab504e65b64346f1b360f2da69d2fd09dcf8029
3
+ size 50381351
epoch_10.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50bd5d7c5ca36620ce726c8e1bda31552253cd659866e7e9f3fc15566147a971
3
+ size 50381953
epoch_11.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:189927de47cb520308d802e64686bfdb40f00190e23b7d56f30bb8bd5181de13
3
+ size 50381953
epoch_12.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:104be834b5af45f093438487277980c8e72dbd3100836bb25f6e85c3833434a6
3
+ size 50381953
epoch_13.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cd155dfba35a9dfdb6143c2ff9565baf599f079438fd6dea48f21b340f539d4
3
+ size 50381953
epoch_14.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bad528dd0e6de629e3de1a8e949ae8cd1179dbac59069efaab65abb0c72d2a5
3
+ size 50381953
epoch_15.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:915ef2a7684e43198584898c2edc09094d91b001fd48faedaac5f49b52adef60
3
+ size 50381953
epoch_16.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12189c8ed34baee1e31b6ea0f458821f7e27f2b8c0267e3253facf230ce7b2bf
3
+ size 50381953
epoch_17.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20554c6d901c5edc4abd9886836610768db9c6b5d58abd3eee4ef54b234351cb
3
+ size 50381953
epoch_18.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c549e662394e9c9e19c55959b571d52b705e1c93257b9031b181ced7accdf6e
3
+ size 50381953
epoch_19.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a535185e4c0b4fcd03ba5c7241198047ed19b5bb5825af09f416ebc49816115
3
+ size 50381953
epoch_2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f39a7c7373a4f97081985bbb0dd1c64bc49cc3622627679d1ce605acfbfc22fe
3
+ size 50381351
epoch_3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b7d9821430c6ec3fcbd7b7435f369dfbc86203c23c3116647fa143908965a8c
3
+ size 50381351
epoch_4.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14c127fa74f3323487820fd0244c54e36d8aabb227cda46bd4ed893b8dc8a1ba
3
+ size 50381351
epoch_5.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:364b81cc7e70bdce5eeb673666131dd548fb3c978b7c4372dd547e05f0a08ddf
3
+ size 50381351
epoch_6.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0641bde5702ec4e4ac71c0caa259e5c20a7e02775e6a72dc271e0384a55cfdbc
3
+ size 50381351
epoch_7.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50a9a8c08949b99cd3345144e63084551b0ab7e8a932c6087242d913da717876
3
+ size 50381351
epoch_8.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d758bc52f074c30de95474b0ca343f4a7c03e2690cc0d06d4a8d1f73f8e06bf4
3
+ size 50381351
epoch_9.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:450502b50e8aa98a90855b6bfbdc4b2abdf113ff6d8a48c6ba008e1a13af9929
3
+ size 50381351
model_config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ arch:
2
+ H_cycles: 2
3
+ L_cycles: 6
4
+ bptt: true
5
+ forward_dtype: bfloat16
6
+ head_dim: 64
7
+ hidden_size: 512
8
+ intermediate_size: 2048
9
+ name: hrm@HRM
10
+ norm_eps: 1.0e-06
11
+ num_layers: 2
12
+ rope_theta: 10000.0
13
+ beta1: 0.9
14
+ beta2: 0.95
15
+ cycles_per_data: 16
16
+ data:
17
+ augment: true
18
+ dataset_name: /sg-pretrain/datasets/sudoku-extreme-1k
19
+ name: sudoku
20
+ repeat: 200
21
+ ema: 0.999
22
+ epochs: 20
23
+ local_batch_size: 96
24
+ log_interval: 5
25
+ lr: 0.0001
26
+ lr_min_ratio: 1.0
27
+ lr_warmup_steps: 2000
28
+ weight_decay: 1.0