IvanHU committed on
Commit
b4995a5
·
verified ·
1 Parent(s): 15fdee4

Upload folder using huggingface_hub

Browse files
Files changed (22) hide show
  1. .gitattributes +17 -0
  2. model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata +3 -0
  3. model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp +3 -0
  4. model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp +3 -0
  5. model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp +3 -0
  6. model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp +3 -0
  7. model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp +3 -0
  8. model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp +3 -0
  9. model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp +3 -0
  10. model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp +3 -0
  11. model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp +3 -0
  12. model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp +3 -0
  13. model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp +3 -0
  14. model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp +3 -0
  15. model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp +3 -0
  16. model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp +3 -0
  17. model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp +3 -0
  18. model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp +3 -0
  19. model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/common.pt +3 -0
  20. model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/metadata.json +1 -0
  21. model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/latest_checkpointed_iteration.txt +1 -0
  22. model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/linked_runs.txt +1 -0
.gitattributes CHANGED
@@ -746,3 +746,20 @@ model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3
746
  model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
747
  model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
748
  model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
746
  model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
747
  model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
748
  model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
749
+ model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata filter=lfs diff=lfs merge=lfs -text
750
+ model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
751
+ model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp filter=lfs diff=lfs merge=lfs -text
752
+ model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
753
+ model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp filter=lfs diff=lfs merge=lfs -text
754
+ model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
755
+ model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp filter=lfs diff=lfs merge=lfs -text
756
+ model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
757
+ model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp filter=lfs diff=lfs merge=lfs -text
758
+ model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
759
+ model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp filter=lfs diff=lfs merge=lfs -text
760
+ model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
761
+ model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp filter=lfs diff=lfs merge=lfs -text
762
+ model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
763
+ model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
764
+ model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
765
+ model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:463155346f8676814d1b2d9e23b79813e43782f56bd828f5ae4aa2b2cfee5485
3
+ size 1419079
model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6d0e2fab6f35a384e98ee210b84fad534e3adfbbdad0f54b7274a3501304cb7
3
+ size 506944597
model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91e50e4e5bdbfac6beffeac0378a2b732153dad702f2a250f57bbcceb6505a44
3
+ size 507014665
model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a009e9f793f372d84cc94465ca91d227da66dd35883df7838afd187c1f228eb
3
+ size 473253992
model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02077c953c36f8bcdde15e55b792e65078735dafb5d1867162d77e0f327d8344
3
+ size 473251834
model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49af1dc63003a83c7b7dac06060ee24a055faf4aba57f35ab2ed4e379ebce2f8
3
+ size 473356132
model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84092ea2770b20ea6891434330e5a8676bf02d6c13efa3197d5099aa0e2ef977
3
+ size 473370325
model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e7375507ab818019b26d95bc3ef55e0d3e102b968c69b201dd597131d15a14c
3
+ size 472932788
model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e821c61471d5241e21d2cb2e67ff3a9ffdf20c74af34028186739c55902f328
3
+ size 472972213
model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a8e49685c58e21c70d3abd6ef5ed8ce379e2413355b248a6784c4c77f740c42
3
+ size 473046656
model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abfd4d769c7d0e228e91d8d9e874a3de329c054fa2cdb527b805a43afc908e6e
3
+ size 473066092
model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7331d45324e8066e6d13653c1742315bd50faa3432d209bba2373e9e114a8388
3
+ size 506752597
model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f6c6a48455d9b3479d81bfd61801115c19c9613fb551123398a7e43e22b5283
3
+ size 505662160
model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b231a05f7c1e8cfdb3e073733bb73506cc902c88a6351202d23332831af23d5
3
+ size 473412140
model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08ae804cc04eaa4057c252860023aae58f8fca4b6ce5ac9c375abb8d25525f53
3
+ size 472714879
model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ded698864091cdc2161af51c18b2aa19d5f770bc03ca842482c5b1f2c754c80
3
+ size 472636339
model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8eca8e74b8e7f30952bdd5eaf056acbf59ad75849b58ea6593b23306cfa61437
3
+ size 472714879
model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/common.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d8295d4bc921d746022f1f31a7402b14671654f3e6d6ad534a6f4bf8c15c7d9
3
+ size 19239
model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sharded_backend": "torch_dist", "sharded_backend_version": 1, "common_backend": "torch", "common_backend_version": 1}
model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/latest_checkpointed_iteration.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ 4768
model/dev-muon-mamba_moe-0.5b-q16-kv2-hybrid0.16-ep-8-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/linked_runs.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ 2025.05.27-00.54.10