diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..8341b9ca2073e06dc38dd8deedecd4645ac098b7 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,14 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +stage1-gbs180/fsdp2_step_10000/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +stage1-gbs180/fsdp2_step_10000/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +stage1-gbs180/fsdp2_step_10000/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +stage1-gbs180/fsdp2_step_10000/.metadata filter=lfs diff=lfs merge=lfs -text +stage1-gbs180/fsdp2_step_10000/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +stage1-gbs180/fsdp2_step_10000/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +stage1-gbs180/fsdp2_step_10000/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +stage1-gbs180/fsdp2_step_10000/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +stage1-gbs180/fsdp2_step_10000/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +stage1-gbs180/fsdp2_step_15000/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +stage1-gbs180/fsdp2_step_15000/__4_0.distcp filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..bf616542003d50cea60051053cd3171f01951b55 --- /dev/null +++ b/README.md @@ -0,0 +1,7 @@ +# KoHRM-Text-1.4B Raw Checkpoints + +Raw FSDP2 checkpoints for training resume. These files are intentionally separated from the main model repo because Hugging Face may flag DCP shard files as unsafe for normal model loading. + +- stage: stage1-gbs180 +- available steps: 10000, 15000, 20000, 25000 +- main safe model repo: LLM-OS-Models/KoHRM-Text-1.4B diff --git a/stage1-gbs180/all_config.yaml b/stage1-gbs180/all_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7ee23c70b8ed4539c47c0de07d0c87df404e3373 --- /dev/null +++ b/stage1-gbs180/all_config.yaml @@ -0,0 +1,45 @@ +arch: + H_cycles: 2 + H_override: {} + L_cycles: 3 + bp_max_steps: 5 + bp_warmup_ratio: 0.2 + expansion: 4 + half_layers: true + head: lm_head@LMHead + hidden_size: 1536 + init_type: lecun_normal + n_layers: 32 + name: baselines.hrm_nocarry_bp_warmup@HierarchicalReasoningModel + norm_eps: 1.0e-06 + norm_type: pre + num_heads: 12 + pos_emb_type: rope + rope_theta: 10000.0 +beta1: 0.9 +beta2: 0.95 +checkpoint_interval: 1 +checkpoint_path: /home/work/.data/hrm_text_checkpoints/KoHRM-Text-1.4B-stage1-hrm-fastcap-gbs180 +checkpoint_step_interval: 5000 +data: + path: /home/work/.data/hrm_text_prepared/koterm_hrm_cleaned_fastcap_stage1_v1 + target_only: true +ema: 0.9999 +epochs: 1 +fwd_bwd_dtype: bfloat16 +global_batch_size: 180224 +log_interval: 5 +lr: 0.00022 +lr_min_ratio: 1.0 +lr_warmup_steps: 2000 +project_name: KoHRM-Text +resume_epoch: null +resume_from: /home/work/.data/hrm_text_checkpoints/KoHRM-Text-1.4B-stage0b-debug-launch2 +resume_step: null +resume_step_offset: 7765 +run_name: KoHRM-Text-1.4B-stage1-hrm-fastcap-gbs180 +seed: 0 +skip_batches: 0 +total_steps_override: 88522 +weight_decay: 0.1 +weights_only_resume_from_ema: false diff --git a/stage1-gbs180/carry_step_10000.0.pt b/stage1-gbs180/carry_step_10000.0.pt new file mode 100644 index 0000000000000000000000000000000000000000..af8bdb42994a3ed13cbb9ddb08354aeb37ae21c0 --- /dev/null +++ b/stage1-gbs180/carry_step_10000.0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dab378260628dcc41db9ff1f116901964966061288558610b97b997f39543720 +size 1327 diff --git a/stage1-gbs180/carry_step_10000.1.pt b/stage1-gbs180/carry_step_10000.1.pt new file mode 100644 index 0000000000000000000000000000000000000000..a0b99af5a8c5da6346e28d38a6b023ef11eaffb2 --- /dev/null +++ b/stage1-gbs180/carry_step_10000.1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:375b8f9a8608bf736738aa99fa81465f9b12836adf1e0732eb961db413a5608f +size 1327 diff --git a/stage1-gbs180/carry_step_10000.2.pt b/stage1-gbs180/carry_step_10000.2.pt new file mode 100644 index 0000000000000000000000000000000000000000..d4d089dcea1c0f78c565300f81efe17baf7d9cfd --- /dev/null +++ b/stage1-gbs180/carry_step_10000.2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f60eb95cd0cfa79ee73bc0c6c833bd174f67fd5e03500f0878290c7697235d1 +size 1327 diff --git a/stage1-gbs180/carry_step_10000.3.pt b/stage1-gbs180/carry_step_10000.3.pt new file mode 100644 index 0000000000000000000000000000000000000000..0c7b754b3bbb15d5b181db68d117a33ac03fdded --- /dev/null +++ b/stage1-gbs180/carry_step_10000.3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5ba80edc79155bd8ebdb04f0e3f181400566db20cdc2daf33af4c227852a39a +size 1327 diff --git a/stage1-gbs180/carry_step_10000.4.pt b/stage1-gbs180/carry_step_10000.4.pt new file mode 100644 index 0000000000000000000000000000000000000000..e9dc2ff85c97b83fd4e5a0ba4f15d963b538c914 --- /dev/null +++ b/stage1-gbs180/carry_step_10000.4.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:311783bef37a35511f2013a0b5c16214b1bef8f6377d63725795025f0922c2e5 +size 1327 diff --git a/stage1-gbs180/carry_step_10000.5.pt b/stage1-gbs180/carry_step_10000.5.pt new file mode 100644 index 0000000000000000000000000000000000000000..7443c316c4027f247b49f6dc63610372cd84f953 --- /dev/null +++ b/stage1-gbs180/carry_step_10000.5.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cf032c4cf2fb25122fbd87e8f7af1993778889c1ceffc4034b14fe27375dcce +size 1327 diff --git a/stage1-gbs180/carry_step_10000.6.pt b/stage1-gbs180/carry_step_10000.6.pt new file mode 100644 index 0000000000000000000000000000000000000000..4f7a20884a2a35c503e7e8953aa4ec7a5bd8d1c2 --- /dev/null +++ b/stage1-gbs180/carry_step_10000.6.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca3f394becf9931201d61bb85ae2fcccab4ceea8ed7e563b96b71cac8b881d5e +size 1327 diff --git a/stage1-gbs180/carry_step_10000.7.pt b/stage1-gbs180/carry_step_10000.7.pt new file mode 100644 index 0000000000000000000000000000000000000000..8b6a938c62ae4a24b870fa073d69abc2c12d6aaa --- /dev/null +++ b/stage1-gbs180/carry_step_10000.7.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63855aa3d1338c0539f03934a44e379043e26efc0c6dfef41e84ba9c50e0b62d +size 1327 diff --git a/stage1-gbs180/carry_step_15000.0.pt b/stage1-gbs180/carry_step_15000.0.pt new file mode 100644 index 0000000000000000000000000000000000000000..fad798438643609d13208c081ebca8d8f28a77fa --- /dev/null +++ b/stage1-gbs180/carry_step_15000.0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95bdeec6bd68ee8d6f79bb2401ba9fd14426d14e964147647da3fc83b0876caf +size 1327 diff --git a/stage1-gbs180/carry_step_15000.1.pt b/stage1-gbs180/carry_step_15000.1.pt new file mode 100644 index 0000000000000000000000000000000000000000..77ae4a1f14cc6377b4232bc21e424c7671e3e7c4 --- /dev/null +++ b/stage1-gbs180/carry_step_15000.1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22ab8b08877e42887a6b543a5a36c3d8e90d56254ee2a9d57d32b18239574026 +size 1327 diff --git a/stage1-gbs180/carry_step_15000.2.pt b/stage1-gbs180/carry_step_15000.2.pt new file mode 100644 index 0000000000000000000000000000000000000000..1fceb0eb4c6c9b9633121402bb265ba5d113e2e7 --- /dev/null +++ b/stage1-gbs180/carry_step_15000.2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83c145abb6ef679c699dc6e1879d708252f6829f28902049b5c017f0eb90f07a +size 1327 diff --git a/stage1-gbs180/carry_step_15000.3.pt b/stage1-gbs180/carry_step_15000.3.pt new file mode 100644 index 0000000000000000000000000000000000000000..22be01194653aea0f4e3dafb60abe9b15643d6e0 --- /dev/null +++ b/stage1-gbs180/carry_step_15000.3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e6ebad8c38062402800dc6cd15b825d601a1c41538c4907856163b84f84b4cf +size 1327 diff --git a/stage1-gbs180/carry_step_15000.4.pt b/stage1-gbs180/carry_step_15000.4.pt new file mode 100644 index 0000000000000000000000000000000000000000..876a9d9eab4abcbe3e888226ec50adcb253d0c13 --- /dev/null +++ b/stage1-gbs180/carry_step_15000.4.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fd8d6eb70214f001bfc9ac8b6362ea69f2d4fe91596bf021d07db9e01ca979c +size 1327 diff --git a/stage1-gbs180/carry_step_15000.5.pt b/stage1-gbs180/carry_step_15000.5.pt new file mode 100644 index 0000000000000000000000000000000000000000..d1592357c354669347f800c94f61b5d726750e89 --- /dev/null +++ b/stage1-gbs180/carry_step_15000.5.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01fc6b242e53fe50d98dcc67dc118557f231a2c3e795a9b8ed60159d15e28b4c +size 1327 diff --git a/stage1-gbs180/carry_step_15000.6.pt b/stage1-gbs180/carry_step_15000.6.pt new file mode 100644 index 0000000000000000000000000000000000000000..1e75ff2b4b0e69db6f10dc51be8d07e9cc65b3ea --- /dev/null +++ b/stage1-gbs180/carry_step_15000.6.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce884de03a2d9e9aeabcbe244b75f275a222b05bccd2224eb02607b238d1648f +size 1327 diff --git a/stage1-gbs180/carry_step_15000.7.pt b/stage1-gbs180/carry_step_15000.7.pt new file mode 100644 index 0000000000000000000000000000000000000000..7692158d40bf2d26eaccae945cf845d546796a8c --- /dev/null +++ b/stage1-gbs180/carry_step_15000.7.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cb6a2288fe6135e797825f00eb6d5dafc6883c2d2c7d2473b7c22382b82ba50 +size 1327 diff --git a/stage1-gbs180/carry_step_20000.0.pt b/stage1-gbs180/carry_step_20000.0.pt new file mode 100644 index 0000000000000000000000000000000000000000..3795d6422c177503c6dde39af9855bb75247fb6f --- /dev/null +++ b/stage1-gbs180/carry_step_20000.0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b364353e1bb5f14b6bacd5ca66a9dfa51d55d90958261fc37744a9fcd87b1b1f +size 1327 diff --git a/stage1-gbs180/carry_step_20000.1.pt b/stage1-gbs180/carry_step_20000.1.pt new file mode 100644 index 0000000000000000000000000000000000000000..601e518ca024de3d4cd2378b1347b38f92466baa --- /dev/null +++ b/stage1-gbs180/carry_step_20000.1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a2cfe87fdcc18d9901ae86f16294c7330695c96114f110715acfa9847a11995 +size 1327 diff --git a/stage1-gbs180/carry_step_20000.2.pt b/stage1-gbs180/carry_step_20000.2.pt new file mode 100644 index 0000000000000000000000000000000000000000..2439eb55125c8fc3970cdadb4f93a7c5a32d0b00 --- /dev/null +++ b/stage1-gbs180/carry_step_20000.2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f9d046cae2239607ed4b00aa2032e086d7302767c294946ecdaf94707001ec6 +size 1327 diff --git a/stage1-gbs180/carry_step_20000.3.pt b/stage1-gbs180/carry_step_20000.3.pt new file mode 100644 index 0000000000000000000000000000000000000000..7669a6c20081f0c34cd630672060f21644ce1877 --- /dev/null +++ b/stage1-gbs180/carry_step_20000.3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2f1ffa92c171b180407bfe45c3c2931ba3c63ba8b0bc373431053153e6580b4 +size 1327 diff --git a/stage1-gbs180/carry_step_20000.4.pt b/stage1-gbs180/carry_step_20000.4.pt new file mode 100644 index 0000000000000000000000000000000000000000..0ed5791e1845820a3e232c857d9561fa8b8588db --- /dev/null +++ b/stage1-gbs180/carry_step_20000.4.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:866375fa08b1fa67df79a368d47ed7e67dfbfd2ba3c8cb76fbabff90502cba21 +size 1327 diff --git a/stage1-gbs180/carry_step_20000.5.pt b/stage1-gbs180/carry_step_20000.5.pt new file mode 100644 index 0000000000000000000000000000000000000000..d3bb4db1544f918c2fcfe143b991ef8fb153a58c --- /dev/null +++ b/stage1-gbs180/carry_step_20000.5.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f998a8127b4b5a567669d2a0b37743a41b24fea04ad6c28a9dc924e13483719f +size 1327 diff --git a/stage1-gbs180/carry_step_20000.6.pt b/stage1-gbs180/carry_step_20000.6.pt new file mode 100644 index 0000000000000000000000000000000000000000..52bbbcc725bfddd6ed30f660c5ec1dc5155600d9 --- /dev/null +++ b/stage1-gbs180/carry_step_20000.6.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0b37c0b444f6e20545af80590b48f2e51a3d4990b14f86753cf69b9eeb61372 +size 1327 diff --git a/stage1-gbs180/carry_step_20000.7.pt b/stage1-gbs180/carry_step_20000.7.pt new file mode 100644 index 0000000000000000000000000000000000000000..dc879a30e070d8391f81f432708ab68ad08f28a3 --- /dev/null +++ b/stage1-gbs180/carry_step_20000.7.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f074d7c528d0eceb615a0a7e6453941f3fba69f4f40ea955af3795ebb2b3e38d +size 1327 diff --git a/stage1-gbs180/carry_step_25000.0.pt b/stage1-gbs180/carry_step_25000.0.pt new file mode 100644 index 0000000000000000000000000000000000000000..f67112e416e19d24b240fe4675b2b18beaa8e930 --- /dev/null +++ b/stage1-gbs180/carry_step_25000.0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00f739b64eb13b0c443aee82d76a5fc865c82b0e6e371d1f9c938a3c71c0a643 +size 1327 diff --git a/stage1-gbs180/carry_step_25000.1.pt b/stage1-gbs180/carry_step_25000.1.pt new file mode 100644 index 0000000000000000000000000000000000000000..1542996a40b7e599e363b24b46e8b382a1368f83 --- /dev/null +++ b/stage1-gbs180/carry_step_25000.1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43ce1f40b823c0f132e496e72f5eb4116d53725bdfb4b84adbe7c3a1873950e7 +size 1327 diff --git a/stage1-gbs180/carry_step_25000.2.pt b/stage1-gbs180/carry_step_25000.2.pt new file mode 100644 index 0000000000000000000000000000000000000000..5e3d77384771c9655dc481196f2cf1ca5984ffa3 --- /dev/null +++ b/stage1-gbs180/carry_step_25000.2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d833323b299383830482a8c19fac63bb61dfe3206535bd206d8f7a246b4ba7a4 +size 1327 diff --git a/stage1-gbs180/carry_step_25000.3.pt b/stage1-gbs180/carry_step_25000.3.pt new file mode 100644 index 0000000000000000000000000000000000000000..d4018445ec0bf64c7e69a367729858d083bba4dd --- /dev/null +++ b/stage1-gbs180/carry_step_25000.3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bfa8a4e8a87054853886cafcd26959e67f6f6298e381ba58f0cf948ae6dabb1 +size 1327 diff --git a/stage1-gbs180/carry_step_25000.4.pt b/stage1-gbs180/carry_step_25000.4.pt new file mode 100644 index 0000000000000000000000000000000000000000..51f1d8963d0d0a55867dc93de8e5015f665bdef2 --- /dev/null +++ b/stage1-gbs180/carry_step_25000.4.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60e0cd377accb93db1732a008054d109b77b5d66e3525ab84fa48d6ba462696e +size 1327 diff --git a/stage1-gbs180/carry_step_25000.5.pt b/stage1-gbs180/carry_step_25000.5.pt new file mode 100644 index 0000000000000000000000000000000000000000..500ffbdb0ffc8f3f7cf5a85fed7e4d968b6f98a7 --- /dev/null +++ b/stage1-gbs180/carry_step_25000.5.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:caebc4381f62ce2a9194f5f6c6ad30df7066ecec8e319c471f247607d7934e0f +size 1327 diff --git a/stage1-gbs180/carry_step_25000.6.pt b/stage1-gbs180/carry_step_25000.6.pt new file mode 100644 index 0000000000000000000000000000000000000000..70ee8338ca926645a8f4b587689fb2f25f8b4156 --- /dev/null +++ b/stage1-gbs180/carry_step_25000.6.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:333014deed66cb1fe60680d4dbed8a77ffa24cd21410d4e58eb099d907b6f397 +size 1327 diff --git a/stage1-gbs180/carry_step_25000.7.pt b/stage1-gbs180/carry_step_25000.7.pt new file mode 100644 index 0000000000000000000000000000000000000000..807b7edc900720094180444e7414a96d083f920b --- /dev/null +++ b/stage1-gbs180/carry_step_25000.7.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9a55d28492838d1b18c75453d457118bd9a650ce282814198b589107d41e924 +size 1327 diff --git a/stage1-gbs180/fsdp2_step_10000/.metadata b/stage1-gbs180/fsdp2_step_10000/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..fe235d21051d307cff2f9d3b3f14fdf2a273092a --- /dev/null +++ b/stage1-gbs180/fsdp2_step_10000/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0e8493667e421add1f4f2d485ebb5763bdad456c9236bf8ee8d6dd527a6f8c9 +size 983802 diff --git a/stage1-gbs180/fsdp2_step_10000/__0_0.distcp b/stage1-gbs180/fsdp2_step_10000/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..f4a6c7bde06891510bf83e1843701d8981df53aa --- /dev/null +++ b/stage1-gbs180/fsdp2_step_10000/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87e9846b8e74c33006787d96dbd8b7cdeabb0aa54bba763a766a8ce0cd26b67a +size 2769065329 diff --git a/stage1-gbs180/fsdp2_step_10000/__1_0.distcp b/stage1-gbs180/fsdp2_step_10000/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..cbb39cf99ad4958801e704390f6503877d7c690b --- /dev/null +++ b/stage1-gbs180/fsdp2_step_10000/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bac20d0adde97f47009e1376dd39574c658f5d817a42842dc279909daa0d4e2f +size 2769090643 diff --git a/stage1-gbs180/fsdp2_step_10000/__2_0.distcp b/stage1-gbs180/fsdp2_step_10000/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..7a854bfd75a32e438c25cf1b9c45a795dd4ef9b9 --- /dev/null +++ b/stage1-gbs180/fsdp2_step_10000/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16cf1de977fc30582f871cc33911ead3b7c025c9ebc19cf3d70f8c91e368c861 +size 2769090643 diff --git a/stage1-gbs180/fsdp2_step_10000/__3_0.distcp b/stage1-gbs180/fsdp2_step_10000/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..a6027dc6d4ad249b4951e3ebcfe072cbb0fa3c4b --- /dev/null +++ b/stage1-gbs180/fsdp2_step_10000/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ab8447aec711c830a0c977085dbd86b6b4c662afe98fc660634d5ee47e28326 +size 2769090643 diff --git a/stage1-gbs180/fsdp2_step_10000/__4_0.distcp b/stage1-gbs180/fsdp2_step_10000/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..4ff68ae05a933d097e60279288561bec26858016 --- /dev/null +++ b/stage1-gbs180/fsdp2_step_10000/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:413d51d51c1c191f6a5ceb5881f36c6db0ca206fc1deb22602aa3dcde87b5b81 +size 2769090643 diff --git a/stage1-gbs180/fsdp2_step_10000/__5_0.distcp b/stage1-gbs180/fsdp2_step_10000/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..2e1fd6ea0105f3e3a661f53304dcc057792831fa --- /dev/null +++ b/stage1-gbs180/fsdp2_step_10000/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da4fd0c186ddd268de0e889eb9d67afc96f2895ca9e9ad4f7a2a99b664178880 +size 2769090643 diff --git a/stage1-gbs180/fsdp2_step_10000/__6_0.distcp b/stage1-gbs180/fsdp2_step_10000/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..d6d29684532e473d839b27264adccb17a19f84bf --- /dev/null +++ b/stage1-gbs180/fsdp2_step_10000/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea16f17f202ad52a9c49e8ab5c61e02b84d6e78b14e6a593dc68d689a60c7b12 +size 2769091588 diff --git a/stage1-gbs180/fsdp2_step_10000/__7_0.distcp b/stage1-gbs180/fsdp2_step_10000/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..59ce163cecc2cec8dc04753516e6bb0bed28406c --- /dev/null +++ b/stage1-gbs180/fsdp2_step_10000/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2745c90ed50dafb5eaf12bf7f2c62ced6752cf5e137350a515593fd92f906bd +size 2769098756 diff --git a/stage1-gbs180/fsdp2_step_15000/__4_0.distcp b/stage1-gbs180/fsdp2_step_15000/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..d7470846698ff1c8372c771f8a16af70abfcfc8d --- /dev/null +++ b/stage1-gbs180/fsdp2_step_15000/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15c61709ce968c2a3dadca65cddcf6e4f96d1f4323f7a5ee050b1dbada40fd14 +size 2769090643 diff --git a/stage1-gbs180/fsdp2_step_15000/__6_0.distcp b/stage1-gbs180/fsdp2_step_15000/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..d78e86752daf38edcf0cc29d90015950421abd76 --- /dev/null +++ b/stage1-gbs180/fsdp2_step_15000/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3efb3df29979c73a4cd33cb9bdd346e24e9df9f0d87a8a0539111190e4b944d4 +size 2769091588 diff --git a/stage1-gbs180/step_10000_info.json b/stage1-gbs180/step_10000_info.json new file mode 100644 index 0000000000000000000000000000000000000000..1b5bf7b23e636f72e76624e13582080b003d7a36 --- /dev/null +++ b/stage1-gbs180/step_10000_info.json @@ -0,0 +1,8 @@ +{ + "tag": "step_10000", + "global_step": 10000, + "stage_start_step": 7765, + "skip_batches_hint": 2235, + "data_path": "/home/work/.data/hrm_text_prepared/koterm_hrm_cleaned_fastcap_stage1_v1", + "global_batch_size": 180224 +} \ No newline at end of file diff --git a/stage1-gbs180/step_15000_info.json b/stage1-gbs180/step_15000_info.json new file mode 100644 index 0000000000000000000000000000000000000000..118c6ef8837e5a7720c916365c68fb2a774973b3 --- /dev/null +++ b/stage1-gbs180/step_15000_info.json @@ -0,0 +1,8 @@ +{ + "tag": "step_15000", + "global_step": 15000, + "stage_start_step": 7765, + "skip_batches_hint": 7235, + "data_path": "/home/work/.data/hrm_text_prepared/koterm_hrm_cleaned_fastcap_stage1_v1", + "global_batch_size": 180224 +} \ No newline at end of file diff --git a/stage1-gbs180/step_20000_info.json b/stage1-gbs180/step_20000_info.json new file mode 100644 index 0000000000000000000000000000000000000000..cb2e653f6abf0ae0a3e918bc071fa4f93896f136 --- /dev/null +++ b/stage1-gbs180/step_20000_info.json @@ -0,0 +1,8 @@ +{ + "tag": "step_20000", + "global_step": 20000, + "stage_start_step": 7765, + "skip_batches_hint": 12235, + "data_path": "/home/work/.data/hrm_text_prepared/koterm_hrm_cleaned_fastcap_stage1_v1", + "global_batch_size": 180224 +} \ No newline at end of file diff --git a/stage1-gbs180/step_25000_info.json b/stage1-gbs180/step_25000_info.json new file mode 100644 index 0000000000000000000000000000000000000000..9f524ac673f4e938344b1dc05aba0aaf7ab3d858 --- /dev/null +++ b/stage1-gbs180/step_25000_info.json @@ -0,0 +1,8 @@ +{ + "tag": "step_25000", + "global_step": 25000, + "stage_start_step": 7765, + "skip_batches_hint": 17235, + "data_path": "/home/work/.data/hrm_text_prepared/koterm_hrm_cleaned_fastcap_stage1_v1", + "global_batch_size": 180224 +} \ No newline at end of file diff --git a/stage1-gbs180/train_metadata.yaml b/stage1-gbs180/train_metadata.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8997939ade9082f6dc03e0fcd1886324dba5fbee --- /dev/null +++ b/stage1-gbs180/train_metadata.yaml @@ -0,0 +1,13 @@ +max_seq_len: 4096 +tokenizer_info: + boq: <|im_start|> + condition_mapping: + cot: <|object_ref_end|> + direct: <|object_ref_start|> + noisy: <|quad_start|> + synth: <|quad_end|> + eoa: <|box_end|> + eoq: <|im_end|> + tokenizer_path: /home/work/.data/huggingface/trained_tokenizers/hrm-ko-terminal-131k-v1/tokenizer.json +total_length: 14554291763 +vocab_size: 131072