diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..4aa333be61573b04f01a471a27bf6212007e24c1 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,26 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +ICL/RL/trl_source/trl/trainer/__pycache__/grpo_trainer.cpython-313.pyc filter=lfs diff=lfs merge=lfs -text +ICL/RL/plots/overview.png filter=lfs diff=lfs merge=lfs -text +ICL/RL/trl_source/assets/logo-light.png filter=lfs diff=lfs merge=lfs -text +ICL/SFT_new/eval_results/eval_val_20260403_161423.json filter=lfs diff=lfs merge=lfs -text +ICL/DAPO/verl-recipe/qat/img/image2.png filter=lfs diff=lfs merge=lfs -text +ICL/DAPO/verl-recipe/qat/img/image4.png filter=lfs diff=lfs merge=lfs -text +ICL/DAPO/verl-recipe/qat/img/image3.png filter=lfs diff=lfs merge=lfs -text +ICL/LV/dataset_inspect.schemas.json filter=lfs diff=lfs merge=lfs -text +ICL/dataset_inspect.schemas.json filter=lfs diff=lfs merge=lfs -text +ICL/DAPO/verl-recipe/qat/img/image1.png filter=lfs diff=lfs merge=lfs -text +ICL/DAPO/verl-recipe/flowrl/figures/flowrl.png filter=lfs diff=lfs merge=lfs -text +ICL/SFT_new/eval_results/eval_val_20260407_053929.json filter=lfs diff=lfs merge=lfs -text +ICL/SFT_new/eval_results/eval_val_20260406_040033.json filter=lfs diff=lfs merge=lfs -text +ICL/SFT_new/eval_results/eval_val_latest.json filter=lfs diff=lfs merge=lfs -text +ICL/SFT_new/eval_results/eval_val_20260404_135535.json filter=lfs diff=lfs merge=lfs -text +ICL/SFT_new/eval_results/eval_val_20260403_164141.json filter=lfs diff=lfs merge=lfs -text +ICL/RL/plots/reward.png filter=lfs diff=lfs merge=lfs -text +ICL/RL/plots/reward_detail.png filter=lfs diff=lfs merge=lfs -text +ICL/RL/plots/step_time.png filter=lfs diff=lfs merge=lfs -text +ICL/RL/plots/kl_divergence.png filter=lfs diff=lfs merge=lfs -text +ICL/RL/plots/completion_terminated_length.png filter=lfs diff=lfs merge=lfs -text +ICL/RL/plots/loss.png filter=lfs diff=lfs merge=lfs -text +ICL/SFT_new/eval_results/eval_val_20260403_163019.json filter=lfs diff=lfs merge=lfs -text diff --git a/ICL/DAPO/verl-recipe/flowrl/figures/flowrl.png b/ICL/DAPO/verl-recipe/flowrl/figures/flowrl.png new file mode 100644 index 0000000000000000000000000000000000000000..de2b2d491645971c14967df484fa71c4ce4fd7f5 --- /dev/null +++ b/ICL/DAPO/verl-recipe/flowrl/figures/flowrl.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb4e4a89e3b39c98f3729f6aed6d3fb11c0608c702684a924923d63b9ad76ac7 +size 716022 diff --git a/ICL/DAPO/verl-recipe/qat/img/image1.png b/ICL/DAPO/verl-recipe/qat/img/image1.png new file mode 100644 index 0000000000000000000000000000000000000000..a0c6fa6656030592020b84b24e2c31b5f890432b --- /dev/null +++ b/ICL/DAPO/verl-recipe/qat/img/image1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85211d1b8743c302c523790b14e99327fae43204ae23b3a43f5328d7c0e14044 +size 551786 diff --git a/ICL/DAPO/verl-recipe/qat/img/image2.png b/ICL/DAPO/verl-recipe/qat/img/image2.png new file mode 100644 index 0000000000000000000000000000000000000000..f0c8b8766884e7251792966c2e125770ec5f0bde --- /dev/null +++ b/ICL/DAPO/verl-recipe/qat/img/image2.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d335f069b8f23ede02a54d6e0a4dbb6c290ae01313b18c6dae6c5f13ab795ab0 +size 698345 diff --git a/ICL/DAPO/verl-recipe/qat/img/image3.png b/ICL/DAPO/verl-recipe/qat/img/image3.png new file mode 100644 index 0000000000000000000000000000000000000000..34ea14f71a4cb21af8f1cb16fbd1f5efc3311ac9 --- /dev/null +++ b/ICL/DAPO/verl-recipe/qat/img/image3.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19399080452fe0f79e076a575af8ed579aeaf8b5e352ca6eacb8b11f5d4828ac +size 314645 diff --git a/ICL/DAPO/verl-recipe/qat/img/image4.png b/ICL/DAPO/verl-recipe/qat/img/image4.png new file mode 100644 index 0000000000000000000000000000000000000000..3b29851b0af70c2957700a105e1f940401765c39 --- /dev/null +++ b/ICL/DAPO/verl-recipe/qat/img/image4.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37b33acbf56aaeaf8a9be336cc916ff139c62544e998f6590f7f2defc2bfb51b +size 681500 diff --git a/ICL/LV/dataset_inspect.schemas.json b/ICL/LV/dataset_inspect.schemas.json new file mode 100644 index 0000000000000000000000000000000000000000..8cdc6d6b64506a045e42289a8498008efe2ab40c --- /dev/null +++ b/ICL/LV/dataset_inspect.schemas.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a66fb74f3220136d8910a7287bd70b7d3dfe9a9038c0e94e1f078598215eae22 +size 131773150 diff --git a/ICL/RL/plots/completion_terminated_length.png b/ICL/RL/plots/completion_terminated_length.png new file mode 100644 index 0000000000000000000000000000000000000000..7136cb5b475e92b263b95f8c204c6d8564f887bf --- /dev/null +++ b/ICL/RL/plots/completion_terminated_length.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae2b5c1cc91c4296910687bd0d6f90a32b6ba5d448a1041105415ee640870cc9 +size 116555 diff --git a/ICL/RL/plots/kl_divergence.png b/ICL/RL/plots/kl_divergence.png new file mode 100644 index 0000000000000000000000000000000000000000..f4cf13a40c67dce0a1653caf5951eeef929f5c42 --- /dev/null +++ b/ICL/RL/plots/kl_divergence.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc398203a7a8b1edc0779f3148224e2b0783c2469d2dcd488ad297ac1f2abaf1 +size 101236 diff --git a/ICL/RL/plots/loss.png b/ICL/RL/plots/loss.png new file mode 100644 index 0000000000000000000000000000000000000000..a0aac237aa1ad1f38447dc3024ee15f1557d6823 --- /dev/null +++ b/ICL/RL/plots/loss.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25d8f7920e3af98e7d2e3b63e33c408ce3f5be9088a337d660c8a0ce25a5c6ab +size 110219 diff --git a/ICL/RL/plots/overview.png b/ICL/RL/plots/overview.png new file mode 100644 index 0000000000000000000000000000000000000000..e4adbe0d8eaa18a891d3499d33863006609ce028 --- /dev/null +++ b/ICL/RL/plots/overview.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:745b49cc61d61a0b3700d9f1ade781704a442ce696222543b1ac0636e4fa66bf +size 487029 diff --git a/ICL/RL/plots/reward.png b/ICL/RL/plots/reward.png new file mode 100644 index 0000000000000000000000000000000000000000..cb7546ea68b3d56074e3a3a809e3110f21679046 --- /dev/null +++ b/ICL/RL/plots/reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c95f7d8de232d69c0b5016caffd7eff9797a4000dff886ffa4633d775cc640bd +size 116369 diff --git a/ICL/RL/plots/reward_detail.png b/ICL/RL/plots/reward_detail.png new file mode 100644 index 0000000000000000000000000000000000000000..d576363687a051a9f6b181a0e70d6f9c81b131b3 --- /dev/null +++ b/ICL/RL/plots/reward_detail.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9eb0b6885b08bf32c7a82c62f82471f02424959a818518b3d2c8a02f078375f8 +size 112861 diff --git a/ICL/RL/plots/step_time.png b/ICL/RL/plots/step_time.png new file mode 100644 index 0000000000000000000000000000000000000000..c5c69b07e317ff5aa32570df32f5aeb775167937 --- /dev/null +++ b/ICL/RL/plots/step_time.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4779915c7c5548a8bbe0fe6c27dac0140bdcdc9aeb8105395e680589f40db81b +size 100086 diff --git a/ICL/RL/trl_source/assets/logo-light.png b/ICL/RL/trl_source/assets/logo-light.png new file mode 100644 index 0000000000000000000000000000000000000000..520d8d1765456c213c47cb02fa2bfa70bd66abdf --- /dev/null +++ b/ICL/RL/trl_source/assets/logo-light.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:902ecba8e7b1d2fb4621f3ca38ff0edb968de38f4061d8743b5a4965d3af1f33 +size 137446 diff --git a/ICL/RL/trl_source/trl/trainer/__pycache__/grpo_trainer.cpython-313.pyc b/ICL/RL/trl_source/trl/trainer/__pycache__/grpo_trainer.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..547f1d6ce3cb479315984648c30abcc99fe34512 --- /dev/null +++ b/ICL/RL/trl_source/trl/trainer/__pycache__/grpo_trainer.cpython-313.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf8a45c462259c95e85b528d08312f573e2f8f2b49ccba4520c19a922a6f86e3 +size 111750 diff --git a/ICL/SFT_new/eval_results/eval_val_20260403_161423.json b/ICL/SFT_new/eval_results/eval_val_20260403_161423.json new file mode 100644 index 0000000000000000000000000000000000000000..a3f86e5ab239e69dca162ca2f3f699f039ddba72 --- /dev/null +++ b/ICL/SFT_new/eval_results/eval_val_20260403_161423.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9115add0560bcaf24bfe4a86376a058fdd1062ea39dc98a0f89dc4070f735cf6 +size 73357999 diff --git a/ICL/SFT_new/eval_results/eval_val_20260403_163019.json b/ICL/SFT_new/eval_results/eval_val_20260403_163019.json new file mode 100644 index 0000000000000000000000000000000000000000..663952c035462b9f0be7d03943c2928f7e456ef3 --- /dev/null +++ b/ICL/SFT_new/eval_results/eval_val_20260403_163019.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b2ede88ea04913c716f490c0bc603ed762c15944db68d3018cdf4d76a258a6d +size 73357999 diff --git a/ICL/SFT_new/eval_results/eval_val_20260403_164141.json b/ICL/SFT_new/eval_results/eval_val_20260403_164141.json new file mode 100644 index 0000000000000000000000000000000000000000..6355488b6551e222ba71ada36f974f7172900c6f --- /dev/null +++ b/ICL/SFT_new/eval_results/eval_val_20260403_164141.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed8e56f90fc398311661760bf9804ede3919e045d7930eec385cc8a9f03a577a +size 13380826 diff --git a/ICL/SFT_new/eval_results/eval_val_20260404_135535.json b/ICL/SFT_new/eval_results/eval_val_20260404_135535.json new file mode 100644 index 0000000000000000000000000000000000000000..425f554acb72f0f8620d70c81e74138639d3820c --- /dev/null +++ b/ICL/SFT_new/eval_results/eval_val_20260404_135535.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ee5f475959dea8d2b6054008c000278e8adbc56b73299bd04bf053fb25d6038 +size 13614172 diff --git a/ICL/SFT_new/eval_results/eval_val_20260406_040033.json b/ICL/SFT_new/eval_results/eval_val_20260406_040033.json new file mode 100644 index 0000000000000000000000000000000000000000..95df2da44764f067dc226c1bedfaf098f9656cb9 --- /dev/null +++ b/ICL/SFT_new/eval_results/eval_val_20260406_040033.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58442e061f013c65083f37f1b81f732c45917e858ba7d0de800fd9b003d47852 +size 13614171 diff --git a/ICL/SFT_new/eval_results/eval_val_20260407_053929.json b/ICL/SFT_new/eval_results/eval_val_20260407_053929.json new file mode 100644 index 0000000000000000000000000000000000000000..d6c8efc65d49ef28d6c376bcd3a50243edc6e8ab --- /dev/null +++ b/ICL/SFT_new/eval_results/eval_val_20260407_053929.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0278b419a1cda69240f2ead4c5c9c20e4f3f5c55cdf2e43abf4a9781a5f1407c +size 13614173 diff --git a/ICL/SFT_new/eval_results/eval_val_latest.json b/ICL/SFT_new/eval_results/eval_val_latest.json new file mode 100644 index 0000000000000000000000000000000000000000..d6c8efc65d49ef28d6c376bcd3a50243edc6e8ab --- /dev/null +++ b/ICL/SFT_new/eval_results/eval_val_latest.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0278b419a1cda69240f2ead4c5c9c20e4f3f5c55cdf2e43abf4a9781a5f1407c +size 13614173 diff --git a/ICL/dataset_inspect.schemas.json b/ICL/dataset_inspect.schemas.json new file mode 100644 index 0000000000000000000000000000000000000000..8cdc6d6b64506a045e42289a8498008efe2ab40c --- /dev/null +++ b/ICL/dataset_inspect.schemas.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a66fb74f3220136d8910a7287bd70b7d3dfe9a9038c0e94e1f078598215eae22 +size 131773150 diff --git a/ICL/sft_model/epoch3_step1406_fp32/model-00001-of-00008.safetensors b/ICL/sft_model/epoch3_step1406_fp32/model-00001-of-00008.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d93250d0ebe210b50e5e781f10cf6ee9ea66ee94 --- /dev/null +++ b/ICL/sft_model/epoch3_step1406_fp32/model-00001-of-00008.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:956273bc71e29c89b14d3511a2ea82457990091518795159ee2b31d07d910c72 +size 4958393960 diff --git a/ICL/sft_model/epoch3_step1406_fp32/model-00002-of-00008.safetensors b/ICL/sft_model/epoch3_step1406_fp32/model-00002-of-00008.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c31fb7794bfad8015aa9a121d706f6dcb94d562a --- /dev/null +++ b/ICL/sft_model/epoch3_step1406_fp32/model-00002-of-00008.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23dad69d99e176a0441a6c7159575864507eb283138be2985bb031ac745b2aea +size 4832049608 diff --git a/ICL/sft_model/epoch3_step1406_fp32/model-00003-of-00008.safetensors b/ICL/sft_model/epoch3_step1406_fp32/model-00003-of-00008.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ef32286e446a6f150b1a35102edbb6a792cafbdc --- /dev/null +++ b/ICL/sft_model/epoch3_step1406_fp32/model-00003-of-00008.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bfbcd41c343359ca4d5a311ecbbcbf46a47eb013bdc9b2298bd025405962400 +size 4832049624 diff --git a/ICL/sft_model/epoch3_step1406_fp32/model-00004-of-00008.safetensors b/ICL/sft_model/epoch3_step1406_fp32/model-00004-of-00008.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..07efffe12f27f41da00cd7efa6f344efcc60f809 --- /dev/null +++ b/ICL/sft_model/epoch3_step1406_fp32/model-00004-of-00008.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68f4c63b799b37e8ec837866fe491342bcbc6357bd7a240a49a12cb944a136ab +size 4999856656 diff --git a/ICL/sft_model/epoch3_step1406_fp32/model-00005-of-00008.safetensors b/ICL/sft_model/epoch3_step1406_fp32/model-00005-of-00008.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f26ebae596a490a505dc10b7b0f757e2f2169c44 --- /dev/null +++ b/ICL/sft_model/epoch3_step1406_fp32/model-00005-of-00008.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee6c01232f7cc496d9f9c39498b3a2db1d69befad2088a699cfd5cc4857aa5c8 +size 4832049680 diff --git a/ICL/sft_model/epoch3_step1406_fp32/model-00006-of-00008.safetensors b/ICL/sft_model/epoch3_step1406_fp32/model-00006-of-00008.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..57138df887ce71c38ba582ee019cef367c6ad1fa --- /dev/null +++ b/ICL/sft_model/epoch3_step1406_fp32/model-00006-of-00008.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95e0648042f88d033039e7fe4f604f61d531a6ac596428e3a7f38f4fd6016930 +size 4832049672 diff --git a/ICL/sft_model/epoch3_step1406_fp32/model-00007-of-00008.safetensors b/ICL/sft_model/epoch3_step1406_fp32/model-00007-of-00008.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6488b2d8a4a9eabaa5ffc5a6341a589a8a1af132 --- /dev/null +++ b/ICL/sft_model/epoch3_step1406_fp32/model-00007-of-00008.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5f0756c34ce46f9130b6cab182e04fba2bc3e0d0579b2b64b3cf41bc76ad8f1 +size 3288524864 diff --git a/ICL/sft_model/epoch3_step1406_fp32/model-00008-of-00008.safetensors b/ICL/sft_model/epoch3_step1406_fp32/model-00008-of-00008.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6b561b27ee1c05472df77e5bab775e716c61f61d --- /dev/null +++ b/ICL/sft_model/epoch3_step1406_fp32/model-00008-of-00008.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6b1cd69a615f82fda5cc5e2c3bd6c5f7cd176c434056707a9a16a5832e9377b +size 2485026944 diff --git a/ICL/sft_model/final/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/ICL/sft_model/final/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..45268958ce4f5318799d2fed596cd710951d1b77 --- /dev/null +++ b/ICL/sft_model/final/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca4b60aa7696aa4d046c145ad402a088b9fe2ff7d4fe06af9ca7c334db04b134 +size 6573736572 diff --git a/ICL/sft_model/final/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt b/ICL/sft_model/final/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..11446544fee2ab478c72bf5da39e59091eadd7b0 --- /dev/null +++ b/ICL/sft_model/final/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61788c2c6d848850dcdd917badd0e1a6d13a7eea1c62d16ab14891607bb41cfd +size 6573736579 diff --git a/ICL/sft_model/final/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt b/ICL/sft_model/final/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..46f8525036584783d96cc7c7865678a7d0df5bf9 --- /dev/null +++ b/ICL/sft_model/final/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6224e39ac79938208d53b49d10d56f85d570e20d2fc62cc2752788fa909afdfa +size 6573736579 diff --git a/ICL/sft_model/final/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt b/ICL/sft_model/final/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..32a7340a3bf18d3981b180e80a12ee9f5611e28a --- /dev/null +++ b/ICL/sft_model/final/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:265ce64eb99df5387525955544bc2014831201d56cceb71b20f60002bb075dd9 +size 6573736579 diff --git a/ICL/sft_model/final/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt b/ICL/sft_model/final/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..de053338c748b97f85c855c2ef6a7f4e10e7af9d --- /dev/null +++ b/ICL/sft_model/final/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:004298254e7d182c7a30c8d80529234312d5a7874f0e1e6c49b82b3d37b0bb47 +size 6573736579 diff --git a/ICL/sft_model/final/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt b/ICL/sft_model/final/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..af95cb021ee4db00a2c2a9ccfbefe6ce2dbbbf09 --- /dev/null +++ b/ICL/sft_model/final/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:757e6d7353534cd0062008b6f8532e9c4d3b970e8d83f21681fef1cd51505df8 +size 6573736579 diff --git a/ICL/sft_model/final/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt b/ICL/sft_model/final/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a79795e4e9138c6b0f9be6216983bc0acfa811a0 --- /dev/null +++ b/ICL/sft_model/final/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12a98ed1389b083c9d0b354a536afab56ff0a4097ba982cc87fa156c2d6997dd +size 6573736579 diff --git a/ICL/sft_model/final/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/ICL/sft_model/final/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a32af28d56d5e8f23413db4fea71b1e04b998e57 --- /dev/null +++ b/ICL/sft_model/final/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45b8f8b10bcc52e445f0d5d68fa589afa28348ee2d83be18d8dcfe36889c92f5 +size 6573736572 diff --git a/ICL/sft_model/final/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/ICL/sft_model/final/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9087a3004dc357345761361de245c4d9d386c2f1 --- /dev/null +++ b/ICL/sft_model/final/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6207e3336074d4d28c852a37edba15823f297a9e87939ddccc7e73d47c306567 +size 6573736572 diff --git a/ICL/sft_model/final/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/ICL/sft_model/final/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..68f7049465a1a8ca11af7c4fef20bf5b78697fc8 --- /dev/null +++ b/ICL/sft_model/final/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd0bf955c6f9736d120b4f96a8383c9d168c2f2738297ed405ccd4423d4afb0f +size 6573736572 diff --git a/ICL/sft_model/final/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/ICL/sft_model/final/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2083475a6fa3676313ba21cf290cb31abc2dcdca --- /dev/null +++ b/ICL/sft_model/final/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5af3ad76a6fed5e135c5e013f7611ad8928c7774eacfec0afdd3e22184b469c1 +size 6573736572 diff --git a/ICL/sft_model/final/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/ICL/sft_model/final/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2bddd636af56937885e49af2aa82e356d8659793 --- /dev/null +++ b/ICL/sft_model/final/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:704d46917ad406649380aaca206b4bd5e337c2b3e736c0c1c4d4d0fe6f47107a +size 6573736572 diff --git a/ICL/sft_model/final/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/ICL/sft_model/final/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ee1444235ae1aedc54c278314814fac0a7847d39 --- /dev/null +++ b/ICL/sft_model/final/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:098c371bc81b3d0cdb79f56f5b1330c4c33554c9a52693dab5e2affee9b1aad2 +size 6573736572 diff --git a/ICL/sft_model/final/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/ICL/sft_model/final/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..42404406571547bc4495b7b6e40d424709982f4c --- /dev/null +++ b/ICL/sft_model/final/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3589040bc46bf07a80cffebf0ee6a3ecd127c5b4227fb0e9a54d842117c2ae30 +size 6573736572 diff --git a/ICL/sft_model/final/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt b/ICL/sft_model/final/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..26cb3af5667fedde5d4a74984c56985987a7fc4c --- /dev/null +++ b/ICL/sft_model/final/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b419b4a97d3dd62f8508bb125e7bcc3d104d737b93cdddce4ff51d3bd0b50c14 +size 6573736572 diff --git a/ICL/sft_model/final/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt b/ICL/sft_model/final/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..478b4ac96a0282a0a2f53725692f017f8312e342 --- /dev/null +++ b/ICL/sft_model/final/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bccc66d0614cd2c73a1af98180f5aff38830b26f40f1c64a7d6860024ed90503 +size 6573736572 diff --git a/ICL/sft_model/final/zero_pp_rank_0_mp_rank_00_model_states.pt b/ICL/sft_model/final/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ed6b26017ca2696de34a25b5d8c218567cc265c4 --- /dev/null +++ b/ICL/sft_model/final/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e101c44d1ed036f0afac833bd62ed7b0d87e6757cca52a73bcefc0b6b85be2cb +size 389860 diff --git a/ICL/sft_model/final/zero_pp_rank_10_mp_rank_00_model_states.pt b/ICL/sft_model/final/zero_pp_rank_10_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1fdd157ff78df2685d0475de28dd26032bc35024 --- /dev/null +++ b/ICL/sft_model/final/zero_pp_rank_10_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:010906bc4d4fde5cc7316901226854dae856722033634412cb9d4e3834d136bf +size 390614 diff --git a/ICL/sft_model/final/zero_pp_rank_11_mp_rank_00_model_states.pt b/ICL/sft_model/final/zero_pp_rank_11_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..33478acea4d6f77224f6faa5886c11c287a2457c --- /dev/null +++ b/ICL/sft_model/final/zero_pp_rank_11_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f6b86a4e0abbb6dea4022473d42035a66e7c6c5d179e3d65f81b6b48263c606 +size 390614 diff --git a/ICL/sft_model/final/zero_pp_rank_12_mp_rank_00_model_states.pt b/ICL/sft_model/final/zero_pp_rank_12_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..488d6e01e71efb401b6a257b6c06f81c0abb621a --- /dev/null +++ b/ICL/sft_model/final/zero_pp_rank_12_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef145ae53de002231cef2ed57ea1149b55f324debe483ddb6c1954c65746132e +size 390614 diff --git a/ICL/sft_model/final/zero_pp_rank_13_mp_rank_00_model_states.pt b/ICL/sft_model/final/zero_pp_rank_13_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f3eb5e0ca6d2b1b4d8e476fec57162e32e2dbf8d --- /dev/null +++ b/ICL/sft_model/final/zero_pp_rank_13_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:499906a17886409aba9028d245a6c0761be4a5f56c7c4ec5d2bd589e74397eda +size 390614 diff --git a/ICL/sft_model/final/zero_pp_rank_14_mp_rank_00_model_states.pt b/ICL/sft_model/final/zero_pp_rank_14_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8c2f1c13dcce6df174bc7717cb3e2a6c56cb4fec --- /dev/null +++ b/ICL/sft_model/final/zero_pp_rank_14_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4525fb97c6b13bd56836c9330b9f9fb1213e357b41b8272027bf0cb0b54b7109 +size 390614 diff --git a/ICL/sft_model/final/zero_pp_rank_15_mp_rank_00_model_states.pt b/ICL/sft_model/final/zero_pp_rank_15_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..08290330f32088707d6fe4576244687eae7953f3 --- /dev/null +++ b/ICL/sft_model/final/zero_pp_rank_15_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55a33d6a072326c94a0fdf8763b43e945155bf8081fdb66e64409b0bd24319f4 +size 390614 diff --git a/ICL/sft_model/final/zero_pp_rank_1_mp_rank_00_model_states.pt b/ICL/sft_model/final/zero_pp_rank_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a5a6c2fdc38ad38d199df5a517e329f714b8602f --- /dev/null +++ b/ICL/sft_model/final/zero_pp_rank_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6d1b932ebd107e8d5c66fcaafbb070d3df98f8dab21cefc01ef41900396deb1 +size 389860 diff --git a/ICL/sft_model/final/zero_pp_rank_2_mp_rank_00_model_states.pt b/ICL/sft_model/final/zero_pp_rank_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..346faf1a25acaca64f144e2d1e5a6d1877954f91 --- /dev/null +++ b/ICL/sft_model/final/zero_pp_rank_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52bd0bcd26a8a113d079c173bdaebede6f8780f17ecdd44d35f45320aa6cbe0b +size 389860 diff --git a/ICL/sft_model/final/zero_pp_rank_3_mp_rank_00_model_states.pt b/ICL/sft_model/final/zero_pp_rank_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8a3d4bd3570ca60fc5bf9df0475c8a4419338cd9 --- /dev/null +++ b/ICL/sft_model/final/zero_pp_rank_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fca4ca0d2e8372188c6164f5086c5490c138888f4a45155ae34d56e7e32b1ad +size 389860 diff --git a/ICL/sft_model/final/zero_pp_rank_4_mp_rank_00_model_states.pt b/ICL/sft_model/final/zero_pp_rank_4_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..53acdea68dc22fddc3539965f00c008dff56830b --- /dev/null +++ b/ICL/sft_model/final/zero_pp_rank_4_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a01ecbdd9d0eae1bcd0bc8b7176394185ca126c29be809e4ca9449eef6035b1 +size 389860 diff --git a/ICL/sft_model/final/zero_pp_rank_5_mp_rank_00_model_states.pt b/ICL/sft_model/final/zero_pp_rank_5_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0cdfe0b0b3fa02cefb7a303ca6c8a8fff8b83fc1 --- /dev/null +++ b/ICL/sft_model/final/zero_pp_rank_5_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f62e7a623491fad2c675922c0dc0542bfac77e740fe3037dff51a624a165e238 +size 389860 diff --git a/ICL/sft_model/final/zero_pp_rank_6_mp_rank_00_model_states.pt b/ICL/sft_model/final/zero_pp_rank_6_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e853b742649b1720646a83a22d0a00451650bc9f --- /dev/null +++ b/ICL/sft_model/final/zero_pp_rank_6_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40ac2b65bbfefbe6434df614788edfa642b60668c9d8adec0806ad31fadcbbf6 +size 389860 diff --git a/ICL/sft_model/final/zero_pp_rank_7_mp_rank_00_model_states.pt b/ICL/sft_model/final/zero_pp_rank_7_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4f911e1fec0e8fc12e6f4caf6948578b61afb533 --- /dev/null +++ b/ICL/sft_model/final/zero_pp_rank_7_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8601156381bd14f997a12aa4d8a72369a3e6dbb6739282cb6aab50f16ab60526 +size 389860 diff --git a/ICL/sft_model/final/zero_pp_rank_9_mp_rank_00_model_states.pt b/ICL/sft_model/final/zero_pp_rank_9_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..56c7d683d9d98314392c008f0f5c8bb8ec923b40 --- /dev/null +++ b/ICL/sft_model/final/zero_pp_rank_9_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:897526c0dcb746ebc581112402dd53c65b5562a6bf7045a6bd235480a5360190 +size 389860