Remove raw FSDP checkpoint artifacts from main model repo
Browse files
- all_config.yaml +0 -42
- carry_epoch_1.0.pt +0 -3
- carry_epoch_1.1.pt +0 -3
- carry_epoch_1.2.pt +0 -3
- carry_epoch_1.3.pt +0 -3
- carry_epoch_1.4.pt +0 -3
- carry_epoch_1.5.pt +0 -3
- carry_epoch_1.6.pt +0 -3
- carry_epoch_1.7.pt +0 -3
- fsdp2_epoch_1/.metadata +0 -3
- fsdp2_epoch_1/__0_0.distcp +0 -3
- fsdp2_epoch_1/__1_0.distcp +0 -3
- fsdp2_epoch_1/__2_0.distcp +0 -3
- fsdp2_epoch_1/__3_0.distcp +0 -3
- fsdp2_epoch_1/__4_0.distcp +0 -3
- fsdp2_epoch_1/__5_0.distcp +0 -3
- fsdp2_epoch_1/__6_0.distcp +0 -3
- fsdp2_epoch_1/__7_0.distcp +0 -3
- tokenizer/epoch_0/inst_len.npy +0 -3
- tokenizer/epoch_0/inst_start.npy +0 -3
- tokenizer/epoch_0/resp_len.npy +0 -3
- tokenizer/epoch_0/resp_start.npy +0 -3
- tokenizer/merge_stats.json +0 -29
- tokenizer/metadata.json +0 -1
- tokenizer/tokenizer.json +0 -3
- tokenizer/tokenizer_info.json +0 -1
- tokenizer/tokens.npy +0 -3
- train_metadata.yaml +0 -13
- upload_manifest.json +0 -7
all_config.yaml
DELETED
|
@@ -1,42 +0,0 @@
|
|
| 1 |
-
arch:
|
| 2 |
-
H_cycles: 2
|
| 3 |
-
H_override: {}
|
| 4 |
-
L_cycles: 3
|
| 5 |
-
bp_max_steps: 5
|
| 6 |
-
bp_warmup_ratio: 0.2
|
| 7 |
-
expansion: 4
|
| 8 |
-
half_layers: true
|
| 9 |
-
head: lm_head@LMHead
|
| 10 |
-
hidden_size: 1536
|
| 11 |
-
init_type: lecun_normal
|
| 12 |
-
n_layers: 32
|
| 13 |
-
name: baselines.hrm_nocarry_bp_warmup@HierarchicalReasoningModel
|
| 14 |
-
norm_eps: 1.0e-06
|
| 15 |
-
norm_type: pre
|
| 16 |
-
num_heads: 12
|
| 17 |
-
pos_emb_type: rope
|
| 18 |
-
rope_theta: 10000.0
|
| 19 |
-
beta1: 0.9
|
| 20 |
-
beta2: 0.95
|
| 21 |
-
checkpoint_interval: 1
|
| 22 |
-
checkpoint_path: /home/work/.data/hrm_text_checkpoints/KoHRM-Text-1.4B-stage0b-debug-launch2
|
| 23 |
-
data:
|
| 24 |
-
path: /home/work/.data/hrm_text_prepared/koterm_pretrain_mix_v1
|
| 25 |
-
target_only: true
|
| 26 |
-
ema: 0.9999
|
| 27 |
-
epochs: 1
|
| 28 |
-
fwd_bwd_dtype: bfloat16
|
| 29 |
-
global_batch_size: 196608
|
| 30 |
-
log_interval: 5
|
| 31 |
-
lr: 0.00022
|
| 32 |
-
lr_min_ratio: 1.0
|
| 33 |
-
lr_warmup_steps: 2000
|
| 34 |
-
project_name: KoHRM-Text
|
| 35 |
-
resume_epoch: null
|
| 36 |
-
resume_from: /home/work/.data/hrm_text_checkpoints/KoHRM-Text-1.4B-stage0-available-mix-gbs172
|
| 37 |
-
resume_step_offset: 4134
|
| 38 |
-
run_name: KoHRM-Text-1.4B-stage0b-debug-launch2
|
| 39 |
-
seed: 0
|
| 40 |
-
total_steps_override: 290643
|
| 41 |
-
weight_decay: 0.1
|
| 42 |
-
weights_only_resume_from_ema: false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
carry_epoch_1.0.pt
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:4e3b089e35eacca121e8bc850c0fc138c4ef45a63cf9e370e2f852d6245db36b
|
| 3 |
-
size 1309
|
|
|
|
|
|
|
|
|
|
|
|
carry_epoch_1.1.pt
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:66952aadc5b6f1d9d38cd436e6dba2c3b6a487138c6960beb815672ddf699495
|
| 3 |
-
size 1309
|
|
|
|
|
|
|
|
|
|
|
|
carry_epoch_1.2.pt
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:7a7da182d5d1dfe900b2018b6e4fe6d318c69f791b7a3c94c1727e2112a5f57d
|
| 3 |
-
size 1309
|
|
|
|
|
|
|
|
|
|
|
|
carry_epoch_1.3.pt
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:45156a7f9cef48f22d7a3c46c59d92394c3da40b2b596f2681cecece9156177e
|
| 3 |
-
size 1309
|
|
|
|
|
|
|
|
|
|
|
|
carry_epoch_1.4.pt
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:34356bd35b48ac6ce98742241b2c0e1c96147c36743415c7b0e432ae28f8bfc8
|
| 3 |
-
size 1309
|
|
|
|
|
|
|
|
|
|
|
|
carry_epoch_1.5.pt
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:6f5934083e16382c0d96bab003d7f577ced0da026175aff5a4ad2aaf31c603f6
|
| 3 |
-
size 1309
|
|
|
|
|
|
|
|
|
|
|
|
carry_epoch_1.6.pt
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:93480251cc3a3285f6cea88b5f8b7c6d46672f04cacd0623127885ff4469e7d8
|
| 3 |
-
size 1309
|
|
|
|
|
|
|
|
|
|
|
|
carry_epoch_1.7.pt
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:d2de90149405f8416a3f1c4bb6b69b52843f8a2f835c1b874bd13c13129fc3f5
|
| 3 |
-
size 1309
|
|
|
|
|
|
|
|
|
|
|
|
fsdp2_epoch_1/.metadata
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:550c05a8cece87340caee4521c6833b221b45130878490dd191914d4b77f4848
|
| 3 |
-
size 983795
|
|
|
|
|
|
|
|
|
|
|
|
fsdp2_epoch_1/__0_0.distcp
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:3d031aa0a10c80c1726a0806206307ad1fadbd0413bd7253911f45af5190ab5c
|
| 3 |
-
size 2769065329
|
|
|
|
|
|
|
|
|
|
|
|
fsdp2_epoch_1/__1_0.distcp
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:125adba2372eef3bc99055707ad232a22dfe696252b9e77e4c815155266b71b7
|
| 3 |
-
size 2769090643
|
|
|
|
|
|
|
|
|
|
|
|
fsdp2_epoch_1/__2_0.distcp
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:897cab7b8e60c13a2bbfb056ac733f68ce35c4b700cfa7d9df9d5feb38eab485
|
| 3 |
-
size 2769090643
|
|
|
|
|
|
|
|
|
|
|
|
fsdp2_epoch_1/__3_0.distcp
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:d837c17876b1722a6c145808c5285727f8ca0147f504be6e26ca6ac5796fb06e
|
| 3 |
-
size 2769090643
|
|
|
|
|
|
|
|
|
|
|
|
fsdp2_epoch_1/__4_0.distcp
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:bdbd7c0a982dead1a693467bc538cc464196e2eae7f8565823f72f578dca86c9
|
| 3 |
-
size 2769090643
|
|
|
|
|
|
|
|
|
|
|
|
fsdp2_epoch_1/__5_0.distcp
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:e4660b1642281241e4125d83d1fcbb4b72dd2e4d91a0ecddb20696cf682bef69
|
| 3 |
-
size 2769090643
|
|
|
|
|
|
|
|
|
|
|
|
fsdp2_epoch_1/__6_0.distcp
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:a803d2cfbd7b8b7cd5c5550c5cf8df5487b1576ae4b6b9105942b1e3cef73695
|
| 3 |
-
size 2769091588
|
|
|
|
|
|
|
|
|
|
|
|
fsdp2_epoch_1/__7_0.distcp
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:d4f60699364d470f1e139f0cc4b9108060443fcf6656f1cd9d39bd575a082316
|
| 3 |
-
size 2769098756
|
|
|
|
|
|
|
|
|
|
|
|
tokenizer/epoch_0/inst_len.npy
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:08d3530388ff62c75805c79645906ca8a409fe327b2d6b6a9bcd3e95ec6476b3
|
| 3 |
-
size 9413912
|
|
|
|
|
|
|
|
|
|
|
|
tokenizer/epoch_0/inst_start.npy
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:e59893d88294f5912372dae50e4b609d05090dcdaaa3bce9966418fb82ab309c
|
| 3 |
-
size 9413912
|
|
|
|
|
|
|
|
|
|
|
|
tokenizer/epoch_0/resp_len.npy
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:bc39c23c241138694be76029a0ef48a20d63bf7c6115c85c28b974038f0e70f7
|
| 3 |
-
size 9413912
|
|
|
|
|
|
|
|
|
|
|
|
tokenizer/epoch_0/resp_start.npy
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:04f9ca39d99fedcd05c65ad67186d1b3c230abc6b2fe90c5281cfa9aa3deedff
|
| 3 |
-
size 9413912
|
|
|
|
|
|
|
|
|
|
|
|
tokenizer/merge_stats.json
DELETED
|
@@ -1,29 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"inputs": [
|
| 3 |
-
{
|
| 4 |
-
"path": "/home/work/.data/hrm_text_prepared/hrm_cleaned_base_sample_v1",
|
| 5 |
-
"samples": 819617,
|
| 6 |
-
"tokens": 250000177
|
| 7 |
-
},
|
| 8 |
-
{
|
| 9 |
-
"path": "/home/work/.data/hrm_text_prepared/sft_swe_glm_mix_v1",
|
| 10 |
-
"samples": 109889,
|
| 11 |
-
"tokens": 251170780
|
| 12 |
-
},
|
| 13 |
-
{
|
| 14 |
-
"path": "/home/work/.data/hrm_text_prepared/sft_korean_legal_v1",
|
| 15 |
-
"samples": 183080,
|
| 16 |
-
"tokens": 83144929
|
| 17 |
-
},
|
| 18 |
-
{
|
| 19 |
-
"path": "/home/work/.data/hrm_text_prepared/sft_toolbench_v1",
|
| 20 |
-
"samples": 64137,
|
| 21 |
-
"tokens": 126961441
|
| 22 |
-
}
|
| 23 |
-
],
|
| 24 |
-
"samples": 1176723,
|
| 25 |
-
"tokens": 711277327,
|
| 26 |
-
"avg_sample_len": 604.4560419062091,
|
| 27 |
-
"max_sample_len": 4096,
|
| 28 |
-
"epochs": 1
|
| 29 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tokenizer/metadata.json
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
{"tokenizer_info": {"tokenizer_path": "/home/work/.data/hrm_text_prepared/koterm_pretrain_mix_v1", "boq": "<|im_start|>", "eoq": "<|im_end|>", "eoa": "<|box_end|>", "condition_mapping": {"direct": "<|object_ref_start|>", "cot": "<|object_ref_end|>", "noisy": "<|quad_start|>", "synth": "<|quad_end|>"}, "vocab_size": 131072}, "vocab_size": null, "max_seq_len": 4097, "total_length": 711277327}
|
|
|
|
|
|
tokenizer/tokenizer.json
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:ef6d5204ebfb25e992926714af88ad6b77e12a90ea6f3eb0f200e1a1f8712d5c
|
| 3 |
-
size 11457812
|
|
|
|
|
|
|
|
|
|
|
|
tokenizer/tokenizer_info.json
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
{"tokenizer_path": "/home/work/.data/hrm_text_prepared/koterm_pretrain_mix_v1", "boq": "<|im_start|>", "eoq": "<|im_end|>", "eoa": "<|box_end|>", "condition_mapping": {"direct": "<|object_ref_start|>", "cot": "<|object_ref_end|>", "noisy": "<|quad_start|>", "synth": "<|quad_end|>"}, "vocab_size": 131072}
|
|
|
|
|
|
tokenizer/tokens.npy
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:89aebd37a8e19e41ada124300c8dcb656dc0d393ac19ff672dec6cac38c1f360
|
| 3 |
-
size 2845109436
|
|
|
|
|
|
|
|
|
|
|
|
train_metadata.yaml
DELETED
|
@@ -1,13 +0,0 @@
|
|
| 1 |
-
max_seq_len: 4096
|
| 2 |
-
tokenizer_info:
|
| 3 |
-
boq: <|im_start|>
|
| 4 |
-
condition_mapping:
|
| 5 |
-
cot: <|object_ref_end|>
|
| 6 |
-
direct: <|object_ref_start|>
|
| 7 |
-
noisy: <|quad_start|>
|
| 8 |
-
synth: <|quad_end|>
|
| 9 |
-
eoa: <|box_end|>
|
| 10 |
-
eoq: <|im_end|>
|
| 11 |
-
tokenizer_path: /home/work/.data/hrm_text_prepared/koterm_pretrain_mix_v1
|
| 12 |
-
total_length: 711277327
|
| 13 |
-
vocab_size: 131072
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
upload_manifest.json
DELETED
|
@@ -1,7 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"repo_id": "LLM-OS-Models/KoHRM-Text-1.4B",
|
| 3 |
-
"checkpoint_root": "/home/work/.data/hrm_text_checkpoints/KoHRM-Text-1.4B-stage0b-debug-launch2",
|
| 4 |
-
"epoch": 1,
|
| 5 |
-
"staged_at": "2026-05-23T09:43:36Z",
|
| 6 |
-
"stage_size_bytes": 25047932140
|
| 7 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|