Add files using upload-large-folder tool
Browse files
- .gitattributes +10 -0
- README.md +21 -0
- all_config.yaml +40 -0
- carry_epoch_1.0.pt +3 -0
- carry_epoch_1.1.pt +3 -0
- carry_epoch_1.2.pt +3 -0
- carry_epoch_1.3.pt +3 -0
- carry_epoch_1.4.pt +3 -0
- carry_epoch_1.5.pt +3 -0
- carry_epoch_1.6.pt +3 -0
- carry_epoch_1.7.pt +3 -0
- fsdp2_epoch_1/.metadata +3 -0
- fsdp2_epoch_1/__0_0.distcp +3 -0
- fsdp2_epoch_1/__1_0.distcp +3 -0
- fsdp2_epoch_1/__2_0.distcp +3 -0
- fsdp2_epoch_1/__3_0.distcp +3 -0
- fsdp2_epoch_1/__4_0.distcp +3 -0
- fsdp2_epoch_1/__5_0.distcp +3 -0
- fsdp2_epoch_1/__6_0.distcp +3 -0
- fsdp2_epoch_1/__7_0.distcp +3 -0
- tokenizer/epoch_0/inst_len.npy +3 -0
- tokenizer/epoch_0/inst_start.npy +3 -0
- tokenizer/epoch_0/resp_len.npy +3 -0
- tokenizer/epoch_0/resp_start.npy +3 -0
- tokenizer/merge_stats.json +29 -0
- tokenizer/metadata.json +1 -0
- tokenizer/tokenizer.json +3 -0
- tokenizer/tokenizer_info.json +1 -0
- tokenizer/tokens.npy +3 -0
- train_metadata.yaml +13 -0
- upload_manifest.json +7 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,13 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
fsdp2_epoch_1/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
tokenizer/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
fsdp2_epoch_1/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
fsdp2_epoch_1/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
fsdp2_epoch_1/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
fsdp2_epoch_1/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
fsdp2_epoch_1/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
fsdp2_epoch_1/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
fsdp2_epoch_1/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
fsdp2_epoch_1/.metadata filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: other
|
| 3 |
+
tags:
|
| 4 |
+
- hrm-text
|
| 5 |
+
- korean
|
| 6 |
+
- terminal
|
| 7 |
+
- tool-use
|
| 8 |
+
- checkpoint
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
# KoHRM-Text-1.4B
|
| 12 |
+
|
| 13 |
+
Raw HRM-Text FSDP2 training checkpoint artifact (not a model-only export).
|
| 14 |
+
|
| 15 |
+
- Source checkpoint root: `/home/work/.data/hrm_text_checkpoints/KoHRM-Text-1.4B-stage0-available-mix-gbs172`
|
| 16 |
+
- Epoch: `1`
|
| 17 |
+
- Upload policy: epoch-level upload only, to avoid slowing training with frequent network syncs.
|
| 18 |
+
- Format: HRM-Text training checkpoint (`fsdp2_epoch_*`) plus carry/config/tokenizer metadata.
|
| 19 |
+
|
| 20 |
+
This upload is intended primarily for training monitoring and recovery. Final model-only exports should be produced with
|
| 21 |
+
`HRM-Text/conversion/convert_to_hf.py` after a checkpoint is selected.
|
all_config.yaml
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
arch:
|
| 2 |
+
H_cycles: 2
|
| 3 |
+
H_override: {}
|
| 4 |
+
L_cycles: 3
|
| 5 |
+
bp_max_steps: 5
|
| 6 |
+
bp_warmup_ratio: 0.2
|
| 7 |
+
expansion: 4
|
| 8 |
+
half_layers: true
|
| 9 |
+
head: lm_head@LMHead
|
| 10 |
+
hidden_size: 1536
|
| 11 |
+
init_type: lecun_normal
|
| 12 |
+
n_layers: 32
|
| 13 |
+
name: baselines.hrm_nocarry_bp_warmup@HierarchicalReasoningModel
|
| 14 |
+
norm_eps: 1.0e-06
|
| 15 |
+
norm_type: pre
|
| 16 |
+
num_heads: 12
|
| 17 |
+
pos_emb_type: rope
|
| 18 |
+
rope_theta: 10000.0
|
| 19 |
+
beta1: 0.9
|
| 20 |
+
beta2: 0.95
|
| 21 |
+
checkpoint_interval: 1
|
| 22 |
+
checkpoint_path: /home/work/.data/hrm_text_checkpoints/KoHRM-Text-1.4B-stage0-available-mix-gbs172
|
| 23 |
+
data:
|
| 24 |
+
path: /home/work/.data/hrm_text_prepared/koterm_pretrain_mix_v1
|
| 25 |
+
target_only: true
|
| 26 |
+
ema: 0.9999
|
| 27 |
+
epochs: 1
|
| 28 |
+
fwd_bwd_dtype: bfloat16
|
| 29 |
+
global_batch_size: 172032
|
| 30 |
+
log_interval: 5
|
| 31 |
+
lr: 0.00022
|
| 32 |
+
lr_min_ratio: 1.0
|
| 33 |
+
lr_warmup_steps: 2000
|
| 34 |
+
project_name: KoHRM-Text
|
| 35 |
+
resume_epoch: null
|
| 36 |
+
resume_from: null
|
| 37 |
+
run_name: KoHRM-Text-1.4B-stage0-available-mix-gbs172
|
| 38 |
+
seed: 0
|
| 39 |
+
weight_decay: 0.1
|
| 40 |
+
weights_only_resume_from_ema: false
|
carry_epoch_1.0.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4e3b089e35eacca121e8bc850c0fc138c4ef45a63cf9e370e2f852d6245db36b
|
| 3 |
+
size 1309
|
carry_epoch_1.1.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:66952aadc5b6f1d9d38cd436e6dba2c3b6a487138c6960beb815672ddf699495
|
| 3 |
+
size 1309
|
carry_epoch_1.2.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7a7da182d5d1dfe900b2018b6e4fe6d318c69f791b7a3c94c1727e2112a5f57d
|
| 3 |
+
size 1309
|
carry_epoch_1.3.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:45156a7f9cef48f22d7a3c46c59d92394c3da40b2b596f2681cecece9156177e
|
| 3 |
+
size 1309
|
carry_epoch_1.4.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:34356bd35b48ac6ce98742241b2c0e1c96147c36743415c7b0e432ae28f8bfc8
|
| 3 |
+
size 1309
|
carry_epoch_1.5.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6f5934083e16382c0d96bab003d7f577ced0da026175aff5a4ad2aaf31c603f6
|
| 3 |
+
size 1309
|
carry_epoch_1.6.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:93480251cc3a3285f6cea88b5f8b7c6d46672f04cacd0623127885ff4469e7d8
|
| 3 |
+
size 1309
|
carry_epoch_1.7.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d2de90149405f8416a3f1c4bb6b69b52843f8a2f835c1b874bd13c13129fc3f5
|
| 3 |
+
size 1309
|
fsdp2_epoch_1/.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fcc92a43939acd13b1b37b169bf80a36aa87bcd99a1d2cadf8a468fd088ecad3
|
| 3 |
+
size 983801
|
fsdp2_epoch_1/__0_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9ba42a7b016d3573633583034d72194f1d5624378f7785e08175f6155223050d
|
| 3 |
+
size 2769065329
|
fsdp2_epoch_1/__1_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3937a022d2e122f06b7a63aefe120f76000e88048f64e3fe3684726d9c339cb5
|
| 3 |
+
size 2769090643
|
fsdp2_epoch_1/__2_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b4d4e8d16fbc87f1fe4bbca6b908cc8d4c4e72d97d416e086676464e44863787
|
| 3 |
+
size 2769090643
|
fsdp2_epoch_1/__3_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:07d82f26dd0540c1160d0d2dd2b33b6ced75b7f818b3b56d7cbc3534ee6fdf0d
|
| 3 |
+
size 2769090643
|
fsdp2_epoch_1/__4_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c4f208931d4ec1f062967640b6c653c5020226a1edeb67861453b7372e6ea6b7
|
| 3 |
+
size 2769090643
|
fsdp2_epoch_1/__5_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5395d3e2f99b75298ecb8e97dfd6c2883ade881b0e8413d15c1de80a9d2e2158
|
| 3 |
+
size 2769090643
|
fsdp2_epoch_1/__6_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:685ff0fb3629e778c3f2145349d43388883609737a39de5d760493d4ac59e8e9
|
| 3 |
+
size 2769091588
|
fsdp2_epoch_1/__7_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2136fb8e5d2fde1dd4ec0035aabc81224b3f3345bbda01ca208ec21f04094a7a
|
| 3 |
+
size 2769098756
|
tokenizer/epoch_0/inst_len.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:08d3530388ff62c75805c79645906ca8a409fe327b2d6b6a9bcd3e95ec6476b3
|
| 3 |
+
size 9413912
|
tokenizer/epoch_0/inst_start.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e59893d88294f5912372dae50e4b609d05090dcdaaa3bce9966418fb82ab309c
|
| 3 |
+
size 9413912
|
tokenizer/epoch_0/resp_len.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bc39c23c241138694be76029a0ef48a20d63bf7c6115c85c28b974038f0e70f7
|
| 3 |
+
size 9413912
|
tokenizer/epoch_0/resp_start.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:04f9ca39d99fedcd05c65ad67186d1b3c230abc6b2fe90c5281cfa9aa3deedff
|
| 3 |
+
size 9413912
|
tokenizer/merge_stats.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"inputs": [
|
| 3 |
+
{
|
| 4 |
+
"path": "/home/work/.data/hrm_text_prepared/hrm_cleaned_base_sample_v1",
|
| 5 |
+
"samples": 819617,
|
| 6 |
+
"tokens": 250000177
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"path": "/home/work/.data/hrm_text_prepared/sft_swe_glm_mix_v1",
|
| 10 |
+
"samples": 109889,
|
| 11 |
+
"tokens": 251170780
|
| 12 |
+
},
|
| 13 |
+
{
|
| 14 |
+
"path": "/home/work/.data/hrm_text_prepared/sft_korean_legal_v1",
|
| 15 |
+
"samples": 183080,
|
| 16 |
+
"tokens": 83144929
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"path": "/home/work/.data/hrm_text_prepared/sft_toolbench_v1",
|
| 20 |
+
"samples": 64137,
|
| 21 |
+
"tokens": 126961441
|
| 22 |
+
}
|
| 23 |
+
],
|
| 24 |
+
"samples": 1176723,
|
| 25 |
+
"tokens": 711277327,
|
| 26 |
+
"avg_sample_len": 604.4560419062091,
|
| 27 |
+
"max_sample_len": 4096,
|
| 28 |
+
"epochs": 1
|
| 29 |
+
}
|
tokenizer/metadata.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"tokenizer_info": {"tokenizer_path": "/home/work/.data/hrm_text_prepared/koterm_pretrain_mix_v1", "boq": "<|im_start|>", "eoq": "<|im_end|>", "eoa": "<|box_end|>", "condition_mapping": {"direct": "<|object_ref_start|>", "cot": "<|object_ref_end|>", "noisy": "<|quad_start|>", "synth": "<|quad_end|>"}, "vocab_size": 131072}, "vocab_size": null, "max_seq_len": 4097, "total_length": 711277327}
|
tokenizer/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ef6d5204ebfb25e992926714af88ad6b77e12a90ea6f3eb0f200e1a1f8712d5c
|
| 3 |
+
size 11457812
|
tokenizer/tokenizer_info.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"tokenizer_path": "/home/work/.data/hrm_text_prepared/koterm_pretrain_mix_v1", "boq": "<|im_start|>", "eoq": "<|im_end|>", "eoa": "<|box_end|>", "condition_mapping": {"direct": "<|object_ref_start|>", "cot": "<|object_ref_end|>", "noisy": "<|quad_start|>", "synth": "<|quad_end|>"}, "vocab_size": 131072}
|
tokenizer/tokens.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:89aebd37a8e19e41ada124300c8dcb656dc0d393ac19ff672dec6cac38c1f360
|
| 3 |
+
size 2845109436
|
train_metadata.yaml
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
max_seq_len: 4096
|
| 2 |
+
tokenizer_info:
|
| 3 |
+
boq: <|im_start|>
|
| 4 |
+
condition_mapping:
|
| 5 |
+
cot: <|object_ref_end|>
|
| 6 |
+
direct: <|object_ref_start|>
|
| 7 |
+
noisy: <|quad_start|>
|
| 8 |
+
synth: <|quad_end|>
|
| 9 |
+
eoa: <|box_end|>
|
| 10 |
+
eoq: <|im_end|>
|
| 11 |
+
tokenizer_path: /home/work/.data/hrm_text_prepared/koterm_pretrain_mix_v1
|
| 12 |
+
total_length: 711277327
|
| 13 |
+
vocab_size: 131072
|
upload_manifest.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"repo_id": "LLM-OS-Models/KoHRM-Text-1.4B",
|
| 3 |
+
"checkpoint_root": "/home/work/.data/hrm_text_checkpoints/KoHRM-Text-1.4B-stage0-available-mix-gbs172",
|
| 4 |
+
"epoch": 1,
|
| 5 |
+
"staged_at": "2026-05-23T08:52:30Z",
|
| 6 |
+
"stage_size_bytes": 25047929344
|
| 7 |
+
}
|