KoHRM-Text-1.4B / tokenizer /merge_stats.json
gyung's picture
Add files using upload-large-folder tool
f9e20ea verified
raw
history blame
690 Bytes
{
"inputs": [
{
"path": "/home/work/.data/hrm_text_prepared/hrm_cleaned_base_sample_v1",
"samples": 819617,
"tokens": 250000177
},
{
"path": "/home/work/.data/hrm_text_prepared/sft_swe_glm_mix_v1",
"samples": 109889,
"tokens": 251170780
},
{
"path": "/home/work/.data/hrm_text_prepared/sft_korean_legal_v1",
"samples": 183080,
"tokens": 83144929
},
{
"path": "/home/work/.data/hrm_text_prepared/sft_toolbench_v1",
"samples": 64137,
"tokens": 126961441
}
],
"samples": 1176723,
"tokens": 711277327,
"avg_sample_len": 604.4560419062091,
"max_sample_len": 4096,
"epochs": 1
}