KoHRM-Text-1.4B / train_metadata.yaml
gyung's picture
Add files using upload-large-folder tool
f9e20ea verified
raw
history blame
342 Bytes
# Training metadata for a prepared text dataset / tokenizer.
# Nesting: `tokenizer_info` holds the special-token assignments and the
# tokenizer location; `condition_mapping` is a sub-mapping of `tokenizer_info`.
# Keys are sorted alphabetically within each level.

# NOTE(review): presumably the maximum sequence length in tokens - confirm
# against the training code that consumes this file.
max_seq_len: 4096

tokenizer_info:
  # NOTE(review): boq / eoq / eoa look like begin-of-question, end-of-question
  # and end-of-answer markers - confirm the expansions with the data pipeline.
  boq: <|im_start|>

  # Maps a condition name to the special token that represents it.
  # NOTE(review): the meaning of each condition (cot, direct, noisy, synth)
  # is not defined in this file - verify against the dataset preparation code.
  condition_mapping:
    cot: <|object_ref_end|>
    direct: <|object_ref_start|>
    noisy: <|quad_start|>
    synth: <|quad_end|>

  eoa: <|box_end|>
  eoq: <|im_end|>

  # Absolute path as recorded at preparation time; it points into a local
  # home directory, so it is unlikely to resolve on another machine.
  tokenizer_path: /home/work/.data/hrm_text_prepared/koterm_pretrain_mix_v1

# NOTE(review): unit is not stated here (presumably a total token count for
# the prepared corpus) - TODO confirm.
total_length: 711277327

# 131072 == 2**17.
vocab_size: 131072