| max_seq_len: 4096 | |
| tokenizer_info: | |
| boq: <|im_start|> | |
| condition_mapping: | |
| cot: <|object_ref_end|> | |
| direct: <|object_ref_start|> | |
| noisy: <|quad_start|> | |
| synth: <|quad_end|> | |
| eoa: <|box_end|> | |
| eoq: <|im_end|> | |
| tokenizer_path: /home/work/.data/hrm_text_prepared/koterm_pretrain_mix_v1 | |
| total_length: 711277327 | |
| vocab_size: 131072 | |