---
# Dataset / tokenizer metadata.
#
# NOTE(review): this document was previously collapsed onto a single line,
# which does not parse as YAML (a plain scalar cannot contain ": "). The
# nesting below is reconstructed: tokenizer_info is assumed to hold the
# special-token entries (boq, condition_mapping, eoa, eoq), while
# tokenizer_path, total_length and vocab_size are assumed to be top-level
# keys. Confirm against the consumer's schema.

# NOTE(review): presumably the maximum sequence length in tokens - confirm.
max_seq_len: 4096

# Special-token strings. They are quoted because they consist largely of
# YAML indicator characters ('|', '>'); quoting keeps them literal strings.
tokenizer_info:
  # NOTE(review): boq / eoq / eoa look like begin-of-question,
  # end-of-question and end-of-answer markers - confirm with the consumer.
  boq: '<|im_start|>'
  # Maps each condition name to the token string associated with it.
  condition_mapping:
    cot: '<|object_ref_end|>'
    direct: '<|object_ref_start|>'
    noisy: '<|quad_start|>'
    synth: '<|quad_end|>'
  eoa: '<|box_end|>'
  eoq: '<|im_end|>'

# Absolute, machine-specific path; other environments need their own value.
tokenizer_path: /home/work/.data/hrm_text_prepared/koterm_pretrain_mix_v1

# NOTE(review): presumably the total token count of the prepared data -
# confirm the unit.
total_length: 711277327

vocab_size: 131072