# Run configuration composed from the `base` config plus the `sft` experiment
# manager, the `llm-jp-3-13b` model and the `sft` trainer config groups.
# `_self_` is listed last in the composition list.
# NOTE(review): this reads as a Hydra defaults list, in which case values in
# this file override the composed groups -- confirm against the launcher.
defaults:
  - base
  - exp_manager: sft
  - model: llm-jp-3-13b
  - trainer: sft
  - _self_

# Dataset locations and batch sizes. `data_dir` is not defined in this file
# (presumably provided by `base` or on the command line -- confirm);
# `gbs` / `mbs` are the hyperparameters defined further down.
data:
  train_ds:
    data_dir: ${data_dir}/tuning/train
    global_batch_size: ${gbs}
    micro_batch_size: ${mbs}
  validation_ds:
    data_dir: ${data_dir}/tuning/dev
    global_batch_size: ${gbs}
    micro_batch_size: ${mbs}

# Tuning datasets. Per-dataset options:
#   max_train_samples: maximum number of samples to use for training.
#     -1 means use all samples; 0 means do not use the dataset.
#   split_dev: whether to split the dataset into training and validation
#     sets. If false, the dataset is used for training only.
#   upsampling_factor: upsampling factor for the dataset. 1 means no
#     upsampling. Applies to both the training and validation sets.
datasets:
  answer_carefully:
    max_train_samples: -1  # -1 means all
    split_dev: false
    upsampling_factor: 16
  calm3_22b_chat_20241018083433--Qwen2.5_32B_Instruct_20241022115410:
    max_train_samples: -1
    split_dev: true
    upsampling_factor: 1
  calm3_22b_chat_20241022133932--Qwen2.5_32B_Instruct_20241024100350:
    max_train_samples: -1
    split_dev: true
    upsampling_factor: 1
  calm3_22b_chat_20241022155627--Qwen2.5_32B_Instruct_20241024144245:
    max_train_samples: -1
    split_dev: true
    upsampling_factor: 1
  daring_anteater_en:
    max_train_samples: -1
    split_dev: true
    upsampling_factor: 1
  flan:
    max_train_samples: -1
    split_dev: true
    upsampling_factor: 1
  ichikara:
    max_train_samples: -1
    split_dev: true
    upsampling_factor: 1
  logical_math_coding_wizard8x22b:
    max_train_samples: -1
    split_dev: true
    upsampling_factor: 1
  multiturn_calm3:
    max_train_samples: -1
    split_dev: true
    upsampling_factor: 1
  random_to_fixed_multiturn_calm3:
    max_train_samples: -1
    split_dev: true
    upsampling_factor: 1
  synthetic_jp_en_coding_0:
    max_train_samples: -1
    split_dev: true
    upsampling_factor: 1

# The number of dev samples taken from each dataset is the minimum of
# {max_dev_samples, max_dev_ratio * number of samples in the dataset}.
max_dev_samples: 1000
max_dev_ratio: 0.1

# Hyperparameters.
# gbs / mbs are the global / micro batch sizes referenced by `data` above.
gbs: 64
mbs: 1
dropout: 0.0
# Learning rates are written with an explicit decimal point so that YAML 1.1
# loaders (e.g. PyYAML) also resolve them as floats rather than strings.
lr: 2.0e-5
min_lr: 2.0e-6

# Other options.
use_mpi: false
# Set use_slurm to true when using Slurm and MPI; otherwise set it to false.
use_slurm: false
ignore_hparams_on_save: false

# Constants.
# NOTE(review): presumably the hyperparameter keys excluded on save when
# `ignore_hparams_on_save` is true -- confirm against the consuming code.
hparams_to_ignore_on_save:
  - project
  - work_dir
  - data_dir
  - seed
  - name
  - exp_dir
  - run_id
  - run_dir
  - config_name
  - logger
  - hparams_to_ignore_on_save
  - per_device_train_batch_size
  - per_device_eval_batch_size
  - gradient_checkpointing
  - logging_steps
  - eval_steps
  - save_steps
  - use_mpi
  - use_slurm