| !!python/object:aether.train.train.TrainingArguments |
| output_dir: /mnt/disks/persist/data/checkpoints/L3 |
| overwrite_output_dir: false |
| do_train: false |
| do_eval: false |
| do_predict: false |
| eval_strategy: 'no' |
| prediction_loss_only: false |
| per_device_train_batch_size: 32 |
| per_device_eval_batch_size: 8 |
| per_gpu_train_batch_size: null |
| per_gpu_eval_batch_size: null |
| gradient_accumulation_steps: 1 |
| eval_accumulation_steps: null |
| eval_delay: 0 |
| torch_empty_cache_steps: null |
| learning_rate: 0.001 |
| weight_decay: 0.05 |
| adam_beta1: 0.9 |
| adam_beta2: 0.999 |
| adam_epsilon: 1.0e-08 |
| max_grad_norm: 1.0 |
| num_train_epochs: 3.0 |
| max_steps: 75000 |
| lr_scheduler_type: constant |
| lr_scheduler_kwargs: {} |
| warmup_ratio: 0.0 |
| warmup_steps: 0 |
| log_level: passive |
| log_level_replica: warning |
| log_on_each_node: true |
| logging_dir: null |
| logging_strategy: steps |
| logging_first_step: true |
| logging_steps: 250 |
| logging_nan_inf_filter: true |
| save_strategy: steps |
| save_steps: 300 |
| save_total_limit: null |
| save_safetensors: true |
| save_on_each_node: false |
| save_only_model: false |
| restore_callback_states_from_checkpoint: false |
| no_cuda: false |
| use_cpu: false |
| use_mps_device: false |
| seed: 42 |
| data_seed: null |
| jit_mode_eval: false |
| use_ipex: false |
| bf16: false |
| fp16: false |
| fp16_opt_level: O1 |
| half_precision_backend: auto |
| bf16_full_eval: false |
| fp16_full_eval: false |
| tf32: null |
| local_rank: -1 |
| ddp_backend: null |
| tpu_num_cores: null |
| tpu_metrics_debug: false |
| debug: '' |
| dataloader_drop_last: false |
| eval_steps: null |
| dataloader_num_workers: 0 |
| dataloader_prefetch_factor: null |
| past_index: -1 |
| run_name: L3 |
| disable_tqdm: null |
| remove_unused_columns: false |
| label_names: |
| - input_ids |
| load_best_model_at_end: false |
| metric_for_best_model: null |
| greater_is_better: null |
| ignore_data_skip: false |
| fsdp: '' |
| fsdp_min_num_params: 0 |
| fsdp_config: null |
| fsdp_transformer_layer_cls_to_wrap: null |
| accelerator_config: null |
| deepspeed: null |
| label_smoothing_factor: 0.0 |
| optim: adamw_torch |
| optim_args: null |
| adafactor: false |
| group_by_length: false |
| length_column_name: length |
| report_to: null |
| ddp_find_unused_parameters: null |
| ddp_bucket_cap_mb: null |
| ddp_broadcast_buffers: null |
| dataloader_pin_memory: true |
| dataloader_persistent_workers: false |
| skip_memory_metrics: true |
| use_legacy_prediction_loop: false |
| push_to_hub: false |
| resume_from_checkpoint: null |
| hub_model_id: timaeus/L3 |
| hub_strategy: every_save |
| hub_token: null |
| hub_private_repo: false |
| hub_always_push: false |
| gradient_checkpointing: false |
| gradient_checkpointing_kwargs: null |
| include_inputs_for_metrics: false |
| eval_do_concat_batches: true |
| fp16_backend: auto |
| evaluation_strategy: null |
| push_to_hub_model_id: null |
| push_to_hub_organization: null |
| push_to_hub_token: null |
| mp_parameters: '' |
| auto_find_batch_size: false |
| full_determinism: false |
| torchdynamo: null |
| ray_scope: last |
| ddp_timeout: 1800 |
| torch_compile: false |
| torch_compile_backend: null |
| torch_compile_mode: null |
| dispatch_batches: null |
| split_batches: null |
| include_tokens_per_second: false |
| include_num_input_tokens_seen: false |
| neftune_noise_alpha: null |
| optim_target_modules: null |
| batch_eval_metrics: false |
| eval_on_start: false |
| use_liger_kernel: false |
| eval_use_gather_object: false |
| checkpoints_dir: /mnt/disks/persist/data/checkpoints |
| save_log_steps: 250 |
| bucket_name: devinterp-language |
| s3_folder: checkpoints/L3 |
| delete_after_upload: false |
| push_to_aws: true |
| project_name: train_slms_pile13m |
| is_debug: false |
| group_name: L |
| job_type: train |
| notes: null |
| tags: null |
| extra_save_steps: |
| - 1 |
| - 1 |
| - 1 |
| - 1 |
| - 1 |
| - 1 |
| - 1 |
| - 1 |
| - 1 |
| - 1 |
| - 1 |
| - 1 |
| - 1 |
| - 1 |
| - 1 |
| - 1 |
| - 2 |
| - 2 |
| - 2 |
| - 2 |
| - 2 |
| - 2 |
| - 2 |
| - 2 |
| - 2 |
| - 3 |
| - 3 |
| - 3 |
| - 3 |
| - 3 |
| - 3 |
| - 4 |
| - 4 |
| - 4 |
| - 4 |
| - 4 |
| - 5 |
| - 5 |
| - 5 |
| - 5 |
| - 6 |
| - 6 |
| - 6 |
| - 6 |
| - 7 |
| - 7 |
| - 7 |
| - 8 |
| - 8 |
| - 9 |
| - 9 |
| - 9 |
| - 10 |
| - 10 |
| - 11 |
| - 11 |
| - 12 |
| - 13 |
| - 13 |
| - 14 |
| - 14 |
| - 15 |
| - 16 |
| - 17 |
| - 17 |
| - 18 |
| - 19 |
| - 20 |
| - 21 |
| - 22 |
| - 23 |
| - 24 |
| - 25 |
| - 26 |
| - 28 |
| - 29 |
| - 30 |
| - 32 |
| - 33 |
| - 35 |
| - 36 |
| - 38 |
| - 40 |
| - 42 |
| - 44 |
| - 46 |
| - 48 |
| - 50 |
| - 52 |
| - 55 |
| - 57 |
| - 60 |
| - 63 |
| - 66 |
| - 69 |
| - 72 |
| - 75 |
| - 79 |
| - 82 |
| - 86 |
| - 90 |
| - 94 |
| - 99 |
| - 103 |
| - 108 |
| - 113 |
| - 118 |
| - 124 |
| - 130 |
| - 136 |
| - 142 |
| - 149 |
| - 155 |
| - 163 |
| - 170 |
| - 178 |
| - 186 |
| - 195 |
| - 204 |
| - 213 |
| - 223 |
| - 233 |
| - 244 |
| - 255 |
| - 267 |
| - 280 |
| - 293 |
| - 306 |
| - 320 |
| - 335 |
| - 350 |
| - 367 |
| - 384 |
| - 401 |
| - 420 |
| - 439 |
| - 459 |
| - 481 |
| - 503 |
| - 526 |
| - 550 |
| - 576 |
| - 602 |
| - 630 |
| - 659 |
| - 690 |
| - 721 |
| - 755 |
| - 789 |
| - 826 |
| - 864 |
| - 904 |
| - 946 |
| - 989 |
| - 1035 |
| - 1083 |
| - 1133 |
| - 1185 |
| - 1239 |
| - 1297 |
| - 1356 |
| - 1419 |
| - 1485 |
| - 1553 |
| - 1625 |
| - 1700 |
| - 1778 |
| - 1860 |
| - 1946 |
| - 2035 |
| - 2129 |
| - 2228 |
| - 2330 |
| - 2438 |
| - 2550 |
| - 2668 |
| - 2791 |
| - 2920 |
| - 3054 |
| - 3195 |
| - 3343 |
| - 3497 |
| - 3658 |
| - 3827 |
| - 4003 |
| - 4188 |
| - 4381 |
| - 4583 |
| - 4794 |
| - 5015 |
| - 5247 |
| - 5489 |
| - 5742 |
| - 6007 |
| - 6284 |
| - 6573 |
| - 6876 |
| - 7194 |
| - 7525 |
| - 7872 |
| - 8235 |
| - 8615 |
| - 9012 |
| - 9428 |
| - 9863 |
| - 10318 |
| - 10794 |
| - 11291 |
| - 11812 |
| - 12357 |
| - 12926 |
| - 13523 |
| - 14146 |
| - 14799 |
| - 15481 |
| - 16195 |
| - 16942 |
| - 17723 |
| - 18540 |
| - 19395 |
| - 20290 |
| - 21225 |
| - 22204 |
| - 23228 |
| - 24299 |
| - 25420 |
| - 26592 |
| - 27818 |
| - 29101 |
| - 30443 |
| - 31847 |
| - 33315 |
| - 34851 |
| - 36458 |
| - 38140 |
| - 39898 |
| - 41738 |
| - 43663 |
| - 45676 |
| - 47783 |
| - 49986 |
| - 52291 |
| - 54703 |
| - 57225 |
| - 59864 |
| - 62624 |
| - 65512 |
| - 68533 |
| - 71693 |
| - 75000 |
|
|