| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.019054651121247126, |
| "global_step": 1000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0, |
| "learning_rate": 9.8e-05, |
| "loss": 2.0665, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 9.6e-05, |
| "loss": 2.038, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 9.4e-05, |
| "loss": 2.0553, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 9.200000000000001e-05, |
| "loss": 2.063, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 9e-05, |
| "loss": 2.0761, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 8.800000000000001e-05, |
| "loss": 2.0427, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 8.6e-05, |
| "loss": 2.0504, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 8.4e-05, |
| "loss": 2.072, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 8.2e-05, |
| "loss": 2.0897, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 8e-05, |
| "loss": 2.0567, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.0, |
| "eval_accuracy": 0.57173385518591, |
| "eval_loss": 2.053316354751587, |
| "eval_runtime": 13.3251, |
| "eval_samples_per_second": 75.046, |
| "eval_steps_per_second": 1.201, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 7.800000000000001e-05, |
| "loss": 2.0786, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 7.6e-05, |
| "loss": 2.0559, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 7.4e-05, |
| "loss": 2.0664, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 7.2e-05, |
| "loss": 2.0799, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 7e-05, |
| "loss": 2.0744, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 6.800000000000001e-05, |
| "loss": 2.0509, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 6.6e-05, |
| "loss": 2.094, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 6.400000000000001e-05, |
| "loss": 2.0419, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 6.2e-05, |
| "loss": 2.055, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 6e-05, |
| "loss": 2.041, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.01, |
| "eval_accuracy": 0.5733287671232876, |
| "eval_loss": 2.043827772140503, |
| "eval_runtime": 13.0102, |
| "eval_samples_per_second": 76.863, |
| "eval_steps_per_second": 1.23, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 5.8e-05, |
| "loss": 2.0314, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 5.6000000000000006e-05, |
| "loss": 2.0513, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 5.4000000000000005e-05, |
| "loss": 2.0545, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 5.2000000000000004e-05, |
| "loss": 2.0954, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 5e-05, |
| "loss": 2.0493, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.8e-05, |
| "loss": 2.0714, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.600000000000001e-05, |
| "loss": 2.077, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.4000000000000006e-05, |
| "loss": 2.07, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.2e-05, |
| "loss": 2.0233, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4e-05, |
| "loss": 2.0496, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.01, |
| "eval_accuracy": 0.5749158512720156, |
| "eval_loss": 2.036094903945923, |
| "eval_runtime": 12.9978, |
| "eval_samples_per_second": 76.936, |
| "eval_steps_per_second": 1.231, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 3.8e-05, |
| "loss": 2.063, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 3.6e-05, |
| "loss": 2.0683, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 3.4000000000000007e-05, |
| "loss": 2.0344, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 3.2000000000000005e-05, |
| "loss": 2.0414, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 3e-05, |
| "loss": 2.0146, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 2.8000000000000003e-05, |
| "loss": 2.0369, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 2.6000000000000002e-05, |
| "loss": 2.0333, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 2.4e-05, |
| "loss": 2.039, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 2.2000000000000003e-05, |
| "loss": 2.0402, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 2e-05, |
| "loss": 2.0194, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.02, |
| "eval_accuracy": 0.5760665362035226, |
| "eval_loss": 2.0276312828063965, |
| "eval_runtime": 11.977, |
| "eval_samples_per_second": 83.493, |
| "eval_steps_per_second": 1.336, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.8e-05, |
| "loss": 2.003, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 2.0207, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.4000000000000001e-05, |
| "loss": 2.0228, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.2e-05, |
| "loss": 2.0349, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1e-05, |
| "loss": 2.0158, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 2.019, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 6e-06, |
| "loss": 2.0196, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 2.0414, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 2.0279, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.0, |
| "loss": 2.0338, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.02, |
| "eval_accuracy": 0.5767534246575342, |
| "eval_loss": 2.0226891040802, |
| "eval_runtime": 13.0341, |
| "eval_samples_per_second": 76.722, |
| "eval_steps_per_second": 1.228, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.02, |
| "step": 1000, |
| "total_flos": 1.7866929733632e+17, |
| "train_loss": 2.0490207977294923, |
| "train_runtime": 1425.429, |
| "train_samples_per_second": 44.899, |
| "train_steps_per_second": 0.702 |
| } |
| ], |
| "max_steps": 1000, |
| "num_train_epochs": 1, |
| "total_flos": 1.7866929733632e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|