| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.03450506793185249, |
| "global_step": 1000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0, |
| "learning_rate": 9.8e-05, |
| "loss": 2.232, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 9.6e-05, |
| "loss": 2.2474, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 9.4e-05, |
| "loss": 2.2499, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 9.200000000000001e-05, |
| "loss": 2.2637, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 9e-05, |
| "loss": 2.2397, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 8.800000000000001e-05, |
| "loss": 2.2132, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 8.6e-05, |
| "loss": 2.2499, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 8.4e-05, |
| "loss": 2.2838, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 8.2e-05, |
| "loss": 2.2712, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 8e-05, |
| "loss": 2.2735, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.01, |
| "eval_accuracy": 0.5325260466845249, |
| "eval_loss": 2.2464351654052734, |
| "eval_runtime": 633.513, |
| "eval_samples_per_second": 82.642, |
| "eval_steps_per_second": 1.293, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 7.800000000000001e-05, |
| "loss": 2.2386, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 7.6e-05, |
| "loss": 2.2245, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 7.4e-05, |
| "loss": 2.2549, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 7.2e-05, |
| "loss": 2.2652, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 7e-05, |
| "loss": 2.268, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 6.800000000000001e-05, |
| "loss": 2.2421, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 6.6e-05, |
| "loss": 2.266, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 6.400000000000001e-05, |
| "loss": 2.2428, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 6.2e-05, |
| "loss": 2.2337, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 6e-05, |
| "loss": 2.2557, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.01, |
| "eval_accuracy": 0.5331100844920488, |
| "eval_loss": 2.2417314052581787, |
| "eval_runtime": 626.4522, |
| "eval_samples_per_second": 83.574, |
| "eval_steps_per_second": 1.307, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 5.8e-05, |
| "loss": 2.2322, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 5.6000000000000006e-05, |
| "loss": 2.2466, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 5.4000000000000005e-05, |
| "loss": 2.2366, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 5.2000000000000004e-05, |
| "loss": 2.2453, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 5e-05, |
| "loss": 2.2393, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.8e-05, |
| "loss": 2.2327, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.600000000000001e-05, |
| "loss": 2.2261, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.4000000000000006e-05, |
| "loss": 2.2432, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.2e-05, |
| "loss": 2.2451, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4e-05, |
| "loss": 2.2342, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.02, |
| "eval_accuracy": 0.5343608037930125, |
| "eval_loss": 2.2341866493225098, |
| "eval_runtime": 633.7987, |
| "eval_samples_per_second": 82.605, |
| "eval_steps_per_second": 1.292, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 3.8e-05, |
| "loss": 2.2508, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 3.6e-05, |
| "loss": 2.2614, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 3.4000000000000007e-05, |
| "loss": 2.2452, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 3.2000000000000005e-05, |
| "loss": 2.2374, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 3e-05, |
| "loss": 2.2307, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 2.8000000000000003e-05, |
| "loss": 2.2127, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 2.6000000000000002e-05, |
| "loss": 2.246, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 2.4e-05, |
| "loss": 2.2144, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 2.2000000000000003e-05, |
| "loss": 2.2438, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 2e-05, |
| "loss": 2.2241, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.03, |
| "eval_accuracy": 0.5354609628194991, |
| "eval_loss": 2.2267251014709473, |
| "eval_runtime": 627.471, |
| "eval_samples_per_second": 83.438, |
| "eval_steps_per_second": 1.305, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 1.8e-05, |
| "loss": 2.243, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 2.2366, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 1.4000000000000001e-05, |
| "loss": 2.224, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 1.2e-05, |
| "loss": 2.2467, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 1e-05, |
| "loss": 2.2341, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 2.2483, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 6e-06, |
| "loss": 2.2123, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 2.2465, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 2.2235, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.0, |
| "loss": 2.229, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.03, |
| "eval_accuracy": 0.5362047933711616, |
| "eval_loss": 2.2219960689544678, |
| "eval_runtime": 627.555, |
| "eval_samples_per_second": 83.427, |
| "eval_steps_per_second": 1.305, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.03, |
| "step": 1000, |
| "total_flos": 1.7866929733632e+17, |
| "train_loss": 2.2421503562927247, |
| "train_runtime": 4846.003, |
| "train_samples_per_second": 13.207, |
| "train_steps_per_second": 0.206 |
| } |
| ], |
| "max_steps": 1000, |
| "num_train_epochs": 1, |
| "total_flos": 1.7866929733632e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|