| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 21.70479763169194, |
| "global_step": 30000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.36, |
| "learning_rate": 0.0002777777777777778, |
| "loss": 7.7491, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 0.0005555555555555556, |
| "loss": 6.5663, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 0.0008333333333333334, |
| "loss": 6.293, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 0.0009929078014184398, |
| "loss": 5.636, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 0.000975177304964539, |
| "loss": 4.3729, |
| "step": 2500 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 0.0009574468085106384, |
| "loss": 3.5407, |
| "step": 3000 |
| }, |
| { |
| "epoch": 2.53, |
| "learning_rate": 0.0009397163120567376, |
| "loss": 3.1107, |
| "step": 3500 |
| }, |
| { |
| "epoch": 2.89, |
| "learning_rate": 0.0009219858156028368, |
| "loss": 2.8753, |
| "step": 4000 |
| }, |
| { |
| "epoch": 3.26, |
| "learning_rate": 0.0009042553191489362, |
| "loss": 2.7162, |
| "step": 4500 |
| }, |
| { |
| "epoch": 3.62, |
| "learning_rate": 0.0008865248226950354, |
| "loss": 2.6016, |
| "step": 5000 |
| }, |
| { |
| "epoch": 3.98, |
| "learning_rate": 0.0008687943262411348, |
| "loss": 2.5154, |
| "step": 5500 |
| }, |
| { |
| "epoch": 4.34, |
| "learning_rate": 0.000851063829787234, |
| "loss": 2.4472, |
| "step": 6000 |
| }, |
| { |
| "epoch": 4.7, |
| "learning_rate": 0.0008333333333333334, |
| "loss": 2.3923, |
| "step": 6500 |
| }, |
| { |
| "epoch": 5.06, |
| "learning_rate": 0.0008156028368794326, |
| "loss": 2.3473, |
| "step": 7000 |
| }, |
| { |
| "epoch": 5.42, |
| "learning_rate": 0.0007978723404255319, |
| "loss": 2.3071, |
| "step": 7500 |
| }, |
| { |
| "epoch": 5.79, |
| "learning_rate": 0.0007801418439716312, |
| "loss": 2.2735, |
| "step": 8000 |
| }, |
| { |
| "epoch": 6.15, |
| "learning_rate": 0.0007624113475177306, |
| "loss": 2.2441, |
| "step": 8500 |
| }, |
| { |
| "epoch": 6.51, |
| "learning_rate": 0.0007446808510638298, |
| "loss": 2.2174, |
| "step": 9000 |
| }, |
| { |
| "epoch": 6.87, |
| "learning_rate": 0.0007269503546099291, |
| "loss": 2.194, |
| "step": 9500 |
| }, |
| { |
| "epoch": 7.23, |
| "learning_rate": 0.0007092198581560284, |
| "loss": 2.1728, |
| "step": 10000 |
| }, |
| { |
| "epoch": 7.59, |
| "learning_rate": 0.0006914893617021278, |
| "loss": 2.1535, |
| "step": 10500 |
| }, |
| { |
| "epoch": 7.96, |
| "learning_rate": 0.0006737588652482269, |
| "loss": 2.1346, |
| "step": 11000 |
| }, |
| { |
| "epoch": 8.32, |
| "learning_rate": 0.0006560283687943263, |
| "loss": 2.1183, |
| "step": 11500 |
| }, |
| { |
| "epoch": 8.68, |
| "learning_rate": 0.0006382978723404256, |
| "loss": 2.1035, |
| "step": 12000 |
| }, |
| { |
| "epoch": 9.04, |
| "learning_rate": 0.0006205673758865247, |
| "loss": 2.0897, |
| "step": 12500 |
| }, |
| { |
| "epoch": 9.41, |
| "learning_rate": 0.0006028368794326241, |
| "loss": 2.076, |
| "step": 13000 |
| }, |
| { |
| "epoch": 9.77, |
| "learning_rate": 0.0005851063829787234, |
| "loss": 2.065, |
| "step": 13500 |
| }, |
| { |
| "epoch": 10.13, |
| "learning_rate": 0.0005673758865248228, |
| "loss": 2.0528, |
| "step": 14000 |
| }, |
| { |
| "epoch": 10.49, |
| "learning_rate": 0.0005496453900709219, |
| "loss": 2.0432, |
| "step": 14500 |
| }, |
| { |
| "epoch": 10.85, |
| "learning_rate": 0.0005319148936170213, |
| "loss": 2.0335, |
| "step": 15000 |
| }, |
| { |
| "epoch": 11.21, |
| "learning_rate": 0.0005141843971631206, |
| "loss": 2.0241, |
| "step": 15500 |
| }, |
| { |
| "epoch": 11.58, |
| "learning_rate": 0.0004964539007092199, |
| "loss": 2.0153, |
| "step": 16000 |
| }, |
| { |
| "epoch": 11.94, |
| "learning_rate": 0.0004787234042553192, |
| "loss": 2.0066, |
| "step": 16500 |
| }, |
| { |
| "epoch": 12.3, |
| "learning_rate": 0.0004609929078014184, |
| "loss": 1.9987, |
| "step": 17000 |
| }, |
| { |
| "epoch": 12.66, |
| "learning_rate": 0.0004432624113475177, |
| "loss": 1.9909, |
| "step": 17500 |
| }, |
| { |
| "epoch": 13.02, |
| "learning_rate": 0.000425531914893617, |
| "loss": 1.9842, |
| "step": 18000 |
| }, |
| { |
| "epoch": 13.38, |
| "learning_rate": 0.0004078014184397163, |
| "loss": 1.9768, |
| "step": 18500 |
| }, |
| { |
| "epoch": 13.75, |
| "learning_rate": 0.0003900709219858156, |
| "loss": 1.9705, |
| "step": 19000 |
| }, |
| { |
| "epoch": 14.11, |
| "learning_rate": 0.0003723404255319149, |
| "loss": 1.9638, |
| "step": 19500 |
| }, |
| { |
| "epoch": 14.46, |
| "learning_rate": 0.0003546099290780142, |
| "loss": 1.9587, |
| "step": 20000 |
| }, |
| { |
| "epoch": 14.82, |
| "learning_rate": 0.00033687943262411345, |
| "loss": 1.9549, |
| "step": 20500 |
| }, |
| { |
| "epoch": 15.18, |
| "learning_rate": 0.0003191489361702128, |
| "loss": 1.9493, |
| "step": 21000 |
| }, |
| { |
| "epoch": 15.55, |
| "learning_rate": 0.00030141843971631205, |
| "loss": 1.9424, |
| "step": 21500 |
| }, |
| { |
| "epoch": 15.91, |
| "learning_rate": 0.0002836879432624114, |
| "loss": 1.9393, |
| "step": 22000 |
| }, |
| { |
| "epoch": 16.27, |
| "learning_rate": 0.00026595744680851064, |
| "loss": 1.9334, |
| "step": 22500 |
| }, |
| { |
| "epoch": 16.63, |
| "learning_rate": 0.00024822695035460994, |
| "loss": 1.9294, |
| "step": 23000 |
| }, |
| { |
| "epoch": 16.99, |
| "learning_rate": 0.0002304964539007092, |
| "loss": 1.9229, |
| "step": 23500 |
| }, |
| { |
| "epoch": 17.35, |
| "learning_rate": 0.0002127659574468085, |
| "loss": 1.9193, |
| "step": 24000 |
| }, |
| { |
| "epoch": 17.73, |
| "learning_rate": 0.0001950354609929078, |
| "loss": 1.9133, |
| "step": 24500 |
| }, |
| { |
| "epoch": 18.09, |
| "learning_rate": 0.0001773049645390071, |
| "loss": 1.9082, |
| "step": 25000 |
| }, |
| { |
| "epoch": 18.45, |
| "learning_rate": 0.0001595744680851064, |
| "loss": 1.9038, |
| "step": 25500 |
| }, |
| { |
| "epoch": 18.81, |
| "learning_rate": 0.0001418439716312057, |
| "loss": 1.9004, |
| "step": 26000 |
| }, |
| { |
| "epoch": 19.17, |
| "learning_rate": 0.00012411347517730497, |
| "loss": 1.8966, |
| "step": 26500 |
| }, |
| { |
| "epoch": 19.54, |
| "learning_rate": 0.00010638297872340425, |
| "loss": 1.8934, |
| "step": 27000 |
| }, |
| { |
| "epoch": 19.9, |
| "learning_rate": 8.865248226950355e-05, |
| "loss": 1.8888, |
| "step": 27500 |
| }, |
| { |
| "epoch": 20.26, |
| "learning_rate": 7.092198581560285e-05, |
| "loss": 1.8852, |
| "step": 28000 |
| }, |
| { |
| "epoch": 20.62, |
| "learning_rate": 5.319148936170213e-05, |
| "loss": 1.8816, |
| "step": 28500 |
| }, |
| { |
| "epoch": 20.98, |
| "learning_rate": 3.5460992907801425e-05, |
| "loss": 1.8789, |
| "step": 29000 |
| }, |
| { |
| "epoch": 21.34, |
| "learning_rate": 1.7730496453900712e-05, |
| "loss": 1.8754, |
| "step": 29500 |
| }, |
| { |
| "epoch": 21.7, |
| "learning_rate": 0.0, |
| "loss": 1.8724, |
| "step": 30000 |
| } |
| ], |
| "max_steps": 30000, |
| "num_train_epochs": 22, |
| "total_flos": 1.6544861361662853e+19, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|