[ { "eval_loss": 1.2019327878952026, "eval_runtime": 3.7296, "eval_samples_per_second": 130.04, "eval_steps_per_second": 16.356, "epoch": 1.0, "step": 486 }, { "loss": 2.012456298828125, "grad_norm": 61.2252311706543, "learning_rate": 1.3155006858710564e-05, "epoch": 1.02880658436214, "step": 500 }, { "eval_loss": 0.9657873511314392, "eval_runtime": 3.8673, "eval_samples_per_second": 125.41, "eval_steps_per_second": 15.773, "epoch": 2.0, "step": 972 }, { "loss": 1.1124793701171876, "grad_norm": 51.13796615600586, "learning_rate": 6.296296296296297e-06, "epoch": 2.05761316872428, "step": 1000 }, { "eval_loss": 0.9960638284683228, "eval_runtime": 3.7498, "eval_samples_per_second": 129.34, "eval_steps_per_second": 16.267, "epoch": 3.0, "step": 1458 }, { "train_runtime": 714.5044, "train_samples_per_second": 16.304, "train_steps_per_second": 2.041, "total_flos": 3043845919217664.0, "train_loss": 1.3373331663866919, "epoch": 3.0, "step": 1458 }, { "eval_loss": 0.9657873511314392, "eval_runtime": 3.3404, "eval_samples_per_second": 145.191, "eval_steps_per_second": 18.261, "epoch": 3.0, "step": 1458 } ]