| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 22.0, |
| "eval_steps": 500, |
| "global_step": 2354, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.5887850467289719, |
| "eval_loss": 1.2691198587417603, |
| "eval_runtime": 4.8417, |
| "eval_samples_per_second": 44.199, |
| "eval_steps_per_second": 5.577, |
| "step": 107 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.6214953271028038, |
| "eval_loss": 1.0963133573532104, |
| "eval_runtime": 5.0021, |
| "eval_samples_per_second": 42.782, |
| "eval_steps_per_second": 5.398, |
| "step": 214 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.6308411214953271, |
| "eval_loss": 0.860569179058075, |
| "eval_runtime": 4.6891, |
| "eval_samples_per_second": 45.638, |
| "eval_steps_per_second": 5.758, |
| "step": 321 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.7990654205607477, |
| "eval_loss": 0.672334611415863, |
| "eval_runtime": 4.717, |
| "eval_samples_per_second": 45.368, |
| "eval_steps_per_second": 5.724, |
| "step": 428 |
| }, |
| { |
| "epoch": 4.67, |
| "learning_rate": 4.0654205607476636e-05, |
| "loss": 1.1331, |
| "step": 500 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.8457943925233645, |
| "eval_loss": 0.48274144530296326, |
| "eval_runtime": 4.6309, |
| "eval_samples_per_second": 46.212, |
| "eval_steps_per_second": 5.83, |
| "step": 535 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.8551401869158879, |
| "eval_loss": 0.35962656140327454, |
| "eval_runtime": 4.7112, |
| "eval_samples_per_second": 45.424, |
| "eval_steps_per_second": 5.731, |
| "step": 642 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.8925233644859814, |
| "eval_loss": 0.26292115449905396, |
| "eval_runtime": 5.4422, |
| "eval_samples_per_second": 39.322, |
| "eval_steps_per_second": 4.961, |
| "step": 749 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.9158878504672897, |
| "eval_loss": 0.22579917311668396, |
| "eval_runtime": 4.8554, |
| "eval_samples_per_second": 44.074, |
| "eval_steps_per_second": 5.561, |
| "step": 856 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.9158878504672897, |
| "eval_loss": 0.19785191118717194, |
| "eval_runtime": 4.9901, |
| "eval_samples_per_second": 42.885, |
| "eval_steps_per_second": 5.411, |
| "step": 963 |
| }, |
| { |
| "epoch": 9.35, |
| "learning_rate": 3.130841121495327e-05, |
| "loss": 0.6031, |
| "step": 1000 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.9719626168224299, |
| "eval_loss": 0.16760671138763428, |
| "eval_runtime": 4.8143, |
| "eval_samples_per_second": 44.451, |
| "eval_steps_per_second": 5.608, |
| "step": 1070 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_accuracy": 1.0, |
| "eval_loss": 0.1423913985490799, |
| "eval_runtime": 4.7195, |
| "eval_samples_per_second": 45.343, |
| "eval_steps_per_second": 5.721, |
| "step": 1177 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_accuracy": 0.985981308411215, |
| "eval_loss": 0.12256418913602829, |
| "eval_runtime": 5.1046, |
| "eval_samples_per_second": 41.923, |
| "eval_steps_per_second": 5.289, |
| "step": 1284 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_accuracy": 0.9953271028037384, |
| "eval_loss": 0.11292136460542679, |
| "eval_runtime": 4.8426, |
| "eval_samples_per_second": 44.191, |
| "eval_steps_per_second": 5.576, |
| "step": 1391 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_accuracy": 0.9906542056074766, |
| "eval_loss": 0.106930673122406, |
| "eval_runtime": 4.9097, |
| "eval_samples_per_second": 43.587, |
| "eval_steps_per_second": 5.499, |
| "step": 1498 |
| }, |
| { |
| "epoch": 14.02, |
| "learning_rate": 2.196261682242991e-05, |
| "loss": 0.4317, |
| "step": 1500 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_accuracy": 0.9953271028037384, |
| "eval_loss": 0.09224073588848114, |
| "eval_runtime": 4.854, |
| "eval_samples_per_second": 44.087, |
| "eval_steps_per_second": 5.562, |
| "step": 1605 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_accuracy": 0.9953271028037384, |
| "eval_loss": 0.08621260523796082, |
| "eval_runtime": 4.7695, |
| "eval_samples_per_second": 44.868, |
| "eval_steps_per_second": 5.661, |
| "step": 1712 |
| }, |
| { |
| "epoch": 17.0, |
| "eval_accuracy": 1.0, |
| "eval_loss": 0.07688089460134506, |
| "eval_runtime": 4.8878, |
| "eval_samples_per_second": 43.782, |
| "eval_steps_per_second": 5.524, |
| "step": 1819 |
| }, |
| { |
| "epoch": 18.0, |
| "eval_accuracy": 1.0, |
| "eval_loss": 0.061450209468603134, |
| "eval_runtime": 4.7171, |
| "eval_samples_per_second": 45.367, |
| "eval_steps_per_second": 5.724, |
| "step": 1926 |
| }, |
| { |
| "epoch": 18.69, |
| "learning_rate": 1.2616822429906542e-05, |
| "loss": 0.3584, |
| "step": 2000 |
| }, |
| { |
| "epoch": 19.0, |
| "eval_accuracy": 1.0, |
| "eval_loss": 0.0667119175195694, |
| "eval_runtime": 4.8093, |
| "eval_samples_per_second": 44.498, |
| "eval_steps_per_second": 5.614, |
| "step": 2033 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_accuracy": 0.9953271028037384, |
| "eval_loss": 0.05547282472252846, |
| "eval_runtime": 4.7518, |
| "eval_samples_per_second": 45.036, |
| "eval_steps_per_second": 5.682, |
| "step": 2140 |
| }, |
| { |
| "epoch": 21.0, |
| "eval_accuracy": 1.0, |
| "eval_loss": 0.054027605801820755, |
| "eval_runtime": 4.7251, |
| "eval_samples_per_second": 45.29, |
| "eval_steps_per_second": 5.714, |
| "step": 2247 |
| }, |
| { |
| "epoch": 22.0, |
| "eval_accuracy": 1.0, |
| "eval_loss": 0.04496881738305092, |
| "eval_runtime": 4.6636, |
| "eval_samples_per_second": 45.887, |
| "eval_steps_per_second": 5.79, |
| "step": 2354 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 2675, |
| "num_train_epochs": 25, |
| "save_steps": 200, |
| "total_flos": 3.9904029741839155e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|