| { |
| "best_metric": 0.04727424308657646, |
| "best_model_checkpoint": "results_simple-latin-bert-uncased/epoch20_bs64/checkpoint-36", |
| "epoch": 11.0, |
| "eval_steps": 500, |
| "global_step": 99, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.7604417204856873, |
| "learning_rate": 4.75e-05, |
| "loss": 0.2645, |
| "step": 9 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accurracy": { |
| "accuracy": 0.9775494672754946 |
| }, |
| "eval_f1": [ |
| 0.9749575551782682, |
| 0.7978910369068541, |
| 0.9994264958898872 |
| ], |
| "eval_loss": 0.07480967044830322, |
| "eval_precision": [ |
| 0.968381112984823, |
| 0.850187265917603, |
| 0.9988536492166603 |
| ], |
| "eval_recall": [ |
| 0.9816239316239316, |
| 0.7516556291390728, |
| 1.0 |
| ], |
| "eval_runtime": 2.8335, |
| "eval_samples_per_second": 25.41, |
| "eval_steps_per_second": 0.706, |
| "step": 9 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.30367013812065125, |
| "learning_rate": 4.5e-05, |
| "loss": 0.0564, |
| "step": 18 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accurracy": { |
| "accuracy": 0.9813546423135464 |
| }, |
| "eval_f1": [ |
| 0.9791400595998297, |
| 0.8327645051194539, |
| 1.0 |
| ], |
| "eval_loss": 0.053640153259038925, |
| "eval_precision": [ |
| 0.9754028837998303, |
| 0.8591549295774648, |
| 1.0 |
| ], |
| "eval_recall": [ |
| 0.9829059829059829, |
| 0.8079470198675497, |
| 1.0 |
| ], |
| "eval_runtime": 2.7042, |
| "eval_samples_per_second": 26.625, |
| "eval_steps_per_second": 0.74, |
| "step": 18 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.28803956508636475, |
| "learning_rate": 4.25e-05, |
| "loss": 0.0447, |
| "step": 27 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accurracy": { |
| "accuracy": 0.9823059360730594 |
| }, |
| "eval_f1": [ |
| 0.9801916932907349, |
| 0.8421052631578947, |
| 1.0 |
| ], |
| "eval_loss": 0.052741460502147675, |
| "eval_precision": [ |
| 0.9770700636942675, |
| 0.8641114982578397, |
| 1.0 |
| ], |
| "eval_recall": [ |
| 0.9833333333333333, |
| 0.8211920529801324, |
| 1.0 |
| ], |
| "eval_runtime": 2.6628, |
| "eval_samples_per_second": 27.04, |
| "eval_steps_per_second": 0.751, |
| "step": 27 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.2971172630786896, |
| "learning_rate": 4e-05, |
| "loss": 0.0377, |
| "step": 36 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accurracy": { |
| "accuracy": 0.9811643835616438 |
| }, |
| "eval_f1": [ |
| 0.9789227166276346, |
| 0.8313458262350937, |
| 1.0 |
| ], |
| "eval_loss": 0.04727424308657646, |
| "eval_precision": [ |
| 0.9753924480271532, |
| 0.856140350877193, |
| 1.0 |
| ], |
| "eval_recall": [ |
| 0.9824786324786324, |
| 0.8079470198675497, |
| 1.0 |
| ], |
| "eval_runtime": 2.6802, |
| "eval_samples_per_second": 26.864, |
| "eval_steps_per_second": 0.746, |
| "step": 36 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.32125240564346313, |
| "learning_rate": 3.7500000000000003e-05, |
| "loss": 0.0325, |
| "step": 45 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accurracy": { |
| "accuracy": 0.9826864535768646 |
| }, |
| "eval_f1": [ |
| 0.9806176783812567, |
| 0.8455008488964346, |
| 1.0 |
| ], |
| "eval_loss": 0.04757271707057953, |
| "eval_precision": [ |
| 0.9774946921443737, |
| 0.867595818815331, |
| 1.0 |
| ], |
| "eval_recall": [ |
| 0.9837606837606837, |
| 0.8245033112582781, |
| 1.0 |
| ], |
| "eval_runtime": 2.6737, |
| "eval_samples_per_second": 26.929, |
| "eval_steps_per_second": 0.748, |
| "step": 45 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 0.23932930827140808, |
| "learning_rate": 3.5e-05, |
| "loss": 0.0276, |
| "step": 54 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accurracy": { |
| "accuracy": 0.9828767123287672 |
| }, |
| "eval_f1": [ |
| 0.98080204778157, |
| 0.8489932885906041, |
| 1.0 |
| ], |
| "eval_loss": 0.04851287603378296, |
| "eval_precision": [ |
| 0.9791311754684838, |
| 0.8605442176870748, |
| 1.0 |
| ], |
| "eval_recall": [ |
| 0.9824786324786324, |
| 0.8377483443708609, |
| 1.0 |
| ], |
| "eval_runtime": 2.7322, |
| "eval_samples_per_second": 26.352, |
| "eval_steps_per_second": 0.732, |
| "step": 54 |
| }, |
| { |
| "epoch": 7.0, |
| "grad_norm": 0.2584969997406006, |
| "learning_rate": 3.2500000000000004e-05, |
| "loss": 0.0235, |
| "step": 63 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accurracy": { |
| "accuracy": 0.9834474885844748 |
| }, |
| "eval_f1": [ |
| 0.9814300960512273, |
| 0.8547579298831386, |
| 1.0 |
| ], |
| "eval_loss": 0.04932190850377083, |
| "eval_precision": [ |
| 0.9803837953091684, |
| 0.8619528619528619, |
| 1.0 |
| ], |
| "eval_recall": [ |
| 0.9824786324786324, |
| 0.847682119205298, |
| 1.0 |
| ], |
| "eval_runtime": 2.7945, |
| "eval_samples_per_second": 25.765, |
| "eval_steps_per_second": 0.716, |
| "step": 63 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 0.22960752248764038, |
| "learning_rate": 3e-05, |
| "loss": 0.0206, |
| "step": 72 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accurracy": { |
| "accuracy": 0.9836377473363774 |
| }, |
| "eval_f1": [ |
| 0.9816160752458315, |
| 0.858085808580858, |
| 1.0 |
| ], |
| "eval_loss": 0.05238157883286476, |
| "eval_precision": [ |
| 0.9820359281437125, |
| 0.8552631578947368, |
| 1.0 |
| ], |
| "eval_recall": [ |
| 0.9811965811965812, |
| 0.8609271523178808, |
| 1.0 |
| ], |
| "eval_runtime": 2.6883, |
| "eval_samples_per_second": 26.783, |
| "eval_steps_per_second": 0.744, |
| "step": 72 |
| }, |
| { |
| "epoch": 9.0, |
| "grad_norm": 0.19476890563964844, |
| "learning_rate": 2.7500000000000004e-05, |
| "loss": 0.0169, |
| "step": 81 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accurracy": { |
| "accuracy": 0.9828767123287672 |
| }, |
| "eval_f1": [ |
| 0.9807527801539777, |
| 0.8519736842105263, |
| 1.0 |
| ], |
| "eval_loss": 0.055463388562202454, |
| "eval_precision": [ |
| 0.9815924657534246, |
| 0.8464052287581699, |
| 1.0 |
| ], |
| "eval_recall": [ |
| 0.9799145299145299, |
| 0.8576158940397351, |
| 1.0 |
| ], |
| "eval_runtime": 2.6823, |
| "eval_samples_per_second": 26.843, |
| "eval_steps_per_second": 0.746, |
| "step": 81 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 0.2635619640350342, |
| "learning_rate": 2.5e-05, |
| "loss": 0.0146, |
| "step": 90 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accurracy": { |
| "accuracy": 0.9836377473363774 |
| }, |
| "eval_f1": [ |
| 0.9816160752458315, |
| 0.858085808580858, |
| 1.0 |
| ], |
| "eval_loss": 0.05970200523734093, |
| "eval_precision": [ |
| 0.9820359281437125, |
| 0.8552631578947368, |
| 1.0 |
| ], |
| "eval_recall": [ |
| 0.9811965811965812, |
| 0.8609271523178808, |
| 1.0 |
| ], |
| "eval_runtime": 2.7111, |
| "eval_samples_per_second": 26.558, |
| "eval_steps_per_second": 0.738, |
| "step": 90 |
| }, |
| { |
| "epoch": 11.0, |
| "grad_norm": 0.23059502243995667, |
| "learning_rate": 2.25e-05, |
| "loss": 0.0125, |
| "step": 99 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_accurracy": { |
| "accuracy": 0.9834474885844748 |
| }, |
| "eval_f1": [ |
| 0.9813824095869891, |
| 0.8576104746317512, |
| 1.0 |
| ], |
| "eval_loss": 0.06442799419164658, |
| "eval_precision": [ |
| 0.9828546935276468, |
| 0.8478964401294499, |
| 1.0 |
| ], |
| "eval_recall": [ |
| 0.9799145299145299, |
| 0.8675496688741722, |
| 1.0 |
| ], |
| "eval_runtime": 2.6437, |
| "eval_samples_per_second": 27.234, |
| "eval_steps_per_second": 0.757, |
| "step": 99 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 180, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 20, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 267219177271200.0, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|