{ "best_global_step": 1316, "best_metric": 0.987228321756666, "best_model_checkpoint": "/kaggle/working/skillguard/models/roberta/checkpoint-1316", "epoch": 4.0, "eval_steps": 500, "global_step": 1316, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.303951367781155, "grad_norm": 26.459680557250977, "learning_rate": 1.967663702506063e-05, "loss": 0.8790406799316406, "step": 100 }, { "epoch": 0.60790273556231, "grad_norm": 37.875850677490234, "learning_rate": 1.8059822150363784e-05, "loss": 0.20122947692871093, "step": 200 }, { "epoch": 0.9118541033434651, "grad_norm": 0.6300860047340393, "learning_rate": 1.6443007275666938e-05, "loss": 0.14148117065429688, "step": 300 }, { "epoch": 1.0, "eval_accuracy": 0.9817777777777777, "eval_f1": 0.9816472694717995, "eval_loss": 0.11642087250947952, "eval_precision": 0.9887285843101894, "eval_recall": 0.9746666666666667, "eval_runtime": 38.768, "eval_samples_per_second": 116.075, "eval_steps_per_second": 0.929, "step": 329 }, { "epoch": 1.21580547112462, "grad_norm": 44.39780807495117, "learning_rate": 1.482619240097009e-05, "loss": 0.08979954719543456, "step": 400 }, { "epoch": 1.5197568389057752, "grad_norm": 8.478060722351074, "learning_rate": 1.3209377526273244e-05, "loss": 0.06728220462799073, "step": 500 }, { "epoch": 1.8237082066869301, "grad_norm": 15.931158065795898, "learning_rate": 1.1592562651576396e-05, "loss": 0.07970011711120606, "step": 600 }, { "epoch": 2.0, "eval_accuracy": 0.9811111111111112, "eval_f1": 0.9808169713382984, "eval_loss": 0.16507814824581146, "eval_precision": 0.9963319578175149, "eval_recall": 0.9657777777777777, "eval_runtime": 38.6568, "eval_samples_per_second": 116.409, "eval_steps_per_second": 0.931, "step": 658 }, { "epoch": 2.127659574468085, "grad_norm": 0.06346142292022705, "learning_rate": 9.975747776879548e-06, "loss": 0.06562654018402099, "step": 700 }, { "epoch": 2.43161094224924, "grad_norm": 0.021810326725244522, "learning_rate": 8.3589329021827e-06, "loss": 0.037502543926239015, "step": 800 }, { "epoch": 2.735562310030395, "grad_norm": 0.42783406376838684, "learning_rate": 6.7421180274858535e-06, "loss": 0.041276140213012694, "step": 900 }, { "epoch": 3.0, "eval_accuracy": 0.9871111111111112, "eval_f1": 0.9870013446884806, "eval_loss": 0.14181749522686005, "eval_precision": 0.9954792043399638, "eval_recall": 0.9786666666666667, "eval_runtime": 38.725, "eval_samples_per_second": 116.204, "eval_steps_per_second": 0.93, "step": 987 }, { "epoch": 3.0395136778115504, "grad_norm": 5.668140888214111, "learning_rate": 5.125303152789006e-06, "loss": 0.024772546291351318, "step": 1000 }, { "epoch": 3.3434650455927053, "grad_norm": 1.2623281478881836, "learning_rate": 3.5084882780921584e-06, "loss": 0.017916127443313598, "step": 1100 }, { "epoch": 3.6474164133738602, "grad_norm": 2.460536479949951, "learning_rate": 1.8916734033953113e-06, "loss": 0.0251471209526062, "step": 1200 }, { "epoch": 3.951367781155015, "grad_norm": 0.019897326827049255, "learning_rate": 2.74858528698464e-07, "loss": 0.016884679794311522, "step": 1300 }, { "epoch": 4.0, "eval_accuracy": 0.9873333333333333, "eval_f1": 0.987228321756666, "eval_loss": 0.14398272335529327, "eval_precision": 0.9954812471757795, "eval_recall": 0.9791111111111112, "eval_runtime": 38.4471, "eval_samples_per_second": 117.044, "eval_steps_per_second": 0.936, "step": 1316 } ], "logging_steps": 100, "max_steps": 1316, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.105066432512e+16, "train_batch_size": 64, "trial_name": null, "trial_params": null }