{ "best_metric": 0.8301314459049545, "best_model_checkpoint": "./swin_final_model/checkpoint-88", "epoch": 9.0, "eval_steps": 500, "global_step": 198, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.45454545454545453, "grad_norm": 5.4057230949401855, "learning_rate": 4.993630286525634e-05, "loss": 0.1972, "step": 10 }, { "epoch": 0.9090909090909091, "grad_norm": 3.5544466972351074, "learning_rate": 4.9745536047023324e-05, "loss": 0.1455, "step": 20 }, { "epoch": 1.0, "eval_accuracy": 0.6785714285714286, "eval_f1": 0.40425531914893614, "eval_loss": 0.1654156595468521, "eval_precision": 0.3392857142857143, "eval_recall": 0.5, "eval_runtime": 2.0657, "eval_samples_per_second": 27.11, "eval_steps_per_second": 1.936, "step": 22 }, { "epoch": 1.3636363636363638, "grad_norm": 34.79863357543945, "learning_rate": 4.942867164927899e-05, "loss": 0.1528, "step": 30 }, { "epoch": 1.8181818181818183, "grad_norm": 3.0228710174560547, "learning_rate": 4.898732434036244e-05, "loss": 0.1415, "step": 40 }, { "epoch": 2.0, "eval_accuracy": 0.6785714285714286, "eval_f1": 0.6199095022624435, "eval_loss": 0.15097904205322266, "eval_precision": 0.625, "eval_recall": 0.6169590643274854, "eval_runtime": 2.0117, "eval_samples_per_second": 27.838, "eval_steps_per_second": 1.988, "step": 44 }, { "epoch": 2.2727272727272725, "grad_norm": 4.495301246643066, "learning_rate": 4.842374312499405e-05, "loss": 0.1288, "step": 50 }, { "epoch": 2.7272727272727275, "grad_norm": 2.2047574520111084, "learning_rate": 4.774079988386296e-05, "loss": 0.1213, "step": 60 }, { "epoch": 3.0, "eval_accuracy": 0.7142857142857143, "eval_f1": 0.7005347593582887, "eval_loss": 0.1337863653898239, "eval_precision": 0.7025641025641025, "eval_recall": 0.7309941520467836, "eval_runtime": 2.1054, "eval_samples_per_second": 26.599, "eval_steps_per_second": 1.9, "step": 66 }, { "epoch": 3.1818181818181817, "grad_norm": 1.6458451747894287, "learning_rate": 4.6941974739181395e-05, "loss": 0.1208, "step": 70 }, { "epoch": 3.6363636363636362, "grad_norm": 1.5783404111862183, "learning_rate": 4.6031338320779534e-05, "loss": 0.1211, "step": 80 }, { "epoch": 4.0, "eval_accuracy": 0.8392857142857143, "eval_f1": 0.8301314459049545, "eval_loss": 0.1286560595035553, "eval_precision": 0.8238709677419356, "eval_recall": 0.8669590643274854, "eval_runtime": 1.9937, "eval_samples_per_second": 28.088, "eval_steps_per_second": 2.006, "step": 88 }, { "epoch": 4.090909090909091, "grad_norm": 2.3769476413726807, "learning_rate": 4.5013531023109014e-05, "loss": 0.1136, "step": 90 }, { "epoch": 4.545454545454545, "grad_norm": 2.6184587478637695, "learning_rate": 4.389373935885646e-05, "loss": 0.1024, "step": 100 }, { "epoch": 5.0, "grad_norm": 2.965785264968872, "learning_rate": 4.267766952966369e-05, "loss": 0.1159, "step": 110 }, { "epoch": 5.0, "eval_accuracy": 0.8392857142857143, "eval_f1": 0.790610718737017, "eval_loss": 0.08838049322366714, "eval_precision": 0.8656565656565656, "eval_recall": 0.7646198830409356, "eval_runtime": 2.0259, "eval_samples_per_second": 27.642, "eval_steps_per_second": 1.974, "step": 110 }, { "epoch": 5.454545454545454, "grad_norm": 6.49717378616333, "learning_rate": 4.137151834863213e-05, "loss": 0.1013, "step": 120 }, { "epoch": 5.909090909090909, "grad_norm": 2.3494088649749756, "learning_rate": 3.9981941662783674e-05, "loss": 0.1307, "step": 130 }, { "epoch": 6.0, "eval_accuracy": 0.7857142857142857, "eval_f1": 0.6818181818181819, "eval_loss": 0.13052888214588165, "eval_precision": 0.88, "eval_recall": 0.6666666666666666, "eval_runtime": 2.0536, "eval_samples_per_second": 27.269, "eval_steps_per_second": 1.948, "step": 132 }, { "epoch": 6.363636363636363, "grad_norm": 1.3006728887557983, "learning_rate": 3.851602043638994e-05, "loss": 0.0906, "step": 140 }, { "epoch": 6.818181818181818, "grad_norm": 2.7580723762512207, "learning_rate": 3.6981224668001424e-05, "loss": 0.1085, "step": 150 }, { "epoch": 7.0, "eval_accuracy": 0.8214285714285714, "eval_f1": 0.78125, "eval_loss": 0.1015724316239357, "eval_precision": 0.8095238095238095, "eval_recall": 0.7660818713450293, "eval_runtime": 2.0225, "eval_samples_per_second": 27.688, "eval_steps_per_second": 1.978, "step": 154 }, { "epoch": 7.2727272727272725, "grad_norm": 2.479637384414673, "learning_rate": 3.5385375325047166e-05, "loss": 0.0981, "step": 160 }, { "epoch": 7.7272727272727275, "grad_norm": 2.572568655014038, "learning_rate": 3.3736604489977466e-05, "loss": 0.0782, "step": 170 }, { "epoch": 8.0, "eval_accuracy": 0.8571428571428571, "eval_f1": 0.8095238095238095, "eval_loss": 0.06962920725345612, "eval_precision": 0.9130434782608696, "eval_recall": 0.7777777777777778, "eval_runtime": 2.0794, "eval_samples_per_second": 26.931, "eval_steps_per_second": 1.924, "step": 176 }, { "epoch": 8.181818181818182, "grad_norm": 22.004362106323242, "learning_rate": 3.2043313921035743e-05, "loss": 0.0619, "step": 180 }, { "epoch": 8.636363636363637, "grad_norm": 6.473495006561279, "learning_rate": 3.0314132238824415e-05, "loss": 0.0449, "step": 190 }, { "epoch": 9.0, "eval_accuracy": 0.8571428571428571, "eval_f1": 0.8095238095238095, "eval_loss": 0.1630028635263443, "eval_precision": 0.9130434782608696, "eval_recall": 0.7777777777777778, "eval_runtime": 2.0328, "eval_samples_per_second": 27.548, "eval_steps_per_second": 1.968, "step": 198 } ], "logging_steps": 10, "max_steps": 440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 5 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.4326295022503936e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }