| { |
| "best_global_step": 15500, |
| "best_metric": 0.9434096975688787, |
| "best_model_checkpoint": "./arabert_author_model_full/checkpoint-15500", |
| "epoch": 3.374700631395602, |
| "eval_steps": 500, |
| "global_step": 15500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.10886131069018071, |
| "grad_norm": 745471.1875, |
| "learning_rate": 2.171926006528836e-05, |
| "loss": 2.2995, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.10886131069018071, |
| "eval_accuracy": 0.6985195154777928, |
| "eval_f1_macro": 0.601246564201863, |
| "eval_f1_micro": 0.6985195154777928, |
| "eval_loss": 1.2079353332519531, |
| "eval_precision_macro": 0.6412966664769482, |
| "eval_precision_micro": 0.6985195154777928, |
| "eval_recall_macro": 0.6419387939365965, |
| "eval_recall_micro": 0.6985195154777928, |
| "eval_runtime": 14.7462, |
| "eval_samples_per_second": 50.386, |
| "eval_steps_per_second": 3.187, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.21772262138036141, |
| "grad_norm": 956772.5625, |
| "learning_rate": 4.348204570184984e-05, |
| "loss": 0.849, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.21772262138036141, |
| "eval_accuracy": 0.819650067294751, |
| "eval_f1_macro": 0.7996069224582287, |
| "eval_f1_micro": 0.819650067294751, |
| "eval_loss": 0.5631475448608398, |
| "eval_precision_macro": 0.80345079706281, |
| "eval_precision_micro": 0.819650067294751, |
| "eval_recall_macro": 0.82483257704162, |
| "eval_recall_micro": 0.819650067294751, |
| "eval_runtime": 14.7707, |
| "eval_samples_per_second": 50.302, |
| "eval_steps_per_second": 3.182, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.32658393207054215, |
| "grad_norm": 326904.5, |
| "learning_rate": 6.524483133841132e-05, |
| "loss": 0.5868, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.32658393207054215, |
| "eval_accuracy": 0.8021534320323015, |
| "eval_f1_macro": 0.793396840762137, |
| "eval_f1_micro": 0.8021534320323015, |
| "eval_loss": 0.6846649646759033, |
| "eval_precision_macro": 0.8435106749075885, |
| "eval_precision_micro": 0.8021534320323015, |
| "eval_recall_macro": 0.7916833340258262, |
| "eval_recall_micro": 0.8021534320323015, |
| "eval_runtime": 14.8117, |
| "eval_samples_per_second": 50.163, |
| "eval_steps_per_second": 3.173, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.43544524276072283, |
| "grad_norm": 218412.828125, |
| "learning_rate": 7.998128491699842e-05, |
| "loss": 0.5612, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.43544524276072283, |
| "eval_accuracy": 0.8519515477792732, |
| "eval_f1_macro": 0.8247069527158585, |
| "eval_f1_micro": 0.8519515477792732, |
| "eval_loss": 0.4477691948413849, |
| "eval_precision_macro": 0.8896978331250329, |
| "eval_precision_micro": 0.8519515477792732, |
| "eval_recall_macro": 0.8379996548860711, |
| "eval_recall_micro": 0.8519515477792732, |
| "eval_runtime": 14.7186, |
| "eval_samples_per_second": 50.48, |
| "eval_steps_per_second": 3.193, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.5443065534509035, |
| "grad_norm": 1928294.625, |
| "learning_rate": 7.968493088594472e-05, |
| "loss": 0.4929, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.5443065534509035, |
| "eval_accuracy": 0.873485868102288, |
| "eval_f1_macro": 0.8688756385026712, |
| "eval_f1_micro": 0.873485868102288, |
| "eval_loss": 0.3026486039161682, |
| "eval_precision_macro": 0.878210989714668, |
| "eval_precision_micro": 0.873485868102288, |
| "eval_recall_macro": 0.8859921080399548, |
| "eval_recall_micro": 0.873485868102288, |
| "eval_runtime": 14.7131, |
| "eval_samples_per_second": 50.499, |
| "eval_steps_per_second": 3.194, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.6531678641410843, |
| "grad_norm": 3023410.5, |
| "learning_rate": 7.903065943344406e-05, |
| "loss": 0.4618, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.6531678641410843, |
| "eval_accuracy": 0.8613728129205922, |
| "eval_f1_macro": 0.8296445269102163, |
| "eval_f1_micro": 0.8613728129205922, |
| "eval_loss": 0.43775779008865356, |
| "eval_precision_macro": 0.8710561256381226, |
| "eval_precision_micro": 0.8613728129205922, |
| "eval_recall_macro": 0.8541696546910839, |
| "eval_recall_micro": 0.8613728129205922, |
| "eval_runtime": 14.7062, |
| "eval_samples_per_second": 50.523, |
| "eval_steps_per_second": 3.196, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.762029174831265, |
| "grad_norm": 211605.15625, |
| "learning_rate": 7.802437141773096e-05, |
| "loss": 0.4028, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.762029174831265, |
| "eval_accuracy": 0.8950201884253028, |
| "eval_f1_macro": 0.8917785158702655, |
| "eval_f1_micro": 0.8950201884253028, |
| "eval_loss": 0.25510504841804504, |
| "eval_precision_macro": 0.9107123575695487, |
| "eval_precision_micro": 0.8950201884253028, |
| "eval_recall_macro": 0.9057010565367906, |
| "eval_recall_micro": 0.8950201884253028, |
| "eval_runtime": 14.7188, |
| "eval_samples_per_second": 50.48, |
| "eval_steps_per_second": 3.193, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.8708904855214457, |
| "grad_norm": 37626.74609375, |
| "learning_rate": 7.667514252581752e-05, |
| "loss": 0.3747, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.8708904855214457, |
| "eval_accuracy": 0.892328398384926, |
| "eval_f1_macro": 0.8948877387080549, |
| "eval_f1_micro": 0.892328398384926, |
| "eval_loss": 0.2622196674346924, |
| "eval_precision_macro": 0.9437605053976897, |
| "eval_precision_micro": 0.892328398384926, |
| "eval_recall_macro": 0.9063603025064753, |
| "eval_recall_micro": 0.892328398384926, |
| "eval_runtime": 14.7613, |
| "eval_samples_per_second": 50.334, |
| "eval_steps_per_second": 3.184, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.9797517962116263, |
| "grad_norm": 341548.65625, |
| "learning_rate": 7.499514142009407e-05, |
| "loss": 0.3686, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.9797517962116263, |
| "eval_accuracy": 0.901749663526245, |
| "eval_f1_macro": 0.9071958475193036, |
| "eval_f1_micro": 0.9017496635262451, |
| "eval_loss": 0.21770605444908142, |
| "eval_precision_macro": 0.9392339212137314, |
| "eval_precision_micro": 0.901749663526245, |
| "eval_recall_macro": 0.9187280722751042, |
| "eval_recall_micro": 0.901749663526245, |
| "eval_runtime": 14.7411, |
| "eval_samples_per_second": 50.403, |
| "eval_steps_per_second": 3.188, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.088613106901807, |
| "grad_norm": 51656.32421875, |
| "learning_rate": 7.299951998946065e-05, |
| "loss": 0.2762, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.088613106901807, |
| "eval_accuracy": 0.8896366083445492, |
| "eval_f1_macro": 0.8803954267807832, |
| "eval_f1_micro": 0.8896366083445492, |
| "eval_loss": 0.37781140208244324, |
| "eval_precision_macro": 0.8980066417509999, |
| "eval_precision_micro": 0.8896366083445492, |
| "eval_recall_macro": 0.8882222866157216, |
| "eval_recall_micro": 0.8896366083445492, |
| "eval_runtime": 15.0879, |
| "eval_samples_per_second": 49.245, |
| "eval_steps_per_second": 3.115, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.1974744175919878, |
| "grad_norm": 1009913.0625, |
| "learning_rate": 7.070627669481137e-05, |
| "loss": 0.2851, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.1974744175919878, |
| "eval_accuracy": 0.882907133243607, |
| "eval_f1_macro": 0.8672894626796113, |
| "eval_f1_micro": 0.882907133243607, |
| "eval_loss": 0.38583362102508545, |
| "eval_precision_macro": 0.9049625152940963, |
| "eval_precision_micro": 0.882907133243607, |
| "eval_recall_macro": 0.8813935878782198, |
| "eval_recall_micro": 0.882907133243607, |
| "eval_runtime": 14.7029, |
| "eval_samples_per_second": 50.534, |
| "eval_steps_per_second": 3.197, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.3063357282821686, |
| "grad_norm": 26227.69140625, |
| "learning_rate": 6.813609424135567e-05, |
| "loss": 0.2818, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.3063357282821686, |
| "eval_accuracy": 0.9138627187079408, |
| "eval_f1_macro": 0.9250807107212078, |
| "eval_f1_micro": 0.9138627187079408, |
| "eval_loss": 0.1822730302810669, |
| "eval_precision_macro": 0.9436200764635643, |
| "eval_precision_micro": 0.9138627187079408, |
| "eval_recall_macro": 0.9322277636580386, |
| "eval_recall_micro": 0.9138627187079408, |
| "eval_runtime": 14.7441, |
| "eval_samples_per_second": 50.393, |
| "eval_steps_per_second": 3.188, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.4151970389723492, |
| "grad_norm": 87145.015625, |
| "learning_rate": 6.531215304180572e-05, |
| "loss": 0.2539, |
| "step": 6500 |
| }, |
| { |
| "epoch": 1.4151970389723492, |
| "eval_accuracy": 0.9044414535666218, |
| "eval_f1_macro": 0.9159118265135213, |
| "eval_f1_micro": 0.9044414535666218, |
| "eval_loss": 0.19744105637073517, |
| "eval_precision_macro": 0.9248731430404993, |
| "eval_precision_micro": 0.9044414535666218, |
| "eval_recall_macro": 0.9361879615931227, |
| "eval_recall_micro": 0.9044414535666218, |
| "eval_runtime": 14.7205, |
| "eval_samples_per_second": 50.474, |
| "eval_steps_per_second": 3.193, |
| "step": 6500 |
| }, |
| { |
| "epoch": 1.52405834966253, |
| "grad_norm": 4197689.5, |
| "learning_rate": 6.22599221528008e-05, |
| "loss": 0.2342, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.52405834966253, |
| "eval_accuracy": 0.9152086137281292, |
| "eval_f1_macro": 0.9209521774588028, |
| "eval_f1_micro": 0.9152086137281292, |
| "eval_loss": 0.16721387207508087, |
| "eval_precision_macro": 0.9316385374819118, |
| "eval_precision_micro": 0.9152086137281292, |
| "eval_recall_macro": 0.9305594066426393, |
| "eval_recall_micro": 0.9152086137281292, |
| "eval_runtime": 14.7185, |
| "eval_samples_per_second": 50.481, |
| "eval_steps_per_second": 3.193, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.6329196603527105, |
| "grad_norm": 29691.1875, |
| "learning_rate": 5.900692957010821e-05, |
| "loss": 0.2658, |
| "step": 7500 |
| }, |
| { |
| "epoch": 1.6329196603527105, |
| "eval_accuracy": 0.9205921938088829, |
| "eval_f1_macro": 0.9292673927082579, |
| "eval_f1_micro": 0.9205921938088829, |
| "eval_loss": 0.16926071047782898, |
| "eval_precision_macro": 0.9467601029387086, |
| "eval_precision_micro": 0.9205921938088829, |
| "eval_recall_macro": 0.9353857192023052, |
| "eval_recall_micro": 0.9205921938088829, |
| "eval_runtime": 14.7038, |
| "eval_samples_per_second": 50.531, |
| "eval_steps_per_second": 3.196, |
| "step": 7500 |
| }, |
| { |
| "epoch": 1.7417809710428913, |
| "grad_norm": 82702.546875, |
| "learning_rate": 5.5582513954302386e-05, |
| "loss": 0.2703, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.7417809710428913, |
| "eval_accuracy": 0.917900403768506, |
| "eval_f1_macro": 0.9205592899943698, |
| "eval_f1_micro": 0.917900403768506, |
| "eval_loss": 0.22037993371486664, |
| "eval_precision_macro": 0.9459349396324186, |
| "eval_precision_micro": 0.917900403768506, |
| "eval_recall_macro": 0.9278516945604416, |
| "eval_recall_micro": 0.917900403768506, |
| "eval_runtime": 14.7085, |
| "eval_samples_per_second": 50.515, |
| "eval_steps_per_second": 3.195, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.850642281733072, |
| "grad_norm": 450699.1875, |
| "learning_rate": 5.201756002610252e-05, |
| "loss": 0.2566, |
| "step": 8500 |
| }, |
| { |
| "epoch": 1.850642281733072, |
| "eval_accuracy": 0.9098250336473755, |
| "eval_f1_macro": 0.9126391472355347, |
| "eval_f1_micro": 0.9098250336473755, |
| "eval_loss": 0.26449093222618103, |
| "eval_precision_macro": 0.9352643525302922, |
| "eval_precision_micro": 0.9098250336473755, |
| "eval_recall_macro": 0.931955435163728, |
| "eval_recall_micro": 0.9098250336473755, |
| "eval_runtime": 14.6939, |
| "eval_samples_per_second": 50.565, |
| "eval_steps_per_second": 3.199, |
| "step": 8500 |
| }, |
| { |
| "epoch": 1.959503592423253, |
| "grad_norm": 37148.73046875, |
| "learning_rate": 4.834422001783138e-05, |
| "loss": 0.2242, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.959503592423253, |
| "eval_accuracy": 0.9246298788694481, |
| "eval_f1_macro": 0.9278695233625198, |
| "eval_f1_micro": 0.9246298788694481, |
| "eval_loss": 0.20524874329566956, |
| "eval_precision_macro": 0.9473174570200222, |
| "eval_precision_micro": 0.9246298788694481, |
| "eval_recall_macro": 0.9317137486146517, |
| "eval_recall_micro": 0.9246298788694481, |
| "eval_runtime": 14.65, |
| "eval_samples_per_second": 50.717, |
| "eval_steps_per_second": 3.208, |
| "step": 9000 |
| }, |
| { |
| "epoch": 2.0683649031134337, |
| "grad_norm": 65893.8984375, |
| "learning_rate": 4.45956236932181e-05, |
| "loss": 0.1672, |
| "step": 9500 |
| }, |
| { |
| "epoch": 2.0683649031134337, |
| "eval_accuracy": 0.9165545087483177, |
| "eval_f1_macro": 0.9239702133396492, |
| "eval_f1_micro": 0.9165545087483177, |
| "eval_loss": 0.3571414351463318, |
| "eval_precision_macro": 0.9412785975210729, |
| "eval_precision_micro": 0.9165545087483177, |
| "eval_recall_macro": 0.9173054563259597, |
| "eval_recall_micro": 0.9165545087483177, |
| "eval_runtime": 14.749, |
| "eval_samples_per_second": 50.376, |
| "eval_steps_per_second": 3.187, |
| "step": 9500 |
| }, |
| { |
| "epoch": 2.177226213803614, |
| "grad_norm": 20243.5546875, |
| "learning_rate": 4.0805579550869046e-05, |
| "loss": 0.1593, |
| "step": 10000 |
| }, |
| { |
| "epoch": 2.177226213803614, |
| "eval_accuracy": 0.9125168236877523, |
| "eval_f1_macro": 0.9238184226911409, |
| "eval_f1_micro": 0.9125168236877523, |
| "eval_loss": 0.30988800525665283, |
| "eval_precision_macro": 0.9555289484815556, |
| "eval_precision_micro": 0.9125168236877523, |
| "eval_recall_macro": 0.9275764985418137, |
| "eval_recall_micro": 0.9125168236877523, |
| "eval_runtime": 15.0155, |
| "eval_samples_per_second": 49.482, |
| "eval_steps_per_second": 3.13, |
| "step": 10000 |
| }, |
| { |
| "epoch": 2.286087524493795, |
| "grad_norm": 33157.19140625, |
| "learning_rate": 3.7008269906245454e-05, |
| "loss": 0.1799, |
| "step": 10500 |
| }, |
| { |
| "epoch": 2.286087524493795, |
| "eval_accuracy": 0.9246298788694481, |
| "eval_f1_macro": 0.9287251727049811, |
| "eval_f1_micro": 0.9246298788694481, |
| "eval_loss": 0.23414301872253418, |
| "eval_precision_macro": 0.959944603131214, |
| "eval_precision_micro": 0.9246298788694481, |
| "eval_recall_macro": 0.9306134629626335, |
| "eval_recall_micro": 0.9246298788694481, |
| "eval_runtime": 14.6983, |
| "eval_samples_per_second": 50.55, |
| "eval_steps_per_second": 3.198, |
| "step": 10500 |
| }, |
| { |
| "epoch": 2.3949488351839756, |
| "grad_norm": 48777.84375, |
| "learning_rate": 3.323794260219589e-05, |
| "loss": 0.166, |
| "step": 11000 |
| }, |
| { |
| "epoch": 2.3949488351839756, |
| "eval_accuracy": 0.9057873485868102, |
| "eval_f1_macro": 0.9123153410480982, |
| "eval_f1_micro": 0.9057873485868102, |
| "eval_loss": 0.3453662395477295, |
| "eval_precision_macro": 0.9446104426733389, |
| "eval_precision_micro": 0.9057873485868102, |
| "eval_recall_macro": 0.91935239522038, |
| "eval_recall_micro": 0.9057873485868102, |
| "eval_runtime": 14.7404, |
| "eval_samples_per_second": 50.406, |
| "eval_steps_per_second": 3.189, |
| "step": 11000 |
| }, |
| { |
| "epoch": 2.5038101458741564, |
| "grad_norm": 33563.56640625, |
| "learning_rate": 2.9528602128499004e-05, |
| "loss": 0.162, |
| "step": 11500 |
| }, |
| { |
| "epoch": 2.5038101458741564, |
| "eval_accuracy": 0.9098250336473755, |
| "eval_f1_macro": 0.9212878627631594, |
| "eval_f1_micro": 0.9098250336473755, |
| "eval_loss": 0.22809743881225586, |
| "eval_precision_macro": 0.9389309808956737, |
| "eval_precision_micro": 0.9098250336473755, |
| "eval_recall_macro": 0.9311247877025975, |
| "eval_recall_micro": 0.9098250336473755, |
| "eval_runtime": 14.666, |
| "eval_samples_per_second": 50.661, |
| "eval_steps_per_second": 3.205, |
| "step": 11500 |
| }, |
| { |
| "epoch": 2.612671456564337, |
| "grad_norm": 58977.125, |
| "learning_rate": 2.591370293620146e-05, |
| "loss": 0.1452, |
| "step": 12000 |
| }, |
| { |
| "epoch": 2.612671456564337, |
| "eval_accuracy": 0.9219380888290714, |
| "eval_f1_macro": 0.9232635700162879, |
| "eval_f1_micro": 0.9219380888290714, |
| "eval_loss": 0.2860707640647888, |
| "eval_precision_macro": 0.9426347574998575, |
| "eval_precision_micro": 0.9219380888290714, |
| "eval_recall_macro": 0.9262974863930373, |
| "eval_recall_micro": 0.9219380888290714, |
| "eval_runtime": 14.8095, |
| "eval_samples_per_second": 50.171, |
| "eval_steps_per_second": 3.174, |
| "step": 12000 |
| }, |
| { |
| "epoch": 2.7215327672545175, |
| "grad_norm": 46900.25390625, |
| "learning_rate": 2.2425847712741887e-05, |
| "loss": 0.1418, |
| "step": 12500 |
| }, |
| { |
| "epoch": 2.7215327672545175, |
| "eval_accuracy": 0.9286675639300135, |
| "eval_f1_macro": 0.9357990563843356, |
| "eval_f1_micro": 0.9286675639300135, |
| "eval_loss": 0.15669873356819153, |
| "eval_precision_macro": 0.9529768865317036, |
| "eval_precision_micro": 0.9286675639300135, |
| "eval_recall_macro": 0.9417303559122717, |
| "eval_recall_micro": 0.9286675639300135, |
| "eval_runtime": 14.7072, |
| "eval_samples_per_second": 50.52, |
| "eval_steps_per_second": 3.196, |
| "step": 12500 |
| }, |
| { |
| "epoch": 2.8303940779446983, |
| "grad_norm": 37592.3515625, |
| "learning_rate": 1.9096493339109878e-05, |
| "loss": 0.1429, |
| "step": 13000 |
| }, |
| { |
| "epoch": 2.8303940779446983, |
| "eval_accuracy": 0.9165545087483177, |
| "eval_f1_macro": 0.9295728643158702, |
| "eval_f1_micro": 0.9165545087483177, |
| "eval_loss": 0.22479559481143951, |
| "eval_precision_macro": 0.9605098350591709, |
| "eval_precision_micro": 0.9165545087483177, |
| "eval_recall_macro": 0.9328126952515738, |
| "eval_recall_micro": 0.9165545087483177, |
| "eval_runtime": 14.6901, |
| "eval_samples_per_second": 50.578, |
| "eval_steps_per_second": 3.199, |
| "step": 13000 |
| }, |
| { |
| "epoch": 2.939255388634879, |
| "grad_norm": 79597.40625, |
| "learning_rate": 1.5955667181005554e-05, |
| "loss": 0.1293, |
| "step": 13500 |
| }, |
| { |
| "epoch": 2.939255388634879, |
| "eval_accuracy": 0.9246298788694481, |
| "eval_f1_macro": 0.9319848397676713, |
| "eval_f1_micro": 0.9246298788694481, |
| "eval_loss": 0.27543124556541443, |
| "eval_precision_macro": 0.9589344708678029, |
| "eval_precision_micro": 0.9246298788694481, |
| "eval_recall_macro": 0.932925082879603, |
| "eval_recall_micro": 0.9246298788694481, |
| "eval_runtime": 14.726, |
| "eval_samples_per_second": 50.455, |
| "eval_steps_per_second": 3.192, |
| "step": 13500 |
| }, |
| { |
| "epoch": 3.04811669932506, |
| "grad_norm": 25773.66796875, |
| "learning_rate": 1.3031696272762192e-05, |
| "loss": 0.1137, |
| "step": 14000 |
| }, |
| { |
| "epoch": 3.04811669932506, |
| "eval_accuracy": 0.9246298788694481, |
| "eval_f1_macro": 0.937910042741771, |
| "eval_f1_micro": 0.9246298788694481, |
| "eval_loss": 0.20125848054885864, |
| "eval_precision_macro": 0.9546735463378956, |
| "eval_precision_micro": 0.9246298788694481, |
| "eval_recall_macro": 0.9429177293988182, |
| "eval_recall_micro": 0.9246298788694481, |
| "eval_runtime": 15.0054, |
| "eval_samples_per_second": 49.515, |
| "eval_steps_per_second": 3.132, |
| "step": 14000 |
| }, |
| { |
| "epoch": 3.1569780100152407, |
| "grad_norm": 17888.46484375, |
| "learning_rate": 1.0350951836516297e-05, |
| "loss": 0.0987, |
| "step": 14500 |
| }, |
| { |
| "epoch": 3.1569780100152407, |
| "eval_accuracy": 0.9232839838492598, |
| "eval_f1_macro": 0.9266276405829272, |
| "eval_f1_micro": 0.9232839838492598, |
| "eval_loss": 0.29369959235191345, |
| "eval_precision_macro": 0.9436536313571009, |
| "eval_precision_micro": 0.9232839838492598, |
| "eval_recall_macro": 0.9283196203410136, |
| "eval_recall_micro": 0.9232839838492598, |
| "eval_runtime": 14.7764, |
| "eval_samples_per_second": 50.283, |
| "eval_steps_per_second": 3.181, |
| "step": 14500 |
| }, |
| { |
| "epoch": 3.265839320705421, |
| "grad_norm": 85828.9375, |
| "learning_rate": 7.9376114407998e-06, |
| "loss": 0.0859, |
| "step": 15000 |
| }, |
| { |
| "epoch": 3.265839320705421, |
| "eval_accuracy": 0.9246298788694481, |
| "eval_f1_macro": 0.9402166974265765, |
| "eval_f1_micro": 0.9246298788694481, |
| "eval_loss": 0.17889092862606049, |
| "eval_precision_macro": 0.9685045177945787, |
| "eval_precision_micro": 0.9246298788694481, |
| "eval_recall_macro": 0.9463450172046672, |
| "eval_recall_micro": 0.9246298788694481, |
| "eval_runtime": 14.7495, |
| "eval_samples_per_second": 50.375, |
| "eval_steps_per_second": 3.187, |
| "step": 15000 |
| }, |
| { |
| "epoch": 3.374700631395602, |
| "grad_norm": 146288.75, |
| "learning_rate": 5.813440943640527e-06, |
| "loss": 0.0857, |
| "step": 15500 |
| }, |
| { |
| "epoch": 3.374700631395602, |
| "eval_accuracy": 0.927321668909825, |
| "eval_f1_macro": 0.9434096975688787, |
| "eval_f1_micro": 0.927321668909825, |
| "eval_loss": 0.16961060464382172, |
| "eval_precision_macro": 0.9641802881027017, |
| "eval_precision_micro": 0.927321668909825, |
| "eval_recall_macro": 0.9472331991452233, |
| "eval_recall_micro": 0.927321668909825, |
| "eval_runtime": 14.7305, |
| "eval_samples_per_second": 50.44, |
| "eval_steps_per_second": 3.191, |
| "step": 15500 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 18372, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 4, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 6.52555679969065e+16, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|