| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.26628895184136, |
| "eval_steps": 500, |
| "global_step": 100, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0226628895184136, |
| "grad_norm": 5.715946698612309, |
| "learning_rate": 3.7037037037037036e-08, |
| "loss": 1.039, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0453257790368272, |
| "grad_norm": 5.918098634610158, |
| "learning_rate": 7.407407407407407e-08, |
| "loss": 1.0345, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0679886685552408, |
| "grad_norm": 5.967358491879423, |
| "learning_rate": 1.111111111111111e-07, |
| "loss": 1.0568, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0906515580736544, |
| "grad_norm": 6.076151471056227, |
| "learning_rate": 1.4814814814814815e-07, |
| "loss": 1.0407, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.11331444759206799, |
| "grad_norm": 5.698276915195162, |
| "learning_rate": 1.8518518518518516e-07, |
| "loss": 1.0355, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.1359773371104816, |
| "grad_norm": 5.524873495595531, |
| "learning_rate": 2.222222222222222e-07, |
| "loss": 1.0329, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.15864022662889518, |
| "grad_norm": 5.663139068043792, |
| "learning_rate": 2.5925925925925923e-07, |
| "loss": 1.013, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.1813031161473088, |
| "grad_norm": 5.483842003291619, |
| "learning_rate": 2.962962962962963e-07, |
| "loss": 1.0285, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.20396600566572237, |
| "grad_norm": 5.501921058157795, |
| "learning_rate": 3.333333333333333e-07, |
| "loss": 1.0181, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.22662889518413598, |
| "grad_norm": 5.691661611678567, |
| "learning_rate": 3.703703703703703e-07, |
| "loss": 1.0046, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.24929178470254956, |
| "grad_norm": 5.490524973688248, |
| "learning_rate": 4.0740740740740737e-07, |
| "loss": 1.0291, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.2719546742209632, |
| "grad_norm": 4.885236117260528, |
| "learning_rate": 4.444444444444444e-07, |
| "loss": 1.0084, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.29461756373937675, |
| "grad_norm": 5.256688897749667, |
| "learning_rate": 4.814814814814814e-07, |
| "loss": 0.9945, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.31728045325779036, |
| "grad_norm": 5.026023661790397, |
| "learning_rate": 5.185185185185185e-07, |
| "loss": 0.9936, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.33994334277620397, |
| "grad_norm": 4.979666180740075, |
| "learning_rate": 5.555555555555555e-07, |
| "loss": 0.9997, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.3626062322946176, |
| "grad_norm": 4.741351636847691, |
| "learning_rate": 5.925925925925926e-07, |
| "loss": 0.9904, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.38526912181303113, |
| "grad_norm": 4.429638197959212, |
| "learning_rate": 6.296296296296296e-07, |
| "loss": 0.9779, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.40793201133144474, |
| "grad_norm": 4.2702651723674006, |
| "learning_rate": 6.666666666666666e-07, |
| "loss": 0.9373, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.43059490084985835, |
| "grad_norm": 4.371215055008036, |
| "learning_rate": 7.037037037037037e-07, |
| "loss": 0.9616, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.45325779036827196, |
| "grad_norm": 4.300078040900759, |
| "learning_rate": 7.407407407407406e-07, |
| "loss": 0.9581, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.47592067988668557, |
| "grad_norm": 4.242855799180736, |
| "learning_rate": 7.777777777777778e-07, |
| "loss": 0.9454, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.4985835694050991, |
| "grad_norm": 3.4536592234259555, |
| "learning_rate": 8.148148148148147e-07, |
| "loss": 0.9274, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.5212464589235127, |
| "grad_norm": 3.3525795982748203, |
| "learning_rate": 8.518518518518518e-07, |
| "loss": 0.8833, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.5439093484419264, |
| "grad_norm": 3.110575381958802, |
| "learning_rate": 8.888888888888888e-07, |
| "loss": 0.9066, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.56657223796034, |
| "grad_norm": 3.18785930927135, |
| "learning_rate": 9.259259259259259e-07, |
| "loss": 0.8896, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.5892351274787535, |
| "grad_norm": 3.0188412291205684, |
| "learning_rate": 9.629629629629628e-07, |
| "loss": 0.9068, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.6118980169971672, |
| "grad_norm": 3.0072699515749344, |
| "learning_rate": 1e-06, |
| "loss": 0.8959, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.6345609065155807, |
| "grad_norm": 3.050779999599616, |
| "learning_rate": 9.999560724782173e-07, |
| "loss": 0.8648, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.6572237960339944, |
| "grad_norm": 3.034749793056673, |
| "learning_rate": 9.998242976313776e-07, |
| "loss": 0.8763, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.6798866855524079, |
| "grad_norm": 2.6230160618361897, |
| "learning_rate": 9.996046986136508e-07, |
| "loss": 0.8439, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.7025495750708215, |
| "grad_norm": 2.619746810094255, |
| "learning_rate": 9.992973140107996e-07, |
| "loss": 0.8395, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.7252124645892352, |
| "grad_norm": 2.2660982887250496, |
| "learning_rate": 9.989021978333994e-07, |
| "loss": 0.8407, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.7478753541076487, |
| "grad_norm": 1.92948640709938, |
| "learning_rate": 9.984194195073478e-07, |
| "loss": 0.8175, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.7705382436260623, |
| "grad_norm": 1.8673042037436878, |
| "learning_rate": 9.97849063861667e-07, |
| "loss": 0.7963, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.7932011331444759, |
| "grad_norm": 1.841378707582655, |
| "learning_rate": 9.971912311135967e-07, |
| "loss": 0.8177, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.8158640226628895, |
| "grad_norm": 1.6212101538356403, |
| "learning_rate": 9.964460368509865e-07, |
| "loss": 0.8036, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.8385269121813032, |
| "grad_norm": 1.6148282593388759, |
| "learning_rate": 9.956136120119856e-07, |
| "loss": 0.7945, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.8611898016997167, |
| "grad_norm": 1.5660870386151309, |
| "learning_rate": 9.946941028620347e-07, |
| "loss": 0.7919, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.8838526912181303, |
| "grad_norm": 1.5162976532167538, |
| "learning_rate": 9.936876709681666e-07, |
| "loss": 0.7965, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.9065155807365439, |
| "grad_norm": 1.4779616090178773, |
| "learning_rate": 9.92594493170617e-07, |
| "loss": 0.7872, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.9291784702549575, |
| "grad_norm": 1.4588545367417372, |
| "learning_rate": 9.914147615517526e-07, |
| "loss": 0.7933, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.9518413597733711, |
| "grad_norm": 1.2450088034935203, |
| "learning_rate": 9.901486834023181e-07, |
| "loss": 0.7401, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.9745042492917847, |
| "grad_norm": 1.1159548060929454, |
| "learning_rate": 9.887964811850157e-07, |
| "loss": 0.7496, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.9971671388101983, |
| "grad_norm": 1.0418410473138606, |
| "learning_rate": 9.87358392495415e-07, |
| "loss": 0.7568, |
| "step": 44 |
| }, |
| { |
| "epoch": 1.019830028328612, |
| "grad_norm": 2.1594760368768195, |
| "learning_rate": 9.858346700202048e-07, |
| "loss": 1.3469, |
| "step": 45 |
| }, |
| { |
| "epoch": 1.0424929178470255, |
| "grad_norm": 0.9706954495224399, |
| "learning_rate": 9.842255814927944e-07, |
| "loss": 0.7412, |
| "step": 46 |
| }, |
| { |
| "epoch": 1.065155807365439, |
| "grad_norm": 0.9479843401943371, |
| "learning_rate": 9.825314096462684e-07, |
| "loss": 0.712, |
| "step": 47 |
| }, |
| { |
| "epoch": 1.0878186968838528, |
| "grad_norm": 0.8785518016295425, |
| "learning_rate": 9.807524521637102e-07, |
| "loss": 0.721, |
| "step": 48 |
| }, |
| { |
| "epoch": 1.1104815864022664, |
| "grad_norm": 0.9083971698155864, |
| "learning_rate": 9.788890216258938e-07, |
| "loss": 0.7405, |
| "step": 49 |
| }, |
| { |
| "epoch": 1.13314447592068, |
| "grad_norm": 0.9052818651846114, |
| "learning_rate": 9.769414454563615e-07, |
| "loss": 0.7223, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.1558073654390935, |
| "grad_norm": 0.8244297426454674, |
| "learning_rate": 9.749100658638914e-07, |
| "loss": 0.7113, |
| "step": 51 |
| }, |
| { |
| "epoch": 1.178470254957507, |
| "grad_norm": 0.7448472800310213, |
| "learning_rate": 9.72795239782369e-07, |
| "loss": 0.7001, |
| "step": 52 |
| }, |
| { |
| "epoch": 1.2011331444759206, |
| "grad_norm": 0.8936397991398377, |
| "learning_rate": 9.705973388080692e-07, |
| "loss": 0.6924, |
| "step": 53 |
| }, |
| { |
| "epoch": 1.2237960339943343, |
| "grad_norm": 0.7188466048624885, |
| "learning_rate": 9.68316749134364e-07, |
| "loss": 0.7005, |
| "step": 54 |
| }, |
| { |
| "epoch": 1.246458923512748, |
| "grad_norm": 0.6923178573722074, |
| "learning_rate": 9.659538714838633e-07, |
| "loss": 0.6983, |
| "step": 55 |
| }, |
| { |
| "epoch": 1.2691218130311614, |
| "grad_norm": 0.6963394168232236, |
| "learning_rate": 9.63509121038005e-07, |
| "loss": 0.6932, |
| "step": 56 |
| }, |
| { |
| "epoch": 1.291784702549575, |
| "grad_norm": 0.6743675615821408, |
| "learning_rate": 9.609829273641032e-07, |
| "loss": 0.6789, |
| "step": 57 |
| }, |
| { |
| "epoch": 1.3144475920679888, |
| "grad_norm": 0.6786035246894967, |
| "learning_rate": 9.583757343398684e-07, |
| "loss": 0.6628, |
| "step": 58 |
| }, |
| { |
| "epoch": 1.3371104815864023, |
| "grad_norm": 0.7270460673039131, |
| "learning_rate": 9.55688000075414e-07, |
| "loss": 0.6831, |
| "step": 59 |
| }, |
| { |
| "epoch": 1.3597733711048159, |
| "grad_norm": 0.6841455902480504, |
| "learning_rate": 9.529201968327616e-07, |
| "loss": 0.6951, |
| "step": 60 |
| }, |
| { |
| "epoch": 1.3824362606232294, |
| "grad_norm": 0.6153616879449294, |
| "learning_rate": 9.500728109428603e-07, |
| "loss": 0.676, |
| "step": 61 |
| }, |
| { |
| "epoch": 1.405099150141643, |
| "grad_norm": 0.6177487537567523, |
| "learning_rate": 9.47146342720133e-07, |
| "loss": 0.6842, |
| "step": 62 |
| }, |
| { |
| "epoch": 1.4277620396600565, |
| "grad_norm": 0.5753559089127149, |
| "learning_rate": 9.441413063745659e-07, |
| "loss": 0.6408, |
| "step": 63 |
| }, |
| { |
| "epoch": 1.4504249291784703, |
| "grad_norm": 0.620464077741966, |
| "learning_rate": 9.410582299213572e-07, |
| "loss": 0.6952, |
| "step": 64 |
| }, |
| { |
| "epoch": 1.4730878186968839, |
| "grad_norm": 0.587732312757755, |
| "learning_rate": 9.378976550881392e-07, |
| "loss": 0.6897, |
| "step": 65 |
| }, |
| { |
| "epoch": 1.4957507082152974, |
| "grad_norm": 0.6133303288545134, |
| "learning_rate": 9.346601372197913e-07, |
| "loss": 0.6319, |
| "step": 66 |
| }, |
| { |
| "epoch": 1.5184135977337112, |
| "grad_norm": 0.5975684805854956, |
| "learning_rate": 9.313462451808599e-07, |
| "loss": 0.7085, |
| "step": 67 |
| }, |
| { |
| "epoch": 1.5410764872521248, |
| "grad_norm": 0.5691716789827311, |
| "learning_rate": 9.279565612556042e-07, |
| "loss": 0.6799, |
| "step": 68 |
| }, |
| { |
| "epoch": 1.5637393767705383, |
| "grad_norm": 0.5623581760482004, |
| "learning_rate": 9.24491681045682e-07, |
| "loss": 0.6627, |
| "step": 69 |
| }, |
| { |
| "epoch": 1.5864022662889519, |
| "grad_norm": 0.5545018113642449, |
| "learning_rate": 9.209522133654968e-07, |
| "loss": 0.6673, |
| "step": 70 |
| }, |
| { |
| "epoch": 1.6090651558073654, |
| "grad_norm": 0.6223379664208608, |
| "learning_rate": 9.17338780135223e-07, |
| "loss": 0.6682, |
| "step": 71 |
| }, |
| { |
| "epoch": 1.631728045325779, |
| "grad_norm": 0.5484348938274137, |
| "learning_rate": 9.136520162715286e-07, |
| "loss": 0.6459, |
| "step": 72 |
| }, |
| { |
| "epoch": 1.6543909348441925, |
| "grad_norm": 0.598633459691356, |
| "learning_rate": 9.098925695760131e-07, |
| "loss": 0.6663, |
| "step": 73 |
| }, |
| { |
| "epoch": 1.677053824362606, |
| "grad_norm": 0.6063642708751795, |
| "learning_rate": 9.060611006213832e-07, |
| "loss": 0.6471, |
| "step": 74 |
| }, |
| { |
| "epoch": 1.6997167138810199, |
| "grad_norm": 0.5310843433827631, |
| "learning_rate": 9.021582826353824e-07, |
| "loss": 0.6422, |
| "step": 75 |
| }, |
| { |
| "epoch": 1.7223796033994334, |
| "grad_norm": 0.5899701772442509, |
| "learning_rate": 8.981848013824993e-07, |
| "loss": 0.6616, |
| "step": 76 |
| }, |
| { |
| "epoch": 1.7450424929178472, |
| "grad_norm": 0.6774981304086599, |
| "learning_rate": 8.94141355043471e-07, |
| "loss": 0.6442, |
| "step": 77 |
| }, |
| { |
| "epoch": 1.7677053824362607, |
| "grad_norm": 0.5555862881849043, |
| "learning_rate": 8.90028654092606e-07, |
| "loss": 0.6427, |
| "step": 78 |
| }, |
| { |
| "epoch": 1.7903682719546743, |
| "grad_norm": 0.5521769324318557, |
| "learning_rate": 8.858474211729469e-07, |
| "loss": 0.6308, |
| "step": 79 |
| }, |
| { |
| "epoch": 1.8130311614730878, |
| "grad_norm": 0.5094008328024741, |
| "learning_rate": 8.815983909692941e-07, |
| "loss": 0.6375, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.8356940509915014, |
| "grad_norm": 0.47949684902186096, |
| "learning_rate": 8.77282310079115e-07, |
| "loss": 0.6124, |
| "step": 81 |
| }, |
| { |
| "epoch": 1.858356940509915, |
| "grad_norm": 0.5457213358478963, |
| "learning_rate": 8.72899936881359e-07, |
| "loss": 0.676, |
| "step": 82 |
| }, |
| { |
| "epoch": 1.8810198300283285, |
| "grad_norm": 0.5475114660934921, |
| "learning_rate": 8.684520414032023e-07, |
| "loss": 0.6462, |
| "step": 83 |
| }, |
| { |
| "epoch": 1.903682719546742, |
| "grad_norm": 0.5780771596548755, |
| "learning_rate": 8.639394051847471e-07, |
| "loss": 0.629, |
| "step": 84 |
| }, |
| { |
| "epoch": 1.9263456090651558, |
| "grad_norm": 0.5153044368837152, |
| "learning_rate": 8.593628211416963e-07, |
| "loss": 0.6607, |
| "step": 85 |
| }, |
| { |
| "epoch": 1.9490084985835694, |
| "grad_norm": 0.5078347714748787, |
| "learning_rate": 8.547230934260311e-07, |
| "loss": 0.653, |
| "step": 86 |
| }, |
| { |
| "epoch": 1.9716713881019832, |
| "grad_norm": 0.5090369208403657, |
| "learning_rate": 8.500210372847126e-07, |
| "loss": 0.6555, |
| "step": 87 |
| }, |
| { |
| "epoch": 1.9943342776203967, |
| "grad_norm": 0.521639825746896, |
| "learning_rate": 8.45257478916435e-07, |
| "loss": 0.6187, |
| "step": 88 |
| }, |
| { |
| "epoch": 2.0169971671388103, |
| "grad_norm": 1.4410319682327064, |
| "learning_rate": 8.404332553264546e-07, |
| "loss": 1.1825, |
| "step": 89 |
| }, |
| { |
| "epoch": 2.039660056657224, |
| "grad_norm": 0.5362038209234066, |
| "learning_rate": 8.355492141795184e-07, |
| "loss": 0.6046, |
| "step": 90 |
| }, |
| { |
| "epoch": 2.0623229461756374, |
| "grad_norm": 0.5295224430525873, |
| "learning_rate": 8.306062136509219e-07, |
| "loss": 0.607, |
| "step": 91 |
| }, |
| { |
| "epoch": 2.084985835694051, |
| "grad_norm": 0.5438204610049183, |
| "learning_rate": 8.256051222757187e-07, |
| "loss": 0.6425, |
| "step": 92 |
| }, |
| { |
| "epoch": 2.1076487252124645, |
| "grad_norm": 0.5637438849056178, |
| "learning_rate": 8.2054681879611e-07, |
| "loss": 0.6472, |
| "step": 93 |
| }, |
| { |
| "epoch": 2.130311614730878, |
| "grad_norm": 0.4915626737833171, |
| "learning_rate": 8.154321920070412e-07, |
| "loss": 0.6366, |
| "step": 94 |
| }, |
| { |
| "epoch": 2.1529745042492916, |
| "grad_norm": 0.5445000714826581, |
| "learning_rate": 8.102621406000308e-07, |
| "loss": 0.6302, |
| "step": 95 |
| }, |
| { |
| "epoch": 2.1756373937677056, |
| "grad_norm": 0.544017574639994, |
| "learning_rate": 8.050375730052621e-07, |
| "loss": 0.6016, |
| "step": 96 |
| }, |
| { |
| "epoch": 2.198300283286119, |
| "grad_norm": 0.7667138278664033, |
| "learning_rate": 7.997594072319625e-07, |
| "loss": 0.6476, |
| "step": 97 |
| }, |
| { |
| "epoch": 2.2209631728045327, |
| "grad_norm": 0.5723261101431134, |
| "learning_rate": 7.944285707070997e-07, |
| "loss": 0.5982, |
| "step": 98 |
| }, |
| { |
| "epoch": 2.2436260623229463, |
| "grad_norm": 0.5198427284810859, |
| "learning_rate": 7.890460001124241e-07, |
| "loss": 0.6373, |
| "step": 99 |
| }, |
| { |
| "epoch": 2.26628895184136, |
| "grad_norm": 0.5082652201383684, |
| "learning_rate": 7.83612641219884e-07, |
| "loss": 0.5894, |
| "step": 100 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 264, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 6, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 208143843852288.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|