{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 734, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0013623978201634877, "grad_norm": 27.647886276245117, "learning_rate": 0.0, "loss": 2.7866311073303223, "step": 1 }, { "epoch": 0.0027247956403269754, "grad_norm": 28.430374145507812, "learning_rate": 4.444444444444445e-07, "loss": 2.9265763759613037, "step": 2 }, { "epoch": 0.004087193460490463, "grad_norm": 24.750080108642578, "learning_rate": 8.88888888888889e-07, "loss": 2.650007724761963, "step": 3 }, { "epoch": 0.005449591280653951, "grad_norm": 26.000843048095703, "learning_rate": 1.3333333333333334e-06, "loss": 3.0528907775878906, "step": 4 }, { "epoch": 0.006811989100817439, "grad_norm": 27.27891731262207, "learning_rate": 1.777777777777778e-06, "loss": 2.7619106769561768, "step": 5 }, { "epoch": 0.008174386920980926, "grad_norm": 21.593921661376953, "learning_rate": 2.222222222222222e-06, "loss": 2.5617551803588867, "step": 6 }, { "epoch": 0.009536784741144414, "grad_norm": 19.521177291870117, "learning_rate": 2.666666666666667e-06, "loss": 2.3857626914978027, "step": 7 }, { "epoch": 0.010899182561307902, "grad_norm": 13.534676551818848, "learning_rate": 3.1111111111111116e-06, "loss": 2.1002044677734375, "step": 8 }, { "epoch": 0.01226158038147139, "grad_norm": 9.906437873840332, "learning_rate": 3.555555555555556e-06, "loss": 1.8867732286453247, "step": 9 }, { "epoch": 0.013623978201634877, "grad_norm": 10.24316692352295, "learning_rate": 4.000000000000001e-06, "loss": 1.7133018970489502, "step": 10 }, { "epoch": 0.014986376021798364, "grad_norm": 5.109798908233643, "learning_rate": 4.444444444444444e-06, "loss": 1.3970978260040283, "step": 11 }, { "epoch": 0.01634877384196185, "grad_norm": 4.742298126220703, "learning_rate": 4.888888888888889e-06, "loss": 1.3010644912719727, "step": 12 }, { "epoch": 0.017711171662125342, "grad_norm": 8.546895027160645, "learning_rate": 5.333333333333334e-06, "loss": 1.1571484804153442, "step": 13 }, { "epoch": 0.01907356948228883, "grad_norm": 2.846215009689331, "learning_rate": 5.777777777777778e-06, "loss": 1.193354845046997, "step": 14 }, { "epoch": 0.020435967302452316, "grad_norm": 2.637321710586548, "learning_rate": 6.222222222222223e-06, "loss": 1.077557921409607, "step": 15 }, { "epoch": 0.021798365122615803, "grad_norm": 1.948442816734314, "learning_rate": 6.666666666666667e-06, "loss": 0.8891923427581787, "step": 16 }, { "epoch": 0.02316076294277929, "grad_norm": 2.195993423461914, "learning_rate": 7.111111111111112e-06, "loss": 0.917742908000946, "step": 17 }, { "epoch": 0.02452316076294278, "grad_norm": 1.420324683189392, "learning_rate": 7.555555555555556e-06, "loss": 1.0057953596115112, "step": 18 }, { "epoch": 0.025885558583106268, "grad_norm": 1.1720658540725708, "learning_rate": 8.000000000000001e-06, "loss": 0.7948991060256958, "step": 19 }, { "epoch": 0.027247956403269755, "grad_norm": 2.2465505599975586, "learning_rate": 8.444444444444446e-06, "loss": 0.9116527438163757, "step": 20 }, { "epoch": 0.02861035422343324, "grad_norm": 1.0430631637573242, "learning_rate": 8.888888888888888e-06, "loss": 0.8433143496513367, "step": 21 }, { "epoch": 0.02997275204359673, "grad_norm": 0.9517339468002319, "learning_rate": 9.333333333333334e-06, "loss": 0.7266635894775391, "step": 22 }, { "epoch": 0.031335149863760216, "grad_norm": 0.9054233431816101, "learning_rate": 9.777777777777779e-06, "loss": 0.6896510124206543, "step": 23 }, { "epoch": 0.0326975476839237, "grad_norm": 0.8445485830307007, "learning_rate": 1.0222222222222223e-05, "loss": 0.7166739106178284, "step": 24 }, { "epoch": 0.0340599455040872, "grad_norm": 0.799867570400238, "learning_rate": 1.0666666666666667e-05, "loss": 0.70599365234375, "step": 25 }, { "epoch": 0.035422343324250684, "grad_norm": 0.8114046454429626, "learning_rate": 1.1111111111111113e-05, "loss": 0.68497633934021, "step": 26 }, { "epoch": 0.03678474114441417, "grad_norm": 0.717369556427002, "learning_rate": 1.1555555555555556e-05, "loss": 0.4579915404319763, "step": 27 }, { "epoch": 0.03814713896457766, "grad_norm": 0.8005459904670715, "learning_rate": 1.2e-05, "loss": 0.6453820466995239, "step": 28 }, { "epoch": 0.039509536784741145, "grad_norm": 0.7655712366104126, "learning_rate": 1.2444444444444446e-05, "loss": 0.5622988939285278, "step": 29 }, { "epoch": 0.04087193460490463, "grad_norm": 0.6727678775787354, "learning_rate": 1.288888888888889e-05, "loss": 0.5062695741653442, "step": 30 }, { "epoch": 0.04223433242506812, "grad_norm": 0.7242804169654846, "learning_rate": 1.3333333333333333e-05, "loss": 0.5595001578330994, "step": 31 }, { "epoch": 0.043596730245231606, "grad_norm": 0.7660211324691772, "learning_rate": 1.377777777777778e-05, "loss": 0.5437361598014832, "step": 32 }, { "epoch": 0.04495912806539509, "grad_norm": 0.7480101585388184, "learning_rate": 1.4222222222222224e-05, "loss": 0.5963411927223206, "step": 33 }, { "epoch": 0.04632152588555858, "grad_norm": 0.7058649659156799, "learning_rate": 1.4666666666666666e-05, "loss": 0.5767841339111328, "step": 34 }, { "epoch": 0.047683923705722074, "grad_norm": 0.6593936681747437, "learning_rate": 1.5111111111111112e-05, "loss": 0.4501464366912842, "step": 35 }, { "epoch": 0.04904632152588556, "grad_norm": 0.6758292317390442, "learning_rate": 1.555555555555556e-05, "loss": 0.5993712544441223, "step": 36 }, { "epoch": 0.05040871934604905, "grad_norm": 0.7313498258590698, "learning_rate": 1.6000000000000003e-05, "loss": 0.5267407894134521, "step": 37 }, { "epoch": 0.051771117166212535, "grad_norm": 0.6532080173492432, "learning_rate": 1.6444444444444444e-05, "loss": 0.5639113783836365, "step": 38 }, { "epoch": 0.05313351498637602, "grad_norm": 0.6379766464233398, "learning_rate": 1.688888888888889e-05, "loss": 0.5104179382324219, "step": 39 }, { "epoch": 0.05449591280653951, "grad_norm": 0.6598156690597534, "learning_rate": 1.7333333333333336e-05, "loss": 0.5445395112037659, "step": 40 }, { "epoch": 0.055858310626702996, "grad_norm": 0.6215161085128784, "learning_rate": 1.7777777777777777e-05, "loss": 0.5025588870048523, "step": 41 }, { "epoch": 0.05722070844686648, "grad_norm": 0.5954686403274536, "learning_rate": 1.8222222222222224e-05, "loss": 0.46108683943748474, "step": 42 }, { "epoch": 0.05858310626702997, "grad_norm": 0.6308771967887878, "learning_rate": 1.866666666666667e-05, "loss": 0.5122817158699036, "step": 43 }, { "epoch": 0.05994550408719346, "grad_norm": 0.6523401141166687, "learning_rate": 1.9111111111111113e-05, "loss": 0.5358462333679199, "step": 44 }, { "epoch": 0.06130790190735695, "grad_norm": 0.6872179508209229, "learning_rate": 1.9555555555555557e-05, "loss": 0.5521703958511353, "step": 45 }, { "epoch": 0.06267029972752043, "grad_norm": 0.5889444947242737, "learning_rate": 2e-05, "loss": 0.43677768111228943, "step": 46 }, { "epoch": 0.06403269754768393, "grad_norm": 0.6145616769790649, "learning_rate": 1.9999975629761854e-05, "loss": 0.581091046333313, "step": 47 }, { "epoch": 0.0653950953678474, "grad_norm": 0.570745587348938, "learning_rate": 1.9999902519166192e-05, "loss": 0.5111463069915771, "step": 48 }, { "epoch": 0.0667574931880109, "grad_norm": 0.6257374286651611, "learning_rate": 1.9999780668569363e-05, "loss": 0.5142855644226074, "step": 49 }, { "epoch": 0.0681198910081744, "grad_norm": 0.5916558504104614, "learning_rate": 1.9999610078565272e-05, "loss": 0.4986756443977356, "step": 50 }, { "epoch": 0.06948228882833787, "grad_norm": 0.517594575881958, "learning_rate": 1.999939074998538e-05, "loss": 0.40387624502182007, "step": 51 }, { "epoch": 0.07084468664850137, "grad_norm": 0.5758000612258911, "learning_rate": 1.9999122683898708e-05, "loss": 0.4982905387878418, "step": 52 }, { "epoch": 0.07220708446866485, "grad_norm": 0.5720963478088379, "learning_rate": 1.9998805881611816e-05, "loss": 0.4793824553489685, "step": 53 }, { "epoch": 0.07356948228882834, "grad_norm": 0.5784019231796265, "learning_rate": 1.9998440344668827e-05, "loss": 0.5332019329071045, "step": 54 }, { "epoch": 0.07493188010899182, "grad_norm": 0.5833513736724854, "learning_rate": 1.999802607485137e-05, "loss": 0.5038864016532898, "step": 55 }, { "epoch": 0.07629427792915532, "grad_norm": 0.5767697095870972, "learning_rate": 1.999756307417863e-05, "loss": 0.5325872898101807, "step": 56 }, { "epoch": 0.0776566757493188, "grad_norm": 0.5682862401008606, "learning_rate": 1.9997051344907284e-05, "loss": 0.5185012817382812, "step": 57 }, { "epoch": 0.07901907356948229, "grad_norm": 0.614080011844635, "learning_rate": 1.9996490889531528e-05, "loss": 0.49285584688186646, "step": 58 }, { "epoch": 0.08038147138964577, "grad_norm": 0.5887868404388428, "learning_rate": 1.999588171078305e-05, "loss": 0.5096205472946167, "step": 59 }, { "epoch": 0.08174386920980926, "grad_norm": 0.556518018245697, "learning_rate": 1.9995223811631016e-05, "loss": 0.43357372283935547, "step": 60 }, { "epoch": 0.08310626702997276, "grad_norm": 0.6359555125236511, "learning_rate": 1.9994517195282053e-05, "loss": 0.5556465983390808, "step": 61 }, { "epoch": 0.08446866485013624, "grad_norm": 0.5386205911636353, "learning_rate": 1.999376186518025e-05, "loss": 0.46860817074775696, "step": 62 }, { "epoch": 0.08583106267029973, "grad_norm": 0.6243954300880432, "learning_rate": 1.9992957825007115e-05, "loss": 0.5260002613067627, "step": 63 }, { "epoch": 0.08719346049046321, "grad_norm": 0.5575344562530518, "learning_rate": 1.9992105078681587e-05, "loss": 0.48442351818084717, "step": 64 }, { "epoch": 0.0885558583106267, "grad_norm": 0.5960455536842346, "learning_rate": 1.999120363035998e-05, "loss": 0.529167890548706, "step": 65 }, { "epoch": 0.08991825613079019, "grad_norm": 0.49278023838996887, "learning_rate": 1.9990253484436004e-05, "loss": 0.4226565361022949, "step": 66 }, { "epoch": 0.09128065395095368, "grad_norm": 0.64710932970047, "learning_rate": 1.9989254645540715e-05, "loss": 0.5989794731140137, "step": 67 }, { "epoch": 0.09264305177111716, "grad_norm": 0.5093353390693665, "learning_rate": 1.9988207118542504e-05, "loss": 0.4339316487312317, "step": 68 }, { "epoch": 0.09400544959128065, "grad_norm": 0.5457233786582947, "learning_rate": 1.998711090854706e-05, "loss": 0.46932798624038696, "step": 69 }, { "epoch": 0.09536784741144415, "grad_norm": 0.6200721859931946, "learning_rate": 1.998596602089737e-05, "loss": 0.5474086999893188, "step": 70 }, { "epoch": 0.09673024523160763, "grad_norm": 0.4950924217700958, "learning_rate": 1.9984772461173663e-05, "loss": 0.40740966796875, "step": 71 }, { "epoch": 0.09809264305177112, "grad_norm": 0.5403825640678406, "learning_rate": 1.998353023519341e-05, "loss": 0.4256601929664612, "step": 72 }, { "epoch": 0.0994550408719346, "grad_norm": 0.5570508241653442, "learning_rate": 1.9982239349011286e-05, "loss": 0.5229888558387756, "step": 73 }, { "epoch": 0.1008174386920981, "grad_norm": 0.5410299897193909, "learning_rate": 1.9980899808919122e-05, "loss": 0.4738315939903259, "step": 74 }, { "epoch": 0.10217983651226158, "grad_norm": 0.5304137468338013, "learning_rate": 1.9979511621445902e-05, "loss": 0.453370064496994, "step": 75 }, { "epoch": 0.10354223433242507, "grad_norm": 0.5148070454597473, "learning_rate": 1.9978074793357726e-05, "loss": 0.4637362062931061, "step": 76 }, { "epoch": 0.10490463215258855, "grad_norm": 0.4985616207122803, "learning_rate": 1.9976589331657754e-05, "loss": 0.41210031509399414, "step": 77 }, { "epoch": 0.10626702997275204, "grad_norm": 0.5819671154022217, "learning_rate": 1.99750552435862e-05, "loss": 0.49417591094970703, "step": 78 }, { "epoch": 0.10762942779291552, "grad_norm": 0.5260801315307617, "learning_rate": 1.997347253662028e-05, "loss": 0.46970927715301514, "step": 79 }, { "epoch": 0.10899182561307902, "grad_norm": 0.7598191499710083, "learning_rate": 1.9971841218474184e-05, "loss": 0.5050591230392456, "step": 80 }, { "epoch": 0.11035422343324251, "grad_norm": 0.5178552865982056, "learning_rate": 1.997016129709904e-05, "loss": 0.48321259021759033, "step": 81 }, { "epoch": 0.11171662125340599, "grad_norm": 0.588383674621582, "learning_rate": 1.9968432780682855e-05, "loss": 0.5230928659439087, "step": 82 }, { "epoch": 0.11307901907356949, "grad_norm": 0.48950737714767456, "learning_rate": 1.9966655677650512e-05, "loss": 0.41843950748443604, "step": 83 }, { "epoch": 0.11444141689373297, "grad_norm": 0.5205618739128113, "learning_rate": 1.9964829996663684e-05, "loss": 0.4415491819381714, "step": 84 }, { "epoch": 0.11580381471389646, "grad_norm": 0.5614944100379944, "learning_rate": 1.9962955746620832e-05, "loss": 0.5067013502120972, "step": 85 }, { "epoch": 0.11716621253405994, "grad_norm": 0.5298795104026794, "learning_rate": 1.9961032936657143e-05, "loss": 0.43599075078964233, "step": 86 }, { "epoch": 0.11852861035422343, "grad_norm": 0.5152458548545837, "learning_rate": 1.9959061576144482e-05, "loss": 0.4749916195869446, "step": 87 }, { "epoch": 0.11989100817438691, "grad_norm": 0.4422807991504669, "learning_rate": 1.9957041674691356e-05, "loss": 0.3546351194381714, "step": 88 }, { "epoch": 0.12125340599455041, "grad_norm": 0.5386228561401367, "learning_rate": 1.995497324214285e-05, "loss": 0.488656610250473, "step": 89 }, { "epoch": 0.1226158038147139, "grad_norm": 0.530255913734436, "learning_rate": 1.995285628858062e-05, "loss": 0.4679569602012634, "step": 90 }, { "epoch": 0.12397820163487738, "grad_norm": 0.5118647217750549, "learning_rate": 1.995069082432279e-05, "loss": 0.4375801682472229, "step": 91 }, { "epoch": 0.12534059945504086, "grad_norm": 0.5084353685379028, "learning_rate": 1.994847685992393e-05, "loss": 0.46044063568115234, "step": 92 }, { "epoch": 0.12670299727520437, "grad_norm": 0.49600714445114136, "learning_rate": 1.9946214406175016e-05, "loss": 0.4706187844276428, "step": 93 }, { "epoch": 0.12806539509536785, "grad_norm": 0.5475752949714661, "learning_rate": 1.9943903474103354e-05, "loss": 0.47216880321502686, "step": 94 }, { "epoch": 0.12942779291553133, "grad_norm": 0.4829142093658447, "learning_rate": 1.994154407497254e-05, "loss": 0.43380069732666016, "step": 95 }, { "epoch": 0.1307901907356948, "grad_norm": 0.4949623942375183, "learning_rate": 1.993913622028239e-05, "loss": 0.45172300934791565, "step": 96 }, { "epoch": 0.13215258855585832, "grad_norm": 0.5360006093978882, "learning_rate": 1.9936679921768905e-05, "loss": 0.43014198541641235, "step": 97 }, { "epoch": 0.1335149863760218, "grad_norm": 0.4718469977378845, "learning_rate": 1.9934175191404202e-05, "loss": 0.42612555623054504, "step": 98 }, { "epoch": 0.13487738419618528, "grad_norm": 0.5138616561889648, "learning_rate": 1.9931622041396456e-05, "loss": 0.4884234666824341, "step": 99 }, { "epoch": 0.1362397820163488, "grad_norm": 0.4901588559150696, "learning_rate": 1.9929020484189843e-05, "loss": 0.4561987519264221, "step": 100 }, { "epoch": 0.13760217983651227, "grad_norm": 0.46556147933006287, "learning_rate": 1.992637053246448e-05, "loss": 0.3546220660209656, "step": 101 }, { "epoch": 0.13896457765667575, "grad_norm": 0.5201694965362549, "learning_rate": 1.992367219913635e-05, "loss": 0.4550625681877136, "step": 102 }, { "epoch": 0.14032697547683923, "grad_norm": 0.5415034294128418, "learning_rate": 1.9920925497357265e-05, "loss": 0.43804478645324707, "step": 103 }, { "epoch": 0.14168937329700274, "grad_norm": 0.475267231464386, "learning_rate": 1.9918130440514775e-05, "loss": 0.37618038058280945, "step": 104 }, { "epoch": 0.14305177111716622, "grad_norm": 0.47954168915748596, "learning_rate": 1.9915287042232117e-05, "loss": 0.40945565700531006, "step": 105 }, { "epoch": 0.1444141689373297, "grad_norm": 0.4963965117931366, "learning_rate": 1.9912395316368163e-05, "loss": 0.39603012800216675, "step": 106 }, { "epoch": 0.14577656675749318, "grad_norm": 0.5018640756607056, "learning_rate": 1.990945527701731e-05, "loss": 0.42917919158935547, "step": 107 }, { "epoch": 0.14713896457765668, "grad_norm": 0.49071332812309265, "learning_rate": 1.9906466938509456e-05, "loss": 0.41273534297943115, "step": 108 }, { "epoch": 0.14850136239782016, "grad_norm": 0.4668489992618561, "learning_rate": 1.9903430315409908e-05, "loss": 0.37738558650016785, "step": 109 }, { "epoch": 0.14986376021798364, "grad_norm": 0.5912647843360901, "learning_rate": 1.9900345422519302e-05, "loss": 0.537000298500061, "step": 110 }, { "epoch": 0.15122615803814715, "grad_norm": 0.501377522945404, "learning_rate": 1.9897212274873558e-05, "loss": 0.45420363545417786, "step": 111 }, { "epoch": 0.15258855585831063, "grad_norm": 0.47566258907318115, "learning_rate": 1.989403088774379e-05, "loss": 0.4192931652069092, "step": 112 }, { "epoch": 0.1539509536784741, "grad_norm": 0.545165479183197, "learning_rate": 1.9890801276636226e-05, "loss": 0.5133235454559326, "step": 113 }, { "epoch": 0.1553133514986376, "grad_norm": 0.4873102307319641, "learning_rate": 1.9887523457292145e-05, "loss": 0.45365816354751587, "step": 114 }, { "epoch": 0.1566757493188011, "grad_norm": 0.45992809534072876, "learning_rate": 1.9884197445687795e-05, "loss": 0.4125695824623108, "step": 115 }, { "epoch": 0.15803814713896458, "grad_norm": 0.489008367061615, "learning_rate": 1.9880823258034317e-05, "loss": 0.45539143681526184, "step": 116 }, { "epoch": 0.15940054495912806, "grad_norm": 0.45899710059165955, "learning_rate": 1.987740091077766e-05, "loss": 0.3859185576438904, "step": 117 }, { "epoch": 0.16076294277929154, "grad_norm": 0.48963943123817444, "learning_rate": 1.9873930420598508e-05, "loss": 0.4098602831363678, "step": 118 }, { "epoch": 0.16212534059945505, "grad_norm": 0.5609935522079468, "learning_rate": 1.9870411804412196e-05, "loss": 0.541596531867981, "step": 119 }, { "epoch": 0.16348773841961853, "grad_norm": 0.451092392206192, "learning_rate": 1.9866845079368628e-05, "loss": 0.369413822889328, "step": 120 }, { "epoch": 0.164850136239782, "grad_norm": 0.5160583257675171, "learning_rate": 1.9863230262852188e-05, "loss": 0.48153048753738403, "step": 121 }, { "epoch": 0.16621253405994552, "grad_norm": 0.5104784369468689, "learning_rate": 1.9859567372481666e-05, "loss": 0.463962197303772, "step": 122 }, { "epoch": 0.167574931880109, "grad_norm": 0.4819332957267761, "learning_rate": 1.9855856426110163e-05, "loss": 0.4457293152809143, "step": 123 }, { "epoch": 0.16893732970027248, "grad_norm": 0.545768678188324, "learning_rate": 1.9852097441825017e-05, "loss": 0.4510464072227478, "step": 124 }, { "epoch": 0.17029972752043596, "grad_norm": 0.46205708384513855, "learning_rate": 1.9848290437947683e-05, "loss": 0.4033975303173065, "step": 125 }, { "epoch": 0.17166212534059946, "grad_norm": 0.4812323749065399, "learning_rate": 1.9844435433033687e-05, "loss": 0.42786744236946106, "step": 126 }, { "epoch": 0.17302452316076294, "grad_norm": 0.5063855051994324, "learning_rate": 1.9840532445872504e-05, "loss": 0.439453125, "step": 127 }, { "epoch": 0.17438692098092642, "grad_norm": 0.42103609442710876, "learning_rate": 1.983658149548748e-05, "loss": 0.372048556804657, "step": 128 }, { "epoch": 0.17574931880108993, "grad_norm": 0.4785323739051819, "learning_rate": 1.9832582601135737e-05, "loss": 0.4088958501815796, "step": 129 }, { "epoch": 0.1771117166212534, "grad_norm": 0.4796772301197052, "learning_rate": 1.9828535782308074e-05, "loss": 0.4392489492893219, "step": 130 }, { "epoch": 0.1784741144414169, "grad_norm": 0.5020930767059326, "learning_rate": 1.9824441058728882e-05, "loss": 0.4506283402442932, "step": 131 }, { "epoch": 0.17983651226158037, "grad_norm": 0.44487935304641724, "learning_rate": 1.9820298450356036e-05, "loss": 0.37223148345947266, "step": 132 }, { "epoch": 0.18119891008174388, "grad_norm": 0.48866400122642517, "learning_rate": 1.9816107977380805e-05, "loss": 0.4460309147834778, "step": 133 }, { "epoch": 0.18256130790190736, "grad_norm": 0.4672461450099945, "learning_rate": 1.9811869660227757e-05, "loss": 0.4269269108772278, "step": 134 }, { "epoch": 0.18392370572207084, "grad_norm": 0.49336063861846924, "learning_rate": 1.980758351955465e-05, "loss": 0.4544104039669037, "step": 135 }, { "epoch": 0.18528610354223432, "grad_norm": 0.45952633023262024, "learning_rate": 1.9803249576252338e-05, "loss": 0.38991525769233704, "step": 136 }, { "epoch": 0.18664850136239783, "grad_norm": 0.48544129729270935, "learning_rate": 1.979886785144467e-05, "loss": 0.42076367139816284, "step": 137 }, { "epoch": 0.1880108991825613, "grad_norm": 0.38183560967445374, "learning_rate": 1.9794438366488377e-05, "loss": 0.2765650153160095, "step": 138 }, { "epoch": 0.1893732970027248, "grad_norm": 0.45844128727912903, "learning_rate": 1.9789961142972983e-05, "loss": 0.3620038628578186, "step": 139 }, { "epoch": 0.1907356948228883, "grad_norm": 0.4732954502105713, "learning_rate": 1.9785436202720687e-05, "loss": 0.40507155656814575, "step": 140 }, { "epoch": 0.19209809264305178, "grad_norm": 0.4901241362094879, "learning_rate": 1.978086356778626e-05, "loss": 0.4239882826805115, "step": 141 }, { "epoch": 0.19346049046321526, "grad_norm": 0.4907649755477905, "learning_rate": 1.9776243260456953e-05, "loss": 0.4591466188430786, "step": 142 }, { "epoch": 0.19482288828337874, "grad_norm": 0.4430483877658844, "learning_rate": 1.977157530325235e-05, "loss": 0.4003378450870514, "step": 143 }, { "epoch": 0.19618528610354224, "grad_norm": 0.4529455006122589, "learning_rate": 1.976685971892431e-05, "loss": 0.4060153663158417, "step": 144 }, { "epoch": 0.19754768392370572, "grad_norm": 0.500708281993866, "learning_rate": 1.9762096530456803e-05, "loss": 0.4507729411125183, "step": 145 }, { "epoch": 0.1989100817438692, "grad_norm": 0.4838204085826874, "learning_rate": 1.9757285761065846e-05, "loss": 0.4445388913154602, "step": 146 }, { "epoch": 0.20027247956403268, "grad_norm": 0.47918063402175903, "learning_rate": 1.9752427434199356e-05, "loss": 0.4199233651161194, "step": 147 }, { "epoch": 0.2016348773841962, "grad_norm": 0.4424859881401062, "learning_rate": 1.9747521573537048e-05, "loss": 0.3845542073249817, "step": 148 }, { "epoch": 0.20299727520435967, "grad_norm": 0.5027382373809814, "learning_rate": 1.974256820299032e-05, "loss": 0.4608474373817444, "step": 149 }, { "epoch": 0.20435967302452315, "grad_norm": 0.4017482399940491, "learning_rate": 1.9737567346702137e-05, "loss": 0.35635966062545776, "step": 150 }, { "epoch": 0.20572207084468666, "grad_norm": 0.4682810604572296, "learning_rate": 1.973251902904691e-05, "loss": 0.3920828104019165, "step": 151 }, { "epoch": 0.20708446866485014, "grad_norm": 0.4764021337032318, "learning_rate": 1.9727423274630385e-05, "loss": 0.43593406677246094, "step": 152 }, { "epoch": 0.20844686648501362, "grad_norm": 0.5115736126899719, "learning_rate": 1.97222801082895e-05, "loss": 0.4738002419471741, "step": 153 }, { "epoch": 0.2098092643051771, "grad_norm": 0.4896221458911896, "learning_rate": 1.9717089555092306e-05, "loss": 0.4509856104850769, "step": 154 }, { "epoch": 0.2111716621253406, "grad_norm": 0.4766773581504822, "learning_rate": 1.97118516403378e-05, "loss": 0.44380685687065125, "step": 155 }, { "epoch": 0.2125340599455041, "grad_norm": 0.5250641107559204, "learning_rate": 1.9706566389555825e-05, "loss": 0.5936962962150574, "step": 156 }, { "epoch": 0.21389645776566757, "grad_norm": 0.4729316830635071, "learning_rate": 1.970123382850695e-05, "loss": 0.4068388342857361, "step": 157 }, { "epoch": 0.21525885558583105, "grad_norm": 0.4348066449165344, "learning_rate": 1.969585398318233e-05, "loss": 0.3906315267086029, "step": 158 }, { "epoch": 0.21662125340599456, "grad_norm": 0.4758957624435425, "learning_rate": 1.969042687980359e-05, "loss": 0.4242008924484253, "step": 159 }, { "epoch": 0.21798365122615804, "grad_norm": 0.49115386605262756, "learning_rate": 1.9684952544822685e-05, "loss": 0.44710224866867065, "step": 160 }, { "epoch": 0.21934604904632152, "grad_norm": 0.506231963634491, "learning_rate": 1.9679431004921788e-05, "loss": 0.49800950288772583, "step": 161 }, { "epoch": 0.22070844686648503, "grad_norm": 0.4652118682861328, "learning_rate": 1.9673862287013144e-05, "loss": 0.397954523563385, "step": 162 }, { "epoch": 0.2220708446866485, "grad_norm": 0.4163675010204315, "learning_rate": 1.9668246418238955e-05, "loss": 0.33537113666534424, "step": 163 }, { "epoch": 0.22343324250681199, "grad_norm": 0.46636438369750977, "learning_rate": 1.9662583425971227e-05, "loss": 0.4491173028945923, "step": 164 }, { "epoch": 0.22479564032697547, "grad_norm": 0.471574068069458, "learning_rate": 1.9656873337811658e-05, "loss": 0.4455568790435791, "step": 165 }, { "epoch": 0.22615803814713897, "grad_norm": 0.42547208070755005, "learning_rate": 1.9651116181591493e-05, "loss": 0.33267468214035034, "step": 166 }, { "epoch": 0.22752043596730245, "grad_norm": 0.46092942357063293, "learning_rate": 1.9645311985371374e-05, "loss": 0.38794922828674316, "step": 167 }, { "epoch": 0.22888283378746593, "grad_norm": 0.4635142683982849, "learning_rate": 1.9639460777441243e-05, "loss": 0.41988471150398254, "step": 168 }, { "epoch": 0.23024523160762944, "grad_norm": 0.5089847445487976, "learning_rate": 1.9633562586320157e-05, "loss": 0.4452226758003235, "step": 169 }, { "epoch": 0.23160762942779292, "grad_norm": 0.42120999097824097, "learning_rate": 1.962761744075618e-05, "loss": 0.34265103936195374, "step": 170 }, { "epoch": 0.2329700272479564, "grad_norm": 0.37199750542640686, "learning_rate": 1.9621625369726246e-05, "loss": 0.2743265926837921, "step": 171 }, { "epoch": 0.23433242506811988, "grad_norm": 0.45613083243370056, "learning_rate": 1.961558640243598e-05, "loss": 0.3892587423324585, "step": 172 }, { "epoch": 0.2356948228882834, "grad_norm": 0.5023411512374878, "learning_rate": 1.9609500568319605e-05, "loss": 0.4512037932872772, "step": 173 }, { "epoch": 0.23705722070844687, "grad_norm": 0.4689593017101288, "learning_rate": 1.960336789703977e-05, "loss": 0.4525565207004547, "step": 174 }, { "epoch": 0.23841961852861035, "grad_norm": 0.4769952595233917, "learning_rate": 1.9597188418487395e-05, "loss": 0.4317702651023865, "step": 175 }, { "epoch": 0.23978201634877383, "grad_norm": 0.44649723172187805, "learning_rate": 1.959096216278156e-05, "loss": 0.4062075912952423, "step": 176 }, { "epoch": 0.24114441416893734, "grad_norm": 0.4167250096797943, "learning_rate": 1.958468916026933e-05, "loss": 0.3360878825187683, "step": 177 }, { "epoch": 0.24250681198910082, "grad_norm": 0.501832127571106, "learning_rate": 1.957836944152562e-05, "loss": 0.4890025854110718, "step": 178 }, { "epoch": 0.2438692098092643, "grad_norm": 0.4422992169857025, "learning_rate": 1.957200303735304e-05, "loss": 0.3915623128414154, "step": 179 }, { "epoch": 0.2452316076294278, "grad_norm": 0.43513938784599304, "learning_rate": 1.9565589978781747e-05, "loss": 0.3817967474460602, "step": 180 }, { "epoch": 0.24659400544959129, "grad_norm": 0.5025864839553833, "learning_rate": 1.955913029706929e-05, "loss": 0.47003644704818726, "step": 181 }, { "epoch": 0.24795640326975477, "grad_norm": 0.481099009513855, "learning_rate": 1.9552624023700472e-05, "loss": 0.4433462917804718, "step": 182 }, { "epoch": 0.24931880108991825, "grad_norm": 0.5063067078590393, "learning_rate": 1.9546071190387175e-05, "loss": 0.41345930099487305, "step": 183 }, { "epoch": 0.2506811989100817, "grad_norm": 0.46571362018585205, "learning_rate": 1.953947182906822e-05, "loss": 0.4099092483520508, "step": 184 }, { "epoch": 0.25204359673024523, "grad_norm": 0.43692854046821594, "learning_rate": 1.953282597190921e-05, "loss": 0.34167295694351196, "step": 185 }, { "epoch": 0.25340599455040874, "grad_norm": 0.46433547139167786, "learning_rate": 1.9526133651302372e-05, "loss": 0.4195370674133301, "step": 186 }, { "epoch": 0.2547683923705722, "grad_norm": 0.4526609778404236, "learning_rate": 1.951939489986639e-05, "loss": 0.4328376352787018, "step": 187 }, { "epoch": 0.2561307901907357, "grad_norm": 0.4832986891269684, "learning_rate": 1.951260975044626e-05, "loss": 0.42811205983161926, "step": 188 }, { "epoch": 0.2574931880108992, "grad_norm": 0.46561217308044434, "learning_rate": 1.950577823611313e-05, "loss": 0.4136439561843872, "step": 189 }, { "epoch": 0.25885558583106266, "grad_norm": 0.7555133700370789, "learning_rate": 1.9498900390164118e-05, "loss": 0.44394075870513916, "step": 190 }, { "epoch": 0.26021798365122617, "grad_norm": 0.49045872688293457, "learning_rate": 1.949197624612218e-05, "loss": 0.47819724678993225, "step": 191 }, { "epoch": 0.2615803814713896, "grad_norm": 0.4706166386604309, "learning_rate": 1.9485005837735918e-05, "loss": 0.4488511383533478, "step": 192 }, { "epoch": 0.26294277929155313, "grad_norm": 0.4559481143951416, "learning_rate": 1.947798919897944e-05, "loss": 0.35875898599624634, "step": 193 }, { "epoch": 0.26430517711171664, "grad_norm": 0.459917277097702, "learning_rate": 1.947092636405217e-05, "loss": 0.43740493059158325, "step": 194 }, { "epoch": 0.2656675749318801, "grad_norm": 0.48496246337890625, "learning_rate": 1.946381736737871e-05, "loss": 0.4613405466079712, "step": 195 }, { "epoch": 0.2670299727520436, "grad_norm": 0.5405692458152771, "learning_rate": 1.9456662243608643e-05, "loss": 0.5320178270339966, "step": 196 }, { "epoch": 0.2683923705722071, "grad_norm": 0.4415741264820099, "learning_rate": 1.9449461027616382e-05, "loss": 0.376873642206192, "step": 197 }, { "epoch": 0.26975476839237056, "grad_norm": 0.4813419282436371, "learning_rate": 1.9442213754501002e-05, "loss": 0.4509425759315491, "step": 198 }, { "epoch": 0.27111716621253407, "grad_norm": 0.511968731880188, "learning_rate": 1.9434920459586054e-05, "loss": 0.5076147317886353, "step": 199 }, { "epoch": 0.2724795640326976, "grad_norm": 0.4836997091770172, "learning_rate": 1.9427581178419408e-05, "loss": 0.44785064458847046, "step": 200 }, { "epoch": 0.273841961852861, "grad_norm": 0.4130904972553253, "learning_rate": 1.9420195946773063e-05, "loss": 0.36012643575668335, "step": 201 }, { "epoch": 0.27520435967302453, "grad_norm": 0.4587012827396393, "learning_rate": 1.9412764800643e-05, "loss": 0.45600613951683044, "step": 202 }, { "epoch": 0.276566757493188, "grad_norm": 0.4676440954208374, "learning_rate": 1.940528777624897e-05, "loss": 0.4534785747528076, "step": 203 }, { "epoch": 0.2779291553133515, "grad_norm": 0.5145980715751648, "learning_rate": 1.939776491003435e-05, "loss": 0.48491308093070984, "step": 204 }, { "epoch": 0.279291553133515, "grad_norm": 0.41034233570098877, "learning_rate": 1.9390196238665944e-05, "loss": 0.34817013144493103, "step": 205 }, { "epoch": 0.28065395095367845, "grad_norm": 0.5155647397041321, "learning_rate": 1.9382581799033824e-05, "loss": 0.5011834502220154, "step": 206 }, { "epoch": 0.28201634877384196, "grad_norm": 0.4596708416938782, "learning_rate": 1.9374921628251127e-05, "loss": 0.39814120531082153, "step": 207 }, { "epoch": 0.28337874659400547, "grad_norm": 0.4686714708805084, "learning_rate": 1.936721576365389e-05, "loss": 0.48343995213508606, "step": 208 }, { "epoch": 0.2847411444141689, "grad_norm": 0.47685396671295166, "learning_rate": 1.935946424280087e-05, "loss": 0.3693901002407074, "step": 209 }, { "epoch": 0.28610354223433243, "grad_norm": 0.46070703864097595, "learning_rate": 1.935166710347334e-05, "loss": 0.40199288725852966, "step": 210 }, { "epoch": 0.28746594005449594, "grad_norm": 0.42680197954177856, "learning_rate": 1.9343824383674936e-05, "loss": 0.39812421798706055, "step": 211 }, { "epoch": 0.2888283378746594, "grad_norm": 0.44704172015190125, "learning_rate": 1.9335936121631442e-05, "loss": 0.36939626932144165, "step": 212 }, { "epoch": 0.2901907356948229, "grad_norm": 0.39125990867614746, "learning_rate": 1.9328002355790624e-05, "loss": 0.332170695066452, "step": 213 }, { "epoch": 0.29155313351498635, "grad_norm": 0.4364356994628906, "learning_rate": 1.9320023124822035e-05, "loss": 0.3795939087867737, "step": 214 }, { "epoch": 0.29291553133514986, "grad_norm": 0.427299827337265, "learning_rate": 1.931199846761683e-05, "loss": 0.3539160192012787, "step": 215 }, { "epoch": 0.29427792915531337, "grad_norm": 0.3824451267719269, "learning_rate": 1.9303928423287568e-05, "loss": 0.33199343085289, "step": 216 }, { "epoch": 0.2956403269754768, "grad_norm": 0.4601697027683258, "learning_rate": 1.929581303116803e-05, "loss": 0.4479762017726898, "step": 217 }, { "epoch": 0.2970027247956403, "grad_norm": 0.4551186263561249, "learning_rate": 1.9287652330813024e-05, "loss": 0.4017976224422455, "step": 218 }, { "epoch": 0.29836512261580383, "grad_norm": 0.4389873445034027, "learning_rate": 1.9279446361998188e-05, "loss": 0.3540688157081604, "step": 219 }, { "epoch": 0.2997275204359673, "grad_norm": 0.45103585720062256, "learning_rate": 1.927119516471981e-05, "loss": 0.4199802875518799, "step": 220 }, { "epoch": 0.3010899182561308, "grad_norm": 0.4221908450126648, "learning_rate": 1.9262898779194613e-05, "loss": 0.36206182837486267, "step": 221 }, { "epoch": 0.3024523160762943, "grad_norm": 0.46009913086891174, "learning_rate": 1.9254557245859583e-05, "loss": 0.4081338047981262, "step": 222 }, { "epoch": 0.30381471389645776, "grad_norm": 0.46696937084198, "learning_rate": 1.924617060537175e-05, "loss": 0.36600178480148315, "step": 223 }, { "epoch": 0.30517711171662126, "grad_norm": 0.44915148615837097, "learning_rate": 1.9237738898607992e-05, "loss": 0.3773924708366394, "step": 224 }, { "epoch": 0.3065395095367847, "grad_norm": 0.40148746967315674, "learning_rate": 1.9229262166664854e-05, "loss": 0.29502028226852417, "step": 225 }, { "epoch": 0.3079019073569482, "grad_norm": 0.3726682960987091, "learning_rate": 1.9220740450858328e-05, "loss": 0.3030723035335541, "step": 226 }, { "epoch": 0.30926430517711173, "grad_norm": 0.5065223574638367, "learning_rate": 1.921217379272367e-05, "loss": 0.4941141605377197, "step": 227 }, { "epoch": 0.3106267029972752, "grad_norm": 0.4267047941684723, "learning_rate": 1.9203562234015172e-05, "loss": 0.35858261585235596, "step": 228 }, { "epoch": 0.3119891008174387, "grad_norm": 0.38982680439949036, "learning_rate": 1.9194905816705988e-05, "loss": 0.3197462856769562, "step": 229 }, { "epoch": 0.3133514986376022, "grad_norm": 0.44453105330467224, "learning_rate": 1.91862045829879e-05, "loss": 0.40640050172805786, "step": 230 }, { "epoch": 0.31471389645776565, "grad_norm": 0.492948442697525, "learning_rate": 1.9177458575271143e-05, "loss": 0.4595552682876587, "step": 231 }, { "epoch": 0.31607629427792916, "grad_norm": 0.4012284278869629, "learning_rate": 1.916866783618417e-05, "loss": 0.3264189660549164, "step": 232 }, { "epoch": 0.31743869209809267, "grad_norm": 0.4652048647403717, "learning_rate": 1.9159832408573467e-05, "loss": 0.4094054698944092, "step": 233 }, { "epoch": 0.3188010899182561, "grad_norm": 0.46388110518455505, "learning_rate": 1.9150952335503325e-05, "loss": 0.4324240982532501, "step": 234 }, { "epoch": 0.3201634877384196, "grad_norm": 0.44346991181373596, "learning_rate": 1.9142027660255645e-05, "loss": 0.369159996509552, "step": 235 }, { "epoch": 0.3215258855585831, "grad_norm": 0.42788419127464294, "learning_rate": 1.9133058426329717e-05, "loss": 0.37542596459388733, "step": 236 }, { "epoch": 0.3228882833787466, "grad_norm": 0.41329479217529297, "learning_rate": 1.912404467744202e-05, "loss": 0.36349016427993774, "step": 237 }, { "epoch": 0.3242506811989101, "grad_norm": 0.47673869132995605, "learning_rate": 1.911498645752599e-05, "loss": 0.4231317937374115, "step": 238 }, { "epoch": 0.32561307901907355, "grad_norm": 0.45236167311668396, "learning_rate": 1.9105883810731822e-05, "loss": 0.41860949993133545, "step": 239 }, { "epoch": 0.32697547683923706, "grad_norm": 0.47766655683517456, "learning_rate": 1.9096736781426252e-05, "loss": 0.45107653737068176, "step": 240 }, { "epoch": 0.32833787465940056, "grad_norm": 0.4172971546649933, "learning_rate": 1.9087545414192338e-05, "loss": 0.36643242835998535, "step": 241 }, { "epoch": 0.329700272479564, "grad_norm": 0.4231413006782532, "learning_rate": 1.907830975382924e-05, "loss": 0.3520575165748596, "step": 242 }, { "epoch": 0.3310626702997275, "grad_norm": 0.4126909375190735, "learning_rate": 1.9069029845352006e-05, "loss": 0.36405885219573975, "step": 243 }, { "epoch": 0.33242506811989103, "grad_norm": 0.42022719979286194, "learning_rate": 1.9059705733991352e-05, "loss": 0.3583207130432129, "step": 244 }, { "epoch": 0.3337874659400545, "grad_norm": 0.4531959593296051, "learning_rate": 1.9050337465193443e-05, "loss": 0.38180652260780334, "step": 245 }, { "epoch": 0.335149863760218, "grad_norm": 0.42306748032569885, "learning_rate": 1.9040925084619663e-05, "loss": 0.3619072437286377, "step": 246 }, { "epoch": 0.33651226158038144, "grad_norm": 0.46138980984687805, "learning_rate": 1.9031468638146408e-05, "loss": 0.38578206300735474, "step": 247 }, { "epoch": 0.33787465940054495, "grad_norm": 0.43929317593574524, "learning_rate": 1.9021968171864843e-05, "loss": 0.4235476851463318, "step": 248 }, { "epoch": 0.33923705722070846, "grad_norm": 0.401759535074234, "learning_rate": 1.90124237320807e-05, "loss": 0.34792521595954895, "step": 249 }, { "epoch": 0.3405994550408719, "grad_norm": 0.478180468082428, "learning_rate": 1.900283536531403e-05, "loss": 0.4711982011795044, "step": 250 }, { "epoch": 0.3419618528610354, "grad_norm": 0.43466177582740784, "learning_rate": 1.8993203118298988e-05, "loss": 0.36447733640670776, "step": 251 }, { "epoch": 0.34332425068119893, "grad_norm": 0.44407346844673157, "learning_rate": 1.8983527037983606e-05, "loss": 0.38788866996765137, "step": 252 }, { "epoch": 0.3446866485013624, "grad_norm": 0.4327251613140106, "learning_rate": 1.8973807171529556e-05, "loss": 0.38622599840164185, "step": 253 }, { "epoch": 0.3460490463215259, "grad_norm": 0.44564682245254517, "learning_rate": 1.8964043566311942e-05, "loss": 0.3841802477836609, "step": 254 }, { "epoch": 0.3474114441416894, "grad_norm": 0.4468725621700287, "learning_rate": 1.8954236269919026e-05, "loss": 0.3644585609436035, "step": 255 }, { "epoch": 0.34877384196185285, "grad_norm": 0.4858334958553314, "learning_rate": 1.8944385330152047e-05, "loss": 0.4247015118598938, "step": 256 }, { "epoch": 0.35013623978201636, "grad_norm": 0.4670877456665039, "learning_rate": 1.893449079502495e-05, "loss": 0.41649121046066284, "step": 257 }, { "epoch": 0.35149863760217986, "grad_norm": 0.42309293150901794, "learning_rate": 1.892455271276418e-05, "loss": 0.3775060772895813, "step": 258 }, { "epoch": 0.3528610354223433, "grad_norm": 0.42874789237976074, "learning_rate": 1.8914571131808407e-05, "loss": 0.40018612146377563, "step": 259 }, { "epoch": 0.3542234332425068, "grad_norm": 0.45877915620803833, "learning_rate": 1.8904546100808346e-05, "loss": 0.4454330801963806, "step": 260 }, { "epoch": 0.3555858310626703, "grad_norm": 0.4848128855228424, "learning_rate": 1.889447766862647e-05, "loss": 0.4608079791069031, "step": 261 }, { "epoch": 0.3569482288828338, "grad_norm": 0.38830137252807617, "learning_rate": 1.8884365884336796e-05, "loss": 0.34983474016189575, "step": 262 }, { "epoch": 0.3583106267029973, "grad_norm": 0.45347243547439575, "learning_rate": 1.8874210797224646e-05, "loss": 0.37533241510391235, "step": 263 }, { "epoch": 0.35967302452316074, "grad_norm": 0.4399562478065491, "learning_rate": 1.8864012456786397e-05, "loss": 0.3630625903606415, "step": 264 }, { "epoch": 0.36103542234332425, "grad_norm": 0.4255392849445343, "learning_rate": 1.8853770912729243e-05, "loss": 0.37027978897094727, "step": 265 }, { "epoch": 0.36239782016348776, "grad_norm": 0.423592746257782, "learning_rate": 1.884348621497096e-05, "loss": 0.37999391555786133, "step": 266 }, { "epoch": 0.3637602179836512, "grad_norm": 0.4627998173236847, "learning_rate": 1.8833158413639656e-05, "loss": 0.43481695652008057, "step": 267 }, { "epoch": 0.3651226158038147, "grad_norm": 0.4129141867160797, "learning_rate": 1.8822787559073522e-05, "loss": 0.35913515090942383, "step": 268 }, { "epoch": 0.36648501362397823, "grad_norm": 0.45795056223869324, "learning_rate": 1.8812373701820603e-05, "loss": 0.3965306878089905, "step": 269 }, { "epoch": 0.3678474114441417, "grad_norm": 0.40726739168167114, "learning_rate": 1.8801916892638533e-05, "loss": 0.3417432904243469, "step": 270 }, { "epoch": 0.3692098092643052, "grad_norm": 0.4479921758174896, "learning_rate": 1.8791417182494296e-05, "loss": 0.4090738594532013, "step": 271 }, { "epoch": 0.37057220708446864, "grad_norm": 0.380698561668396, "learning_rate": 1.878087462256398e-05, "loss": 0.31110888719558716, "step": 272 }, { "epoch": 0.37193460490463215, "grad_norm": 0.4521123468875885, "learning_rate": 1.8770289264232526e-05, "loss": 0.414950966835022, "step": 273 }, { "epoch": 0.37329700272479566, "grad_norm": 0.4697805643081665, "learning_rate": 1.875966115909347e-05, "loss": 0.42033064365386963, "step": 274 }, { "epoch": 0.3746594005449591, "grad_norm": 0.4483690857887268, "learning_rate": 1.8748990358948713e-05, "loss": 0.3845226466655731, "step": 275 }, { "epoch": 0.3760217983651226, "grad_norm": 0.4191807210445404, "learning_rate": 1.8738276915808232e-05, "loss": 0.3214520514011383, "step": 276 }, { "epoch": 0.3773841961852861, "grad_norm": 0.42148375511169434, "learning_rate": 1.8727520881889865e-05, "loss": 0.3698553442955017, "step": 277 }, { "epoch": 0.3787465940054496, "grad_norm": 0.42890867590904236, "learning_rate": 1.8716722309619033e-05, "loss": 0.35935938358306885, "step": 278 }, { "epoch": 0.3801089918256131, "grad_norm": 0.4575975835323334, "learning_rate": 1.870588125162849e-05, "loss": 0.3697071075439453, "step": 279 }, { "epoch": 0.3814713896457766, "grad_norm": 0.46115341782569885, "learning_rate": 1.8694997760758073e-05, "loss": 0.3968576192855835, "step": 280 }, { "epoch": 0.38283378746594005, "grad_norm": 0.494211882352829, "learning_rate": 1.8684071890054425e-05, "loss": 0.44661998748779297, "step": 281 }, { "epoch": 0.38419618528610355, "grad_norm": 0.4422100782394409, "learning_rate": 1.8673103692770772e-05, "loss": 0.39138174057006836, "step": 282 }, { "epoch": 0.385558583106267, "grad_norm": 0.39777207374572754, "learning_rate": 1.8662093222366623e-05, "loss": 0.34542185068130493, "step": 283 }, { "epoch": 0.3869209809264305, "grad_norm": 0.38417258858680725, "learning_rate": 1.8651040532507538e-05, "loss": 0.3356142044067383, "step": 284 }, { "epoch": 0.388283378746594, "grad_norm": 0.46102169156074524, "learning_rate": 1.863994567706485e-05, "loss": 0.430128276348114, "step": 285 }, { "epoch": 0.3896457765667575, "grad_norm": 0.4118143618106842, "learning_rate": 1.8628808710115417e-05, "loss": 0.34772276878356934, "step": 286 }, { "epoch": 0.391008174386921, "grad_norm": 0.39578622579574585, "learning_rate": 1.861762968594135e-05, "loss": 0.32895392179489136, "step": 287 }, { "epoch": 0.3923705722070845, "grad_norm": 0.3824600875377655, "learning_rate": 1.8606408659029736e-05, "loss": 0.32151734828948975, "step": 288 }, { "epoch": 0.39373297002724794, "grad_norm": 0.4184086322784424, "learning_rate": 1.8595145684072398e-05, "loss": 0.3639921247959137, "step": 289 }, { "epoch": 0.39509536784741145, "grad_norm": 0.45187273621559143, "learning_rate": 1.8583840815965614e-05, "loss": 0.38925087451934814, "step": 290 }, { "epoch": 0.39645776566757496, "grad_norm": 0.39254751801490784, "learning_rate": 1.8572494109809852e-05, "loss": 0.3342083692550659, "step": 291 }, { "epoch": 0.3978201634877384, "grad_norm": 0.43775174021720886, "learning_rate": 1.856110562090949e-05, "loss": 0.41103285551071167, "step": 292 }, { "epoch": 0.3991825613079019, "grad_norm": 0.4114571213722229, "learning_rate": 1.8549675404772574e-05, "loss": 0.36463573575019836, "step": 293 }, { "epoch": 0.40054495912806537, "grad_norm": 0.4514394998550415, "learning_rate": 1.853820351711052e-05, "loss": 0.40414565801620483, "step": 294 }, { "epoch": 0.4019073569482289, "grad_norm": 0.4267936944961548, "learning_rate": 1.852669001383785e-05, "loss": 0.39556679129600525, "step": 295 }, { "epoch": 0.4032697547683924, "grad_norm": 0.3571447432041168, "learning_rate": 1.8515134951071932e-05, "loss": 0.27661603689193726, "step": 296 }, { "epoch": 0.40463215258855584, "grad_norm": 0.4328432083129883, "learning_rate": 1.8503538385132692e-05, "loss": 0.3701832890510559, "step": 297 }, { "epoch": 0.40599455040871935, "grad_norm": 0.3956933915615082, "learning_rate": 1.849190037254234e-05, "loss": 0.31413373351097107, "step": 298 }, { "epoch": 0.40735694822888285, "grad_norm": 0.4569341540336609, "learning_rate": 1.8480220970025114e-05, "loss": 0.46876251697540283, "step": 299 }, { "epoch": 0.4087193460490463, "grad_norm": 0.4352339208126068, "learning_rate": 1.8468500234506965e-05, "loss": 0.34557855129241943, "step": 300 }, { "epoch": 0.4100817438692098, "grad_norm": 0.47778064012527466, "learning_rate": 1.8456738223115325e-05, "loss": 0.4523351490497589, "step": 301 }, { "epoch": 0.4114441416893733, "grad_norm": 0.3924112021923065, "learning_rate": 1.8444934993178796e-05, "loss": 0.32918280363082886, "step": 302 }, { "epoch": 0.4128065395095368, "grad_norm": 0.4721715748310089, "learning_rate": 1.843309060222688e-05, "loss": 0.4108656346797943, "step": 303 }, { "epoch": 0.4141689373297003, "grad_norm": 0.4324953556060791, "learning_rate": 1.8421205107989707e-05, "loss": 0.39233145117759705, "step": 304 }, { "epoch": 0.41553133514986373, "grad_norm": 0.4237790107727051, "learning_rate": 1.8409278568397742e-05, "loss": 0.33894914388656616, "step": 305 }, { "epoch": 0.41689373297002724, "grad_norm": 0.3769596815109253, "learning_rate": 1.83973110415815e-05, "loss": 0.28818315267562866, "step": 306 }, { "epoch": 0.41825613079019075, "grad_norm": 0.39678072929382324, "learning_rate": 1.8385302585871284e-05, "loss": 0.3248705565929413, "step": 307 }, { "epoch": 0.4196185286103542, "grad_norm": 0.4702602028846741, "learning_rate": 1.8373253259796877e-05, "loss": 0.44057464599609375, "step": 308 }, { "epoch": 0.4209809264305177, "grad_norm": 0.43663206696510315, "learning_rate": 1.8361163122087265e-05, "loss": 0.41709601879119873, "step": 309 }, { "epoch": 0.4223433242506812, "grad_norm": 0.41904065012931824, "learning_rate": 1.8349032231670363e-05, "loss": 0.3891496956348419, "step": 310 }, { "epoch": 0.42370572207084467, "grad_norm": 0.42954081296920776, "learning_rate": 1.8336860647672702e-05, "loss": 0.38407522439956665, "step": 311 }, { "epoch": 0.4250681198910082, "grad_norm": 0.40387260913848877, "learning_rate": 1.8324648429419164e-05, "loss": 0.35146600008010864, "step": 312 }, { "epoch": 0.4264305177111717, "grad_norm": 0.3923007845878601, "learning_rate": 1.831239563643268e-05, "loss": 0.3610236942768097, "step": 313 }, { "epoch": 0.42779291553133514, "grad_norm": 0.8759288191795349, "learning_rate": 1.8300102328433952e-05, "loss": 0.4138031601905823, "step": 314 }, { "epoch": 0.42915531335149865, "grad_norm": 0.4077308475971222, "learning_rate": 1.8287768565341143e-05, "loss": 0.3436448574066162, "step": 315 }, { "epoch": 0.4305177111716621, "grad_norm": 0.4676551818847656, "learning_rate": 1.82753944072696e-05, "loss": 0.4458121061325073, "step": 316 }, { "epoch": 0.4318801089918256, "grad_norm": 0.3823956847190857, "learning_rate": 1.826297991453157e-05, "loss": 0.32054227590560913, "step": 317 }, { "epoch": 0.4332425068119891, "grad_norm": 0.42467930912971497, "learning_rate": 1.8250525147635873e-05, "loss": 0.3760542869567871, "step": 318 }, { "epoch": 0.43460490463215257, "grad_norm": 0.4084574580192566, "learning_rate": 1.8238030167287638e-05, "loss": 0.36126622557640076, "step": 319 }, { "epoch": 0.4359673024523161, "grad_norm": 0.3886258602142334, "learning_rate": 1.8225495034387996e-05, "loss": 0.324174702167511, "step": 320 }, { "epoch": 0.4373297002724796, "grad_norm": 0.3744279146194458, "learning_rate": 1.8212919810033777e-05, "loss": 0.3609943985939026, "step": 321 }, { "epoch": 0.43869209809264303, "grad_norm": 0.39002716541290283, "learning_rate": 1.820030455551723e-05, "loss": 0.33594828844070435, "step": 322 }, { "epoch": 0.44005449591280654, "grad_norm": 0.4567135274410248, "learning_rate": 1.8187649332325702e-05, "loss": 0.4041770100593567, "step": 323 }, { "epoch": 0.44141689373297005, "grad_norm": 0.40135565400123596, "learning_rate": 1.8174954202141352e-05, "loss": 0.3575393855571747, "step": 324 }, { "epoch": 0.4427792915531335, "grad_norm": 0.45827990770339966, "learning_rate": 1.8162219226840857e-05, "loss": 0.414590448141098, "step": 325 }, { "epoch": 0.444141689373297, "grad_norm": 0.49530652165412903, "learning_rate": 1.814944446849508e-05, "loss": 0.45791754126548767, "step": 326 }, { "epoch": 0.44550408719346046, "grad_norm": 0.4364672601222992, "learning_rate": 1.8136629989368815e-05, "loss": 0.3820730447769165, "step": 327 }, { "epoch": 0.44686648501362397, "grad_norm": 0.46344640851020813, "learning_rate": 1.8123775851920438e-05, "loss": 0.42946088314056396, "step": 328 }, { "epoch": 0.4482288828337875, "grad_norm": 0.3821968138217926, "learning_rate": 1.8110882118801633e-05, "loss": 0.3248857855796814, "step": 329 }, { "epoch": 0.44959128065395093, "grad_norm": 0.39740368723869324, "learning_rate": 1.8097948852857054e-05, "loss": 0.34824830293655396, "step": 330 }, { "epoch": 0.45095367847411444, "grad_norm": 0.4666673541069031, "learning_rate": 1.8084976117124072e-05, "loss": 0.39515918493270874, "step": 331 }, { "epoch": 0.45231607629427795, "grad_norm": 0.36016690731048584, "learning_rate": 1.807196397483241e-05, "loss": 0.2601340711116791, "step": 332 }, { "epoch": 0.4536784741144414, "grad_norm": 0.3608089089393616, "learning_rate": 1.8058912489403867e-05, "loss": 0.2999919056892395, "step": 333 }, { "epoch": 0.4550408719346049, "grad_norm": 0.42653337121009827, "learning_rate": 1.804582172445201e-05, "loss": 0.39884787797927856, "step": 334 }, { "epoch": 0.4564032697547684, "grad_norm": 0.42624911665916443, "learning_rate": 1.8032691743781853e-05, "loss": 0.39197784662246704, "step": 335 }, { "epoch": 0.45776566757493187, "grad_norm": 0.513020396232605, "learning_rate": 1.8019522611389543e-05, "loss": 0.4664883315563202, "step": 336 }, { "epoch": 0.4591280653950954, "grad_norm": 0.3827018141746521, "learning_rate": 1.8006314391462056e-05, "loss": 0.3538336157798767, "step": 337 }, { "epoch": 0.4604904632152589, "grad_norm": 0.3974727988243103, "learning_rate": 1.799306714837689e-05, "loss": 0.3404923379421234, "step": 338 }, { "epoch": 0.46185286103542234, "grad_norm": 0.390813410282135, "learning_rate": 1.7979780946701737e-05, "loss": 0.3352108895778656, "step": 339 }, { "epoch": 0.46321525885558584, "grad_norm": 0.395134299993515, "learning_rate": 1.7966455851194178e-05, "loss": 0.35127317905426025, "step": 340 }, { "epoch": 0.4645776566757493, "grad_norm": 0.3822191059589386, "learning_rate": 1.795309192680136e-05, "loss": 0.3215380311012268, "step": 341 }, { "epoch": 0.4659400544959128, "grad_norm": 0.38805821537971497, "learning_rate": 1.7939689238659692e-05, "loss": 0.3339554965496063, "step": 342 }, { "epoch": 0.4673024523160763, "grad_norm": 0.4180435538291931, "learning_rate": 1.792624785209451e-05, "loss": 0.37479549646377563, "step": 343 }, { "epoch": 0.46866485013623976, "grad_norm": 0.3718315362930298, "learning_rate": 1.7912767832619776e-05, "loss": 0.2911319136619568, "step": 344 }, { "epoch": 0.47002724795640327, "grad_norm": 0.43831902742385864, "learning_rate": 1.789924924593774e-05, "loss": 0.388246089220047, "step": 345 }, { "epoch": 0.4713896457765668, "grad_norm": 0.40909937024116516, "learning_rate": 1.7885692157938646e-05, "loss": 0.3607439398765564, "step": 346 }, { "epoch": 0.47275204359673023, "grad_norm": 0.4436415433883667, "learning_rate": 1.787209663470038e-05, "loss": 0.4487878382205963, "step": 347 }, { "epoch": 0.47411444141689374, "grad_norm": 0.4295803904533386, "learning_rate": 1.7858462742488175e-05, "loss": 0.4205361008644104, "step": 348 }, { "epoch": 0.47547683923705725, "grad_norm": 0.3416333794593811, "learning_rate": 1.7844790547754264e-05, "loss": 0.2564454674720764, "step": 349 }, { "epoch": 0.4768392370572207, "grad_norm": 0.3937414884567261, "learning_rate": 1.7831080117137584e-05, "loss": 0.3362025022506714, "step": 350 }, { "epoch": 0.4782016348773842, "grad_norm": 0.44493257999420166, "learning_rate": 1.781733151746342e-05, "loss": 0.41151055693626404, "step": 351 }, { "epoch": 0.47956403269754766, "grad_norm": 0.4470524489879608, "learning_rate": 1.7803544815743107e-05, "loss": 0.4086991548538208, "step": 352 }, { "epoch": 0.48092643051771117, "grad_norm": 0.4161277115345001, "learning_rate": 1.7789720079173682e-05, "loss": 0.3814135193824768, "step": 353 }, { "epoch": 0.4822888283378747, "grad_norm": 0.4259527027606964, "learning_rate": 1.777585737513757e-05, "loss": 0.39335864782333374, "step": 354 }, { "epoch": 0.48365122615803813, "grad_norm": 0.44917598366737366, "learning_rate": 1.7761956771202255e-05, "loss": 0.41355639696121216, "step": 355 }, { "epoch": 0.48501362397820164, "grad_norm": 0.45215553045272827, "learning_rate": 1.7748018335119935e-05, "loss": 0.42670729756355286, "step": 356 }, { "epoch": 0.48637602179836514, "grad_norm": 0.43753278255462646, "learning_rate": 1.7734042134827216e-05, "loss": 0.39761465787887573, "step": 357 }, { "epoch": 0.4877384196185286, "grad_norm": 0.42144203186035156, "learning_rate": 1.772002823844476e-05, "loss": 0.3738403916358948, "step": 358 }, { "epoch": 0.4891008174386921, "grad_norm": 0.4108579456806183, "learning_rate": 1.7705976714276976e-05, "loss": 0.3864634037017822, "step": 359 }, { "epoch": 0.4904632152588556, "grad_norm": 0.44009944796562195, "learning_rate": 1.7691887630811653e-05, "loss": 0.387514591217041, "step": 360 }, { "epoch": 0.49182561307901906, "grad_norm": 0.42644309997558594, "learning_rate": 1.7677761056719652e-05, "loss": 0.38349243998527527, "step": 361 }, { "epoch": 0.49318801089918257, "grad_norm": 0.4462713301181793, "learning_rate": 1.7663597060854577e-05, "loss": 0.43910130858421326, "step": 362 }, { "epoch": 0.494550408719346, "grad_norm": 0.4547002911567688, "learning_rate": 1.764939571225241e-05, "loss": 0.4280800223350525, "step": 363 }, { "epoch": 0.49591280653950953, "grad_norm": 0.3978780210018158, "learning_rate": 1.763515708013121e-05, "loss": 0.3507936894893646, "step": 364 }, { "epoch": 0.49727520435967304, "grad_norm": 0.3963427245616913, "learning_rate": 1.762088123389074e-05, "loss": 0.37120676040649414, "step": 365 }, { "epoch": 0.4986376021798365, "grad_norm": 0.4243077337741852, "learning_rate": 1.760656824311216e-05, "loss": 0.36520522832870483, "step": 366 }, { "epoch": 0.5, "grad_norm": 0.44183287024497986, "learning_rate": 1.7592218177557662e-05, "loss": 0.42573392391204834, "step": 367 }, { "epoch": 0.5013623978201635, "grad_norm": 0.4238261282444, "learning_rate": 1.7577831107170157e-05, "loss": 0.3345707952976227, "step": 368 }, { "epoch": 0.502724795640327, "grad_norm": 0.4145262539386749, "learning_rate": 1.7563407102072902e-05, "loss": 0.3537534475326538, "step": 369 }, { "epoch": 0.5040871934604905, "grad_norm": 0.4781807065010071, "learning_rate": 1.7548946232569196e-05, "loss": 0.4613100588321686, "step": 370 }, { "epoch": 0.5054495912806539, "grad_norm": 0.383511483669281, "learning_rate": 1.7534448569141997e-05, "loss": 0.33608487248420715, "step": 371 }, { "epoch": 0.5068119891008175, "grad_norm": 0.4402075409889221, "learning_rate": 1.751991418245361e-05, "loss": 0.4129033088684082, "step": 372 }, { "epoch": 0.5081743869209809, "grad_norm": 0.5239101052284241, "learning_rate": 1.7505343143345328e-05, "loss": 0.45621195435523987, "step": 373 }, { "epoch": 0.5095367847411444, "grad_norm": 0.4115491807460785, "learning_rate": 1.749073552283709e-05, "loss": 0.338983952999115, "step": 374 }, { "epoch": 0.510899182561308, "grad_norm": 0.4104604423046112, "learning_rate": 1.7476091392127132e-05, "loss": 0.34245091676712036, "step": 375 }, { "epoch": 0.5122615803814714, "grad_norm": 0.43850037455558777, "learning_rate": 1.746141082259165e-05, "loss": 0.40123671293258667, "step": 376 }, { "epoch": 0.5136239782016349, "grad_norm": 0.41533970832824707, "learning_rate": 1.7446693885784435e-05, "loss": 0.34971946477890015, "step": 377 }, { "epoch": 0.5149863760217984, "grad_norm": 0.36809873580932617, "learning_rate": 1.7431940653436538e-05, "loss": 0.3055441379547119, "step": 378 }, { "epoch": 0.5163487738419619, "grad_norm": 0.4054659605026245, "learning_rate": 1.7417151197455915e-05, "loss": 0.35166579484939575, "step": 379 }, { "epoch": 0.5177111716621253, "grad_norm": 0.435969740152359, "learning_rate": 1.740232558992708e-05, "loss": 0.3930160403251648, "step": 380 }, { "epoch": 0.5190735694822888, "grad_norm": 0.4018082916736603, "learning_rate": 1.738746390311075e-05, "loss": 0.3543049693107605, "step": 381 }, { "epoch": 0.5204359673024523, "grad_norm": 0.4188288450241089, "learning_rate": 1.7372566209443496e-05, "loss": 0.37953218817710876, "step": 382 }, { "epoch": 0.5217983651226158, "grad_norm": 0.4601037800312042, "learning_rate": 1.735763258153739e-05, "loss": 0.4313342571258545, "step": 383 }, { "epoch": 0.5231607629427792, "grad_norm": 0.43152326345443726, "learning_rate": 1.7342663092179636e-05, "loss": 0.41218316555023193, "step": 384 }, { "epoch": 0.5245231607629428, "grad_norm": 0.4087159037590027, "learning_rate": 1.7327657814332247e-05, "loss": 0.3378143906593323, "step": 385 }, { "epoch": 0.5258855585831063, "grad_norm": 0.39226964116096497, "learning_rate": 1.7312616821131657e-05, "loss": 0.3294611871242523, "step": 386 }, { "epoch": 0.5272479564032697, "grad_norm": 0.41971486806869507, "learning_rate": 1.729754018588838e-05, "loss": 0.37797778844833374, "step": 387 }, { "epoch": 0.5286103542234333, "grad_norm": 0.4497551918029785, "learning_rate": 1.728242798208666e-05, "loss": 0.38471484184265137, "step": 388 }, { "epoch": 0.5299727520435967, "grad_norm": 0.41503041982650757, "learning_rate": 1.7267280283384104e-05, "loss": 0.38631588220596313, "step": 389 }, { "epoch": 0.5313351498637602, "grad_norm": 0.4341152310371399, "learning_rate": 1.7252097163611304e-05, "loss": 0.43717920780181885, "step": 390 }, { "epoch": 0.5326975476839237, "grad_norm": 0.41421955823898315, "learning_rate": 1.723687869677152e-05, "loss": 0.3743841052055359, "step": 391 }, { "epoch": 0.5340599455040872, "grad_norm": 0.39457762241363525, "learning_rate": 1.7221624957040274e-05, "loss": 0.3561673164367676, "step": 392 }, { "epoch": 0.5354223433242506, "grad_norm": 0.38131183385849, "learning_rate": 1.7206336018765026e-05, "loss": 0.3137727975845337, "step": 393 }, { "epoch": 0.5367847411444142, "grad_norm": 0.3871758282184601, "learning_rate": 1.7191011956464788e-05, "loss": 0.35666629672050476, "step": 394 }, { "epoch": 0.5381471389645777, "grad_norm": 0.43537119030952454, "learning_rate": 1.717565284482977e-05, "loss": 0.42949140071868896, "step": 395 }, { "epoch": 0.5395095367847411, "grad_norm": 0.4189457893371582, "learning_rate": 1.7160258758721015e-05, "loss": 0.396271288394928, "step": 396 }, { "epoch": 0.5408719346049047, "grad_norm": 0.41827261447906494, "learning_rate": 1.714482977317003e-05, "loss": 0.4051450490951538, "step": 397 }, { "epoch": 0.5422343324250681, "grad_norm": 0.38875138759613037, "learning_rate": 1.7129365963378428e-05, "loss": 0.3301708698272705, "step": 398 }, { "epoch": 0.5435967302452316, "grad_norm": 0.42139869928359985, "learning_rate": 1.711386740471755e-05, "loss": 0.3770272433757782, "step": 399 }, { "epoch": 0.5449591280653951, "grad_norm": 0.4148419499397278, "learning_rate": 1.7098334172728112e-05, "loss": 0.37180018424987793, "step": 400 }, { "epoch": 0.5463215258855586, "grad_norm": 0.42452266812324524, "learning_rate": 1.7082766343119822e-05, "loss": 0.37390637397766113, "step": 401 }, { "epoch": 0.547683923705722, "grad_norm": 0.4145396053791046, "learning_rate": 1.706716399177103e-05, "loss": 0.3574928939342499, "step": 402 }, { "epoch": 0.5490463215258855, "grad_norm": 0.404379665851593, "learning_rate": 1.7051527194728343e-05, "loss": 0.3393360674381256, "step": 403 }, { "epoch": 0.5504087193460491, "grad_norm": 0.4394095242023468, "learning_rate": 1.703585602820624e-05, "loss": 0.38446563482284546, "step": 404 }, { "epoch": 0.5517711171662125, "grad_norm": 0.4012243449687958, "learning_rate": 1.7020150568586743e-05, "loss": 0.34150344133377075, "step": 405 }, { "epoch": 0.553133514986376, "grad_norm": 0.43355002999305725, "learning_rate": 1.7004410892419012e-05, "loss": 0.3841056227684021, "step": 406 }, { "epoch": 0.5544959128065395, "grad_norm": 0.4600158631801605, "learning_rate": 1.698863707641897e-05, "loss": 0.39545172452926636, "step": 407 }, { "epoch": 0.555858310626703, "grad_norm": 0.473522424697876, "learning_rate": 1.6972829197468958e-05, "loss": 0.4410251975059509, "step": 408 }, { "epoch": 0.5572207084468664, "grad_norm": 0.44125762581825256, "learning_rate": 1.695698733261732e-05, "loss": 0.3858538269996643, "step": 409 }, { "epoch": 0.55858310626703, "grad_norm": 0.5052700042724609, "learning_rate": 1.694111155907807e-05, "loss": 0.505725622177124, "step": 410 }, { "epoch": 0.5599455040871935, "grad_norm": 0.39854127168655396, "learning_rate": 1.6925201954230474e-05, "loss": 0.3284291625022888, "step": 411 }, { "epoch": 0.5613079019073569, "grad_norm": 0.4676287770271301, "learning_rate": 1.690925859561871e-05, "loss": 0.42946767807006836, "step": 412 }, { "epoch": 0.5626702997275205, "grad_norm": 0.4244855046272278, "learning_rate": 1.689328156095147e-05, "loss": 0.3870871663093567, "step": 413 }, { "epoch": 0.5640326975476839, "grad_norm": 0.41849035024642944, "learning_rate": 1.6877270928101573e-05, "loss": 0.37404653429985046, "step": 414 }, { "epoch": 0.5653950953678474, "grad_norm": 0.4176686406135559, "learning_rate": 1.6861226775105618e-05, "loss": 0.38222536444664, "step": 415 }, { "epoch": 0.5667574931880109, "grad_norm": 0.42130082845687866, "learning_rate": 1.684514918016356e-05, "loss": 0.3801380395889282, "step": 416 }, { "epoch": 0.5681198910081744, "grad_norm": 0.4545654058456421, "learning_rate": 1.6829038221638366e-05, "loss": 0.42598506808280945, "step": 417 }, { "epoch": 0.5694822888283378, "grad_norm": 0.4204128682613373, "learning_rate": 1.681289397805562e-05, "loss": 0.3880673944950104, "step": 418 }, { "epoch": 0.5708446866485014, "grad_norm": 0.40949374437332153, "learning_rate": 1.6796716528103127e-05, "loss": 0.3792712092399597, "step": 419 }, { "epoch": 0.5722070844686649, "grad_norm": 0.42425790429115295, "learning_rate": 1.6780505950630552e-05, "loss": 0.40029221773147583, "step": 420 }, { "epoch": 0.5735694822888283, "grad_norm": 0.449004203081131, "learning_rate": 1.6764262324649024e-05, "loss": 0.4227592647075653, "step": 421 }, { "epoch": 0.5749318801089919, "grad_norm": 0.3931463956832886, "learning_rate": 1.674798572933075e-05, "loss": 0.3561609387397766, "step": 422 }, { "epoch": 0.5762942779291553, "grad_norm": 0.4123283624649048, "learning_rate": 1.6731676244008622e-05, "loss": 0.3775140643119812, "step": 423 }, { "epoch": 0.5776566757493188, "grad_norm": 0.413714736700058, "learning_rate": 1.6715333948175857e-05, "loss": 0.3620632290840149, "step": 424 }, { "epoch": 0.5790190735694822, "grad_norm": 0.36583277583122253, "learning_rate": 1.6698958921485577e-05, "loss": 0.30589473247528076, "step": 425 }, { "epoch": 0.5803814713896458, "grad_norm": 0.4184879958629608, "learning_rate": 1.668255124375045e-05, "loss": 0.37757375836372375, "step": 426 }, { "epoch": 0.5817438692098093, "grad_norm": 0.4325942397117615, "learning_rate": 1.6666110994942274e-05, "loss": 0.3947750926017761, "step": 427 }, { "epoch": 0.5831062670299727, "grad_norm": 0.41197946667671204, "learning_rate": 1.6649638255191604e-05, "loss": 0.36544039845466614, "step": 428 }, { "epoch": 0.5844686648501363, "grad_norm": 0.4328736662864685, "learning_rate": 1.663313310478736e-05, "loss": 0.3924716114997864, "step": 429 }, { "epoch": 0.5858310626702997, "grad_norm": 0.4509877562522888, "learning_rate": 1.661659562417643e-05, "loss": 0.4020155370235443, "step": 430 }, { "epoch": 0.5871934604904632, "grad_norm": 0.4538882076740265, "learning_rate": 1.660002589396328e-05, "loss": 0.42874789237976074, "step": 431 }, { "epoch": 0.5885558583106267, "grad_norm": 0.4030280113220215, "learning_rate": 1.6583423994909573e-05, "loss": 0.36683404445648193, "step": 432 }, { "epoch": 0.5899182561307902, "grad_norm": 0.4076124131679535, "learning_rate": 1.6566790007933746e-05, "loss": 0.3436656594276428, "step": 433 }, { "epoch": 0.5912806539509536, "grad_norm": 0.454622358083725, "learning_rate": 1.6550124014110646e-05, "loss": 0.4208211302757263, "step": 434 }, { "epoch": 0.5926430517711172, "grad_norm": 0.422625869512558, "learning_rate": 1.6533426094671125e-05, "loss": 0.3721390664577484, "step": 435 }, { "epoch": 0.5940054495912807, "grad_norm": 0.35872289538383484, "learning_rate": 1.651669633100163e-05, "loss": 0.29117974638938904, "step": 436 }, { "epoch": 0.5953678474114441, "grad_norm": 0.36502501368522644, "learning_rate": 1.6499934804643838e-05, "loss": 0.2901703119277954, "step": 437 }, { "epoch": 0.5967302452316077, "grad_norm": 0.412866473197937, "learning_rate": 1.6483141597294214e-05, "loss": 0.31686800718307495, "step": 438 }, { "epoch": 0.5980926430517711, "grad_norm": 0.37948358058929443, "learning_rate": 1.646631679080366e-05, "loss": 0.3297507166862488, "step": 439 }, { "epoch": 0.5994550408719346, "grad_norm": 0.4186059832572937, "learning_rate": 1.6449460467177078e-05, "loss": 0.37851136922836304, "step": 440 }, { "epoch": 0.6008174386920981, "grad_norm": 0.3846619427204132, "learning_rate": 1.6432572708572997e-05, "loss": 0.3128829002380371, "step": 441 }, { "epoch": 0.6021798365122616, "grad_norm": 0.4310852289199829, "learning_rate": 1.641565359730315e-05, "loss": 0.39028769731521606, "step": 442 }, { "epoch": 0.603542234332425, "grad_norm": 0.4645153880119324, "learning_rate": 1.6398703215832097e-05, "loss": 0.428106427192688, "step": 443 }, { "epoch": 0.6049046321525886, "grad_norm": 0.4289141595363617, "learning_rate": 1.6381721646776805e-05, "loss": 0.3838496208190918, "step": 444 }, { "epoch": 0.6062670299727521, "grad_norm": 0.381273478269577, "learning_rate": 1.6364708972906246e-05, "loss": 0.3133726119995117, "step": 445 }, { "epoch": 0.6076294277929155, "grad_norm": 0.4178448021411896, "learning_rate": 1.6347665277141005e-05, "loss": 0.37862086296081543, "step": 446 }, { "epoch": 0.6089918256130791, "grad_norm": 0.42201322317123413, "learning_rate": 1.6330590642552867e-05, "loss": 0.39746665954589844, "step": 447 }, { "epoch": 0.6103542234332425, "grad_norm": 0.43660175800323486, "learning_rate": 1.6313485152364417e-05, "loss": 0.3727680444717407, "step": 448 }, { "epoch": 0.611716621253406, "grad_norm": 0.4348539710044861, "learning_rate": 1.6296348889948627e-05, "loss": 0.386578232049942, "step": 449 }, { "epoch": 0.6130790190735694, "grad_norm": 0.4908278286457062, "learning_rate": 1.627918193882845e-05, "loss": 0.45541319251060486, "step": 450 }, { "epoch": 0.614441416893733, "grad_norm": 0.46335548162460327, "learning_rate": 1.6261984382676432e-05, "loss": 0.4110366702079773, "step": 451 }, { "epoch": 0.6158038147138964, "grad_norm": 0.4220646917819977, "learning_rate": 1.624475630531428e-05, "loss": 0.35985416173934937, "step": 452 }, { "epoch": 0.6171662125340599, "grad_norm": 0.4713914096355438, "learning_rate": 1.6227497790712458e-05, "loss": 0.4303567111492157, "step": 453 }, { "epoch": 0.6185286103542235, "grad_norm": 0.4228816628456116, "learning_rate": 1.621020892298979e-05, "loss": 0.35945630073547363, "step": 454 }, { "epoch": 0.6198910081743869, "grad_norm": 0.3780403435230255, "learning_rate": 1.6192889786413048e-05, "loss": 0.32220372557640076, "step": 455 }, { "epoch": 0.6212534059945504, "grad_norm": 0.4096536338329315, "learning_rate": 1.617554046539652e-05, "loss": 0.3749344050884247, "step": 456 }, { "epoch": 0.6226158038147139, "grad_norm": 0.4119769334793091, "learning_rate": 1.6158161044501624e-05, "loss": 0.38287466764450073, "step": 457 }, { "epoch": 0.6239782016348774, "grad_norm": 0.39846253395080566, "learning_rate": 1.6140751608436487e-05, "loss": 0.3293435275554657, "step": 458 }, { "epoch": 0.6253405994550408, "grad_norm": 0.4839153289794922, "learning_rate": 1.6123312242055533e-05, "loss": 0.49237170815467834, "step": 459 }, { "epoch": 0.6267029972752044, "grad_norm": 0.345336377620697, "learning_rate": 1.6105843030359055e-05, "loss": 0.2600591778755188, "step": 460 }, { "epoch": 0.6280653950953679, "grad_norm": 0.36076706647872925, "learning_rate": 1.6088344058492836e-05, "loss": 0.3101092576980591, "step": 461 }, { "epoch": 0.6294277929155313, "grad_norm": 0.4160480499267578, "learning_rate": 1.6070815411747686e-05, "loss": 0.38243746757507324, "step": 462 }, { "epoch": 0.6307901907356949, "grad_norm": 0.35517603158950806, "learning_rate": 1.6053257175559074e-05, "loss": 0.28862473368644714, "step": 463 }, { "epoch": 0.6321525885558583, "grad_norm": 0.3878868520259857, "learning_rate": 1.6035669435506674e-05, "loss": 0.3289714455604553, "step": 464 }, { "epoch": 0.6335149863760218, "grad_norm": 0.42289090156555176, "learning_rate": 1.6018052277313966e-05, "loss": 0.3916146755218506, "step": 465 }, { "epoch": 0.6348773841961853, "grad_norm": 0.40691617131233215, "learning_rate": 1.600040578684782e-05, "loss": 0.38257652521133423, "step": 466 }, { "epoch": 0.6362397820163488, "grad_norm": 0.3280545771121979, "learning_rate": 1.598273005011808e-05, "loss": 0.26138222217559814, "step": 467 }, { "epoch": 0.6376021798365122, "grad_norm": 0.4141409695148468, "learning_rate": 1.5965025153277112e-05, "loss": 0.35480794310569763, "step": 468 }, { "epoch": 0.6389645776566758, "grad_norm": 0.4675044119358063, "learning_rate": 1.5947291182619444e-05, "loss": 0.4690595865249634, "step": 469 }, { "epoch": 0.6403269754768393, "grad_norm": 0.3967062830924988, "learning_rate": 1.5929528224581283e-05, "loss": 0.34801554679870605, "step": 470 }, { "epoch": 0.6416893732970027, "grad_norm": 0.43718570470809937, "learning_rate": 1.5911736365740133e-05, "loss": 0.3724061846733093, "step": 471 }, { "epoch": 0.6430517711171662, "grad_norm": 0.45367127656936646, "learning_rate": 1.5893915692814365e-05, "loss": 0.4154523015022278, "step": 472 }, { "epoch": 0.6444141689373297, "grad_norm": 0.41580531001091003, "learning_rate": 1.5876066292662784e-05, "loss": 0.3685305714607239, "step": 473 }, { "epoch": 0.6457765667574932, "grad_norm": 0.3875036835670471, "learning_rate": 1.585818825228422e-05, "loss": 0.33631476759910583, "step": 474 }, { "epoch": 0.6471389645776566, "grad_norm": 0.3880947232246399, "learning_rate": 1.5840281658817093e-05, "loss": 0.3135243058204651, "step": 475 }, { "epoch": 0.6485013623978202, "grad_norm": 0.4160878658294678, "learning_rate": 1.582234659953899e-05, "loss": 0.3561175763607025, "step": 476 }, { "epoch": 0.6498637602179836, "grad_norm": 0.427616149187088, "learning_rate": 1.5804383161866245e-05, "loss": 0.39607974886894226, "step": 477 }, { "epoch": 0.6512261580381471, "grad_norm": 0.4082714319229126, "learning_rate": 1.5786391433353508e-05, "loss": 0.35210445523262024, "step": 478 }, { "epoch": 0.6525885558583107, "grad_norm": 0.41783925890922546, "learning_rate": 1.5768371501693326e-05, "loss": 0.376731276512146, "step": 479 }, { "epoch": 0.6539509536784741, "grad_norm": 0.37993308901786804, "learning_rate": 1.5750323454715696e-05, "loss": 0.322902649641037, "step": 480 }, { "epoch": 0.6553133514986376, "grad_norm": 0.4466570317745209, "learning_rate": 1.5732247380387664e-05, "loss": 0.39785605669021606, "step": 481 }, { "epoch": 0.6566757493188011, "grad_norm": 0.40761619806289673, "learning_rate": 1.5714143366812876e-05, "loss": 0.3618497848510742, "step": 482 }, { "epoch": 0.6580381471389646, "grad_norm": 0.3778402507305145, "learning_rate": 1.5696011502231158e-05, "loss": 0.339969664812088, "step": 483 }, { "epoch": 0.659400544959128, "grad_norm": 0.4464191496372223, "learning_rate": 1.5677851875018076e-05, "loss": 0.43336886167526245, "step": 484 }, { "epoch": 0.6607629427792916, "grad_norm": 0.3769300878047943, "learning_rate": 1.565966457368453e-05, "loss": 0.31399497389793396, "step": 485 }, { "epoch": 0.662125340599455, "grad_norm": 0.39209413528442383, "learning_rate": 1.564144968687628e-05, "loss": 0.34925514459609985, "step": 486 }, { "epoch": 0.6634877384196185, "grad_norm": 0.44385579228401184, "learning_rate": 1.5623207303373553e-05, "loss": 0.4195161461830139, "step": 487 }, { "epoch": 0.6648501362397821, "grad_norm": 0.44335752725601196, "learning_rate": 1.5604937512090602e-05, "loss": 0.394603431224823, "step": 488 }, { "epoch": 0.6662125340599455, "grad_norm": 0.41506636142730713, "learning_rate": 1.5586640402075258e-05, "loss": 0.39139121770858765, "step": 489 }, { "epoch": 0.667574931880109, "grad_norm": 0.4141143262386322, "learning_rate": 1.5568316062508502e-05, "loss": 0.3790861666202545, "step": 490 }, { "epoch": 0.6689373297002725, "grad_norm": 0.4253380298614502, "learning_rate": 1.5549964582704044e-05, "loss": 0.38978779315948486, "step": 491 }, { "epoch": 0.670299727520436, "grad_norm": 0.38304126262664795, "learning_rate": 1.5531586052107868e-05, "loss": 0.3342415690422058, "step": 492 }, { "epoch": 0.6716621253405994, "grad_norm": 0.4576060175895691, "learning_rate": 1.5513180560297808e-05, "loss": 0.42615556716918945, "step": 493 }, { "epoch": 0.6730245231607629, "grad_norm": 0.4253360331058502, "learning_rate": 1.5494748196983106e-05, "loss": 0.4119224548339844, "step": 494 }, { "epoch": 0.6743869209809265, "grad_norm": 0.4528999626636505, "learning_rate": 1.547628905200398e-05, "loss": 0.49085426330566406, "step": 495 }, { "epoch": 0.6757493188010899, "grad_norm": 0.4398576617240906, "learning_rate": 1.5457803215331182e-05, "loss": 0.40197885036468506, "step": 496 }, { "epoch": 0.6771117166212534, "grad_norm": 0.4078048765659332, "learning_rate": 1.5439290777065558e-05, "loss": 0.35455724596977234, "step": 497 }, { "epoch": 0.6784741144414169, "grad_norm": 0.41575735807418823, "learning_rate": 1.542075182743762e-05, "loss": 0.3819142282009125, "step": 498 }, { "epoch": 0.6798365122615804, "grad_norm": 0.3810281753540039, "learning_rate": 1.5402186456807086e-05, "loss": 0.330873966217041, "step": 499 }, { "epoch": 0.6811989100817438, "grad_norm": 0.3949718773365021, "learning_rate": 1.5383594755662453e-05, "loss": 0.34446001052856445, "step": 500 }, { "epoch": 0.6825613079019074, "grad_norm": 0.3613286316394806, "learning_rate": 1.5364976814620568e-05, "loss": 0.3226144313812256, "step": 501 }, { "epoch": 0.6839237057220708, "grad_norm": 0.3470703661441803, "learning_rate": 1.5346332724426155e-05, "loss": 0.29430314898490906, "step": 502 }, { "epoch": 0.6852861035422343, "grad_norm": 0.3816620111465454, "learning_rate": 1.5327662575951404e-05, "loss": 0.3283197283744812, "step": 503 }, { "epoch": 0.6866485013623979, "grad_norm": 0.391053169965744, "learning_rate": 1.5308966460195503e-05, "loss": 0.34893274307250977, "step": 504 }, { "epoch": 0.6880108991825613, "grad_norm": 0.4378596544265747, "learning_rate": 1.5290244468284206e-05, "loss": 0.40730422735214233, "step": 505 }, { "epoch": 0.6893732970027248, "grad_norm": 0.4042869210243225, "learning_rate": 1.5271496691469404e-05, "loss": 0.3062353730201721, "step": 506 }, { "epoch": 0.6907356948228883, "grad_norm": 0.42129504680633545, "learning_rate": 1.525272322112865e-05, "loss": 0.39751139283180237, "step": 507 }, { "epoch": 0.6920980926430518, "grad_norm": 0.42566022276878357, "learning_rate": 1.5233924148764727e-05, "loss": 0.3925180435180664, "step": 508 }, { "epoch": 0.6934604904632152, "grad_norm": 0.3715537488460541, "learning_rate": 1.5215099566005217e-05, "loss": 0.31566864252090454, "step": 509 }, { "epoch": 0.6948228882833788, "grad_norm": 0.45559632778167725, "learning_rate": 1.519624956460203e-05, "loss": 0.4103913903236389, "step": 510 }, { "epoch": 0.6961852861035422, "grad_norm": 0.34860438108444214, "learning_rate": 1.517737423643097e-05, "loss": 0.2865496277809143, "step": 511 }, { "epoch": 0.6975476839237057, "grad_norm": 0.44315922260284424, "learning_rate": 1.5158473673491285e-05, "loss": 0.4178670048713684, "step": 512 }, { "epoch": 0.6989100817438693, "grad_norm": 0.4062572419643402, "learning_rate": 1.5139547967905221e-05, "loss": 0.34824541211128235, "step": 513 }, { "epoch": 0.7002724795640327, "grad_norm": 0.42972540855407715, "learning_rate": 1.5120597211917564e-05, "loss": 0.40658995509147644, "step": 514 }, { "epoch": 0.7016348773841962, "grad_norm": 0.4179152846336365, "learning_rate": 1.510162149789521e-05, "loss": 0.38474875688552856, "step": 515 }, { "epoch": 0.7029972752043597, "grad_norm": 0.41898468136787415, "learning_rate": 1.5082620918326685e-05, "loss": 0.3936446011066437, "step": 516 }, { "epoch": 0.7043596730245232, "grad_norm": 0.4191248416900635, "learning_rate": 1.5063595565821721e-05, "loss": 0.3933945298194885, "step": 517 }, { "epoch": 0.7057220708446866, "grad_norm": 0.45913466811180115, "learning_rate": 1.5044545533110793e-05, "loss": 0.38266128301620483, "step": 518 }, { "epoch": 0.7070844686648501, "grad_norm": 0.4441032409667969, "learning_rate": 1.5025470913044666e-05, "loss": 0.4108320474624634, "step": 519 }, { "epoch": 0.7084468664850136, "grad_norm": 0.42410925030708313, "learning_rate": 1.5006371798593948e-05, "loss": 0.4113953709602356, "step": 520 }, { "epoch": 0.7098092643051771, "grad_norm": 0.4308149218559265, "learning_rate": 1.4987248282848637e-05, "loss": 0.3980264961719513, "step": 521 }, { "epoch": 0.7111716621253406, "grad_norm": 0.44944408535957336, "learning_rate": 1.4968100459017652e-05, "loss": 0.36318397521972656, "step": 522 }, { "epoch": 0.7125340599455041, "grad_norm": 0.39412856101989746, "learning_rate": 1.4948928420428403e-05, "loss": 0.3426111042499542, "step": 523 }, { "epoch": 0.7138964577656676, "grad_norm": 0.35763245820999146, "learning_rate": 1.4929732260526318e-05, "loss": 0.28396356105804443, "step": 524 }, { "epoch": 0.715258855585831, "grad_norm": 0.4681552052497864, "learning_rate": 1.4910512072874395e-05, "loss": 0.4816880226135254, "step": 525 }, { "epoch": 0.7166212534059946, "grad_norm": 0.38465479016304016, "learning_rate": 1.489126795115274e-05, "loss": 0.31631630659103394, "step": 526 }, { "epoch": 0.717983651226158, "grad_norm": 0.700157880783081, "learning_rate": 1.4871999989158123e-05, "loss": 0.3877072334289551, "step": 527 }, { "epoch": 0.7193460490463215, "grad_norm": 0.36922982335090637, "learning_rate": 1.4852708280803512e-05, "loss": 0.33346259593963623, "step": 528 }, { "epoch": 0.720708446866485, "grad_norm": 0.4037865698337555, "learning_rate": 1.4833392920117607e-05, "loss": 0.3595266342163086, "step": 529 }, { "epoch": 0.7220708446866485, "grad_norm": 0.38142573833465576, "learning_rate": 1.4814054001244395e-05, "loss": 0.31460440158843994, "step": 530 }, { "epoch": 0.723433242506812, "grad_norm": 0.3785574436187744, "learning_rate": 1.4794691618442691e-05, "loss": 0.30783623456954956, "step": 531 }, { "epoch": 0.7247956403269755, "grad_norm": 0.44186413288116455, "learning_rate": 1.477530586608567e-05, "loss": 0.41240638494491577, "step": 532 }, { "epoch": 0.726158038147139, "grad_norm": 0.3977665603160858, "learning_rate": 1.4755896838660412e-05, "loss": 0.32479894161224365, "step": 533 }, { "epoch": 0.7275204359673024, "grad_norm": 0.40463075041770935, "learning_rate": 1.4736464630767442e-05, "loss": 0.3699343502521515, "step": 534 }, { "epoch": 0.728882833787466, "grad_norm": 0.41050946712493896, "learning_rate": 1.4717009337120268e-05, "loss": 0.3829270005226135, "step": 535 }, { "epoch": 0.7302452316076294, "grad_norm": 0.4132955074310303, "learning_rate": 1.4697531052544914e-05, "loss": 0.34740668535232544, "step": 536 }, { "epoch": 0.7316076294277929, "grad_norm": 0.43629321455955505, "learning_rate": 1.4678029871979469e-05, "loss": 0.3730025589466095, "step": 537 }, { "epoch": 0.7329700272479565, "grad_norm": 0.44061553478240967, "learning_rate": 1.4658505890473615e-05, "loss": 0.40129199624061584, "step": 538 }, { "epoch": 0.7343324250681199, "grad_norm": 0.4008716642856598, "learning_rate": 1.463895920318817e-05, "loss": 0.3489900827407837, "step": 539 }, { "epoch": 0.7356948228882834, "grad_norm": 0.3535612225532532, "learning_rate": 1.4619389905394616e-05, "loss": 0.2988109290599823, "step": 540 }, { "epoch": 0.7370572207084468, "grad_norm": 0.4381372630596161, "learning_rate": 1.4599798092474646e-05, "loss": 0.3608015775680542, "step": 541 }, { "epoch": 0.7384196185286104, "grad_norm": 0.3945034444332123, "learning_rate": 1.4580183859919686e-05, "loss": 0.32994401454925537, "step": 542 }, { "epoch": 0.7397820163487738, "grad_norm": 0.41232776641845703, "learning_rate": 1.4560547303330441e-05, "loss": 0.38900381326675415, "step": 543 }, { "epoch": 0.7411444141689373, "grad_norm": 0.38951003551483154, "learning_rate": 1.4540888518416423e-05, "loss": 0.3017880320549011, "step": 544 }, { "epoch": 0.7425068119891008, "grad_norm": 0.3949369490146637, "learning_rate": 1.4521207600995487e-05, "loss": 0.32867276668548584, "step": 545 }, { "epoch": 0.7438692098092643, "grad_norm": 0.3731197416782379, "learning_rate": 1.4501504646993358e-05, "loss": 0.32244962453842163, "step": 546 }, { "epoch": 0.7452316076294278, "grad_norm": 0.42670756578445435, "learning_rate": 1.4481779752443177e-05, "loss": 0.34767279028892517, "step": 547 }, { "epoch": 0.7465940054495913, "grad_norm": 0.4462198317050934, "learning_rate": 1.446203301348502e-05, "loss": 0.40442246198654175, "step": 548 }, { "epoch": 0.7479564032697548, "grad_norm": 0.43297913670539856, "learning_rate": 1.4442264526365425e-05, "loss": 0.39974820613861084, "step": 549 }, { "epoch": 0.7493188010899182, "grad_norm": 0.38846778869628906, "learning_rate": 1.4422474387436951e-05, "loss": 0.3305945098400116, "step": 550 }, { "epoch": 0.7506811989100818, "grad_norm": 0.3659208118915558, "learning_rate": 1.4402662693157672e-05, "loss": 0.28628021478652954, "step": 551 }, { "epoch": 0.7520435967302452, "grad_norm": 0.4232732653617859, "learning_rate": 1.4382829540090728e-05, "loss": 0.39836883544921875, "step": 552 }, { "epoch": 0.7534059945504087, "grad_norm": 0.3981814682483673, "learning_rate": 1.4362975024903854e-05, "loss": 0.3883022964000702, "step": 553 }, { "epoch": 0.7547683923705722, "grad_norm": 0.37059178948402405, "learning_rate": 1.43430992443689e-05, "loss": 0.31552067399024963, "step": 554 }, { "epoch": 0.7561307901907357, "grad_norm": 0.5106791257858276, "learning_rate": 1.4323202295361375e-05, "loss": 0.3364841043949127, "step": 555 }, { "epoch": 0.7574931880108992, "grad_norm": 0.3709793984889984, "learning_rate": 1.4303284274859947e-05, "loss": 0.3160533010959625, "step": 556 }, { "epoch": 0.7588555858310627, "grad_norm": 0.38688817620277405, "learning_rate": 1.4283345279946e-05, "loss": 0.3377053439617157, "step": 557 }, { "epoch": 0.7602179836512262, "grad_norm": 0.3955825865268707, "learning_rate": 1.4263385407803147e-05, "loss": 0.367174357175827, "step": 558 }, { "epoch": 0.7615803814713896, "grad_norm": 0.387660413980484, "learning_rate": 1.424340475571675e-05, "loss": 0.355742484331131, "step": 559 }, { "epoch": 0.7629427792915532, "grad_norm": 0.4117288589477539, "learning_rate": 1.4223403421073465e-05, "loss": 0.3632027506828308, "step": 560 }, { "epoch": 0.7643051771117166, "grad_norm": 0.3913569748401642, "learning_rate": 1.4203381501360746e-05, "loss": 0.34550565481185913, "step": 561 }, { "epoch": 0.7656675749318801, "grad_norm": 0.3794730305671692, "learning_rate": 1.4183339094166386e-05, "loss": 0.32455602288246155, "step": 562 }, { "epoch": 0.7670299727520435, "grad_norm": 0.4123244881629944, "learning_rate": 1.416327629717803e-05, "loss": 0.33050400018692017, "step": 563 }, { "epoch": 0.7683923705722071, "grad_norm": 0.5458968877792358, "learning_rate": 1.4143193208182705e-05, "loss": 0.33752286434173584, "step": 564 }, { "epoch": 0.7697547683923706, "grad_norm": 0.3934307396411896, "learning_rate": 1.4123089925066347e-05, "loss": 0.31893715262413025, "step": 565 }, { "epoch": 0.771117166212534, "grad_norm": 0.4280702471733093, "learning_rate": 1.4102966545813312e-05, "loss": 0.38816317915916443, "step": 566 }, { "epoch": 0.7724795640326976, "grad_norm": 0.38977575302124023, "learning_rate": 1.4082823168505912e-05, "loss": 0.3048614263534546, "step": 567 }, { "epoch": 0.773841961852861, "grad_norm": 0.4607933461666107, "learning_rate": 1.4062659891323927e-05, "loss": 0.3881381154060364, "step": 568 }, { "epoch": 0.7752043596730245, "grad_norm": 0.4132702350616455, "learning_rate": 1.4042476812544128e-05, "loss": 0.367891788482666, "step": 569 }, { "epoch": 0.776566757493188, "grad_norm": 0.36173874139785767, "learning_rate": 1.4022274030539802e-05, "loss": 0.2838096618652344, "step": 570 }, { "epoch": 0.7779291553133515, "grad_norm": 0.3847193121910095, "learning_rate": 1.4002051643780275e-05, "loss": 0.31407347321510315, "step": 571 }, { "epoch": 0.779291553133515, "grad_norm": 0.3895076811313629, "learning_rate": 1.398180975083042e-05, "loss": 0.3542616367340088, "step": 572 }, { "epoch": 0.7806539509536785, "grad_norm": 0.4060671329498291, "learning_rate": 1.3961548450350184e-05, "loss": 0.3690754175186157, "step": 573 }, { "epoch": 0.782016348773842, "grad_norm": 0.3732289671897888, "learning_rate": 1.3941267841094118e-05, "loss": 0.3208252191543579, "step": 574 }, { "epoch": 0.7833787465940054, "grad_norm": 0.4188457429409027, "learning_rate": 1.3920968021910872e-05, "loss": 0.3940437436103821, "step": 575 }, { "epoch": 0.784741144414169, "grad_norm": 0.3824058175086975, "learning_rate": 1.3900649091742734e-05, "loss": 0.3468947410583496, "step": 576 }, { "epoch": 0.7861035422343324, "grad_norm": 0.36254164576530457, "learning_rate": 1.3880311149625141e-05, "loss": 0.3099260926246643, "step": 577 }, { "epoch": 0.7874659400544959, "grad_norm": 0.34511882066726685, "learning_rate": 1.3859954294686185e-05, "loss": 0.2614838778972626, "step": 578 }, { "epoch": 0.7888283378746594, "grad_norm": 0.42470481991767883, "learning_rate": 1.3839578626146143e-05, "loss": 0.36970824003219604, "step": 579 }, { "epoch": 0.7901907356948229, "grad_norm": 0.43448764085769653, "learning_rate": 1.3819184243317008e-05, "loss": 0.38234907388687134, "step": 580 }, { "epoch": 0.7915531335149864, "grad_norm": 0.41992104053497314, "learning_rate": 1.3798771245601961e-05, "loss": 0.3760548233985901, "step": 581 }, { "epoch": 0.7929155313351499, "grad_norm": 0.3761197328567505, "learning_rate": 1.3778339732494933e-05, "loss": 0.31548407673835754, "step": 582 }, { "epoch": 0.7942779291553134, "grad_norm": 0.40058112144470215, "learning_rate": 1.3757889803580085e-05, "loss": 0.3305339515209198, "step": 583 }, { "epoch": 0.7956403269754768, "grad_norm": 0.4397111237049103, "learning_rate": 1.373742155853135e-05, "loss": 0.39684200286865234, "step": 584 }, { "epoch": 0.7970027247956403, "grad_norm": 0.3730669319629669, "learning_rate": 1.3716935097111926e-05, "loss": 0.30914586782455444, "step": 585 }, { "epoch": 0.7983651226158038, "grad_norm": 0.3845134377479553, "learning_rate": 1.3696430519173802e-05, "loss": 0.3142814040184021, "step": 586 }, { "epoch": 0.7997275204359673, "grad_norm": 0.4052923619747162, "learning_rate": 1.367590792465727e-05, "loss": 0.33971768617630005, "step": 587 }, { "epoch": 0.8010899182561307, "grad_norm": 0.35842764377593994, "learning_rate": 1.3655367413590433e-05, "loss": 0.3152307868003845, "step": 588 }, { "epoch": 0.8024523160762943, "grad_norm": 0.41765162348747253, "learning_rate": 1.3634809086088715e-05, "loss": 0.36689841747283936, "step": 589 }, { "epoch": 0.8038147138964578, "grad_norm": 0.3411005735397339, "learning_rate": 1.361423304235439e-05, "loss": 0.2985839545726776, "step": 590 }, { "epoch": 0.8051771117166212, "grad_norm": 0.38796186447143555, "learning_rate": 1.359363938267607e-05, "loss": 0.31117764115333557, "step": 591 }, { "epoch": 0.8065395095367848, "grad_norm": 0.4477858543395996, "learning_rate": 1.3573028207428239e-05, "loss": 0.3759269714355469, "step": 592 }, { "epoch": 0.8079019073569482, "grad_norm": 0.3874566853046417, "learning_rate": 1.3552399617070742e-05, "loss": 0.35220903158187866, "step": 593 }, { "epoch": 0.8092643051771117, "grad_norm": 0.3966315686702728, "learning_rate": 1.3531753712148312e-05, "loss": 0.327017605304718, "step": 594 }, { "epoch": 0.8106267029972752, "grad_norm": 0.38722142577171326, "learning_rate": 1.3511090593290073e-05, "loss": 0.3531439006328583, "step": 595 }, { "epoch": 0.8119891008174387, "grad_norm": 0.35546261072158813, "learning_rate": 1.3490410361209051e-05, "loss": 0.29371026158332825, "step": 596 }, { "epoch": 0.8133514986376021, "grad_norm": 0.4084646701812744, "learning_rate": 1.3469713116701683e-05, "loss": 0.3657349944114685, "step": 597 }, { "epoch": 0.8147138964577657, "grad_norm": 0.3828872740268707, "learning_rate": 1.3448998960647324e-05, "loss": 0.27304738759994507, "step": 598 }, { "epoch": 0.8160762942779292, "grad_norm": 0.40250927209854126, "learning_rate": 1.3428267994007756e-05, "loss": 0.36223679780960083, "step": 599 }, { "epoch": 0.8174386920980926, "grad_norm": 0.38634181022644043, "learning_rate": 1.3407520317826697e-05, "loss": 0.32051095366477966, "step": 600 }, { "epoch": 0.8188010899182562, "grad_norm": 0.3875936269760132, "learning_rate": 1.3386756033229314e-05, "loss": 0.31921273469924927, "step": 601 }, { "epoch": 0.8201634877384196, "grad_norm": 0.4128807783126831, "learning_rate": 1.3365975241421712e-05, "loss": 0.35329893231391907, "step": 602 }, { "epoch": 0.8215258855585831, "grad_norm": 0.3846687376499176, "learning_rate": 1.3345178043690463e-05, "loss": 0.3306158185005188, "step": 603 }, { "epoch": 0.8228882833787466, "grad_norm": 0.37342211604118347, "learning_rate": 1.3324364541402102e-05, "loss": 0.29243338108062744, "step": 604 }, { "epoch": 0.8242506811989101, "grad_norm": 0.4314493238925934, "learning_rate": 1.3303534836002629e-05, "loss": 0.3962687849998474, "step": 605 }, { "epoch": 0.8256130790190735, "grad_norm": 0.41795799136161804, "learning_rate": 1.328268902901702e-05, "loss": 0.3593180775642395, "step": 606 }, { "epoch": 0.8269754768392371, "grad_norm": 0.41061070561408997, "learning_rate": 1.326182722204873e-05, "loss": 0.3891078233718872, "step": 607 }, { "epoch": 0.8283378746594006, "grad_norm": 0.3990163803100586, "learning_rate": 1.32409495167792e-05, "loss": 0.3603130578994751, "step": 608 }, { "epoch": 0.829700272479564, "grad_norm": 0.37110787630081177, "learning_rate": 1.3220056014967359e-05, "loss": 0.3168402910232544, "step": 609 }, { "epoch": 0.8310626702997275, "grad_norm": 0.42547863721847534, "learning_rate": 1.3199146818449134e-05, "loss": 0.4026668667793274, "step": 610 }, { "epoch": 0.832425068119891, "grad_norm": 0.47038719058036804, "learning_rate": 1.317822202913694e-05, "loss": 0.44461554288864136, "step": 611 }, { "epoch": 0.8337874659400545, "grad_norm": 0.38207799196243286, "learning_rate": 1.3157281749019199e-05, "loss": 0.3126547336578369, "step": 612 }, { "epoch": 0.8351498637602179, "grad_norm": 0.3733403980731964, "learning_rate": 1.3136326080159836e-05, "loss": 0.31984227895736694, "step": 613 }, { "epoch": 0.8365122615803815, "grad_norm": 0.37421712279319763, "learning_rate": 1.3115355124697775e-05, "loss": 0.3349495232105255, "step": 614 }, { "epoch": 0.837874659400545, "grad_norm": 0.4028952121734619, "learning_rate": 1.3094368984846453e-05, "loss": 0.3623151183128357, "step": 615 }, { "epoch": 0.8392370572207084, "grad_norm": 0.3345983028411865, "learning_rate": 1.3073367762893316e-05, "loss": 0.25584784150123596, "step": 616 }, { "epoch": 0.840599455040872, "grad_norm": 0.4131789803504944, "learning_rate": 1.3052351561199321e-05, "loss": 0.36740627884864807, "step": 617 }, { "epoch": 0.8419618528610354, "grad_norm": 0.38671812415122986, "learning_rate": 1.3031320482198433e-05, "loss": 0.32881784439086914, "step": 618 }, { "epoch": 0.8433242506811989, "grad_norm": 0.43707364797592163, "learning_rate": 1.3010274628397137e-05, "loss": 0.4240247905254364, "step": 619 }, { "epoch": 0.8446866485013624, "grad_norm": 0.37011024355888367, "learning_rate": 1.298921410237392e-05, "loss": 0.3215616047382355, "step": 620 }, { "epoch": 0.8460490463215259, "grad_norm": 0.40730512142181396, "learning_rate": 1.2968139006778797e-05, "loss": 0.3768579661846161, "step": 621 }, { "epoch": 0.8474114441416893, "grad_norm": 0.43396347761154175, "learning_rate": 1.2947049444332782e-05, "loss": 0.41485506296157837, "step": 622 }, { "epoch": 0.8487738419618529, "grad_norm": 0.43405383825302124, "learning_rate": 1.292594551782741e-05, "loss": 0.40304839611053467, "step": 623 }, { "epoch": 0.8501362397820164, "grad_norm": 0.44254007935523987, "learning_rate": 1.2904827330124223e-05, "loss": 0.39402496814727783, "step": 624 }, { "epoch": 0.8514986376021798, "grad_norm": 0.36872732639312744, "learning_rate": 1.2883694984154273e-05, "loss": 0.3053838312625885, "step": 625 }, { "epoch": 0.8528610354223434, "grad_norm": 0.4020664095878601, "learning_rate": 1.2862548582917622e-05, "loss": 0.34385716915130615, "step": 626 }, { "epoch": 0.8542234332425068, "grad_norm": 0.37539398670196533, "learning_rate": 1.2841388229482834e-05, "loss": 0.32522130012512207, "step": 627 }, { "epoch": 0.8555858310626703, "grad_norm": 0.4901396632194519, "learning_rate": 1.2820214026986481e-05, "loss": 0.4514284133911133, "step": 628 }, { "epoch": 0.8569482288828338, "grad_norm": 0.42575520277023315, "learning_rate": 1.2799026078632638e-05, "loss": 0.3785257935523987, "step": 629 }, { "epoch": 0.8583106267029973, "grad_norm": 0.4292484223842621, "learning_rate": 1.2777824487692373e-05, "loss": 0.37280866503715515, "step": 630 }, { "epoch": 0.8596730245231607, "grad_norm": 0.38012853264808655, "learning_rate": 1.2756609357503248e-05, "loss": 0.3435444235801697, "step": 631 }, { "epoch": 0.8610354223433242, "grad_norm": 0.37119728326797485, "learning_rate": 1.2735380791468814e-05, "loss": 0.315696656703949, "step": 632 }, { "epoch": 0.8623978201634878, "grad_norm": 0.4296729564666748, "learning_rate": 1.271413889305812e-05, "loss": 0.3677264451980591, "step": 633 }, { "epoch": 0.8637602179836512, "grad_norm": 0.43633759021759033, "learning_rate": 1.2692883765805188e-05, "loss": 0.42795848846435547, "step": 634 }, { "epoch": 0.8651226158038147, "grad_norm": 0.35888829827308655, "learning_rate": 1.2671615513308524e-05, "loss": 0.307987779378891, "step": 635 }, { "epoch": 0.8664850136239782, "grad_norm": 0.39719316363334656, "learning_rate": 1.2650334239230598e-05, "loss": 0.34023380279541016, "step": 636 }, { "epoch": 0.8678474114441417, "grad_norm": 0.39609214663505554, "learning_rate": 1.2629040047297356e-05, "loss": 0.3457816243171692, "step": 637 }, { "epoch": 0.8692098092643051, "grad_norm": 0.3761802911758423, "learning_rate": 1.2607733041297703e-05, "loss": 0.3295412063598633, "step": 638 }, { "epoch": 0.8705722070844687, "grad_norm": 0.4121301472187042, "learning_rate": 1.2586413325083e-05, "loss": 0.3802829086780548, "step": 639 }, { "epoch": 0.8719346049046321, "grad_norm": 0.42629581689834595, "learning_rate": 1.2565081002566563e-05, "loss": 0.41711223125457764, "step": 640 }, { "epoch": 0.8732970027247956, "grad_norm": 0.4152551591396332, "learning_rate": 1.2543736177723147e-05, "loss": 0.3565431833267212, "step": 641 }, { "epoch": 0.8746594005449592, "grad_norm": 0.3676934838294983, "learning_rate": 1.2522378954588443e-05, "loss": 0.3011196553707123, "step": 642 }, { "epoch": 0.8760217983651226, "grad_norm": 0.3511006534099579, "learning_rate": 1.2501009437258576e-05, "loss": 0.27889248728752136, "step": 643 }, { "epoch": 0.8773841961852861, "grad_norm": 0.33939114212989807, "learning_rate": 1.2479627729889587e-05, "loss": 0.2643985152244568, "step": 644 }, { "epoch": 0.8787465940054496, "grad_norm": 0.46138131618499756, "learning_rate": 1.245823393669694e-05, "loss": 0.431702196598053, "step": 645 }, { "epoch": 0.8801089918256131, "grad_norm": 0.39123064279556274, "learning_rate": 1.2436828161955004e-05, "loss": 0.35548466444015503, "step": 646 }, { "epoch": 0.8814713896457765, "grad_norm": 0.37139931321144104, "learning_rate": 1.2415410509996537e-05, "loss": 0.3073146939277649, "step": 647 }, { "epoch": 0.8828337874659401, "grad_norm": 0.39180418848991394, "learning_rate": 1.2393981085212204e-05, "loss": 0.3435180187225342, "step": 648 }, { "epoch": 0.8841961852861036, "grad_norm": 0.3841586410999298, "learning_rate": 1.2372539992050037e-05, "loss": 0.3478638529777527, "step": 649 }, { "epoch": 0.885558583106267, "grad_norm": 0.4314171373844147, "learning_rate": 1.2351087335014945e-05, "loss": 0.34792008996009827, "step": 650 }, { "epoch": 0.8869209809264306, "grad_norm": 0.36530905961990356, "learning_rate": 1.2329623218668197e-05, "loss": 0.31735897064208984, "step": 651 }, { "epoch": 0.888283378746594, "grad_norm": 0.4034612476825714, "learning_rate": 1.2308147747626926e-05, "loss": 0.3590067923069, "step": 652 }, { "epoch": 0.8896457765667575, "grad_norm": 0.3697197437286377, "learning_rate": 1.2286661026563597e-05, "loss": 0.30383849143981934, "step": 653 }, { "epoch": 0.8910081743869209, "grad_norm": 0.36092400550842285, "learning_rate": 1.2265163160205514e-05, "loss": 0.2998080551624298, "step": 654 }, { "epoch": 0.8923705722070845, "grad_norm": 0.41580548882484436, "learning_rate": 1.2243654253334299e-05, "loss": 0.39810556173324585, "step": 655 }, { "epoch": 0.8937329700272479, "grad_norm": 0.43990781903266907, "learning_rate": 1.2222134410785386e-05, "loss": 0.37231457233428955, "step": 656 }, { "epoch": 0.8950953678474114, "grad_norm": 0.3491821587085724, "learning_rate": 1.2200603737447515e-05, "loss": 0.27115774154663086, "step": 657 }, { "epoch": 0.896457765667575, "grad_norm": 0.41555097699165344, "learning_rate": 1.2179062338262217e-05, "loss": 0.3704499900341034, "step": 658 }, { "epoch": 0.8978201634877384, "grad_norm": 0.41340363025665283, "learning_rate": 1.2157510318223296e-05, "loss": 0.3653028905391693, "step": 659 }, { "epoch": 0.8991825613079019, "grad_norm": 0.42316463589668274, "learning_rate": 1.2135947782376322e-05, "loss": 0.37947019934654236, "step": 660 }, { "epoch": 0.9005449591280654, "grad_norm": 0.4614158570766449, "learning_rate": 1.2114374835818122e-05, "loss": 0.4423688054084778, "step": 661 }, { "epoch": 0.9019073569482289, "grad_norm": 0.3793640434741974, "learning_rate": 1.2092791583696266e-05, "loss": 0.3140786290168762, "step": 662 }, { "epoch": 0.9032697547683923, "grad_norm": 0.41017946600914, "learning_rate": 1.207119813120855e-05, "loss": 0.3691558241844177, "step": 663 }, { "epoch": 0.9046321525885559, "grad_norm": 0.42569923400878906, "learning_rate": 1.2049594583602495e-05, "loss": 0.39558589458465576, "step": 664 }, { "epoch": 0.9059945504087193, "grad_norm": 0.38926219940185547, "learning_rate": 1.2027981046174817e-05, "loss": 0.36622583866119385, "step": 665 }, { "epoch": 0.9073569482288828, "grad_norm": 0.3998047709465027, "learning_rate": 1.2006357624270927e-05, "loss": 0.30081361532211304, "step": 666 }, { "epoch": 0.9087193460490464, "grad_norm": 0.3933253884315491, "learning_rate": 1.198472442328442e-05, "loss": 0.3435978293418884, "step": 667 }, { "epoch": 0.9100817438692098, "grad_norm": 0.3495563864707947, "learning_rate": 1.1963081548656539e-05, "loss": 0.29989689588546753, "step": 668 }, { "epoch": 0.9114441416893733, "grad_norm": 0.4060697853565216, "learning_rate": 1.1941429105875686e-05, "loss": 0.3763880431652069, "step": 669 }, { "epoch": 0.9128065395095368, "grad_norm": 0.33757802844047546, "learning_rate": 1.1919767200476904e-05, "loss": 0.2711639106273651, "step": 670 }, { "epoch": 0.9141689373297003, "grad_norm": 0.3644583523273468, "learning_rate": 1.1898095938041352e-05, "loss": 0.2867870330810547, "step": 671 }, { "epoch": 0.9155313351498637, "grad_norm": 0.4242314100265503, "learning_rate": 1.187641542419579e-05, "loss": 0.3521907925605774, "step": 672 }, { "epoch": 0.9168937329700273, "grad_norm": 0.4531702995300293, "learning_rate": 1.1854725764612078e-05, "loss": 0.40591543912887573, "step": 673 }, { "epoch": 0.9182561307901907, "grad_norm": 0.4164139926433563, "learning_rate": 1.183302706500665e-05, "loss": 0.3550049662590027, "step": 674 }, { "epoch": 0.9196185286103542, "grad_norm": 0.37005481123924255, "learning_rate": 1.181131943114e-05, "loss": 0.30720236897468567, "step": 675 }, { "epoch": 0.9209809264305178, "grad_norm": 0.3972548246383667, "learning_rate": 1.1789602968816172e-05, "loss": 0.318911075592041, "step": 676 }, { "epoch": 0.9223433242506812, "grad_norm": 0.37663185596466064, "learning_rate": 1.1767877783882235e-05, "loss": 0.31429654359817505, "step": 677 }, { "epoch": 0.9237057220708447, "grad_norm": 0.373142808675766, "learning_rate": 1.1746143982227778e-05, "loss": 0.30172014236450195, "step": 678 }, { "epoch": 0.9250681198910081, "grad_norm": 0.42605262994766235, "learning_rate": 1.1724401669784385e-05, "loss": 0.37753045558929443, "step": 679 }, { "epoch": 0.9264305177111717, "grad_norm": 0.41822153329849243, "learning_rate": 1.1702650952525116e-05, "loss": 0.39182257652282715, "step": 680 }, { "epoch": 0.9277929155313351, "grad_norm": 0.39568570256233215, "learning_rate": 1.168089193646401e-05, "loss": 0.33675575256347656, "step": 681 }, { "epoch": 0.9291553133514986, "grad_norm": 0.3839928209781647, "learning_rate": 1.1659124727655546e-05, "loss": 0.34991830587387085, "step": 682 }, { "epoch": 0.9305177111716622, "grad_norm": 0.4019002914428711, "learning_rate": 1.1637349432194137e-05, "loss": 0.3185323476791382, "step": 683 }, { "epoch": 0.9318801089918256, "grad_norm": 0.3919433057308197, "learning_rate": 1.1615566156213609e-05, "loss": 0.31005561351776123, "step": 684 }, { "epoch": 0.9332425068119891, "grad_norm": 0.3654928207397461, "learning_rate": 1.1593775005886687e-05, "loss": 0.3170800507068634, "step": 685 }, { "epoch": 0.9346049046321526, "grad_norm": 0.4315038025379181, "learning_rate": 1.1571976087424478e-05, "loss": 0.4050094485282898, "step": 686 }, { "epoch": 0.9359673024523161, "grad_norm": 0.3850279450416565, "learning_rate": 1.1550169507075939e-05, "loss": 0.3527379631996155, "step": 687 }, { "epoch": 0.9373297002724795, "grad_norm": 0.39894241094589233, "learning_rate": 1.1528355371127396e-05, "loss": 0.35128986835479736, "step": 688 }, { "epoch": 0.9386920980926431, "grad_norm": 0.39317700266838074, "learning_rate": 1.1506533785901977e-05, "loss": 0.32606303691864014, "step": 689 }, { "epoch": 0.9400544959128065, "grad_norm": 0.33535271883010864, "learning_rate": 1.148470485775913e-05, "loss": 0.26627829670906067, "step": 690 }, { "epoch": 0.94141689373297, "grad_norm": 0.38109129667282104, "learning_rate": 1.146286869309409e-05, "loss": 0.3295374810695648, "step": 691 }, { "epoch": 0.9427792915531336, "grad_norm": 0.3924390375614166, "learning_rate": 1.1441025398337365e-05, "loss": 0.3435341417789459, "step": 692 }, { "epoch": 0.944141689373297, "grad_norm": 0.42672890424728394, "learning_rate": 1.141917507995421e-05, "loss": 0.36071181297302246, "step": 693 }, { "epoch": 0.9455040871934605, "grad_norm": 0.417208194732666, "learning_rate": 1.1397317844444125e-05, "loss": 0.38000190258026123, "step": 694 }, { "epoch": 0.946866485013624, "grad_norm": 0.38172468543052673, "learning_rate": 1.137545379834031e-05, "loss": 0.30421239137649536, "step": 695 }, { "epoch": 0.9482288828337875, "grad_norm": 0.32868707180023193, "learning_rate": 1.1353583048209171e-05, "loss": 0.2640570104122162, "step": 696 }, { "epoch": 0.9495912806539509, "grad_norm": 0.39148321747779846, "learning_rate": 1.1331705700649786e-05, "loss": 0.32436051964759827, "step": 697 }, { "epoch": 0.9509536784741145, "grad_norm": 0.4759628176689148, "learning_rate": 1.1309821862293385e-05, "loss": 0.41697192192077637, "step": 698 }, { "epoch": 0.952316076294278, "grad_norm": 0.38073548674583435, "learning_rate": 1.128793163980284e-05, "loss": 0.323738157749176, "step": 699 }, { "epoch": 0.9536784741144414, "grad_norm": 0.3676002025604248, "learning_rate": 1.1266035139872142e-05, "loss": 0.288091778755188, "step": 700 }, { "epoch": 0.9550408719346049, "grad_norm": 0.3463435173034668, "learning_rate": 1.1244132469225872e-05, "loss": 0.29015398025512695, "step": 701 }, { "epoch": 0.9564032697547684, "grad_norm": 0.3797283470630646, "learning_rate": 1.1222223734618689e-05, "loss": 0.34152185916900635, "step": 702 }, { "epoch": 0.9577656675749319, "grad_norm": 0.3729090094566345, "learning_rate": 1.120030904283481e-05, "loss": 0.2951827943325043, "step": 703 }, { "epoch": 0.9591280653950953, "grad_norm": 0.37600177526474, "learning_rate": 1.1178388500687482e-05, "loss": 0.3445882797241211, "step": 704 }, { "epoch": 0.9604904632152589, "grad_norm": 0.3924142122268677, "learning_rate": 1.115646221501848e-05, "loss": 0.34941422939300537, "step": 705 }, { "epoch": 0.9618528610354223, "grad_norm": 0.3414173424243927, "learning_rate": 1.1134530292697558e-05, "loss": 0.2858058214187622, "step": 706 }, { "epoch": 0.9632152588555858, "grad_norm": 0.35240837931632996, "learning_rate": 1.1112592840621954e-05, "loss": 0.29714637994766235, "step": 707 }, { "epoch": 0.9645776566757494, "grad_norm": 0.4237878620624542, "learning_rate": 1.1090649965715852e-05, "loss": 0.3578713536262512, "step": 708 }, { "epoch": 0.9659400544959128, "grad_norm": 0.33575090765953064, "learning_rate": 1.1068701774929868e-05, "loss": 0.2601892352104187, "step": 709 }, { "epoch": 0.9673024523160763, "grad_norm": 0.4071582555770874, "learning_rate": 1.1046748375240532e-05, "loss": 0.3512653410434723, "step": 710 }, { "epoch": 0.9686648501362398, "grad_norm": 0.3828555941581726, "learning_rate": 1.1024789873649761e-05, "loss": 0.3424757719039917, "step": 711 }, { "epoch": 0.9700272479564033, "grad_norm": 0.3985345661640167, "learning_rate": 1.1002826377184334e-05, "loss": 0.3405340313911438, "step": 712 }, { "epoch": 0.9713896457765667, "grad_norm": 0.3681023120880127, "learning_rate": 1.0980857992895381e-05, "loss": 0.30252936482429504, "step": 713 }, { "epoch": 0.9727520435967303, "grad_norm": 0.4133037328720093, "learning_rate": 1.0958884827857853e-05, "loss": 0.39566880464553833, "step": 714 }, { "epoch": 0.9741144414168937, "grad_norm": 0.3889504671096802, "learning_rate": 1.0936906989170004e-05, "loss": 0.3164287805557251, "step": 715 }, { "epoch": 0.9754768392370572, "grad_norm": 0.42561522126197815, "learning_rate": 1.0914924583952864e-05, "loss": 0.39496541023254395, "step": 716 }, { "epoch": 0.9768392370572208, "grad_norm": 0.4179486632347107, "learning_rate": 1.0892937719349723e-05, "loss": 0.3516708016395569, "step": 717 }, { "epoch": 0.9782016348773842, "grad_norm": 0.4315149188041687, "learning_rate": 1.087094650252561e-05, "loss": 0.40251898765563965, "step": 718 }, { "epoch": 0.9795640326975477, "grad_norm": 0.4050588309764862, "learning_rate": 1.0848951040666762e-05, "loss": 0.3581554591655731, "step": 719 }, { "epoch": 0.9809264305177112, "grad_norm": 0.4035855829715729, "learning_rate": 1.0826951440980105e-05, "loss": 0.3374115526676178, "step": 720 }, { "epoch": 0.9822888283378747, "grad_norm": 0.3701610565185547, "learning_rate": 1.0804947810692736e-05, "loss": 0.3004315495491028, "step": 721 }, { "epoch": 0.9836512261580381, "grad_norm": 0.4019452631473541, "learning_rate": 1.07829402570514e-05, "loss": 0.3550060987472534, "step": 722 }, { "epoch": 0.9850136239782016, "grad_norm": 0.4384351968765259, "learning_rate": 1.076092888732196e-05, "loss": 0.4013964831829071, "step": 723 }, { "epoch": 0.9863760217983651, "grad_norm": 0.41561856865882874, "learning_rate": 1.073891380878888e-05, "loss": 0.3638450801372528, "step": 724 }, { "epoch": 0.9877384196185286, "grad_norm": 0.4159381687641144, "learning_rate": 1.0716895128754704e-05, "loss": 0.3947365880012512, "step": 725 }, { "epoch": 0.989100817438692, "grad_norm": 0.3710257112979889, "learning_rate": 1.069487295453952e-05, "loss": 0.32494619488716125, "step": 726 }, { "epoch": 0.9904632152588556, "grad_norm": 0.3763940930366516, "learning_rate": 1.0672847393480466e-05, "loss": 0.32123690843582153, "step": 727 }, { "epoch": 0.9918256130790191, "grad_norm": 0.4262928366661072, "learning_rate": 1.0650818552931162e-05, "loss": 0.344281405210495, "step": 728 }, { "epoch": 0.9931880108991825, "grad_norm": 0.4106824994087219, "learning_rate": 1.0628786540261235e-05, "loss": 0.3853453993797302, "step": 729 }, { "epoch": 0.9945504087193461, "grad_norm": 0.4608916640281677, "learning_rate": 1.0606751462855764e-05, "loss": 0.3871040940284729, "step": 730 }, { "epoch": 0.9959128065395095, "grad_norm": 0.41636621952056885, "learning_rate": 1.0584713428114764e-05, "loss": 0.34777921438217163, "step": 731 }, { "epoch": 0.997275204359673, "grad_norm": 0.41956406831741333, "learning_rate": 1.0562672543452666e-05, "loss": 0.38779330253601074, "step": 732 }, { "epoch": 0.9986376021798365, "grad_norm": 0.35188162326812744, "learning_rate": 1.0540628916297791e-05, "loss": 0.2766571640968323, "step": 733 }, { "epoch": 1.0, "grad_norm": 0.3943222165107727, "learning_rate": 1.0518582654091824e-05, "loss": 0.33342719078063965, "step": 734 } ], "logging_steps": 1, "max_steps": 1468, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.300068371614925e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }