{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 26533,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.000376889156898956,
"grad_norm": 3.3290820403013632,
"learning_rate": 3.391107761868877e-08,
"loss": 0.8407,
"step": 10
},
{
"epoch": 0.000753778313797912,
"grad_norm": 2.5764460749760327,
"learning_rate": 7.15900527505652e-08,
"loss": 0.8487,
"step": 20
},
{
"epoch": 0.0011306674706968681,
"grad_norm": 3.1381979941321165,
"learning_rate": 1.092690278824416e-07,
"loss": 0.8437,
"step": 30
},
{
"epoch": 0.001507556627595824,
"grad_norm": 2.9762629392340583,
"learning_rate": 1.4694800301431802e-07,
"loss": 0.8462,
"step": 40
},
{
"epoch": 0.0018844457844947801,
"grad_norm": 3.8702318858093343,
"learning_rate": 1.8462697814619442e-07,
"loss": 0.8557,
"step": 50
},
{
"epoch": 0.0022613349413937362,
"grad_norm": 2.567740305342297,
"learning_rate": 2.2230595327807085e-07,
"loss": 0.7769,
"step": 60
},
{
"epoch": 0.002638224098292692,
"grad_norm": 2.3831563848971293,
"learning_rate": 2.5998492840994723e-07,
"loss": 0.825,
"step": 70
},
{
"epoch": 0.003015113255191648,
"grad_norm": 2.2771940101499366,
"learning_rate": 2.976639035418237e-07,
"loss": 0.8116,
"step": 80
},
{
"epoch": 0.003392002412090604,
"grad_norm": 1.5832075179588132,
"learning_rate": 3.353428786737001e-07,
"loss": 0.7362,
"step": 90
},
{
"epoch": 0.0037688915689895602,
"grad_norm": 2.3176022862671526,
"learning_rate": 3.7302185380557655e-07,
"loss": 0.776,
"step": 100
},
{
"epoch": 0.004145780725888516,
"grad_norm": 2.3560080639273617,
"learning_rate": 4.107008289374529e-07,
"loss": 0.7496,
"step": 110
},
{
"epoch": 0.0045226698827874725,
"grad_norm": 1.439480969015099,
"learning_rate": 4.4837980406932935e-07,
"loss": 0.7045,
"step": 120
},
{
"epoch": 0.004899559039686428,
"grad_norm": 1.9309695894872856,
"learning_rate": 4.860587792012058e-07,
"loss": 0.7354,
"step": 130
},
{
"epoch": 0.005276448196585384,
"grad_norm": 1.9566795366950147,
"learning_rate": 5.237377543330822e-07,
"loss": 0.7171,
"step": 140
},
{
"epoch": 0.00565333735348434,
"grad_norm": 1.9730496032128708,
"learning_rate": 5.614167294649587e-07,
"loss": 0.7264,
"step": 150
},
{
"epoch": 0.006030226510383296,
"grad_norm": 1.5644394630847538,
"learning_rate": 5.99095704596835e-07,
"loss": 0.6523,
"step": 160
},
{
"epoch": 0.006407115667282253,
"grad_norm": 1.8613164375921365,
"learning_rate": 6.367746797287114e-07,
"loss": 0.6955,
"step": 170
},
{
"epoch": 0.006784004824181208,
"grad_norm": 2.0520868176808023,
"learning_rate": 6.744536548605879e-07,
"loss": 0.7115,
"step": 180
},
{
"epoch": 0.007160893981080164,
"grad_norm": 1.9416771515631597,
"learning_rate": 7.121326299924643e-07,
"loss": 0.6734,
"step": 190
},
{
"epoch": 0.0075377831379791205,
"grad_norm": 2.4867970424407413,
"learning_rate": 7.498116051243407e-07,
"loss": 0.6785,
"step": 200
},
{
"epoch": 0.007914672294878076,
"grad_norm": 1.7774834832854693,
"learning_rate": 7.874905802562172e-07,
"loss": 0.6227,
"step": 210
},
{
"epoch": 0.008291561451777032,
"grad_norm": 1.9812937141799203,
"learning_rate": 8.251695553880935e-07,
"loss": 0.6806,
"step": 220
},
{
"epoch": 0.00866845060867599,
"grad_norm": 2.921310201335651,
"learning_rate": 8.628485305199699e-07,
"loss": 0.6409,
"step": 230
},
{
"epoch": 0.009045339765574945,
"grad_norm": 2.158350529344399,
"learning_rate": 9.005275056518463e-07,
"loss": 0.6757,
"step": 240
},
{
"epoch": 0.0094222289224739,
"grad_norm": 2.0780037382003784,
"learning_rate": 9.382064807837228e-07,
"loss": 0.6759,
"step": 250
},
{
"epoch": 0.009799118079372856,
"grad_norm": 1.9015358166448175,
"learning_rate": 9.758854559155991e-07,
"loss": 0.5856,
"step": 260
},
{
"epoch": 0.010176007236271812,
"grad_norm": 1.7097668450833368,
"learning_rate": 1.0135644310474755e-06,
"loss": 0.6735,
"step": 270
},
{
"epoch": 0.010552896393170768,
"grad_norm": 1.9842362284855692,
"learning_rate": 1.051243406179352e-06,
"loss": 0.6286,
"step": 280
},
{
"epoch": 0.010929785550069725,
"grad_norm": 1.6319392350237532,
"learning_rate": 1.0889223813112285e-06,
"loss": 0.5993,
"step": 290
},
{
"epoch": 0.01130667470696868,
"grad_norm": 1.9629194279960003,
"learning_rate": 1.126601356443105e-06,
"loss": 0.5989,
"step": 300
},
{
"epoch": 0.011683563863867636,
"grad_norm": 1.635011424220465,
"learning_rate": 1.1642803315749811e-06,
"loss": 0.6292,
"step": 310
},
{
"epoch": 0.012060453020766592,
"grad_norm": 2.7449123371579023,
"learning_rate": 1.2019593067068578e-06,
"loss": 0.6444,
"step": 320
},
{
"epoch": 0.012437342177665548,
"grad_norm": 1.7611175230514056,
"learning_rate": 1.2396382818387342e-06,
"loss": 0.6001,
"step": 330
},
{
"epoch": 0.012814231334564505,
"grad_norm": 1.7299587835974657,
"learning_rate": 1.2773172569706106e-06,
"loss": 0.6373,
"step": 340
},
{
"epoch": 0.01319112049146346,
"grad_norm": 2.1190034707614367,
"learning_rate": 1.314996232102487e-06,
"loss": 0.6386,
"step": 350
},
{
"epoch": 0.013568009648362417,
"grad_norm": 1.6507907146985035,
"learning_rate": 1.3526752072343632e-06,
"loss": 0.6025,
"step": 360
},
{
"epoch": 0.013944898805261372,
"grad_norm": 1.6248644379536064,
"learning_rate": 1.3903541823662398e-06,
"loss": 0.633,
"step": 370
},
{
"epoch": 0.014321787962160328,
"grad_norm": 1.8214328463856349,
"learning_rate": 1.4280331574981162e-06,
"loss": 0.6231,
"step": 380
},
{
"epoch": 0.014698677119059285,
"grad_norm": 1.7233452084340422,
"learning_rate": 1.4657121326299926e-06,
"loss": 0.6086,
"step": 390
},
{
"epoch": 0.015075566275958241,
"grad_norm": 1.9949384108347457,
"learning_rate": 1.503391107761869e-06,
"loss": 0.5944,
"step": 400
},
{
"epoch": 0.015452455432857197,
"grad_norm": 1.7891389236926964,
"learning_rate": 1.5410700828937456e-06,
"loss": 0.5935,
"step": 410
},
{
"epoch": 0.015829344589756152,
"grad_norm": 1.9798760306416079,
"learning_rate": 1.5787490580256218e-06,
"loss": 0.6405,
"step": 420
},
{
"epoch": 0.01620623374665511,
"grad_norm": 1.8498442500498027,
"learning_rate": 1.6164280331574982e-06,
"loss": 0.5948,
"step": 430
},
{
"epoch": 0.016583122903554064,
"grad_norm": 1.7813340027075553,
"learning_rate": 1.6541070082893746e-06,
"loss": 0.6369,
"step": 440
},
{
"epoch": 0.01696001206045302,
"grad_norm": 1.9276634385297304,
"learning_rate": 1.691785983421251e-06,
"loss": 0.5752,
"step": 450
},
{
"epoch": 0.01733690121735198,
"grad_norm": 1.6512215587375432,
"learning_rate": 1.7294649585531276e-06,
"loss": 0.5961,
"step": 460
},
{
"epoch": 0.017713790374250932,
"grad_norm": 1.80925998198896,
"learning_rate": 1.7671439336850038e-06,
"loss": 0.6343,
"step": 470
},
{
"epoch": 0.01809067953114989,
"grad_norm": 1.837322385048155,
"learning_rate": 1.8048229088168804e-06,
"loss": 0.6125,
"step": 480
},
{
"epoch": 0.018467568688048844,
"grad_norm": 1.976044009740098,
"learning_rate": 1.8425018839487568e-06,
"loss": 0.5939,
"step": 490
},
{
"epoch": 0.0188444578449478,
"grad_norm": 2.16596793862554,
"learning_rate": 1.880180859080633e-06,
"loss": 0.5832,
"step": 500
},
{
"epoch": 0.01922134700184676,
"grad_norm": 1.7490489143874088,
"learning_rate": 1.9178598342125096e-06,
"loss": 0.6141,
"step": 510
},
{
"epoch": 0.019598236158745713,
"grad_norm": 2.7984276241901394,
"learning_rate": 1.955538809344386e-06,
"loss": 0.5967,
"step": 520
},
{
"epoch": 0.01997512531564467,
"grad_norm": 1.8460062687463663,
"learning_rate": 1.9932177844762624e-06,
"loss": 0.5855,
"step": 530
},
{
"epoch": 0.020352014472543624,
"grad_norm": 3.071868251764471,
"learning_rate": 2.030896759608139e-06,
"loss": 0.5923,
"step": 540
},
{
"epoch": 0.02072890362944258,
"grad_norm": 2.249558954464503,
"learning_rate": 2.0685757347400153e-06,
"loss": 0.6267,
"step": 550
},
{
"epoch": 0.021105792786341535,
"grad_norm": 1.7739493406466322,
"learning_rate": 2.1062547098718917e-06,
"loss": 0.5733,
"step": 560
},
{
"epoch": 0.021482681943240493,
"grad_norm": 1.9571521040237465,
"learning_rate": 2.143933685003768e-06,
"loss": 0.5901,
"step": 570
},
{
"epoch": 0.02185957110013945,
"grad_norm": 1.8862454156341757,
"learning_rate": 2.1816126601356445e-06,
"loss": 0.5668,
"step": 580
},
{
"epoch": 0.022236460257038404,
"grad_norm": 1.9035008144788996,
"learning_rate": 2.219291635267521e-06,
"loss": 0.6003,
"step": 590
},
{
"epoch": 0.02261334941393736,
"grad_norm": 2.1401923526726114,
"learning_rate": 2.2569706103993973e-06,
"loss": 0.6069,
"step": 600
},
{
"epoch": 0.022990238570836315,
"grad_norm": 1.8983635538399102,
"learning_rate": 2.2946495855312737e-06,
"loss": 0.5551,
"step": 610
},
{
"epoch": 0.023367127727735273,
"grad_norm": 1.8341056062808996,
"learning_rate": 2.33232856066315e-06,
"loss": 0.6195,
"step": 620
},
{
"epoch": 0.02374401688463423,
"grad_norm": 1.9001660208603897,
"learning_rate": 2.3700075357950265e-06,
"loss": 0.5787,
"step": 630
},
{
"epoch": 0.024120906041533184,
"grad_norm": 2.024583452910895,
"learning_rate": 2.407686510926903e-06,
"loss": 0.6006,
"step": 640
},
{
"epoch": 0.02449779519843214,
"grad_norm": 1.9671891126956806,
"learning_rate": 2.4453654860587793e-06,
"loss": 0.6045,
"step": 650
},
{
"epoch": 0.024874684355331095,
"grad_norm": 1.9647192549218615,
"learning_rate": 2.483044461190656e-06,
"loss": 0.5632,
"step": 660
},
{
"epoch": 0.025251573512230053,
"grad_norm": 1.6577621142537855,
"learning_rate": 2.5207234363225325e-06,
"loss": 0.5713,
"step": 670
},
{
"epoch": 0.02562846266912901,
"grad_norm": 1.8104039197666246,
"learning_rate": 2.5584024114544085e-06,
"loss": 0.6227,
"step": 680
},
{
"epoch": 0.026005351826027964,
"grad_norm": 1.8269715554295154,
"learning_rate": 2.596081386586285e-06,
"loss": 0.5576,
"step": 690
},
{
"epoch": 0.02638224098292692,
"grad_norm": 2.132660241375296,
"learning_rate": 2.6337603617181617e-06,
"loss": 0.5985,
"step": 700
},
{
"epoch": 0.026759130139825876,
"grad_norm": 1.9004356716576036,
"learning_rate": 2.671439336850038e-06,
"loss": 0.5741,
"step": 710
},
{
"epoch": 0.027136019296724833,
"grad_norm": 1.9904870404606059,
"learning_rate": 2.709118311981914e-06,
"loss": 0.608,
"step": 720
},
{
"epoch": 0.02751290845362379,
"grad_norm": 1.7934156140344455,
"learning_rate": 2.7467972871137905e-06,
"loss": 0.6041,
"step": 730
},
{
"epoch": 0.027889797610522744,
"grad_norm": 1.9452679410394749,
"learning_rate": 2.7844762622456674e-06,
"loss": 0.569,
"step": 740
},
{
"epoch": 0.028266686767421702,
"grad_norm": 1.9646256949492564,
"learning_rate": 2.8221552373775433e-06,
"loss": 0.5566,
"step": 750
},
{
"epoch": 0.028643575924320656,
"grad_norm": 1.6964066105708113,
"learning_rate": 2.8598342125094197e-06,
"loss": 0.5771,
"step": 760
},
{
"epoch": 0.029020465081219613,
"grad_norm": 1.626500474046293,
"learning_rate": 2.8975131876412966e-06,
"loss": 0.6028,
"step": 770
},
{
"epoch": 0.02939735423811857,
"grad_norm": 1.705518791196053,
"learning_rate": 2.935192162773173e-06,
"loss": 0.598,
"step": 780
},
{
"epoch": 0.029774243395017524,
"grad_norm": 2.023608938357471,
"learning_rate": 2.972871137905049e-06,
"loss": 0.599,
"step": 790
},
{
"epoch": 0.030151132551916482,
"grad_norm": 1.9251923030741178,
"learning_rate": 3.0105501130369258e-06,
"loss": 0.5705,
"step": 800
},
{
"epoch": 0.030528021708815436,
"grad_norm": 1.8099259001405903,
"learning_rate": 3.048229088168802e-06,
"loss": 0.5824,
"step": 810
},
{
"epoch": 0.030904910865714393,
"grad_norm": 1.9287634604630308,
"learning_rate": 3.085908063300678e-06,
"loss": 0.5761,
"step": 820
},
{
"epoch": 0.03128180002261335,
"grad_norm": 1.783048816707533,
"learning_rate": 3.123587038432555e-06,
"loss": 0.579,
"step": 830
},
{
"epoch": 0.031658689179512305,
"grad_norm": 1.8166051797381484,
"learning_rate": 3.1612660135644314e-06,
"loss": 0.5877,
"step": 840
},
{
"epoch": 0.03203557833641126,
"grad_norm": 2.038736220774468,
"learning_rate": 3.1989449886963074e-06,
"loss": 0.572,
"step": 850
},
{
"epoch": 0.03241246749331022,
"grad_norm": 1.738601505380381,
"learning_rate": 3.236623963828184e-06,
"loss": 0.5777,
"step": 860
},
{
"epoch": 0.03278935665020918,
"grad_norm": 1.6676309446260524,
"learning_rate": 3.2743029389600606e-06,
"loss": 0.5637,
"step": 870
},
{
"epoch": 0.03316624580710813,
"grad_norm": 2.161220914250185,
"learning_rate": 3.311981914091937e-06,
"loss": 0.5862,
"step": 880
},
{
"epoch": 0.033543134964007085,
"grad_norm": 1.4967702154384854,
"learning_rate": 3.3496608892238134e-06,
"loss": 0.5529,
"step": 890
},
{
"epoch": 0.03392002412090604,
"grad_norm": 2.2279038438220877,
"learning_rate": 3.38733986435569e-06,
"loss": 0.5979,
"step": 900
},
{
"epoch": 0.034296913277805,
"grad_norm": 1.8345971384276711,
"learning_rate": 3.4250188394875662e-06,
"loss": 0.5669,
"step": 910
},
{
"epoch": 0.03467380243470396,
"grad_norm": 1.8142008601019335,
"learning_rate": 3.462697814619443e-06,
"loss": 0.6041,
"step": 920
},
{
"epoch": 0.03505069159160291,
"grad_norm": 1.6693434904318734,
"learning_rate": 3.500376789751319e-06,
"loss": 0.5272,
"step": 930
},
{
"epoch": 0.035427580748501865,
"grad_norm": 1.945433750394992,
"learning_rate": 3.5380557648831954e-06,
"loss": 0.558,
"step": 940
},
{
"epoch": 0.03580446990540082,
"grad_norm": 2.1129599329826614,
"learning_rate": 3.5757347400150723e-06,
"loss": 0.5801,
"step": 950
},
{
"epoch": 0.03618135906229978,
"grad_norm": 1.905120951845058,
"learning_rate": 3.6134137151469482e-06,
"loss": 0.572,
"step": 960
},
{
"epoch": 0.03655824821919874,
"grad_norm": 1.972585987519169,
"learning_rate": 3.6510926902788246e-06,
"loss": 0.5995,
"step": 970
},
{
"epoch": 0.03693513737609769,
"grad_norm": 1.9036055814375914,
"learning_rate": 3.688771665410701e-06,
"loss": 0.5993,
"step": 980
},
{
"epoch": 0.037312026532996645,
"grad_norm": 1.8646464012423685,
"learning_rate": 3.7264506405425774e-06,
"loss": 0.5733,
"step": 990
},
{
"epoch": 0.0376889156898956,
"grad_norm": 2.3089435664838933,
"learning_rate": 3.764129615674454e-06,
"loss": 0.5835,
"step": 1000
},
{
"epoch": 0.03806580484679456,
"grad_norm": 2.17191968568418,
"learning_rate": 3.8018085908063303e-06,
"loss": 0.5412,
"step": 1010
},
{
"epoch": 0.03844269400369352,
"grad_norm": 1.8949936547455895,
"learning_rate": 3.839487565938207e-06,
"loss": 0.5966,
"step": 1020
},
{
"epoch": 0.03881958316059247,
"grad_norm": 2.473982294301116,
"learning_rate": 3.877166541070083e-06,
"loss": 0.5615,
"step": 1030
},
{
"epoch": 0.039196472317491425,
"grad_norm": 1.7089999900943973,
"learning_rate": 3.9148455162019595e-06,
"loss": 0.5752,
"step": 1040
},
{
"epoch": 0.03957336147439038,
"grad_norm": 1.85392931543439,
"learning_rate": 3.952524491333836e-06,
"loss": 0.578,
"step": 1050
},
{
"epoch": 0.03995025063128934,
"grad_norm": 1.4905002182549854,
"learning_rate": 3.990203466465712e-06,
"loss": 0.553,
"step": 1060
},
{
"epoch": 0.04032713978818829,
"grad_norm": 1.7544817835449993,
"learning_rate": 4.027882441597589e-06,
"loss": 0.5564,
"step": 1070
},
{
"epoch": 0.04070402894508725,
"grad_norm": 1.798461388686054,
"learning_rate": 4.065561416729465e-06,
"loss": 0.5635,
"step": 1080
},
{
"epoch": 0.041080918101986205,
"grad_norm": 1.7675958852701312,
"learning_rate": 4.1032403918613415e-06,
"loss": 0.5683,
"step": 1090
},
{
"epoch": 0.04145780725888516,
"grad_norm": 2.1395567530774895,
"learning_rate": 4.140919366993218e-06,
"loss": 0.5678,
"step": 1100
},
{
"epoch": 0.04183469641578412,
"grad_norm": 1.703017057556802,
"learning_rate": 4.178598342125095e-06,
"loss": 0.552,
"step": 1110
},
{
"epoch": 0.04221158557268307,
"grad_norm": 1.6184789818203074,
"learning_rate": 4.216277317256971e-06,
"loss": 0.5655,
"step": 1120
},
{
"epoch": 0.04258847472958203,
"grad_norm": 2.0996907497711574,
"learning_rate": 4.253956292388847e-06,
"loss": 0.5525,
"step": 1130
},
{
"epoch": 0.042965363886480985,
"grad_norm": 1.8890470204414929,
"learning_rate": 4.291635267520724e-06,
"loss": 0.5608,
"step": 1140
},
{
"epoch": 0.04334225304337994,
"grad_norm": 2.080468775436257,
"learning_rate": 4.3293142426526e-06,
"loss": 0.5511,
"step": 1150
},
{
"epoch": 0.0437191422002789,
"grad_norm": 1.8224041675858285,
"learning_rate": 4.366993217784476e-06,
"loss": 0.5727,
"step": 1160
},
{
"epoch": 0.04409603135717785,
"grad_norm": 1.6149909377743559,
"learning_rate": 4.4046721929163536e-06,
"loss": 0.5448,
"step": 1170
},
{
"epoch": 0.04447292051407681,
"grad_norm": 1.9061771805744654,
"learning_rate": 4.442351168048229e-06,
"loss": 0.5637,
"step": 1180
},
{
"epoch": 0.044849809670975765,
"grad_norm": 1.807877264857849,
"learning_rate": 4.4800301431801055e-06,
"loss": 0.5511,
"step": 1190
},
{
"epoch": 0.04522669882787472,
"grad_norm": 2.0181868598887487,
"learning_rate": 4.517709118311983e-06,
"loss": 0.5582,
"step": 1200
},
{
"epoch": 0.04560358798477368,
"grad_norm": 1.573360107816235,
"learning_rate": 4.555388093443859e-06,
"loss": 0.5644,
"step": 1210
},
{
"epoch": 0.04598047714167263,
"grad_norm": 1.771584321526413,
"learning_rate": 4.593067068575735e-06,
"loss": 0.5689,
"step": 1220
},
{
"epoch": 0.04635736629857159,
"grad_norm": 2.0063138861207364,
"learning_rate": 4.630746043707611e-06,
"loss": 0.5724,
"step": 1230
},
{
"epoch": 0.046734255455470546,
"grad_norm": 1.6920600006872095,
"learning_rate": 4.668425018839488e-06,
"loss": 0.5566,
"step": 1240
},
{
"epoch": 0.0471111446123695,
"grad_norm": 1.8971593808774774,
"learning_rate": 4.706103993971364e-06,
"loss": 0.5699,
"step": 1250
},
{
"epoch": 0.04748803376926846,
"grad_norm": 1.587611604567327,
"learning_rate": 4.74378296910324e-06,
"loss": 0.5649,
"step": 1260
},
{
"epoch": 0.04786492292616741,
"grad_norm": 2.0265270332073246,
"learning_rate": 4.781461944235118e-06,
"loss": 0.5374,
"step": 1270
},
{
"epoch": 0.04824181208306637,
"grad_norm": 2.0590285975116083,
"learning_rate": 4.819140919366993e-06,
"loss": 0.576,
"step": 1280
},
{
"epoch": 0.048618701239965326,
"grad_norm": 1.7133539806651839,
"learning_rate": 4.8568198944988696e-06,
"loss": 0.5706,
"step": 1290
},
{
"epoch": 0.04899559039686428,
"grad_norm": 1.8988624169513535,
"learning_rate": 4.894498869630747e-06,
"loss": 0.5552,
"step": 1300
},
{
"epoch": 0.04937247955376324,
"grad_norm": 2.0372135320392086,
"learning_rate": 4.932177844762623e-06,
"loss": 0.5459,
"step": 1310
},
{
"epoch": 0.04974936871066219,
"grad_norm": 1.7794743693654322,
"learning_rate": 4.969856819894499e-06,
"loss": 0.5548,
"step": 1320
},
{
"epoch": 0.05012625786756115,
"grad_norm": 1.7352843761532744,
"learning_rate": 5.007535795026376e-06,
"loss": 0.571,
"step": 1330
},
{
"epoch": 0.050503147024460106,
"grad_norm": 1.8996845237311992,
"learning_rate": 5.0452147701582524e-06,
"loss": 0.5493,
"step": 1340
},
{
"epoch": 0.05088003618135906,
"grad_norm": 2.1612445033194962,
"learning_rate": 5.082893745290128e-06,
"loss": 0.5707,
"step": 1350
},
{
"epoch": 0.05125692533825802,
"grad_norm": 2.022115535247945,
"learning_rate": 5.120572720422004e-06,
"loss": 0.5541,
"step": 1360
},
{
"epoch": 0.05163381449515697,
"grad_norm": 1.714380161573395,
"learning_rate": 5.158251695553881e-06,
"loss": 0.5522,
"step": 1370
},
{
"epoch": 0.05201070365205593,
"grad_norm": 1.8375152499536425,
"learning_rate": 5.195930670685758e-06,
"loss": 0.5607,
"step": 1380
},
{
"epoch": 0.052387592808954886,
"grad_norm": 1.7211038349954213,
"learning_rate": 5.2336096458176345e-06,
"loss": 0.5429,
"step": 1390
},
{
"epoch": 0.05276448196585384,
"grad_norm": 1.9959664354157889,
"learning_rate": 5.271288620949511e-06,
"loss": 0.5656,
"step": 1400
},
{
"epoch": 0.0531413711227528,
"grad_norm": 1.7536569082906184,
"learning_rate": 5.308967596081387e-06,
"loss": 0.5452,
"step": 1410
},
{
"epoch": 0.05351826027965175,
"grad_norm": 1.7231610063386784,
"learning_rate": 5.346646571213263e-06,
"loss": 0.5353,
"step": 1420
},
{
"epoch": 0.05389514943655071,
"grad_norm": 1.8970119256356177,
"learning_rate": 5.384325546345139e-06,
"loss": 0.5302,
"step": 1430
},
{
"epoch": 0.054272038593449666,
"grad_norm": 1.5800174842538417,
"learning_rate": 5.4220045214770165e-06,
"loss": 0.5624,
"step": 1440
},
{
"epoch": 0.05464892775034862,
"grad_norm": 2.0740091270784053,
"learning_rate": 5.459683496608893e-06,
"loss": 0.5377,
"step": 1450
},
{
"epoch": 0.05502581690724758,
"grad_norm": 1.8294455280243227,
"learning_rate": 5.497362471740769e-06,
"loss": 0.5566,
"step": 1460
},
{
"epoch": 0.05540270606414653,
"grad_norm": 1.5838428291746807,
"learning_rate": 5.535041446872646e-06,
"loss": 0.5446,
"step": 1470
},
{
"epoch": 0.05577959522104549,
"grad_norm": 2.1056158207672233,
"learning_rate": 5.572720422004522e-06,
"loss": 0.5726,
"step": 1480
},
{
"epoch": 0.056156484377944446,
"grad_norm": 1.8677258424310819,
"learning_rate": 5.610399397136398e-06,
"loss": 0.5678,
"step": 1490
},
{
"epoch": 0.056533373534843404,
"grad_norm": 2.014882289331746,
"learning_rate": 5.648078372268275e-06,
"loss": 0.5623,
"step": 1500
},
{
"epoch": 0.05691026269174236,
"grad_norm": 1.8459621787433622,
"learning_rate": 5.685757347400151e-06,
"loss": 0.5775,
"step": 1510
},
{
"epoch": 0.05728715184864131,
"grad_norm": 1.7030968368942396,
"learning_rate": 5.723436322532028e-06,
"loss": 0.5456,
"step": 1520
},
{
"epoch": 0.05766404100554027,
"grad_norm": 1.7530142515014413,
"learning_rate": 5.761115297663904e-06,
"loss": 0.5493,
"step": 1530
},
{
"epoch": 0.058040930162439226,
"grad_norm": 1.7511504402842564,
"learning_rate": 5.7987942727957805e-06,
"loss": 0.5633,
"step": 1540
},
{
"epoch": 0.058417819319338184,
"grad_norm": 2.1931311143367607,
"learning_rate": 5.836473247927656e-06,
"loss": 0.5581,
"step": 1550
},
{
"epoch": 0.05879470847623714,
"grad_norm": 3.636694230456593,
"learning_rate": 5.874152223059534e-06,
"loss": 0.546,
"step": 1560
},
{
"epoch": 0.05917159763313609,
"grad_norm": 2.3094095827220302,
"learning_rate": 5.91183119819141e-06,
"loss": 0.5875,
"step": 1570
},
{
"epoch": 0.05954848679003505,
"grad_norm": 2.0011971290567794,
"learning_rate": 5.949510173323286e-06,
"loss": 0.5575,
"step": 1580
},
{
"epoch": 0.059925375946934006,
"grad_norm": 1.6650652223048363,
"learning_rate": 5.9871891484551625e-06,
"loss": 0.5525,
"step": 1590
},
{
"epoch": 0.060302265103832964,
"grad_norm": 1.9533169899266176,
"learning_rate": 6.024868123587039e-06,
"loss": 0.574,
"step": 1600
},
{
"epoch": 0.06067915426073192,
"grad_norm": 1.685943647766806,
"learning_rate": 6.062547098718915e-06,
"loss": 0.5149,
"step": 1610
},
{
"epoch": 0.06105604341763087,
"grad_norm": 1.3728725575465968,
"learning_rate": 6.100226073850791e-06,
"loss": 0.5436,
"step": 1620
},
{
"epoch": 0.06143293257452983,
"grad_norm": 1.8741392657740152,
"learning_rate": 6.137905048982669e-06,
"loss": 0.5628,
"step": 1630
},
{
"epoch": 0.061809821731428786,
"grad_norm": 1.9761429074276111,
"learning_rate": 6.1755840241145446e-06,
"loss": 0.5713,
"step": 1640
},
{
"epoch": 0.062186710888327744,
"grad_norm": 2.03436821453595,
"learning_rate": 6.213262999246421e-06,
"loss": 0.6002,
"step": 1650
},
{
"epoch": 0.0625636000452267,
"grad_norm": 1.832975464096823,
"learning_rate": 6.250941974378297e-06,
"loss": 0.5728,
"step": 1660
},
{
"epoch": 0.06294048920212565,
"grad_norm": 1.633511698263684,
"learning_rate": 6.288620949510174e-06,
"loss": 0.5552,
"step": 1670
},
{
"epoch": 0.06331737835902461,
"grad_norm": 2.090535151172006,
"learning_rate": 6.32629992464205e-06,
"loss": 0.531,
"step": 1680
},
{
"epoch": 0.06369426751592357,
"grad_norm": 1.6050052308654612,
"learning_rate": 6.363978899773927e-06,
"loss": 0.5891,
"step": 1690
},
{
"epoch": 0.06407115667282252,
"grad_norm": 2.212232256420831,
"learning_rate": 6.401657874905803e-06,
"loss": 0.5752,
"step": 1700
},
{
"epoch": 0.06444804582972148,
"grad_norm": 1.877674806453838,
"learning_rate": 6.439336850037679e-06,
"loss": 0.5778,
"step": 1710
},
{
"epoch": 0.06482493498662044,
"grad_norm": 1.8261529051865222,
"learning_rate": 6.477015825169556e-06,
"loss": 0.5437,
"step": 1720
},
{
"epoch": 0.0652018241435194,
"grad_norm": 2.1674429445191032,
"learning_rate": 6.514694800301432e-06,
"loss": 0.5439,
"step": 1730
},
{
"epoch": 0.06557871330041835,
"grad_norm": 1.7036520231489474,
"learning_rate": 6.552373775433309e-06,
"loss": 0.5716,
"step": 1740
},
{
"epoch": 0.0659556024573173,
"grad_norm": 2.031911396789848,
"learning_rate": 6.590052750565186e-06,
"loss": 0.5365,
"step": 1750
},
{
"epoch": 0.06633249161421625,
"grad_norm": 1.784884433990894,
"learning_rate": 6.627731725697062e-06,
"loss": 0.5556,
"step": 1760
},
{
"epoch": 0.06670938077111521,
"grad_norm": 1.7250914216810251,
"learning_rate": 6.665410700828938e-06,
"loss": 0.577,
"step": 1770
},
{
"epoch": 0.06708626992801417,
"grad_norm": 2.0118741851139417,
"learning_rate": 6.703089675960814e-06,
"loss": 0.5792,
"step": 1780
},
{
"epoch": 0.06746315908491313,
"grad_norm": 2.083914596235162,
"learning_rate": 6.740768651092691e-06,
"loss": 0.5817,
"step": 1790
},
{
"epoch": 0.06784004824181208,
"grad_norm": 1.8512470922958455,
"learning_rate": 6.778447626224567e-06,
"loss": 0.5421,
"step": 1800
},
{
"epoch": 0.06821693739871104,
"grad_norm": 1.8612678054005098,
"learning_rate": 6.816126601356444e-06,
"loss": 0.5392,
"step": 1810
},
{
"epoch": 0.06859382655561,
"grad_norm": 1.928991115625655,
"learning_rate": 6.853805576488321e-06,
"loss": 0.5819,
"step": 1820
},
{
"epoch": 0.06897071571250896,
"grad_norm": 1.8008419029962375,
"learning_rate": 6.891484551620197e-06,
"loss": 0.5547,
"step": 1830
},
{
"epoch": 0.06934760486940791,
"grad_norm": 2.0597436169161973,
"learning_rate": 6.929163526752073e-06,
"loss": 0.5599,
"step": 1840
},
{
"epoch": 0.06972449402630686,
"grad_norm": 2.0570020874528567,
"learning_rate": 6.966842501883949e-06,
"loss": 0.5694,
"step": 1850
},
{
"epoch": 0.07010138318320581,
"grad_norm": 1.5952035385299899,
"learning_rate": 7.0045214770158254e-06,
"loss": 0.5406,
"step": 1860
},
{
"epoch": 0.07047827234010477,
"grad_norm": 1.7271708038895457,
"learning_rate": 7.042200452147702e-06,
"loss": 0.555,
"step": 1870
},
{
"epoch": 0.07085516149700373,
"grad_norm": 2.0101886368535813,
"learning_rate": 7.079879427279579e-06,
"loss": 0.5716,
"step": 1880
},
{
"epoch": 0.07123205065390269,
"grad_norm": 1.6065477858483548,
"learning_rate": 7.1175584024114555e-06,
"loss": 0.5476,
"step": 1890
},
{
"epoch": 0.07160893981080164,
"grad_norm": 2.02286611730447,
"learning_rate": 7.155237377543331e-06,
"loss": 0.5383,
"step": 1900
},
{
"epoch": 0.0719858289677006,
"grad_norm": 2.037314841983119,
"learning_rate": 7.1929163526752075e-06,
"loss": 0.5587,
"step": 1910
},
{
"epoch": 0.07236271812459956,
"grad_norm": 1.8264192102516816,
"learning_rate": 7.230595327807084e-06,
"loss": 0.5553,
"step": 1920
},
{
"epoch": 0.07273960728149852,
"grad_norm": 1.9520150148970015,
"learning_rate": 7.26827430293896e-06,
"loss": 0.567,
"step": 1930
},
{
"epoch": 0.07311649643839747,
"grad_norm": 1.6905896085482799,
"learning_rate": 7.3059532780708375e-06,
"loss": 0.5252,
"step": 1940
},
{
"epoch": 0.07349338559529642,
"grad_norm": 1.8762657820598212,
"learning_rate": 7.343632253202714e-06,
"loss": 0.5696,
"step": 1950
},
{
"epoch": 0.07387027475219538,
"grad_norm": 1.7670005320740287,
"learning_rate": 7.38131122833459e-06,
"loss": 0.5444,
"step": 1960
},
{
"epoch": 0.07424716390909433,
"grad_norm": 1.7538737345614326,
"learning_rate": 7.418990203466466e-06,
"loss": 0.5884,
"step": 1970
},
{
"epoch": 0.07462405306599329,
"grad_norm": 1.515985834449344,
"learning_rate": 7.456669178598342e-06,
"loss": 0.5446,
"step": 1980
},
{
"epoch": 0.07500094222289225,
"grad_norm": 1.5815994265482278,
"learning_rate": 7.494348153730219e-06,
"loss": 0.5591,
"step": 1990
},
{
"epoch": 0.0753778313797912,
"grad_norm": 2.0488961746529486,
"learning_rate": 7.532027128862096e-06,
"loss": 0.5706,
"step": 2000
},
{
"epoch": 0.07575472053669016,
"grad_norm": 1.969269141686112,
"learning_rate": 7.569706103993972e-06,
"loss": 0.558,
"step": 2010
},
{
"epoch": 0.07613160969358912,
"grad_norm": 2.266330513109259,
"learning_rate": 7.607385079125849e-06,
"loss": 0.5396,
"step": 2020
},
{
"epoch": 0.07650849885048808,
"grad_norm": 1.8646005092443476,
"learning_rate": 7.645064054257724e-06,
"loss": 0.5423,
"step": 2030
},
{
"epoch": 0.07688538800738703,
"grad_norm": 1.7399985247018814,
"learning_rate": 7.682743029389602e-06,
"loss": 0.549,
"step": 2040
},
{
"epoch": 0.07726227716428598,
"grad_norm": 1.8255156942684898,
"learning_rate": 7.720422004521477e-06,
"loss": 0.5549,
"step": 2050
},
{
"epoch": 0.07763916632118494,
"grad_norm": 1.946851326618598,
"learning_rate": 7.758100979653354e-06,
"loss": 0.5666,
"step": 2060
},
{
"epoch": 0.07801605547808389,
"grad_norm": 1.9532587449674659,
"learning_rate": 7.795779954785232e-06,
"loss": 0.5711,
"step": 2070
},
{
"epoch": 0.07839294463498285,
"grad_norm": 1.7952693056787907,
"learning_rate": 7.833458929917107e-06,
"loss": 0.5386,
"step": 2080
},
{
"epoch": 0.07876983379188181,
"grad_norm": 1.8221658229163284,
"learning_rate": 7.871137905048983e-06,
"loss": 0.5731,
"step": 2090
},
{
"epoch": 0.07914672294878076,
"grad_norm": 1.833428780602818,
"learning_rate": 7.90881688018086e-06,
"loss": 0.5546,
"step": 2100
},
{
"epoch": 0.07952361210567972,
"grad_norm": 1.7570404308629375,
"learning_rate": 7.946495855312736e-06,
"loss": 0.5323,
"step": 2110
},
{
"epoch": 0.07990050126257868,
"grad_norm": 1.802778650409238,
"learning_rate": 7.984174830444613e-06,
"loss": 0.5422,
"step": 2120
},
{
"epoch": 0.08027739041947764,
"grad_norm": 1.6222520770975064,
"learning_rate": 8.02185380557649e-06,
"loss": 0.5573,
"step": 2130
},
{
"epoch": 0.08065427957637658,
"grad_norm": 1.9266827677857417,
"learning_rate": 8.059532780708366e-06,
"loss": 0.5559,
"step": 2140
},
{
"epoch": 0.08103116873327554,
"grad_norm": 2.050814084562181,
"learning_rate": 8.097211755840241e-06,
"loss": 0.551,
"step": 2150
},
{
"epoch": 0.0814080578901745,
"grad_norm": 1.7585001593613225,
"learning_rate": 8.134890730972118e-06,
"loss": 0.5364,
"step": 2160
},
{
"epoch": 0.08178494704707345,
"grad_norm": 1.8883866215853788,
"learning_rate": 8.172569706103994e-06,
"loss": 0.5888,
"step": 2170
},
{
"epoch": 0.08216183620397241,
"grad_norm": 1.758532254936197,
"learning_rate": 8.210248681235871e-06,
"loss": 0.5712,
"step": 2180
},
{
"epoch": 0.08253872536087137,
"grad_norm": 1.7452201238698966,
"learning_rate": 8.247927656367748e-06,
"loss": 0.562,
"step": 2190
},
{
"epoch": 0.08291561451777033,
"grad_norm": 2.0545039008121373,
"learning_rate": 8.285606631499624e-06,
"loss": 0.5326,
"step": 2200
},
{
"epoch": 0.08329250367466928,
"grad_norm": 1.7697141454393999,
"learning_rate": 8.323285606631501e-06,
"loss": 0.5777,
"step": 2210
},
{
"epoch": 0.08366939283156824,
"grad_norm": 1.9252812449566288,
"learning_rate": 8.360964581763377e-06,
"loss": 0.5747,
"step": 2220
},
{
"epoch": 0.0840462819884672,
"grad_norm": 1.8673410308838145,
"learning_rate": 8.398643556895252e-06,
"loss": 0.5352,
"step": 2230
},
{
"epoch": 0.08442317114536614,
"grad_norm": 1.649056430020697,
"learning_rate": 8.43632253202713e-06,
"loss": 0.5661,
"step": 2240
},
{
"epoch": 0.0848000603022651,
"grad_norm": 1.8813402816460187,
"learning_rate": 8.474001507159007e-06,
"loss": 0.5323,
"step": 2250
},
{
"epoch": 0.08517694945916406,
"grad_norm": 1.8849842368697092,
"learning_rate": 8.511680482290882e-06,
"loss": 0.5502,
"step": 2260
},
{
"epoch": 0.08555383861606301,
"grad_norm": 1.7894509940448735,
"learning_rate": 8.54935945742276e-06,
"loss": 0.5819,
"step": 2270
},
{
"epoch": 0.08593072777296197,
"grad_norm": 1.7627934782136696,
"learning_rate": 8.587038432554635e-06,
"loss": 0.5574,
"step": 2280
},
{
"epoch": 0.08630761692986093,
"grad_norm": 1.7684239573461413,
"learning_rate": 8.62471740768651e-06,
"loss": 0.5421,
"step": 2290
},
{
"epoch": 0.08668450608675989,
"grad_norm": 1.8420421564268776,
"learning_rate": 8.662396382818388e-06,
"loss": 0.5786,
"step": 2300
},
{
"epoch": 0.08706139524365884,
"grad_norm": 1.600074045318061,
"learning_rate": 8.700075357950264e-06,
"loss": 0.535,
"step": 2310
},
{
"epoch": 0.0874382844005578,
"grad_norm": 2.151450395002285,
"learning_rate": 8.737754333082141e-06,
"loss": 0.5821,
"step": 2320
},
{
"epoch": 0.08781517355745676,
"grad_norm": 1.805383691002577,
"learning_rate": 8.775433308214018e-06,
"loss": 0.5787,
"step": 2330
},
{
"epoch": 0.0881920627143557,
"grad_norm": 1.9600049150179448,
"learning_rate": 8.813112283345894e-06,
"loss": 0.5501,
"step": 2340
},
{
"epoch": 0.08856895187125466,
"grad_norm": 2.0847026403429485,
"learning_rate": 8.85079125847777e-06,
"loss": 0.5462,
"step": 2350
},
{
"epoch": 0.08894584102815362,
"grad_norm": 1.7768359780016918,
"learning_rate": 8.888470233609646e-06,
"loss": 0.5837,
"step": 2360
},
{
"epoch": 0.08932273018505257,
"grad_norm": 2.0158186623423386,
"learning_rate": 8.926149208741522e-06,
"loss": 0.5616,
"step": 2370
},
{
"epoch": 0.08969961934195153,
"grad_norm": 1.6581403784334712,
"learning_rate": 8.9638281838734e-06,
"loss": 0.5743,
"step": 2380
},
{
"epoch": 0.09007650849885049,
"grad_norm": 2.5790022328647044,
"learning_rate": 9.001507159005277e-06,
"loss": 0.5744,
"step": 2390
},
{
"epoch": 0.09045339765574945,
"grad_norm": 1.723560039585966,
"learning_rate": 9.039186134137152e-06,
"loss": 0.5602,
"step": 2400
},
{
"epoch": 0.0908302868126484,
"grad_norm": 1.351214600774913,
"learning_rate": 9.07686510926903e-06,
"loss": 0.5331,
"step": 2410
},
{
"epoch": 0.09120717596954736,
"grad_norm": 1.8039553055086206,
"learning_rate": 9.114544084400905e-06,
"loss": 0.5677,
"step": 2420
},
{
"epoch": 0.09158406512644632,
"grad_norm": 1.9123242622126195,
"learning_rate": 9.15222305953278e-06,
"loss": 0.5588,
"step": 2430
},
{
"epoch": 0.09196095428334526,
"grad_norm": 2.006300165478271,
"learning_rate": 9.189902034664658e-06,
"loss": 0.5812,
"step": 2440
},
{
"epoch": 0.09233784344024422,
"grad_norm": 1.9159468022645592,
"learning_rate": 9.227581009796535e-06,
"loss": 0.5372,
"step": 2450
},
{
"epoch": 0.09271473259714318,
"grad_norm": 1.7917577869195462,
"learning_rate": 9.26525998492841e-06,
"loss": 0.5675,
"step": 2460
},
{
"epoch": 0.09309162175404213,
"grad_norm": 5.4204153510281,
"learning_rate": 9.302938960060288e-06,
"loss": 0.5674,
"step": 2470
},
{
"epoch": 0.09346851091094109,
"grad_norm": 1.6618922792887658,
"learning_rate": 9.340617935192163e-06,
"loss": 0.5745,
"step": 2480
},
{
"epoch": 0.09384540006784005,
"grad_norm": 1.665306637026819,
"learning_rate": 9.378296910324039e-06,
"loss": 0.5556,
"step": 2490
},
{
"epoch": 0.094222289224739,
"grad_norm": 1.825366672374041,
"learning_rate": 9.415975885455916e-06,
"loss": 0.5318,
"step": 2500
},
{
"epoch": 0.09459917838163796,
"grad_norm": 1.9696174903266126,
"learning_rate": 9.453654860587793e-06,
"loss": 0.5411,
"step": 2510
},
{
"epoch": 0.09497606753853692,
"grad_norm": 1.7049399302783044,
"learning_rate": 9.491333835719669e-06,
"loss": 0.5706,
"step": 2520
},
{
"epoch": 0.09535295669543588,
"grad_norm": 1.7849968576762765,
"learning_rate": 9.529012810851546e-06,
"loss": 0.5665,
"step": 2530
},
{
"epoch": 0.09572984585233482,
"grad_norm": 1.353686035468744,
"learning_rate": 9.566691785983422e-06,
"loss": 0.5459,
"step": 2540
},
{
"epoch": 0.09610673500923378,
"grad_norm": 1.7623768145461352,
"learning_rate": 9.604370761115297e-06,
"loss": 0.5866,
"step": 2550
},
{
"epoch": 0.09648362416613274,
"grad_norm": 1.9641841168234557,
"learning_rate": 9.642049736247175e-06,
"loss": 0.5466,
"step": 2560
},
{
"epoch": 0.0968605133230317,
"grad_norm": 1.3736072242645836,
"learning_rate": 9.679728711379052e-06,
"loss": 0.5391,
"step": 2570
},
{
"epoch": 0.09723740247993065,
"grad_norm": 1.654177532173035,
"learning_rate": 9.717407686510927e-06,
"loss": 0.5592,
"step": 2580
},
{
"epoch": 0.09761429163682961,
"grad_norm": 1.675077992009493,
"learning_rate": 9.755086661642805e-06,
"loss": 0.5484,
"step": 2590
},
{
"epoch": 0.09799118079372857,
"grad_norm": 1.7745168797445137,
"learning_rate": 9.79276563677468e-06,
"loss": 0.5656,
"step": 2600
},
{
"epoch": 0.09836806995062752,
"grad_norm": 1.665358523138359,
"learning_rate": 9.830444611906557e-06,
"loss": 0.5544,
"step": 2610
},
{
"epoch": 0.09874495910752648,
"grad_norm": 2.373990573362573,
"learning_rate": 9.868123587038433e-06,
"loss": 0.5448,
"step": 2620
},
{
"epoch": 0.09912184826442544,
"grad_norm": 1.6903642288991887,
"learning_rate": 9.90580256217031e-06,
"loss": 0.5562,
"step": 2630
},
{
"epoch": 0.09949873742132438,
"grad_norm": 1.711502598162347,
"learning_rate": 9.943481537302186e-06,
"loss": 0.5582,
"step": 2640
},
{
"epoch": 0.09987562657822334,
"grad_norm": 1.7630192145087855,
"learning_rate": 9.981160512434063e-06,
"loss": 0.5568,
"step": 2650
},
{
"epoch": 0.1002525157351223,
"grad_norm": 1.9391272093641074,
"learning_rate": 9.999998918198758e-06,
"loss": 0.5814,
"step": 2660
},
{
"epoch": 0.10062940489202125,
"grad_norm": 1.1176379107900862,
"learning_rate": 9.999990263791625e-06,
"loss": 0.5327,
"step": 2670
},
{
"epoch": 0.10100629404892021,
"grad_norm": 1.7144355514907554,
"learning_rate": 9.99997295499234e-06,
"loss": 0.5424,
"step": 2680
},
{
"epoch": 0.10138318320581917,
"grad_norm": 1.3867888190170987,
"learning_rate": 9.99994699183086e-06,
"loss": 0.5498,
"step": 2690
},
{
"epoch": 0.10176007236271813,
"grad_norm": 1.6251131139875976,
"learning_rate": 9.999912374352125e-06,
"loss": 0.5526,
"step": 2700
},
{
"epoch": 0.10213696151961708,
"grad_norm": 1.758258285021875,
"learning_rate": 9.999869102616057e-06,
"loss": 0.5647,
"step": 2710
},
{
"epoch": 0.10251385067651604,
"grad_norm": 1.8922253129449746,
"learning_rate": 9.99981717669755e-06,
"loss": 0.569,
"step": 2720
},
{
"epoch": 0.102890739833415,
"grad_norm": 1.6492463045156762,
"learning_rate": 9.999756596686483e-06,
"loss": 0.5639,
"step": 2730
},
{
"epoch": 0.10326762899031394,
"grad_norm": 1.6884719814636986,
"learning_rate": 9.999687362687714e-06,
"loss": 0.5302,
"step": 2740
},
{
"epoch": 0.1036445181472129,
"grad_norm": 1.7663588108147437,
"learning_rate": 9.999609474821078e-06,
"loss": 0.5776,
"step": 2750
},
{
"epoch": 0.10402140730411186,
"grad_norm": 1.6023970608406795,
"learning_rate": 9.999522933221389e-06,
"loss": 0.5609,
"step": 2760
},
{
"epoch": 0.10439829646101081,
"grad_norm": 1.582018849874625,
"learning_rate": 9.999427738038438e-06,
"loss": 0.5432,
"step": 2770
},
{
"epoch": 0.10477518561790977,
"grad_norm": 1.8661113330406751,
"learning_rate": 9.999323889437004e-06,
"loss": 0.5505,
"step": 2780
},
{
"epoch": 0.10515207477480873,
"grad_norm": 2.4377666457132405,
"learning_rate": 9.99921138759683e-06,
"loss": 0.552,
"step": 2790
},
{
"epoch": 0.10552896393170769,
"grad_norm": 1.9769702999570735,
"learning_rate": 9.999090232712648e-06,
"loss": 0.5706,
"step": 2800
},
{
"epoch": 0.10590585308860664,
"grad_norm": 1.5852863285358036,
"learning_rate": 9.998960424994157e-06,
"loss": 0.5468,
"step": 2810
},
{
"epoch": 0.1062827422455056,
"grad_norm": 1.9487316640058903,
"learning_rate": 9.998821964666043e-06,
"loss": 0.5566,
"step": 2820
},
{
"epoch": 0.10665963140240456,
"grad_norm": 1.470091180676127,
"learning_rate": 9.998674851967965e-06,
"loss": 0.5375,
"step": 2830
},
{
"epoch": 0.1070365205593035,
"grad_norm": 1.8861024609523855,
"learning_rate": 9.998519087154555e-06,
"loss": 0.5714,
"step": 2840
},
{
"epoch": 0.10741340971620246,
"grad_norm": 1.8625015944443275,
"learning_rate": 9.998354670495426e-06,
"loss": 0.5718,
"step": 2850
},
{
"epoch": 0.10779029887310142,
"grad_norm": 1.9173353321183049,
"learning_rate": 9.99818160227516e-06,
"loss": 0.5591,
"step": 2860
},
{
"epoch": 0.10816718803000037,
"grad_norm": 1.6563786175924313,
"learning_rate": 9.997999882793323e-06,
"loss": 0.5644,
"step": 2870
},
{
"epoch": 0.10854407718689933,
"grad_norm": 1.8385719564959482,
"learning_rate": 9.997809512364447e-06,
"loss": 0.5574,
"step": 2880
},
{
"epoch": 0.10892096634379829,
"grad_norm": 1.7214173447884336,
"learning_rate": 9.99761049131804e-06,
"loss": 0.5575,
"step": 2890
},
{
"epoch": 0.10929785550069725,
"grad_norm": 2.263948770043836,
"learning_rate": 9.997402819998585e-06,
"loss": 0.5717,
"step": 2900
},
{
"epoch": 0.1096747446575962,
"grad_norm": 1.617474690577473,
"learning_rate": 9.997186498765537e-06,
"loss": 0.5381,
"step": 2910
},
{
"epoch": 0.11005163381449516,
"grad_norm": 1.9206922286733692,
"learning_rate": 9.996961527993322e-06,
"loss": 0.5403,
"step": 2920
},
{
"epoch": 0.11042852297139412,
"grad_norm": 1.6363891592183775,
"learning_rate": 9.996727908071337e-06,
"loss": 0.5504,
"step": 2930
},
{
"epoch": 0.11080541212829306,
"grad_norm": 1.7892900410710342,
"learning_rate": 9.99648563940395e-06,
"loss": 0.5398,
"step": 2940
},
{
"epoch": 0.11118230128519202,
"grad_norm": 1.837754723538967,
"learning_rate": 9.996234722410501e-06,
"loss": 0.5501,
"step": 2950
},
{
"epoch": 0.11155919044209098,
"grad_norm": 1.870686763606362,
"learning_rate": 9.995975157525298e-06,
"loss": 0.5697,
"step": 2960
},
{
"epoch": 0.11193607959898993,
"grad_norm": 1.8670194608446757,
"learning_rate": 9.995706945197616e-06,
"loss": 0.5819,
"step": 2970
},
{
"epoch": 0.11231296875588889,
"grad_norm": 1.764875781288927,
"learning_rate": 9.995430085891698e-06,
"loss": 0.5558,
"step": 2980
},
{
"epoch": 0.11268985791278785,
"grad_norm": 1.6461496237741704,
"learning_rate": 9.995144580086757e-06,
"loss": 0.5528,
"step": 2990
},
{
"epoch": 0.11306674706968681,
"grad_norm": 1.6525100198106808,
"learning_rate": 9.99485042827697e-06,
"loss": 0.5412,
"step": 3000
},
{
"epoch": 0.11344363622658576,
"grad_norm": 1.7111537561130798,
"learning_rate": 9.994547630971476e-06,
"loss": 0.556,
"step": 3010
},
{
"epoch": 0.11382052538348472,
"grad_norm": 1.6948046733289588,
"learning_rate": 9.994236188694384e-06,
"loss": 0.5634,
"step": 3020
},
{
"epoch": 0.11419741454038368,
"grad_norm": 1.7787138971691205,
"learning_rate": 9.99391610198476e-06,
"loss": 0.5099,
"step": 3030
},
{
"epoch": 0.11457430369728262,
"grad_norm": 1.6976810544764316,
"learning_rate": 9.993587371396642e-06,
"loss": 0.5526,
"step": 3040
},
{
"epoch": 0.11495119285418158,
"grad_norm": 1.8408592594962616,
"learning_rate": 9.99324999749902e-06,
"loss": 0.5328,
"step": 3050
},
{
"epoch": 0.11532808201108054,
"grad_norm": 1.7138365866573475,
"learning_rate": 9.992903980875849e-06,
"loss": 0.5233,
"step": 3060
},
{
"epoch": 0.1157049711679795,
"grad_norm": 1.6023041921101526,
"learning_rate": 9.992549322126044e-06,
"loss": 0.5562,
"step": 3070
},
{
"epoch": 0.11608186032487845,
"grad_norm": 1.7101785936588327,
"learning_rate": 9.992186021863475e-06,
"loss": 0.5625,
"step": 3080
},
{
"epoch": 0.11645874948177741,
"grad_norm": 1.7736884499183863,
"learning_rate": 9.991814080716974e-06,
"loss": 0.5624,
"step": 3090
},
{
"epoch": 0.11683563863867637,
"grad_norm": 2.1118966740309313,
"learning_rate": 9.991433499330326e-06,
"loss": 0.569,
"step": 3100
},
{
"epoch": 0.11721252779557532,
"grad_norm": 1.882358555790821,
"learning_rate": 9.991044278362274e-06,
"loss": 0.5556,
"step": 3110
},
{
"epoch": 0.11758941695247428,
"grad_norm": 1.4807120288858666,
"learning_rate": 9.990646418486512e-06,
"loss": 0.5332,
"step": 3120
},
{
"epoch": 0.11796630610937324,
"grad_norm": 1.9456842492772166,
"learning_rate": 9.990239920391687e-06,
"loss": 0.5456,
"step": 3130
},
{
"epoch": 0.11834319526627218,
"grad_norm": 1.9332110812656393,
"learning_rate": 9.9898247847814e-06,
"loss": 0.5914,
"step": 3140
},
{
"epoch": 0.11872008442317114,
"grad_norm": 1.819320948849462,
"learning_rate": 9.989401012374207e-06,
"loss": 0.5212,
"step": 3150
},
{
"epoch": 0.1190969735800701,
"grad_norm": 1.5937443837733034,
"learning_rate": 9.9889686039036e-06,
"loss": 0.5719,
"step": 3160
},
{
"epoch": 0.11947386273696906,
"grad_norm": 1.4724382207969984,
"learning_rate": 9.988527560118033e-06,
"loss": 0.5514,
"step": 3170
},
{
"epoch": 0.11985075189386801,
"grad_norm": 1.7128687990343496,
"learning_rate": 9.988077881780896e-06,
"loss": 0.5342,
"step": 3180
},
{
"epoch": 0.12022764105076697,
"grad_norm": 1.7843398826770018,
"learning_rate": 9.987619569670532e-06,
"loss": 0.5425,
"step": 3190
},
{
"epoch": 0.12060453020766593,
"grad_norm": 1.6747602178489263,
"learning_rate": 9.987152624580223e-06,
"loss": 0.5498,
"step": 3200
},
{
"epoch": 0.12098141936456488,
"grad_norm": 1.9740415780360288,
"learning_rate": 9.986677047318199e-06,
"loss": 0.5787,
"step": 3210
},
{
"epoch": 0.12135830852146384,
"grad_norm": 1.5401247418757107,
"learning_rate": 9.986192838707624e-06,
"loss": 0.529,
"step": 3220
},
{
"epoch": 0.1217351976783628,
"grad_norm": 1.6649769987957272,
"learning_rate": 9.98569999958661e-06,
"loss": 0.5532,
"step": 3230
},
{
"epoch": 0.12211208683526174,
"grad_norm": 2.0615017580666546,
"learning_rate": 9.9851985308082e-06,
"loss": 0.5306,
"step": 3240
},
{
"epoch": 0.1224889759921607,
"grad_norm": 1.7489740809134746,
"learning_rate": 9.98468843324038e-06,
"loss": 0.5728,
"step": 3250
},
{
"epoch": 0.12286586514905966,
"grad_norm": 1.9584508485487868,
"learning_rate": 9.984169707766065e-06,
"loss": 0.5497,
"step": 3260
},
{
"epoch": 0.12324275430595862,
"grad_norm": 1.8986564469950582,
"learning_rate": 9.98364235528311e-06,
"loss": 0.531,
"step": 3270
},
{
"epoch": 0.12361964346285757,
"grad_norm": 1.796715014709634,
"learning_rate": 9.983106376704299e-06,
"loss": 0.5604,
"step": 3280
},
{
"epoch": 0.12399653261975653,
"grad_norm": 1.4479882986534132,
"learning_rate": 9.98256177295735e-06,
"loss": 0.5441,
"step": 3290
},
{
"epoch": 0.12437342177665549,
"grad_norm": 1.9018212839289914,
"learning_rate": 9.982008544984902e-06,
"loss": 0.586,
"step": 3300
},
{
"epoch": 0.12475031093355445,
"grad_norm": 1.6731029649854103,
"learning_rate": 9.981446693744532e-06,
"loss": 0.5422,
"step": 3310
},
{
"epoch": 0.1251272000904534,
"grad_norm": 1.8224277036655119,
"learning_rate": 9.980876220208738e-06,
"loss": 0.574,
"step": 3320
},
{
"epoch": 0.12550408924735235,
"grad_norm": 1.7502821063481213,
"learning_rate": 9.980297125364939e-06,
"loss": 0.5182,
"step": 3330
},
{
"epoch": 0.1258809784042513,
"grad_norm": 1.6833110345844349,
"learning_rate": 9.979709410215483e-06,
"loss": 0.5732,
"step": 3340
},
{
"epoch": 0.12625786756115026,
"grad_norm": 2.1392787884736824,
"learning_rate": 9.979113075777636e-06,
"loss": 0.5421,
"step": 3350
},
{
"epoch": 0.12663475671804922,
"grad_norm": 1.7605201424192343,
"learning_rate": 9.978508123083579e-06,
"loss": 0.5693,
"step": 3360
},
{
"epoch": 0.12701164587494818,
"grad_norm": 1.814620805028767,
"learning_rate": 9.977894553180414e-06,
"loss": 0.5044,
"step": 3370
},
{
"epoch": 0.12738853503184713,
"grad_norm": 1.7333387641365299,
"learning_rate": 9.977272367130161e-06,
"loss": 0.5642,
"step": 3380
},
{
"epoch": 0.1277654241887461,
"grad_norm": 6.5511623986160705,
"learning_rate": 9.97664156600975e-06,
"loss": 0.539,
"step": 3390
},
{
"epoch": 0.12814231334564505,
"grad_norm": 1.8697849048435933,
"learning_rate": 9.97600215091102e-06,
"loss": 0.5723,
"step": 3400
},
{
"epoch": 0.128519202502544,
"grad_norm": 1.840910802370967,
"learning_rate": 9.975354122940725e-06,
"loss": 0.563,
"step": 3410
},
{
"epoch": 0.12889609165944296,
"grad_norm": 1.6208553393789018,
"learning_rate": 9.974697483220526e-06,
"loss": 0.569,
"step": 3420
},
{
"epoch": 0.12927298081634192,
"grad_norm": 1.7217470801750827,
"learning_rate": 9.974032232886988e-06,
"loss": 0.561,
"step": 3430
},
{
"epoch": 0.12964986997324088,
"grad_norm": 1.8150748187971744,
"learning_rate": 9.973358373091578e-06,
"loss": 0.5663,
"step": 3440
},
{
"epoch": 0.13002675913013984,
"grad_norm": 1.8613735972835608,
"learning_rate": 9.972675905000672e-06,
"loss": 0.5412,
"step": 3450
},
{
"epoch": 0.1304036482870388,
"grad_norm": 1.9863460603182592,
"learning_rate": 9.971984829795539e-06,
"loss": 0.5483,
"step": 3460
},
{
"epoch": 0.13078053744393775,
"grad_norm": 1.4584507302056484,
"learning_rate": 9.971285148672347e-06,
"loss": 0.5359,
"step": 3470
},
{
"epoch": 0.1311574266008367,
"grad_norm": 1.6661289692524046,
"learning_rate": 9.970576862842165e-06,
"loss": 0.5518,
"step": 3480
},
{
"epoch": 0.13153431575773564,
"grad_norm": 1.8117322128837796,
"learning_rate": 9.96985997353095e-06,
"loss": 0.5294,
"step": 3490
},
{
"epoch": 0.1319112049146346,
"grad_norm": 1.7990258695528125,
"learning_rate": 9.969134481979554e-06,
"loss": 0.5487,
"step": 3500
},
{
"epoch": 0.13228809407153355,
"grad_norm": 1.6917648391788271,
"learning_rate": 9.968400389443715e-06,
"loss": 0.5488,
"step": 3510
},
{
"epoch": 0.1326649832284325,
"grad_norm": 1.7815906576006262,
"learning_rate": 9.967657697194062e-06,
"loss": 0.5435,
"step": 3520
},
{
"epoch": 0.13304187238533147,
"grad_norm": 1.8019408287680707,
"learning_rate": 9.966906406516106e-06,
"loss": 0.5317,
"step": 3530
},
{
"epoch": 0.13341876154223042,
"grad_norm": 1.5650851056879715,
"learning_rate": 9.966146518710244e-06,
"loss": 0.5443,
"step": 3540
},
{
"epoch": 0.13379565069912938,
"grad_norm": 1.6846258897654576,
"learning_rate": 9.965378035091753e-06,
"loss": 0.5384,
"step": 3550
},
{
"epoch": 0.13417253985602834,
"grad_norm": 1.6977199699358678,
"learning_rate": 9.964600956990785e-06,
"loss": 0.5454,
"step": 3560
},
{
"epoch": 0.1345494290129273,
"grad_norm": 5.101071946029098,
"learning_rate": 9.963815285752369e-06,
"loss": 0.5174,
"step": 3570
},
{
"epoch": 0.13492631816982625,
"grad_norm": 1.5806913588105131,
"learning_rate": 9.963021022736413e-06,
"loss": 0.5455,
"step": 3580
},
{
"epoch": 0.1353032073267252,
"grad_norm": 1.540462795524182,
"learning_rate": 9.962218169317688e-06,
"loss": 0.5385,
"step": 3590
},
{
"epoch": 0.13568009648362417,
"grad_norm": 1.8117995998543306,
"learning_rate": 9.961406726885844e-06,
"loss": 0.547,
"step": 3600
},
{
"epoch": 0.13605698564052313,
"grad_norm": 1.702664311368578,
"learning_rate": 9.960586696845387e-06,
"loss": 0.5425,
"step": 3610
},
{
"epoch": 0.13643387479742208,
"grad_norm": 1.6558884575569928,
"learning_rate": 9.959758080615694e-06,
"loss": 0.5636,
"step": 3620
},
{
"epoch": 0.13681076395432104,
"grad_norm": 1.5110746291185808,
"learning_rate": 9.958920879631002e-06,
"loss": 0.5568,
"step": 3630
},
{
"epoch": 0.13718765311122,
"grad_norm": 2.139574069411458,
"learning_rate": 9.958075095340408e-06,
"loss": 0.505,
"step": 3640
},
{
"epoch": 0.13756454226811896,
"grad_norm": 1.9376102058687341,
"learning_rate": 9.957220729207862e-06,
"loss": 0.5498,
"step": 3650
},
{
"epoch": 0.1379414314250179,
"grad_norm": 2.2672765573662264,
"learning_rate": 9.95635778271217e-06,
"loss": 0.5435,
"step": 3660
},
{
"epoch": 0.13831832058191687,
"grad_norm": 1.5668715176963142,
"learning_rate": 9.955486257346995e-06,
"loss": 0.5589,
"step": 3670
},
{
"epoch": 0.13869520973881583,
"grad_norm": 1.7293817862316867,
"learning_rate": 9.954606154620841e-06,
"loss": 0.5536,
"step": 3680
},
{
"epoch": 0.13907209889571476,
"grad_norm": 1.5203042242279239,
"learning_rate": 9.953717476057062e-06,
"loss": 0.5157,
"step": 3690
},
{
"epoch": 0.13944898805261371,
"grad_norm": 1.6809131976448295,
"learning_rate": 9.952820223193856e-06,
"loss": 0.5581,
"step": 3700
},
{
"epoch": 0.13982587720951267,
"grad_norm": 1.6390460530372517,
"learning_rate": 9.951914397584262e-06,
"loss": 0.518,
"step": 3710
},
{
"epoch": 0.14020276636641163,
"grad_norm": 1.756920987537259,
"learning_rate": 9.951000000796159e-06,
"loss": 0.5561,
"step": 3720
},
{
"epoch": 0.1405796555233106,
"grad_norm": 1.6270655698945786,
"learning_rate": 9.950077034412254e-06,
"loss": 0.5417,
"step": 3730
},
{
"epoch": 0.14095654468020954,
"grad_norm": 1.7443369340079147,
"learning_rate": 9.949145500030099e-06,
"loss": 0.5546,
"step": 3740
},
{
"epoch": 0.1413334338371085,
"grad_norm": 1.9623236145537395,
"learning_rate": 9.948205399262066e-06,
"loss": 0.5614,
"step": 3750
},
{
"epoch": 0.14171032299400746,
"grad_norm": 1.8286963445588205,
"learning_rate": 9.94725673373536e-06,
"loss": 0.5194,
"step": 3760
},
{
"epoch": 0.14208721215090642,
"grad_norm": 1.6490782226850094,
"learning_rate": 9.946299505092008e-06,
"loss": 0.5133,
"step": 3770
},
{
"epoch": 0.14246410130780537,
"grad_norm": 1.459857790438441,
"learning_rate": 9.945333714988859e-06,
"loss": 0.5274,
"step": 3780
},
{
"epoch": 0.14284099046470433,
"grad_norm": 2.766851100980767,
"learning_rate": 9.944359365097584e-06,
"loss": 0.5637,
"step": 3790
},
{
"epoch": 0.1432178796216033,
"grad_norm": 1.773166799745728,
"learning_rate": 9.943376457104665e-06,
"loss": 0.5431,
"step": 3800
},
{
"epoch": 0.14359476877850225,
"grad_norm": 1.8707486173547303,
"learning_rate": 9.942384992711402e-06,
"loss": 0.5413,
"step": 3810
},
{
"epoch": 0.1439716579354012,
"grad_norm": 1.789830817687169,
"learning_rate": 9.9413849736339e-06,
"loss": 0.5488,
"step": 3820
},
{
"epoch": 0.14434854709230016,
"grad_norm": 1.7488430625940725,
"learning_rate": 9.940376401603077e-06,
"loss": 0.5345,
"step": 3830
},
{
"epoch": 0.14472543624919912,
"grad_norm": 1.5053305184156898,
"learning_rate": 9.939359278364648e-06,
"loss": 0.5323,
"step": 3840
},
{
"epoch": 0.14510232540609808,
"grad_norm": 1.7325706869085533,
"learning_rate": 9.938333605679137e-06,
"loss": 0.549,
"step": 3850
},
{
"epoch": 0.14547921456299703,
"grad_norm": 2.298650093349881,
"learning_rate": 9.937299385321858e-06,
"loss": 0.5629,
"step": 3860
},
{
"epoch": 0.145856103719896,
"grad_norm": 2.010120739235166,
"learning_rate": 9.936256619082928e-06,
"loss": 0.5546,
"step": 3870
},
{
"epoch": 0.14623299287679495,
"grad_norm": 1.6730215675721205,
"learning_rate": 9.935205308767251e-06,
"loss": 0.5592,
"step": 3880
},
{
"epoch": 0.14660988203369388,
"grad_norm": 1.623819818612049,
"learning_rate": 9.934145456194522e-06,
"loss": 0.5522,
"step": 3890
},
{
"epoch": 0.14698677119059284,
"grad_norm": 1.8737714445343006,
"learning_rate": 9.933077063199217e-06,
"loss": 0.5395,
"step": 3900
},
{
"epoch": 0.1473636603474918,
"grad_norm": 1.8896976945133552,
"learning_rate": 9.9320001316306e-06,
"loss": 0.5155,
"step": 3910
},
{
"epoch": 0.14774054950439075,
"grad_norm": 1.7999273816656425,
"learning_rate": 9.930914663352713e-06,
"loss": 0.552,
"step": 3920
},
{
"epoch": 0.1481174386612897,
"grad_norm": 1.5451191295487272,
"learning_rate": 9.929820660244372e-06,
"loss": 0.5308,
"step": 3930
},
{
"epoch": 0.14849432781818867,
"grad_norm": 1.6325314113099194,
"learning_rate": 9.928718124199168e-06,
"loss": 0.527,
"step": 3940
},
{
"epoch": 0.14887121697508762,
"grad_norm": 2.044279798697509,
"learning_rate": 9.927607057125461e-06,
"loss": 0.5257,
"step": 3950
},
{
"epoch": 0.14924810613198658,
"grad_norm": 1.6860219589179097,
"learning_rate": 9.926487460946375e-06,
"loss": 0.5179,
"step": 3960
},
{
"epoch": 0.14962499528888554,
"grad_norm": 1.8333348047186415,
"learning_rate": 9.9253593375998e-06,
"loss": 0.5467,
"step": 3970
},
{
"epoch": 0.1500018844457845,
"grad_norm": 1.8252914858844747,
"learning_rate": 9.924222689038384e-06,
"loss": 0.5698,
"step": 3980
},
{
"epoch": 0.15037877360268345,
"grad_norm": 1.636078330532333,
"learning_rate": 9.923077517229531e-06,
"loss": 0.5817,
"step": 3990
},
{
"epoch": 0.1507556627595824,
"grad_norm": 1.6310153659702036,
"learning_rate": 9.921923824155399e-06,
"loss": 0.5632,
"step": 4000
},
{
"epoch": 0.15113255191648137,
"grad_norm": 1.703545293572265,
"learning_rate": 9.920761611812892e-06,
"loss": 0.5362,
"step": 4010
},
{
"epoch": 0.15150944107338032,
"grad_norm": 1.5247266902845538,
"learning_rate": 9.919590882213666e-06,
"loss": 0.5847,
"step": 4020
},
{
"epoch": 0.15188633023027928,
"grad_norm": 1.8205620209169613,
"learning_rate": 9.91841163738411e-06,
"loss": 0.5758,
"step": 4030
},
{
"epoch": 0.15226321938717824,
"grad_norm": 1.6632230310524354,
"learning_rate": 9.917223879365366e-06,
"loss": 0.5373,
"step": 4040
},
{
"epoch": 0.1526401085440772,
"grad_norm": 1.6718013294159164,
"learning_rate": 9.916027610213293e-06,
"loss": 0.5557,
"step": 4050
},
{
"epoch": 0.15301699770097615,
"grad_norm": 1.5988376223664982,
"learning_rate": 9.914822831998498e-06,
"loss": 0.5453,
"step": 4060
},
{
"epoch": 0.1533938868578751,
"grad_norm": 1.659616299752992,
"learning_rate": 9.913609546806306e-06,
"loss": 0.5504,
"step": 4070
},
{
"epoch": 0.15377077601477407,
"grad_norm": 1.4037121435179074,
"learning_rate": 9.912387756736773e-06,
"loss": 0.5198,
"step": 4080
},
{
"epoch": 0.154147665171673,
"grad_norm": 1.5183583233165177,
"learning_rate": 9.911157463904673e-06,
"loss": 0.5566,
"step": 4090
},
{
"epoch": 0.15452455432857196,
"grad_norm": 1.680791972841532,
"learning_rate": 9.909918670439494e-06,
"loss": 0.5403,
"step": 4100
},
{
"epoch": 0.1549014434854709,
"grad_norm": 1.7482553689079507,
"learning_rate": 9.908671378485445e-06,
"loss": 0.5445,
"step": 4110
},
{
"epoch": 0.15527833264236987,
"grad_norm": 1.3584719888404202,
"learning_rate": 9.907415590201442e-06,
"loss": 0.5459,
"step": 4120
},
{
"epoch": 0.15565522179926883,
"grad_norm": 1.6501271620909048,
"learning_rate": 9.906151307761101e-06,
"loss": 0.5549,
"step": 4130
},
{
"epoch": 0.15603211095616779,
"grad_norm": 2.656483215686597,
"learning_rate": 9.90487853335275e-06,
"loss": 0.5504,
"step": 4140
},
{
"epoch": 0.15640900011306674,
"grad_norm": 1.9444522460365512,
"learning_rate": 9.903597269179406e-06,
"loss": 0.5713,
"step": 4150
},
{
"epoch": 0.1567858892699657,
"grad_norm": 1.8488494105763427,
"learning_rate": 9.902307517458791e-06,
"loss": 0.5422,
"step": 4160
},
{
"epoch": 0.15716277842686466,
"grad_norm": 1.8661983415681136,
"learning_rate": 9.90100928042331e-06,
"loss": 0.5326,
"step": 4170
},
{
"epoch": 0.15753966758376362,
"grad_norm": 1.5515586490627968,
"learning_rate": 9.89970256032006e-06,
"loss": 0.5282,
"step": 4180
},
{
"epoch": 0.15791655674066257,
"grad_norm": 1.7714176130932602,
"learning_rate": 9.898387359410817e-06,
"loss": 0.576,
"step": 4190
},
{
"epoch": 0.15829344589756153,
"grad_norm": 1.76322105124278,
"learning_rate": 9.89706367997204e-06,
"loss": 0.578,
"step": 4200
},
{
"epoch": 0.1586703350544605,
"grad_norm": 1.8101667561564285,
"learning_rate": 9.89573152429486e-06,
"loss": 0.5534,
"step": 4210
},
{
"epoch": 0.15904722421135944,
"grad_norm": 1.4490662091941684,
"learning_rate": 9.894390894685082e-06,
"loss": 0.5771,
"step": 4220
},
{
"epoch": 0.1594241133682584,
"grad_norm": 1.540551931413724,
"learning_rate": 9.893041793463176e-06,
"loss": 0.5581,
"step": 4230
},
{
"epoch": 0.15980100252515736,
"grad_norm": 1.7528415875090508,
"learning_rate": 9.89168422296428e-06,
"loss": 0.5631,
"step": 4240
},
{
"epoch": 0.16017789168205632,
"grad_norm": 1.8668229751083296,
"learning_rate": 9.890318185538183e-06,
"loss": 0.5586,
"step": 4250
},
{
"epoch": 0.16055478083895527,
"grad_norm": 1.490437975787403,
"learning_rate": 9.88894368354934e-06,
"loss": 0.5478,
"step": 4260
},
{
"epoch": 0.16093166999585423,
"grad_norm": 1.8869524648789562,
"learning_rate": 9.887560719376848e-06,
"loss": 0.5419,
"step": 4270
},
{
"epoch": 0.16130855915275316,
"grad_norm": 1.7852914761656666,
"learning_rate": 9.886169295414454e-06,
"loss": 0.5486,
"step": 4280
},
{
"epoch": 0.16168544830965212,
"grad_norm": 1.770938471455256,
"learning_rate": 9.884769414070551e-06,
"loss": 0.5546,
"step": 4290
},
{
"epoch": 0.16206233746655108,
"grad_norm": 2.005194363321807,
"learning_rate": 9.883361077768166e-06,
"loss": 0.5842,
"step": 4300
},
{
"epoch": 0.16243922662345003,
"grad_norm": 1.6800086790787019,
"learning_rate": 9.881944288944964e-06,
"loss": 0.5463,
"step": 4310
},
{
"epoch": 0.162816115780349,
"grad_norm": 2.1202763345448115,
"learning_rate": 9.880519050053239e-06,
"loss": 0.5609,
"step": 4320
},
{
"epoch": 0.16319300493724795,
"grad_norm": 1.7584386869370296,
"learning_rate": 9.879085363559911e-06,
"loss": 0.5503,
"step": 4330
},
{
"epoch": 0.1635698940941469,
"grad_norm": 1.6620875457762982,
"learning_rate": 9.87764323194652e-06,
"loss": 0.5751,
"step": 4340
},
{
"epoch": 0.16394678325104586,
"grad_norm": 1.9926548707738148,
"learning_rate": 9.876192657709227e-06,
"loss": 0.5921,
"step": 4350
},
{
"epoch": 0.16432367240794482,
"grad_norm": 1.820283969890227,
"learning_rate": 9.874733643358806e-06,
"loss": 0.5522,
"step": 4360
},
{
"epoch": 0.16470056156484378,
"grad_norm": 1.3361373532537333,
"learning_rate": 9.873266191420635e-06,
"loss": 0.5341,
"step": 4370
},
{
"epoch": 0.16507745072174274,
"grad_norm": 1.5975055201627169,
"learning_rate": 9.8717903044347e-06,
"loss": 0.5282,
"step": 4380
},
{
"epoch": 0.1654543398786417,
"grad_norm": 1.607578610847404,
"learning_rate": 9.870305984955591e-06,
"loss": 0.545,
"step": 4390
},
{
"epoch": 0.16583122903554065,
"grad_norm": 1.458228796237765,
"learning_rate": 9.868813235552485e-06,
"loss": 0.4811,
"step": 4400
},
{
"epoch": 0.1662081181924396,
"grad_norm": 1.6414986429735758,
"learning_rate": 9.86731205880916e-06,
"loss": 0.5616,
"step": 4410
},
{
"epoch": 0.16658500734933857,
"grad_norm": 1.8317176023383224,
"learning_rate": 9.86580245732397e-06,
"loss": 0.559,
"step": 4420
},
{
"epoch": 0.16696189650623752,
"grad_norm": 1.7709755238978837,
"learning_rate": 9.864284433709859e-06,
"loss": 0.549,
"step": 4430
},
{
"epoch": 0.16733878566313648,
"grad_norm": 1.982880798943511,
"learning_rate": 9.862757990594348e-06,
"loss": 0.5296,
"step": 4440
},
{
"epoch": 0.16771567482003544,
"grad_norm": 1.7614598486822042,
"learning_rate": 9.861223130619525e-06,
"loss": 0.575,
"step": 4450
},
{
"epoch": 0.1680925639769344,
"grad_norm": 1.3611867438499565,
"learning_rate": 9.859679856442058e-06,
"loss": 0.5224,
"step": 4460
},
{
"epoch": 0.16846945313383335,
"grad_norm": 4.687044934475357,
"learning_rate": 9.858128170733166e-06,
"loss": 0.5552,
"step": 4470
},
{
"epoch": 0.16884634229073228,
"grad_norm": 1.59571418195565,
"learning_rate": 9.856568076178637e-06,
"loss": 0.5229,
"step": 4480
},
{
"epoch": 0.16922323144763124,
"grad_norm": 2.3788819118048736,
"learning_rate": 9.85499957547881e-06,
"loss": 0.565,
"step": 4490
},
{
"epoch": 0.1696001206045302,
"grad_norm": 1.8329399955214953,
"learning_rate": 9.853422671348573e-06,
"loss": 0.5343,
"step": 4500
},
{
"epoch": 0.16997700976142915,
"grad_norm": 1.7468489996738379,
"learning_rate": 9.85183736651736e-06,
"loss": 0.5201,
"step": 4510
},
{
"epoch": 0.1703538989183281,
"grad_norm": 1.6120690658450187,
"learning_rate": 9.850243663729151e-06,
"loss": 0.5325,
"step": 4520
},
{
"epoch": 0.17073078807522707,
"grad_norm": 1.676405391419548,
"learning_rate": 9.848641565742451e-06,
"loss": 0.5358,
"step": 4530
},
{
"epoch": 0.17110767723212603,
"grad_norm": 1.5692375310803846,
"learning_rate": 9.847031075330305e-06,
"loss": 0.5469,
"step": 4540
},
{
"epoch": 0.17148456638902498,
"grad_norm": 1.7013387896565297,
"learning_rate": 9.845412195280283e-06,
"loss": 0.5415,
"step": 4550
},
{
"epoch": 0.17186145554592394,
"grad_norm": 1.6239671776400033,
"learning_rate": 9.843784928394473e-06,
"loss": 0.5337,
"step": 4560
},
{
"epoch": 0.1722383447028229,
"grad_norm": 1.7752808165423049,
"learning_rate": 9.842149277489482e-06,
"loss": 0.5581,
"step": 4570
},
{
"epoch": 0.17261523385972186,
"grad_norm": 1.6381139718605806,
"learning_rate": 9.840505245396428e-06,
"loss": 0.5512,
"step": 4580
},
{
"epoch": 0.1729921230166208,
"grad_norm": 1.3980621024689306,
"learning_rate": 9.838852834960937e-06,
"loss": 0.5475,
"step": 4590
},
{
"epoch": 0.17336901217351977,
"grad_norm": 1.8099458012944554,
"learning_rate": 9.837192049043138e-06,
"loss": 0.5288,
"step": 4600
},
{
"epoch": 0.17374590133041873,
"grad_norm": 1.7008772187496575,
"learning_rate": 9.83552289051765e-06,
"loss": 0.5559,
"step": 4610
},
{
"epoch": 0.17412279048731769,
"grad_norm": 7.078798933085531,
"learning_rate": 9.833845362273595e-06,
"loss": 0.538,
"step": 4620
},
{
"epoch": 0.17449967964421664,
"grad_norm": 1.8058836761237198,
"learning_rate": 9.832159467214571e-06,
"loss": 0.5366,
"step": 4630
},
{
"epoch": 0.1748765688011156,
"grad_norm": 1.6224697567837014,
"learning_rate": 9.830465208258667e-06,
"loss": 0.5315,
"step": 4640
},
{
"epoch": 0.17525345795801456,
"grad_norm": 1.9433130297018304,
"learning_rate": 9.828762588338442e-06,
"loss": 0.5412,
"step": 4650
},
{
"epoch": 0.17563034711491352,
"grad_norm": 1.6991522803860752,
"learning_rate": 9.827051610400933e-06,
"loss": 0.5403,
"step": 4660
},
{
"epoch": 0.17600723627181247,
"grad_norm": 1.705218216007172,
"learning_rate": 9.825332277407637e-06,
"loss": 0.5556,
"step": 4670
},
{
"epoch": 0.1763841254287114,
"grad_norm": 1.688592333353397,
"learning_rate": 9.823604592334519e-06,
"loss": 0.551,
"step": 4680
},
{
"epoch": 0.17676101458561036,
"grad_norm": 1.41612023446685,
"learning_rate": 9.821868558171996e-06,
"loss": 0.5173,
"step": 4690
},
{
"epoch": 0.17713790374250932,
"grad_norm": 1.6816541249040635,
"learning_rate": 9.820124177924939e-06,
"loss": 0.5428,
"step": 4700
},
{
"epoch": 0.17751479289940827,
"grad_norm": 1.3749122464689933,
"learning_rate": 9.818371454612664e-06,
"loss": 0.5341,
"step": 4710
},
{
"epoch": 0.17789168205630723,
"grad_norm": 1.426068903641529,
"learning_rate": 9.816610391268927e-06,
"loss": 0.4913,
"step": 4720
},
{
"epoch": 0.1782685712132062,
"grad_norm": 1.5924856232245717,
"learning_rate": 9.814840990941921e-06,
"loss": 0.541,
"step": 4730
},
{
"epoch": 0.17864546037010515,
"grad_norm": 1.4530573814344718,
"learning_rate": 9.813063256694268e-06,
"loss": 0.5088,
"step": 4740
},
{
"epoch": 0.1790223495270041,
"grad_norm": 1.8559077656986256,
"learning_rate": 9.811277191603018e-06,
"loss": 0.5432,
"step": 4750
},
{
"epoch": 0.17939923868390306,
"grad_norm": 1.668223036589219,
"learning_rate": 9.809482798759636e-06,
"loss": 0.5257,
"step": 4760
},
{
"epoch": 0.17977612784080202,
"grad_norm": 1.6197394995482917,
"learning_rate": 9.807680081270005e-06,
"loss": 0.5453,
"step": 4770
},
{
"epoch": 0.18015301699770098,
"grad_norm": 1.7676016632355371,
"learning_rate": 9.805869042254419e-06,
"loss": 0.5454,
"step": 4780
},
{
"epoch": 0.18052990615459993,
"grad_norm": 1.630694868729753,
"learning_rate": 9.804049684847566e-06,
"loss": 0.5327,
"step": 4790
},
{
"epoch": 0.1809067953114989,
"grad_norm": 1.7202050190497062,
"learning_rate": 9.802222012198543e-06,
"loss": 0.5609,
"step": 4800
},
{
"epoch": 0.18128368446839785,
"grad_norm": 1.6203695895840649,
"learning_rate": 9.800386027470836e-06,
"loss": 0.5196,
"step": 4810
},
{
"epoch": 0.1816605736252968,
"grad_norm": 1.7217972266536141,
"learning_rate": 9.798541733842315e-06,
"loss": 0.5357,
"step": 4820
},
{
"epoch": 0.18203746278219576,
"grad_norm": 1.653697053819612,
"learning_rate": 9.796689134505234e-06,
"loss": 0.5222,
"step": 4830
},
{
"epoch": 0.18241435193909472,
"grad_norm": 1.5596182552523383,
"learning_rate": 9.794828232666227e-06,
"loss": 0.5552,
"step": 4840
},
{
"epoch": 0.18279124109599368,
"grad_norm": 1.867472446965036,
"learning_rate": 9.79295903154629e-06,
"loss": 0.5339,
"step": 4850
},
{
"epoch": 0.18316813025289264,
"grad_norm": 1.6702383439481707,
"learning_rate": 9.791081534380796e-06,
"loss": 0.5151,
"step": 4860
},
{
"epoch": 0.1835450194097916,
"grad_norm": 1.3291743718971485,
"learning_rate": 9.789195744419463e-06,
"loss": 0.5607,
"step": 4870
},
{
"epoch": 0.18392190856669052,
"grad_norm": 1.5921907230443966,
"learning_rate": 9.787301664926376e-06,
"loss": 0.5339,
"step": 4880
},
{
"epoch": 0.18429879772358948,
"grad_norm": 1.8736268796358457,
"learning_rate": 9.78539929917996e-06,
"loss": 0.5302,
"step": 4890
},
{
"epoch": 0.18467568688048844,
"grad_norm": 1.8805034291822773,
"learning_rate": 9.783488650472988e-06,
"loss": 0.5445,
"step": 4900
},
{
"epoch": 0.1850525760373874,
"grad_norm": 1.7419614721480812,
"learning_rate": 9.781569722112564e-06,
"loss": 0.5428,
"step": 4910
},
{
"epoch": 0.18542946519428635,
"grad_norm": 1.9542910752412086,
"learning_rate": 9.779642517420129e-06,
"loss": 0.538,
"step": 4920
},
{
"epoch": 0.1858063543511853,
"grad_norm": 1.5668762657315158,
"learning_rate": 9.777707039731443e-06,
"loss": 0.5516,
"step": 4930
},
{
"epoch": 0.18618324350808427,
"grad_norm": 1.603854576231463,
"learning_rate": 9.775763292396591e-06,
"loss": 0.5535,
"step": 4940
},
{
"epoch": 0.18656013266498322,
"grad_norm": 1.675798754667643,
"learning_rate": 9.773811278779972e-06,
"loss": 0.524,
"step": 4950
},
{
"epoch": 0.18693702182188218,
"grad_norm": 2.348035421509291,
"learning_rate": 9.771851002260288e-06,
"loss": 0.5478,
"step": 4960
},
{
"epoch": 0.18731391097878114,
"grad_norm": 1.6072598485464797,
"learning_rate": 9.769882466230546e-06,
"loss": 0.5287,
"step": 4970
},
{
"epoch": 0.1876908001356801,
"grad_norm": 1.746528796476459,
"learning_rate": 9.767905674098051e-06,
"loss": 0.5475,
"step": 4980
},
{
"epoch": 0.18806768929257905,
"grad_norm": 1.386585738427929,
"learning_rate": 9.765920629284396e-06,
"loss": 0.5098,
"step": 4990
},
{
"epoch": 0.188444578449478,
"grad_norm": 1.8437041912808763,
"learning_rate": 9.763927335225458e-06,
"loss": 0.5286,
"step": 5000
},
{
"epoch": 0.18882146760637697,
"grad_norm": 1.9482474264160459,
"learning_rate": 9.761925795371394e-06,
"loss": 0.5442,
"step": 5010
},
{
"epoch": 0.18919835676327593,
"grad_norm": 1.7719952789842508,
"learning_rate": 9.759916013186633e-06,
"loss": 0.5461,
"step": 5020
},
{
"epoch": 0.18957524592017488,
"grad_norm": 1.6946989567888198,
"learning_rate": 9.757897992149868e-06,
"loss": 0.5798,
"step": 5030
},
{
"epoch": 0.18995213507707384,
"grad_norm": 1.4374415950854045,
"learning_rate": 9.755871735754058e-06,
"loss": 0.5406,
"step": 5040
},
{
"epoch": 0.1903290242339728,
"grad_norm": 1.6824526905803994,
"learning_rate": 9.753837247506415e-06,
"loss": 0.574,
"step": 5050
},
{
"epoch": 0.19070591339087176,
"grad_norm": 1.394126270724554,
"learning_rate": 9.751794530928394e-06,
"loss": 0.5327,
"step": 5060
},
{
"epoch": 0.1910828025477707,
"grad_norm": 1.8155055295556857,
"learning_rate": 9.749743589555696e-06,
"loss": 0.5551,
"step": 5070
},
{
"epoch": 0.19145969170466964,
"grad_norm": 1.6444154255861156,
"learning_rate": 9.747684426938259e-06,
"loss": 0.5324,
"step": 5080
},
{
"epoch": 0.1918365808615686,
"grad_norm": 1.693478657126227,
"learning_rate": 9.74561704664025e-06,
"loss": 0.5293,
"step": 5090
},
{
"epoch": 0.19221347001846756,
"grad_norm": 1.7991179686677223,
"learning_rate": 9.743541452240062e-06,
"loss": 0.5306,
"step": 5100
},
{
"epoch": 0.19259035917536652,
"grad_norm": 1.6938448201059806,
"learning_rate": 9.7414576473303e-06,
"loss": 0.5557,
"step": 5110
},
{
"epoch": 0.19296724833226547,
"grad_norm": 1.708193540793155,
"learning_rate": 9.739365635517786e-06,
"loss": 0.5527,
"step": 5120
},
{
"epoch": 0.19334413748916443,
"grad_norm": 1.623469387205484,
"learning_rate": 9.737265420423545e-06,
"loss": 0.5677,
"step": 5130
},
{
"epoch": 0.1937210266460634,
"grad_norm": 1.5336259453452732,
"learning_rate": 9.735157005682802e-06,
"loss": 0.5304,
"step": 5140
},
{
"epoch": 0.19409791580296235,
"grad_norm": 1.6029263301801304,
"learning_rate": 9.733040394944972e-06,
"loss": 0.5036,
"step": 5150
},
{
"epoch": 0.1944748049598613,
"grad_norm": 1.4027867586519995,
"learning_rate": 9.73091559187366e-06,
"loss": 0.5571,
"step": 5160
},
{
"epoch": 0.19485169411676026,
"grad_norm": 1.422018727809762,
"learning_rate": 9.728782600146646e-06,
"loss": 0.512,
"step": 5170
},
{
"epoch": 0.19522858327365922,
"grad_norm": 1.7086928414235798,
"learning_rate": 9.726641423455889e-06,
"loss": 0.5479,
"step": 5180
},
{
"epoch": 0.19560547243055817,
"grad_norm": 1.4865928775148851,
"learning_rate": 9.724492065507512e-06,
"loss": 0.5303,
"step": 5190
},
{
"epoch": 0.19598236158745713,
"grad_norm": 1.7652350174862295,
"learning_rate": 9.722334530021798e-06,
"loss": 0.568,
"step": 5200
},
{
"epoch": 0.1963592507443561,
"grad_norm": 1.6654751000696695,
"learning_rate": 9.720168820733189e-06,
"loss": 0.5425,
"step": 5210
},
{
"epoch": 0.19673613990125505,
"grad_norm": 1.6237026180449996,
"learning_rate": 9.717994941390269e-06,
"loss": 0.5256,
"step": 5220
},
{
"epoch": 0.197113029058154,
"grad_norm": 1.4809115306845473,
"learning_rate": 9.71581289575577e-06,
"loss": 0.5559,
"step": 5230
},
{
"epoch": 0.19748991821505296,
"grad_norm": 1.6736674947367551,
"learning_rate": 9.71362268760655e-06,
"loss": 0.5379,
"step": 5240
},
{
"epoch": 0.19786680737195192,
"grad_norm": 1.6771556923236743,
"learning_rate": 9.711424320733605e-06,
"loss": 0.5539,
"step": 5250
},
{
"epoch": 0.19824369652885088,
"grad_norm": 1.7283773691144386,
"learning_rate": 9.709217798942045e-06,
"loss": 0.5289,
"step": 5260
},
{
"epoch": 0.19862058568574983,
"grad_norm": 1.4438252244390533,
"learning_rate": 9.7070031260511e-06,
"loss": 0.5562,
"step": 5270
},
{
"epoch": 0.19899747484264876,
"grad_norm": 1.612252201951247,
"learning_rate": 9.704780305894107e-06,
"loss": 0.5508,
"step": 5280
},
{
"epoch": 0.19937436399954772,
"grad_norm": 1.520592481703519,
"learning_rate": 9.702549342318503e-06,
"loss": 0.522,
"step": 5290
},
{
"epoch": 0.19975125315644668,
"grad_norm": 1.6900037644390822,
"learning_rate": 9.700310239185824e-06,
"loss": 0.5495,
"step": 5300
},
{
"epoch": 0.20012814231334564,
"grad_norm": 1.709571109702106,
"learning_rate": 9.698063000371693e-06,
"loss": 0.5348,
"step": 5310
},
{
"epoch": 0.2005050314702446,
"grad_norm": 1.730178730299619,
"learning_rate": 9.695807629765815e-06,
"loss": 0.5141,
"step": 5320
},
{
"epoch": 0.20088192062714355,
"grad_norm": 1.6296835391691085,
"learning_rate": 9.69354413127197e-06,
"loss": 0.5361,
"step": 5330
},
{
"epoch": 0.2012588097840425,
"grad_norm": 1.6987810718130525,
"learning_rate": 9.691272508808006e-06,
"loss": 0.5452,
"step": 5340
},
{
"epoch": 0.20163569894094147,
"grad_norm": 1.650234117971004,
"learning_rate": 9.68899276630583e-06,
"loss": 0.5197,
"step": 5350
},
{
"epoch": 0.20201258809784042,
"grad_norm": 1.7189731868307763,
"learning_rate": 9.68670490771141e-06,
"loss": 0.5582,
"step": 5360
},
{
"epoch": 0.20238947725473938,
"grad_norm": 1.629642772885024,
"learning_rate": 9.68440893698476e-06,
"loss": 0.5467,
"step": 5370
},
{
"epoch": 0.20276636641163834,
"grad_norm": 1.714044057568933,
"learning_rate": 9.682104858099932e-06,
"loss": 0.5586,
"step": 5380
},
{
"epoch": 0.2031432555685373,
"grad_norm": 1.7776904536246745,
"learning_rate": 9.679792675045015e-06,
"loss": 0.5527,
"step": 5390
},
{
"epoch": 0.20352014472543625,
"grad_norm": 1.8482409209459776,
"learning_rate": 9.677472391822122e-06,
"loss": 0.5444,
"step": 5400
},
{
"epoch": 0.2038970338823352,
"grad_norm": 2.039065180414514,
"learning_rate": 9.675144012447393e-06,
"loss": 0.5495,
"step": 5410
},
{
"epoch": 0.20427392303923417,
"grad_norm": 1.6471101788933047,
"learning_rate": 9.672807540950976e-06,
"loss": 0.5304,
"step": 5420
},
{
"epoch": 0.20465081219613313,
"grad_norm": 1.7782838901225566,
"learning_rate": 9.670462981377024e-06,
"loss": 0.539,
"step": 5430
},
{
"epoch": 0.20502770135303208,
"grad_norm": 1.5599236676909736,
"learning_rate": 9.668110337783696e-06,
"loss": 0.5278,
"step": 5440
},
{
"epoch": 0.20540459050993104,
"grad_norm": 1.7551032509348412,
"learning_rate": 9.665749614243139e-06,
"loss": 0.5173,
"step": 5450
},
{
"epoch": 0.20578147966683,
"grad_norm": 1.5194286251590126,
"learning_rate": 9.663380814841487e-06,
"loss": 0.5209,
"step": 5460
},
{
"epoch": 0.20615836882372895,
"grad_norm": 1.9229370370271754,
"learning_rate": 9.66100394367885e-06,
"loss": 0.5366,
"step": 5470
},
{
"epoch": 0.20653525798062788,
"grad_norm": 1.6741605984890093,
"learning_rate": 9.658619004869314e-06,
"loss": 0.5479,
"step": 5480
},
{
"epoch": 0.20691214713752684,
"grad_norm": 1.5030012618627315,
"learning_rate": 9.656226002540923e-06,
"loss": 0.5151,
"step": 5490
},
{
"epoch": 0.2072890362944258,
"grad_norm": 1.8829102820108654,
"learning_rate": 9.653824940835683e-06,
"loss": 0.5398,
"step": 5500
},
{
"epoch": 0.20766592545132476,
"grad_norm": 1.7502902417744142,
"learning_rate": 9.651415823909547e-06,
"loss": 0.5356,
"step": 5510
},
{
"epoch": 0.20804281460822371,
"grad_norm": 1.8452401375252057,
"learning_rate": 9.648998655932414e-06,
"loss": 0.5392,
"step": 5520
},
{
"epoch": 0.20841970376512267,
"grad_norm": 1.6582272984116684,
"learning_rate": 9.646573441088112e-06,
"loss": 0.5128,
"step": 5530
},
{
"epoch": 0.20879659292202163,
"grad_norm": 1.5467497129190437,
"learning_rate": 9.644140183574407e-06,
"loss": 0.5551,
"step": 5540
},
{
"epoch": 0.2091734820789206,
"grad_norm": 1.9957256803945376,
"learning_rate": 9.641698887602973e-06,
"loss": 0.5658,
"step": 5550
},
{
"epoch": 0.20955037123581954,
"grad_norm": 1.6565250884502132,
"learning_rate": 9.63924955739941e-06,
"loss": 0.5103,
"step": 5560
},
{
"epoch": 0.2099272603927185,
"grad_norm": 1.730711290180862,
"learning_rate": 9.636792197203218e-06,
"loss": 0.5311,
"step": 5570
},
{
"epoch": 0.21030414954961746,
"grad_norm": 1.6469079302286813,
"learning_rate": 9.634326811267796e-06,
"loss": 0.558,
"step": 5580
},
{
"epoch": 0.21068103870651642,
"grad_norm": 2.1727142231298737,
"learning_rate": 9.631853403860437e-06,
"loss": 0.5467,
"step": 5590
},
{
"epoch": 0.21105792786341537,
"grad_norm": 1.7971977696668728,
"learning_rate": 9.629371979262314e-06,
"loss": 0.5043,
"step": 5600
},
{
"epoch": 0.21143481702031433,
"grad_norm": 10.80688874918979,
"learning_rate": 9.626882541768484e-06,
"loss": 0.5577,
"step": 5610
},
{
"epoch": 0.2118117061772133,
"grad_norm": 2.323690498795392,
"learning_rate": 9.624385095687865e-06,
"loss": 0.571,
"step": 5620
},
{
"epoch": 0.21218859533411225,
"grad_norm": 1.719967739675491,
"learning_rate": 9.621879645343245e-06,
"loss": 0.5411,
"step": 5630
},
{
"epoch": 0.2125654844910112,
"grad_norm": 1.532181810090769,
"learning_rate": 9.619366195071258e-06,
"loss": 0.5415,
"step": 5640
},
{
"epoch": 0.21294237364791016,
"grad_norm": 1.7442472684705252,
"learning_rate": 9.616844749222391e-06,
"loss": 0.555,
"step": 5650
},
{
"epoch": 0.21331926280480912,
"grad_norm": 1.8977510610017796,
"learning_rate": 9.614315312160972e-06,
"loss": 0.5259,
"step": 5660
},
{
"epoch": 0.21369615196170808,
"grad_norm": 1.5012868113061877,
"learning_rate": 9.611777888265153e-06,
"loss": 0.5602,
"step": 5670
},
{
"epoch": 0.214073041118607,
"grad_norm": 1.7027261474156072,
"learning_rate": 9.609232481926917e-06,
"loss": 0.4989,
"step": 5680
},
{
"epoch": 0.21444993027550596,
"grad_norm": 1.6913027886629168,
"learning_rate": 9.606679097552061e-06,
"loss": 0.5414,
"step": 5690
},
{
"epoch": 0.21482681943240492,
"grad_norm": 1.7822692679518921,
"learning_rate": 9.604117739560192e-06,
"loss": 0.5307,
"step": 5700
},
{
"epoch": 0.21520370858930388,
"grad_norm": 1.7521449233376833,
"learning_rate": 9.601548412384718e-06,
"loss": 0.5366,
"step": 5710
},
{
"epoch": 0.21558059774620283,
"grad_norm": 1.4927823655216998,
"learning_rate": 9.59897112047284e-06,
"loss": 0.5457,
"step": 5720
},
{
"epoch": 0.2159574869031018,
"grad_norm": 1.5719965609870012,
"learning_rate": 9.596385868285547e-06,
"loss": 0.5744,
"step": 5730
},
{
"epoch": 0.21633437606000075,
"grad_norm": 1.6228135876923027,
"learning_rate": 9.593792660297603e-06,
"loss": 0.5361,
"step": 5740
},
{
"epoch": 0.2167112652168997,
"grad_norm": 1.869501122929772,
"learning_rate": 9.591191500997545e-06,
"loss": 0.5824,
"step": 5750
},
{
"epoch": 0.21708815437379866,
"grad_norm": 1.6116057455831496,
"learning_rate": 9.588582394887674e-06,
"loss": 0.5208,
"step": 5760
},
{
"epoch": 0.21746504353069762,
"grad_norm": 1.7598646289067172,
"learning_rate": 9.58596534648404e-06,
"loss": 0.5172,
"step": 5770
},
{
"epoch": 0.21784193268759658,
"grad_norm": 1.459229389644524,
"learning_rate": 9.583340360316452e-06,
"loss": 0.5247,
"step": 5780
},
{
"epoch": 0.21821882184449554,
"grad_norm": 1.3090949493088406,
"learning_rate": 9.580707440928442e-06,
"loss": 0.5795,
"step": 5790
},
{
"epoch": 0.2185957110013945,
"grad_norm": 1.6277194603630043,
"learning_rate": 9.578066592877289e-06,
"loss": 0.5192,
"step": 5800
},
{
"epoch": 0.21897260015829345,
"grad_norm": 1.3790098723023494,
"learning_rate": 9.575417820733985e-06,
"loss": 0.5348,
"step": 5810
},
{
"epoch": 0.2193494893151924,
"grad_norm": 1.3968735415508085,
"learning_rate": 9.572761129083245e-06,
"loss": 0.512,
"step": 5820
},
{
"epoch": 0.21972637847209137,
"grad_norm": 1.7812282571098073,
"learning_rate": 9.570096522523484e-06,
"loss": 0.543,
"step": 5830
},
{
"epoch": 0.22010326762899032,
"grad_norm": 1.834347893716607,
"learning_rate": 9.567424005666825e-06,
"loss": 0.5329,
"step": 5840
},
{
"epoch": 0.22048015678588928,
"grad_norm": 1.8129108228485702,
"learning_rate": 9.564743583139076e-06,
"loss": 0.5384,
"step": 5850
},
{
"epoch": 0.22085704594278824,
"grad_norm": 1.6223887647007407,
"learning_rate": 9.562055259579731e-06,
"loss": 0.5268,
"step": 5860
},
{
"epoch": 0.2212339350996872,
"grad_norm": 1.606954920038498,
"learning_rate": 9.559359039641962e-06,
"loss": 0.5414,
"step": 5870
},
{
"epoch": 0.22161082425658613,
"grad_norm": 1.629843365300857,
"learning_rate": 9.556654927992609e-06,
"loss": 0.5242,
"step": 5880
},
{
"epoch": 0.22198771341348508,
"grad_norm": 1.7527089646543905,
"learning_rate": 9.553942929312166e-06,
"loss": 0.5272,
"step": 5890
},
{
"epoch": 0.22236460257038404,
"grad_norm": 1.6581171602617495,
"learning_rate": 9.551223048294785e-06,
"loss": 0.5116,
"step": 5900
},
{
"epoch": 0.222741491727283,
"grad_norm": 2.235545487251775,
"learning_rate": 9.548495289648254e-06,
"loss": 0.5385,
"step": 5910
},
{
"epoch": 0.22311838088418195,
"grad_norm": 2.0034168319325243,
"learning_rate": 9.545759658094007e-06,
"loss": 0.5557,
"step": 5920
},
{
"epoch": 0.2234952700410809,
"grad_norm": 1.4966777192122558,
"learning_rate": 9.543016158367093e-06,
"loss": 0.5405,
"step": 5930
},
{
"epoch": 0.22387215919797987,
"grad_norm": 1.7766848501696213,
"learning_rate": 9.540264795216192e-06,
"loss": 0.5104,
"step": 5940
},
{
"epoch": 0.22424904835487883,
"grad_norm": 1.6670384095637465,
"learning_rate": 9.537505573403582e-06,
"loss": 0.5436,
"step": 5950
},
{
"epoch": 0.22462593751177778,
"grad_norm": 1.5467554161425514,
"learning_rate": 9.534738497705153e-06,
"loss": 0.557,
"step": 5960
},
{
"epoch": 0.22500282666867674,
"grad_norm": 1.7813974332712823,
"learning_rate": 9.531963572910388e-06,
"loss": 0.559,
"step": 5970
},
{
"epoch": 0.2253797158255757,
"grad_norm": 1.7053744486978277,
"learning_rate": 9.529180803822351e-06,
"loss": 0.5489,
"step": 5980
},
{
"epoch": 0.22575660498247466,
"grad_norm": 1.243354982475666,
"learning_rate": 9.526390195257686e-06,
"loss": 0.4922,
"step": 5990
},
{
"epoch": 0.22613349413937361,
"grad_norm": 1.8099541389369207,
"learning_rate": 9.523591752046608e-06,
"loss": 0.5443,
"step": 6000
},
{
"epoch": 0.22651038329627257,
"grad_norm": 1.6063302471417757,
"learning_rate": 9.52078547903289e-06,
"loss": 0.519,
"step": 6010
},
{
"epoch": 0.22688727245317153,
"grad_norm": 1.480600706868146,
"learning_rate": 9.51797138107386e-06,
"loss": 0.516,
"step": 6020
},
{
"epoch": 0.2272641616100705,
"grad_norm": 1.4421735699209184,
"learning_rate": 9.51514946304039e-06,
"loss": 0.5507,
"step": 6030
},
{
"epoch": 0.22764105076696944,
"grad_norm": 1.6185750934964662,
"learning_rate": 9.512319729816886e-06,
"loss": 0.5155,
"step": 6040
},
{
"epoch": 0.2280179399238684,
"grad_norm": 1.6957212178717491,
"learning_rate": 9.50948218630128e-06,
"loss": 0.5272,
"step": 6050
},
{
"epoch": 0.22839482908076736,
"grad_norm": 1.7169594191615623,
"learning_rate": 9.506636837405025e-06,
"loss": 0.5314,
"step": 6060
},
{
"epoch": 0.22877171823766632,
"grad_norm": 1.5477125080399536,
"learning_rate": 9.503783688053085e-06,
"loss": 0.5431,
"step": 6070
},
{
"epoch": 0.22914860739456525,
"grad_norm": 1.6785046483352886,
"learning_rate": 9.500922743183922e-06,
"loss": 0.5126,
"step": 6080
},
{
"epoch": 0.2295254965514642,
"grad_norm": 1.890254359102995,
"learning_rate": 9.498054007749498e-06,
"loss": 0.5337,
"step": 6090
},
{
"epoch": 0.22990238570836316,
"grad_norm": 1.643088361181434,
"learning_rate": 9.495177486715247e-06,
"loss": 0.5793,
"step": 6100
},
{
"epoch": 0.23027927486526212,
"grad_norm": 1.735778264496379,
"learning_rate": 9.492293185060095e-06,
"loss": 0.5551,
"step": 6110
},
{
"epoch": 0.23065616402216108,
"grad_norm": 1.4390586582782128,
"learning_rate": 9.489401107776425e-06,
"loss": 0.5262,
"step": 6120
},
{
"epoch": 0.23103305317906003,
"grad_norm": 1.8374375522238977,
"learning_rate": 9.486501259870078e-06,
"loss": 0.5526,
"step": 6130
},
{
"epoch": 0.231409942335959,
"grad_norm": 1.279671922211445,
"learning_rate": 9.48359364636035e-06,
"loss": 0.5341,
"step": 6140
},
{
"epoch": 0.23178683149285795,
"grad_norm": 1.628189415008674,
"learning_rate": 9.480678272279976e-06,
"loss": 0.5088,
"step": 6150
},
{
"epoch": 0.2321637206497569,
"grad_norm": 1.6997013770455123,
"learning_rate": 9.477755142675125e-06,
"loss": 0.5629,
"step": 6160
},
{
"epoch": 0.23254060980665586,
"grad_norm": 3.5623010997302944,
"learning_rate": 9.474824262605386e-06,
"loss": 0.5488,
"step": 6170
},
{
"epoch": 0.23291749896355482,
"grad_norm": 1.7466850826897597,
"learning_rate": 9.47188563714377e-06,
"loss": 0.534,
"step": 6180
},
{
"epoch": 0.23329438812045378,
"grad_norm": 1.6695888230856442,
"learning_rate": 9.468939271376688e-06,
"loss": 0.5062,
"step": 6190
},
{
"epoch": 0.23367127727735273,
"grad_norm": 1.835339155823516,
"learning_rate": 9.46598517040395e-06,
"loss": 0.5274,
"step": 6200
},
{
"epoch": 0.2340481664342517,
"grad_norm": 1.6711258470843053,
"learning_rate": 9.463023339338758e-06,
"loss": 0.5395,
"step": 6210
},
{
"epoch": 0.23442505559115065,
"grad_norm": 1.7881856636235314,
"learning_rate": 9.46005378330769e-06,
"loss": 0.546,
"step": 6220
},
{
"epoch": 0.2348019447480496,
"grad_norm": 1.6790671962686543,
"learning_rate": 9.457076507450697e-06,
"loss": 0.5061,
"step": 6230
},
{
"epoch": 0.23517883390494856,
"grad_norm": 1.3732357055205477,
"learning_rate": 9.45409151692109e-06,
"loss": 0.521,
"step": 6240
},
{
"epoch": 0.23555572306184752,
"grad_norm": 1.7725150216025622,
"learning_rate": 9.451098816885538e-06,
"loss": 0.5429,
"step": 6250
},
{
"epoch": 0.23593261221874648,
"grad_norm": 1.714678055331704,
"learning_rate": 9.448098412524047e-06,
"loss": 0.5266,
"step": 6260
},
{
"epoch": 0.2363095013756454,
"grad_norm": 1.8750459001067499,
"learning_rate": 9.445090309029965e-06,
"loss": 0.5624,
"step": 6270
},
{
"epoch": 0.23668639053254437,
"grad_norm": 1.7380553727353028,
"learning_rate": 9.442074511609965e-06,
"loss": 0.5223,
"step": 6280
},
{
"epoch": 0.23706327968944332,
"grad_norm": 1.6267385236512102,
"learning_rate": 9.439051025484032e-06,
"loss": 0.5341,
"step": 6290
},
{
"epoch": 0.23744016884634228,
"grad_norm": 1.831598701186007,
"learning_rate": 9.436019855885466e-06,
"loss": 0.5406,
"step": 6300
},
{
"epoch": 0.23781705800324124,
"grad_norm": 2.6302827853025486,
"learning_rate": 9.432981008060861e-06,
"loss": 0.5331,
"step": 6310
},
{
"epoch": 0.2381939471601402,
"grad_norm": 1.2440710954030016,
"learning_rate": 9.429934487270105e-06,
"loss": 0.5129,
"step": 6320
},
{
"epoch": 0.23857083631703915,
"grad_norm": 1.637294830817524,
"learning_rate": 9.426880298786366e-06,
"loss": 0.539,
"step": 6330
},
{
"epoch": 0.2389477254739381,
"grad_norm": 1.446625909816762,
"learning_rate": 9.423818447896081e-06,
"loss": 0.505,
"step": 6340
},
{
"epoch": 0.23932461463083707,
"grad_norm": 2.0064803572085936,
"learning_rate": 9.420748939898955e-06,
"loss": 0.5741,
"step": 6350
},
{
"epoch": 0.23970150378773603,
"grad_norm": 1.9422133263523575,
"learning_rate": 9.417671780107941e-06,
"loss": 0.5398,
"step": 6360
},
{
"epoch": 0.24007839294463498,
"grad_norm": 1.7232411604328788,
"learning_rate": 9.414586973849241e-06,
"loss": 0.5293,
"step": 6370
},
{
"epoch": 0.24045528210153394,
"grad_norm": 1.6201469087371054,
"learning_rate": 9.411494526462286e-06,
"loss": 0.5269,
"step": 6380
},
{
"epoch": 0.2408321712584329,
"grad_norm": 1.70158816865489,
"learning_rate": 9.408394443299743e-06,
"loss": 0.5716,
"step": 6390
},
{
"epoch": 0.24120906041533186,
"grad_norm": 1.837108473006738,
"learning_rate": 9.405286729727483e-06,
"loss": 0.5208,
"step": 6400
},
{
"epoch": 0.2415859495722308,
"grad_norm": 1.7073086270634739,
"learning_rate": 9.402171391124597e-06,
"loss": 0.5428,
"step": 6410
},
{
"epoch": 0.24196283872912977,
"grad_norm": 3.051981097028862,
"learning_rate": 9.399048432883363e-06,
"loss": 0.5158,
"step": 6420
},
{
"epoch": 0.24233972788602873,
"grad_norm": 1.7864500392491454,
"learning_rate": 9.395917860409255e-06,
"loss": 0.5241,
"step": 6430
},
{
"epoch": 0.24271661704292768,
"grad_norm": 1.5968823208409386,
"learning_rate": 9.392779679120924e-06,
"loss": 0.5214,
"step": 6440
},
{
"epoch": 0.24309350619982664,
"grad_norm": 1.5815922668690165,
"learning_rate": 9.38963389445019e-06,
"loss": 0.5503,
"step": 6450
},
{
"epoch": 0.2434703953567256,
"grad_norm": 1.6823529848515881,
"learning_rate": 9.386480511842035e-06,
"loss": 0.5109,
"step": 6460
},
{
"epoch": 0.24384728451362453,
"grad_norm": 1.7524818808137763,
"learning_rate": 9.38331953675459e-06,
"loss": 0.5373,
"step": 6470
},
{
"epoch": 0.2442241736705235,
"grad_norm": 1.5806519372358918,
"learning_rate": 9.380150974659132e-06,
"loss": 0.5259,
"step": 6480
},
{
"epoch": 0.24460106282742244,
"grad_norm": 1.7916689002740533,
"learning_rate": 9.376974831040066e-06,
"loss": 0.5658,
"step": 6490
},
{
"epoch": 0.2449779519843214,
"grad_norm": 1.7760513276744359,
"learning_rate": 9.373791111394921e-06,
"loss": 0.5571,
"step": 6500
},
{
"epoch": 0.24535484114122036,
"grad_norm": 1.531656356531642,
"learning_rate": 9.37059982123434e-06,
"loss": 0.5324,
"step": 6510
},
{
"epoch": 0.24573173029811932,
"grad_norm": 1.7434004811232835,
"learning_rate": 9.367400966082067e-06,
"loss": 0.5402,
"step": 6520
},
{
"epoch": 0.24610861945501827,
"grad_norm": 1.9091316224501476,
"learning_rate": 9.364194551474947e-06,
"loss": 0.5314,
"step": 6530
},
{
"epoch": 0.24648550861191723,
"grad_norm": 1.4637280981673166,
"learning_rate": 9.3609805829629e-06,
"loss": 0.5214,
"step": 6540
},
{
"epoch": 0.2468623977688162,
"grad_norm": 1.6182959856878305,
"learning_rate": 9.357759066108928e-06,
"loss": 0.5547,
"step": 6550
},
{
"epoch": 0.24723928692571515,
"grad_norm": 1.9292790559109196,
"learning_rate": 9.354530006489093e-06,
"loss": 0.5423,
"step": 6560
},
{
"epoch": 0.2476161760826141,
"grad_norm": 2.1285426080176415,
"learning_rate": 9.351293409692519e-06,
"loss": 0.5643,
"step": 6570
},
{
"epoch": 0.24799306523951306,
"grad_norm": 1.8555701825811315,
"learning_rate": 9.34804928132137e-06,
"loss": 0.484,
"step": 6580
},
{
"epoch": 0.24836995439641202,
"grad_norm": 1.933502698125089,
"learning_rate": 9.344797626990851e-06,
"loss": 0.532,
"step": 6590
},
{
"epoch": 0.24874684355331098,
"grad_norm": 1.719405207009378,
"learning_rate": 9.341538452329191e-06,
"loss": 0.5411,
"step": 6600
},
{
"epoch": 0.24912373271020993,
"grad_norm": 1.6862541493581185,
"learning_rate": 9.338271762977633e-06,
"loss": 0.5173,
"step": 6610
},
{
"epoch": 0.2495006218671089,
"grad_norm": 1.5377963927275895,
"learning_rate": 9.334997564590434e-06,
"loss": 0.5156,
"step": 6620
},
{
"epoch": 0.24987751102400785,
"grad_norm": 1.677774421001133,
"learning_rate": 9.331715862834842e-06,
"loss": 0.5083,
"step": 6630
},
{
"epoch": 0.2502544001809068,
"grad_norm": 1.5490491612940884,
"learning_rate": 9.328426663391096e-06,
"loss": 0.5145,
"step": 6640
},
{
"epoch": 0.25063128933780576,
"grad_norm": 1.5642521485987577,
"learning_rate": 9.325129971952412e-06,
"loss": 0.5261,
"step": 6650
},
{
"epoch": 0.2510081784947047,
"grad_norm": 1.6706919162023979,
"learning_rate": 9.32182579422497e-06,
"loss": 0.529,
"step": 6660
},
{
"epoch": 0.2513850676516037,
"grad_norm": 1.5818205221721195,
"learning_rate": 9.318514135927916e-06,
"loss": 0.5369,
"step": 6670
},
{
"epoch": 0.2517619568085026,
"grad_norm": 2.0562318160924495,
"learning_rate": 9.315195002793335e-06,
"loss": 0.5234,
"step": 6680
},
{
"epoch": 0.2521388459654016,
"grad_norm": 1.67745815477172,
"learning_rate": 9.311868400566255e-06,
"loss": 0.5211,
"step": 6690
},
{
"epoch": 0.2525157351223005,
"grad_norm": 1.6591773189380454,
"learning_rate": 9.308534335004633e-06,
"loss": 0.5539,
"step": 6700
},
{
"epoch": 0.2528926242791995,
"grad_norm": 1.4697051603169902,
"learning_rate": 9.305192811879342e-06,
"loss": 0.5181,
"step": 6710
},
{
"epoch": 0.25326951343609844,
"grad_norm": 1.6681508210359333,
"learning_rate": 9.301843836974162e-06,
"loss": 0.5602,
"step": 6720
},
{
"epoch": 0.2536464025929974,
"grad_norm": 2.3108255647817724,
"learning_rate": 9.298487416085774e-06,
"loss": 0.5067,
"step": 6730
},
{
"epoch": 0.25402329174989635,
"grad_norm": 1.7726965314764336,
"learning_rate": 9.295123555023746e-06,
"loss": 0.5397,
"step": 6740
},
{
"epoch": 0.25440018090679534,
"grad_norm": 1.6529476204478943,
"learning_rate": 9.291752259610521e-06,
"loss": 0.5437,
"step": 6750
},
{
"epoch": 0.25477707006369427,
"grad_norm": 1.623414078675652,
"learning_rate": 9.288373535681417e-06,
"loss": 0.5206,
"step": 6760
},
{
"epoch": 0.2551539592205932,
"grad_norm": 1.4719649974173148,
"learning_rate": 9.284987389084602e-06,
"loss": 0.569,
"step": 6770
},
{
"epoch": 0.2555308483774922,
"grad_norm": 1.625628891396421,
"learning_rate": 9.281593825681102e-06,
"loss": 0.5398,
"step": 6780
},
{
"epoch": 0.2559077375343911,
"grad_norm": 1.792916883420293,
"learning_rate": 9.278192851344765e-06,
"loss": 0.5551,
"step": 6790
},
{
"epoch": 0.2562846266912901,
"grad_norm": 2.156619467272627,
"learning_rate": 9.274784471962283e-06,
"loss": 0.4974,
"step": 6800
},
{
"epoch": 0.256661515848189,
"grad_norm": 1.6687247274318509,
"learning_rate": 9.271368693433153e-06,
"loss": 0.5422,
"step": 6810
},
{
"epoch": 0.257038405005088,
"grad_norm": 1.7150335026062424,
"learning_rate": 9.267945521669687e-06,
"loss": 0.5179,
"step": 6820
},
{
"epoch": 0.25741529416198694,
"grad_norm": 1.5067882971197635,
"learning_rate": 9.264514962596989e-06,
"loss": 0.5207,
"step": 6830
},
{
"epoch": 0.2577921833188859,
"grad_norm": 2.1289911856188657,
"learning_rate": 9.261077022152953e-06,
"loss": 0.5437,
"step": 6840
},
{
"epoch": 0.25816907247578486,
"grad_norm": 1.9539761400644868,
"learning_rate": 9.257631706288246e-06,
"loss": 0.5517,
"step": 6850
},
{
"epoch": 0.25854596163268384,
"grad_norm": 1.9497689876502742,
"learning_rate": 9.254179020966303e-06,
"loss": 0.5288,
"step": 6860
},
{
"epoch": 0.25892285078958277,
"grad_norm": 1.5902800114741822,
"learning_rate": 9.250718972163312e-06,
"loss": 0.5244,
"step": 6870
},
{
"epoch": 0.25929973994648176,
"grad_norm": 1.7856883546213593,
"learning_rate": 9.247251565868214e-06,
"loss": 0.5054,
"step": 6880
},
{
"epoch": 0.2596766291033807,
"grad_norm": 1.5741551964446796,
"learning_rate": 9.243776808082675e-06,
"loss": 0.5371,
"step": 6890
},
{
"epoch": 0.26005351826027967,
"grad_norm": 1.648666621240082,
"learning_rate": 9.240294704821091e-06,
"loss": 0.5355,
"step": 6900
},
{
"epoch": 0.2604304074171786,
"grad_norm": 1.9842160004462486,
"learning_rate": 9.236805262110571e-06,
"loss": 0.531,
"step": 6910
},
{
"epoch": 0.2608072965740776,
"grad_norm": 1.6690746816867539,
"learning_rate": 9.233308485990929e-06,
"loss": 0.5199,
"step": 6920
},
{
"epoch": 0.2611841857309765,
"grad_norm": 2.000871754392917,
"learning_rate": 9.229804382514668e-06,
"loss": 0.5287,
"step": 6930
},
{
"epoch": 0.2615610748878755,
"grad_norm": 1.6694534259040406,
"learning_rate": 9.226292957746982e-06,
"loss": 0.5063,
"step": 6940
},
{
"epoch": 0.26193796404477443,
"grad_norm": 1.7676982056441022,
"learning_rate": 9.222774217765728e-06,
"loss": 0.542,
"step": 6950
},
{
"epoch": 0.2623148532016734,
"grad_norm": 1.4980337618111739,
"learning_rate": 9.21924816866143e-06,
"loss": 0.5138,
"step": 6960
},
{
"epoch": 0.26269174235857234,
"grad_norm": 1.5116056583689286,
"learning_rate": 9.215714816537265e-06,
"loss": 0.5265,
"step": 6970
},
{
"epoch": 0.2630686315154713,
"grad_norm": 1.4865966539266282,
"learning_rate": 9.212174167509044e-06,
"loss": 0.5403,
"step": 6980
},
{
"epoch": 0.26344552067237026,
"grad_norm": 1.6837642502881358,
"learning_rate": 9.208626227705212e-06,
"loss": 0.5434,
"step": 6990
},
{
"epoch": 0.2638224098292692,
"grad_norm": 1.7206211694739486,
"learning_rate": 9.205071003266838e-06,
"loss": 0.5293,
"step": 7000
},
{
"epoch": 0.2641992989861682,
"grad_norm": 1.5568568822181916,
"learning_rate": 9.201508500347592e-06,
"loss": 0.539,
"step": 7010
},
{
"epoch": 0.2645761881430671,
"grad_norm": 2.515135490034705,
"learning_rate": 9.197938725113745e-06,
"loss": 0.5273,
"step": 7020
},
{
"epoch": 0.2649530772999661,
"grad_norm": 1.7780698740100387,
"learning_rate": 9.194361683744156e-06,
"loss": 0.5249,
"step": 7030
},
{
"epoch": 0.265329966456865,
"grad_norm": 1.9741428022356151,
"learning_rate": 9.190777382430262e-06,
"loss": 0.5067,
"step": 7040
},
{
"epoch": 0.265706855613764,
"grad_norm": 1.5854337997969816,
"learning_rate": 9.187185827376065e-06,
"loss": 0.5162,
"step": 7050
},
{
"epoch": 0.26608374477066293,
"grad_norm": 1.8032750599953304,
"learning_rate": 9.183587024798122e-06,
"loss": 0.5337,
"step": 7060
},
{
"epoch": 0.2664606339275619,
"grad_norm": 1.6853602175583824,
"learning_rate": 9.179980980925533e-06,
"loss": 0.5619,
"step": 7070
},
{
"epoch": 0.26683752308446085,
"grad_norm": 1.668776536624248,
"learning_rate": 9.176367701999936e-06,
"loss": 0.5306,
"step": 7080
},
{
"epoch": 0.26721441224135983,
"grad_norm": 1.8011152218189725,
"learning_rate": 9.172747194275492e-06,
"loss": 0.5421,
"step": 7090
},
{
"epoch": 0.26759130139825876,
"grad_norm": 1.8708330614213387,
"learning_rate": 9.169119464018865e-06,
"loss": 0.5326,
"step": 7100
},
{
"epoch": 0.26796819055515775,
"grad_norm": 1.6632424343194352,
"learning_rate": 9.165484517509231e-06,
"loss": 0.5538,
"step": 7110
},
{
"epoch": 0.2683450797120567,
"grad_norm": 1.467226646436778,
"learning_rate": 9.161842361038255e-06,
"loss": 0.5068,
"step": 7120
},
{
"epoch": 0.26872196886895566,
"grad_norm": 1.6200121333407522,
"learning_rate": 9.158193000910078e-06,
"loss": 0.5388,
"step": 7130
},
{
"epoch": 0.2690988580258546,
"grad_norm": 1.7014337626263312,
"learning_rate": 9.15453644344131e-06,
"loss": 0.5293,
"step": 7140
},
{
"epoch": 0.2694757471827536,
"grad_norm": 1.7595131636999561,
"learning_rate": 9.15087269496102e-06,
"loss": 0.5448,
"step": 7150
},
{
"epoch": 0.2698526363396525,
"grad_norm": 1.8761488565801718,
"learning_rate": 9.147201761810722e-06,
"loss": 0.53,
"step": 7160
},
{
"epoch": 0.27022952549655144,
"grad_norm": 1.5345557406874837,
"learning_rate": 9.143523650344373e-06,
"loss": 0.5328,
"step": 7170
},
{
"epoch": 0.2706064146534504,
"grad_norm": 1.6268714379432996,
"learning_rate": 9.139838366928341e-06,
"loss": 0.5676,
"step": 7180
},
{
"epoch": 0.27098330381034935,
"grad_norm": 1.5406506948214973,
"learning_rate": 9.136145917941423e-06,
"loss": 0.5199,
"step": 7190
},
{
"epoch": 0.27136019296724834,
"grad_norm": 1.6991584106421929,
"learning_rate": 9.13244630977481e-06,
"loss": 0.5249,
"step": 7200
},
{
"epoch": 0.27173708212414727,
"grad_norm": 1.778739550927463,
"learning_rate": 9.128739548832084e-06,
"loss": 0.5493,
"step": 7210
},
{
"epoch": 0.27211397128104625,
"grad_norm": 1.5672069369770456,
"learning_rate": 9.125025641529212e-06,
"loss": 0.548,
"step": 7220
},
{
"epoch": 0.2724908604379452,
"grad_norm": 1.849271034046483,
"learning_rate": 9.121304594294526e-06,
"loss": 0.5069,
"step": 7230
},
{
"epoch": 0.27286774959484417,
"grad_norm": 1.6198818202816212,
"learning_rate": 9.117576413568726e-06,
"loss": 0.5491,
"step": 7240
},
{
"epoch": 0.2732446387517431,
"grad_norm": 1.7505219289954272,
"learning_rate": 9.113841105804843e-06,
"loss": 0.5331,
"step": 7250
},
{
"epoch": 0.2736215279086421,
"grad_norm": 1.5456684152965627,
"learning_rate": 9.110098677468258e-06,
"loss": 0.5304,
"step": 7260
},
{
"epoch": 0.273998417065541,
"grad_norm": 1.4875862331167544,
"learning_rate": 9.106349135036673e-06,
"loss": 0.5268,
"step": 7270
},
{
"epoch": 0.27437530622244,
"grad_norm": 1.7648342778101611,
"learning_rate": 9.102592485000101e-06,
"loss": 0.541,
"step": 7280
},
{
"epoch": 0.2747521953793389,
"grad_norm": 1.585590877192049,
"learning_rate": 9.09882873386086e-06,
"loss": 0.5159,
"step": 7290
},
{
"epoch": 0.2751290845362379,
"grad_norm": 1.8789850010967337,
"learning_rate": 9.095057888133557e-06,
"loss": 0.526,
"step": 7300
},
{
"epoch": 0.27550597369313684,
"grad_norm": 1.6350234581153913,
"learning_rate": 9.09127995434508e-06,
"loss": 0.5094,
"step": 7310
},
{
"epoch": 0.2758828628500358,
"grad_norm": 1.7747303027226409,
"learning_rate": 9.087494939034589e-06,
"loss": 0.5223,
"step": 7320
},
{
"epoch": 0.27625975200693476,
"grad_norm": 1.6705228626037893,
"learning_rate": 9.083702848753496e-06,
"loss": 0.5115,
"step": 7330
},
{
"epoch": 0.27663664116383374,
"grad_norm": 1.7334155324512346,
"learning_rate": 9.079903690065461e-06,
"loss": 0.536,
"step": 7340
},
{
"epoch": 0.27701353032073267,
"grad_norm": 1.7152387686021637,
"learning_rate": 9.076097469546378e-06,
"loss": 0.5174,
"step": 7350
},
{
"epoch": 0.27739041947763166,
"grad_norm": 2.7121339144898267,
"learning_rate": 9.072284193784366e-06,
"loss": 0.5278,
"step": 7360
},
{
"epoch": 0.2777673086345306,
"grad_norm": 1.4228173967742475,
"learning_rate": 9.068463869379755e-06,
"loss": 0.5315,
"step": 7370
},
{
"epoch": 0.2781441977914295,
"grad_norm": 1.5850322389947202,
"learning_rate": 9.064636502945074e-06,
"loss": 0.5437,
"step": 7380
},
{
"epoch": 0.2785210869483285,
"grad_norm": 1.462469278604766,
"learning_rate": 9.060802101105041e-06,
"loss": 0.5043,
"step": 7390
},
{
"epoch": 0.27889797610522743,
"grad_norm": 1.991811474599226,
"learning_rate": 9.056960670496555e-06,
"loss": 0.5347,
"step": 7400
},
{
"epoch": 0.2792748652621264,
"grad_norm": 1.5481573287316113,
"learning_rate": 9.053112217768675e-06,
"loss": 0.5317,
"step": 7410
},
{
"epoch": 0.27965175441902534,
"grad_norm": 1.9124383091203907,
"learning_rate": 9.049256749582621e-06,
"loss": 0.5363,
"step": 7420
},
{
"epoch": 0.28002864357592433,
"grad_norm": 1.8423235171267427,
"learning_rate": 9.045394272611752e-06,
"loss": 0.573,
"step": 7430
},
{
"epoch": 0.28040553273282326,
"grad_norm": 1.8356986741564718,
"learning_rate": 9.041524793541557e-06,
"loss": 0.527,
"step": 7440
},
{
"epoch": 0.28078242188972224,
"grad_norm": 1.709517929026637,
"learning_rate": 9.037648319069648e-06,
"loss": 0.5325,
"step": 7450
},
{
"epoch": 0.2811593110466212,
"grad_norm": 1.6511934273562192,
"learning_rate": 9.033764855905746e-06,
"loss": 0.5107,
"step": 7460
},
{
"epoch": 0.28153620020352016,
"grad_norm": 1.7382038263988389,
"learning_rate": 9.029874410771664e-06,
"loss": 0.5501,
"step": 7470
},
{
"epoch": 0.2819130893604191,
"grad_norm": 1.5833394804264111,
"learning_rate": 9.025976990401304e-06,
"loss": 0.5124,
"step": 7480
},
{
"epoch": 0.2822899785173181,
"grad_norm": 1.355478094878015,
"learning_rate": 9.022072601540642e-06,
"loss": 0.5425,
"step": 7490
},
{
"epoch": 0.282666867674217,
"grad_norm": 1.6618586288433348,
"learning_rate": 9.018161250947708e-06,
"loss": 0.5291,
"step": 7500
},
{
"epoch": 0.283043756831116,
"grad_norm": 1.273053488731193,
"learning_rate": 9.014242945392592e-06,
"loss": 0.4874,
"step": 7510
},
{
"epoch": 0.2834206459880149,
"grad_norm": 2.0289155247042383,
"learning_rate": 9.010317691657417e-06,
"loss": 0.4941,
"step": 7520
},
{
"epoch": 0.2837975351449139,
"grad_norm": 1.4934609822867562,
"learning_rate": 9.006385496536334e-06,
"loss": 0.5214,
"step": 7530
},
{
"epoch": 0.28417442430181283,
"grad_norm": 1.6263199918676976,
"learning_rate": 9.002446366835507e-06,
"loss": 0.5493,
"step": 7540
},
{
"epoch": 0.2845513134587118,
"grad_norm": 1.5893668854653922,
"learning_rate": 8.998500309373104e-06,
"loss": 0.4892,
"step": 7550
},
{
"epoch": 0.28492820261561075,
"grad_norm": 1.41806671358887,
"learning_rate": 8.994547330979281e-06,
"loss": 0.524,
"step": 7560
},
{
"epoch": 0.2853050917725097,
"grad_norm": 1.5553788303244762,
"learning_rate": 8.990587438496183e-06,
"loss": 0.5221,
"step": 7570
},
{
"epoch": 0.28568198092940866,
"grad_norm": 1.418345991264022,
"learning_rate": 8.986620638777911e-06,
"loss": 0.4997,
"step": 7580
},
{
"epoch": 0.2860588700863076,
"grad_norm": 1.508355836892063,
"learning_rate": 8.982646938690527e-06,
"loss": 0.5395,
"step": 7590
},
{
"epoch": 0.2864357592432066,
"grad_norm": 1.6201465695031843,
"learning_rate": 8.978666345112037e-06,
"loss": 0.5507,
"step": 7600
},
{
"epoch": 0.2868126484001055,
"grad_norm": 1.57811554793437,
"learning_rate": 8.974678864932379e-06,
"loss": 0.5004,
"step": 7610
},
{
"epoch": 0.2871895375570045,
"grad_norm": 1.5741977951369208,
"learning_rate": 8.970684505053407e-06,
"loss": 0.5116,
"step": 7620
},
{
"epoch": 0.2875664267139034,
"grad_norm": 1.676860663266555,
"learning_rate": 8.96668327238889e-06,
"loss": 0.5461,
"step": 7630
},
{
"epoch": 0.2879433158708024,
"grad_norm": 1.4656181144812006,
"learning_rate": 8.962675173864483e-06,
"loss": 0.513,
"step": 7640
},
{
"epoch": 0.28832020502770134,
"grad_norm": 1.7396852241340373,
"learning_rate": 8.958660216417735e-06,
"loss": 0.5341,
"step": 7650
},
{
"epoch": 0.2886970941846003,
"grad_norm": 1.4829830776509865,
"learning_rate": 8.954638406998062e-06,
"loss": 0.5364,
"step": 7660
},
{
"epoch": 0.28907398334149925,
"grad_norm": 2.003917803432353,
"learning_rate": 8.95060975256674e-06,
"loss": 0.5107,
"step": 7670
},
{
"epoch": 0.28945087249839824,
"grad_norm": 1.5880349728736471,
"learning_rate": 8.946574260096897e-06,
"loss": 0.5199,
"step": 7680
},
{
"epoch": 0.28982776165529717,
"grad_norm": 6.4085223096319295,
"learning_rate": 8.942531936573487e-06,
"loss": 0.5336,
"step": 7690
},
{
"epoch": 0.29020465081219615,
"grad_norm": 1.700617891186804,
"learning_rate": 8.9384827889933e-06,
"loss": 0.524,
"step": 7700
},
{
"epoch": 0.2905815399690951,
"grad_norm": 1.9072803846307507,
"learning_rate": 8.934426824364931e-06,
"loss": 0.4835,
"step": 7710
},
{
"epoch": 0.29095842912599407,
"grad_norm": 1.455274287063036,
"learning_rate": 8.93036404970877e-06,
"loss": 0.5229,
"step": 7720
},
{
"epoch": 0.291335318282893,
"grad_norm": 1.8160201738390154,
"learning_rate": 8.926294472057006e-06,
"loss": 0.5123,
"step": 7730
},
{
"epoch": 0.291712207439792,
"grad_norm": 1.865480030478559,
"learning_rate": 8.922218098453596e-06,
"loss": 0.5363,
"step": 7740
},
{
"epoch": 0.2920890965966909,
"grad_norm": 1.5712881260410958,
"learning_rate": 8.91813493595426e-06,
"loss": 0.5267,
"step": 7750
},
{
"epoch": 0.2924659857535899,
"grad_norm": 1.6468565924570966,
"learning_rate": 8.914044991626467e-06,
"loss": 0.5313,
"step": 7760
},
{
"epoch": 0.2928428749104888,
"grad_norm": 1.82716894869601,
"learning_rate": 8.90994827254943e-06,
"loss": 0.5278,
"step": 7770
},
{
"epoch": 0.29321976406738776,
"grad_norm": 1.5036064580532265,
"learning_rate": 8.905844785814086e-06,
"loss": 0.5086,
"step": 7780
},
{
"epoch": 0.29359665322428674,
"grad_norm": 1.5144138972292562,
"learning_rate": 8.901734538523083e-06,
"loss": 0.5081,
"step": 7790
},
{
"epoch": 0.29397354238118567,
"grad_norm": 1.7463912063578895,
"learning_rate": 8.897617537790775e-06,
"loss": 0.5414,
"step": 7800
},
{
"epoch": 0.29435043153808466,
"grad_norm": 1.4528500852259172,
"learning_rate": 8.893493790743205e-06,
"loss": 0.5077,
"step": 7810
},
{
"epoch": 0.2947273206949836,
"grad_norm": 1.6942113196018895,
"learning_rate": 8.889363304518088e-06,
"loss": 0.5399,
"step": 7820
},
{
"epoch": 0.29510420985188257,
"grad_norm": 1.5104690108892953,
"learning_rate": 8.88522608626481e-06,
"loss": 0.5459,
"step": 7830
},
{
"epoch": 0.2954810990087815,
"grad_norm": 1.8645243846646347,
"learning_rate": 8.881082143144405e-06,
"loss": 0.5134,
"step": 7840
},
{
"epoch": 0.2958579881656805,
"grad_norm": 1.6728891933665688,
"learning_rate": 8.876931482329554e-06,
"loss": 0.5177,
"step": 7850
},
{
"epoch": 0.2962348773225794,
"grad_norm": 1.2067983902923343,
"learning_rate": 8.872774111004553e-06,
"loss": 0.5047,
"step": 7860
},
{
"epoch": 0.2966117664794784,
"grad_norm": 1.5099148600629388,
"learning_rate": 8.868610036365324e-06,
"loss": 0.4986,
"step": 7870
},
{
"epoch": 0.29698865563637733,
"grad_norm": 2.34928629080023,
"learning_rate": 8.86443926561939e-06,
"loss": 0.5366,
"step": 7880
},
{
"epoch": 0.2973655447932763,
"grad_norm": 1.661924846466901,
"learning_rate": 8.860261805985857e-06,
"loss": 0.5272,
"step": 7890
},
{
"epoch": 0.29774243395017524,
"grad_norm": 1.6500446518789744,
"learning_rate": 8.856077664695418e-06,
"loss": 0.5247,
"step": 7900
},
{
"epoch": 0.29811932310707423,
"grad_norm": 1.6550912001046552,
"learning_rate": 8.851886848990326e-06,
"loss": 0.5073,
"step": 7910
},
{
"epoch": 0.29849621226397316,
"grad_norm": 2.4385332990829394,
"learning_rate": 8.847689366124387e-06,
"loss": 0.5224,
"step": 7920
},
{
"epoch": 0.29887310142087214,
"grad_norm": 1.7454181558614066,
"learning_rate": 8.843485223362947e-06,
"loss": 0.5404,
"step": 7930
},
{
"epoch": 0.2992499905777711,
"grad_norm": 1.6197152816080147,
"learning_rate": 8.839274427982883e-06,
"loss": 0.5249,
"step": 7940
},
{
"epoch": 0.29962687973467006,
"grad_norm": 1.6733433336543917,
"learning_rate": 8.835056987272581e-06,
"loss": 0.5369,
"step": 7950
},
{
"epoch": 0.300003768891569,
"grad_norm": 1.6407791190885486,
"learning_rate": 8.830832908531935e-06,
"loss": 0.5036,
"step": 7960
},
{
"epoch": 0.3003806580484679,
"grad_norm": 1.6837408609676576,
"learning_rate": 8.826602199072323e-06,
"loss": 0.5304,
"step": 7970
},
{
"epoch": 0.3007575472053669,
"grad_norm": 1.5824176969906407,
"learning_rate": 8.822364866216606e-06,
"loss": 0.4872,
"step": 7980
},
{
"epoch": 0.30113443636226583,
"grad_norm": 1.4977384439375079,
"learning_rate": 8.818120917299105e-06,
"loss": 0.4893,
"step": 7990
},
{
"epoch": 0.3015113255191648,
"grad_norm": 1.9173818601725474,
"learning_rate": 8.813870359665594e-06,
"loss": 0.515,
"step": 8000
},
{
"epoch": 0.30188821467606375,
"grad_norm": 1.224532027864391,
"learning_rate": 8.809613200673284e-06,
"loss": 0.4968,
"step": 8010
},
{
"epoch": 0.30226510383296273,
"grad_norm": 1.507185992220378,
"learning_rate": 8.805349447690819e-06,
"loss": 0.5283,
"step": 8020
},
{
"epoch": 0.30264199298986166,
"grad_norm": 1.5501471489535523,
"learning_rate": 8.801079108098247e-06,
"loss": 0.5297,
"step": 8030
},
{
"epoch": 0.30301888214676065,
"grad_norm": 1.6682468431261555,
"learning_rate": 8.796802189287021e-06,
"loss": 0.5169,
"step": 8040
},
{
"epoch": 0.3033957713036596,
"grad_norm": 1.5793954538427635,
"learning_rate": 8.792518698659985e-06,
"loss": 0.514,
"step": 8050
},
{
"epoch": 0.30377266046055856,
"grad_norm": 1.5869189857437231,
"learning_rate": 8.788228643631353e-06,
"loss": 0.5699,
"step": 8060
},
{
"epoch": 0.3041495496174575,
"grad_norm": 1.5017790526890054,
"learning_rate": 8.783932031626702e-06,
"loss": 0.5333,
"step": 8070
},
{
"epoch": 0.3045264387743565,
"grad_norm": 1.7418008438314914,
"learning_rate": 8.779628870082963e-06,
"loss": 0.5681,
"step": 8080
},
{
"epoch": 0.3049033279312554,
"grad_norm": 1.5384555545934482,
"learning_rate": 8.775319166448397e-06,
"loss": 0.4892,
"step": 8090
},
{
"epoch": 0.3052802170881544,
"grad_norm": 1.7242800734457064,
"learning_rate": 8.771002928182593e-06,
"loss": 0.5323,
"step": 8100
},
{
"epoch": 0.3056571062450533,
"grad_norm": 1.5545232251003573,
"learning_rate": 8.76668016275645e-06,
"loss": 0.5347,
"step": 8110
},
{
"epoch": 0.3060339954019523,
"grad_norm": 1.5864191351139438,
"learning_rate": 8.762350877652161e-06,
"loss": 0.5133,
"step": 8120
},
{
"epoch": 0.30641088455885124,
"grad_norm": 1.536374345634808,
"learning_rate": 8.758015080363209e-06,
"loss": 0.5429,
"step": 8130
},
{
"epoch": 0.3067877737157502,
"grad_norm": 1.3739374982043573,
"learning_rate": 8.753672778394348e-06,
"loss": 0.5071,
"step": 8140
},
{
"epoch": 0.30716466287264915,
"grad_norm": 1.9356831887412023,
"learning_rate": 8.749323979261586e-06,
"loss": 0.5598,
"step": 8150
},
{
"epoch": 0.30754155202954814,
"grad_norm": 1.8841720246001954,
"learning_rate": 8.744968690492183e-06,
"loss": 0.5251,
"step": 8160
},
{
"epoch": 0.30791844118644707,
"grad_norm": 1.4483138834088356,
"learning_rate": 8.740606919624628e-06,
"loss": 0.542,
"step": 8170
},
{
"epoch": 0.308295330343346,
"grad_norm": 1.739676604765352,
"learning_rate": 8.73623867420863e-06,
"loss": 0.5161,
"step": 8180
},
{
"epoch": 0.308672219500245,
"grad_norm": 1.4496329719941419,
"learning_rate": 8.731863961805108e-06,
"loss": 0.5142,
"step": 8190
},
{
"epoch": 0.3090491086571439,
"grad_norm": 1.586205834775067,
"learning_rate": 8.727482789986167e-06,
"loss": 0.5105,
"step": 8200
},
{
"epoch": 0.3094259978140429,
"grad_norm": 1.5822661719201925,
"learning_rate": 8.723095166335105e-06,
"loss": 0.4927,
"step": 8210
},
{
"epoch": 0.3098028869709418,
"grad_norm": 1.4744825299579403,
"learning_rate": 8.718701098446373e-06,
"loss": 0.5154,
"step": 8220
},
{
"epoch": 0.3101797761278408,
"grad_norm": 1.6760657484578674,
"learning_rate": 8.714300593925588e-06,
"loss": 0.5443,
"step": 8230
},
{
"epoch": 0.31055666528473974,
"grad_norm": 1.6507496475712244,
"learning_rate": 8.709893660389502e-06,
"loss": 0.5103,
"step": 8240
},
{
"epoch": 0.3109335544416387,
"grad_norm": 1.6267514125886644,
"learning_rate": 8.705480305465993e-06,
"loss": 0.5204,
"step": 8250
},
{
"epoch": 0.31131044359853766,
"grad_norm": 1.7139044318938548,
"learning_rate": 8.701060536794062e-06,
"loss": 0.5082,
"step": 8260
},
{
"epoch": 0.31168733275543664,
"grad_norm": 1.5364667287673175,
"learning_rate": 8.6966343620238e-06,
"loss": 0.5124,
"step": 8270
},
{
"epoch": 0.31206422191233557,
"grad_norm": 1.5973367114004897,
"learning_rate": 8.692201788816397e-06,
"loss": 0.5401,
"step": 8280
},
{
"epoch": 0.31244111106923456,
"grad_norm": 1.6171012597396603,
"learning_rate": 8.687762824844112e-06,
"loss": 0.5523,
"step": 8290
},
{
"epoch": 0.3128180002261335,
"grad_norm": 1.6798865965319494,
"learning_rate": 8.683317477790267e-06,
"loss": 0.5566,
"step": 8300
},
{
"epoch": 0.31319488938303247,
"grad_norm": 1.3965855622429122,
"learning_rate": 8.678865755349232e-06,
"loss": 0.5215,
"step": 8310
},
{
"epoch": 0.3135717785399314,
"grad_norm": 1.478475036458125,
"learning_rate": 8.674407665226412e-06,
"loss": 0.548,
"step": 8320
},
{
"epoch": 0.3139486676968304,
"grad_norm": 1.6537961782542907,
"learning_rate": 8.669943215138236e-06,
"loss": 0.5155,
"step": 8330
},
{
"epoch": 0.3143255568537293,
"grad_norm": 1.5083041356086178,
"learning_rate": 8.665472412812137e-06,
"loss": 0.5218,
"step": 8340
},
{
"epoch": 0.3147024460106283,
"grad_norm": 1.7094855096711425,
"learning_rate": 8.660995265986547e-06,
"loss": 0.4969,
"step": 8350
},
{
"epoch": 0.31507933516752723,
"grad_norm": 1.5448154600516473,
"learning_rate": 8.656511782410877e-06,
"loss": 0.5228,
"step": 8360
},
{
"epoch": 0.31545622432442616,
"grad_norm": 1.8392297958376145,
"learning_rate": 8.652021969845508e-06,
"loss": 0.5122,
"step": 8370
},
{
"epoch": 0.31583311348132515,
"grad_norm": 1.5057677282206323,
"learning_rate": 8.647525836061773e-06,
"loss": 0.5232,
"step": 8380
},
{
"epoch": 0.3162100026382241,
"grad_norm": 1.3387979618187744,
"learning_rate": 8.643023388841951e-06,
"loss": 0.4788,
"step": 8390
},
{
"epoch": 0.31658689179512306,
"grad_norm": 1.7299810103354585,
"learning_rate": 8.638514635979242e-06,
"loss": 0.5271,
"step": 8400
},
{
"epoch": 0.316963780952022,
"grad_norm": 1.5877959379659248,
"learning_rate": 8.633999585277769e-06,
"loss": 0.5492,
"step": 8410
},
{
"epoch": 0.317340670108921,
"grad_norm": 1.5926803639365381,
"learning_rate": 8.629478244552548e-06,
"loss": 0.4928,
"step": 8420
},
{
"epoch": 0.3177175592658199,
"grad_norm": 1.4153581295672915,
"learning_rate": 8.624950621629487e-06,
"loss": 0.5204,
"step": 8430
},
{
"epoch": 0.3180944484227189,
"grad_norm": 1.515638801797706,
"learning_rate": 8.620416724345365e-06,
"loss": 0.5266,
"step": 8440
},
{
"epoch": 0.3184713375796178,
"grad_norm": 1.7525263151023882,
"learning_rate": 8.615876560547822e-06,
"loss": 0.524,
"step": 8450
},
{
"epoch": 0.3188482267365168,
"grad_norm": 1.5277030246398249,
"learning_rate": 8.611330138095344e-06,
"loss": 0.5071,
"step": 8460
},
{
"epoch": 0.31922511589341573,
"grad_norm": 1.692436940543452,
"learning_rate": 8.606777464857254e-06,
"loss": 0.518,
"step": 8470
},
{
"epoch": 0.3196020050503147,
"grad_norm": 1.9842826236775062,
"learning_rate": 8.60221854871369e-06,
"loss": 0.5193,
"step": 8480
},
{
"epoch": 0.31997889420721365,
"grad_norm": 1.7689057994372488,
"learning_rate": 8.597653397555597e-06,
"loss": 0.5288,
"step": 8490
},
{
"epoch": 0.32035578336411263,
"grad_norm": 1.6259926395499629,
"learning_rate": 8.59308201928471e-06,
"loss": 0.5614,
"step": 8500
},
{
"epoch": 0.32073267252101156,
"grad_norm": 1.4818216820726773,
"learning_rate": 8.588504421813548e-06,
"loss": 0.519,
"step": 8510
},
{
"epoch": 0.32110956167791055,
"grad_norm": 1.4755097387688436,
"learning_rate": 8.583920613065389e-06,
"loss": 0.5363,
"step": 8520
},
{
"epoch": 0.3214864508348095,
"grad_norm": 1.4930239554129483,
"learning_rate": 8.579330600974263e-06,
"loss": 0.5245,
"step": 8530
},
{
"epoch": 0.32186333999170846,
"grad_norm": 1.5984210950118547,
"learning_rate": 8.57473439348494e-06,
"loss": 0.5069,
"step": 8540
},
{
"epoch": 0.3222402291486074,
"grad_norm": 1.667635983292806,
"learning_rate": 8.570131998552912e-06,
"loss": 0.5303,
"step": 8550
},
{
"epoch": 0.3226171183055063,
"grad_norm": 1.616690957226872,
"learning_rate": 8.56552342414438e-06,
"loss": 0.5231,
"step": 8560
},
{
"epoch": 0.3229940074624053,
"grad_norm": 1.8034906955411334,
"learning_rate": 8.560908678236243e-06,
"loss": 0.5207,
"step": 8570
},
{
"epoch": 0.32337089661930424,
"grad_norm": 1.629302217435465,
"learning_rate": 8.55628776881608e-06,
"loss": 0.5111,
"step": 8580
},
{
"epoch": 0.3237477857762032,
"grad_norm": 1.5524410794967876,
"learning_rate": 8.551660703882137e-06,
"loss": 0.5291,
"step": 8590
},
{
"epoch": 0.32412467493310215,
"grad_norm": 1.682436170015779,
"learning_rate": 8.547027491443319e-06,
"loss": 0.5203,
"step": 8600
},
{
"epoch": 0.32450156409000114,
"grad_norm": 1.4406822786435207,
"learning_rate": 8.542388139519166e-06,
"loss": 0.5147,
"step": 8610
},
{
"epoch": 0.32487845324690007,
"grad_norm": 1.7053903855885608,
"learning_rate": 8.537742656139854e-06,
"loss": 0.5221,
"step": 8620
},
{
"epoch": 0.32525534240379905,
"grad_norm": 1.6508021549322034,
"learning_rate": 8.533091049346158e-06,
"loss": 0.5087,
"step": 8630
},
{
"epoch": 0.325632231560698,
"grad_norm": 1.7227108959852215,
"learning_rate": 8.528433327189464e-06,
"loss": 0.5144,
"step": 8640
},
{
"epoch": 0.32600912071759697,
"grad_norm": 1.7003660286486464,
"learning_rate": 8.52376949773174e-06,
"loss": 0.5178,
"step": 8650
},
{
"epoch": 0.3263860098744959,
"grad_norm": 1.3401486704634595,
"learning_rate": 8.51909956904552e-06,
"loss": 0.5392,
"step": 8660
},
{
"epoch": 0.3267628990313949,
"grad_norm": 1.5872896977384134,
"learning_rate": 8.514423549213899e-06,
"loss": 0.5173,
"step": 8670
},
{
"epoch": 0.3271397881882938,
"grad_norm": 1.5745916384042906,
"learning_rate": 8.509741446330516e-06,
"loss": 0.5014,
"step": 8680
},
{
"epoch": 0.3275166773451928,
"grad_norm": 1.5616168597563413,
"learning_rate": 8.505053268499536e-06,
"loss": 0.5448,
"step": 8690
},
{
"epoch": 0.3278935665020917,
"grad_norm": 1.6773461052799608,
"learning_rate": 8.500359023835643e-06,
"loss": 0.5367,
"step": 8700
},
{
"epoch": 0.3282704556589907,
"grad_norm": 1.744777642171872,
"learning_rate": 8.49565872046402e-06,
"loss": 0.5294,
"step": 8710
},
{
"epoch": 0.32864734481588964,
"grad_norm": 1.4370066131355128,
"learning_rate": 8.490952366520332e-06,
"loss": 0.5104,
"step": 8720
},
{
"epoch": 0.3290242339727886,
"grad_norm": 1.5686171487455622,
"learning_rate": 8.486239970150726e-06,
"loss": 0.5082,
"step": 8730
},
{
"epoch": 0.32940112312968756,
"grad_norm": 1.729514880196974,
"learning_rate": 8.481521539511802e-06,
"loss": 0.5424,
"step": 8740
},
{
"epoch": 0.32977801228658654,
"grad_norm": 1.7026908866007286,
"learning_rate": 8.476797082770604e-06,
"loss": 0.5319,
"step": 8750
},
{
"epoch": 0.33015490144348547,
"grad_norm": 1.6815280081880184,
"learning_rate": 8.472066608104613e-06,
"loss": 0.497,
"step": 8760
},
{
"epoch": 0.3305317906003844,
"grad_norm": 1.3354916475471044,
"learning_rate": 8.467330123701713e-06,
"loss": 0.5199,
"step": 8770
},
{
"epoch": 0.3309086797572834,
"grad_norm": 1.6596093120910274,
"learning_rate": 8.462587637760207e-06,
"loss": 0.49,
"step": 8780
},
{
"epoch": 0.3312855689141823,
"grad_norm": 1.4490041698465326,
"learning_rate": 8.457839158488772e-06,
"loss": 0.5177,
"step": 8790
},
{
"epoch": 0.3316624580710813,
"grad_norm": 1.743520901799635,
"learning_rate": 8.453084694106468e-06,
"loss": 0.5554,
"step": 8800
},
{
"epoch": 0.33203934722798023,
"grad_norm": 1.3601593837886905,
"learning_rate": 8.448324252842708e-06,
"loss": 0.4939,
"step": 8810
},
{
"epoch": 0.3324162363848792,
"grad_norm": 1.0538420503197117,
"learning_rate": 8.443557842937257e-06,
"loss": 0.5129,
"step": 8820
},
{
"epoch": 0.33279312554177815,
"grad_norm": 1.580901795857022,
"learning_rate": 8.438785472640202e-06,
"loss": 0.523,
"step": 8830
},
{
"epoch": 0.33317001469867713,
"grad_norm": 1.6642518860804858,
"learning_rate": 8.434007150211957e-06,
"loss": 0.5148,
"step": 8840
},
{
"epoch": 0.33354690385557606,
"grad_norm": 1.5678025009658634,
"learning_rate": 8.42922288392323e-06,
"loss": 0.5189,
"step": 8850
},
{
"epoch": 0.33392379301247505,
"grad_norm": 1.5916235440951905,
"learning_rate": 8.424432682055022e-06,
"loss": 0.5072,
"step": 8860
},
{
"epoch": 0.334300682169374,
"grad_norm": 1.4657720753841874,
"learning_rate": 8.419636552898605e-06,
"loss": 0.5275,
"step": 8870
},
{
"epoch": 0.33467757132627296,
"grad_norm": 1.5520680065937231,
"learning_rate": 8.414834504755513e-06,
"loss": 0.5329,
"step": 8880
},
{
"epoch": 0.3350544604831719,
"grad_norm": 1.4122325295424663,
"learning_rate": 8.410026545937522e-06,
"loss": 0.5215,
"step": 8890
},
{
"epoch": 0.3354313496400709,
"grad_norm": 1.7208431430034201,
"learning_rate": 8.405212684766642e-06,
"loss": 0.5266,
"step": 8900
},
{
"epoch": 0.3358082387969698,
"grad_norm": 1.522595875256597,
"learning_rate": 8.400392929575098e-06,
"loss": 0.521,
"step": 8910
},
{
"epoch": 0.3361851279538688,
"grad_norm": 1.193362753628422,
"learning_rate": 8.395567288705315e-06,
"loss": 0.473,
"step": 8920
},
{
"epoch": 0.3365620171107677,
"grad_norm": 1.617816412749885,
"learning_rate": 8.390735770509909e-06,
"loss": 0.5367,
"step": 8930
},
{
"epoch": 0.3369389062676667,
"grad_norm": 3.7585583359479173,
"learning_rate": 8.385898383351662e-06,
"loss": 0.5273,
"step": 8940
},
{
"epoch": 0.33731579542456563,
"grad_norm": 1.570651388025663,
"learning_rate": 8.381055135603526e-06,
"loss": 0.5431,
"step": 8950
},
{
"epoch": 0.33769268458146456,
"grad_norm": 1.7069789377376001,
"learning_rate": 8.376206035648587e-06,
"loss": 0.5534,
"step": 8960
},
{
"epoch": 0.33806957373836355,
"grad_norm": 1.718778224322803,
"learning_rate": 8.371351091880064e-06,
"loss": 0.5132,
"step": 8970
},
{
"epoch": 0.3384464628952625,
"grad_norm": 1.5304391761250087,
"learning_rate": 8.366490312701292e-06,
"loss": 0.5227,
"step": 8980
},
{
"epoch": 0.33882335205216146,
"grad_norm": 1.5179430433625931,
"learning_rate": 8.361623706525703e-06,
"loss": 0.5131,
"step": 8990
},
{
"epoch": 0.3392002412090604,
"grad_norm": 1.7236639439764125,
"learning_rate": 8.356751281776818e-06,
"loss": 0.5392,
"step": 9000
},
{
"epoch": 0.3395771303659594,
"grad_norm": 1.5735386800178819,
"learning_rate": 8.35187304688823e-06,
"loss": 0.5347,
"step": 9010
},
{
"epoch": 0.3399540195228583,
"grad_norm": 1.8833870834225461,
"learning_rate": 8.346989010303586e-06,
"loss": 0.5209,
"step": 9020
},
{
"epoch": 0.3403309086797573,
"grad_norm": 1.4643995674675825,
"learning_rate": 8.342099180476575e-06,
"loss": 0.5049,
"step": 9030
},
{
"epoch": 0.3407077978366562,
"grad_norm": 1.8572382732988715,
"learning_rate": 8.337203565870915e-06,
"loss": 0.5256,
"step": 9040
},
{
"epoch": 0.3410846869935552,
"grad_norm": 1.7660917988302332,
"learning_rate": 8.332302174960336e-06,
"loss": 0.5416,
"step": 9050
},
{
"epoch": 0.34146157615045414,
"grad_norm": 1.8036514572139464,
"learning_rate": 8.327395016228567e-06,
"loss": 0.5649,
"step": 9060
},
{
"epoch": 0.3418384653073531,
"grad_norm": 1.3940242330097408,
"learning_rate": 8.32248209816932e-06,
"loss": 0.5114,
"step": 9070
},
{
"epoch": 0.34221535446425205,
"grad_norm": 1.6019993171028344,
"learning_rate": 8.317563429286274e-06,
"loss": 0.4947,
"step": 9080
},
{
"epoch": 0.34259224362115104,
"grad_norm": 2.247467137710252,
"learning_rate": 8.312639018093067e-06,
"loss": 0.5569,
"step": 9090
},
{
"epoch": 0.34296913277804997,
"grad_norm": 1.5951663374446334,
"learning_rate": 8.307708873113267e-06,
"loss": 0.4897,
"step": 9100
},
{
"epoch": 0.34334602193494895,
"grad_norm": 1.738333108559983,
"learning_rate": 8.302773002880377e-06,
"loss": 0.4987,
"step": 9110
},
{
"epoch": 0.3437229110918479,
"grad_norm": 1.5337169050623085,
"learning_rate": 8.297831415937802e-06,
"loss": 0.505,
"step": 9120
},
{
"epoch": 0.34409980024874687,
"grad_norm": 1.4472548665213318,
"learning_rate": 8.29288412083885e-06,
"loss": 0.512,
"step": 9130
},
{
"epoch": 0.3444766894056458,
"grad_norm": 1.4492396478807243,
"learning_rate": 8.287931126146696e-06,
"loss": 0.4886,
"step": 9140
},
{
"epoch": 0.3448535785625448,
"grad_norm": 1.7630091584083531,
"learning_rate": 8.282972440434393e-06,
"loss": 0.534,
"step": 9150
},
{
"epoch": 0.3452304677194437,
"grad_norm": 1.3895390793947004,
"learning_rate": 8.278008072284841e-06,
"loss": 0.4952,
"step": 9160
},
{
"epoch": 0.34560735687634264,
"grad_norm": 1.3777597188749056,
"learning_rate": 8.273038030290772e-06,
"loss": 0.5084,
"step": 9170
},
{
"epoch": 0.3459842460332416,
"grad_norm": 1.7939386012497827,
"learning_rate": 8.268062323054742e-06,
"loss": 0.5433,
"step": 9180
},
{
"epoch": 0.34636113519014056,
"grad_norm": 1.4962140700288487,
"learning_rate": 8.263080959189114e-06,
"loss": 0.5415,
"step": 9190
},
{
"epoch": 0.34673802434703954,
"grad_norm": 1.6671458345630203,
"learning_rate": 8.258093947316036e-06,
"loss": 0.5137,
"step": 9200
},
{
"epoch": 0.34711491350393847,
"grad_norm": 1.5818238301461247,
"learning_rate": 8.253101296067441e-06,
"loss": 0.5005,
"step": 9210
},
{
"epoch": 0.34749180266083746,
"grad_norm": 1.6485091022559941,
"learning_rate": 8.248103014085014e-06,
"loss": 0.526,
"step": 9220
},
{
"epoch": 0.3478686918177364,
"grad_norm": 1.6061788923973597,
"learning_rate": 8.243099110020191e-06,
"loss": 0.5299,
"step": 9230
},
{
"epoch": 0.34824558097463537,
"grad_norm": 1.5857303046251194,
"learning_rate": 8.238089592534143e-06,
"loss": 0.5272,
"step": 9240
},
{
"epoch": 0.3486224701315343,
"grad_norm": 1.5733908134969434,
"learning_rate": 8.233074470297746e-06,
"loss": 0.5027,
"step": 9250
},
{
"epoch": 0.3489993592884333,
"grad_norm": 1.2391810742375864,
"learning_rate": 8.228053751991586e-06,
"loss": 0.5147,
"step": 9260
},
{
"epoch": 0.3493762484453322,
"grad_norm": 1.6623967687120003,
"learning_rate": 8.223027446305939e-06,
"loss": 0.562,
"step": 9270
},
{
"epoch": 0.3497531376022312,
"grad_norm": 1.6535672620155857,
"learning_rate": 8.217995561940735e-06,
"loss": 0.5135,
"step": 9280
},
{
"epoch": 0.35013002675913013,
"grad_norm": 1.628954367480316,
"learning_rate": 8.21295810760558e-06,
"loss": 0.5491,
"step": 9290
},
{
"epoch": 0.3505069159160291,
"grad_norm": 1.6259980778856178,
"learning_rate": 8.207915092019709e-06,
"loss": 0.5277,
"step": 9300
},
{
"epoch": 0.35088380507292805,
"grad_norm": 1.7960271399406822,
"learning_rate": 8.202866523911985e-06,
"loss": 0.5026,
"step": 9310
},
{
"epoch": 0.35126069422982703,
"grad_norm": 1.5027810941773745,
"learning_rate": 8.197812412020882e-06,
"loss": 0.5219,
"step": 9320
},
{
"epoch": 0.35163758338672596,
"grad_norm": 1.4147518487518373,
"learning_rate": 8.192752765094474e-06,
"loss": 0.4946,
"step": 9330
},
{
"epoch": 0.35201447254362495,
"grad_norm": 1.7588868083859255,
"learning_rate": 8.18768759189041e-06,
"loss": 0.5214,
"step": 9340
},
{
"epoch": 0.3523913617005239,
"grad_norm": 1.9046948041639848,
"learning_rate": 8.182616901175904e-06,
"loss": 0.5327,
"step": 9350
},
{
"epoch": 0.3527682508574228,
"grad_norm": 1.8541351405292703,
"learning_rate": 8.177540701727725e-06,
"loss": 0.5332,
"step": 9360
},
{
"epoch": 0.3531451400143218,
"grad_norm": 1.4841893890474134,
"learning_rate": 8.172459002332174e-06,
"loss": 0.5198,
"step": 9370
},
{
"epoch": 0.3535220291712207,
"grad_norm": 1.6262991665753983,
"learning_rate": 8.16737181178507e-06,
"loss": 0.5204,
"step": 9380
},
{
"epoch": 0.3538989183281197,
"grad_norm": 1.9711118756578987,
"learning_rate": 8.16227913889174e-06,
"loss": 0.5227,
"step": 9390
},
{
"epoch": 0.35427580748501863,
"grad_norm": 1.6101287024416397,
"learning_rate": 8.157180992466999e-06,
"loss": 0.5213,
"step": 9400
},
{
"epoch": 0.3546526966419176,
"grad_norm": 1.4345161151869006,
"learning_rate": 8.152077381335136e-06,
"loss": 0.5156,
"step": 9410
},
{
"epoch": 0.35502958579881655,
"grad_norm": 1.3670300786401688,
"learning_rate": 8.146968314329897e-06,
"loss": 0.5415,
"step": 9420
},
{
"epoch": 0.35540647495571553,
"grad_norm": 2.0488645347215755,
"learning_rate": 8.141853800294474e-06,
"loss": 0.495,
"step": 9430
},
{
"epoch": 0.35578336411261446,
"grad_norm": 1.6748625828080144,
"learning_rate": 8.136733848081489e-06,
"loss": 0.5253,
"step": 9440
},
{
"epoch": 0.35616025326951345,
"grad_norm": 1.749008596784993,
"learning_rate": 8.131608466552968e-06,
"loss": 0.5306,
"step": 9450
},
{
"epoch": 0.3565371424264124,
"grad_norm": 1.4862728647070822,
"learning_rate": 8.126477664580347e-06,
"loss": 0.5124,
"step": 9460
},
{
"epoch": 0.35691403158331136,
"grad_norm": 1.6537416257961348,
"learning_rate": 8.121341451044433e-06,
"loss": 0.53,
"step": 9470
},
{
"epoch": 0.3572909207402103,
"grad_norm": 1.6263866458416927,
"learning_rate": 8.116199834835408e-06,
"loss": 0.4975,
"step": 9480
},
{
"epoch": 0.3576678098971093,
"grad_norm": 1.5395362965786443,
"learning_rate": 8.1110528248528e-06,
"loss": 0.5234,
"step": 9490
},
{
"epoch": 0.3580446990540082,
"grad_norm": 1.6712903285736607,
"learning_rate": 8.105900430005476e-06,
"loss": 0.5306,
"step": 9500
},
{
"epoch": 0.3584215882109072,
"grad_norm": 1.8324911808117614,
"learning_rate": 8.10074265921162e-06,
"loss": 0.5242,
"step": 9510
},
{
"epoch": 0.3587984773678061,
"grad_norm": 2.351195839028606,
"learning_rate": 8.095579521398727e-06,
"loss": 0.5219,
"step": 9520
},
{
"epoch": 0.3591753665247051,
"grad_norm": 1.7110044303010266,
"learning_rate": 8.090411025503576e-06,
"loss": 0.4978,
"step": 9530
},
{
"epoch": 0.35955225568160404,
"grad_norm": 1.6106891478057317,
"learning_rate": 8.085237180472222e-06,
"loss": 0.4827,
"step": 9540
},
{
"epoch": 0.359929144838503,
"grad_norm": 1.7506503522662198,
"learning_rate": 8.080057995259983e-06,
"loss": 0.5101,
"step": 9550
},
{
"epoch": 0.36030603399540195,
"grad_norm": 1.4401619042910059,
"learning_rate": 8.074873478831412e-06,
"loss": 0.4944,
"step": 9560
},
{
"epoch": 0.3606829231523009,
"grad_norm": 1.5447630644836872,
"learning_rate": 8.069683640160297e-06,
"loss": 0.5043,
"step": 9570
},
{
"epoch": 0.36105981230919987,
"grad_norm": 1.6594696899732704,
"learning_rate": 8.064488488229634e-06,
"loss": 0.5308,
"step": 9580
},
{
"epoch": 0.3614367014660988,
"grad_norm": 1.50896874784597,
"learning_rate": 8.059288032031616e-06,
"loss": 0.511,
"step": 9590
},
{
"epoch": 0.3618135906229978,
"grad_norm": 1.463806572060008,
"learning_rate": 8.05408228056762e-06,
"loss": 0.5112,
"step": 9600
},
{
"epoch": 0.3621904797798967,
"grad_norm": 2.3161399801404206,
"learning_rate": 8.048871242848186e-06,
"loss": 0.5094,
"step": 9610
},
{
"epoch": 0.3625673689367957,
"grad_norm": 1.6934623824302724,
"learning_rate": 8.043654927893003e-06,
"loss": 0.5302,
"step": 9620
},
{
"epoch": 0.3629442580936946,
"grad_norm": 1.669003654657236,
"learning_rate": 8.038433344730896e-06,
"loss": 0.5149,
"step": 9630
},
{
"epoch": 0.3633211472505936,
"grad_norm": 1.627539817842675,
"learning_rate": 8.033206502399811e-06,
"loss": 0.508,
"step": 9640
},
{
"epoch": 0.36369803640749254,
"grad_norm": 1.606477921617676,
"learning_rate": 8.027974409946791e-06,
"loss": 0.5388,
"step": 9650
},
{
"epoch": 0.3640749255643915,
"grad_norm": 1.5765425415250627,
"learning_rate": 8.02273707642797e-06,
"loss": 0.4904,
"step": 9660
},
{
"epoch": 0.36445181472129046,
"grad_norm": 1.377919727134035,
"learning_rate": 8.017494510908557e-06,
"loss": 0.4736,
"step": 9670
},
{
"epoch": 0.36482870387818944,
"grad_norm": 1.5390007963630454,
"learning_rate": 8.012246722462807e-06,
"loss": 0.5366,
"step": 9680
},
{
"epoch": 0.36520559303508837,
"grad_norm": 1.6016562038880642,
"learning_rate": 8.006993720174026e-06,
"loss": 0.5278,
"step": 9690
},
{
"epoch": 0.36558248219198736,
"grad_norm": 1.6926301666587609,
"learning_rate": 8.001735513134539e-06,
"loss": 0.524,
"step": 9700
},
{
"epoch": 0.3659593713488863,
"grad_norm": 1.4095598406907603,
"learning_rate": 7.996472110445682e-06,
"loss": 0.5248,
"step": 9710
},
{
"epoch": 0.36633626050578527,
"grad_norm": 1.931772269279146,
"learning_rate": 7.99120352121778e-06,
"loss": 0.5468,
"step": 9720
},
{
"epoch": 0.3667131496626842,
"grad_norm": 1.7501030367149104,
"learning_rate": 7.985929754570138e-06,
"loss": 0.522,
"step": 9730
},
{
"epoch": 0.3670900388195832,
"grad_norm": 1.526100066204629,
"learning_rate": 7.980650819631028e-06,
"loss": 0.5101,
"step": 9740
},
{
"epoch": 0.3674669279764821,
"grad_norm": 1.7385487243528495,
"learning_rate": 7.975366725537657e-06,
"loss": 0.5216,
"step": 9750
},
{
"epoch": 0.36784381713338105,
"grad_norm": 1.5749812078525176,
"learning_rate": 7.970077481436169e-06,
"loss": 0.5003,
"step": 9760
},
{
"epoch": 0.36822070629028003,
"grad_norm": 1.9001739721765867,
"learning_rate": 7.964783096481624e-06,
"loss": 0.5299,
"step": 9770
},
{
"epoch": 0.36859759544717896,
"grad_norm": 1.7061699416032134,
"learning_rate": 7.95948357983797e-06,
"loss": 0.5293,
"step": 9780
},
{
"epoch": 0.36897448460407795,
"grad_norm": 1.628434299258182,
"learning_rate": 7.954178940678048e-06,
"loss": 0.5365,
"step": 9790
},
{
"epoch": 0.3693513737609769,
"grad_norm": 1.5914358544888132,
"learning_rate": 7.94886918818356e-06,
"loss": 0.486,
"step": 9800
},
{
"epoch": 0.36972826291787586,
"grad_norm": 1.8260310549139975,
"learning_rate": 7.94355433154506e-06,
"loss": 0.5051,
"step": 9810
},
{
"epoch": 0.3701051520747748,
"grad_norm": 1.5743149246238135,
"learning_rate": 7.93823437996194e-06,
"loss": 0.4909,
"step": 9820
},
{
"epoch": 0.3704820412316738,
"grad_norm": 1.5885416947805906,
"learning_rate": 7.932909342642403e-06,
"loss": 0.5111,
"step": 9830
},
{
"epoch": 0.3708589303885727,
"grad_norm": 1.806111484800823,
"learning_rate": 7.92757922880346e-06,
"loss": 0.5213,
"step": 9840
},
{
"epoch": 0.3712358195454717,
"grad_norm": 1.622739814163259,
"learning_rate": 7.922244047670908e-06,
"loss": 0.5223,
"step": 9850
},
{
"epoch": 0.3716127087023706,
"grad_norm": 1.422381514282876,
"learning_rate": 7.916903808479316e-06,
"loss": 0.4954,
"step": 9860
},
{
"epoch": 0.3719895978592696,
"grad_norm": 1.4819588262177361,
"learning_rate": 7.911558520472007e-06,
"loss": 0.5041,
"step": 9870
},
{
"epoch": 0.37236648701616853,
"grad_norm": 1.6630721593622193,
"learning_rate": 7.906208192901043e-06,
"loss": 0.5031,
"step": 9880
},
{
"epoch": 0.3727433761730675,
"grad_norm": 1.5994053992133728,
"learning_rate": 7.900852835027207e-06,
"loss": 0.5212,
"step": 9890
},
{
"epoch": 0.37312026532996645,
"grad_norm": 1.6742248574206915,
"learning_rate": 7.89549245611999e-06,
"loss": 0.509,
"step": 9900
},
{
"epoch": 0.37349715448686543,
"grad_norm": 1.3401945002776963,
"learning_rate": 7.890127065457578e-06,
"loss": 0.4989,
"step": 9910
},
{
"epoch": 0.37387404364376436,
"grad_norm": 1.7744024916171353,
"learning_rate": 7.884756672326824e-06,
"loss": 0.5221,
"step": 9920
},
{
"epoch": 0.37425093280066335,
"grad_norm": 1.5151769409007294,
"learning_rate": 7.879381286023247e-06,
"loss": 0.4874,
"step": 9930
},
{
"epoch": 0.3746278219575623,
"grad_norm": 1.521150560489862,
"learning_rate": 7.874000915851e-06,
"loss": 0.5243,
"step": 9940
},
{
"epoch": 0.37500471111446126,
"grad_norm": 1.8681950791510447,
"learning_rate": 7.868615571122877e-06,
"loss": 0.5333,
"step": 9950
},
{
"epoch": 0.3753816002713602,
"grad_norm": 1.7696887020829462,
"learning_rate": 7.863225261160264e-06,
"loss": 0.5095,
"step": 9960
},
{
"epoch": 0.3757584894282591,
"grad_norm": 1.5476742902685527,
"learning_rate": 7.857829995293156e-06,
"loss": 0.5138,
"step": 9970
},
{
"epoch": 0.3761353785851581,
"grad_norm": 1.8113224519075866,
"learning_rate": 7.852429782860116e-06,
"loss": 0.5204,
"step": 9980
},
{
"epoch": 0.37651226774205704,
"grad_norm": 1.5771605683013104,
"learning_rate": 7.847024633208277e-06,
"loss": 0.5251,
"step": 9990
},
{
"epoch": 0.376889156898956,
"grad_norm": 1.7363856844555856,
"learning_rate": 7.841614555693315e-06,
"loss": 0.5374,
"step": 10000
},
{
"epoch": 0.37726604605585495,
"grad_norm": 1.4364933283426136,
"learning_rate": 7.83619955967943e-06,
"loss": 0.5058,
"step": 10010
},
{
"epoch": 0.37764293521275394,
"grad_norm": 1.7783682918591002,
"learning_rate": 7.830779654539347e-06,
"loss": 0.5219,
"step": 10020
},
{
"epoch": 0.37801982436965287,
"grad_norm": 1.502023352144145,
"learning_rate": 7.825354849654276e-06,
"loss": 0.5063,
"step": 10030
},
{
"epoch": 0.37839671352655185,
"grad_norm": 1.4662226669968323,
"learning_rate": 7.819925154413913e-06,
"loss": 0.519,
"step": 10040
},
{
"epoch": 0.3787736026834508,
"grad_norm": 1.7426260620841227,
"learning_rate": 7.814490578216418e-06,
"loss": 0.5139,
"step": 10050
},
{
"epoch": 0.37915049184034977,
"grad_norm": 1.392450084963678,
"learning_rate": 7.809051130468406e-06,
"loss": 0.5117,
"step": 10060
},
{
"epoch": 0.3795273809972487,
"grad_norm": 1.3645116377228637,
"learning_rate": 7.80360682058491e-06,
"loss": 0.4957,
"step": 10070
},
{
"epoch": 0.3799042701541477,
"grad_norm": 1.585986026276787,
"learning_rate": 7.798157657989393e-06,
"loss": 0.4932,
"step": 10080
},
{
"epoch": 0.3802811593110466,
"grad_norm": 1.732526223305691,
"learning_rate": 7.792703652113711e-06,
"loss": 0.489,
"step": 10090
},
{
"epoch": 0.3806580484679456,
"grad_norm": 1.5456835026999505,
"learning_rate": 7.7872448123981e-06,
"loss": 0.5113,
"step": 10100
},
{
"epoch": 0.3810349376248445,
"grad_norm": 1.691666553542104,
"learning_rate": 7.781781148291168e-06,
"loss": 0.5062,
"step": 10110
},
{
"epoch": 0.3814118267817435,
"grad_norm": 1.4393454733074624,
"learning_rate": 7.776312669249871e-06,
"loss": 0.5259,
"step": 10120
},
{
"epoch": 0.38178871593864244,
"grad_norm": 1.5606823602350173,
"learning_rate": 7.770839384739502e-06,
"loss": 0.481,
"step": 10130
},
{
"epoch": 0.3821656050955414,
"grad_norm": 1.5406731978486246,
"learning_rate": 7.765361304233669e-06,
"loss": 0.5022,
"step": 10140
},
{
"epoch": 0.38254249425244036,
"grad_norm": 1.5789632721608042,
"learning_rate": 7.759878437214279e-06,
"loss": 0.5205,
"step": 10150
},
{
"epoch": 0.3829193834093393,
"grad_norm": 1.4404684890173407,
"learning_rate": 7.75439079317153e-06,
"loss": 0.5194,
"step": 10160
},
{
"epoch": 0.38329627256623827,
"grad_norm": 1.5872838579011181,
"learning_rate": 7.748898381603885e-06,
"loss": 0.5209,
"step": 10170
},
{
"epoch": 0.3836731617231372,
"grad_norm": 1.7373659799447403,
"learning_rate": 7.743401212018058e-06,
"loss": 0.5338,
"step": 10180
},
{
"epoch": 0.3840500508800362,
"grad_norm": 1.5163208593264923,
"learning_rate": 7.737899293929e-06,
"loss": 0.5171,
"step": 10190
},
{
"epoch": 0.3844269400369351,
"grad_norm": 1.7459426641170264,
"learning_rate": 7.73239263685988e-06,
"loss": 0.514,
"step": 10200
},
{
"epoch": 0.3848038291938341,
"grad_norm": 1.4346771101184586,
"learning_rate": 7.726881250342072e-06,
"loss": 0.4932,
"step": 10210
},
{
"epoch": 0.38518071835073303,
"grad_norm": 1.5111843012047024,
"learning_rate": 7.721365143915134e-06,
"loss": 0.5125,
"step": 10220
},
{
"epoch": 0.385557607507632,
"grad_norm": 1.9962267341536073,
"learning_rate": 7.715844327126796e-06,
"loss": 0.5167,
"step": 10230
},
{
"epoch": 0.38593449666453095,
"grad_norm": 1.8161411611054894,
"learning_rate": 7.710318809532936e-06,
"loss": 0.5158,
"step": 10240
},
{
"epoch": 0.38631138582142993,
"grad_norm": 2.7639886954074973,
"learning_rate": 7.704788600697572e-06,
"loss": 0.5188,
"step": 10250
},
{
"epoch": 0.38668827497832886,
"grad_norm": 1.4820078773895478,
"learning_rate": 7.699253710192846e-06,
"loss": 0.5393,
"step": 10260
},
{
"epoch": 0.38706516413522785,
"grad_norm": 1.5410641109210454,
"learning_rate": 7.693714147598997e-06,
"loss": 0.5153,
"step": 10270
},
{
"epoch": 0.3874420532921268,
"grad_norm": 1.5522185520833145,
"learning_rate": 7.68816992250435e-06,
"loss": 0.501,
"step": 10280
},
{
"epoch": 0.38781894244902576,
"grad_norm": 1.5919453235813696,
"learning_rate": 7.682621044505307e-06,
"loss": 0.5129,
"step": 10290
},
{
"epoch": 0.3881958316059247,
"grad_norm": 1.8187432897104705,
"learning_rate": 7.67706752320632e-06,
"loss": 0.5119,
"step": 10300
},
{
"epoch": 0.3885727207628237,
"grad_norm": 1.7249561612487072,
"learning_rate": 7.671509368219876e-06,
"loss": 0.4994,
"step": 10310
},
{
"epoch": 0.3889496099197226,
"grad_norm": 1.6848328239504586,
"learning_rate": 7.665946589166487e-06,
"loss": 0.4925,
"step": 10320
},
{
"epoch": 0.3893264990766216,
"grad_norm": 1.5828091110560405,
"learning_rate": 7.660379195674661e-06,
"loss": 0.5038,
"step": 10330
},
{
"epoch": 0.3897033882335205,
"grad_norm": 1.5813911515185386,
"learning_rate": 7.654807197380905e-06,
"loss": 0.5195,
"step": 10340
},
{
"epoch": 0.3900802773904195,
"grad_norm": 1.6125111488328268,
"learning_rate": 7.649230603929682e-06,
"loss": 0.5015,
"step": 10350
},
{
"epoch": 0.39045716654731843,
"grad_norm": 1.3381134076751229,
"learning_rate": 7.643649424973423e-06,
"loss": 0.5088,
"step": 10360
},
{
"epoch": 0.39083405570421736,
"grad_norm": 1.7305681398298995,
"learning_rate": 7.638063670172484e-06,
"loss": 0.5185,
"step": 10370
},
{
"epoch": 0.39121094486111635,
"grad_norm": 1.4448524895747423,
"learning_rate": 7.632473349195148e-06,
"loss": 0.4951,
"step": 10380
},
{
"epoch": 0.3915878340180153,
"grad_norm": 1.4981197503697667,
"learning_rate": 7.626878471717601e-06,
"loss": 0.4968,
"step": 10390
},
{
"epoch": 0.39196472317491426,
"grad_norm": 1.7418645645645234,
"learning_rate": 7.621279047423913e-06,
"loss": 0.5008,
"step": 10400
},
{
"epoch": 0.3923416123318132,
"grad_norm": 1.5178888998524598,
"learning_rate": 7.615675086006027e-06,
"loss": 0.5289,
"step": 10410
},
{
"epoch": 0.3927185014887122,
"grad_norm": 1.830273252291855,
"learning_rate": 7.610066597163737e-06,
"loss": 0.5122,
"step": 10420
},
{
"epoch": 0.3930953906456111,
"grad_norm": 1.7128529156361387,
"learning_rate": 7.604453590604675e-06,
"loss": 0.5195,
"step": 10430
},
{
"epoch": 0.3934722798025101,
"grad_norm": 1.391617720005151,
"learning_rate": 7.5988360760442905e-06,
"loss": 0.5076,
"step": 10440
},
{
"epoch": 0.393849168959409,
"grad_norm": 1.6877860440671137,
"learning_rate": 7.5932140632058395e-06,
"loss": 0.4974,
"step": 10450
},
{
"epoch": 0.394226058116308,
"grad_norm": 1.8880275691787758,
"learning_rate": 7.587587561820357e-06,
"loss": 0.5285,
"step": 10460
},
{
"epoch": 0.39460294727320694,
"grad_norm": 2.1952687274271323,
"learning_rate": 7.581956581626659e-06,
"loss": 0.4788,
"step": 10470
},
{
"epoch": 0.3949798364301059,
"grad_norm": 1.4679354015620223,
"learning_rate": 7.5763211323713e-06,
"loss": 0.5148,
"step": 10480
},
{
"epoch": 0.39535672558700485,
"grad_norm": 5.552246755488179,
"learning_rate": 7.570681223808581e-06,
"loss": 0.5088,
"step": 10490
},
{
"epoch": 0.39573361474390384,
"grad_norm": 1.7064464108546398,
"learning_rate": 7.565036865700515e-06,
"loss": 0.5036,
"step": 10500
},
{
"epoch": 0.39611050390080277,
"grad_norm": 2.5811736419688125,
"learning_rate": 7.559388067816818e-06,
"loss": 0.4968,
"step": 10510
},
{
"epoch": 0.39648739305770175,
"grad_norm": 1.5411522330658454,
"learning_rate": 7.553734839934892e-06,
"loss": 0.5321,
"step": 10520
},
{
"epoch": 0.3968642822146007,
"grad_norm": 7.8958392331494265,
"learning_rate": 7.54807719183981e-06,
"loss": 0.5144,
"step": 10530
},
{
"epoch": 0.39724117137149967,
"grad_norm": 1.398958685780613,
"learning_rate": 7.5424151333242854e-06,
"loss": 0.5038,
"step": 10540
},
{
"epoch": 0.3976180605283986,
"grad_norm": 1.8158525212197818,
"learning_rate": 7.536748674188679e-06,
"loss": 0.5498,
"step": 10550
},
{
"epoch": 0.3979949496852975,
"grad_norm": 1.5804568739533704,
"learning_rate": 7.531077824240955e-06,
"loss": 0.4943,
"step": 10560
},
{
"epoch": 0.3983718388421965,
"grad_norm": 1.5226993293921613,
"learning_rate": 7.5254025932966915e-06,
"loss": 0.5197,
"step": 10570
},
{
"epoch": 0.39874872799909544,
"grad_norm": 1.4875198630376472,
"learning_rate": 7.519722991179037e-06,
"loss": 0.4911,
"step": 10580
},
{
"epoch": 0.3991256171559944,
"grad_norm": 1.4559827608231475,
"learning_rate": 7.514039027718714e-06,
"loss": 0.5369,
"step": 10590
},
{
"epoch": 0.39950250631289336,
"grad_norm": 1.6204937117910332,
"learning_rate": 7.50835071275399e-06,
"loss": 0.5126,
"step": 10600
},
{
"epoch": 0.39987939546979234,
"grad_norm": 1.3328838051236007,
"learning_rate": 7.502658056130667e-06,
"loss": 0.4924,
"step": 10610
},
{
"epoch": 0.40025628462669127,
"grad_norm": 1.6977987639267709,
"learning_rate": 7.496961067702061e-06,
"loss": 0.5299,
"step": 10620
},
{
"epoch": 0.40063317378359026,
"grad_norm": 1.7016949489371669,
"learning_rate": 7.491259757328986e-06,
"loss": 0.4688,
"step": 10630
},
{
"epoch": 0.4010100629404892,
"grad_norm": 1.7144533476268096,
"learning_rate": 7.4855541348797325e-06,
"loss": 0.496,
"step": 10640
},
{
"epoch": 0.40138695209738817,
"grad_norm": 1.6798605375904403,
"learning_rate": 7.479844210230063e-06,
"loss": 0.4848,
"step": 10650
},
{
"epoch": 0.4017638412542871,
"grad_norm": 1.6026398018607957,
"learning_rate": 7.474129993263181e-06,
"loss": 0.5436,
"step": 10660
},
{
"epoch": 0.4021407304111861,
"grad_norm": 2.1790501189406974,
"learning_rate": 7.468411493869719e-06,
"loss": 0.506,
"step": 10670
},
{
"epoch": 0.402517619568085,
"grad_norm": 1.5645837080947917,
"learning_rate": 7.462688721947724e-06,
"loss": 0.5218,
"step": 10680
},
{
"epoch": 0.402894508724984,
"grad_norm": 1.4088765793630955,
"learning_rate": 7.456961687402639e-06,
"loss": 0.5237,
"step": 10690
},
{
"epoch": 0.40327139788188293,
"grad_norm": 1.7292916232116071,
"learning_rate": 7.451230400147285e-06,
"loss": 0.5469,
"step": 10700
},
{
"epoch": 0.4036482870387819,
"grad_norm": 1.5186451953406783,
"learning_rate": 7.44549487010184e-06,
"loss": 0.4883,
"step": 10710
},
{
"epoch": 0.40402517619568085,
"grad_norm": 1.5087236868153622,
"learning_rate": 7.43975510719383e-06,
"loss": 0.5093,
"step": 10720
},
{
"epoch": 0.40440206535257983,
"grad_norm": 1.4677033417891505,
"learning_rate": 7.434011121358106e-06,
"loss": 0.5286,
"step": 10730
},
{
"epoch": 0.40477895450947876,
"grad_norm": 1.4802091202672563,
"learning_rate": 7.428262922536829e-06,
"loss": 0.5089,
"step": 10740
},
{
"epoch": 0.4051558436663777,
"grad_norm": 1.7355158540184825,
"learning_rate": 7.422510520679451e-06,
"loss": 0.4905,
"step": 10750
},
{
"epoch": 0.4055327328232767,
"grad_norm": 1.6236103109691675,
"learning_rate": 7.416753925742699e-06,
"loss": 0.5378,
"step": 10760
},
{
"epoch": 0.4059096219801756,
"grad_norm": 1.6609723115712154,
"learning_rate": 7.410993147690559e-06,
"loss": 0.5389,
"step": 10770
},
{
"epoch": 0.4062865111370746,
"grad_norm": 1.723187447121083,
"learning_rate": 7.405228196494258e-06,
"loss": 0.506,
"step": 10780
},
{
"epoch": 0.4066634002939735,
"grad_norm": 1.77371397243615,
"learning_rate": 7.399459082132245e-06,
"loss": 0.5513,
"step": 10790
},
{
"epoch": 0.4070402894508725,
"grad_norm": 1.61901847701063,
"learning_rate": 7.393685814590173e-06,
"loss": 0.5186,
"step": 10800
},
{
"epoch": 0.40741717860777144,
"grad_norm": 1.59625784282029,
"learning_rate": 7.387908403860888e-06,
"loss": 0.5185,
"step": 10810
},
{
"epoch": 0.4077940677646704,
"grad_norm": 1.6692144430702707,
"learning_rate": 7.382126859944404e-06,
"loss": 0.512,
"step": 10820
},
{
"epoch": 0.40817095692156935,
"grad_norm": 1.4908534978634642,
"learning_rate": 7.3763411928478905e-06,
"loss": 0.4861,
"step": 10830
},
{
"epoch": 0.40854784607846834,
"grad_norm": 1.8840748788280792,
"learning_rate": 7.370551412585653e-06,
"loss": 0.5155,
"step": 10840
},
{
"epoch": 0.40892473523536726,
"grad_norm": 1.6633231923886227,
"learning_rate": 7.364757529179116e-06,
"loss": 0.5222,
"step": 10850
},
{
"epoch": 0.40930162439226625,
"grad_norm": 1.484920688542688,
"learning_rate": 7.3589595526568105e-06,
"loss": 0.5058,
"step": 10860
},
{
"epoch": 0.4096785135491652,
"grad_norm": 1.9228281526896014,
"learning_rate": 7.353157493054342e-06,
"loss": 0.5304,
"step": 10870
},
{
"epoch": 0.41005540270606416,
"grad_norm": 1.8251544219160065,
"learning_rate": 7.347351360414396e-06,
"loss": 0.5211,
"step": 10880
},
{
"epoch": 0.4104322918629631,
"grad_norm": 1.6738905480970352,
"learning_rate": 7.341541164786701e-06,
"loss": 0.472,
"step": 10890
},
{
"epoch": 0.4108091810198621,
"grad_norm": 1.7440198392451363,
"learning_rate": 7.335726916228014e-06,
"loss": 0.5083,
"step": 10900
},
{
"epoch": 0.411186070176761,
"grad_norm": 1.6268369569369572,
"learning_rate": 7.329908624802118e-06,
"loss": 0.5208,
"step": 10910
},
{
"epoch": 0.41156295933366,
"grad_norm": 1.254862433004947,
"learning_rate": 7.3240863005797845e-06,
"loss": 0.4823,
"step": 10920
},
{
"epoch": 0.4119398484905589,
"grad_norm": 1.396064069324666,
"learning_rate": 7.3182599536387685e-06,
"loss": 0.5345,
"step": 10930
},
{
"epoch": 0.4123167376474579,
"grad_norm": 1.5132124149170976,
"learning_rate": 7.31242959406379e-06,
"loss": 0.5,
"step": 10940
},
{
"epoch": 0.41269362680435684,
"grad_norm": 1.6199593366062408,
"learning_rate": 7.306595231946509e-06,
"loss": 0.52,
"step": 10950
},
{
"epoch": 0.41307051596125577,
"grad_norm": 1.4096476517099632,
"learning_rate": 7.300756877385522e-06,
"loss": 0.5073,
"step": 10960
},
{
"epoch": 0.41344740511815475,
"grad_norm": 2.2637085400824266,
"learning_rate": 7.294914540486324e-06,
"loss": 0.5514,
"step": 10970
},
{
"epoch": 0.4138242942750537,
"grad_norm": 1.7076238981325191,
"learning_rate": 7.2890682313613145e-06,
"loss": 0.5193,
"step": 10980
},
{
"epoch": 0.41420118343195267,
"grad_norm": 1.404089053698958,
"learning_rate": 7.283217960129761e-06,
"loss": 0.4978,
"step": 10990
},
{
"epoch": 0.4145780725888516,
"grad_norm": 1.765699200284767,
"learning_rate": 7.277363736917793e-06,
"loss": 0.5045,
"step": 11000
},
{
"epoch": 0.4149549617457506,
"grad_norm": 1.2515597552306361,
"learning_rate": 7.271505571858378e-06,
"loss": 0.5161,
"step": 11010
},
{
"epoch": 0.4153318509026495,
"grad_norm": 1.5637990689131112,
"learning_rate": 7.265643475091308e-06,
"loss": 0.5056,
"step": 11020
},
{
"epoch": 0.4157087400595485,
"grad_norm": 1.6950097710271756,
"learning_rate": 7.25977745676318e-06,
"loss": 0.5305,
"step": 11030
},
{
"epoch": 0.41608562921644743,
"grad_norm": 1.3673559058916098,
"learning_rate": 7.253907527027377e-06,
"loss": 0.5156,
"step": 11040
},
{
"epoch": 0.4164625183733464,
"grad_norm": 1.7616801139174183,
"learning_rate": 7.2480336960440535e-06,
"loss": 0.5105,
"step": 11050
},
{
"epoch": 0.41683940753024534,
"grad_norm": 1.5601831166364712,
"learning_rate": 7.242155973980118e-06,
"loss": 0.4974,
"step": 11060
},
{
"epoch": 0.41721629668714433,
"grad_norm": 1.5959842163354248,
"learning_rate": 7.236274371009213e-06,
"loss": 0.5112,
"step": 11070
},
{
"epoch": 0.41759318584404326,
"grad_norm": 1.5739266782444512,
"learning_rate": 7.2303888973116955e-06,
"loss": 0.5146,
"step": 11080
},
{
"epoch": 0.41797007500094224,
"grad_norm": 1.4353518599364972,
"learning_rate": 7.224499563074627e-06,
"loss": 0.4896,
"step": 11090
},
{
"epoch": 0.4183469641578412,
"grad_norm": 1.6746462092510674,
"learning_rate": 7.218606378491748e-06,
"loss": 0.5083,
"step": 11100
},
{
"epoch": 0.41872385331474016,
"grad_norm": 1.4785054484483375,
"learning_rate": 7.2127093537634655e-06,
"loss": 0.504,
"step": 11110
},
{
"epoch": 0.4191007424716391,
"grad_norm": 2.5097265005422287,
"learning_rate": 7.20680849909683e-06,
"loss": 0.5089,
"step": 11120
},
{
"epoch": 0.4194776316285381,
"grad_norm": 1.936326911057867,
"learning_rate": 7.200903824705525e-06,
"loss": 0.4966,
"step": 11130
},
{
"epoch": 0.419854520785437,
"grad_norm": 1.3661039009657177,
"learning_rate": 7.194995340809845e-06,
"loss": 0.4992,
"step": 11140
},
{
"epoch": 0.42023140994233593,
"grad_norm": 1.7262176315985598,
"learning_rate": 7.189083057636677e-06,
"loss": 0.5468,
"step": 11150
},
{
"epoch": 0.4206082990992349,
"grad_norm": 1.4653322424571276,
"learning_rate": 7.183166985419482e-06,
"loss": 0.5141,
"step": 11160
},
{
"epoch": 0.42098518825613385,
"grad_norm": 1.401053993164947,
"learning_rate": 7.177247134398286e-06,
"loss": 0.4912,
"step": 11170
},
{
"epoch": 0.42136207741303283,
"grad_norm": 1.4861938807474284,
"learning_rate": 7.171323514819645e-06,
"loss": 0.5232,
"step": 11180
},
{
"epoch": 0.42173896656993176,
"grad_norm": 1.4194599929006113,
"learning_rate": 7.1653961369366495e-06,
"loss": 0.4814,
"step": 11190
},
{
"epoch": 0.42211585572683075,
"grad_norm": 1.76956116217899,
"learning_rate": 7.159465011008888e-06,
"loss": 0.4932,
"step": 11200
},
{
"epoch": 0.4224927448837297,
"grad_norm": 1.9109196300178812,
"learning_rate": 7.15353014730244e-06,
"loss": 0.5128,
"step": 11210
},
{
"epoch": 0.42286963404062866,
"grad_norm": 1.6509926306486402,
"learning_rate": 7.147591556089851e-06,
"loss": 0.505,
"step": 11220
},
{
"epoch": 0.4232465231975276,
"grad_norm": 1.6713792587181258,
"learning_rate": 7.141649247650122e-06,
"loss": 0.4887,
"step": 11230
},
{
"epoch": 0.4236234123544266,
"grad_norm": 1.668002839224313,
"learning_rate": 7.135703232268686e-06,
"loss": 0.4888,
"step": 11240
},
{
"epoch": 0.4240003015113255,
"grad_norm": 1.6242132187489502,
"learning_rate": 7.1297535202373935e-06,
"loss": 0.4965,
"step": 11250
},
{
"epoch": 0.4243771906682245,
"grad_norm": 1.4189185720393587,
"learning_rate": 7.1238001218544904e-06,
"loss": 0.4619,
"step": 11260
},
{
"epoch": 0.4247540798251234,
"grad_norm": 1.3574884914862415,
"learning_rate": 7.117843047424608e-06,
"loss": 0.5141,
"step": 11270
},
{
"epoch": 0.4251309689820224,
"grad_norm": 1.5298779197157868,
"learning_rate": 7.111882307258737e-06,
"loss": 0.4846,
"step": 11280
},
{
"epoch": 0.42550785813892134,
"grad_norm": 1.6463532463625445,
"learning_rate": 7.105917911674216e-06,
"loss": 0.52,
"step": 11290
},
{
"epoch": 0.4258847472958203,
"grad_norm": 1.8876680862122976,
"learning_rate": 7.099949870994706e-06,
"loss": 0.5022,
"step": 11300
},
{
"epoch": 0.42626163645271925,
"grad_norm": 1.6209020479441716,
"learning_rate": 7.093978195550181e-06,
"loss": 0.4856,
"step": 11310
},
{
"epoch": 0.42663852560961824,
"grad_norm": 1.4825939311414624,
"learning_rate": 7.088002895676905e-06,
"loss": 0.51,
"step": 11320
},
{
"epoch": 0.42701541476651717,
"grad_norm": 1.6454549725646832,
"learning_rate": 7.082023981717417e-06,
"loss": 0.5298,
"step": 11330
},
{
"epoch": 0.42739230392341615,
"grad_norm": 1.8515246881502785,
"learning_rate": 7.07604146402051e-06,
"loss": 0.5353,
"step": 11340
},
{
"epoch": 0.4277691930803151,
"grad_norm": 1.664094685534996,
"learning_rate": 7.0700553529412155e-06,
"loss": 0.5199,
"step": 11350
},
{
"epoch": 0.428146082237214,
"grad_norm": 2.1244311792060975,
"learning_rate": 7.064065658840782e-06,
"loss": 0.5145,
"step": 11360
},
{
"epoch": 0.428522971394113,
"grad_norm": 1.520255254828738,
"learning_rate": 7.058072392086663e-06,
"loss": 0.5159,
"step": 11370
},
{
"epoch": 0.4288998605510119,
"grad_norm": 1.6576637569684334,
"learning_rate": 7.052075563052496e-06,
"loss": 0.4984,
"step": 11380
},
{
"epoch": 0.4292767497079109,
"grad_norm": 1.5125466189919647,
"learning_rate": 7.0460751821180825e-06,
"loss": 0.5264,
"step": 11390
},
{
"epoch": 0.42965363886480984,
"grad_norm": 1.6676033896976015,
"learning_rate": 7.0400712596693735e-06,
"loss": 0.5229,
"step": 11400
},
{
"epoch": 0.4300305280217088,
"grad_norm": 1.7189105587329556,
"learning_rate": 7.034063806098447e-06,
"loss": 0.5207,
"step": 11410
},
{
"epoch": 0.43040741717860775,
"grad_norm": 1.7180985315889115,
"learning_rate": 7.0280528318034965e-06,
"loss": 0.5022,
"step": 11420
},
{
"epoch": 0.43078430633550674,
"grad_norm": 1.5059034039755612,
"learning_rate": 7.022038347188809e-06,
"loss": 0.5184,
"step": 11430
},
{
"epoch": 0.43116119549240567,
"grad_norm": 1.6528886008084185,
"learning_rate": 7.016020362664744e-06,
"loss": 0.5168,
"step": 11440
},
{
"epoch": 0.43153808464930465,
"grad_norm": 1.8617180030972997,
"learning_rate": 7.009998888647724e-06,
"loss": 0.502,
"step": 11450
},
{
"epoch": 0.4319149738062036,
"grad_norm": 1.3525198323198173,
"learning_rate": 7.003973935560206e-06,
"loss": 0.4869,
"step": 11460
},
{
"epoch": 0.43229186296310257,
"grad_norm": 1.677808325515248,
"learning_rate": 6.997945513830674e-06,
"loss": 0.5101,
"step": 11470
},
{
"epoch": 0.4326687521200015,
"grad_norm": 1.5676318820179806,
"learning_rate": 6.991913633893612e-06,
"loss": 0.5009,
"step": 11480
},
{
"epoch": 0.4330456412769005,
"grad_norm": 1.5538674193725228,
"learning_rate": 6.985878306189491e-06,
"loss": 0.4667,
"step": 11490
},
{
"epoch": 0.4334225304337994,
"grad_norm": 1.783690739137192,
"learning_rate": 6.979839541164754e-06,
"loss": 0.5322,
"step": 11500
},
{
"epoch": 0.4337994195906984,
"grad_norm": 1.6669015455087597,
"learning_rate": 6.973797349271783e-06,
"loss": 0.5029,
"step": 11510
},
{
"epoch": 0.43417630874759733,
"grad_norm": 1.4853940822920297,
"learning_rate": 6.967751740968902e-06,
"loss": 0.5118,
"step": 11520
},
{
"epoch": 0.4345531979044963,
"grad_norm": 1.6489294125281642,
"learning_rate": 6.9617027267203445e-06,
"loss": 0.5031,
"step": 11530
},
{
"epoch": 0.43493008706139524,
"grad_norm": 1.449669861501994,
"learning_rate": 6.955650316996236e-06,
"loss": 0.5022,
"step": 11540
},
{
"epoch": 0.4353069762182942,
"grad_norm": 1.8165282391809885,
"learning_rate": 6.949594522272587e-06,
"loss": 0.4935,
"step": 11550
},
{
"epoch": 0.43568386537519316,
"grad_norm": 1.5552590851069892,
"learning_rate": 6.943535353031258e-06,
"loss": 0.5157,
"step": 11560
},
{
"epoch": 0.4360607545320921,
"grad_norm": 1.6072285953371521,
"learning_rate": 6.937472819759959e-06,
"loss": 0.5416,
"step": 11570
},
{
"epoch": 0.4364376436889911,
"grad_norm": 1.7154718263219901,
"learning_rate": 6.931406932952216e-06,
"loss": 0.5112,
"step": 11580
},
{
"epoch": 0.43681453284589,
"grad_norm": 1.69530017912524,
"learning_rate": 6.92533770310736e-06,
"loss": 0.5259,
"step": 11590
},
{
"epoch": 0.437191422002789,
"grad_norm": 1.860148174972511,
"learning_rate": 6.919265140730514e-06,
"loss": 0.5106,
"step": 11600
},
{
"epoch": 0.4375683111596879,
"grad_norm": 1.5811409620351222,
"learning_rate": 6.913189256332566e-06,
"loss": 0.5198,
"step": 11610
},
{
"epoch": 0.4379452003165869,
"grad_norm": 1.4378401199137765,
"learning_rate": 6.9071100604301496e-06,
"loss": 0.5018,
"step": 11620
},
{
"epoch": 0.43832208947348583,
"grad_norm": 1.6434275760246106,
"learning_rate": 6.901027563545639e-06,
"loss": 0.5117,
"step": 11630
},
{
"epoch": 0.4386989786303848,
"grad_norm": 1.6446268399097024,
"learning_rate": 6.894941776207114e-06,
"loss": 0.5143,
"step": 11640
},
{
"epoch": 0.43907586778728375,
"grad_norm": 1.5857907742925326,
"learning_rate": 6.888852708948354e-06,
"loss": 0.5174,
"step": 11650
},
{
"epoch": 0.43945275694418273,
"grad_norm": 1.4020005504081066,
"learning_rate": 6.882760372308819e-06,
"loss": 0.5229,
"step": 11660
},
{
"epoch": 0.43982964610108166,
"grad_norm": 1.61069613644627,
"learning_rate": 6.876664776833616e-06,
"loss": 0.4927,
"step": 11670
},
{
"epoch": 0.44020653525798065,
"grad_norm": 1.7140951891727507,
"learning_rate": 6.870565933073505e-06,
"loss": 0.4936,
"step": 11680
},
{
"epoch": 0.4405834244148796,
"grad_norm": 1.4507770402959719,
"learning_rate": 6.864463851584863e-06,
"loss": 0.5296,
"step": 11690
},
{
"epoch": 0.44096031357177856,
"grad_norm": 1.460824925207524,
"learning_rate": 6.858358542929672e-06,
"loss": 0.5255,
"step": 11700
},
{
"epoch": 0.4413372027286775,
"grad_norm": 1.6825791692741976,
"learning_rate": 6.852250017675499e-06,
"loss": 0.5079,
"step": 11710
},
{
"epoch": 0.4417140918855765,
"grad_norm": 1.7652951082406643,
"learning_rate": 6.8461382863954786e-06,
"loss": 0.5281,
"step": 11720
},
{
"epoch": 0.4420909810424754,
"grad_norm": 1.6445005156801455,
"learning_rate": 6.840023359668297e-06,
"loss": 0.5007,
"step": 11730
},
{
"epoch": 0.4424678701993744,
"grad_norm": 3.0505367160131107,
"learning_rate": 6.833905248078168e-06,
"loss": 0.5027,
"step": 11740
},
{
"epoch": 0.4428447593562733,
"grad_norm": 1.7728481701439127,
"learning_rate": 6.82778396221482e-06,
"loss": 0.5067,
"step": 11750
},
{
"epoch": 0.44322164851317225,
"grad_norm": 1.4266982652231972,
"learning_rate": 6.8216595126734775e-06,
"loss": 0.4973,
"step": 11760
},
{
"epoch": 0.44359853767007124,
"grad_norm": 1.4465402471000843,
"learning_rate": 6.815531910054834e-06,
"loss": 0.5315,
"step": 11770
},
{
"epoch": 0.44397542682697017,
"grad_norm": 1.5371566334005535,
"learning_rate": 6.809401164965051e-06,
"loss": 0.4947,
"step": 11780
},
{
"epoch": 0.44435231598386915,
"grad_norm": 1.664248932365906,
"learning_rate": 6.803267288015718e-06,
"loss": 0.5267,
"step": 11790
},
{
"epoch": 0.4447292051407681,
"grad_norm": 1.6494483320654694,
"learning_rate": 6.7971302898238545e-06,
"loss": 0.4986,
"step": 11800
},
{
"epoch": 0.44510609429766707,
"grad_norm": 1.3037692393364804,
"learning_rate": 6.7909901810118785e-06,
"loss": 0.4724,
"step": 11810
},
{
"epoch": 0.445482983454566,
"grad_norm": 2.6200187135591713,
"learning_rate": 6.784846972207593e-06,
"loss": 0.5032,
"step": 11820
},
{
"epoch": 0.445859872611465,
"grad_norm": 1.6525404819183418,
"learning_rate": 6.778700674044164e-06,
"loss": 0.5413,
"step": 11830
},
{
"epoch": 0.4462367617683639,
"grad_norm": 1.480259604215167,
"learning_rate": 6.77255129716011e-06,
"loss": 0.5062,
"step": 11840
},
{
"epoch": 0.4466136509252629,
"grad_norm": 1.7299508446770775,
"learning_rate": 6.7663988521992744e-06,
"loss": 0.5062,
"step": 11850
},
{
"epoch": 0.4469905400821618,
"grad_norm": 1.389929475780395,
"learning_rate": 6.760243349810811e-06,
"loss": 0.5041,
"step": 11860
},
{
"epoch": 0.4473674292390608,
"grad_norm": 1.795064940876833,
"learning_rate": 6.754084800649169e-06,
"loss": 0.4923,
"step": 11870
},
{
"epoch": 0.44774431839595974,
"grad_norm": 1.5502942925119751,
"learning_rate": 6.747923215374068e-06,
"loss": 0.4851,
"step": 11880
},
{
"epoch": 0.4481212075528587,
"grad_norm": 1.635874220277167,
"learning_rate": 6.741758604650485e-06,
"loss": 0.5009,
"step": 11890
},
{
"epoch": 0.44849809670975765,
"grad_norm": 1.706031522666674,
"learning_rate": 6.735590979148629e-06,
"loss": 0.487,
"step": 11900
},
{
"epoch": 0.44887498586665664,
"grad_norm": 1.690364938457251,
"learning_rate": 6.729420349543934e-06,
"loss": 0.4947,
"step": 11910
},
{
"epoch": 0.44925187502355557,
"grad_norm": 1.7433823515896854,
"learning_rate": 6.7232467265170295e-06,
"loss": 0.5077,
"step": 11920
},
{
"epoch": 0.44962876418045455,
"grad_norm": 1.6126498482759561,
"learning_rate": 6.7170701207537285e-06,
"loss": 0.5043,
"step": 11930
},
{
"epoch": 0.4500056533373535,
"grad_norm": 1.5461921740338886,
"learning_rate": 6.7108905429450035e-06,
"loss": 0.5211,
"step": 11940
},
{
"epoch": 0.4503825424942524,
"grad_norm": 1.654741487368281,
"learning_rate": 6.704708003786974e-06,
"loss": 0.5162,
"step": 11950
},
{
"epoch": 0.4507594316511514,
"grad_norm": 1.5875885105401868,
"learning_rate": 6.698522513980884e-06,
"loss": 0.5076,
"step": 11960
},
{
"epoch": 0.45113632080805033,
"grad_norm": 1.501080589567821,
"learning_rate": 6.692334084233087e-06,
"loss": 0.5019,
"step": 11970
},
{
"epoch": 0.4515132099649493,
"grad_norm": 1.6323939585322438,
"learning_rate": 6.686142725255021e-06,
"loss": 0.5215,
"step": 11980
},
{
"epoch": 0.45189009912184824,
"grad_norm": 1.3624830176460956,
"learning_rate": 6.679948447763201e-06,
"loss": 0.5026,
"step": 11990
},
{
"epoch": 0.45226698827874723,
"grad_norm": 1.6944099137300341,
"learning_rate": 6.673751262479183e-06,
"loss": 0.5073,
"step": 12000
},
{
"epoch": 0.45264387743564616,
"grad_norm": 1.5860681363669702,
"learning_rate": 6.667551180129565e-06,
"loss": 0.5085,
"step": 12010
},
{
"epoch": 0.45302076659254514,
"grad_norm": 1.6331152902584547,
"learning_rate": 6.661348211445959e-06,
"loss": 0.5195,
"step": 12020
},
{
"epoch": 0.4533976557494441,
"grad_norm": 4.415183248235426,
"learning_rate": 6.655142367164967e-06,
"loss": 0.5005,
"step": 12030
},
{
"epoch": 0.45377454490634306,
"grad_norm": 1.5798349602106418,
"learning_rate": 6.648933658028174e-06,
"loss": 0.5045,
"step": 12040
},
{
"epoch": 0.454151434063242,
"grad_norm": 1.435313930078849,
"learning_rate": 6.642722094782121e-06,
"loss": 0.4641,
"step": 12050
},
{
"epoch": 0.454528323220141,
"grad_norm": 1.7147543461190684,
"learning_rate": 6.636507688178291e-06,
"loss": 0.5031,
"step": 12060
},
{
"epoch": 0.4549052123770399,
"grad_norm": 1.5410650369487786,
"learning_rate": 6.630290448973087e-06,
"loss": 0.5173,
"step": 12070
},
{
"epoch": 0.4552821015339389,
"grad_norm": 1.61040530239707,
"learning_rate": 6.624070387927811e-06,
"loss": 0.5205,
"step": 12080
},
{
"epoch": 0.4556589906908378,
"grad_norm": 1.4352463589168436,
"learning_rate": 6.61784751580866e-06,
"loss": 0.4747,
"step": 12090
},
{
"epoch": 0.4560358798477368,
"grad_norm": 1.8210569410817206,
"learning_rate": 6.611621843386684e-06,
"loss": 0.4858,
"step": 12100
},
{
"epoch": 0.45641276900463573,
"grad_norm": 1.9985993173284664,
"learning_rate": 6.605393381437792e-06,
"loss": 0.496,
"step": 12110
},
{
"epoch": 0.4567896581615347,
"grad_norm": 1.2939840914047982,
"learning_rate": 6.599162140742712e-06,
"loss": 0.4852,
"step": 12120
},
{
"epoch": 0.45716654731843365,
"grad_norm": 1.6530787032881842,
"learning_rate": 6.592928132086984e-06,
"loss": 0.5003,
"step": 12130
},
{
"epoch": 0.45754343647533263,
"grad_norm": 1.2909832653537678,
"learning_rate": 6.586691366260943e-06,
"loss": 0.5254,
"step": 12140
},
{
"epoch": 0.45792032563223156,
"grad_norm": 1.775707858274227,
"learning_rate": 6.580451854059693e-06,
"loss": 0.5033,
"step": 12150
},
{
"epoch": 0.4582972147891305,
"grad_norm": 1.5901192603045122,
"learning_rate": 6.574209606283089e-06,
"loss": 0.4797,
"step": 12160
},
{
"epoch": 0.4586741039460295,
"grad_norm": 1.5718931288158366,
"learning_rate": 6.56796463373573e-06,
"loss": 0.5122,
"step": 12170
},
{
"epoch": 0.4590509931029284,
"grad_norm": 1.5316527090504148,
"learning_rate": 6.561716947226918e-06,
"loss": 0.5096,
"step": 12180
},
{
"epoch": 0.4594278822598274,
"grad_norm": 1.6069192593187367,
"learning_rate": 6.555466557570666e-06,
"loss": 0.4749,
"step": 12190
},
{
"epoch": 0.4598047714167263,
"grad_norm": 1.6443701058606435,
"learning_rate": 6.549213475585657e-06,
"loss": 0.5315,
"step": 12200
},
{
"epoch": 0.4601816605736253,
"grad_norm": 1.6467073011805697,
"learning_rate": 6.542957712095236e-06,
"loss": 0.4864,
"step": 12210
},
{
"epoch": 0.46055854973052424,
"grad_norm": 1.6443944395717125,
"learning_rate": 6.536699277927393e-06,
"loss": 0.5069,
"step": 12220
},
{
"epoch": 0.4609354388874232,
"grad_norm": 1.6659779073083896,
"learning_rate": 6.530438183914735e-06,
"loss": 0.4887,
"step": 12230
},
{
"epoch": 0.46131232804432215,
"grad_norm": 1.802633741824681,
"learning_rate": 6.5241744408944776e-06,
"loss": 0.4967,
"step": 12240
},
{
"epoch": 0.46168921720122114,
"grad_norm": 1.6619516541296946,
"learning_rate": 6.517908059708417e-06,
"loss": 0.5076,
"step": 12250
},
{
"epoch": 0.46206610635812007,
"grad_norm": 1.9424205639529566,
"learning_rate": 6.511639051202922e-06,
"loss": 0.5228,
"step": 12260
},
{
"epoch": 0.46244299551501905,
"grad_norm": 1.864221195142991,
"learning_rate": 6.505367426228902e-06,
"loss": 0.5092,
"step": 12270
},
{
"epoch": 0.462819884671918,
"grad_norm": 1.5859581361435124,
"learning_rate": 6.499093195641801e-06,
"loss": 0.4919,
"step": 12280
},
{
"epoch": 0.46319677382881697,
"grad_norm": 1.5769900351453978,
"learning_rate": 6.49281637030157e-06,
"loss": 0.5003,
"step": 12290
},
{
"epoch": 0.4635736629857159,
"grad_norm": 1.962586555293073,
"learning_rate": 6.486536961072651e-06,
"loss": 0.5144,
"step": 12300
},
{
"epoch": 0.4639505521426149,
"grad_norm": 1.5960370769135634,
"learning_rate": 6.4802549788239585e-06,
"loss": 0.4968,
"step": 12310
},
{
"epoch": 0.4643274412995138,
"grad_norm": 1.4471637413402056,
"learning_rate": 6.473970434428865e-06,
"loss": 0.5133,
"step": 12320
},
{
"epoch": 0.4647043304564128,
"grad_norm": 1.5033062625403697,
"learning_rate": 6.467683338765169e-06,
"loss": 0.4983,
"step": 12330
},
{
"epoch": 0.4650812196133117,
"grad_norm": 1.8045255687380959,
"learning_rate": 6.461393702715093e-06,
"loss": 0.5212,
"step": 12340
},
{
"epoch": 0.46545810877021065,
"grad_norm": 1.5049755950938395,
"learning_rate": 6.455101537165251e-06,
"loss": 0.4964,
"step": 12350
},
{
"epoch": 0.46583499792710964,
"grad_norm": 1.307607429446019,
"learning_rate": 6.448806853006642e-06,
"loss": 0.4847,
"step": 12360
},
{
"epoch": 0.46621188708400857,
"grad_norm": 1.5148579248421497,
"learning_rate": 6.442509661134617e-06,
"loss": 0.4749,
"step": 12370
},
{
"epoch": 0.46658877624090755,
"grad_norm": 1.6520404081654247,
"learning_rate": 6.436209972448872e-06,
"loss": 0.5118,
"step": 12380
},
{
"epoch": 0.4669656653978065,
"grad_norm": 1.4969166628742265,
"learning_rate": 6.4299077978534215e-06,
"loss": 0.4866,
"step": 12390
},
{
"epoch": 0.46734255455470547,
"grad_norm": 1.688329647541456,
"learning_rate": 6.423603148256589e-06,
"loss": 0.5215,
"step": 12400
},
{
"epoch": 0.4677194437116044,
"grad_norm": 1.4741523126628993,
"learning_rate": 6.417296034570972e-06,
"loss": 0.4984,
"step": 12410
},
{
"epoch": 0.4680963328685034,
"grad_norm": 1.4962239650802354,
"learning_rate": 6.410986467713446e-06,
"loss": 0.5275,
"step": 12420
},
{
"epoch": 0.4684732220254023,
"grad_norm": 1.7317402848543209,
"learning_rate": 6.404674458605119e-06,
"loss": 0.5235,
"step": 12430
},
{
"epoch": 0.4688501111823013,
"grad_norm": 1.507370223846744,
"learning_rate": 6.398360018171335e-06,
"loss": 0.5065,
"step": 12440
},
{
"epoch": 0.46922700033920023,
"grad_norm": 1.6389685200877848,
"learning_rate": 6.392043157341645e-06,
"loss": 0.5139,
"step": 12450
},
{
"epoch": 0.4696038894960992,
"grad_norm": 4.587056243685229,
"learning_rate": 6.385723887049788e-06,
"loss": 0.5037,
"step": 12460
},
{
"epoch": 0.46998077865299814,
"grad_norm": 1.4469191726379287,
"learning_rate": 6.379402218233673e-06,
"loss": 0.5139,
"step": 12470
},
{
"epoch": 0.47035766780989713,
"grad_norm": 1.4631499108231787,
"learning_rate": 6.373078161835364e-06,
"loss": 0.5132,
"step": 12480
},
{
"epoch": 0.47073455696679606,
"grad_norm": 1.494757392645272,
"learning_rate": 6.366751728801051e-06,
"loss": 0.4952,
"step": 12490
},
{
"epoch": 0.47111144612369504,
"grad_norm": 1.6614354722718154,
"learning_rate": 6.360422930081045e-06,
"loss": 0.5236,
"step": 12500
},
{
"epoch": 0.471488335280594,
"grad_norm": 1.589063266340144,
"learning_rate": 6.3540917766297475e-06,
"loss": 0.4754,
"step": 12510
},
{
"epoch": 0.47186522443749296,
"grad_norm": 1.645641474309447,
"learning_rate": 6.347758279405636e-06,
"loss": 0.5105,
"step": 12520
},
{
"epoch": 0.4722421135943919,
"grad_norm": 1.842229947630362,
"learning_rate": 6.341422449371247e-06,
"loss": 0.5058,
"step": 12530
},
{
"epoch": 0.4726190027512908,
"grad_norm": 1.6305355750205157,
"learning_rate": 6.3350842974931526e-06,
"loss": 0.5387,
"step": 12540
},
{
"epoch": 0.4729958919081898,
"grad_norm": 1.618466891577579,
"learning_rate": 6.328743834741945e-06,
"loss": 0.4999,
"step": 12550
},
{
"epoch": 0.47337278106508873,
"grad_norm": 1.326825185706353,
"learning_rate": 6.322401072092216e-06,
"loss": 0.5027,
"step": 12560
},
{
"epoch": 0.4737496702219877,
"grad_norm": 1.5969546323969477,
"learning_rate": 6.316056020522538e-06,
"loss": 0.5222,
"step": 12570
},
{
"epoch": 0.47412655937888665,
"grad_norm": 1.5972771926737852,
"learning_rate": 6.309708691015443e-06,
"loss": 0.5015,
"step": 12580
},
{
"epoch": 0.47450344853578563,
"grad_norm": 1.5891933279912756,
"learning_rate": 6.303359094557411e-06,
"loss": 0.4977,
"step": 12590
},
{
"epoch": 0.47488033769268456,
"grad_norm": 1.7999033521409773,
"learning_rate": 6.297007242138842e-06,
"loss": 0.5161,
"step": 12600
},
{
"epoch": 0.47525722684958355,
"grad_norm": 1.5315463861951017,
"learning_rate": 6.290653144754043e-06,
"loss": 0.5215,
"step": 12610
},
{
"epoch": 0.4756341160064825,
"grad_norm": 1.6153094914746262,
"learning_rate": 6.2842968134012026e-06,
"loss": 0.4953,
"step": 12620
},
{
"epoch": 0.47601100516338146,
"grad_norm": 1.665498157343032,
"learning_rate": 6.277938259082382e-06,
"loss": 0.4995,
"step": 12630
},
{
"epoch": 0.4763878943202804,
"grad_norm": 1.3905032960020645,
"learning_rate": 6.271577492803486e-06,
"loss": 0.4796,
"step": 12640
},
{
"epoch": 0.4767647834771794,
"grad_norm": 1.6507279959491987,
"learning_rate": 6.265214525574248e-06,
"loss": 0.5157,
"step": 12650
},
{
"epoch": 0.4771416726340783,
"grad_norm": 1.2843410140916522,
"learning_rate": 6.258849368408213e-06,
"loss": 0.4858,
"step": 12660
},
{
"epoch": 0.4775185617909773,
"grad_norm": 1.8350039947211565,
"learning_rate": 6.252482032322716e-06,
"loss": 0.5029,
"step": 12670
},
{
"epoch": 0.4778954509478762,
"grad_norm": 1.5252536968641863,
"learning_rate": 6.246112528338864e-06,
"loss": 0.4954,
"step": 12680
},
{
"epoch": 0.4782723401047752,
"grad_norm": 15.626822494749767,
"learning_rate": 6.239740867481514e-06,
"loss": 0.4798,
"step": 12690
},
{
"epoch": 0.47864922926167414,
"grad_norm": 1.4368053909649985,
"learning_rate": 6.233367060779258e-06,
"loss": 0.4988,
"step": 12700
},
{
"epoch": 0.4790261184185731,
"grad_norm": 1.6672155662880033,
"learning_rate": 6.226991119264405e-06,
"loss": 0.501,
"step": 12710
},
{
"epoch": 0.47940300757547205,
"grad_norm": 1.7236312698986331,
"learning_rate": 6.22061305397295e-06,
"loss": 0.49,
"step": 12720
},
{
"epoch": 0.47977989673237104,
"grad_norm": 1.5777802364774114,
"learning_rate": 6.214232875944577e-06,
"loss": 0.4982,
"step": 12730
},
{
"epoch": 0.48015678588926997,
"grad_norm": 1.6768420216779125,
"learning_rate": 6.207850596222616e-06,
"loss": 0.4923,
"step": 12740
},
{
"epoch": 0.4805336750461689,
"grad_norm": 1.7758220533639664,
"learning_rate": 6.201466225854038e-06,
"loss": 0.5189,
"step": 12750
},
{
"epoch": 0.4809105642030679,
"grad_norm": 1.4580963940658098,
"learning_rate": 6.195079775889436e-06,
"loss": 0.4777,
"step": 12760
},
{
"epoch": 0.4812874533599668,
"grad_norm": 1.5845783588645914,
"learning_rate": 6.188691257382998e-06,
"loss": 0.5021,
"step": 12770
},
{
"epoch": 0.4816643425168658,
"grad_norm": 1.6276734887673494,
"learning_rate": 6.182300681392497e-06,
"loss": 0.5087,
"step": 12780
},
{
"epoch": 0.4820412316737647,
"grad_norm": 1.6758298613310316,
"learning_rate": 6.175908058979264e-06,
"loss": 0.4919,
"step": 12790
},
{
"epoch": 0.4824181208306637,
"grad_norm": 1.733797532127255,
"learning_rate": 6.169513401208169e-06,
"loss": 0.4979,
"step": 12800
},
{
"epoch": 0.48279500998756264,
"grad_norm": 1.497057452497915,
"learning_rate": 6.163116719147615e-06,
"loss": 0.5047,
"step": 12810
},
{
"epoch": 0.4831718991444616,
"grad_norm": 1.8210153084730252,
"learning_rate": 6.156718023869497e-06,
"loss": 0.4924,
"step": 12820
},
{
"epoch": 0.48354878830136055,
"grad_norm": 1.5847792515896284,
"learning_rate": 6.150317326449204e-06,
"loss": 0.4779,
"step": 12830
},
{
"epoch": 0.48392567745825954,
"grad_norm": 1.7086888789433332,
"learning_rate": 6.143914637965585e-06,
"loss": 0.5339,
"step": 12840
},
{
"epoch": 0.48430256661515847,
"grad_norm": 1.825053431015552,
"learning_rate": 6.137509969500936e-06,
"loss": 0.4735,
"step": 12850
},
{
"epoch": 0.48467945577205745,
"grad_norm": 1.4362907044279158,
"learning_rate": 6.131103332140983e-06,
"loss": 0.487,
"step": 12860
},
{
"epoch": 0.4850563449289564,
"grad_norm": 1.6070484026990965,
"learning_rate": 6.124694736974857e-06,
"loss": 0.5195,
"step": 12870
},
{
"epoch": 0.48543323408585537,
"grad_norm": 1.4921206455295302,
"learning_rate": 6.11828419509508e-06,
"loss": 0.4883,
"step": 12880
},
{
"epoch": 0.4858101232427543,
"grad_norm": 1.596099544918074,
"learning_rate": 6.111871717597542e-06,
"loss": 0.5001,
"step": 12890
},
{
"epoch": 0.4861870123996533,
"grad_norm": 1.6612200565392714,
"learning_rate": 6.10545731558148e-06,
"loss": 0.4955,
"step": 12900
},
{
"epoch": 0.4865639015565522,
"grad_norm": 1.5500638018436168,
"learning_rate": 6.09904100014947e-06,
"loss": 0.5034,
"step": 12910
},
{
"epoch": 0.4869407907134512,
"grad_norm": 1.4977064904230966,
"learning_rate": 6.092622782407395e-06,
"loss": 0.4963,
"step": 12920
},
{
"epoch": 0.48731767987035013,
"grad_norm": 1.556241953096911,
"learning_rate": 6.086202673464428e-06,
"loss": 0.4838,
"step": 12930
},
{
"epoch": 0.48769456902724906,
"grad_norm": 1.3784052149036907,
"learning_rate": 6.079780684433024e-06,
"loss": 0.5104,
"step": 12940
},
{
"epoch": 0.48807145818414804,
"grad_norm": 1.862515188918877,
"learning_rate": 6.0733568264288825e-06,
"loss": 0.4936,
"step": 12950
},
{
"epoch": 0.488448347341047,
"grad_norm": 1.4553084049722098,
"learning_rate": 6.066931110570946e-06,
"loss": 0.5048,
"step": 12960
},
{
"epoch": 0.48882523649794596,
"grad_norm": 1.8977016581962198,
"learning_rate": 6.0605035479813665e-06,
"loss": 0.51,
"step": 12970
},
{
"epoch": 0.4892021256548449,
"grad_norm": 1.4849897548302309,
"learning_rate": 6.054074149785495e-06,
"loss": 0.5085,
"step": 12980
},
{
"epoch": 0.4895790148117439,
"grad_norm": 1.5091872951995677,
"learning_rate": 6.047642927111861e-06,
"loss": 0.5341,
"step": 12990
},
{
"epoch": 0.4899559039686428,
"grad_norm": 1.6207478659154608,
"learning_rate": 6.04120989109215e-06,
"loss": 0.4925,
"step": 13000
},
{
"epoch": 0.4903327931255418,
"grad_norm": 1.7085152211604375,
"learning_rate": 6.0347750528611885e-06,
"loss": 0.5045,
"step": 13010
},
{
"epoch": 0.4907096822824407,
"grad_norm": 1.566132388020068,
"learning_rate": 6.028338423556921e-06,
"loss": 0.4953,
"step": 13020
},
{
"epoch": 0.4910865714393397,
"grad_norm": 5.227941826091356,
"learning_rate": 6.021900014320388e-06,
"loss": 0.4873,
"step": 13030
},
{
"epoch": 0.49146346059623863,
"grad_norm": 1.399863983250925,
"learning_rate": 6.015459836295719e-06,
"loss": 0.5148,
"step": 13040
},
{
"epoch": 0.4918403497531376,
"grad_norm": 1.7472942859455838,
"learning_rate": 6.0090179006301e-06,
"loss": 0.4913,
"step": 13050
},
{
"epoch": 0.49221723891003655,
"grad_norm": 1.6716498146417993,
"learning_rate": 6.002574218473759e-06,
"loss": 0.5056,
"step": 13060
},
{
"epoch": 0.49259412806693553,
"grad_norm": 1.4884093526859357,
"learning_rate": 5.996128800979949e-06,
"loss": 0.491,
"step": 13070
},
{
"epoch": 0.49297101722383446,
"grad_norm": 1.2765637587957481,
"learning_rate": 5.989681659304927e-06,
"loss": 0.5089,
"step": 13080
},
{
"epoch": 0.49334790638073345,
"grad_norm": 1.515026969598235,
"learning_rate": 5.9832328046079305e-06,
"loss": 0.5015,
"step": 13090
},
{
"epoch": 0.4937247955376324,
"grad_norm": 1.7949411377094697,
"learning_rate": 5.9767822480511685e-06,
"loss": 0.5161,
"step": 13100
},
{
"epoch": 0.49410168469453136,
"grad_norm": 1.4675750652458723,
"learning_rate": 5.970330000799787e-06,
"loss": 0.5104,
"step": 13110
},
{
"epoch": 0.4944785738514303,
"grad_norm": 2.0046846020153444,
"learning_rate": 5.963876074021868e-06,
"loss": 0.5292,
"step": 13120
},
{
"epoch": 0.4948554630083293,
"grad_norm": 1.5369389040150114,
"learning_rate": 5.957420478888393e-06,
"loss": 0.4792,
"step": 13130
},
{
"epoch": 0.4952323521652282,
"grad_norm": 1.4186555111290844,
"learning_rate": 5.950963226573237e-06,
"loss": 0.4947,
"step": 13140
},
{
"epoch": 0.49560924132212714,
"grad_norm": 1.8188924112261224,
"learning_rate": 5.944504328253137e-06,
"loss": 0.5111,
"step": 13150
},
{
"epoch": 0.4959861304790261,
"grad_norm": 1.5665015635774655,
"learning_rate": 5.9380437951076845e-06,
"loss": 0.487,
"step": 13160
},
{
"epoch": 0.49636301963592505,
"grad_norm": 1.408767806697384,
"learning_rate": 5.931581638319298e-06,
"loss": 0.5116,
"step": 13170
},
{
"epoch": 0.49673990879282404,
"grad_norm": 1.58728988540725,
"learning_rate": 5.925117869073208e-06,
"loss": 0.5059,
"step": 13180
},
{
"epoch": 0.49711679794972297,
"grad_norm": 1.6119437087711728,
"learning_rate": 5.918652498557434e-06,
"loss": 0.5047,
"step": 13190
},
{
"epoch": 0.49749368710662195,
"grad_norm": 1.6279721324019796,
"learning_rate": 5.91218553796277e-06,
"loss": 0.4935,
"step": 13200
},
{
"epoch": 0.4978705762635209,
"grad_norm": 1.459557111527884,
"learning_rate": 5.905716998482758e-06,
"loss": 0.4596,
"step": 13210
},
{
"epoch": 0.49824746542041987,
"grad_norm": 1.939844630405015,
"learning_rate": 5.899246891313678e-06,
"loss": 0.5423,
"step": 13220
},
{
"epoch": 0.4986243545773188,
"grad_norm": 2.0937651337717273,
"learning_rate": 5.892775227654518e-06,
"loss": 0.5223,
"step": 13230
},
{
"epoch": 0.4990012437342178,
"grad_norm": 1.6236113152239642,
"learning_rate": 5.886302018706964e-06,
"loss": 0.5015,
"step": 13240
},
{
"epoch": 0.4993781328911167,
"grad_norm": 1.701791434893844,
"learning_rate": 5.879827275675375e-06,
"loss": 0.5264,
"step": 13250
},
{
"epoch": 0.4997550220480157,
"grad_norm": 1.452378340458775,
"learning_rate": 5.8733510097667664e-06,
"loss": 0.4962,
"step": 13260
},
{
"epoch": 0.5001319112049146,
"grad_norm": 1.7488927537673415,
"learning_rate": 5.866873232190791e-06,
"loss": 0.5223,
"step": 13270
},
{
"epoch": 0.5005088003618136,
"grad_norm": 1.4944535300590263,
"learning_rate": 5.860393954159712e-06,
"loss": 0.4943,
"step": 13280
},
{
"epoch": 0.5008856895187126,
"grad_norm": 1.3543774753320355,
"learning_rate": 5.853913186888397e-06,
"loss": 0.5162,
"step": 13290
},
{
"epoch": 0.5012625786756115,
"grad_norm": 1.505410524383986,
"learning_rate": 5.847430941594287e-06,
"loss": 0.4926,
"step": 13300
},
{
"epoch": 0.5016394678325105,
"grad_norm": 1.8407394343276793,
"learning_rate": 5.840947229497382e-06,
"loss": 0.4958,
"step": 13310
},
{
"epoch": 0.5020163569894094,
"grad_norm": 1.6072319888354176,
"learning_rate": 5.834462061820223e-06,
"loss": 0.5221,
"step": 13320
},
{
"epoch": 0.5023932461463084,
"grad_norm": 1.4621312186704514,
"learning_rate": 5.827975449787868e-06,
"loss": 0.5361,
"step": 13330
},
{
"epoch": 0.5027701353032074,
"grad_norm": 1.478486209469283,
"learning_rate": 5.821487404627872e-06,
"loss": 0.4898,
"step": 13340
},
{
"epoch": 0.5031470244601063,
"grad_norm": 1.5900748132206215,
"learning_rate": 5.814997937570282e-06,
"loss": 0.4911,
"step": 13350
},
{
"epoch": 0.5035239136170052,
"grad_norm": 1.6106460671873601,
"learning_rate": 5.808507059847591e-06,
"loss": 0.5078,
"step": 13360
},
{
"epoch": 0.5039008027739041,
"grad_norm": 1.8201545845564078,
"learning_rate": 5.802014782694745e-06,
"loss": 0.5254,
"step": 13370
},
{
"epoch": 0.5042776919308032,
"grad_norm": 1.5604700154370892,
"learning_rate": 5.795521117349106e-06,
"loss": 0.5183,
"step": 13380
},
{
"epoch": 0.5046545810877021,
"grad_norm": 1.476477451573828,
"learning_rate": 5.789026075050445e-06,
"loss": 0.5158,
"step": 13390
},
{
"epoch": 0.505031470244601,
"grad_norm": 1.8253062366468507,
"learning_rate": 5.782529667040908e-06,
"loss": 0.5044,
"step": 13400
},
{
"epoch": 0.5054083594015,
"grad_norm": 1.4718401708375746,
"learning_rate": 5.7760319045650124e-06,
"loss": 0.4713,
"step": 13410
},
{
"epoch": 0.505785248558399,
"grad_norm": 2.0225600662739187,
"learning_rate": 5.769532798869617e-06,
"loss": 0.5435,
"step": 13420
},
{
"epoch": 0.5061621377152979,
"grad_norm": 1.7805238633538563,
"learning_rate": 5.763032361203904e-06,
"loss": 0.4927,
"step": 13430
},
{
"epoch": 0.5065390268721969,
"grad_norm": 1.6277608026896737,
"learning_rate": 5.756530602819363e-06,
"loss": 0.5066,
"step": 13440
},
{
"epoch": 0.5069159160290958,
"grad_norm": 1.4945525295342976,
"learning_rate": 5.750027534969771e-06,
"loss": 0.4921,
"step": 13450
},
{
"epoch": 0.5072928051859948,
"grad_norm": 1.4820174242789568,
"learning_rate": 5.743523168911167e-06,
"loss": 0.5075,
"step": 13460
},
{
"epoch": 0.5076696943428938,
"grad_norm": 1.3635089466658887,
"learning_rate": 5.7370175159018415e-06,
"loss": 0.5046,
"step": 13470
},
{
"epoch": 0.5080465834997927,
"grad_norm": 1.8437545665124715,
"learning_rate": 5.730510587202311e-06,
"loss": 0.515,
"step": 13480
},
{
"epoch": 0.5084234726566916,
"grad_norm": 1.786451934916694,
"learning_rate": 5.7240023940752984e-06,
"loss": 0.5066,
"step": 13490
},
{
"epoch": 0.5088003618135907,
"grad_norm": 1.7504343352240117,
"learning_rate": 5.71749294778572e-06,
"loss": 0.4949,
"step": 13500
},
{
"epoch": 0.5091772509704896,
"grad_norm": 1.4788751754375462,
"learning_rate": 5.710982259600656e-06,
"loss": 0.4816,
"step": 13510
},
{
"epoch": 0.5095541401273885,
"grad_norm": 1.5291626156746818,
"learning_rate": 5.704470340789335e-06,
"loss": 0.4725,
"step": 13520
},
{
"epoch": 0.5099310292842875,
"grad_norm": 1.6830268113490698,
"learning_rate": 5.697957202623126e-06,
"loss": 0.4755,
"step": 13530
},
{
"epoch": 0.5103079184411864,
"grad_norm": 1.495645523541403,
"learning_rate": 5.691442856375493e-06,
"loss": 0.4848,
"step": 13540
},
{
"epoch": 0.5106848075980854,
"grad_norm": 1.7364770312162106,
"learning_rate": 5.684927313322006e-06,
"loss": 0.4986,
"step": 13550
},
{
"epoch": 0.5110616967549844,
"grad_norm": 1.646648774368243,
"learning_rate": 5.678410584740296e-06,
"loss": 0.5062,
"step": 13560
},
{
"epoch": 0.5114385859118833,
"grad_norm": 1.4355497252644054,
"learning_rate": 5.671892681910052e-06,
"loss": 0.5043,
"step": 13570
},
{
"epoch": 0.5118154750687822,
"grad_norm": 1.5515113579431314,
"learning_rate": 5.6653736161129925e-06,
"loss": 0.5216,
"step": 13580
},
{
"epoch": 0.5121923642256813,
"grad_norm": 1.668318401565443,
"learning_rate": 5.658853398632849e-06,
"loss": 0.5192,
"step": 13590
},
{
"epoch": 0.5125692533825802,
"grad_norm": 1.4997427229219318,
"learning_rate": 5.6523320407553495e-06,
"loss": 0.4884,
"step": 13600
},
{
"epoch": 0.5129461425394791,
"grad_norm": 1.6101341859359553,
"learning_rate": 5.6458095537681924e-06,
"loss": 0.5099,
"step": 13610
},
{
"epoch": 0.513323031696378,
"grad_norm": 1.606887828085733,
"learning_rate": 5.63928594896103e-06,
"loss": 0.5299,
"step": 13620
},
{
"epoch": 0.5136999208532771,
"grad_norm": 1.416359664160664,
"learning_rate": 5.632761237625455e-06,
"loss": 0.4947,
"step": 13630
},
{
"epoch": 0.514076810010176,
"grad_norm": 1.4981521351869527,
"learning_rate": 5.626235431054968e-06,
"loss": 0.5176,
"step": 13640
},
{
"epoch": 0.514453699167075,
"grad_norm": 1.9146596793411172,
"learning_rate": 5.619708540544971e-06,
"loss": 0.4981,
"step": 13650
},
{
"epoch": 0.5148305883239739,
"grad_norm": 1.6248776563981115,
"learning_rate": 5.61318057739274e-06,
"loss": 0.5343,
"step": 13660
},
{
"epoch": 0.5152074774808729,
"grad_norm": 1.8392698085104202,
"learning_rate": 5.606651552897404e-06,
"loss": 0.4745,
"step": 13670
},
{
"epoch": 0.5155843666377719,
"grad_norm": 1.51974705908948,
"learning_rate": 5.6001214783599375e-06,
"loss": 0.484,
"step": 13680
},
{
"epoch": 0.5159612557946708,
"grad_norm": 1.5164194761342433,
"learning_rate": 5.593590365083126e-06,
"loss": 0.4973,
"step": 13690
},
{
"epoch": 0.5163381449515697,
"grad_norm": 1.571042115479235,
"learning_rate": 5.587058224371553e-06,
"loss": 0.4708,
"step": 13700
},
{
"epoch": 0.5167150341084688,
"grad_norm": 2.1629006116268683,
"learning_rate": 5.580525067531585e-06,
"loss": 0.4937,
"step": 13710
},
{
"epoch": 0.5170919232653677,
"grad_norm": 1.842138362917266,
"learning_rate": 5.57399090587134e-06,
"loss": 0.499,
"step": 13720
},
{
"epoch": 0.5174688124222666,
"grad_norm": 1.4590850155680444,
"learning_rate": 5.5674557507006846e-06,
"loss": 0.4989,
"step": 13730
},
{
"epoch": 0.5178457015791655,
"grad_norm": 1.5433721159408116,
"learning_rate": 5.560919613331197e-06,
"loss": 0.5099,
"step": 13740
},
{
"epoch": 0.5182225907360645,
"grad_norm": 1.8050635225380678,
"learning_rate": 5.554382505076157e-06,
"loss": 0.4918,
"step": 13750
},
{
"epoch": 0.5185994798929635,
"grad_norm": 1.3538997043392071,
"learning_rate": 5.54784443725053e-06,
"loss": 0.4787,
"step": 13760
},
{
"epoch": 0.5189763690498624,
"grad_norm": 1.7172296005933367,
"learning_rate": 5.541305421170936e-06,
"loss": 0.4926,
"step": 13770
},
{
"epoch": 0.5193532582067614,
"grad_norm": 1.682395972487519,
"learning_rate": 5.534765468155641e-06,
"loss": 0.4837,
"step": 13780
},
{
"epoch": 0.5197301473636603,
"grad_norm": 1.5176445024464842,
"learning_rate": 5.528224589524527e-06,
"loss": 0.4976,
"step": 13790
},
{
"epoch": 0.5201070365205593,
"grad_norm": 1.5257635716345566,
"learning_rate": 5.521682796599086e-06,
"loss": 0.496,
"step": 13800
},
{
"epoch": 0.5204839256774583,
"grad_norm": 1.5260587749295311,
"learning_rate": 5.515140100702385e-06,
"loss": 0.4884,
"step": 13810
},
{
"epoch": 0.5208608148343572,
"grad_norm": 1.6982387061332391,
"learning_rate": 5.508596513159059e-06,
"loss": 0.5005,
"step": 13820
},
{
"epoch": 0.5212377039912561,
"grad_norm": 1.4479971134584864,
"learning_rate": 5.502052045295286e-06,
"loss": 0.4982,
"step": 13830
},
{
"epoch": 0.5216145931481552,
"grad_norm": 1.857022364604521,
"learning_rate": 5.495506708438763e-06,
"loss": 0.5174,
"step": 13840
},
{
"epoch": 0.5219914823050541,
"grad_norm": 1.6885724207882848,
"learning_rate": 5.488960513918695e-06,
"loss": 0.522,
"step": 13850
},
{
"epoch": 0.522368371461953,
"grad_norm": 1.4128662344535574,
"learning_rate": 5.482413473065775e-06,
"loss": 0.5039,
"step": 13860
},
{
"epoch": 0.522745260618852,
"grad_norm": 1.9106751275521132,
"learning_rate": 5.475865597212152e-06,
"loss": 0.462,
"step": 13870
},
{
"epoch": 0.523122149775751,
"grad_norm": 1.5304732560419125,
"learning_rate": 5.469316897691428e-06,
"loss": 0.4906,
"step": 13880
},
{
"epoch": 0.5234990389326499,
"grad_norm": 1.6509200854352029,
"learning_rate": 5.4627673858386255e-06,
"loss": 0.4937,
"step": 13890
},
{
"epoch": 0.5238759280895489,
"grad_norm": 1.692631204971466,
"learning_rate": 5.456217072990178e-06,
"loss": 0.4782,
"step": 13900
},
{
"epoch": 0.5242528172464478,
"grad_norm": 1.5218720962530914,
"learning_rate": 5.4496659704839e-06,
"loss": 0.4917,
"step": 13910
},
{
"epoch": 0.5246297064033468,
"grad_norm": 1.50914557920104,
"learning_rate": 5.44311408965898e-06,
"loss": 0.5226,
"step": 13920
},
{
"epoch": 0.5250065955602458,
"grad_norm": 1.6708689941350754,
"learning_rate": 5.436561441855942e-06,
"loss": 0.4741,
"step": 13930
},
{
"epoch": 0.5253834847171447,
"grad_norm": 2.0239362250295336,
"learning_rate": 5.430008038416653e-06,
"loss": 0.4797,
"step": 13940
},
{
"epoch": 0.5257603738740436,
"grad_norm": 1.6524565432727731,
"learning_rate": 5.423453890684274e-06,
"loss": 0.5416,
"step": 13950
},
{
"epoch": 0.5261372630309425,
"grad_norm": 1.4470951746948137,
"learning_rate": 5.416899010003264e-06,
"loss": 0.4571,
"step": 13960
},
{
"epoch": 0.5265141521878416,
"grad_norm": 1.713191989373032,
"learning_rate": 5.410343407719343e-06,
"loss": 0.5086,
"step": 13970
},
{
"epoch": 0.5268910413447405,
"grad_norm": 1.511732776736868,
"learning_rate": 5.4037870951794856e-06,
"loss": 0.4623,
"step": 13980
},
{
"epoch": 0.5272679305016394,
"grad_norm": 1.6985510028227295,
"learning_rate": 5.397230083731894e-06,
"loss": 0.4956,
"step": 13990
},
{
"epoch": 0.5276448196585384,
"grad_norm": 1.8917478296516785,
"learning_rate": 5.390672384725979e-06,
"loss": 0.5007,
"step": 14000
},
{
"epoch": 0.5280217088154374,
"grad_norm": 1.45118679961553,
"learning_rate": 5.384114009512343e-06,
"loss": 0.4753,
"step": 14010
},
{
"epoch": 0.5283985979723363,
"grad_norm": 1.5887735261780631,
"learning_rate": 5.37755496944276e-06,
"loss": 0.5109,
"step": 14020
},
{
"epoch": 0.5287754871292353,
"grad_norm": 1.6042273339896562,
"learning_rate": 5.37099527587015e-06,
"loss": 0.5093,
"step": 14030
},
{
"epoch": 0.5291523762861342,
"grad_norm": 1.5185256947676165,
"learning_rate": 5.3644349401485695e-06,
"loss": 0.512,
"step": 14040
},
{
"epoch": 0.5295292654430332,
"grad_norm": 1.5903571571587558,
"learning_rate": 5.3578739736331846e-06,
"loss": 0.5145,
"step": 14050
},
{
"epoch": 0.5299061545999322,
"grad_norm": 1.6617724641502587,
"learning_rate": 5.351312387680249e-06,
"loss": 0.4734,
"step": 14060
},
{
"epoch": 0.5302830437568311,
"grad_norm": 1.5942708990624488,
"learning_rate": 5.344750193647097e-06,
"loss": 0.4822,
"step": 14070
},
{
"epoch": 0.53065993291373,
"grad_norm": 1.6417425014333669,
"learning_rate": 5.338187402892108e-06,
"loss": 0.5148,
"step": 14080
},
{
"epoch": 0.5310368220706291,
"grad_norm": 1.6747154379450402,
"learning_rate": 5.331624026774698e-06,
"loss": 0.4797,
"step": 14090
},
{
"epoch": 0.531413711227528,
"grad_norm": 1.7559506025087541,
"learning_rate": 5.325060076655295e-06,
"loss": 0.4998,
"step": 14100
},
{
"epoch": 0.5317906003844269,
"grad_norm": 1.5737701449749422,
"learning_rate": 5.3184955638953215e-06,
"loss": 0.4771,
"step": 14110
},
{
"epoch": 0.5321674895413259,
"grad_norm": 1.4261947265219483,
"learning_rate": 5.311930499857173e-06,
"loss": 0.483,
"step": 14120
},
{
"epoch": 0.5325443786982249,
"grad_norm": 1.5998345020742453,
"learning_rate": 5.3053648959041995e-06,
"loss": 0.4958,
"step": 14130
},
{
"epoch": 0.5329212678551238,
"grad_norm": 1.4631565618451698,
"learning_rate": 5.2987987634006845e-06,
"loss": 0.4755,
"step": 14140
},
{
"epoch": 0.5332981570120228,
"grad_norm": 1.5593631613031096,
"learning_rate": 5.2922321137118285e-06,
"loss": 0.4837,
"step": 14150
},
{
"epoch": 0.5336750461689217,
"grad_norm": 1.373456275581913,
"learning_rate": 5.285664958203723e-06,
"loss": 0.488,
"step": 14160
},
{
"epoch": 0.5340519353258206,
"grad_norm": 1.4401671838017023,
"learning_rate": 5.2790973082433415e-06,
"loss": 0.4953,
"step": 14170
},
{
"epoch": 0.5344288244827197,
"grad_norm": 1.7640373010765296,
"learning_rate": 5.2725291751985085e-06,
"loss": 0.464,
"step": 14180
},
{
"epoch": 0.5348057136396186,
"grad_norm": 3.309243521738535,
"learning_rate": 5.2659605704378855e-06,
"loss": 0.4997,
"step": 14190
},
{
"epoch": 0.5351826027965175,
"grad_norm": 1.7411014099972144,
"learning_rate": 5.259391505330952e-06,
"loss": 0.487,
"step": 14200
},
{
"epoch": 0.5355594919534165,
"grad_norm": 1.736020357499271,
"learning_rate": 5.252821991247983e-06,
"loss": 0.5079,
"step": 14210
},
{
"epoch": 0.5359363811103155,
"grad_norm": 1.608976151378783,
"learning_rate": 5.246252039560029e-06,
"loss": 0.4948,
"step": 14220
},
{
"epoch": 0.5363132702672144,
"grad_norm": 1.4342748366323024,
"learning_rate": 5.239681661638902e-06,
"loss": 0.4819,
"step": 14230
},
{
"epoch": 0.5366901594241134,
"grad_norm": 1.5495611294979035,
"learning_rate": 5.233110868857148e-06,
"loss": 0.4804,
"step": 14240
},
{
"epoch": 0.5370670485810123,
"grad_norm": 1.8546486328260592,
"learning_rate": 5.2265396725880354e-06,
"loss": 0.5026,
"step": 14250
},
{
"epoch": 0.5374439377379113,
"grad_norm": 1.4351730050068885,
"learning_rate": 5.219968084205525e-06,
"loss": 0.4984,
"step": 14260
},
{
"epoch": 0.5378208268948103,
"grad_norm": 1.5059171293734337,
"learning_rate": 5.213396115084261e-06,
"loss": 0.486,
"step": 14270
},
{
"epoch": 0.5381977160517092,
"grad_norm": 1.5964828714981016,
"learning_rate": 5.206823776599544e-06,
"loss": 0.4951,
"step": 14280
},
{
"epoch": 0.5385746052086081,
"grad_norm": 1.4533398425449158,
"learning_rate": 5.200251080127318e-06,
"loss": 0.5093,
"step": 14290
},
{
"epoch": 0.5389514943655072,
"grad_norm": 1.6285262584640723,
"learning_rate": 5.1936780370441395e-06,
"loss": 0.4896,
"step": 14300
},
{
"epoch": 0.5393283835224061,
"grad_norm": 1.6246946608117687,
"learning_rate": 5.187104658727173e-06,
"loss": 0.4775,
"step": 14310
},
{
"epoch": 0.539705272679305,
"grad_norm": 1.4908047445233454,
"learning_rate": 5.180530956554158e-06,
"loss": 0.4975,
"step": 14320
},
{
"epoch": 0.5400821618362039,
"grad_norm": 1.5621540920543053,
"learning_rate": 5.173956941903395e-06,
"loss": 0.4687,
"step": 14330
},
{
"epoch": 0.5404590509931029,
"grad_norm": 1.7060966317168311,
"learning_rate": 5.167382626153727e-06,
"loss": 0.5019,
"step": 14340
},
{
"epoch": 0.5408359401500019,
"grad_norm": 1.7020605336609178,
"learning_rate": 5.160808020684519e-06,
"loss": 0.4993,
"step": 14350
},
{
"epoch": 0.5412128293069008,
"grad_norm": 1.7730954600238917,
"learning_rate": 5.154233136875633e-06,
"loss": 0.485,
"step": 14360
},
{
"epoch": 0.5415897184637998,
"grad_norm": 1.9393987985330001,
"learning_rate": 5.147657986107417e-06,
"loss": 0.5375,
"step": 14370
},
{
"epoch": 0.5419666076206987,
"grad_norm": 1.6471427503287805,
"learning_rate": 5.1410825797606816e-06,
"loss": 0.4997,
"step": 14380
},
{
"epoch": 0.5423434967775977,
"grad_norm": 1.4369947353354569,
"learning_rate": 5.134506929216674e-06,
"loss": 0.4739,
"step": 14390
},
{
"epoch": 0.5427203859344967,
"grad_norm": 1.7681719081478857,
"learning_rate": 5.127931045857073e-06,
"loss": 0.4991,
"step": 14400
},
{
"epoch": 0.5430972750913956,
"grad_norm": 1.6691866607079882,
"learning_rate": 5.1213549410639515e-06,
"loss": 0.4623,
"step": 14410
},
{
"epoch": 0.5434741642482945,
"grad_norm": 1.4149702534749613,
"learning_rate": 5.114778626219772e-06,
"loss": 0.4828,
"step": 14420
},
{
"epoch": 0.5438510534051936,
"grad_norm": 1.6627823221828433,
"learning_rate": 5.108202112707357e-06,
"loss": 0.5027,
"step": 14430
},
{
"epoch": 0.5442279425620925,
"grad_norm": 1.250526612258918,
"learning_rate": 5.101625411909874e-06,
"loss": 0.4779,
"step": 14440
},
{
"epoch": 0.5446048317189914,
"grad_norm": 1.5283584890240594,
"learning_rate": 5.0950485352108145e-06,
"loss": 0.494,
"step": 14450
},
{
"epoch": 0.5449817208758904,
"grad_norm": 1.337913464919517,
"learning_rate": 5.088471493993977e-06,
"loss": 0.459,
"step": 14460
},
{
"epoch": 0.5453586100327894,
"grad_norm": 1.481063864916166,
"learning_rate": 5.081894299643439e-06,
"loss": 0.5148,
"step": 14470
},
{
"epoch": 0.5457354991896883,
"grad_norm": 1.5902256444957483,
"learning_rate": 5.07531696354355e-06,
"loss": 0.4776,
"step": 14480
},
{
"epoch": 0.5461123883465873,
"grad_norm": 1.443883382589487,
"learning_rate": 5.068739497078898e-06,
"loss": 0.5122,
"step": 14490
},
{
"epoch": 0.5464892775034862,
"grad_norm": 1.6573811027305527,
"learning_rate": 5.0621619116343e-06,
"loss": 0.5163,
"step": 14500
},
{
"epoch": 0.5468661666603852,
"grad_norm": 1.609253979586725,
"learning_rate": 5.055584218594782e-06,
"loss": 0.4862,
"step": 14510
},
{
"epoch": 0.5472430558172842,
"grad_norm": 1.7791228748733698,
"learning_rate": 5.049006429345552e-06,
"loss": 0.5079,
"step": 14520
},
{
"epoch": 0.5476199449741831,
"grad_norm": 1.5440262873205584,
"learning_rate": 5.0424285552719845e-06,
"loss": 0.4881,
"step": 14530
},
{
"epoch": 0.547996834131082,
"grad_norm": 1.67027687894764,
"learning_rate": 5.0358506077596035e-06,
"loss": 0.4948,
"step": 14540
},
{
"epoch": 0.548373723287981,
"grad_norm": 1.7084316467056804,
"learning_rate": 5.029272598194057e-06,
"loss": 0.4836,
"step": 14550
},
{
"epoch": 0.54875061244488,
"grad_norm": 1.6824534462487943,
"learning_rate": 5.022694537961105e-06,
"loss": 0.4904,
"step": 14560
},
{
"epoch": 0.5491275016017789,
"grad_norm": 1.684452437139124,
"learning_rate": 5.016116438446588e-06,
"loss": 0.5244,
"step": 14570
},
{
"epoch": 0.5495043907586779,
"grad_norm": 1.7169193584179046,
"learning_rate": 5.009538311036422e-06,
"loss": 0.5139,
"step": 14580
},
{
"epoch": 0.5498812799155768,
"grad_norm": 1.6319964128441222,
"learning_rate": 5.002960167116567e-06,
"loss": 0.4995,
"step": 14590
},
{
"epoch": 0.5502581690724758,
"grad_norm": 1.7828166102258067,
"learning_rate": 4.9963820180730125e-06,
"loss": 0.4847,
"step": 14600
},
{
"epoch": 0.5506350582293748,
"grad_norm": 1.4715536669658034,
"learning_rate": 4.989803875291759e-06,
"loss": 0.5032,
"step": 14610
},
{
"epoch": 0.5510119473862737,
"grad_norm": 1.6193456161052595,
"learning_rate": 4.983225750158789e-06,
"loss": 0.4738,
"step": 14620
},
{
"epoch": 0.5513888365431726,
"grad_norm": 1.6523635284605276,
"learning_rate": 4.976647654060064e-06,
"loss": 0.4697,
"step": 14630
},
{
"epoch": 0.5517657257000717,
"grad_norm": 1.5353363387196937,
"learning_rate": 4.970069598381489e-06,
"loss": 0.4766,
"step": 14640
},
{
"epoch": 0.5521426148569706,
"grad_norm": 1.599767794638566,
"learning_rate": 4.963491594508904e-06,
"loss": 0.486,
"step": 14650
},
{
"epoch": 0.5525195040138695,
"grad_norm": 1.5072462327621345,
"learning_rate": 4.956913653828051e-06,
"loss": 0.5029,
"step": 14660
},
{
"epoch": 0.5528963931707684,
"grad_norm": 1.576083520001881,
"learning_rate": 4.950335787724571e-06,
"loss": 0.5008,
"step": 14670
},
{
"epoch": 0.5532732823276675,
"grad_norm": 1.7206709649650131,
"learning_rate": 4.943758007583972e-06,
"loss": 0.4899,
"step": 14680
},
{
"epoch": 0.5536501714845664,
"grad_norm": 2.125129340848355,
"learning_rate": 4.937180324791616e-06,
"loss": 0.4858,
"step": 14690
},
{
"epoch": 0.5540270606414653,
"grad_norm": 1.776386341218708,
"learning_rate": 4.930602750732691e-06,
"loss": 0.5114,
"step": 14700
},
{
"epoch": 0.5544039497983643,
"grad_norm": 1.8272872066498904,
"learning_rate": 4.924025296792202e-06,
"loss": 0.4938,
"step": 14710
},
{
"epoch": 0.5547808389552633,
"grad_norm": 1.5539132471817085,
"learning_rate": 4.917447974354944e-06,
"loss": 0.5012,
"step": 14720
},
{
"epoch": 0.5551577281121622,
"grad_norm": 1.5058070102826557,
"learning_rate": 4.910870794805484e-06,
"loss": 0.4982,
"step": 14730
},
{
"epoch": 0.5555346172690612,
"grad_norm": 1.710828327508459,
"learning_rate": 4.904293769528146e-06,
"loss": 0.4756,
"step": 14740
},
{
"epoch": 0.5559115064259601,
"grad_norm": 1.8818911198800967,
"learning_rate": 4.8977169099069774e-06,
"loss": 0.4877,
"step": 14750
},
{
"epoch": 0.556288395582859,
"grad_norm": 1.5555517297318513,
"learning_rate": 4.891140227325749e-06,
"loss": 0.5072,
"step": 14760
},
{
"epoch": 0.5566652847397581,
"grad_norm": 1.3837969704058737,
"learning_rate": 4.884563733167921e-06,
"loss": 0.4493,
"step": 14770
},
{
"epoch": 0.557042173896657,
"grad_norm": 1.605935176191625,
"learning_rate": 4.877987438816626e-06,
"loss": 0.5043,
"step": 14780
},
{
"epoch": 0.5574190630535559,
"grad_norm": 1.4152291686855751,
"learning_rate": 4.8714113556546526e-06,
"loss": 0.4896,
"step": 14790
},
{
"epoch": 0.5577959522104549,
"grad_norm": 1.555671007575408,
"learning_rate": 4.864835495064422e-06,
"loss": 0.4948,
"step": 14800
},
{
"epoch": 0.5581728413673539,
"grad_norm": 1.7501287152105198,
"learning_rate": 4.858259868427975e-06,
"loss": 0.528,
"step": 14810
},
{
"epoch": 0.5585497305242528,
"grad_norm": 1.4977704984866138,
"learning_rate": 4.851684487126942e-06,
"loss": 0.4995,
"step": 14820
},
{
"epoch": 0.5589266196811518,
"grad_norm": 1.5881770020676744,
"learning_rate": 4.845109362542531e-06,
"loss": 0.5033,
"step": 14830
},
{
"epoch": 0.5593035088380507,
"grad_norm": 1.6759640529299542,
"learning_rate": 4.838534506055505e-06,
"loss": 0.5153,
"step": 14840
},
{
"epoch": 0.5596803979949497,
"grad_norm": 1.444873322408345,
"learning_rate": 4.8319599290461644e-06,
"loss": 0.4907,
"step": 14850
},
{
"epoch": 0.5600572871518487,
"grad_norm": 1.8264686526034841,
"learning_rate": 4.825385642894325e-06,
"loss": 0.5306,
"step": 14860
},
{
"epoch": 0.5604341763087476,
"grad_norm": 1.5497290772020473,
"learning_rate": 4.818811658979298e-06,
"loss": 0.5152,
"step": 14870
},
{
"epoch": 0.5608110654656465,
"grad_norm": 1.37844981662825,
"learning_rate": 4.8122379886798714e-06,
"loss": 0.4983,
"step": 14880
},
{
"epoch": 0.5611879546225456,
"grad_norm": 1.3444131954230294,
"learning_rate": 4.805664643374295e-06,
"loss": 0.4925,
"step": 14890
},
{
"epoch": 0.5615648437794445,
"grad_norm": 1.5532769744703083,
"learning_rate": 4.799091634440251e-06,
"loss": 0.5188,
"step": 14900
},
{
"epoch": 0.5619417329363434,
"grad_norm": 1.555196739372832,
"learning_rate": 4.7925189732548396e-06,
"loss": 0.4683,
"step": 14910
},
{
"epoch": 0.5623186220932423,
"grad_norm": 1.4684972837534254,
"learning_rate": 4.7859466711945616e-06,
"loss": 0.479,
"step": 14920
},
{
"epoch": 0.5626955112501413,
"grad_norm": 1.6202095473956792,
"learning_rate": 4.7793747396352945e-06,
"loss": 0.494,
"step": 14930
},
{
"epoch": 0.5630724004070403,
"grad_norm": 1.7528710855944911,
"learning_rate": 4.7728031899522775e-06,
"loss": 0.4671,
"step": 14940
},
{
"epoch": 0.5634492895639392,
"grad_norm": 1.8853181007805313,
"learning_rate": 4.7662320335200815e-06,
"loss": 0.4952,
"step": 14950
},
{
"epoch": 0.5638261787208382,
"grad_norm": 1.5382530586672782,
"learning_rate": 4.759661281712605e-06,
"loss": 0.4707,
"step": 14960
},
{
"epoch": 0.5642030678777371,
"grad_norm": 1.525234396856901,
"learning_rate": 4.753090945903043e-06,
"loss": 0.479,
"step": 14970
},
{
"epoch": 0.5645799570346361,
"grad_norm": 1.4131804151431138,
"learning_rate": 4.74652103746387e-06,
"loss": 0.5002,
"step": 14980
},
{
"epoch": 0.5649568461915351,
"grad_norm": 1.3413955318599295,
"learning_rate": 4.739951567766819e-06,
"loss": 0.5014,
"step": 14990
},
{
"epoch": 0.565333735348434,
"grad_norm": 1.707719627926162,
"learning_rate": 4.733382548182867e-06,
"loss": 0.4803,
"step": 15000
},
{
"epoch": 0.5657106245053329,
"grad_norm": 1.783248272136694,
"learning_rate": 4.726813990082208e-06,
"loss": 0.521,
"step": 15010
},
{
"epoch": 0.566087513662232,
"grad_norm": 1.6966076793346403,
"learning_rate": 4.720245904834247e-06,
"loss": 0.491,
"step": 15020
},
{
"epoch": 0.5664644028191309,
"grad_norm": 1.718872464504697,
"learning_rate": 4.713678303807554e-06,
"loss": 0.508,
"step": 15030
},
{
"epoch": 0.5668412919760298,
"grad_norm": 1.3577910290768516,
"learning_rate": 4.707111198369875e-06,
"loss": 0.4792,
"step": 15040
},
{
"epoch": 0.5672181811329288,
"grad_norm": 2.011765897335492,
"learning_rate": 4.700544599888092e-06,
"loss": 0.4792,
"step": 15050
},
{
"epoch": 0.5675950702898278,
"grad_norm": 1.5685090926240022,
"learning_rate": 4.693978519728214e-06,
"loss": 0.487,
"step": 15060
},
{
"epoch": 0.5679719594467267,
"grad_norm": 1.9465868567085707,
"learning_rate": 4.687412969255344e-06,
"loss": 0.5383,
"step": 15070
},
{
"epoch": 0.5683488486036257,
"grad_norm": 1.9601527980802476,
"learning_rate": 4.680847959833678e-06,
"loss": 0.5143,
"step": 15080
},
{
"epoch": 0.5687257377605246,
"grad_norm": 1.3360212285500883,
"learning_rate": 4.674283502826469e-06,
"loss": 0.5006,
"step": 15090
},
{
"epoch": 0.5691026269174236,
"grad_norm": 1.8144442698478411,
"learning_rate": 4.667719609596017e-06,
"loss": 0.4999,
"step": 15100
},
{
"epoch": 0.5694795160743226,
"grad_norm": 1.8698769948828864,
"learning_rate": 4.661156291503648e-06,
"loss": 0.4859,
"step": 15110
},
{
"epoch": 0.5698564052312215,
"grad_norm": 1.49460133455783,
"learning_rate": 4.654593559909686e-06,
"loss": 0.4845,
"step": 15120
},
{
"epoch": 0.5702332943881204,
"grad_norm": 1.4660243676447777,
"learning_rate": 4.648031426173445e-06,
"loss": 0.5125,
"step": 15130
},
{
"epoch": 0.5706101835450194,
"grad_norm": 1.3870825072557174,
"learning_rate": 4.641469901653202e-06,
"loss": 0.5016,
"step": 15140
},
{
"epoch": 0.5709870727019184,
"grad_norm": 1.6431723296673835,
"learning_rate": 4.634908997706185e-06,
"loss": 0.4987,
"step": 15150
},
{
"epoch": 0.5713639618588173,
"grad_norm": 1.7105138212254234,
"learning_rate": 4.628348725688535e-06,
"loss": 0.4854,
"step": 15160
},
{
"epoch": 0.5717408510157163,
"grad_norm": 1.9455745122203643,
"learning_rate": 4.621789096955314e-06,
"loss": 0.4913,
"step": 15170
},
{
"epoch": 0.5721177401726152,
"grad_norm": 1.6110602983128681,
"learning_rate": 4.615230122860463e-06,
"loss": 0.478,
"step": 15180
},
{
"epoch": 0.5724946293295142,
"grad_norm": 1.570236871676212,
"learning_rate": 4.608671814756789e-06,
"loss": 0.4966,
"step": 15190
},
{
"epoch": 0.5728715184864132,
"grad_norm": 1.6101020345767108,
"learning_rate": 4.60211418399595e-06,
"loss": 0.4921,
"step": 15200
},
{
"epoch": 0.5732484076433121,
"grad_norm": 1.3277725999257566,
"learning_rate": 4.595557241928428e-06,
"loss": 0.4492,
"step": 15210
},
{
"epoch": 0.573625296800211,
"grad_norm": 1.4944481859425578,
"learning_rate": 4.589000999903514e-06,
"loss": 0.508,
"step": 15220
},
{
"epoch": 0.5740021859571101,
"grad_norm": 1.949554647184248,
"learning_rate": 4.582445469269293e-06,
"loss": 0.4783,
"step": 15230
},
{
"epoch": 0.574379075114009,
"grad_norm": 1.6382300534433607,
"learning_rate": 4.575890661372608e-06,
"loss": 0.5262,
"step": 15240
},
{
"epoch": 0.5747559642709079,
"grad_norm": 1.7414910131507269,
"learning_rate": 4.569336587559058e-06,
"loss": 0.5097,
"step": 15250
},
{
"epoch": 0.5751328534278068,
"grad_norm": 1.5308534197031307,
"learning_rate": 4.562783259172972e-06,
"loss": 0.4743,
"step": 15260
},
{
"epoch": 0.5755097425847059,
"grad_norm": 1.544445790287268,
"learning_rate": 4.556230687557387e-06,
"loss": 0.4855,
"step": 15270
},
{
"epoch": 0.5758866317416048,
"grad_norm": 1.5649621936574907,
"learning_rate": 4.549678884054028e-06,
"loss": 0.4827,
"step": 15280
},
{
"epoch": 0.5762635208985037,
"grad_norm": 1.5699103732057733,
"learning_rate": 4.543127860003291e-06,
"loss": 0.5126,
"step": 15290
},
{
"epoch": 0.5766404100554027,
"grad_norm": 1.6479924327503963,
"learning_rate": 4.536577626744229e-06,
"loss": 0.5087,
"step": 15300
},
{
"epoch": 0.5770172992123017,
"grad_norm": 1.754860584177109,
"learning_rate": 4.53002819561452e-06,
"loss": 0.4868,
"step": 15310
},
{
"epoch": 0.5773941883692006,
"grad_norm": 1.538257113940453,
"learning_rate": 4.523479577950452e-06,
"loss": 0.4828,
"step": 15320
},
{
"epoch": 0.5777710775260996,
"grad_norm": 1.6723499369767234,
"learning_rate": 4.516931785086911e-06,
"loss": 0.4702,
"step": 15330
},
{
"epoch": 0.5781479666829985,
"grad_norm": 1.441994971838687,
"learning_rate": 4.510384828357352e-06,
"loss": 0.5074,
"step": 15340
},
{
"epoch": 0.5785248558398974,
"grad_norm": 1.676436195429064,
"learning_rate": 4.503838719093785e-06,
"loss": 0.498,
"step": 15350
},
{
"epoch": 0.5789017449967965,
"grad_norm": 1.5193324381147915,
"learning_rate": 4.4972934686267465e-06,
"loss": 0.488,
"step": 15360
},
{
"epoch": 0.5792786341536954,
"grad_norm": 1.644408204909188,
"learning_rate": 4.4907490882852945e-06,
"loss": 0.4658,
"step": 15370
},
{
"epoch": 0.5796555233105943,
"grad_norm": 1.3830347527818876,
"learning_rate": 4.484205589396979e-06,
"loss": 0.5044,
"step": 15380
},
{
"epoch": 0.5800324124674933,
"grad_norm": 1.5656512441657606,
"learning_rate": 4.477662983287823e-06,
"loss": 0.4713,
"step": 15390
},
{
"epoch": 0.5804093016243923,
"grad_norm": 1.8212831299721022,
"learning_rate": 4.4711212812823015e-06,
"loss": 0.5006,
"step": 15400
},
{
"epoch": 0.5807861907812912,
"grad_norm": 5.814852464154508,
"learning_rate": 4.46458049470333e-06,
"loss": 0.5273,
"step": 15410
},
{
"epoch": 0.5811630799381902,
"grad_norm": 1.5922280470024501,
"learning_rate": 4.458040634872234e-06,
"loss": 0.512,
"step": 15420
},
{
"epoch": 0.5815399690950891,
"grad_norm": 1.383810258386157,
"learning_rate": 4.451501713108744e-06,
"loss": 0.4864,
"step": 15430
},
{
"epoch": 0.5819168582519881,
"grad_norm": 1.5168290662535795,
"learning_rate": 4.444963740730953e-06,
"loss": 0.5101,
"step": 15440
},
{
"epoch": 0.5822937474088871,
"grad_norm": 1.628078473199888,
"learning_rate": 4.438426729055324e-06,
"loss": 0.4838,
"step": 15450
},
{
"epoch": 0.582670636565786,
"grad_norm": 1.3493978928610881,
"learning_rate": 4.431890689396649e-06,
"loss": 0.4719,
"step": 15460
},
{
"epoch": 0.5830475257226849,
"grad_norm": 1.740559583658373,
"learning_rate": 4.425355633068041e-06,
"loss": 0.5067,
"step": 15470
},
{
"epoch": 0.583424414879584,
"grad_norm": 1.7981946745212039,
"learning_rate": 4.418821571380911e-06,
"loss": 0.4932,
"step": 15480
},
{
"epoch": 0.5838013040364829,
"grad_norm": 1.5473002005079304,
"learning_rate": 4.4122885156449445e-06,
"loss": 0.4904,
"step": 15490
},
{
"epoch": 0.5841781931933818,
"grad_norm": 1.8784776516281418,
"learning_rate": 4.40575647716809e-06,
"loss": 0.5048,
"step": 15500
},
{
"epoch": 0.5845550823502808,
"grad_norm": 1.4719513927457284,
"learning_rate": 4.399225467256535e-06,
"loss": 0.4481,
"step": 15510
},
{
"epoch": 0.5849319715071798,
"grad_norm": 1.825241349848936,
"learning_rate": 4.392695497214688e-06,
"loss": 0.4983,
"step": 15520
},
{
"epoch": 0.5853088606640787,
"grad_norm": 1.5528305044356574,
"learning_rate": 4.38616657834515e-06,
"loss": 0.5041,
"step": 15530
},
{
"epoch": 0.5856857498209777,
"grad_norm": 1.6870571870401214,
"learning_rate": 4.3796387219487105e-06,
"loss": 0.4874,
"step": 15540
},
{
"epoch": 0.5860626389778766,
"grad_norm": 1.7920265982059933,
"learning_rate": 4.373111939324317e-06,
"loss": 0.4999,
"step": 15550
},
{
"epoch": 0.5864395281347755,
"grad_norm": 1.341117566474037,
"learning_rate": 4.366586241769061e-06,
"loss": 0.4557,
"step": 15560
},
{
"epoch": 0.5868164172916746,
"grad_norm": 1.3355923074651992,
"learning_rate": 4.36006164057815e-06,
"loss": 0.475,
"step": 15570
},
{
"epoch": 0.5871933064485735,
"grad_norm": 1.5410068065396823,
"learning_rate": 4.353538147044899e-06,
"loss": 0.4794,
"step": 15580
},
{
"epoch": 0.5875701956054724,
"grad_norm": 1.4096817479193515,
"learning_rate": 4.347015772460705e-06,
"loss": 0.5178,
"step": 15590
},
{
"epoch": 0.5879470847623713,
"grad_norm": 1.7430158797469388,
"learning_rate": 4.340494528115028e-06,
"loss": 0.4908,
"step": 15600
},
{
"epoch": 0.5883239739192704,
"grad_norm": 1.6723561324160858,
"learning_rate": 4.333974425295368e-06,
"loss": 0.4748,
"step": 15610
},
{
"epoch": 0.5887008630761693,
"grad_norm": 1.5864053334700638,
"learning_rate": 4.327455475287255e-06,
"loss": 0.4994,
"step": 15620
},
{
"epoch": 0.5890777522330682,
"grad_norm": 1.524780156258879,
"learning_rate": 4.3209376893742185e-06,
"loss": 0.4881,
"step": 15630
},
{
"epoch": 0.5894546413899672,
"grad_norm": 1.7296029856373045,
"learning_rate": 4.314421078837782e-06,
"loss": 0.5231,
"step": 15640
},
{
"epoch": 0.5898315305468662,
"grad_norm": 1.6109569171154967,
"learning_rate": 4.3079056549574185e-06,
"loss": 0.5001,
"step": 15650
},
{
"epoch": 0.5902084197037651,
"grad_norm": 1.3624010397586623,
"learning_rate": 4.301391429010563e-06,
"loss": 0.5082,
"step": 15660
},
{
"epoch": 0.5905853088606641,
"grad_norm": 1.7683758241128607,
"learning_rate": 4.2948784122725695e-06,
"loss": 0.4992,
"step": 15670
},
{
"epoch": 0.590962198017563,
"grad_norm": 1.8030632058982388,
"learning_rate": 4.2883666160167004e-06,
"loss": 0.4562,
"step": 15680
},
{
"epoch": 0.591339087174462,
"grad_norm": 1.4736588153729777,
"learning_rate": 4.281856051514104e-06,
"loss": 0.4598,
"step": 15690
},
{
"epoch": 0.591715976331361,
"grad_norm": 1.9040076493277438,
"learning_rate": 4.275346730033797e-06,
"loss": 0.5181,
"step": 15700
},
{
"epoch": 0.5920928654882599,
"grad_norm": 1.6217096093879626,
"learning_rate": 4.268838662842648e-06,
"loss": 0.493,
"step": 15710
},
{
"epoch": 0.5924697546451588,
"grad_norm": 1.4714622199259768,
"learning_rate": 4.262331861205353e-06,
"loss": 0.4911,
"step": 15720
},
{
"epoch": 0.5928466438020578,
"grad_norm": 1.6675985050238766,
"learning_rate": 4.255826336384413e-06,
"loss": 0.4839,
"step": 15730
},
{
"epoch": 0.5932235329589568,
"grad_norm": 1.730305207931142,
"learning_rate": 4.249322099640124e-06,
"loss": 0.4613,
"step": 15740
},
{
"epoch": 0.5936004221158557,
"grad_norm": 1.5875923671826002,
"learning_rate": 4.2428191622305515e-06,
"loss": 0.4882,
"step": 15750
},
{
"epoch": 0.5939773112727547,
"grad_norm": 1.7817428188939046,
"learning_rate": 4.2363175354115125e-06,
"loss": 0.4735,
"step": 15760
},
{
"epoch": 0.5943542004296536,
"grad_norm": 1.4446917245054236,
"learning_rate": 4.229817230436551e-06,
"loss": 0.4965,
"step": 15770
},
{
"epoch": 0.5947310895865526,
"grad_norm": 1.5351237013104135,
"learning_rate": 4.223318258556929e-06,
"loss": 0.4889,
"step": 15780
},
{
"epoch": 0.5951079787434516,
"grad_norm": 1.4127276518276473,
"learning_rate": 4.2168206310216e-06,
"loss": 0.4777,
"step": 15790
},
{
"epoch": 0.5954848679003505,
"grad_norm": 1.632038239139784,
"learning_rate": 4.210324359077188e-06,
"loss": 0.4863,
"step": 15800
},
{
"epoch": 0.5958617570572494,
"grad_norm": 1.58550770046582,
"learning_rate": 4.20382945396797e-06,
"loss": 0.4629,
"step": 15810
},
{
"epoch": 0.5962386462141485,
"grad_norm": 1.5604563182410427,
"learning_rate": 4.197335926935862e-06,
"loss": 0.4702,
"step": 15820
},
{
"epoch": 0.5966155353710474,
"grad_norm": 1.6694863244839393,
"learning_rate": 4.190843789220388e-06,
"loss": 0.4841,
"step": 15830
},
{
"epoch": 0.5969924245279463,
"grad_norm": 1.3812318270249444,
"learning_rate": 4.184353052058675e-06,
"loss": 0.4803,
"step": 15840
},
{
"epoch": 0.5973693136848452,
"grad_norm": 1.542874778740591,
"learning_rate": 4.177863726685422e-06,
"loss": 0.4774,
"step": 15850
},
{
"epoch": 0.5977462028417443,
"grad_norm": 1.5992161372449223,
"learning_rate": 4.1713758243328805e-06,
"loss": 0.4642,
"step": 15860
},
{
"epoch": 0.5981230919986432,
"grad_norm": 1.695522458255685,
"learning_rate": 4.164889356230845e-06,
"loss": 0.4984,
"step": 15870
},
{
"epoch": 0.5984999811555421,
"grad_norm": 1.5064263824602755,
"learning_rate": 4.158404333606624e-06,
"loss": 0.484,
"step": 15880
},
{
"epoch": 0.5988768703124411,
"grad_norm": 1.5539780173830111,
"learning_rate": 4.151920767685028e-06,
"loss": 0.5053,
"step": 15890
},
{
"epoch": 0.5992537594693401,
"grad_norm": 1.8156834275952407,
"learning_rate": 4.145438669688339e-06,
"loss": 0.4891,
"step": 15900
},
{
"epoch": 0.599630648626239,
"grad_norm": 1.6867172865153917,
"learning_rate": 4.138958050836305e-06,
"loss": 0.4933,
"step": 15910
},
{
"epoch": 0.600007537783138,
"grad_norm": 1.5069489029221794,
"learning_rate": 4.132478922346111e-06,
"loss": 0.4981,
"step": 15920
},
{
"epoch": 0.6003844269400369,
"grad_norm": 1.645715388170334,
"learning_rate": 4.126001295432362e-06,
"loss": 0.5092,
"step": 15930
},
{
"epoch": 0.6007613160969358,
"grad_norm": 1.5081305054375247,
"learning_rate": 4.119525181307065e-06,
"loss": 0.5007,
"step": 15940
},
{
"epoch": 0.6011382052538349,
"grad_norm": 1.7513431875588092,
"learning_rate": 4.113050591179608e-06,
"loss": 0.4818,
"step": 15950
},
{
"epoch": 0.6015150944107338,
"grad_norm": 1.4446843423733429,
"learning_rate": 4.10657753625674e-06,
"loss": 0.4891,
"step": 15960
},
{
"epoch": 0.6018919835676327,
"grad_norm": 1.5962355483326147,
"learning_rate": 4.100106027742559e-06,
"loss": 0.5487,
"step": 15970
},
{
"epoch": 0.6022688727245317,
"grad_norm": 1.5956138956675063,
"learning_rate": 4.093636076838474e-06,
"loss": 0.4953,
"step": 15980
},
{
"epoch": 0.6026457618814307,
"grad_norm": 1.6376372777266162,
"learning_rate": 4.087167694743209e-06,
"loss": 0.474,
"step": 15990
},
{
"epoch": 0.6030226510383296,
"grad_norm": 1.7797231365552175,
"learning_rate": 4.080700892652769e-06,
"loss": 0.5058,
"step": 16000
},
{
"epoch": 0.6033995401952286,
"grad_norm": 1.5483118782326815,
"learning_rate": 4.074235681760425e-06,
"loss": 0.4455,
"step": 16010
},
{
"epoch": 0.6037764293521275,
"grad_norm": 1.6389537384394115,
"learning_rate": 4.067772073256691e-06,
"loss": 0.4974,
"step": 16020
},
{
"epoch": 0.6041533185090265,
"grad_norm": 1.5711684676833029,
"learning_rate": 4.0613100783293085e-06,
"loss": 0.4745,
"step": 16030
},
{
"epoch": 0.6045302076659255,
"grad_norm": 1.583414482094785,
"learning_rate": 4.0548497081632275e-06,
"loss": 0.4832,
"step": 16040
},
{
"epoch": 0.6049070968228244,
"grad_norm": 1.9771396052065735,
"learning_rate": 4.04839097394059e-06,
"loss": 0.4997,
"step": 16050
},
{
"epoch": 0.6052839859797233,
"grad_norm": 1.4625015091341529,
"learning_rate": 4.0419338868406934e-06,
"loss": 0.4469,
"step": 16060
},
{
"epoch": 0.6056608751366224,
"grad_norm": 1.497067817501958,
"learning_rate": 4.035478458039998e-06,
"loss": 0.4967,
"step": 16070
},
{
"epoch": 0.6060377642935213,
"grad_norm": 1.5120714577785972,
"learning_rate": 4.029024698712085e-06,
"loss": 0.4829,
"step": 16080
},
{
"epoch": 0.6064146534504202,
"grad_norm": 1.37555735450956,
"learning_rate": 4.022572620027653e-06,
"loss": 0.4758,
"step": 16090
},
{
"epoch": 0.6067915426073192,
"grad_norm": 1.956108364099901,
"learning_rate": 4.016122233154483e-06,
"loss": 0.5154,
"step": 16100
},
{
"epoch": 0.6071684317642182,
"grad_norm": 1.426684210517623,
"learning_rate": 4.009673549257432e-06,
"loss": 0.5068,
"step": 16110
},
{
"epoch": 0.6075453209211171,
"grad_norm": 1.675741874097294,
"learning_rate": 4.0032265794984145e-06,
"loss": 0.4616,
"step": 16120
},
{
"epoch": 0.6079222100780161,
"grad_norm": 1.666536388372485,
"learning_rate": 3.99678133503637e-06,
"loss": 0.4775,
"step": 16130
},
{
"epoch": 0.608299099234915,
"grad_norm": 1.4010007944856846,
"learning_rate": 3.990337827027256e-06,
"loss": 0.5018,
"step": 16140
},
{
"epoch": 0.6086759883918139,
"grad_norm": 1.811100516226975,
"learning_rate": 3.983896066624021e-06,
"loss": 0.4643,
"step": 16150
},
{
"epoch": 0.609052877548713,
"grad_norm": 1.4452896868562695,
"learning_rate": 3.977456064976592e-06,
"loss": 0.5072,
"step": 16160
},
{
"epoch": 0.6094297667056119,
"grad_norm": 1.2863215835461435,
"learning_rate": 3.97101783323185e-06,
"loss": 0.4604,
"step": 16170
},
{
"epoch": 0.6098066558625108,
"grad_norm": 1.6495177612337615,
"learning_rate": 3.964581382533618e-06,
"loss": 0.4874,
"step": 16180
},
{
"epoch": 0.6101835450194097,
"grad_norm": 1.4527097080833093,
"learning_rate": 3.958146724022623e-06,
"loss": 0.4744,
"step": 16190
},
{
"epoch": 0.6105604341763088,
"grad_norm": 1.6146711317833942,
"learning_rate": 3.951713868836506e-06,
"loss": 0.4962,
"step": 16200
},
{
"epoch": 0.6109373233332077,
"grad_norm": 1.8235449585876042,
"learning_rate": 3.945282828109774e-06,
"loss": 0.4968,
"step": 16210
},
{
"epoch": 0.6113142124901066,
"grad_norm": 1.592201298917257,
"learning_rate": 3.938853612973801e-06,
"loss": 0.508,
"step": 16220
},
{
"epoch": 0.6116911016470056,
"grad_norm": 1.9031681135985004,
"learning_rate": 3.932426234556798e-06,
"loss": 0.4793,
"step": 16230
},
{
"epoch": 0.6120679908039046,
"grad_norm": 1.860838297311073,
"learning_rate": 3.926000703983795e-06,
"loss": 0.4691,
"step": 16240
},
{
"epoch": 0.6124448799608035,
"grad_norm": 1.7168068035412793,
"learning_rate": 3.919577032376628e-06,
"loss": 0.4927,
"step": 16250
},
{
"epoch": 0.6128217691177025,
"grad_norm": 1.856102414725057,
"learning_rate": 3.913155230853915e-06,
"loss": 0.4918,
"step": 16260
},
{
"epoch": 0.6131986582746014,
"grad_norm": 1.712306650139487,
"learning_rate": 3.906735310531033e-06,
"loss": 0.4969,
"step": 16270
},
{
"epoch": 0.6135755474315004,
"grad_norm": 1.6309354412368642,
"learning_rate": 3.900317282520104e-06,
"loss": 0.4693,
"step": 16280
},
{
"epoch": 0.6139524365883994,
"grad_norm": 1.4778531939363033,
"learning_rate": 3.893901157929979e-06,
"loss": 0.4735,
"step": 16290
},
{
"epoch": 0.6143293257452983,
"grad_norm": 1.8592722441894747,
"learning_rate": 3.8874869478662104e-06,
"loss": 0.4552,
"step": 16300
},
{
"epoch": 0.6147062149021972,
"grad_norm": 1.9102009395175383,
"learning_rate": 3.881074663431037e-06,
"loss": 0.4856,
"step": 16310
},
{
"epoch": 0.6150831040590963,
"grad_norm": 2.834088988880712,
"learning_rate": 3.874664315723363e-06,
"loss": 0.4862,
"step": 16320
},
{
"epoch": 0.6154599932159952,
"grad_norm": 1.5759267871394191,
"learning_rate": 3.8682559158387474e-06,
"loss": 0.4749,
"step": 16330
},
{
"epoch": 0.6158368823728941,
"grad_norm": 1.6563437776338659,
"learning_rate": 3.861849474869371e-06,
"loss": 0.4675,
"step": 16340
},
{
"epoch": 0.6162137715297931,
"grad_norm": 1.7936481213990962,
"learning_rate": 3.855445003904024e-06,
"loss": 0.4901,
"step": 16350
},
{
"epoch": 0.616590660686692,
"grad_norm": 1.6261629750224562,
"learning_rate": 3.849042514028091e-06,
"loss": 0.4817,
"step": 16360
},
{
"epoch": 0.616967549843591,
"grad_norm": 1.6014458715286652,
"learning_rate": 3.842642016323522e-06,
"loss": 0.4757,
"step": 16370
},
{
"epoch": 0.61734443900049,
"grad_norm": 1.6565625346460429,
"learning_rate": 3.836243521868828e-06,
"loss": 0.4849,
"step": 16380
},
{
"epoch": 0.6177213281573889,
"grad_norm": 1.6412335965766696,
"learning_rate": 3.82984704173904e-06,
"loss": 0.486,
"step": 16390
},
{
"epoch": 0.6180982173142878,
"grad_norm": 1.8138163964366028,
"learning_rate": 3.823452587005712e-06,
"loss": 0.5104,
"step": 16400
},
{
"epoch": 0.6184751064711869,
"grad_norm": 1.7953902099489745,
"learning_rate": 3.8170601687368905e-06,
"loss": 0.479,
"step": 16410
},
{
"epoch": 0.6188519956280858,
"grad_norm": 1.5270507793675938,
"learning_rate": 3.8106697979970952e-06,
"loss": 0.528,
"step": 16420
},
{
"epoch": 0.6192288847849847,
"grad_norm": 1.5641867404888987,
"learning_rate": 3.804281485847301e-06,
"loss": 0.4849,
"step": 16430
},
{
"epoch": 0.6196057739418837,
"grad_norm": 1.7507477091577552,
"learning_rate": 3.7978952433449223e-06,
"loss": 0.4832,
"step": 16440
},
{
"epoch": 0.6199826630987827,
"grad_norm": 1.6054491509073952,
"learning_rate": 3.7915110815437883e-06,
"loss": 0.4919,
"step": 16450
},
{
"epoch": 0.6203595522556816,
"grad_norm": 1.3576882864362545,
"learning_rate": 3.7851290114941335e-06,
"loss": 0.4842,
"step": 16460
},
{
"epoch": 0.6207364414125806,
"grad_norm": 1.5934327565220905,
"learning_rate": 3.77874904424256e-06,
"loss": 0.4787,
"step": 16470
},
{
"epoch": 0.6211133305694795,
"grad_norm": 1.4581770227833064,
"learning_rate": 3.7723711908320417e-06,
"loss": 0.4991,
"step": 16480
},
{
"epoch": 0.6214902197263785,
"grad_norm": 1.7598577411897167,
"learning_rate": 3.7659954623018875e-06,
"loss": 0.4655,
"step": 16490
},
{
"epoch": 0.6218671088832775,
"grad_norm": 1.5466125977952652,
"learning_rate": 3.759621869687731e-06,
"loss": 0.4763,
"step": 16500
},
{
"epoch": 0.6222439980401764,
"grad_norm": 1.5162276323975878,
"learning_rate": 3.753250424021506e-06,
"loss": 0.4791,
"step": 16510
},
{
"epoch": 0.6226208871970753,
"grad_norm": 1.6745379682639492,
"learning_rate": 3.746881136331431e-06,
"loss": 0.5151,
"step": 16520
},
{
"epoch": 0.6229977763539742,
"grad_norm": 1.532874094800718,
"learning_rate": 3.740514017641993e-06,
"loss": 0.4911,
"step": 16530
},
{
"epoch": 0.6233746655108733,
"grad_norm": 1.6604506659265745,
"learning_rate": 3.7341490789739205e-06,
"loss": 0.4906,
"step": 16540
},
{
"epoch": 0.6237515546677722,
"grad_norm": 1.6456103279322771,
"learning_rate": 3.727786331344171e-06,
"loss": 0.4923,
"step": 16550
},
{
"epoch": 0.6241284438246711,
"grad_norm": 1.4879389920554817,
"learning_rate": 3.7214257857659066e-06,
"loss": 0.489,
"step": 16560
},
{
"epoch": 0.6245053329815701,
"grad_norm": 2.2740003134139237,
"learning_rate": 3.715067453248481e-06,
"loss": 0.5154,
"step": 16570
},
{
"epoch": 0.6248822221384691,
"grad_norm": 1.7286954403484827,
"learning_rate": 3.7087113447974153e-06,
"loss": 0.4746,
"step": 16580
},
{
"epoch": 0.625259111295368,
"grad_norm": 1.291986369209173,
"learning_rate": 3.7023574714143858e-06,
"loss": 0.4625,
"step": 16590
},
{
"epoch": 0.625636000452267,
"grad_norm": 1.664446699881965,
"learning_rate": 3.69600584409719e-06,
"loss": 0.4541,
"step": 16600
},
{
"epoch": 0.6260128896091659,
"grad_norm": 1.5018038978083545,
"learning_rate": 3.6896564738397484e-06,
"loss": 0.4657,
"step": 16610
},
{
"epoch": 0.6263897787660649,
"grad_norm": 1.5823124760264218,
"learning_rate": 3.6833093716320693e-06,
"loss": 0.4768,
"step": 16620
},
{
"epoch": 0.6267666679229639,
"grad_norm": 1.9860629829549734,
"learning_rate": 3.6769645484602377e-06,
"loss": 0.4811,
"step": 16630
},
{
"epoch": 0.6271435570798628,
"grad_norm": 1.839944752571797,
"learning_rate": 3.6706220153063904e-06,
"loss": 0.48,
"step": 16640
},
{
"epoch": 0.6275204462367617,
"grad_norm": 1.7309100874405818,
"learning_rate": 3.664281783148702e-06,
"loss": 0.4722,
"step": 16650
},
{
"epoch": 0.6278973353936608,
"grad_norm": 1.581593496476517,
"learning_rate": 3.6579438629613682e-06,
"loss": 0.475,
"step": 16660
},
{
"epoch": 0.6282742245505597,
"grad_norm": 1.6548205842663573,
"learning_rate": 3.651608265714579e-06,
"loss": 0.4907,
"step": 16670
},
{
"epoch": 0.6286511137074586,
"grad_norm": 1.5516453686391054,
"learning_rate": 3.645275002374502e-06,
"loss": 0.4909,
"step": 16680
},
{
"epoch": 0.6290280028643576,
"grad_norm": 1.4834746500429061,
"learning_rate": 3.6389440839032687e-06,
"loss": 0.5048,
"step": 16690
},
{
"epoch": 0.6294048920212566,
"grad_norm": 1.7024990526554837,
"learning_rate": 3.6326155212589507e-06,
"loss": 0.4714,
"step": 16700
},
{
"epoch": 0.6297817811781555,
"grad_norm": 1.709920706597902,
"learning_rate": 3.6262893253955433e-06,
"loss": 0.4828,
"step": 16710
},
{
"epoch": 0.6301586703350545,
"grad_norm": 1.8336853543986507,
"learning_rate": 3.6199655072629415e-06,
"loss": 0.4847,
"step": 16720
},
{
"epoch": 0.6305355594919534,
"grad_norm": 1.4929651219976985,
"learning_rate": 3.613644077806927e-06,
"loss": 0.4786,
"step": 16730
},
{
"epoch": 0.6309124486488523,
"grad_norm": 1.7036171691777842,
"learning_rate": 3.607325047969149e-06,
"loss": 0.4957,
"step": 16740
},
{
"epoch": 0.6312893378057514,
"grad_norm": 1.73108529336121,
"learning_rate": 3.6010084286871017e-06,
"loss": 0.5082,
"step": 16750
},
{
"epoch": 0.6316662269626503,
"grad_norm": 1.6591602232622107,
"learning_rate": 3.5946942308941035e-06,
"loss": 0.4894,
"step": 16760
},
{
"epoch": 0.6320431161195492,
"grad_norm": 2.047186622437741,
"learning_rate": 3.5883824655192855e-06,
"loss": 0.4853,
"step": 16770
},
{
"epoch": 0.6324200052764481,
"grad_norm": 1.6832333116348397,
"learning_rate": 3.582073143487568e-06,
"loss": 0.5132,
"step": 16780
},
{
"epoch": 0.6327968944333472,
"grad_norm": 1.5899139760429426,
"learning_rate": 3.575766275719644e-06,
"loss": 0.5026,
"step": 16790
},
{
"epoch": 0.6331737835902461,
"grad_norm": 1.6765166894945724,
"learning_rate": 3.5694618731319507e-06,
"loss": 0.493,
"step": 16800
},
{
"epoch": 0.633550672747145,
"grad_norm": 1.5669282699472866,
"learning_rate": 3.5631599466366683e-06,
"loss": 0.4989,
"step": 16810
},
{
"epoch": 0.633927561904044,
"grad_norm": 1.6353842292836174,
"learning_rate": 3.556860507141685e-06,
"loss": 0.4842,
"step": 16820
},
{
"epoch": 0.634304451060943,
"grad_norm": 1.5863452326347784,
"learning_rate": 3.5505635655505877e-06,
"loss": 0.4808,
"step": 16830
},
{
"epoch": 0.634681340217842,
"grad_norm": 1.4750527987414617,
"learning_rate": 3.5442691327626354e-06,
"loss": 0.4833,
"step": 16840
},
{
"epoch": 0.6350582293747409,
"grad_norm": 1.5244616927874408,
"learning_rate": 3.5379772196727486e-06,
"loss": 0.4848,
"step": 16850
},
{
"epoch": 0.6354351185316398,
"grad_norm": 1.4503143224413209,
"learning_rate": 3.5316878371714838e-06,
"loss": 0.4737,
"step": 16860
},
{
"epoch": 0.6358120076885388,
"grad_norm": 1.631831674936825,
"learning_rate": 3.525400996145023e-06,
"loss": 0.4847,
"step": 16870
},
{
"epoch": 0.6361888968454378,
"grad_norm": 1.863352381042274,
"learning_rate": 3.5191167074751385e-06,
"loss": 0.461,
"step": 16880
},
{
"epoch": 0.6365657860023367,
"grad_norm": 1.6699698482794394,
"learning_rate": 3.512834982039196e-06,
"loss": 0.4629,
"step": 16890
},
{
"epoch": 0.6369426751592356,
"grad_norm": 1.487044517490085,
"learning_rate": 3.506555830710118e-06,
"loss": 0.5105,
"step": 16900
},
{
"epoch": 0.6373195643161347,
"grad_norm": 1.4479081048385236,
"learning_rate": 3.500279264356374e-06,
"loss": 0.5242,
"step": 16910
},
{
"epoch": 0.6376964534730336,
"grad_norm": 1.447171417720663,
"learning_rate": 3.4940052938419583e-06,
"loss": 0.508,
"step": 16920
},
{
"epoch": 0.6380733426299325,
"grad_norm": 1.6796141341080977,
"learning_rate": 3.4877339300263712e-06,
"loss": 0.5005,
"step": 16930
},
{
"epoch": 0.6384502317868315,
"grad_norm": 1.6662153763761518,
"learning_rate": 3.481465183764602e-06,
"loss": 0.4834,
"step": 16940
},
{
"epoch": 0.6388271209437304,
"grad_norm": 1.5754157381604186,
"learning_rate": 3.475199065907111e-06,
"loss": 0.4874,
"step": 16950
},
{
"epoch": 0.6392040101006294,
"grad_norm": 1.6041426691346072,
"learning_rate": 3.4689355872998085e-06,
"loss": 0.4865,
"step": 16960
},
{
"epoch": 0.6395808992575284,
"grad_norm": 1.6020448439122437,
"learning_rate": 3.4626747587840336e-06,
"loss": 0.4804,
"step": 16970
},
{
"epoch": 0.6399577884144273,
"grad_norm": 1.5691356747235539,
"learning_rate": 3.4564165911965407e-06,
"loss": 0.4491,
"step": 16980
},
{
"epoch": 0.6403346775713262,
"grad_norm": 1.6238593763575764,
"learning_rate": 3.4501610953694775e-06,
"loss": 0.4721,
"step": 16990
},
{
"epoch": 0.6407115667282253,
"grad_norm": 1.8524954109583753,
"learning_rate": 3.4439082821303723e-06,
"loss": 0.5065,
"step": 17000
},
{
"epoch": 0.6410884558851242,
"grad_norm": 1.5903092899398827,
"learning_rate": 3.4376581623020987e-06,
"loss": 0.4751,
"step": 17010
},
{
"epoch": 0.6414653450420231,
"grad_norm": 1.6223821940369376,
"learning_rate": 3.43141074670288e-06,
"loss": 0.482,
"step": 17020
},
{
"epoch": 0.6418422341989221,
"grad_norm": 1.7154442225202828,
"learning_rate": 3.425166046146254e-06,
"loss": 0.5018,
"step": 17030
},
{
"epoch": 0.6422191233558211,
"grad_norm": 1.4885853201247903,
"learning_rate": 3.4189240714410587e-06,
"loss": 0.4554,
"step": 17040
},
{
"epoch": 0.64259601251272,
"grad_norm": 1.7004544435150633,
"learning_rate": 3.412684833391413e-06,
"loss": 0.4831,
"step": 17050
},
{
"epoch": 0.642972901669619,
"grad_norm": 1.5122082088204818,
"learning_rate": 3.406448342796702e-06,
"loss": 0.4601,
"step": 17060
},
{
"epoch": 0.6433497908265179,
"grad_norm": 1.5886153641711012,
"learning_rate": 3.400214610451553e-06,
"loss": 0.5,
"step": 17070
},
{
"epoch": 0.6437266799834169,
"grad_norm": 1.4497829362440928,
"learning_rate": 3.393983647145823e-06,
"loss": 0.4886,
"step": 17080
},
{
"epoch": 0.6441035691403159,
"grad_norm": 1.8243610592206863,
"learning_rate": 3.387755463664567e-06,
"loss": 0.4844,
"step": 17090
},
{
"epoch": 0.6444804582972148,
"grad_norm": 1.9217986771041224,
"learning_rate": 3.3815300707880394e-06,
"loss": 0.4921,
"step": 17100
},
{
"epoch": 0.6448573474541137,
"grad_norm": 1.5791418592871644,
"learning_rate": 3.3753074792916574e-06,
"loss": 0.482,
"step": 17110
},
{
"epoch": 0.6452342366110126,
"grad_norm": 1.4431656921824283,
"learning_rate": 3.369087699945993e-06,
"loss": 0.4711,
"step": 17120
},
{
"epoch": 0.6456111257679117,
"grad_norm": 1.662138312216896,
"learning_rate": 3.3628707435167467e-06,
"loss": 0.4701,
"step": 17130
},
{
"epoch": 0.6459880149248106,
"grad_norm": 1.6549524822340256,
"learning_rate": 3.3566566207647354e-06,
"loss": 0.4761,
"step": 17140
},
{
"epoch": 0.6463649040817095,
"grad_norm": 1.7699738162116803,
"learning_rate": 3.350445342445874e-06,
"loss": 0.4945,
"step": 17150
},
{
"epoch": 0.6467417932386085,
"grad_norm": 2.1127375357724083,
"learning_rate": 3.344236919311149e-06,
"loss": 0.4765,
"step": 17160
},
{
"epoch": 0.6471186823955075,
"grad_norm": 1.5444316703665961,
"learning_rate": 3.338031362106607e-06,
"loss": 0.4936,
"step": 17170
},
{
"epoch": 0.6474955715524064,
"grad_norm": 1.6616572448362332,
"learning_rate": 3.3318286815733335e-06,
"loss": 0.4746,
"step": 17180
},
{
"epoch": 0.6478724607093054,
"grad_norm": 1.9913624341143157,
"learning_rate": 3.325628888447437e-06,
"loss": 0.5077,
"step": 17190
},
{
"epoch": 0.6482493498662043,
"grad_norm": 1.7915552215298538,
"learning_rate": 3.319431993460026e-06,
"loss": 0.504,
"step": 17200
},
{
"epoch": 0.6486262390231033,
"grad_norm": 1.732835360991205,
"learning_rate": 3.3132380073371926e-06,
"loss": 0.4925,
"step": 17210
},
{
"epoch": 0.6490031281800023,
"grad_norm": 1.3938136019682936,
"learning_rate": 3.3070469407999937e-06,
"loss": 0.4801,
"step": 17220
},
{
"epoch": 0.6493800173369012,
"grad_norm": 1.3212224075939478,
"learning_rate": 3.3008588045644357e-06,
"loss": 0.4609,
"step": 17230
},
{
"epoch": 0.6497569064938001,
"grad_norm": 1.7945950130121113,
"learning_rate": 3.2946736093414524e-06,
"loss": 0.467,
"step": 17240
},
{
"epoch": 0.6501337956506992,
"grad_norm": 1.7547142990915863,
"learning_rate": 3.288491365836881e-06,
"loss": 0.5079,
"step": 17250
},
{
"epoch": 0.6505106848075981,
"grad_norm": 1.6591091274236383,
"learning_rate": 3.2823120847514577e-06,
"loss": 0.4642,
"step": 17260
},
{
"epoch": 0.650887573964497,
"grad_norm": 1.5655640287361217,
"learning_rate": 3.2761357767807857e-06,
"loss": 0.4641,
"step": 17270
},
{
"epoch": 0.651264463121396,
"grad_norm": 1.7936591153526042,
"learning_rate": 3.269962452615326e-06,
"loss": 0.471,
"step": 17280
},
{
"epoch": 0.651641352278295,
"grad_norm": 1.5915717374592357,
"learning_rate": 3.2637921229403734e-06,
"loss": 0.4689,
"step": 17290
},
{
"epoch": 0.6520182414351939,
"grad_norm": 1.775887565559236,
"learning_rate": 3.2576247984360372e-06,
"loss": 0.4985,
"step": 17300
},
{
"epoch": 0.6523951305920929,
"grad_norm": 1.8241609832963868,
"learning_rate": 3.251460489777228e-06,
"loss": 0.4983,
"step": 17310
},
{
"epoch": 0.6527720197489918,
"grad_norm": 1.7201193138133724,
"learning_rate": 3.2452992076336356e-06,
"loss": 0.486,
"step": 17320
},
{
"epoch": 0.6531489089058907,
"grad_norm": 1.6087203461140804,
"learning_rate": 3.239140962669711e-06,
"loss": 0.4933,
"step": 17330
},
{
"epoch": 0.6535257980627898,
"grad_norm": 1.7218137998329464,
"learning_rate": 3.2329857655446483e-06,
"loss": 0.4932,
"step": 17340
},
{
"epoch": 0.6539026872196887,
"grad_norm": 1.5206186268024502,
"learning_rate": 3.2268336269123646e-06,
"loss": 0.4552,
"step": 17350
},
{
"epoch": 0.6542795763765876,
"grad_norm": 1.4491517030153769,
"learning_rate": 3.220684557421488e-06,
"loss": 0.4773,
"step": 17360
},
{
"epoch": 0.6546564655334866,
"grad_norm": 1.5069597772562586,
"learning_rate": 3.21453856771533e-06,
"loss": 0.4768,
"step": 17370
},
{
"epoch": 0.6550333546903856,
"grad_norm": 1.5693052306029478,
"learning_rate": 3.2083956684318708e-06,
"loss": 0.4817,
"step": 17380
},
{
"epoch": 0.6554102438472845,
"grad_norm": 1.6315582314566435,
"learning_rate": 3.2022558702037432e-06,
"loss": 0.4875,
"step": 17390
},
{
"epoch": 0.6557871330041835,
"grad_norm": 1.8830189051247035,
"learning_rate": 3.196119183658213e-06,
"loss": 0.5081,
"step": 17400
},
{
"epoch": 0.6561640221610824,
"grad_norm": 1.4994699080361438,
"learning_rate": 3.1899856194171607e-06,
"loss": 0.4637,
"step": 17410
},
{
"epoch": 0.6565409113179814,
"grad_norm": 1.5681733019464799,
"learning_rate": 3.183855188097057e-06,
"loss": 0.4754,
"step": 17420
},
{
"epoch": 0.6569178004748804,
"grad_norm": 1.7957738354956814,
"learning_rate": 3.177727900308958e-06,
"loss": 0.4834,
"step": 17430
},
{
"epoch": 0.6572946896317793,
"grad_norm": 1.4836118332583659,
"learning_rate": 3.171603766658472e-06,
"loss": 0.487,
"step": 17440
},
{
"epoch": 0.6576715787886782,
"grad_norm": 1.8415155422864224,
"learning_rate": 3.1654827977457526e-06,
"loss": 0.4965,
"step": 17450
},
{
"epoch": 0.6580484679455773,
"grad_norm": 1.48362840539746,
"learning_rate": 3.1593650041654716e-06,
"loss": 0.446,
"step": 17460
},
{
"epoch": 0.6584253571024762,
"grad_norm": 1.6394333815884048,
"learning_rate": 3.1532503965068073e-06,
"loss": 0.4687,
"step": 17470
},
{
"epoch": 0.6588022462593751,
"grad_norm": 1.483403229969005,
"learning_rate": 3.1471389853534217e-06,
"loss": 0.4596,
"step": 17480
},
{
"epoch": 0.659179135416274,
"grad_norm": 1.3730547797639057,
"learning_rate": 3.141030781283449e-06,
"loss": 0.4691,
"step": 17490
},
{
"epoch": 0.6595560245731731,
"grad_norm": 1.692266344168683,
"learning_rate": 3.134925794869463e-06,
"loss": 0.4649,
"step": 17500
},
{
"epoch": 0.659932913730072,
"grad_norm": 1.5190834038718941,
"learning_rate": 3.128824036678477e-06,
"loss": 0.4734,
"step": 17510
},
{
"epoch": 0.6603098028869709,
"grad_norm": 1.4798475487891651,
"learning_rate": 3.1227255172719127e-06,
"loss": 0.486,
"step": 17520
},
{
"epoch": 0.6606866920438699,
"grad_norm": 1.5780730702677566,
"learning_rate": 3.1166302472055873e-06,
"loss": 0.462,
"step": 17530
},
{
"epoch": 0.6610635812007688,
"grad_norm": 1.645103230020474,
"learning_rate": 3.11053823702969e-06,
"loss": 0.4983,
"step": 17540
},
{
"epoch": 0.6614404703576678,
"grad_norm": 1.6169975944237835,
"learning_rate": 3.104449497288772e-06,
"loss": 0.4693,
"step": 17550
},
{
"epoch": 0.6618173595145668,
"grad_norm": 1.6644257874809296,
"learning_rate": 3.0983640385217224e-06,
"loss": 0.537,
"step": 17560
},
{
"epoch": 0.6621942486714657,
"grad_norm": 1.5913845909506998,
"learning_rate": 3.092281871261752e-06,
"loss": 0.4874,
"step": 17570
},
{
"epoch": 0.6625711378283646,
"grad_norm": 1.6846273341706974,
"learning_rate": 3.086203006036371e-06,
"loss": 0.4407,
"step": 17580
},
{
"epoch": 0.6629480269852637,
"grad_norm": 1.4314112760740891,
"learning_rate": 3.0801274533673776e-06,
"loss": 0.4778,
"step": 17590
},
{
"epoch": 0.6633249161421626,
"grad_norm": 1.528021919193015,
"learning_rate": 3.0740552237708366e-06,
"loss": 0.5041,
"step": 17600
},
{
"epoch": 0.6637018052990615,
"grad_norm": 1.679731483162306,
"learning_rate": 3.0679863277570566e-06,
"loss": 0.4939,
"step": 17610
},
{
"epoch": 0.6640786944559605,
"grad_norm": 1.4342878274161748,
"learning_rate": 3.0619207758305848e-06,
"loss": 0.4895,
"step": 17620
},
{
"epoch": 0.6644555836128595,
"grad_norm": 1.7346799588693844,
"learning_rate": 3.0558585784901675e-06,
"loss": 0.486,
"step": 17630
},
{
"epoch": 0.6648324727697584,
"grad_norm": 1.783807850705795,
"learning_rate": 3.0497997462287566e-06,
"loss": 0.492,
"step": 17640
},
{
"epoch": 0.6652093619266574,
"grad_norm": 2.0262857478932634,
"learning_rate": 3.0437442895334734e-06,
"loss": 0.5014,
"step": 17650
},
{
"epoch": 0.6655862510835563,
"grad_norm": 1.5453260298900549,
"learning_rate": 3.037692218885599e-06,
"loss": 0.4923,
"step": 17660
},
{
"epoch": 0.6659631402404553,
"grad_norm": 2.203790859304095,
"learning_rate": 3.0316435447605495e-06,
"loss": 0.4846,
"step": 17670
},
{
"epoch": 0.6663400293973543,
"grad_norm": 1.546001802031686,
"learning_rate": 3.025598277627866e-06,
"loss": 0.4971,
"step": 17680
},
{
"epoch": 0.6667169185542532,
"grad_norm": 1.5128697377492308,
"learning_rate": 3.0195564279511925e-06,
"loss": 0.4797,
"step": 17690
},
{
"epoch": 0.6670938077111521,
"grad_norm": 2.0573665058922135,
"learning_rate": 3.0135180061882564e-06,
"loss": 0.4543,
"step": 17700
},
{
"epoch": 0.6674706968680512,
"grad_norm": 1.7400957327471658,
"learning_rate": 3.0074830227908514e-06,
"loss": 0.4574,
"step": 17710
},
{
"epoch": 0.6678475860249501,
"grad_norm": 1.3380341450114601,
"learning_rate": 3.0014514882048195e-06,
"loss": 0.5058,
"step": 17720
},
{
"epoch": 0.668224475181849,
"grad_norm": 2.076718786647805,
"learning_rate": 2.995423412870036e-06,
"loss": 0.4859,
"step": 17730
},
{
"epoch": 0.668601364338748,
"grad_norm": 1.4195821383830665,
"learning_rate": 2.9893988072203867e-06,
"loss": 0.492,
"step": 17740
},
{
"epoch": 0.6689782534956469,
"grad_norm": 1.8805532322048002,
"learning_rate": 2.98337768168375e-06,
"loss": 0.483,
"step": 17750
},
{
"epoch": 0.6693551426525459,
"grad_norm": 1.6317624518967453,
"learning_rate": 2.977360046681983e-06,
"loss": 0.478,
"step": 17760
},
{
"epoch": 0.6697320318094448,
"grad_norm": 2.0847886406534513,
"learning_rate": 2.971345912630902e-06,
"loss": 0.4872,
"step": 17770
},
{
"epoch": 0.6701089209663438,
"grad_norm": 1.7246432982403883,
"learning_rate": 2.965335289940263e-06,
"loss": 0.5001,
"step": 17780
},
{
"epoch": 0.6704858101232427,
"grad_norm": 1.6363668958491415,
"learning_rate": 2.9593281890137404e-06,
"loss": 0.4686,
"step": 17790
},
{
"epoch": 0.6708626992801417,
"grad_norm": 1.7985482020730335,
"learning_rate": 2.9533246202489173e-06,
"loss": 0.4969,
"step": 17800
},
{
"epoch": 0.6712395884370407,
"grad_norm": 1.6264485996061422,
"learning_rate": 2.9473245940372608e-06,
"loss": 0.4936,
"step": 17810
},
{
"epoch": 0.6716164775939396,
"grad_norm": 1.6890174531918563,
"learning_rate": 2.9413281207641114e-06,
"loss": 0.4799,
"step": 17820
},
{
"epoch": 0.6719933667508385,
"grad_norm": 1.5841849546055524,
"learning_rate": 2.9353352108086485e-06,
"loss": 0.4998,
"step": 17830
},
{
"epoch": 0.6723702559077376,
"grad_norm": 1.6089769681179087,
"learning_rate": 2.929345874543896e-06,
"loss": 0.4887,
"step": 17840
},
{
"epoch": 0.6727471450646365,
"grad_norm": 1.5674713179458315,
"learning_rate": 2.923360122336686e-06,
"loss": 0.4847,
"step": 17850
},
{
"epoch": 0.6731240342215354,
"grad_norm": 1.584156919079405,
"learning_rate": 2.9173779645476474e-06,
"loss": 0.4628,
"step": 17860
},
{
"epoch": 0.6735009233784344,
"grad_norm": 1.6929345771758708,
"learning_rate": 2.911399411531188e-06,
"loss": 0.4748,
"step": 17870
},
{
"epoch": 0.6738778125353334,
"grad_norm": 1.787787378046894,
"learning_rate": 2.9054244736354766e-06,
"loss": 0.4735,
"step": 17880
},
{
"epoch": 0.6742547016922323,
"grad_norm": 1.5201306101583867,
"learning_rate": 2.899453161202425e-06,
"loss": 0.4833,
"step": 17890
},
{
"epoch": 0.6746315908491313,
"grad_norm": 1.9695556108547445,
"learning_rate": 2.893485484567669e-06,
"loss": 0.4861,
"step": 17900
},
{
"epoch": 0.6750084800060302,
"grad_norm": 1.4351406646686231,
"learning_rate": 2.887521454060551e-06,
"loss": 0.4673,
"step": 17910
},
{
"epoch": 0.6753853691629291,
"grad_norm": 2.0856261193107732,
"learning_rate": 2.881561080004104e-06,
"loss": 0.4938,
"step": 17920
},
{
"epoch": 0.6757622583198282,
"grad_norm": 1.609969345939983,
"learning_rate": 2.8756043727150295e-06,
"loss": 0.4843,
"step": 17930
},
{
"epoch": 0.6761391474767271,
"grad_norm": 1.3392614292717313,
"learning_rate": 2.8696513425036874e-06,
"loss": 0.4819,
"step": 17940
},
{
"epoch": 0.676516036633626,
"grad_norm": 1.7102447232516564,
"learning_rate": 2.8637019996740624e-06,
"loss": 0.494,
"step": 17950
},
{
"epoch": 0.676892925790525,
"grad_norm": 1.6998269355745326,
"learning_rate": 2.8577563545237686e-06,
"loss": 0.4653,
"step": 17960
},
{
"epoch": 0.677269814947424,
"grad_norm": 1.4862506147625136,
"learning_rate": 2.8518144173440153e-06,
"loss": 0.4922,
"step": 17970
},
{
"epoch": 0.6776467041043229,
"grad_norm": 1.510405588964004,
"learning_rate": 2.8458761984195913e-06,
"loss": 0.4763,
"step": 17980
},
{
"epoch": 0.6780235932612219,
"grad_norm": 1.5288462993209506,
"learning_rate": 2.839941708028856e-06,
"loss": 0.4736,
"step": 17990
},
{
"epoch": 0.6784004824181208,
"grad_norm": 1.649944605527973,
"learning_rate": 2.8340109564437028e-06,
"loss": 0.5076,
"step": 18000
},
{
"epoch": 0.6787773715750198,
"grad_norm": 1.517180625990711,
"learning_rate": 2.8280839539295685e-06,
"loss": 0.4903,
"step": 18010
},
{
"epoch": 0.6791542607319188,
"grad_norm": 1.6911443889236912,
"learning_rate": 2.822160710745392e-06,
"loss": 0.4963,
"step": 18020
},
{
"epoch": 0.6795311498888177,
"grad_norm": 1.870392805720581,
"learning_rate": 2.8162412371436087e-06,
"loss": 0.4636,
"step": 18030
},
{
"epoch": 0.6799080390457166,
"grad_norm": 1.7551336831390325,
"learning_rate": 2.8103255433701238e-06,
"loss": 0.505,
"step": 18040
},
{
"epoch": 0.6802849282026157,
"grad_norm": 2.112747546430556,
"learning_rate": 2.804413639664306e-06,
"loss": 0.4923,
"step": 18050
},
{
"epoch": 0.6806618173595146,
"grad_norm": 1.389558866431419,
"learning_rate": 2.7985055362589597e-06,
"loss": 0.4746,
"step": 18060
},
{
"epoch": 0.6810387065164135,
"grad_norm": 1.8602009007028841,
"learning_rate": 2.792601243380321e-06,
"loss": 0.4935,
"step": 18070
},
{
"epoch": 0.6814155956733124,
"grad_norm": 1.8232438996039204,
"learning_rate": 2.7867007712480145e-06,
"loss": 0.4638,
"step": 18080
},
{
"epoch": 0.6817924848302115,
"grad_norm": 1.422637041046491,
"learning_rate": 2.780804130075064e-06,
"loss": 0.4859,
"step": 18090
},
{
"epoch": 0.6821693739871104,
"grad_norm": 1.79592896537598,
"learning_rate": 2.7749113300678576e-06,
"loss": 0.4994,
"step": 18100
},
{
"epoch": 0.6825462631440093,
"grad_norm": 1.7163110389066987,
"learning_rate": 2.7690223814261358e-06,
"loss": 0.4649,
"step": 18110
},
{
"epoch": 0.6829231523009083,
"grad_norm": 1.4437242473003,
"learning_rate": 2.7631372943429724e-06,
"loss": 0.4818,
"step": 18120
},
{
"epoch": 0.6833000414578072,
"grad_norm": 1.799293879805964,
"learning_rate": 2.757256079004758e-06,
"loss": 0.5199,
"step": 18130
},
{
"epoch": 0.6836769306147062,
"grad_norm": 1.8978824221262882,
"learning_rate": 2.751378745591181e-06,
"loss": 0.4743,
"step": 18140
},
{
"epoch": 0.6840538197716052,
"grad_norm": 1.7079497057612991,
"learning_rate": 2.74550530427521e-06,
"loss": 0.4572,
"step": 18150
},
{
"epoch": 0.6844307089285041,
"grad_norm": 1.6167362676629933,
"learning_rate": 2.739635765223079e-06,
"loss": 0.4645,
"step": 18160
},
{
"epoch": 0.684807598085403,
"grad_norm": 1.1239553035502026,
"learning_rate": 2.7337701385942655e-06,
"loss": 0.4916,
"step": 18170
},
{
"epoch": 0.6851844872423021,
"grad_norm": 1.80414731066518,
"learning_rate": 2.7279084345414765e-06,
"loss": 0.4832,
"step": 18180
},
{
"epoch": 0.685561376399201,
"grad_norm": 1.6220429458848973,
"learning_rate": 2.7220506632106304e-06,
"loss": 0.4637,
"step": 18190
},
{
"epoch": 0.6859382655560999,
"grad_norm": 1.6869049749820662,
"learning_rate": 2.7161968347408325e-06,
"loss": 0.4966,
"step": 18200
},
{
"epoch": 0.6863151547129989,
"grad_norm": 1.6657505964750594,
"learning_rate": 2.710346959264369e-06,
"loss": 0.4561,
"step": 18210
},
{
"epoch": 0.6866920438698979,
"grad_norm": 1.3973709297787218,
"learning_rate": 2.7045010469066864e-06,
"loss": 0.4933,
"step": 18220
},
{
"epoch": 0.6870689330267968,
"grad_norm": 1.5860018125512794,
"learning_rate": 2.6986591077863677e-06,
"loss": 0.4857,
"step": 18230
},
{
"epoch": 0.6874458221836958,
"grad_norm": 1.4618918574737128,
"learning_rate": 2.692821152015116e-06,
"loss": 0.4591,
"step": 18240
},
{
"epoch": 0.6878227113405947,
"grad_norm": 1.638643622605981,
"learning_rate": 2.686987189697744e-06,
"loss": 0.4951,
"step": 18250
},
{
"epoch": 0.6881996004974937,
"grad_norm": 1.7231621888652404,
"learning_rate": 2.6811572309321487e-06,
"loss": 0.4845,
"step": 18260
},
{
"epoch": 0.6885764896543927,
"grad_norm": 1.834141519895122,
"learning_rate": 2.6753312858093056e-06,
"loss": 0.4793,
"step": 18270
},
{
"epoch": 0.6889533788112916,
"grad_norm": 1.401487676831547,
"learning_rate": 2.669509364413232e-06,
"loss": 0.4864,
"step": 18280
},
{
"epoch": 0.6893302679681905,
"grad_norm": 1.6493898881695628,
"learning_rate": 2.6636914768209867e-06,
"loss": 0.4856,
"step": 18290
},
{
"epoch": 0.6897071571250896,
"grad_norm": 1.751769186322749,
"learning_rate": 2.6578776331026456e-06,
"loss": 0.5033,
"step": 18300
},
{
"epoch": 0.6900840462819885,
"grad_norm": 1.3871471388084073,
"learning_rate": 2.6520678433212854e-06,
"loss": 0.4726,
"step": 18310
},
{
"epoch": 0.6904609354388874,
"grad_norm": 1.5531242747701188,
"learning_rate": 2.646262117532966e-06,
"loss": 0.4818,
"step": 18320
},
{
"epoch": 0.6908378245957864,
"grad_norm": 1.5566017006302422,
"learning_rate": 2.640460465786711e-06,
"loss": 0.4743,
"step": 18330
},
{
"epoch": 0.6912147137526853,
"grad_norm": 1.7092683735724328,
"learning_rate": 2.634662898124495e-06,
"loss": 0.4473,
"step": 18340
},
{
"epoch": 0.6915916029095843,
"grad_norm": 1.7674173470502206,
"learning_rate": 2.6288694245812217e-06,
"loss": 0.5052,
"step": 18350
},
{
"epoch": 0.6919684920664833,
"grad_norm": 1.7113075330809617,
"learning_rate": 2.6230800551847096e-06,
"loss": 0.4968,
"step": 18360
},
{
"epoch": 0.6923453812233822,
"grad_norm": 1.666331092982349,
"learning_rate": 2.6172947999556723e-06,
"loss": 0.4852,
"step": 18370
},
{
"epoch": 0.6927222703802811,
"grad_norm": 1.7496234849403591,
"learning_rate": 2.6115136689077037e-06,
"loss": 0.4809,
"step": 18380
},
{
"epoch": 0.6930991595371802,
"grad_norm": 1.4830201051445864,
"learning_rate": 2.605736672047257e-06,
"loss": 0.4579,
"step": 18390
},
{
"epoch": 0.6934760486940791,
"grad_norm": 1.6817677793368029,
"learning_rate": 2.5999638193736337e-06,
"loss": 0.4922,
"step": 18400
},
{
"epoch": 0.693852937850978,
"grad_norm": 1.4954573467617007,
"learning_rate": 2.594195120878954e-06,
"loss": 0.5002,
"step": 18410
},
{
"epoch": 0.6942298270078769,
"grad_norm": 1.5504183128022453,
"learning_rate": 2.5884305865481572e-06,
"loss": 0.4834,
"step": 18420
},
{
"epoch": 0.694606716164776,
"grad_norm": 1.554112427963254,
"learning_rate": 2.582670226358971e-06,
"loss": 0.4486,
"step": 18430
},
{
"epoch": 0.6949836053216749,
"grad_norm": 1.4901063381514676,
"learning_rate": 2.576914050281899e-06,
"loss": 0.494,
"step": 18440
},
{
"epoch": 0.6953604944785738,
"grad_norm": 1.5107903863102292,
"learning_rate": 2.5711620682801973e-06,
"loss": 0.4662,
"step": 18450
},
{
"epoch": 0.6957373836354728,
"grad_norm": 1.4705557147554638,
"learning_rate": 2.56541429030987e-06,
"loss": 0.4955,
"step": 18460
},
{
"epoch": 0.6961142727923718,
"grad_norm": 1.5353961394271842,
"learning_rate": 2.5596707263196386e-06,
"loss": 0.4796,
"step": 18470
},
{
"epoch": 0.6964911619492707,
"grad_norm": 1.3895564519970267,
"learning_rate": 2.55393138625094e-06,
"loss": 0.4634,
"step": 18480
},
{
"epoch": 0.6968680511061697,
"grad_norm": 1.3450383420888639,
"learning_rate": 2.548196280037886e-06,
"loss": 0.4798,
"step": 18490
},
{
"epoch": 0.6972449402630686,
"grad_norm": 1.8244663013187354,
"learning_rate": 2.5424654176072714e-06,
"loss": 0.4746,
"step": 18500
},
{
"epoch": 0.6976218294199676,
"grad_norm": 1.5654399272762263,
"learning_rate": 2.5367388088785413e-06,
"loss": 0.4899,
"step": 18510
},
{
"epoch": 0.6979987185768666,
"grad_norm": 1.4654205360338723,
"learning_rate": 2.5310164637637773e-06,
"loss": 0.49,
"step": 18520
},
{
"epoch": 0.6983756077337655,
"grad_norm": 1.4467059234382345,
"learning_rate": 2.525298392167683e-06,
"loss": 0.4427,
"step": 18530
},
{
"epoch": 0.6987524968906644,
"grad_norm": 1.5408140891984066,
"learning_rate": 2.519584603987566e-06,
"loss": 0.4758,
"step": 18540
},
{
"epoch": 0.6991293860475634,
"grad_norm": 1.7821474974934417,
"learning_rate": 2.513875109113316e-06,
"loss": 0.4865,
"step": 18550
},
{
"epoch": 0.6995062752044624,
"grad_norm": 1.9953848627201554,
"learning_rate": 2.5081699174273955e-06,
"loss": 0.4808,
"step": 18560
},
{
"epoch": 0.6998831643613613,
"grad_norm": 1.741343264928784,
"learning_rate": 2.5024690388048154e-06,
"loss": 0.502,
"step": 18570
},
{
"epoch": 0.7002600535182603,
"grad_norm": 1.6437287473675368,
"learning_rate": 2.4967724831131244e-06,
"loss": 0.4969,
"step": 18580
},
{
"epoch": 0.7006369426751592,
"grad_norm": 1.6241010396370417,
"learning_rate": 2.4910802602123865e-06,
"loss": 0.5005,
"step": 18590
},
{
"epoch": 0.7010138318320582,
"grad_norm": 1.4898387628775849,
"learning_rate": 2.4853923799551677e-06,
"loss": 0.484,
"step": 18600
},
{
"epoch": 0.7013907209889572,
"grad_norm": 1.6400342378919166,
"learning_rate": 2.4797088521865138e-06,
"loss": 0.4722,
"step": 18610
},
{
"epoch": 0.7017676101458561,
"grad_norm": 1.479505953040376,
"learning_rate": 2.474029686743939e-06,
"loss": 0.5037,
"step": 18620
},
{
"epoch": 0.702144499302755,
"grad_norm": 1.4589932060948365,
"learning_rate": 2.4683548934574115e-06,
"loss": 0.4609,
"step": 18630
},
{
"epoch": 0.7025213884596541,
"grad_norm": 1.551678370051871,
"learning_rate": 2.462684482149327e-06,
"loss": 0.4801,
"step": 18640
},
{
"epoch": 0.702898277616553,
"grad_norm": 1.7192058552759832,
"learning_rate": 2.4570184626344944e-06,
"loss": 0.4644,
"step": 18650
},
{
"epoch": 0.7032751667734519,
"grad_norm": 1.631280002083957,
"learning_rate": 2.451356844720125e-06,
"loss": 0.4907,
"step": 18660
},
{
"epoch": 0.7036520559303509,
"grad_norm": 1.6750923039054713,
"learning_rate": 2.445699638205809e-06,
"loss": 0.5031,
"step": 18670
},
{
"epoch": 0.7040289450872499,
"grad_norm": 1.9691622944065195,
"learning_rate": 2.440046852883507e-06,
"loss": 0.4536,
"step": 18680
},
{
"epoch": 0.7044058342441488,
"grad_norm": 1.507322764879853,
"learning_rate": 2.4343984985375167e-06,
"loss": 0.4725,
"step": 18690
},
{
"epoch": 0.7047827234010478,
"grad_norm": 1.7740189927150876,
"learning_rate": 2.4287545849444747e-06,
"loss": 0.4698,
"step": 18700
},
{
"epoch": 0.7051596125579467,
"grad_norm": 1.5752161156901476,
"learning_rate": 2.423115121873328e-06,
"loss": 0.4571,
"step": 18710
},
{
"epoch": 0.7055365017148456,
"grad_norm": 1.9762064673353243,
"learning_rate": 2.4174801190853196e-06,
"loss": 0.4921,
"step": 18720
},
{
"epoch": 0.7059133908717447,
"grad_norm": 1.6174279577484105,
"learning_rate": 2.411849586333974e-06,
"loss": 0.4975,
"step": 18730
},
{
"epoch": 0.7062902800286436,
"grad_norm": 1.7193669580907476,
"learning_rate": 2.406223533365078e-06,
"loss": 0.4873,
"step": 18740
},
{
"epoch": 0.7066671691855425,
"grad_norm": 1.7235537874317444,
"learning_rate": 2.4006019699166643e-06,
"loss": 0.4685,
"step": 18750
},
{
"epoch": 0.7070440583424414,
"grad_norm": 1.63971711730806,
"learning_rate": 2.394984905718994e-06,
"loss": 0.4818,
"step": 18760
},
{
"epoch": 0.7074209474993405,
"grad_norm": 1.4461491095827899,
"learning_rate": 2.3893723504945425e-06,
"loss": 0.4914,
"step": 18770
},
{
"epoch": 0.7077978366562394,
"grad_norm": 1.4529129185509562,
"learning_rate": 2.3837643139579786e-06,
"loss": 0.4628,
"step": 18780
},
{
"epoch": 0.7081747258131383,
"grad_norm": 1.53376236798455,
"learning_rate": 2.378160805816151e-06,
"loss": 0.4969,
"step": 18790
},
{
"epoch": 0.7085516149700373,
"grad_norm": 1.6666468688910057,
"learning_rate": 2.3725618357680697e-06,
"loss": 0.4796,
"step": 18800
},
{
"epoch": 0.7089285041269363,
"grad_norm": 1.6506198653287087,
"learning_rate": 2.366967413504892e-06,
"loss": 0.4854,
"step": 18810
},
{
"epoch": 0.7093053932838352,
"grad_norm": 1.7178732497859122,
"learning_rate": 2.361377548709897e-06,
"loss": 0.4727,
"step": 18820
},
{
"epoch": 0.7096822824407342,
"grad_norm": 1.4181246211634644,
"learning_rate": 2.3557922510584837e-06,
"loss": 0.4836,
"step": 18830
},
{
"epoch": 0.7100591715976331,
"grad_norm": 1.5200708457900483,
"learning_rate": 2.3502115302181415e-06,
"loss": 0.4736,
"step": 18840
},
{
"epoch": 0.7104360607545321,
"grad_norm": 1.8352918119756971,
"learning_rate": 2.3446353958484404e-06,
"loss": 0.4932,
"step": 18850
},
{
"epoch": 0.7108129499114311,
"grad_norm": 1.6181777322718822,
"learning_rate": 2.339063857601006e-06,
"loss": 0.481,
"step": 18860
},
{
"epoch": 0.71118983906833,
"grad_norm": 1.6727732163944582,
"learning_rate": 2.3334969251195137e-06,
"loss": 0.4688,
"step": 18870
},
{
"epoch": 0.7115667282252289,
"grad_norm": 1.5382920829842202,
"learning_rate": 2.3279346080396652e-06,
"loss": 0.4799,
"step": 18880
},
{
"epoch": 0.711943617382128,
"grad_norm": 1.6126228289797586,
"learning_rate": 2.322376915989178e-06,
"loss": 0.4579,
"step": 18890
},
{
"epoch": 0.7123205065390269,
"grad_norm": 2.051868736900158,
"learning_rate": 2.3168238585877552e-06,
"loss": 0.5088,
"step": 18900
},
{
"epoch": 0.7126973956959258,
"grad_norm": 1.485292408430921,
"learning_rate": 2.3112754454470847e-06,
"loss": 0.4821,
"step": 18910
},
{
"epoch": 0.7130742848528248,
"grad_norm": 1.488486027715864,
"learning_rate": 2.305731686170814e-06,
"loss": 0.4498,
"step": 18920
},
{
"epoch": 0.7134511740097237,
"grad_norm": 1.5197855092868775,
"learning_rate": 2.300192590354534e-06,
"loss": 0.466,
"step": 18930
},
{
"epoch": 0.7138280631666227,
"grad_norm": 1.723349595195898,
"learning_rate": 2.2946581675857667e-06,
"loss": 0.4954,
"step": 18940
},
{
"epoch": 0.7142049523235217,
"grad_norm": 1.8046482293904333,
"learning_rate": 2.2891284274439424e-06,
"loss": 0.4798,
"step": 18950
},
{
"epoch": 0.7145818414804206,
"grad_norm": 1.5075542664335193,
"learning_rate": 2.2836033795003882e-06,
"loss": 0.4625,
"step": 18960
},
{
"epoch": 0.7149587306373195,
"grad_norm": 1.7547546524633186,
"learning_rate": 2.2780830333183086e-06,
"loss": 0.4916,
"step": 18970
},
{
"epoch": 0.7153356197942186,
"grad_norm": 1.5599589166635623,
"learning_rate": 2.2725673984527706e-06,
"loss": 0.4878,
"step": 18980
},
{
"epoch": 0.7157125089511175,
"grad_norm": 1.5938966644066468,
"learning_rate": 2.2670564844506863e-06,
"loss": 0.4914,
"step": 18990
},
{
"epoch": 0.7160893981080164,
"grad_norm": 1.758549701257888,
"learning_rate": 2.2615503008507965e-06,
"loss": 0.46,
"step": 19000
},
{
"epoch": 0.7164662872649153,
"grad_norm": 1.6218207774272693,
"learning_rate": 2.256048857183656e-06,
"loss": 0.4709,
"step": 19010
},
{
"epoch": 0.7168431764218144,
"grad_norm": 2.0658511464902447,
"learning_rate": 2.2505521629716095e-06,
"loss": 0.4902,
"step": 19020
},
{
"epoch": 0.7172200655787133,
"grad_norm": 1.6814039721656933,
"learning_rate": 2.245060227728785e-06,
"loss": 0.4711,
"step": 19030
},
{
"epoch": 0.7175969547356122,
"grad_norm": 1.7568224370312793,
"learning_rate": 2.2395730609610777e-06,
"loss": 0.4949,
"step": 19040
},
{
"epoch": 0.7179738438925112,
"grad_norm": 1.8189719042512071,
"learning_rate": 2.234090672166122e-06,
"loss": 0.5321,
"step": 19050
},
{
"epoch": 0.7183507330494102,
"grad_norm": 1.568239365153995,
"learning_rate": 2.2286130708332876e-06,
"loss": 0.4724,
"step": 19060
},
{
"epoch": 0.7187276222063091,
"grad_norm": 2.0389164815636325,
"learning_rate": 2.22314026644365e-06,
"loss": 0.4657,
"step": 19070
},
{
"epoch": 0.7191045113632081,
"grad_norm": 1.385299223521904,
"learning_rate": 2.2176722684699882e-06,
"loss": 0.4835,
"step": 19080
},
{
"epoch": 0.719481400520107,
"grad_norm": 1.4633858575844483,
"learning_rate": 2.2122090863767627e-06,
"loss": 0.4542,
"step": 19090
},
{
"epoch": 0.719858289677006,
"grad_norm": 1.7748405690606124,
"learning_rate": 2.206750729620097e-06,
"loss": 0.4782,
"step": 19100
},
{
"epoch": 0.720235178833905,
"grad_norm": 1.5545823449736953,
"learning_rate": 2.201297207647757e-06,
"loss": 0.4548,
"step": 19110
},
{
"epoch": 0.7206120679908039,
"grad_norm": 2.292728574012736,
"learning_rate": 2.195848529899147e-06,
"loss": 0.4722,
"step": 19120
},
{
"epoch": 0.7209889571477028,
"grad_norm": 1.716053797160062,
"learning_rate": 2.1904047058052842e-06,
"loss": 0.4828,
"step": 19130
},
{
"epoch": 0.7213658463046018,
"grad_norm": 1.714097262044458,
"learning_rate": 2.1849657447887847e-06,
"loss": 0.4735,
"step": 19140
},
{
"epoch": 0.7217427354615008,
"grad_norm": 1.7301553058622174,
"learning_rate": 2.1795316562638462e-06,
"loss": 0.485,
"step": 19150
},
{
"epoch": 0.7221196246183997,
"grad_norm": 2.437219895216966,
"learning_rate": 2.1741024496362344e-06,
"loss": 0.455,
"step": 19160
},
{
"epoch": 0.7224965137752987,
"grad_norm": 1.7131734298817816,
"learning_rate": 2.1686781343032647e-06,
"loss": 0.484,
"step": 19170
},
{
"epoch": 0.7228734029321976,
"grad_norm": 1.4800874315130503,
"learning_rate": 2.1632587196537853e-06,
"loss": 0.4646,
"step": 19180
},
{
"epoch": 0.7232502920890966,
"grad_norm": 1.6136385919031264,
"learning_rate": 2.1578442150681615e-06,
"loss": 0.4615,
"step": 19190
},
{
"epoch": 0.7236271812459956,
"grad_norm": 1.678439760720894,
"learning_rate": 2.1524346299182626e-06,
"loss": 0.4632,
"step": 19200
},
{
"epoch": 0.7240040704028945,
"grad_norm": 1.5309141919283373,
"learning_rate": 2.14702997356744e-06,
"loss": 0.4585,
"step": 19210
},
{
"epoch": 0.7243809595597934,
"grad_norm": 1.393910668679631,
"learning_rate": 2.1416302553705165e-06,
"loss": 0.4844,
"step": 19220
},
{
"epoch": 0.7247578487166925,
"grad_norm": 1.6745490665396063,
"learning_rate": 2.136235484673761e-06,
"loss": 0.4523,
"step": 19230
},
{
"epoch": 0.7251347378735914,
"grad_norm": 1.6646971203310925,
"learning_rate": 2.1308456708148896e-06,
"loss": 0.4777,
"step": 19240
},
{
"epoch": 0.7255116270304903,
"grad_norm": 1.7651217240185753,
"learning_rate": 2.1254608231230312e-06,
"loss": 0.4985,
"step": 19250
},
{
"epoch": 0.7258885161873893,
"grad_norm": 1.458188978373751,
"learning_rate": 2.120080950918722e-06,
"loss": 0.4609,
"step": 19260
},
{
"epoch": 0.7262654053442883,
"grad_norm": 1.3512368514891746,
"learning_rate": 2.1147060635138817e-06,
"loss": 0.4586,
"step": 19270
},
{
"epoch": 0.7266422945011872,
"grad_norm": 1.6354165388054114,
"learning_rate": 2.1093361702118065e-06,
"loss": 0.4673,
"step": 19280
},
{
"epoch": 0.7270191836580862,
"grad_norm": 1.6908458833172149,
"learning_rate": 2.103971280307146e-06,
"loss": 0.4652,
"step": 19290
},
{
"epoch": 0.7273960728149851,
"grad_norm": 1.5384882126868258,
"learning_rate": 2.098611403085895e-06,
"loss": 0.4553,
"step": 19300
},
{
"epoch": 0.727772961971884,
"grad_norm": 1.3879436105661611,
"learning_rate": 2.0932565478253624e-06,
"loss": 0.4606,
"step": 19310
},
{
"epoch": 0.728149851128783,
"grad_norm": 1.7839927445388464,
"learning_rate": 2.087906723794171e-06,
"loss": 0.4913,
"step": 19320
},
{
"epoch": 0.728526740285682,
"grad_norm": 1.5679146539305475,
"learning_rate": 2.0825619402522356e-06,
"loss": 0.4764,
"step": 19330
},
{
"epoch": 0.7289036294425809,
"grad_norm": 1.6427710422238193,
"learning_rate": 2.077222206450743e-06,
"loss": 0.4941,
"step": 19340
},
{
"epoch": 0.7292805185994798,
"grad_norm": 1.9163137570823914,
"learning_rate": 2.0718875316321413e-06,
"loss": 0.4728,
"step": 19350
},
{
"epoch": 0.7296574077563789,
"grad_norm": 1.7523481576736066,
"learning_rate": 2.066557925030123e-06,
"loss": 0.4837,
"step": 19360
},
{
"epoch": 0.7300342969132778,
"grad_norm": 1.6581516586288727,
"learning_rate": 2.0612333958696068e-06,
"loss": 0.4649,
"step": 19370
},
{
"epoch": 0.7304111860701767,
"grad_norm": 1.833543727989413,
"learning_rate": 2.0559139533667227e-06,
"loss": 0.4888,
"step": 19380
},
{
"epoch": 0.7307880752270757,
"grad_norm": 2.948145026805694,
"learning_rate": 2.050599606728798e-06,
"loss": 0.4679,
"step": 19390
},
{
"epoch": 0.7311649643839747,
"grad_norm": 1.6265548517672246,
"learning_rate": 2.045290365154338e-06,
"loss": 0.4561,
"step": 19400
},
{
"epoch": 0.7315418535408736,
"grad_norm": 1.5522637397176864,
"learning_rate": 2.039986237833012e-06,
"loss": 0.4481,
"step": 19410
},
{
"epoch": 0.7319187426977726,
"grad_norm": 1.5713552061547922,
"learning_rate": 2.0346872339456385e-06,
"loss": 0.4555,
"step": 19420
},
{
"epoch": 0.7322956318546715,
"grad_norm": 1.9565945231630675,
"learning_rate": 2.0293933626641677e-06,
"loss": 0.4705,
"step": 19430
},
{
"epoch": 0.7326725210115705,
"grad_norm": 1.4336544390295196,
"learning_rate": 2.0241046331516596e-06,
"loss": 0.475,
"step": 19440
},
{
"epoch": 0.7330494101684695,
"grad_norm": 1.9396889631970387,
"learning_rate": 2.018821054562286e-06,
"loss": 0.4681,
"step": 19450
},
{
"epoch": 0.7334262993253684,
"grad_norm": 1.567151805567963,
"learning_rate": 2.0135426360412945e-06,
"loss": 0.4835,
"step": 19460
},
{
"epoch": 0.7338031884822673,
"grad_norm": 1.9444973709798723,
"learning_rate": 2.008269386725006e-06,
"loss": 0.4698,
"step": 19470
},
{
"epoch": 0.7341800776391664,
"grad_norm": 1.6143737110809373,
"learning_rate": 2.003001315740788e-06,
"loss": 0.4718,
"step": 19480
},
{
"epoch": 0.7345569667960653,
"grad_norm": 1.4062299288988356,
"learning_rate": 1.997738432207048e-06,
"loss": 0.486,
"step": 19490
},
{
"epoch": 0.7349338559529642,
"grad_norm": 1.8345777555224132,
"learning_rate": 1.9924807452332203e-06,
"loss": 0.4982,
"step": 19500
},
{
"epoch": 0.7353107451098632,
"grad_norm": 1.3571830592619372,
"learning_rate": 1.9872282639197384e-06,
"loss": 0.4541,
"step": 19510
},
{
"epoch": 0.7356876342667621,
"grad_norm": 1.5972192547656638,
"learning_rate": 1.981980997358023e-06,
"loss": 0.4876,
"step": 19520
},
{
"epoch": 0.7360645234236611,
"grad_norm": 1.6793172446301055,
"learning_rate": 1.976738954630475e-06,
"loss": 0.4902,
"step": 19530
},
{
"epoch": 0.7364414125805601,
"grad_norm": 1.7207394609259994,
"learning_rate": 1.97150214481045e-06,
"loss": 0.4631,
"step": 19540
},
{
"epoch": 0.736818301737459,
"grad_norm": 1.7659650208337772,
"learning_rate": 1.9662705769622473e-06,
"loss": 0.475,
"step": 19550
},
{
"epoch": 0.7371951908943579,
"grad_norm": 2.259295033352332,
"learning_rate": 1.9610442601410924e-06,
"loss": 0.4748,
"step": 19560
},
{
"epoch": 0.737572080051257,
"grad_norm": 1.4373497805396753,
"learning_rate": 1.955823203393122e-06,
"loss": 0.4417,
"step": 19570
},
{
"epoch": 0.7379489692081559,
"grad_norm": 1.7830963074391613,
"learning_rate": 1.9506074157553674e-06,
"loss": 0.4995,
"step": 19580
},
{
"epoch": 0.7383258583650548,
"grad_norm": 1.6506977257382058,
"learning_rate": 1.9453969062557413e-06,
"loss": 0.4704,
"step": 19590
},
{
"epoch": 0.7387027475219538,
"grad_norm": 1.8345545514824426,
"learning_rate": 1.94019168391302e-06,
"loss": 0.4513,
"step": 19600
},
{
"epoch": 0.7390796366788528,
"grad_norm": 1.5681906345509673,
"learning_rate": 1.9349917577368278e-06,
"loss": 0.4633,
"step": 19610
},
{
"epoch": 0.7394565258357517,
"grad_norm": 1.1866445291178405,
"learning_rate": 1.929797136727622e-06,
"loss": 0.4482,
"step": 19620
},
{
"epoch": 0.7398334149926507,
"grad_norm": 1.7441782836688702,
"learning_rate": 1.924607829876679e-06,
"loss": 0.4656,
"step": 19630
},
{
"epoch": 0.7402103041495496,
"grad_norm": 1.5560618510253559,
"learning_rate": 1.9194238461660715e-06,
"loss": 0.4713,
"step": 19640
},
{
"epoch": 0.7405871933064486,
"grad_norm": 1.612264222679392,
"learning_rate": 1.9142451945686675e-06,
"loss": 0.4888,
"step": 19650
},
{
"epoch": 0.7409640824633476,
"grad_norm": 1.6525004938247971,
"learning_rate": 1.909071884048098e-06,
"loss": 0.4831,
"step": 19660
},
{
"epoch": 0.7413409716202465,
"grad_norm": 1.8991946449473516,
"learning_rate": 1.9039039235587549e-06,
"loss": 0.4793,
"step": 19670
},
{
"epoch": 0.7417178607771454,
"grad_norm": 1.661925662248117,
"learning_rate": 1.898741322045763e-06,
"loss": 0.468,
"step": 19680
},
{
"epoch": 0.7420947499340445,
"grad_norm": 1.6008784105624398,
"learning_rate": 1.8935840884449774e-06,
"loss": 0.4959,
"step": 19690
},
{
"epoch": 0.7424716390909434,
"grad_norm": 1.8462741745907572,
"learning_rate": 1.888432231682958e-06,
"loss": 0.447,
"step": 19700
},
{
"epoch": 0.7428485282478423,
"grad_norm": 1.6621308425745434,
"learning_rate": 1.8832857606769645e-06,
"loss": 0.454,
"step": 19710
},
{
"epoch": 0.7432254174047412,
"grad_norm": 1.4464720693906488,
"learning_rate": 1.8781446843349255e-06,
"loss": 0.4744,
"step": 19720
},
{
"epoch": 0.7436023065616402,
"grad_norm": 1.5289864438887075,
"learning_rate": 1.8730090115554377e-06,
"loss": 0.4847,
"step": 19730
},
{
"epoch": 0.7439791957185392,
"grad_norm": 1.499672820897478,
"learning_rate": 1.8678787512277441e-06,
"loss": 0.4696,
"step": 19740
},
{
"epoch": 0.7443560848754381,
"grad_norm": 1.9522488619546912,
"learning_rate": 1.8627539122317184e-06,
"loss": 0.51,
"step": 19750
},
{
"epoch": 0.7447329740323371,
"grad_norm": 1.578646901148932,
"learning_rate": 1.8576345034378518e-06,
"loss": 0.4626,
"step": 19760
},
{
"epoch": 0.745109863189236,
"grad_norm": 1.6897078881033194,
"learning_rate": 1.8525205337072356e-06,
"loss": 0.4986,
"step": 19770
},
{
"epoch": 0.745486752346135,
"grad_norm": 1.6933365658689057,
"learning_rate": 1.8474120118915468e-06,
"loss": 0.4989,
"step": 19780
},
{
"epoch": 0.745863641503034,
"grad_norm": 1.5425636755893886,
"learning_rate": 1.8423089468330323e-06,
"loss": 0.4755,
"step": 19790
},
{
"epoch": 0.7462405306599329,
"grad_norm": 1.7854964281266579,
"learning_rate": 1.8372113473644954e-06,
"loss": 0.4677,
"step": 19800
},
{
"epoch": 0.7466174198168318,
"grad_norm": 1.5758981880661738,
"learning_rate": 1.8321192223092783e-06,
"loss": 0.4596,
"step": 19810
},
{
"epoch": 0.7469943089737309,
"grad_norm": 1.7615382948562168,
"learning_rate": 1.8270325804812467e-06,
"loss": 0.4959,
"step": 19820
},
{
"epoch": 0.7473711981306298,
"grad_norm": 1.6845149695852346,
"learning_rate": 1.8219514306847769e-06,
"loss": 0.453,
"step": 19830
},
{
"epoch": 0.7477480872875287,
"grad_norm": 1.5622386509669985,
"learning_rate": 1.8168757817147408e-06,
"loss": 0.4636,
"step": 19840
},
{
"epoch": 0.7481249764444277,
"grad_norm": 1.659724890019801,
"learning_rate": 1.8118056423564807e-06,
"loss": 0.4773,
"step": 19850
},
{
"epoch": 0.7485018656013267,
"grad_norm": 1.6175612238786405,
"learning_rate": 1.8067410213858144e-06,
"loss": 0.4586,
"step": 19860
},
{
"epoch": 0.7488787547582256,
"grad_norm": 1.2204409557789897,
"learning_rate": 1.8016819275690005e-06,
"loss": 0.4876,
"step": 19870
},
{
"epoch": 0.7492556439151246,
"grad_norm": 1.5479726038649555,
"learning_rate": 1.7966283696627334e-06,
"loss": 0.4903,
"step": 19880
},
{
"epoch": 0.7496325330720235,
"grad_norm": 1.5825271274712456,
"learning_rate": 1.791580356414122e-06,
"loss": 0.48,
"step": 19890
},
{
"epoch": 0.7500094222289225,
"grad_norm": 1.6729980724400284,
"learning_rate": 1.7865378965606816e-06,
"loss": 0.4971,
"step": 19900
},
{
"epoch": 0.7503863113858215,
"grad_norm": 1.49855379749909,
"learning_rate": 1.7815009988303128e-06,
"loss": 0.4559,
"step": 19910
},
{
"epoch": 0.7507632005427204,
"grad_norm": 1.867864298503486,
"learning_rate": 1.7764696719412955e-06,
"loss": 0.4676,
"step": 19920
},
{
"epoch": 0.7511400896996193,
"grad_norm": 1.398138122231671,
"learning_rate": 1.7714439246022563e-06,
"loss": 0.4665,
"step": 19930
},
{
"epoch": 0.7515169788565182,
"grad_norm": 1.679272548470173,
"learning_rate": 1.7664237655121712e-06,
"loss": 0.4845,
"step": 19940
},
{
"epoch": 0.7518938680134173,
"grad_norm": 1.6071742957955386,
"learning_rate": 1.7614092033603435e-06,
"loss": 0.4762,
"step": 19950
},
{
"epoch": 0.7522707571703162,
"grad_norm": 2.0121603774745,
"learning_rate": 1.7564002468263864e-06,
"loss": 0.4983,
"step": 19960
},
{
"epoch": 0.7526476463272151,
"grad_norm": 1.6878110868707674,
"learning_rate": 1.7513969045802121e-06,
"loss": 0.4774,
"step": 19970
},
{
"epoch": 0.7530245354841141,
"grad_norm": 1.5812116606356919,
"learning_rate": 1.7463991852820146e-06,
"loss": 0.4694,
"step": 19980
},
{
"epoch": 0.7534014246410131,
"grad_norm": 1.6728801843104153,
"learning_rate": 1.741407097582255e-06,
"loss": 0.4812,
"step": 19990
},
{
"epoch": 0.753778313797912,
"grad_norm": 1.558056793965425,
"learning_rate": 1.7364206501216468e-06,
"loss": 0.4771,
"step": 20000
},
{
"epoch": 0.754155202954811,
"grad_norm": 1.572534621526344,
"learning_rate": 1.7314398515311425e-06,
"loss": 0.4825,
"step": 20010
},
{
"epoch": 0.7545320921117099,
"grad_norm": 1.7319010605390628,
"learning_rate": 1.7264647104319144e-06,
"loss": 0.4924,
"step": 20020
},
{
"epoch": 0.754908981268609,
"grad_norm": 1.8088713988535818,
"learning_rate": 1.7214952354353442e-06,
"loss": 0.5092,
"step": 20030
},
{
"epoch": 0.7552858704255079,
"grad_norm": 1.6367538520605005,
"learning_rate": 1.7165314351430073e-06,
"loss": 0.4853,
"step": 20040
},
{
"epoch": 0.7556627595824068,
"grad_norm": 1.8009115510393299,
"learning_rate": 1.7115733181466521e-06,
"loss": 0.4861,
"step": 20050
},
{
"epoch": 0.7560396487393057,
"grad_norm": 1.647359105340635,
"learning_rate": 1.706620893028193e-06,
"loss": 0.4872,
"step": 20060
},
{
"epoch": 0.7564165378962048,
"grad_norm": 1.7977265334517736,
"learning_rate": 1.7016741683596956e-06,
"loss": 0.4861,
"step": 20070
},
{
"epoch": 0.7567934270531037,
"grad_norm": 1.4658968392116936,
"learning_rate": 1.696733152703356e-06,
"loss": 0.4621,
"step": 20080
},
{
"epoch": 0.7571703162100026,
"grad_norm": 1.400405223101517,
"learning_rate": 1.6917978546114844e-06,
"loss": 0.4567,
"step": 20090
},
{
"epoch": 0.7575472053669016,
"grad_norm": 1.7826408832538072,
"learning_rate": 1.686868282626501e-06,
"loss": 0.455,
"step": 20100
},
{
"epoch": 0.7579240945238005,
"grad_norm": 1.369980544067672,
"learning_rate": 1.6819444452809097e-06,
"loss": 0.4498,
"step": 20110
},
{
"epoch": 0.7583009836806995,
"grad_norm": 1.606503602489862,
"learning_rate": 1.6770263510972967e-06,
"loss": 0.4784,
"step": 20120
},
{
"epoch": 0.7586778728375985,
"grad_norm": 1.7717643562693965,
"learning_rate": 1.6721140085882958e-06,
"loss": 0.4983,
"step": 20130
},
{
"epoch": 0.7590547619944974,
"grad_norm": 1.5525199466594755,
"learning_rate": 1.6672074262565935e-06,
"loss": 0.4724,
"step": 20140
},
{
"epoch": 0.7594316511513963,
"grad_norm": 1.8260818838800927,
"learning_rate": 1.6623066125949039e-06,
"loss": 0.4855,
"step": 20150
},
{
"epoch": 0.7598085403082954,
"grad_norm": 1.5847972521629938,
"learning_rate": 1.6574115760859565e-06,
"loss": 0.4962,
"step": 20160
},
{
"epoch": 0.7601854294651943,
"grad_norm": 1.9147538929254044,
"learning_rate": 1.6525223252024803e-06,
"loss": 0.4906,
"step": 20170
},
{
"epoch": 0.7605623186220932,
"grad_norm": 1.8192442389995647,
"learning_rate": 1.6476388684071904e-06,
"loss": 0.4461,
"step": 20180
},
{
"epoch": 0.7609392077789922,
"grad_norm": 1.6057172465162213,
"learning_rate": 1.6427612141527737e-06,
"loss": 0.4661,
"step": 20190
},
{
"epoch": 0.7613160969358912,
"grad_norm": 1.5700052558656017,
"learning_rate": 1.6378893708818737e-06,
"loss": 0.4579,
"step": 20200
},
{
"epoch": 0.7616929860927901,
"grad_norm": 2.4569585216041556,
"learning_rate": 1.6330233470270745e-06,
"loss": 0.4794,
"step": 20210
},
{
"epoch": 0.762069875249689,
"grad_norm": 1.889553496160301,
"learning_rate": 1.6281631510108886e-06,
"loss": 0.442,
"step": 20220
},
{
"epoch": 0.762446764406588,
"grad_norm": 1.5962321807838258,
"learning_rate": 1.6233087912457412e-06,
"loss": 0.4672,
"step": 20230
},
{
"epoch": 0.762823653563487,
"grad_norm": 1.5965451145332648,
"learning_rate": 1.618460276133954e-06,
"loss": 0.4676,
"step": 20240
},
{
"epoch": 0.763200542720386,
"grad_norm": 1.6567069219014103,
"learning_rate": 1.6136176140677368e-06,
"loss": 0.4783,
"step": 20250
},
{
"epoch": 0.7635774318772849,
"grad_norm": 1.4557357506093793,
"learning_rate": 1.6087808134291593e-06,
"loss": 0.4823,
"step": 20260
},
{
"epoch": 0.7639543210341838,
"grad_norm": 1.876565177528593,
"learning_rate": 1.6039498825901568e-06,
"loss": 0.4774,
"step": 20270
},
{
"epoch": 0.7643312101910829,
"grad_norm": 1.7263871602732919,
"learning_rate": 1.5991248299124978e-06,
"loss": 0.4738,
"step": 20280
},
{
"epoch": 0.7647080993479818,
"grad_norm": 1.6090188313058575,
"learning_rate": 1.5943056637477804e-06,
"loss": 0.4567,
"step": 20290
},
{
"epoch": 0.7650849885048807,
"grad_norm": 1.8159775563602787,
"learning_rate": 1.5894923924374077e-06,
"loss": 0.4996,
"step": 20300
},
{
"epoch": 0.7654618776617796,
"grad_norm": 1.694825125868995,
"learning_rate": 1.5846850243125856e-06,
"loss": 0.467,
"step": 20310
},
{
"epoch": 0.7658387668186786,
"grad_norm": 1.4834845730262398,
"learning_rate": 1.5798835676942976e-06,
"loss": 0.4733,
"step": 20320
},
{
"epoch": 0.7662156559755776,
"grad_norm": 1.6457346282769572,
"learning_rate": 1.5750880308933036e-06,
"loss": 0.4775,
"step": 20330
},
{
"epoch": 0.7665925451324765,
"grad_norm": 1.6332831296059798,
"learning_rate": 1.5702984222101053e-06,
"loss": 0.4598,
"step": 20340
},
{
"epoch": 0.7669694342893755,
"grad_norm": 2.0230516472784417,
"learning_rate": 1.565514749934951e-06,
"loss": 0.501,
"step": 20350
},
{
"epoch": 0.7673463234462744,
"grad_norm": 1.9624821551932368,
"learning_rate": 1.5607370223478118e-06,
"loss": 0.4863,
"step": 20360
},
{
"epoch": 0.7677232126031734,
"grad_norm": 1.3909923765612713,
"learning_rate": 1.5559652477183702e-06,
"loss": 0.4592,
"step": 20370
},
{
"epoch": 0.7681001017600724,
"grad_norm": 1.6465814513557648,
"learning_rate": 1.5511994343060033e-06,
"loss": 0.4755,
"step": 20380
},
{
"epoch": 0.7684769909169713,
"grad_norm": 1.610226051279983,
"learning_rate": 1.5464395903597713e-06,
"loss": 0.4469,
"step": 20390
},
{
"epoch": 0.7688538800738702,
"grad_norm": 1.7453869621269993,
"learning_rate": 1.5416857241184007e-06,
"loss": 0.4547,
"step": 20400
},
{
"epoch": 0.7692307692307693,
"grad_norm": 1.310381314751475,
"learning_rate": 1.5369378438102728e-06,
"loss": 0.4668,
"step": 20410
},
{
"epoch": 0.7696076583876682,
"grad_norm": 1.6872384131950084,
"learning_rate": 1.5321959576534073e-06,
"loss": 0.5217,
"step": 20420
},
{
"epoch": 0.7699845475445671,
"grad_norm": 1.6894678298188206,
"learning_rate": 1.527460073855448e-06,
"loss": 0.4665,
"step": 20430
},
{
"epoch": 0.7703614367014661,
"grad_norm": 1.8097958205422997,
"learning_rate": 1.52273020061365e-06,
"loss": 0.4564,
"step": 20440
},
{
"epoch": 0.7707383258583651,
"grad_norm": 1.7701718906608808,
"learning_rate": 1.5180063461148675e-06,
"loss": 0.4664,
"step": 20450
},
{
"epoch": 0.771115215015264,
"grad_norm": 1.2961421273522296,
"learning_rate": 1.5132885185355294e-06,
"loss": 0.4527,
"step": 20460
},
{
"epoch": 0.771492104172163,
"grad_norm": 1.8199021031458331,
"learning_rate": 1.5085767260416396e-06,
"loss": 0.4945,
"step": 20470
},
{
"epoch": 0.7718689933290619,
"grad_norm": 1.5671269899021723,
"learning_rate": 1.5038709767887548e-06,
"loss": 0.4816,
"step": 20480
},
{
"epoch": 0.7722458824859609,
"grad_norm": 1.818867610498672,
"learning_rate": 1.4991712789219714e-06,
"loss": 0.4778,
"step": 20490
},
{
"epoch": 0.7726227716428599,
"grad_norm": 1.5948143698764703,
"learning_rate": 1.4944776405759115e-06,
"loss": 0.4652,
"step": 20500
},
{
"epoch": 0.7729996607997588,
"grad_norm": 1.4821620142605674,
"learning_rate": 1.4897900698747047e-06,
"loss": 0.4422,
"step": 20510
},
{
"epoch": 0.7733765499566577,
"grad_norm": 1.8116026121890576,
"learning_rate": 1.4851085749319827e-06,
"loss": 0.469,
"step": 20520
},
{
"epoch": 0.7737534391135567,
"grad_norm": 1.523784502143446,
"learning_rate": 1.4804331638508623e-06,
"loss": 0.4773,
"step": 20530
},
{
"epoch": 0.7741303282704557,
"grad_norm": 1.8206430937956362,
"learning_rate": 1.4757638447239276e-06,
"loss": 0.4724,
"step": 20540
},
{
"epoch": 0.7745072174273546,
"grad_norm": 1.7588855241689347,
"learning_rate": 1.4711006256332156e-06,
"loss": 0.4382,
"step": 20550
},
{
"epoch": 0.7748841065842536,
"grad_norm": 1.584901601395231,
"learning_rate": 1.4664435146502083e-06,
"loss": 0.4674,
"step": 20560
},
{
"epoch": 0.7752609957411525,
"grad_norm": 1.7973543634379379,
"learning_rate": 1.461792519835814e-06,
"loss": 0.4826,
"step": 20570
},
{
"epoch": 0.7756378848980515,
"grad_norm": 1.673141519768556,
"learning_rate": 1.4571476492403563e-06,
"loss": 0.4753,
"step": 20580
},
{
"epoch": 0.7760147740549505,
"grad_norm": 1.5795162978217505,
"learning_rate": 1.452508910903556e-06,
"loss": 0.4965,
"step": 20590
},
{
"epoch": 0.7763916632118494,
"grad_norm": 1.71797844150418,
"learning_rate": 1.447876312854521e-06,
"loss": 0.466,
"step": 20600
},
{
"epoch": 0.7767685523687483,
"grad_norm": 1.6656630817840206,
"learning_rate": 1.4432498631117314e-06,
"loss": 0.43,
"step": 20610
},
{
"epoch": 0.7771454415256474,
"grad_norm": 1.5139791631000281,
"learning_rate": 1.438629569683025e-06,
"loss": 0.4855,
"step": 20620
},
{
"epoch": 0.7775223306825463,
"grad_norm": 1.6906826740177294,
"learning_rate": 1.4340154405655826e-06,
"loss": 0.456,
"step": 20630
},
{
"epoch": 0.7778992198394452,
"grad_norm": 1.6698594166197502,
"learning_rate": 1.4294074837459177e-06,
"loss": 0.4912,
"step": 20640
},
{
"epoch": 0.7782761089963441,
"grad_norm": 2.250059784898189,
"learning_rate": 1.4248057071998578e-06,
"loss": 0.4998,
"step": 20650
},
{
"epoch": 0.7786529981532432,
"grad_norm": 1.575246221565116,
"learning_rate": 1.420210118892536e-06,
"loss": 0.4768,
"step": 20660
},
{
"epoch": 0.7790298873101421,
"grad_norm": 1.5597959754124424,
"learning_rate": 1.4156207267783679e-06,
"loss": 0.4652,
"step": 20670
},
{
"epoch": 0.779406776467041,
"grad_norm": 1.628610962575096,
"learning_rate": 1.4110375388010538e-06,
"loss": 0.4876,
"step": 20680
},
{
"epoch": 0.77978366562394,
"grad_norm": 1.644374962717556,
"learning_rate": 1.4064605628935479e-06,
"loss": 0.4679,
"step": 20690
},
{
"epoch": 0.780160554780839,
"grad_norm": 1.8253088878455022,
"learning_rate": 1.4018898069780572e-06,
"loss": 0.4772,
"step": 20700
},
{
"epoch": 0.7805374439377379,
"grad_norm": 1.5207862619368617,
"learning_rate": 1.3973252789660158e-06,
"loss": 0.4574,
"step": 20710
},
{
"epoch": 0.7809143330946369,
"grad_norm": 1.7050624400247496,
"learning_rate": 1.3927669867580845e-06,
"loss": 0.4554,
"step": 20720
},
{
"epoch": 0.7812912222515358,
"grad_norm": 1.9273191609007996,
"learning_rate": 1.3882149382441262e-06,
"loss": 0.5077,
"step": 20730
},
{
"epoch": 0.7816681114084347,
"grad_norm": 1.4282728575268597,
"learning_rate": 1.3836691413032045e-06,
"loss": 0.4596,
"step": 20740
},
{
"epoch": 0.7820450005653338,
"grad_norm": 1.5766792437516781,
"learning_rate": 1.37912960380355e-06,
"loss": 0.475,
"step": 20750
},
{
"epoch": 0.7824218897222327,
"grad_norm": 1.588717645461143,
"learning_rate": 1.3745963336025692e-06,
"loss": 0.4701,
"step": 20760
},
{
"epoch": 0.7827987788791316,
"grad_norm": 1.628374898696195,
"learning_rate": 1.3700693385468156e-06,
"loss": 0.467,
"step": 20770
},
{
"epoch": 0.7831756680360306,
"grad_norm": 1.8101040525930028,
"learning_rate": 1.3655486264719832e-06,
"loss": 0.4655,
"step": 20780
},
{
"epoch": 0.7835525571929296,
"grad_norm": 1.5363813815541065,
"learning_rate": 1.3610342052028897e-06,
"loss": 0.4629,
"step": 20790
},
{
"epoch": 0.7839294463498285,
"grad_norm": 1.7639661489622709,
"learning_rate": 1.3565260825534653e-06,
"loss": 0.4502,
"step": 20800
},
{
"epoch": 0.7843063355067275,
"grad_norm": 1.5089833035960092,
"learning_rate": 1.3520242663267375e-06,
"loss": 0.4871,
"step": 20810
},
{
"epoch": 0.7846832246636264,
"grad_norm": 1.5073945769296926,
"learning_rate": 1.3475287643148178e-06,
"loss": 0.4786,
"step": 20820
},
{
"epoch": 0.7850601138205254,
"grad_norm": 1.5526068937170885,
"learning_rate": 1.3430395842988886e-06,
"loss": 0.4549,
"step": 20830
},
{
"epoch": 0.7854370029774244,
"grad_norm": 1.4425597365312917,
"learning_rate": 1.3385567340491901e-06,
"loss": 0.453,
"step": 20840
},
{
"epoch": 0.7858138921343233,
"grad_norm": 1.6718319416849392,
"learning_rate": 1.334080221325006e-06,
"loss": 0.4755,
"step": 20850
},
{
"epoch": 0.7861907812912222,
"grad_norm": 1.4577265197030453,
"learning_rate": 1.3296100538746514e-06,
"loss": 0.4454,
"step": 20860
},
{
"epoch": 0.7865676704481213,
"grad_norm": 1.3692713929909994,
"learning_rate": 1.3251462394354585e-06,
"loss": 0.4927,
"step": 20870
},
{
"epoch": 0.7869445596050202,
"grad_norm": 1.6442519408672833,
"learning_rate": 1.3206887857337586e-06,
"loss": 0.4492,
"step": 20880
},
{
"epoch": 0.7873214487619191,
"grad_norm": 1.6104969996274061,
"learning_rate": 1.3162377004848814e-06,
"loss": 0.4515,
"step": 20890
},
{
"epoch": 0.787698337918818,
"grad_norm": 1.7066857405575866,
"learning_rate": 1.3117929913931277e-06,
"loss": 0.4553,
"step": 20900
},
{
"epoch": 0.788075227075717,
"grad_norm": 1.731131533765537,
"learning_rate": 1.3073546661517655e-06,
"loss": 0.4822,
"step": 20910
},
{
"epoch": 0.788452116232616,
"grad_norm": 1.696849854193515,
"learning_rate": 1.3029227324430077e-06,
"loss": 0.4536,
"step": 20920
},
{
"epoch": 0.788829005389515,
"grad_norm": 1.6266173084112765,
"learning_rate": 1.298497197938008e-06,
"loss": 0.4464,
"step": 20930
},
{
"epoch": 0.7892058945464139,
"grad_norm": 1.5230400863153157,
"learning_rate": 1.2940780702968464e-06,
"loss": 0.4838,
"step": 20940
},
{
"epoch": 0.7895827837033128,
"grad_norm": 1.805355977994606,
"learning_rate": 1.2896653571685108e-06,
"loss": 0.4808,
"step": 20950
},
{
"epoch": 0.7899596728602118,
"grad_norm": 1.6563216644658791,
"learning_rate": 1.2852590661908826e-06,
"loss": 0.4793,
"step": 20960
},
{
"epoch": 0.7903365620171108,
"grad_norm": 1.7324681232443127,
"learning_rate": 1.280859204990732e-06,
"loss": 0.4659,
"step": 20970
},
{
"epoch": 0.7907134511740097,
"grad_norm": 1.6887405218479397,
"learning_rate": 1.2764657811836995e-06,
"loss": 0.4829,
"step": 20980
},
{
"epoch": 0.7910903403309086,
"grad_norm": 1.723570674808702,
"learning_rate": 1.2720788023742819e-06,
"loss": 0.495,
"step": 20990
},
{
"epoch": 0.7914672294878077,
"grad_norm": 1.7913544321855928,
"learning_rate": 1.267698276155821e-06,
"loss": 0.4626,
"step": 21000
},
{
"epoch": 0.7918441186447066,
"grad_norm": 1.7911338951212774,
"learning_rate": 1.2633242101104904e-06,
"loss": 0.4577,
"step": 21010
},
{
"epoch": 0.7922210078016055,
"grad_norm": 1.8879029948128403,
"learning_rate": 1.2589566118092805e-06,
"loss": 0.5013,
"step": 21020
},
{
"epoch": 0.7925978969585045,
"grad_norm": 1.602586178784185,
"learning_rate": 1.2545954888119882e-06,
"loss": 0.4872,
"step": 21030
},
{
"epoch": 0.7929747861154035,
"grad_norm": 1.816604176967041,
"learning_rate": 1.2502408486672018e-06,
"loss": 0.4898,
"step": 21040
},
{
"epoch": 0.7933516752723024,
"grad_norm": 1.6554081568794934,
"learning_rate": 1.2458926989122894e-06,
"loss": 0.4739,
"step": 21050
},
{
"epoch": 0.7937285644292014,
"grad_norm": 1.8212477680147816,
"learning_rate": 1.2415510470733832e-06,
"loss": 0.4574,
"step": 21060
},
{
"epoch": 0.7941054535861003,
"grad_norm": 1.2942280081366884,
"learning_rate": 1.2372159006653711e-06,
"loss": 0.4775,
"step": 21070
},
{
"epoch": 0.7944823427429993,
"grad_norm": 1.7198494071850723,
"learning_rate": 1.2328872671918752e-06,
"loss": 0.5035,
"step": 21080
},
{
"epoch": 0.7948592318998983,
"grad_norm": 1.717727877702444,
"learning_rate": 1.2285651541452526e-06,
"loss": 0.4602,
"step": 21090
},
{
"epoch": 0.7952361210567972,
"grad_norm": 1.7030104890295072,
"learning_rate": 1.2242495690065687e-06,
"loss": 0.4563,
"step": 21100
},
{
"epoch": 0.7956130102136961,
"grad_norm": 1.550857418455602,
"learning_rate": 1.219940519245592e-06,
"loss": 0.4906,
"step": 21110
},
{
"epoch": 0.795989899370595,
"grad_norm": 1.1895997553169184,
"learning_rate": 1.2156380123207761e-06,
"loss": 0.476,
"step": 21120
},
{
"epoch": 0.7963667885274941,
"grad_norm": 1.6911416575004463,
"learning_rate": 1.2113420556792539e-06,
"loss": 0.4591,
"step": 21130
},
{
"epoch": 0.796743677684393,
"grad_norm": 1.439704510030527,
"learning_rate": 1.2070526567568164e-06,
"loss": 0.4489,
"step": 21140
},
{
"epoch": 0.797120566841292,
"grad_norm": 1.6272324810392764,
"learning_rate": 1.20276982297791e-06,
"loss": 0.4763,
"step": 21150
},
{
"epoch": 0.7974974559981909,
"grad_norm": 1.661794252836065,
"learning_rate": 1.1984935617556104e-06,
"loss": 0.4652,
"step": 21160
},
{
"epoch": 0.7978743451550899,
"grad_norm": 1.6161485954397128,
"learning_rate": 1.1942238804916213e-06,
"loss": 0.478,
"step": 21170
},
{
"epoch": 0.7982512343119889,
"grad_norm": 1.5127230860148504,
"learning_rate": 1.1899607865762563e-06,
"loss": 0.4879,
"step": 21180
},
{
"epoch": 0.7986281234688878,
"grad_norm": 1.6559970652967313,
"learning_rate": 1.1857042873884272e-06,
"loss": 0.4557,
"step": 21190
},
{
"epoch": 0.7990050126257867,
"grad_norm": 1.6399527198328363,
"learning_rate": 1.181454390295631e-06,
"loss": 0.4691,
"step": 21200
},
{
"epoch": 0.7993819017826858,
"grad_norm": 1.5670234998624126,
"learning_rate": 1.1772111026539374e-06,
"loss": 0.454,
"step": 21210
},
{
"epoch": 0.7997587909395847,
"grad_norm": 1.7735935156287603,
"learning_rate": 1.172974431807975e-06,
"loss": 0.4637,
"step": 21220
},
{
"epoch": 0.8001356800964836,
"grad_norm": 1.6014800132057636,
"learning_rate": 1.1687443850909208e-06,
"loss": 0.4736,
"step": 21230
},
{
"epoch": 0.8005125692533825,
"grad_norm": 1.63595133372094,
"learning_rate": 1.1645209698244857e-06,
"loss": 0.4642,
"step": 21240
},
{
"epoch": 0.8008894584102816,
"grad_norm": 1.7792664084505718,
"learning_rate": 1.1603041933189024e-06,
"loss": 0.5004,
"step": 21250
},
{
"epoch": 0.8012663475671805,
"grad_norm": 1.7417841561819936,
"learning_rate": 1.1560940628729129e-06,
"loss": 0.4851,
"step": 21260
},
{
"epoch": 0.8016432367240794,
"grad_norm": 1.7474085529041983,
"learning_rate": 1.1518905857737544e-06,
"loss": 0.506,
"step": 21270
},
{
"epoch": 0.8020201258809784,
"grad_norm": 1.4496625205949163,
"learning_rate": 1.1476937692971508e-06,
"loss": 0.4375,
"step": 21280
},
{
"epoch": 0.8023970150378774,
"grad_norm": 1.6179305557255557,
"learning_rate": 1.1435036207072913e-06,
"loss": 0.448,
"step": 21290
},
{
"epoch": 0.8027739041947763,
"grad_norm": 1.6365956374761494,
"learning_rate": 1.1393201472568322e-06,
"loss": 0.4974,
"step": 21300
},
{
"epoch": 0.8031507933516753,
"grad_norm": 1.3450521505509172,
"learning_rate": 1.1351433561868697e-06,
"loss": 0.4508,
"step": 21310
},
{
"epoch": 0.8035276825085742,
"grad_norm": 1.4538600265225352,
"learning_rate": 1.130973254726937e-06,
"loss": 0.4614,
"step": 21320
},
{
"epoch": 0.8039045716654731,
"grad_norm": 1.587273756529849,
"learning_rate": 1.1268098500949843e-06,
"loss": 0.4577,
"step": 21330
},
{
"epoch": 0.8042814608223722,
"grad_norm": 1.6193327198648404,
"learning_rate": 1.122653149497373e-06,
"loss": 0.4548,
"step": 21340
},
{
"epoch": 0.8046583499792711,
"grad_norm": 1.7408766583273092,
"learning_rate": 1.1185031601288627e-06,
"loss": 0.4792,
"step": 21350
},
{
"epoch": 0.80503523913617,
"grad_norm": 1.745321600962186,
"learning_rate": 1.1143598891725948e-06,
"loss": 0.4503,
"step": 21360
},
{
"epoch": 0.805412128293069,
"grad_norm": 1.6321545726426894,
"learning_rate": 1.1102233438000786e-06,
"loss": 0.4755,
"step": 21370
},
{
"epoch": 0.805789017449968,
"grad_norm": 1.6372516624414968,
"learning_rate": 1.1060935311711873e-06,
"loss": 0.4491,
"step": 21380
},
{
"epoch": 0.8061659066068669,
"grad_norm": 1.8295447316903106,
"learning_rate": 1.1019704584341374e-06,
"loss": 0.4645,
"step": 21390
},
{
"epoch": 0.8065427957637659,
"grad_norm": 1.6650412672495543,
"learning_rate": 1.097854132725481e-06,
"loss": 0.4669,
"step": 21400
},
{
"epoch": 0.8069196849206648,
"grad_norm": 1.5629750029935003,
"learning_rate": 1.093744561170092e-06,
"loss": 0.469,
"step": 21410
},
{
"epoch": 0.8072965740775638,
"grad_norm": 1.421621390086491,
"learning_rate": 1.0896417508811518e-06,
"loss": 0.4753,
"step": 21420
},
{
"epoch": 0.8076734632344628,
"grad_norm": 1.6379330117232693,
"learning_rate": 1.0855457089601407e-06,
"loss": 0.4587,
"step": 21430
},
{
"epoch": 0.8080503523913617,
"grad_norm": 1.7407275341596244,
"learning_rate": 1.0814564424968226e-06,
"loss": 0.4966,
"step": 21440
},
{
"epoch": 0.8084272415482606,
"grad_norm": 1.846067209557093,
"learning_rate": 1.0773739585692356e-06,
"loss": 0.5118,
"step": 21450
},
{
"epoch": 0.8088041307051597,
"grad_norm": 1.456547037165189,
"learning_rate": 1.0732982642436757e-06,
"loss": 0.4466,
"step": 21460
},
{
"epoch": 0.8091810198620586,
"grad_norm": 1.7012035006791435,
"learning_rate": 1.0692293665746884e-06,
"loss": 0.4926,
"step": 21470
},
{
"epoch": 0.8095579090189575,
"grad_norm": 1.5902195620043145,
"learning_rate": 1.065167272605056e-06,
"loss": 0.4765,
"step": 21480
},
{
"epoch": 0.8099347981758565,
"grad_norm": 1.527190606227678,
"learning_rate": 1.061111989365779e-06,
"loss": 0.4694,
"step": 21490
},
{
"epoch": 0.8103116873327554,
"grad_norm": 1.6866265884761789,
"learning_rate": 1.0570635238760774e-06,
"loss": 0.4623,
"step": 21500
},
{
"epoch": 0.8106885764896544,
"grad_norm": 1.575765143637899,
"learning_rate": 1.0530218831433652e-06,
"loss": 0.4482,
"step": 21510
},
{
"epoch": 0.8110654656465534,
"grad_norm": 2.4146188153476995,
"learning_rate": 1.0489870741632456e-06,
"loss": 0.4633,
"step": 21520
},
{
"epoch": 0.8114423548034523,
"grad_norm": 1.6718557277711972,
"learning_rate": 1.044959103919494e-06,
"loss": 0.4811,
"step": 21530
},
{
"epoch": 0.8118192439603512,
"grad_norm": 1.7873992974852744,
"learning_rate": 1.0409379793840518e-06,
"loss": 0.4546,
"step": 21540
},
{
"epoch": 0.8121961331172503,
"grad_norm": 1.8256231709433406,
"learning_rate": 1.0369237075170091e-06,
"loss": 0.4743,
"step": 21550
},
{
"epoch": 0.8125730222741492,
"grad_norm": 1.6028991502176622,
"learning_rate": 1.0329162952666e-06,
"loss": 0.441,
"step": 21560
},
{
"epoch": 0.8129499114310481,
"grad_norm": 1.7523235315102614,
"learning_rate": 1.028915749569177e-06,
"loss": 0.4874,
"step": 21570
},
{
"epoch": 0.813326800587947,
"grad_norm": 1.574362879271632,
"learning_rate": 1.0249220773492142e-06,
"loss": 0.47,
"step": 21580
},
{
"epoch": 0.8137036897448461,
"grad_norm": 1.4217729969659767,
"learning_rate": 1.020935285519285e-06,
"loss": 0.4655,
"step": 21590
},
{
"epoch": 0.814080578901745,
"grad_norm": 1.7611978152515075,
"learning_rate": 1.0169553809800543e-06,
"loss": 0.4992,
"step": 21600
},
{
"epoch": 0.8144574680586439,
"grad_norm": 1.4580463296463873,
"learning_rate": 1.0129823706202696e-06,
"loss": 0.4516,
"step": 21610
},
{
"epoch": 0.8148343572155429,
"grad_norm": 1.5284529560123907,
"learning_rate": 1.0090162613167393e-06,
"loss": 0.4668,
"step": 21620
},
{
"epoch": 0.8152112463724419,
"grad_norm": 1.6642481846748813,
"learning_rate": 1.0050570599343302e-06,
"loss": 0.4533,
"step": 21630
},
{
"epoch": 0.8155881355293408,
"grad_norm": 1.3683583910657175,
"learning_rate": 1.0011047733259521e-06,
"loss": 0.4597,
"step": 21640
},
{
"epoch": 0.8159650246862398,
"grad_norm": 1.8875658078070612,
"learning_rate": 9.97159408332547e-07,
"loss": 0.4782,
"step": 21650
},
{
"epoch": 0.8163419138431387,
"grad_norm": 1.692047018964554,
"learning_rate": 9.932209717830744e-07,
"loss": 0.462,
"step": 21660
},
{
"epoch": 0.8167188030000377,
"grad_norm": 1.5378872143686608,
"learning_rate": 9.892894704945022e-07,
"loss": 0.4656,
"step": 21670
},
{
"epoch": 0.8170956921569367,
"grad_norm": 1.5613665579925253,
"learning_rate": 9.85364911271795e-07,
"loss": 0.476,
"step": 21680
},
{
"epoch": 0.8174725813138356,
"grad_norm": 1.6137324346165267,
"learning_rate": 9.814473009079017e-07,
"loss": 0.5071,
"step": 21690
},
{
"epoch": 0.8178494704707345,
"grad_norm": 1.614130393792285,
"learning_rate": 9.7753664618374e-07,
"loss": 0.4664,
"step": 21700
},
{
"epoch": 0.8182263596276335,
"grad_norm": 1.6240132524422568,
"learning_rate": 9.736329538681932e-07,
"loss": 0.4683,
"step": 21710
},
{
"epoch": 0.8186032487845325,
"grad_norm": 1.5296419317199343,
"learning_rate": 9.697362307180918e-07,
"loss": 0.4939,
"step": 21720
},
{
"epoch": 0.8189801379414314,
"grad_norm": 1.5321812462004734,
"learning_rate": 9.658464834782033e-07,
"loss": 0.4778,
"step": 21730
},
{
"epoch": 0.8193570270983304,
"grad_norm": 1.5185831568058594,
"learning_rate": 9.619637188812175e-07,
"loss": 0.46,
"step": 21740
},
{
"epoch": 0.8197339162552293,
"grad_norm": 1.6821769094096455,
"learning_rate": 9.58087943647743e-07,
"loss": 0.4879,
"step": 21750
},
{
"epoch": 0.8201108054121283,
"grad_norm": 1.467098506919944,
"learning_rate": 9.542191644862869e-07,
"loss": 0.4338,
"step": 21760
},
{
"epoch": 0.8204876945690273,
"grad_norm": 1.5832840990797274,
"learning_rate": 9.503573880932527e-07,
"loss": 0.4617,
"step": 21770
},
{
"epoch": 0.8208645837259262,
"grad_norm": 1.4725123019965634,
"learning_rate": 9.465026211529149e-07,
"loss": 0.4591,
"step": 21780
},
{
"epoch": 0.8212414728828251,
"grad_norm": 1.4443596976324486,
"learning_rate": 9.42654870337421e-07,
"loss": 0.4723,
"step": 21790
},
{
"epoch": 0.8216183620397242,
"grad_norm": 1.7715020744201238,
"learning_rate": 9.38814142306772e-07,
"loss": 0.4414,
"step": 21800
},
{
"epoch": 0.8219952511966231,
"grad_norm": 1.6263211713573036,
"learning_rate": 9.349804437088155e-07,
"loss": 0.4657,
"step": 21810
},
{
"epoch": 0.822372140353522,
"grad_norm": 2.675858370147406,
"learning_rate": 9.311537811792299e-07,
"loss": 0.4547,
"step": 21820
},
{
"epoch": 0.822749029510421,
"grad_norm": 1.6989825689942237,
"learning_rate": 9.273341613415155e-07,
"loss": 0.4486,
"step": 21830
},
{
"epoch": 0.82312591866732,
"grad_norm": 1.758883283595281,
"learning_rate": 9.235215908069828e-07,
"loss": 0.4752,
"step": 21840
},
{
"epoch": 0.8235028078242189,
"grad_norm": 1.6500955266665545,
"learning_rate": 9.197160761747415e-07,
"loss": 0.4578,
"step": 21850
},
{
"epoch": 0.8238796969811178,
"grad_norm": 1.4200589057349262,
"learning_rate": 9.159176240316869e-07,
"loss": 0.4292,
"step": 21860
},
{
"epoch": 0.8242565861380168,
"grad_norm": 1.7138258286525525,
"learning_rate": 9.121262409524906e-07,
"loss": 0.479,
"step": 21870
},
{
"epoch": 0.8246334752949158,
"grad_norm": 1.6629136940635971,
"learning_rate": 9.08341933499589e-07,
"loss": 0.4676,
"step": 21880
},
{
"epoch": 0.8250103644518147,
"grad_norm": 1.7116482891389613,
"learning_rate": 9.045647082231729e-07,
"loss": 0.4717,
"step": 21890
},
{
"epoch": 0.8253872536087137,
"grad_norm": 1.695232561294525,
"learning_rate": 9.007945716611688e-07,
"loss": 0.4766,
"step": 21900
},
{
"epoch": 0.8257641427656126,
"grad_norm": 1.7002498190688085,
"learning_rate": 8.970315303392379e-07,
"loss": 0.4901,
"step": 21910
},
{
"epoch": 0.8261410319225115,
"grad_norm": 1.492022987501469,
"learning_rate": 8.93275590770763e-07,
"loss": 0.4444,
"step": 21920
},
{
"epoch": 0.8265179210794106,
"grad_norm": 1.7137434822176278,
"learning_rate": 8.895267594568302e-07,
"loss": 0.4595,
"step": 21930
},
{
"epoch": 0.8268948102363095,
"grad_norm": 1.5218510147047475,
"learning_rate": 8.857850428862241e-07,
"loss": 0.4631,
"step": 21940
},
{
"epoch": 0.8272716993932084,
"grad_norm": 1.8826119986337635,
"learning_rate": 8.820504475354119e-07,
"loss": 0.5034,
"step": 21950
},
{
"epoch": 0.8276485885501074,
"grad_norm": 1.5920446251275133,
"learning_rate": 8.783229798685361e-07,
"loss": 0.467,
"step": 21960
},
{
"epoch": 0.8280254777070064,
"grad_norm": 1.7063227060767632,
"learning_rate": 8.746026463374058e-07,
"loss": 0.4541,
"step": 21970
},
{
"epoch": 0.8284023668639053,
"grad_norm": 1.618446816633231,
"learning_rate": 8.708894533814788e-07,
"loss": 0.4466,
"step": 21980
},
{
"epoch": 0.8287792560208043,
"grad_norm": 1.7677621425455639,
"learning_rate": 8.671834074278496e-07,
"loss": 0.4488,
"step": 21990
},
{
"epoch": 0.8291561451777032,
"grad_norm": 1.8004192808155495,
"learning_rate": 8.63484514891248e-07,
"loss": 0.4781,
"step": 22000
},
{
"epoch": 0.8295330343346022,
"grad_norm": 1.638331409201194,
"learning_rate": 8.597927821740188e-07,
"loss": 0.4847,
"step": 22010
},
{
"epoch": 0.8299099234915012,
"grad_norm": 1.547508824093798,
"learning_rate": 8.56108215666116e-07,
"loss": 0.467,
"step": 22020
},
{
"epoch": 0.8302868126484001,
"grad_norm": 1.6865585478017953,
"learning_rate": 8.524308217450883e-07,
"loss": 0.4976,
"step": 22030
},
{
"epoch": 0.830663701805299,
"grad_norm": 1.5407631720098116,
"learning_rate": 8.487606067760695e-07,
"loss": 0.4503,
"step": 22040
},
{
"epoch": 0.8310405909621981,
"grad_norm": 1.7278683855838521,
"learning_rate": 8.450975771117686e-07,
"loss": 0.4766,
"step": 22050
},
{
"epoch": 0.831417480119097,
"grad_norm": 1.6881225655115248,
"learning_rate": 8.414417390924567e-07,
"loss": 0.4529,
"step": 22060
},
{
"epoch": 0.8317943692759959,
"grad_norm": 1.5343097557556282,
"learning_rate": 8.37793099045957e-07,
"loss": 0.4654,
"step": 22070
},
{
"epoch": 0.8321712584328949,
"grad_norm": 1.5443651240466079,
"learning_rate": 8.341516632876345e-07,
"loss": 0.4725,
"step": 22080
},
{
"epoch": 0.8325481475897939,
"grad_norm": 1.709840635782194,
"learning_rate": 8.30517438120384e-07,
"loss": 0.4648,
"step": 22090
},
{
"epoch": 0.8329250367466928,
"grad_norm": 1.734867377598363,
"learning_rate": 8.268904298346215e-07,
"loss": 0.4862,
"step": 22100
},
{
"epoch": 0.8333019259035918,
"grad_norm": 1.5853579920685308,
"learning_rate": 8.232706447082644e-07,
"loss": 0.4554,
"step": 22110
},
{
"epoch": 0.8336788150604907,
"grad_norm": 1.8218704126887049,
"learning_rate": 8.196580890067379e-07,
"loss": 0.4712,
"step": 22120
},
{
"epoch": 0.8340557042173896,
"grad_norm": 1.7644284565478414,
"learning_rate": 8.160527689829473e-07,
"loss": 0.5028,
"step": 22130
},
{
"epoch": 0.8344325933742887,
"grad_norm": 1.6857508469766067,
"learning_rate": 8.124546908772768e-07,
"loss": 0.4622,
"step": 22140
},
{
"epoch": 0.8348094825311876,
"grad_norm": 1.6855492767191518,
"learning_rate": 8.088638609175719e-07,
"loss": 0.4948,
"step": 22150
},
{
"epoch": 0.8351863716880865,
"grad_norm": 1.3978162670227476,
"learning_rate": 8.052802853191355e-07,
"loss": 0.4743,
"step": 22160
},
{
"epoch": 0.8355632608449854,
"grad_norm": 1.6951740065317433,
"learning_rate": 8.01703970284713e-07,
"loss": 0.4754,
"step": 22170
},
{
"epoch": 0.8359401500018845,
"grad_norm": 1.7573161295973543,
"learning_rate": 7.98134922004486e-07,
"loss": 0.4871,
"step": 22180
},
{
"epoch": 0.8363170391587834,
"grad_norm": 1.4918859167709155,
"learning_rate": 7.945731466560519e-07,
"loss": 0.4794,
"step": 22190
},
{
"epoch": 0.8366939283156823,
"grad_norm": 1.7049198529331042,
"learning_rate": 7.910186504044237e-07,
"loss": 0.4611,
"step": 22200
},
{
"epoch": 0.8370708174725813,
"grad_norm": 1.4292962627440216,
"learning_rate": 7.874714394020145e-07,
"loss": 0.4175,
"step": 22210
},
{
"epoch": 0.8374477066294803,
"grad_norm": 2.0016660592227145,
"learning_rate": 7.839315197886277e-07,
"loss": 0.468,
"step": 22220
},
{
"epoch": 0.8378245957863792,
"grad_norm": 1.8041549181742595,
"learning_rate": 7.803988976914451e-07,
"loss": 0.4661,
"step": 22230
},
{
"epoch": 0.8382014849432782,
"grad_norm": 1.793146229523631,
"learning_rate": 7.768735792250176e-07,
"loss": 0.4874,
"step": 22240
},
{
"epoch": 0.8385783741001771,
"grad_norm": 1.8151183016027572,
"learning_rate": 7.73355570491256e-07,
"loss": 0.4688,
"step": 22250
},
{
"epoch": 0.8389552632570761,
"grad_norm": 1.6753537342016527,
"learning_rate": 7.698448775794171e-07,
"loss": 0.4918,
"step": 22260
},
{
"epoch": 0.8393321524139751,
"grad_norm": 1.5400170574876808,
"learning_rate": 7.663415065660951e-07,
"loss": 0.4734,
"step": 22270
},
{
"epoch": 0.839709041570874,
"grad_norm": 1.4967554695098977,
"learning_rate": 7.628454635152111e-07,
"loss": 0.4725,
"step": 22280
},
{
"epoch": 0.8400859307277729,
"grad_norm": 1.8581940585235912,
"learning_rate": 7.593567544780028e-07,
"loss": 0.4813,
"step": 22290
},
{
"epoch": 0.8404628198846719,
"grad_norm": 1.7617654233248232,
"learning_rate": 7.558753854930129e-07,
"loss": 0.4798,
"step": 22300
},
{
"epoch": 0.8408397090415709,
"grad_norm": 1.8228739549825173,
"learning_rate": 7.52401362586081e-07,
"loss": 0.4661,
"step": 22310
},
{
"epoch": 0.8412165981984698,
"grad_norm": 1.5249651201885777,
"learning_rate": 7.489346917703261e-07,
"loss": 0.4881,
"step": 22320
},
{
"epoch": 0.8415934873553688,
"grad_norm": 1.6186808625257767,
"learning_rate": 7.4547537904615e-07,
"loss": 0.4733,
"step": 22330
},
{
"epoch": 0.8419703765122677,
"grad_norm": 2.203606849350494,
"learning_rate": 7.420234304012119e-07,
"loss": 0.4488,
"step": 22340
},
{
"epoch": 0.8423472656691667,
"grad_norm": 1.791129399689499,
"learning_rate": 7.385788518104287e-07,
"loss": 0.4419,
"step": 22350
},
{
"epoch": 0.8427241548260657,
"grad_norm": 1.586850556241108,
"learning_rate": 7.351416492359564e-07,
"loss": 0.4632,
"step": 22360
},
{
"epoch": 0.8431010439829646,
"grad_norm": 1.4777261662520846,
"learning_rate": 7.317118286271869e-07,
"loss": 0.4428,
"step": 22370
},
{
"epoch": 0.8434779331398635,
"grad_norm": 1.6300982151377412,
"learning_rate": 7.282893959207354e-07,
"loss": 0.4844,
"step": 22380
},
{
"epoch": 0.8438548222967626,
"grad_norm": 1.7666022912394777,
"learning_rate": 7.248743570404293e-07,
"loss": 0.4825,
"step": 22390
},
{
"epoch": 0.8442317114536615,
"grad_norm": 1.5994223812698616,
"learning_rate": 7.214667178972951e-07,
"loss": 0.4678,
"step": 22400
},
{
"epoch": 0.8446086006105604,
"grad_norm": 1.5930748274830833,
"learning_rate": 7.180664843895536e-07,
"loss": 0.4825,
"step": 22410
},
{
"epoch": 0.8449854897674594,
"grad_norm": 1.8952726942929914,
"learning_rate": 7.146736624026073e-07,
"loss": 0.4619,
"step": 22420
},
{
"epoch": 0.8453623789243584,
"grad_norm": 1.6535903612300786,
"learning_rate": 7.112882578090308e-07,
"loss": 0.442,
"step": 22430
},
{
"epoch": 0.8457392680812573,
"grad_norm": 1.8113816961588787,
"learning_rate": 7.079102764685592e-07,
"loss": 0.4727,
"step": 22440
},
{
"epoch": 0.8461161572381563,
"grad_norm": 1.6381559303129163,
"learning_rate": 7.045397242280782e-07,
"loss": 0.4739,
"step": 22450
},
{
"epoch": 0.8464930463950552,
"grad_norm": 1.6420241463187408,
"learning_rate": 7.011766069216153e-07,
"loss": 0.4728,
"step": 22460
},
{
"epoch": 0.8468699355519542,
"grad_norm": 1.8579503334291299,
"learning_rate": 6.978209303703298e-07,
"loss": 0.467,
"step": 22470
},
{
"epoch": 0.8472468247088532,
"grad_norm": 1.8303598632267788,
"learning_rate": 6.944727003825014e-07,
"loss": 0.459,
"step": 22480
},
{
"epoch": 0.8476237138657521,
"grad_norm": 1.6128953601572278,
"learning_rate": 6.91131922753519e-07,
"loss": 0.4592,
"step": 22490
},
{
"epoch": 0.848000603022651,
"grad_norm": 1.7095865691859484,
"learning_rate": 6.877986032658751e-07,
"loss": 0.48,
"step": 22500
},
{
"epoch": 0.8483774921795499,
"grad_norm": 1.4938798824764543,
"learning_rate": 6.844727476891521e-07,
"loss": 0.4781,
"step": 22510
},
{
"epoch": 0.848754381336449,
"grad_norm": 1.6485957619451577,
"learning_rate": 6.811543617800104e-07,
"loss": 0.5045,
"step": 22520
},
{
"epoch": 0.8491312704933479,
"grad_norm": 1.837620635777043,
"learning_rate": 6.778434512821863e-07,
"loss": 0.4748,
"step": 22530
},
{
"epoch": 0.8495081596502468,
"grad_norm": 1.4879964061685897,
"learning_rate": 6.745400219264736e-07,
"loss": 0.4304,
"step": 22540
},
{
"epoch": 0.8498850488071458,
"grad_norm": 1.7156819983043117,
"learning_rate": 6.712440794307191e-07,
"loss": 0.466,
"step": 22550
},
{
"epoch": 0.8502619379640448,
"grad_norm": 1.3621122589006487,
"learning_rate": 6.67955629499808e-07,
"loss": 0.4704,
"step": 22560
},
{
"epoch": 0.8506388271209437,
"grad_norm": 2.0534498348755834,
"learning_rate": 6.646746778256591e-07,
"loss": 0.4854,
"step": 22570
},
{
"epoch": 0.8510157162778427,
"grad_norm": 1.6005946712317962,
"learning_rate": 6.614012300872108e-07,
"loss": 0.4695,
"step": 22580
},
{
"epoch": 0.8513926054347416,
"grad_norm": 1.5414012999559674,
"learning_rate": 6.581352919504175e-07,
"loss": 0.4707,
"step": 22590
},
{
"epoch": 0.8517694945916406,
"grad_norm": 1.8269075507153945,
"learning_rate": 6.548768690682295e-07,
"loss": 0.4661,
"step": 22600
},
{
"epoch": 0.8521463837485396,
"grad_norm": 1.8104476310135393,
"learning_rate": 6.516259670805914e-07,
"loss": 0.4715,
"step": 22610
},
{
"epoch": 0.8525232729054385,
"grad_norm": 1.5462248619686105,
"learning_rate": 6.483825916144315e-07,
"loss": 0.4634,
"step": 22620
},
{
"epoch": 0.8529001620623374,
"grad_norm": 1.7572378321726936,
"learning_rate": 6.451467482836493e-07,
"loss": 0.47,
"step": 22630
},
{
"epoch": 0.8532770512192365,
"grad_norm": 1.719401134457696,
"learning_rate": 6.419184426891062e-07,
"loss": 0.4417,
"step": 22640
},
{
"epoch": 0.8536539403761354,
"grad_norm": 1.8799617216498543,
"learning_rate": 6.386976804186185e-07,
"loss": 0.4543,
"step": 22650
},
{
"epoch": 0.8540308295330343,
"grad_norm": 1.7409813880394684,
"learning_rate": 6.354844670469446e-07,
"loss": 0.4555,
"step": 22660
},
{
"epoch": 0.8544077186899333,
"grad_norm": 1.7233766286902579,
"learning_rate": 6.322788081357767e-07,
"loss": 0.4894,
"step": 22670
},
{
"epoch": 0.8547846078468323,
"grad_norm": 1.822979772670787,
"learning_rate": 6.290807092337325e-07,
"loss": 0.456,
"step": 22680
},
{
"epoch": 0.8551614970037312,
"grad_norm": 1.7840609931799256,
"learning_rate": 6.258901758763425e-07,
"loss": 0.4467,
"step": 22690
},
{
"epoch": 0.8555383861606302,
"grad_norm": 1.7330476182654266,
"learning_rate": 6.227072135860424e-07,
"loss": 0.4887,
"step": 22700
},
{
"epoch": 0.8559152753175291,
"grad_norm": 1.634410872367023,
"learning_rate": 6.195318278721646e-07,
"loss": 0.4589,
"step": 22710
},
{
"epoch": 0.856292164474428,
"grad_norm": 1.8487043942748613,
"learning_rate": 6.163640242309271e-07,
"loss": 0.4514,
"step": 22720
},
{
"epoch": 0.8566690536313271,
"grad_norm": 1.6693062741828186,
"learning_rate": 6.132038081454206e-07,
"loss": 0.4654,
"step": 22730
},
{
"epoch": 0.857045942788226,
"grad_norm": 1.6243325415551528,
"learning_rate": 6.100511850856083e-07,
"loss": 0.4877,
"step": 22740
},
{
"epoch": 0.8574228319451249,
"grad_norm": 1.76930759153765,
"learning_rate": 6.069061605083076e-07,
"loss": 0.4678,
"step": 22750
},
{
"epoch": 0.8577997211020238,
"grad_norm": 1.6289028881940613,
"learning_rate": 6.037687398571846e-07,
"loss": 0.4247,
"step": 22760
},
{
"epoch": 0.8581766102589229,
"grad_norm": 1.6696350756219092,
"learning_rate": 6.006389285627423e-07,
"loss": 0.4883,
"step": 22770
},
{
"epoch": 0.8585534994158218,
"grad_norm": 1.513884778231805,
"learning_rate": 5.975167320423137e-07,
"loss": 0.4606,
"step": 22780
},
{
"epoch": 0.8589303885727207,
"grad_norm": 1.6184179790577693,
"learning_rate": 5.94402155700054e-07,
"loss": 0.4408,
"step": 22790
},
{
"epoch": 0.8593072777296197,
"grad_norm": 1.8267950570439317,
"learning_rate": 5.912952049269271e-07,
"loss": 0.4638,
"step": 22800
},
{
"epoch": 0.8596841668865187,
"grad_norm": 1.411486088307513,
"learning_rate": 5.881958851006952e-07,
"loss": 0.4581,
"step": 22810
},
{
"epoch": 0.8600610560434176,
"grad_norm": 1.4674642682660017,
"learning_rate": 5.851042015859154e-07,
"loss": 0.4565,
"step": 22820
},
{
"epoch": 0.8604379452003166,
"grad_norm": 1.3939142639115625,
"learning_rate": 5.82020159733927e-07,
"loss": 0.4704,
"step": 22830
},
{
"epoch": 0.8608148343572155,
"grad_norm": 1.5415763619326572,
"learning_rate": 5.789437648828411e-07,
"loss": 0.433,
"step": 22840
},
{
"epoch": 0.8611917235141145,
"grad_norm": 1.6303887934059382,
"learning_rate": 5.758750223575344e-07,
"loss": 0.4747,
"step": 22850
},
{
"epoch": 0.8615686126710135,
"grad_norm": 1.6198622243082812,
"learning_rate": 5.728139374696368e-07,
"loss": 0.4741,
"step": 22860
},
{
"epoch": 0.8619455018279124,
"grad_norm": 1.6527922202005432,
"learning_rate": 5.697605155175246e-07,
"loss": 0.441,
"step": 22870
},
{
"epoch": 0.8623223909848113,
"grad_norm": 1.6598779751043562,
"learning_rate": 5.667147617863106e-07,
"loss": 0.465,
"step": 22880
},
{
"epoch": 0.8626992801417104,
"grad_norm": 1.601592197543877,
"learning_rate": 5.636766815478346e-07,
"loss": 0.4664,
"step": 22890
},
{
"epoch": 0.8630761692986093,
"grad_norm": 1.8518020978894572,
"learning_rate": 5.606462800606538e-07,
"loss": 0.4765,
"step": 22900
},
{
"epoch": 0.8634530584555082,
"grad_norm": 1.8569990976395905,
"learning_rate": 5.576235625700355e-07,
"loss": 0.4947,
"step": 22910
},
{
"epoch": 0.8638299476124072,
"grad_norm": 1.78143629478858,
"learning_rate": 5.546085343079472e-07,
"loss": 0.4733,
"step": 22920
},
{
"epoch": 0.8642068367693061,
"grad_norm": 1.7977943755656154,
"learning_rate": 5.516012004930432e-07,
"loss": 0.4677,
"step": 22930
},
{
"epoch": 0.8645837259262051,
"grad_norm": 1.3894902301565615,
"learning_rate": 5.486015663306665e-07,
"loss": 0.4603,
"step": 22940
},
{
"epoch": 0.8649606150831041,
"grad_norm": 1.8030057987995889,
"learning_rate": 5.456096370128277e-07,
"loss": 0.438,
"step": 22950
},
{
"epoch": 0.865337504240003,
"grad_norm": 1.7403239254852352,
"learning_rate": 5.426254177182039e-07,
"loss": 0.4651,
"step": 22960
},
{
"epoch": 0.8657143933969019,
"grad_norm": 1.5972874364344118,
"learning_rate": 5.396489136121241e-07,
"loss": 0.4621,
"step": 22970
},
{
"epoch": 0.866091282553801,
"grad_norm": 1.9303613177891243,
"learning_rate": 5.366801298465662e-07,
"loss": 0.4713,
"step": 22980
},
{
"epoch": 0.8664681717106999,
"grad_norm": 1.6872934044203467,
"learning_rate": 5.337190715601426e-07,
"loss": 0.4905,
"step": 22990
},
{
"epoch": 0.8668450608675988,
"grad_norm": 1.7126679035862156,
"learning_rate": 5.307657438780988e-07,
"loss": 0.4803,
"step": 23000
},
{
"epoch": 0.8672219500244978,
"grad_norm": 1.7350738496917792,
"learning_rate": 5.278201519122922e-07,
"loss": 0.4845,
"step": 23010
},
{
"epoch": 0.8675988391813968,
"grad_norm": 1.787434922425744,
"learning_rate": 5.248823007611964e-07,
"loss": 0.4893,
"step": 23020
},
{
"epoch": 0.8679757283382957,
"grad_norm": 1.872251424045816,
"learning_rate": 5.219521955098833e-07,
"loss": 0.4786,
"step": 23030
},
{
"epoch": 0.8683526174951947,
"grad_norm": 1.7715299011721564,
"learning_rate": 5.190298412300182e-07,
"loss": 0.4791,
"step": 23040
},
{
"epoch": 0.8687295066520936,
"grad_norm": 1.8357529427154649,
"learning_rate": 5.161152429798538e-07,
"loss": 0.4772,
"step": 23050
},
{
"epoch": 0.8691063958089926,
"grad_norm": 1.6077824935411622,
"learning_rate": 5.13208405804213e-07,
"loss": 0.4591,
"step": 23060
},
{
"epoch": 0.8694832849658916,
"grad_norm": 2.0708649306446567,
"learning_rate": 5.103093347344872e-07,
"loss": 0.4645,
"step": 23070
},
{
"epoch": 0.8698601741227905,
"grad_norm": 1.4566190018720826,
"learning_rate": 5.074180347886265e-07,
"loss": 0.4395,
"step": 23080
},
{
"epoch": 0.8702370632796894,
"grad_norm": 1.8556474690031441,
"learning_rate": 5.045345109711291e-07,
"loss": 0.4897,
"step": 23090
},
{
"epoch": 0.8706139524365883,
"grad_norm": 1.8191111683824606,
"learning_rate": 5.016587682730339e-07,
"loss": 0.4656,
"step": 23100
},
{
"epoch": 0.8709908415934874,
"grad_norm": 1.8020154384020783,
"learning_rate": 4.98790811671912e-07,
"loss": 0.4805,
"step": 23110
},
{
"epoch": 0.8713677307503863,
"grad_norm": 1.6434790782350468,
"learning_rate": 4.959306461318563e-07,
"loss": 0.4614,
"step": 23120
},
{
"epoch": 0.8717446199072852,
"grad_norm": 1.820609481455034,
"learning_rate": 4.930782766034775e-07,
"loss": 0.461,
"step": 23130
},
{
"epoch": 0.8721215090641842,
"grad_norm": 1.5856064167725252,
"learning_rate": 4.902337080238862e-07,
"loss": 0.4856,
"step": 23140
},
{
"epoch": 0.8724983982210832,
"grad_norm": 1.7306241882494389,
"learning_rate": 4.873969453166982e-07,
"loss": 0.4874,
"step": 23150
},
{
"epoch": 0.8728752873779821,
"grad_norm": 1.5611552008896656,
"learning_rate": 4.845679933920122e-07,
"loss": 0.4528,
"step": 23160
},
{
"epoch": 0.8732521765348811,
"grad_norm": 1.4214411570399696,
"learning_rate": 4.817468571464118e-07,
"loss": 0.4712,
"step": 23170
},
{
"epoch": 0.87362906569178,
"grad_norm": 1.4281643083118118,
"learning_rate": 4.789335414629481e-07,
"loss": 0.4506,
"step": 23180
},
{
"epoch": 0.874005954848679,
"grad_norm": 1.737558818061735,
"learning_rate": 4.761280512111377e-07,
"loss": 0.4896,
"step": 23190
},
{
"epoch": 0.874382844005578,
"grad_norm": 1.9801433378045732,
"learning_rate": 4.733303912469545e-07,
"loss": 0.4325,
"step": 23200
},
{
"epoch": 0.8747597331624769,
"grad_norm": 1.5293237221434222,
"learning_rate": 4.7054056641281767e-07,
"loss": 0.4658,
"step": 23210
},
{
"epoch": 0.8751366223193758,
"grad_norm": 1.5764509216468041,
"learning_rate": 4.6775858153758237e-07,
"loss": 0.4849,
"step": 23220
},
{
"epoch": 0.8755135114762749,
"grad_norm": 1.4828757798168408,
"learning_rate": 4.649844414365357e-07,
"loss": 0.4399,
"step": 23230
},
{
"epoch": 0.8758904006331738,
"grad_norm": 1.6081292880236426,
"learning_rate": 4.622181509113871e-07,
"loss": 0.4631,
"step": 23240
},
{
"epoch": 0.8762672897900727,
"grad_norm": 1.6761532852837226,
"learning_rate": 4.594597147502583e-07,
"loss": 0.4814,
"step": 23250
},
{
"epoch": 0.8766441789469717,
"grad_norm": 1.7471109969458456,
"learning_rate": 4.5670913772767665e-07,
"loss": 0.4681,
"step": 23260
},
{
"epoch": 0.8770210681038707,
"grad_norm": 1.670160553063871,
"learning_rate": 4.53966424604565e-07,
"loss": 0.4832,
"step": 23270
},
{
"epoch": 0.8773979572607696,
"grad_norm": 2.012186736194834,
"learning_rate": 4.5123158012823577e-07,
"loss": 0.4324,
"step": 23280
},
{
"epoch": 0.8777748464176686,
"grad_norm": 1.6439554732480879,
"learning_rate": 4.4850460903238193e-07,
"loss": 0.4924,
"step": 23290
},
{
"epoch": 0.8781517355745675,
"grad_norm": 1.5734128031410948,
"learning_rate": 4.4578551603706834e-07,
"loss": 0.479,
"step": 23300
},
{
"epoch": 0.8785286247314664,
"grad_norm": 1.5385928479066775,
"learning_rate": 4.4307430584872247e-07,
"loss": 0.4493,
"step": 23310
},
{
"epoch": 0.8789055138883655,
"grad_norm": 1.495012993416791,
"learning_rate": 4.403709831601299e-07,
"loss": 0.4394,
"step": 23320
},
{
"epoch": 0.8792824030452644,
"grad_norm": 1.7809672218234174,
"learning_rate": 4.3767555265042283e-07,
"loss": 0.4492,
"step": 23330
},
{
"epoch": 0.8796592922021633,
"grad_norm": 1.7530873048425202,
"learning_rate": 4.3498801898507027e-07,
"loss": 0.4556,
"step": 23340
},
{
"epoch": 0.8800361813590623,
"grad_norm": 1.6880143734094333,
"learning_rate": 4.323083868158784e-07,
"loss": 0.4908,
"step": 23350
},
{
"epoch": 0.8804130705159613,
"grad_norm": 1.7763398568413087,
"learning_rate": 4.2963666078097267e-07,
"loss": 0.464,
"step": 23360
},
{
"epoch": 0.8807899596728602,
"grad_norm": 1.534011001228688,
"learning_rate": 4.26972845504795e-07,
"loss": 0.466,
"step": 23370
},
{
"epoch": 0.8811668488297592,
"grad_norm": 1.6994612004248872,
"learning_rate": 4.2431694559809554e-07,
"loss": 0.5048,
"step": 23380
},
{
"epoch": 0.8815437379866581,
"grad_norm": 1.7973858705958985,
"learning_rate": 4.216689656579215e-07,
"loss": 0.4866,
"step": 23390
},
{
"epoch": 0.8819206271435571,
"grad_norm": 1.7439539082800413,
"learning_rate": 4.1902891026761316e-07,
"loss": 0.4917,
"step": 23400
},
{
"epoch": 0.882297516300456,
"grad_norm": 1.8633764405786568,
"learning_rate": 4.1639678399679586e-07,
"loss": 0.474,
"step": 23410
},
{
"epoch": 0.882674405457355,
"grad_norm": 3.0062093045952927,
"learning_rate": 4.137725914013696e-07,
"loss": 0.4931,
"step": 23420
},
{
"epoch": 0.8830512946142539,
"grad_norm": 1.6834986673559424,
"learning_rate": 4.1115633702349943e-07,
"loss": 0.4785,
"step": 23430
},
{
"epoch": 0.883428183771153,
"grad_norm": 1.528153535189198,
"learning_rate": 4.0854802539161353e-07,
"loss": 0.4653,
"step": 23440
},
{
"epoch": 0.8838050729280519,
"grad_norm": 1.9539698847148672,
"learning_rate": 4.0594766102039e-07,
"loss": 0.4624,
"step": 23450
},
{
"epoch": 0.8841819620849508,
"grad_norm": 1.8627808627003384,
"learning_rate": 4.03355248410755e-07,
"loss": 0.4597,
"step": 23460
},
{
"epoch": 0.8845588512418497,
"grad_norm": 1.8122593075205515,
"learning_rate": 4.007707920498649e-07,
"loss": 0.456,
"step": 23470
},
{
"epoch": 0.8849357403987488,
"grad_norm": 1.6711500055529624,
"learning_rate": 3.9819429641111074e-07,
"loss": 0.4732,
"step": 23480
},
{
"epoch": 0.8853126295556477,
"grad_norm": 1.5192643064244584,
"learning_rate": 3.956257659541002e-07,
"loss": 0.468,
"step": 23490
},
{
"epoch": 0.8856895187125466,
"grad_norm": 1.813662608056834,
"learning_rate": 3.93065205124657e-07,
"loss": 0.4752,
"step": 23500
},
{
"epoch": 0.8860664078694456,
"grad_norm": 1.7914587106316096,
"learning_rate": 3.905126183548086e-07,
"loss": 0.45,
"step": 23510
},
{
"epoch": 0.8864432970263445,
"grad_norm": 1.634445231205134,
"learning_rate": 3.879680100627814e-07,
"loss": 0.4823,
"step": 23520
},
{
"epoch": 0.8868201861832435,
"grad_norm": 1.5155533043912437,
"learning_rate": 3.854313846529917e-07,
"loss": 0.4738,
"step": 23530
},
{
"epoch": 0.8871970753401425,
"grad_norm": 1.3069686366746593,
"learning_rate": 3.8290274651603844e-07,
"loss": 0.4295,
"step": 23540
},
{
"epoch": 0.8875739644970414,
"grad_norm": 1.6900219347834147,
"learning_rate": 3.803821000286939e-07,
"loss": 0.4713,
"step": 23550
},
{
"epoch": 0.8879508536539403,
"grad_norm": 1.7207203874610701,
"learning_rate": 3.7786944955390094e-07,
"loss": 0.4833,
"step": 23560
},
{
"epoch": 0.8883277428108394,
"grad_norm": 1.6381226224767984,
"learning_rate": 3.7536479944075946e-07,
"loss": 0.4789,
"step": 23570
},
{
"epoch": 0.8887046319677383,
"grad_norm": 1.468117460132038,
"learning_rate": 3.7286815402452436e-07,
"loss": 0.4666,
"step": 23580
},
{
"epoch": 0.8890815211246372,
"grad_norm": 1.5096914213423873,
"learning_rate": 3.703795176265912e-07,
"loss": 0.4633,
"step": 23590
},
{
"epoch": 0.8894584102815362,
"grad_norm": 1.8984329064476526,
"learning_rate": 3.678988945544976e-07,
"loss": 0.4806,
"step": 23600
},
{
"epoch": 0.8898352994384352,
"grad_norm": 1.941908868591099,
"learning_rate": 3.654262891019067e-07,
"loss": 0.4769,
"step": 23610
},
{
"epoch": 0.8902121885953341,
"grad_norm": 1.8348751421543537,
"learning_rate": 3.6296170554860954e-07,
"loss": 0.4828,
"step": 23620
},
{
"epoch": 0.8905890777522331,
"grad_norm": 1.5016426776021727,
"learning_rate": 3.60505148160506e-07,
"loss": 0.4548,
"step": 23630
},
{
"epoch": 0.890965966909132,
"grad_norm": 1.7145005520740269,
"learning_rate": 3.5805662118960747e-07,
"loss": 0.5038,
"step": 23640
},
{
"epoch": 0.891342856066031,
"grad_norm": 1.705443534960991,
"learning_rate": 3.5561612887402565e-07,
"loss": 0.4417,
"step": 23650
},
{
"epoch": 0.89171974522293,
"grad_norm": 1.4890162691299267,
"learning_rate": 3.531836754379625e-07,
"loss": 0.462,
"step": 23660
},
{
"epoch": 0.8920966343798289,
"grad_norm": 3.142113368721328,
"learning_rate": 3.507592650917091e-07,
"loss": 0.5031,
"step": 23670
},
{
"epoch": 0.8924735235367278,
"grad_norm": 1.6644517980326772,
"learning_rate": 3.48342902031632e-07,
"loss": 0.4791,
"step": 23680
},
{
"epoch": 0.8928504126936267,
"grad_norm": 1.7717700337165432,
"learning_rate": 3.459345904401712e-07,
"loss": 0.4588,
"step": 23690
},
{
"epoch": 0.8932273018505258,
"grad_norm": 1.7866048077368766,
"learning_rate": 3.435343344858283e-07,
"loss": 0.4523,
"step": 23700
},
{
"epoch": 0.8936041910074247,
"grad_norm": 1.6579103062766696,
"learning_rate": 3.411421383231628e-07,
"loss": 0.4613,
"step": 23710
},
{
"epoch": 0.8939810801643236,
"grad_norm": 1.2395669709919346,
"learning_rate": 3.387580060927842e-07,
"loss": 0.4635,
"step": 23720
},
{
"epoch": 0.8943579693212226,
"grad_norm": 1.637561199613229,
"learning_rate": 3.363819419213432e-07,
"loss": 0.4709,
"step": 23730
},
{
"epoch": 0.8947348584781216,
"grad_norm": 1.3235058842010576,
"learning_rate": 3.3401394992152615e-07,
"loss": 0.4477,
"step": 23740
},
{
"epoch": 0.8951117476350205,
"grad_norm": 1.9666315503654384,
"learning_rate": 3.316540341920477e-07,
"loss": 0.5028,
"step": 23750
},
{
"epoch": 0.8954886367919195,
"grad_norm": 1.5304868239681946,
"learning_rate": 3.293021988176409e-07,
"loss": 0.494,
"step": 23760
},
{
"epoch": 0.8958655259488184,
"grad_norm": 1.7286366317517883,
"learning_rate": 3.269584478690574e-07,
"loss": 0.4889,
"step": 23770
},
{
"epoch": 0.8962424151057174,
"grad_norm": 1.3521612831885137,
"learning_rate": 3.2462278540305205e-07,
"loss": 0.4572,
"step": 23780
},
{
"epoch": 0.8966193042626164,
"grad_norm": 1.5149236125626533,
"learning_rate": 3.2229521546238097e-07,
"loss": 0.4456,
"step": 23790
},
{
"epoch": 0.8969961934195153,
"grad_norm": 1.673486048314399,
"learning_rate": 3.199757420757915e-07,
"loss": 0.4624,
"step": 23800
},
{
"epoch": 0.8973730825764142,
"grad_norm": 1.6567774362547631,
"learning_rate": 3.176643692580184e-07,
"loss": 0.4809,
"step": 23810
},
{
"epoch": 0.8977499717333133,
"grad_norm": 1.8358040993755154,
"learning_rate": 3.1536110100977514e-07,
"loss": 0.4801,
"step": 23820
},
{
"epoch": 0.8981268608902122,
"grad_norm": 1.6459275011239465,
"learning_rate": 3.130659413177478e-07,
"loss": 0.4345,
"step": 23830
},
{
"epoch": 0.8985037500471111,
"grad_norm": 1.8349612600952543,
"learning_rate": 3.107788941545842e-07,
"loss": 0.4614,
"step": 23840
},
{
"epoch": 0.8988806392040101,
"grad_norm": 1.7411885119540658,
"learning_rate": 3.0849996347889434e-07,
"loss": 0.4839,
"step": 23850
},
{
"epoch": 0.8992575283609091,
"grad_norm": 1.9747342875954943,
"learning_rate": 3.0622915323523683e-07,
"loss": 0.472,
"step": 23860
},
{
"epoch": 0.899634417517808,
"grad_norm": 1.5726613697693645,
"learning_rate": 3.039664673541165e-07,
"loss": 0.4805,
"step": 23870
},
{
"epoch": 0.900011306674707,
"grad_norm": 1.7695993345825292,
"learning_rate": 3.0171190975197553e-07,
"loss": 0.4646,
"step": 23880
},
{
"epoch": 0.9003881958316059,
"grad_norm": 1.6476558150869725,
"learning_rate": 2.994654843311856e-07,
"loss": 0.4738,
"step": 23890
},
{
"epoch": 0.9007650849885048,
"grad_norm": 1.839852845526703,
"learning_rate": 2.972271949800443e-07,
"loss": 0.4871,
"step": 23900
},
{
"epoch": 0.9011419741454039,
"grad_norm": 1.6296225754693838,
"learning_rate": 2.949970455727652e-07,
"loss": 0.4948,
"step": 23910
},
{
"epoch": 0.9015188633023028,
"grad_norm": 1.446451999177883,
"learning_rate": 2.9277503996947453e-07,
"loss": 0.4458,
"step": 23920
},
{
"epoch": 0.9018957524592017,
"grad_norm": 1.5091359658108487,
"learning_rate": 2.905611820162002e-07,
"loss": 0.4676,
"step": 23930
},
{
"epoch": 0.9022726416161007,
"grad_norm": 1.4935637256342174,
"learning_rate": 2.883554755448692e-07,
"loss": 0.4402,
"step": 23940
},
{
"epoch": 0.9026495307729997,
"grad_norm": 1.7255741199144814,
"learning_rate": 2.861579243732993e-07,
"loss": 0.4814,
"step": 23950
},
{
"epoch": 0.9030264199298986,
"grad_norm": 1.7337660198803386,
"learning_rate": 2.8396853230518993e-07,
"loss": 0.4631,
"step": 23960
},
{
"epoch": 0.9034033090867976,
"grad_norm": 1.8762174059045174,
"learning_rate": 2.8178730313012215e-07,
"loss": 0.4773,
"step": 23970
},
{
"epoch": 0.9037801982436965,
"grad_norm": 1.7366313753924991,
"learning_rate": 2.796142406235447e-07,
"loss": 0.4423,
"step": 23980
},
{
"epoch": 0.9041570874005955,
"grad_norm": 1.6724834106832989,
"learning_rate": 2.7744934854677274e-07,
"loss": 0.4486,
"step": 23990
},
{
"epoch": 0.9045339765574945,
"grad_norm": 1.935280413678451,
"learning_rate": 2.752926306469772e-07,
"loss": 0.4678,
"step": 24000
},
{
"epoch": 0.9049108657143934,
"grad_norm": 1.714097876276615,
"learning_rate": 2.731440906571825e-07,
"loss": 0.4606,
"step": 24010
},
{
"epoch": 0.9052877548712923,
"grad_norm": 1.4926141470560026,
"learning_rate": 2.710037322962572e-07,
"loss": 0.4681,
"step": 24020
},
{
"epoch": 0.9056646440281914,
"grad_norm": 1.722145408901395,
"learning_rate": 2.688715592689101e-07,
"loss": 0.4426,
"step": 24030
},
{
"epoch": 0.9060415331850903,
"grad_norm": 1.9291301430266703,
"learning_rate": 2.6674757526567895e-07,
"loss": 0.4764,
"step": 24040
},
{
"epoch": 0.9064184223419892,
"grad_norm": 1.7351194729472024,
"learning_rate": 2.646317839629292e-07,
"loss": 0.4556,
"step": 24050
},
{
"epoch": 0.9067953114988881,
"grad_norm": 1.3921208085094439,
"learning_rate": 2.625241890228464e-07,
"loss": 0.4437,
"step": 24060
},
{
"epoch": 0.9071722006557872,
"grad_norm": 5.211286582046159,
"learning_rate": 2.6042479409342734e-07,
"loss": 0.4792,
"step": 24070
},
{
"epoch": 0.9075490898126861,
"grad_norm": 1.6508702793082282,
"learning_rate": 2.5833360280847707e-07,
"loss": 0.4854,
"step": 24080
},
{
"epoch": 0.907925978969585,
"grad_norm": 1.5904848357115617,
"learning_rate": 2.562506187876007e-07,
"loss": 0.4795,
"step": 24090
},
{
"epoch": 0.908302868126484,
"grad_norm": 1.6275384635774426,
"learning_rate": 2.5417584563619647e-07,
"loss": 0.4781,
"step": 24100
},
{
"epoch": 0.9086797572833829,
"grad_norm": 1.5228159132695211,
"learning_rate": 2.521092869454528e-07,
"loss": 0.47,
"step": 24110
},
{
"epoch": 0.909056646440282,
"grad_norm": 1.5131657204163729,
"learning_rate": 2.5005094629233726e-07,
"loss": 0.4746,
"step": 24120
},
{
"epoch": 0.9094335355971809,
"grad_norm": 1.803558175951124,
"learning_rate": 2.4800082723959505e-07,
"loss": 0.4672,
"step": 24130
},
{
"epoch": 0.9098104247540798,
"grad_norm": 1.5532182659903644,
"learning_rate": 2.459589333357393e-07,
"loss": 0.465,
"step": 24140
},
{
"epoch": 0.9101873139109787,
"grad_norm": 1.8682246003822802,
"learning_rate": 2.439252681150472e-07,
"loss": 0.4661,
"step": 24150
},
{
"epoch": 0.9105642030678778,
"grad_norm": 1.6186148451233608,
"learning_rate": 2.418998350975543e-07,
"loss": 0.4562,
"step": 24160
},
{
"epoch": 0.9109410922247767,
"grad_norm": 1.9076729059759836,
"learning_rate": 2.398826377890423e-07,
"loss": 0.4791,
"step": 24170
},
{
"epoch": 0.9113179813816756,
"grad_norm": 1.5131142108015385,
"learning_rate": 2.378736796810449e-07,
"loss": 0.4574,
"step": 24180
},
{
"epoch": 0.9116948705385746,
"grad_norm": 1.6267367473954135,
"learning_rate": 2.3587296425082894e-07,
"loss": 0.4434,
"step": 24190
},
{
"epoch": 0.9120717596954736,
"grad_norm": 1.756928999491046,
"learning_rate": 2.338804949613982e-07,
"loss": 0.4753,
"step": 24200
},
{
"epoch": 0.9124486488523725,
"grad_norm": 1.678600318807189,
"learning_rate": 2.3189627526148007e-07,
"loss": 0.4892,
"step": 24210
},
{
"epoch": 0.9128255380092715,
"grad_norm": 1.6170139181507344,
"learning_rate": 2.299203085855234e-07,
"loss": 0.4578,
"step": 24220
},
{
"epoch": 0.9132024271661704,
"grad_norm": 1.5717138547339975,
"learning_rate": 2.279525983536951e-07,
"loss": 0.4751,
"step": 24230
},
{
"epoch": 0.9135793163230694,
"grad_norm": 1.7504585367583836,
"learning_rate": 2.2599314797186857e-07,
"loss": 0.4996,
"step": 24240
},
{
"epoch": 0.9139562054799684,
"grad_norm": 1.8589504523667113,
"learning_rate": 2.2404196083161968e-07,
"loss": 0.464,
"step": 24250
},
{
"epoch": 0.9143330946368673,
"grad_norm": 1.5549339824814492,
"learning_rate": 2.2209904031022356e-07,
"loss": 0.474,
"step": 24260
},
{
"epoch": 0.9147099837937662,
"grad_norm": 1.5431790462672454,
"learning_rate": 2.2016438977064624e-07,
"loss": 0.4688,
"step": 24270
},
{
"epoch": 0.9150868729506653,
"grad_norm": 1.5231675790646082,
"learning_rate": 2.18238012561538e-07,
"loss": 0.4459,
"step": 24280
},
{
"epoch": 0.9154637621075642,
"grad_norm": 1.213036543371949,
"learning_rate": 2.1631991201723102e-07,
"loss": 0.4214,
"step": 24290
},
{
"epoch": 0.9158406512644631,
"grad_norm": 1.6789791239635883,
"learning_rate": 2.1441009145773074e-07,
"loss": 0.4608,
"step": 24300
},
{
"epoch": 0.916217540421362,
"grad_norm": 1.646949624163508,
"learning_rate": 2.1250855418871008e-07,
"loss": 0.4597,
"step": 24310
},
{
"epoch": 0.916594429578261,
"grad_norm": 1.8335341035244128,
"learning_rate": 2.106153035015057e-07,
"loss": 0.4875,
"step": 24320
},
{
"epoch": 0.91697131873516,
"grad_norm": 1.5713652425889508,
"learning_rate": 2.0873034267311131e-07,
"loss": 0.4644,
"step": 24330
},
{
"epoch": 0.917348207892059,
"grad_norm": 1.7316298479198824,
"learning_rate": 2.0685367496617037e-07,
"loss": 0.4834,
"step": 24340
},
{
"epoch": 0.9177250970489579,
"grad_norm": 1.7723989836411413,
"learning_rate": 2.0498530362897283e-07,
"loss": 0.4794,
"step": 24350
},
{
"epoch": 0.9181019862058568,
"grad_norm": 1.5053617798457812,
"learning_rate": 2.031252318954502e-07,
"loss": 0.468,
"step": 24360
},
{
"epoch": 0.9184788753627559,
"grad_norm": 1.6353066163088765,
"learning_rate": 2.0127346298516426e-07,
"loss": 0.5002,
"step": 24370
},
{
"epoch": 0.9188557645196548,
"grad_norm": 1.6543832590665188,
"learning_rate": 1.9943000010331005e-07,
"loss": 0.4606,
"step": 24380
},
{
"epoch": 0.9192326536765537,
"grad_norm": 1.4329476172975082,
"learning_rate": 1.9759484644070347e-07,
"loss": 0.4732,
"step": 24390
},
{
"epoch": 0.9196095428334526,
"grad_norm": 1.803480078827421,
"learning_rate": 1.9576800517377924e-07,
"loss": 0.4525,
"step": 24400
},
{
"epoch": 0.9199864319903517,
"grad_norm": 1.5271475875148783,
"learning_rate": 1.9394947946458243e-07,
"loss": 0.4276,
"step": 24410
},
{
"epoch": 0.9203633211472506,
"grad_norm": 1.7436847616380555,
"learning_rate": 1.9213927246076623e-07,
"loss": 0.4601,
"step": 24420
},
{
"epoch": 0.9207402103041495,
"grad_norm": 1.7878744772222794,
"learning_rate": 1.9033738729558437e-07,
"loss": 0.4736,
"step": 24430
},
{
"epoch": 0.9211170994610485,
"grad_norm": 1.6372286383071935,
"learning_rate": 1.8854382708788976e-07,
"loss": 0.4829,
"step": 24440
},
{
"epoch": 0.9214939886179475,
"grad_norm": 1.8458554633785587,
"learning_rate": 1.8675859494212078e-07,
"loss": 0.4831,
"step": 24450
},
{
"epoch": 0.9218708777748464,
"grad_norm": 1.7303090763015152,
"learning_rate": 1.84981693948304e-07,
"loss": 0.4581,
"step": 24460
},
{
"epoch": 0.9222477669317454,
"grad_norm": 1.6324828539105698,
"learning_rate": 1.8321312718204477e-07,
"loss": 0.4652,
"step": 24470
},
{
"epoch": 0.9226246560886443,
"grad_norm": 1.6706150940449451,
"learning_rate": 1.8145289770452323e-07,
"loss": 0.4862,
"step": 24480
},
{
"epoch": 0.9230015452455432,
"grad_norm": 1.6307388216734264,
"learning_rate": 1.7970100856248896e-07,
"loss": 0.4872,
"step": 24490
},
{
"epoch": 0.9233784344024423,
"grad_norm": 1.584467065772082,
"learning_rate": 1.7795746278825465e-07,
"loss": 0.476,
"step": 24500
},
{
"epoch": 0.9237553235593412,
"grad_norm": 1.5002376627559966,
"learning_rate": 1.7622226339969185e-07,
"loss": 0.4608,
"step": 24510
},
{
"epoch": 0.9241322127162401,
"grad_norm": 1.3947526048448347,
"learning_rate": 1.7449541340022526e-07,
"loss": 0.4641,
"step": 24520
},
{
"epoch": 0.9245091018731391,
"grad_norm": 1.6798334967661213,
"learning_rate": 1.7277691577882892e-07,
"loss": 0.4469,
"step": 24530
},
{
"epoch": 0.9248859910300381,
"grad_norm": 1.589068317695499,
"learning_rate": 1.7106677351001855e-07,
"loss": 0.458,
"step": 24540
},
{
"epoch": 0.925262880186937,
"grad_norm": 1.8869275914521926,
"learning_rate": 1.693649895538485e-07,
"loss": 0.505,
"step": 24550
},
{
"epoch": 0.925639769343836,
"grad_norm": 1.9569489786208991,
"learning_rate": 1.6767156685590536e-07,
"loss": 0.451,
"step": 24560
},
{
"epoch": 0.9260166585007349,
"grad_norm": 1.6847190222460249,
"learning_rate": 1.65986508347305e-07,
"loss": 0.4609,
"step": 24570
},
{
"epoch": 0.9263935476576339,
"grad_norm": 1.8408894133770082,
"learning_rate": 1.6430981694468162e-07,
"loss": 0.4766,
"step": 24580
},
{
"epoch": 0.9267704368145329,
"grad_norm": 1.674533122441784,
"learning_rate": 1.626414955501926e-07,
"loss": 0.45,
"step": 24590
},
{
"epoch": 0.9271473259714318,
"grad_norm": 1.8741720379805382,
"learning_rate": 1.6098154705150416e-07,
"loss": 0.4661,
"step": 24600
},
{
"epoch": 0.9275242151283307,
"grad_norm": 1.733162525973201,
"learning_rate": 1.593299743217913e-07,
"loss": 0.4568,
"step": 24610
},
{
"epoch": 0.9279011042852298,
"grad_norm": 4.339443575234999,
"learning_rate": 1.5768678021973016e-07,
"loss": 0.4862,
"step": 24620
},
{
"epoch": 0.9282779934421287,
"grad_norm": 1.750943994731508,
"learning_rate": 1.5605196758949614e-07,
"loss": 0.4562,
"step": 24630
},
{
"epoch": 0.9286548825990276,
"grad_norm": 1.5174387434569223,
"learning_rate": 1.5442553926075687e-07,
"loss": 0.4362,
"step": 24640
},
{
"epoch": 0.9290317717559265,
"grad_norm": 1.724853168012421,
"learning_rate": 1.528074980486677e-07,
"loss": 0.4665,
"step": 24650
},
{
"epoch": 0.9294086609128256,
"grad_norm": 2.2911044470949418,
"learning_rate": 1.5119784675386607e-07,
"loss": 0.4655,
"step": 24660
},
{
"epoch": 0.9297855500697245,
"grad_norm": 1.7545886816782916,
"learning_rate": 1.4959658816246836e-07,
"loss": 0.4788,
"step": 24670
},
{
"epoch": 0.9301624392266234,
"grad_norm": 1.7138768298267077,
"learning_rate": 1.480037250460642e-07,
"loss": 0.4483,
"step": 24680
},
{
"epoch": 0.9305393283835224,
"grad_norm": 1.5647732024525882,
"learning_rate": 1.4641926016171092e-07,
"loss": 0.4594,
"step": 24690
},
{
"epoch": 0.9309162175404213,
"grad_norm": 1.704892085076998,
"learning_rate": 1.4484319625193033e-07,
"loss": 0.4761,
"step": 24700
},
{
"epoch": 0.9312931066973203,
"grad_norm": 1.8079933221397861,
"learning_rate": 1.4327553604470246e-07,
"loss": 0.4568,
"step": 24710
},
{
"epoch": 0.9316699958542193,
"grad_norm": 3.18841995794234,
"learning_rate": 1.4171628225346234e-07,
"loss": 0.4503,
"step": 24720
},
{
"epoch": 0.9320468850111182,
"grad_norm": 1.645937879683613,
"learning_rate": 1.4016543757709332e-07,
"loss": 0.4734,
"step": 24730
},
{
"epoch": 0.9324237741680171,
"grad_norm": 1.6358083401976105,
"learning_rate": 1.3862300469992484e-07,
"loss": 0.4707,
"step": 24740
},
{
"epoch": 0.9328006633249162,
"grad_norm": 1.6561818821247405,
"learning_rate": 1.3708898629172518e-07,
"loss": 0.4728,
"step": 24750
},
{
"epoch": 0.9331775524818151,
"grad_norm": 1.6439915924189832,
"learning_rate": 1.3556338500769982e-07,
"loss": 0.4671,
"step": 24760
},
{
"epoch": 0.933554441638714,
"grad_norm": 1.3928410136757627,
"learning_rate": 1.3404620348848375e-07,
"loss": 0.4766,
"step": 24770
},
{
"epoch": 0.933931330795613,
"grad_norm": 1.6629613706509152,
"learning_rate": 1.325374443601385e-07,
"loss": 0.4634,
"step": 24780
},
{
"epoch": 0.934308219952512,
"grad_norm": 1.3229160640821136,
"learning_rate": 1.310371102341479e-07,
"loss": 0.4527,
"step": 24790
},
{
"epoch": 0.9346851091094109,
"grad_norm": 1.6985771267573568,
"learning_rate": 1.2954520370741408e-07,
"loss": 0.4449,
"step": 24800
},
{
"epoch": 0.9350619982663099,
"grad_norm": 1.776417571410387,
"learning_rate": 1.280617273622492e-07,
"loss": 0.4278,
"step": 24810
},
{
"epoch": 0.9354388874232088,
"grad_norm": 1.9248752747557138,
"learning_rate": 1.2658668376637705e-07,
"loss": 0.4785,
"step": 24820
},
{
"epoch": 0.9358157765801078,
"grad_norm": 1.5537222976277234,
"learning_rate": 1.251200754729226e-07,
"loss": 0.4956,
"step": 24830
},
{
"epoch": 0.9361926657370068,
"grad_norm": 1.6183831830487874,
"learning_rate": 1.2366190502041186e-07,
"loss": 0.4592,
"step": 24840
},
{
"epoch": 0.9365695548939057,
"grad_norm": 1.7490907041034809,
"learning_rate": 1.222121749327654e-07,
"loss": 0.4916,
"step": 24850
},
{
"epoch": 0.9369464440508046,
"grad_norm": 1.568020359678583,
"learning_rate": 1.2077088771929535e-07,
"loss": 0.459,
"step": 24860
},
{
"epoch": 0.9373233332077037,
"grad_norm": 1.6766487321093162,
"learning_rate": 1.1933804587469843e-07,
"loss": 0.473,
"step": 24870
},
{
"epoch": 0.9377002223646026,
"grad_norm": 1.6215811602508874,
"learning_rate": 1.1791365187905524e-07,
"loss": 0.451,
"step": 24880
},
{
"epoch": 0.9380771115215015,
"grad_norm": 1.7382912119631357,
"learning_rate": 1.1649770819782247e-07,
"loss": 0.4738,
"step": 24890
},
{
"epoch": 0.9384540006784005,
"grad_norm": 1.882585550263763,
"learning_rate": 1.1509021728183301e-07,
"loss": 0.4945,
"step": 24900
},
{
"epoch": 0.9388308898352994,
"grad_norm": 1.412211827907065,
"learning_rate": 1.1369118156728587e-07,
"loss": 0.458,
"step": 24910
},
{
"epoch": 0.9392077789921984,
"grad_norm": 1.8359718454808982,
"learning_rate": 1.1230060347574679e-07,
"loss": 0.4584,
"step": 24920
},
{
"epoch": 0.9395846681490974,
"grad_norm": 1.489126957012612,
"learning_rate": 1.1091848541414262e-07,
"loss": 0.4817,
"step": 24930
},
{
"epoch": 0.9399615573059963,
"grad_norm": 2.0450896007301416,
"learning_rate": 1.0954482977475533e-07,
"loss": 0.4906,
"step": 24940
},
{
"epoch": 0.9403384464628952,
"grad_norm": 1.8209636581396178,
"learning_rate": 1.0817963893522132e-07,
"loss": 0.4844,
"step": 24950
},
{
"epoch": 0.9407153356197943,
"grad_norm": 1.708902383560193,
"learning_rate": 1.0682291525852484e-07,
"loss": 0.4711,
"step": 24960
},
{
"epoch": 0.9410922247766932,
"grad_norm": 3.052634153804913,
"learning_rate": 1.0547466109299298e-07,
"loss": 0.4866,
"step": 24970
},
{
"epoch": 0.9414691139335921,
"grad_norm": 1.6048817510008635,
"learning_rate": 1.0413487877229566e-07,
"loss": 0.4614,
"step": 24980
},
{
"epoch": 0.941846003090491,
"grad_norm": 1.6211384980618553,
"learning_rate": 1.0280357061543622e-07,
"loss": 0.496,
"step": 24990
},
{
"epoch": 0.9422228922473901,
"grad_norm": 1.7208732672554823,
"learning_rate": 1.0148073892675358e-07,
"loss": 0.4515,
"step": 25000
},
{
"epoch": 0.942599781404289,
"grad_norm": 1.242397602277574,
"learning_rate": 1.0016638599591122e-07,
"loss": 0.4522,
"step": 25010
},
{
"epoch": 0.942976670561188,
"grad_norm": 1.864373295532734,
"learning_rate": 9.886051409790042e-08,
"loss": 0.4541,
"step": 25020
},
{
"epoch": 0.9433535597180869,
"grad_norm": 1.7432924180401044,
"learning_rate": 9.756312549302982e-08,
"loss": 0.4807,
"step": 25030
},
{
"epoch": 0.9437304488749859,
"grad_norm": 1.3858687614024912,
"learning_rate": 9.627422242692585e-08,
"loss": 0.4545,
"step": 25040
},
{
"epoch": 0.9441073380318848,
"grad_norm": 1.7270495665971028,
"learning_rate": 9.499380713052785e-08,
"loss": 0.4589,
"step": 25050
},
{
"epoch": 0.9444842271887838,
"grad_norm": 1.5950305399452118,
"learning_rate": 9.372188182008358e-08,
"loss": 0.4564,
"step": 25060
},
{
"epoch": 0.9448611163456827,
"grad_norm": 1.7644110153181087,
"learning_rate": 9.245844869714471e-08,
"loss": 0.4859,
"step": 25070
},
{
"epoch": 0.9452380055025816,
"grad_norm": 1.440552445480469,
"learning_rate": 9.120350994856475e-08,
"loss": 0.4771,
"step": 25080
},
{
"epoch": 0.9456148946594807,
"grad_norm": 3.239934112076224,
"learning_rate": 8.995706774649504e-08,
"loss": 0.4731,
"step": 25090
},
{
"epoch": 0.9459917838163796,
"grad_norm": 1.9158332648079843,
"learning_rate": 8.87191242483787e-08,
"loss": 0.4597,
"step": 25100
},
{
"epoch": 0.9463686729732785,
"grad_norm": 1.5922507680686822,
"learning_rate": 8.748968159695004e-08,
"loss": 0.4316,
"step": 25110
},
{
"epoch": 0.9467455621301775,
"grad_norm": 1.792335011843171,
"learning_rate": 8.626874192022905e-08,
"loss": 0.5036,
"step": 25120
},
{
"epoch": 0.9471224512870765,
"grad_norm": 1.4714034304995909,
"learning_rate": 8.505630733151803e-08,
"loss": 0.453,
"step": 25130
},
{
"epoch": 0.9474993404439754,
"grad_norm": 1.4587868542295341,
"learning_rate": 8.385237992939777e-08,
"loss": 0.4731,
"step": 25140
},
{
"epoch": 0.9478762296008744,
"grad_norm": 1.7817219201477925,
"learning_rate": 8.265696179772465e-08,
"loss": 0.4456,
"step": 25150
},
{
"epoch": 0.9482531187577733,
"grad_norm": 1.621641692161085,
"learning_rate": 8.147005500562577e-08,
"loss": 0.4435,
"step": 25160
},
{
"epoch": 0.9486300079146723,
"grad_norm": 1.6494310365282547,
"learning_rate": 8.029166160749668e-08,
"loss": 0.4665,
"step": 25170
},
{
"epoch": 0.9490068970715713,
"grad_norm": 1.4731323966452758,
"learning_rate": 7.912178364299694e-08,
"loss": 0.4633,
"step": 25180
},
{
"epoch": 0.9493837862284702,
"grad_norm": 1.4539950989850374,
"learning_rate": 7.796042313704733e-08,
"loss": 0.4761,
"step": 25190
},
{
"epoch": 0.9497606753853691,
"grad_norm": 1.7614354811781805,
"learning_rate": 7.680758209982541e-08,
"loss": 0.4874,
"step": 25200
},
{
"epoch": 0.9501375645422682,
"grad_norm": 1.5146197995816046,
"learning_rate": 7.566326252676226e-08,
"loss": 0.4565,
"step": 25210
},
{
"epoch": 0.9505144536991671,
"grad_norm": 1.4964331021798207,
"learning_rate": 7.452746639854069e-08,
"loss": 0.4732,
"step": 25220
},
{
"epoch": 0.950891342856066,
"grad_norm": 1.6531417972845415,
"learning_rate": 7.34001956810898e-08,
"loss": 0.4539,
"step": 25230
},
{
"epoch": 0.951268232012965,
"grad_norm": 1.4443997141550056,
"learning_rate": 7.228145232558048e-08,
"loss": 0.4651,
"step": 25240
},
{
"epoch": 0.951645121169864,
"grad_norm": 1.6488863792857795,
"learning_rate": 7.117123826842598e-08,
"loss": 0.4655,
"step": 25250
},
{
"epoch": 0.9520220103267629,
"grad_norm": 2.066125366240161,
"learning_rate": 7.00695554312758e-08,
"loss": 0.4818,
"step": 25260
},
{
"epoch": 0.9523988994836619,
"grad_norm": 1.7989280362677504,
"learning_rate": 6.897640572101294e-08,
"loss": 0.4592,
"step": 25270
},
{
"epoch": 0.9527757886405608,
"grad_norm": 1.5907695066167318,
"learning_rate": 6.789179102974996e-08,
"loss": 0.4615,
"step": 25280
},
{
"epoch": 0.9531526777974597,
"grad_norm": 1.6474294146755808,
"learning_rate": 6.681571323482628e-08,
"loss": 0.4745,
"step": 25290
},
{
"epoch": 0.9535295669543588,
"grad_norm": 1.8575461910907407,
"learning_rate": 6.574817419880586e-08,
"loss": 0.4843,
"step": 25300
},
{
"epoch": 0.9539064561112577,
"grad_norm": 1.5427872527092095,
"learning_rate": 6.468917576947287e-08,
"loss": 0.4616,
"step": 25310
},
{
"epoch": 0.9542833452681566,
"grad_norm": 2.4865362259718116,
"learning_rate": 6.363871977982827e-08,
"loss": 0.4617,
"step": 25320
},
{
"epoch": 0.9546602344250555,
"grad_norm": 1.7543026874769245,
"learning_rate": 6.259680804808654e-08,
"loss": 0.4666,
"step": 25330
},
{
"epoch": 0.9550371235819546,
"grad_norm": 1.604570679456589,
"learning_rate": 6.156344237767453e-08,
"loss": 0.494,
"step": 25340
},
{
"epoch": 0.9554140127388535,
"grad_norm": 1.8669803638424052,
"learning_rate": 6.053862455722593e-08,
"loss": 0.4654,
"step": 25350
},
{
"epoch": 0.9557909018957524,
"grad_norm": 1.3581166661184105,
"learning_rate": 5.952235636057902e-08,
"loss": 0.4403,
"step": 25360
},
{
"epoch": 0.9561677910526514,
"grad_norm": 1.5400521124436184,
"learning_rate": 5.851463954677394e-08,
"loss": 0.4653,
"step": 25370
},
{
"epoch": 0.9565446802095504,
"grad_norm": 1.8105900226878286,
"learning_rate": 5.7515475860049354e-08,
"loss": 0.4874,
"step": 25380
},
{
"epoch": 0.9569215693664493,
"grad_norm": 1.4382499444470058,
"learning_rate": 5.652486702984017e-08,
"loss": 0.4648,
"step": 25390
},
{
"epoch": 0.9572984585233483,
"grad_norm": 1.7631024504492547,
"learning_rate": 5.554281477077206e-08,
"loss": 0.4704,
"step": 25400
},
{
"epoch": 0.9576753476802472,
"grad_norm": 1.6429543944439136,
"learning_rate": 5.456932078266197e-08,
"loss": 0.4655,
"step": 25410
},
{
"epoch": 0.9580522368371462,
"grad_norm": 1.6616938198698554,
"learning_rate": 5.360438675051316e-08,
"loss": 0.4773,
"step": 25420
},
{
"epoch": 0.9584291259940452,
"grad_norm": 1.7874329868405086,
"learning_rate": 5.264801434451239e-08,
"loss": 0.4466,
"step": 25430
},
{
"epoch": 0.9588060151509441,
"grad_norm": 1.2294510476080451,
"learning_rate": 5.170020522002661e-08,
"loss": 0.4711,
"step": 25440
},
{
"epoch": 0.959182904307843,
"grad_norm": 1.9210918144126727,
"learning_rate": 5.0760961017602415e-08,
"loss": 0.4725,
"step": 25450
},
{
"epoch": 0.9595597934647421,
"grad_norm": 1.8227209874755572,
"learning_rate": 4.983028336295881e-08,
"loss": 0.4476,
"step": 25460
},
{
"epoch": 0.959936682621641,
"grad_norm": 1.788350611886206,
"learning_rate": 4.8908173866990535e-08,
"loss": 0.4567,
"step": 25470
},
{
"epoch": 0.9603135717785399,
"grad_norm": 1.775020734539053,
"learning_rate": 4.799463412575978e-08,
"loss": 0.5112,
"step": 25480
},
{
"epoch": 0.9606904609354389,
"grad_norm": 1.5375809927016806,
"learning_rate": 4.7089665720495e-08,
"loss": 0.4391,
"step": 25490
},
{
"epoch": 0.9610673500923378,
"grad_norm": 1.5639240748918983,
"learning_rate": 4.619327021759046e-08,
"loss": 0.4785,
"step": 25500
},
{
"epoch": 0.9614442392492368,
"grad_norm": 1.764963080417895,
"learning_rate": 4.5305449168600024e-08,
"loss": 0.4736,
"step": 25510
},
{
"epoch": 0.9618211284061358,
"grad_norm": 1.6562005841477103,
"learning_rate": 4.4426204110237794e-08,
"loss": 0.4747,
"step": 25520
},
{
"epoch": 0.9621980175630347,
"grad_norm": 1.6196235223477182,
"learning_rate": 4.35555365643725e-08,
"loss": 0.4522,
"step": 25530
},
{
"epoch": 0.9625749067199336,
"grad_norm": 1.5383045592640703,
"learning_rate": 4.2693448038026996e-08,
"loss": 0.4704,
"step": 25540
},
{
"epoch": 0.9629517958768327,
"grad_norm": 1.8859648544559755,
"learning_rate": 4.1839940023374884e-08,
"loss": 0.4595,
"step": 25550
},
{
"epoch": 0.9633286850337316,
"grad_norm": 1.703004713270723,
"learning_rate": 4.0995013997736644e-08,
"loss": 0.4767,
"step": 25560
},
{
"epoch": 0.9637055741906305,
"grad_norm": 1.76025981546061,
"learning_rate": 4.015867142358076e-08,
"loss": 0.4687,
"step": 25570
},
{
"epoch": 0.9640824633475295,
"grad_norm": 1.9694305314081857,
"learning_rate": 3.933091374851594e-08,
"loss": 0.4558,
"step": 25580
},
{
"epoch": 0.9644593525044285,
"grad_norm": 1.3980420080759448,
"learning_rate": 3.8511742405293875e-08,
"loss": 0.4618,
"step": 25590
},
{
"epoch": 0.9648362416613274,
"grad_norm": 1.6493052417006149,
"learning_rate": 3.7701158811803694e-08,
"loss": 0.504,
"step": 25600
},
{
"epoch": 0.9652131308182264,
"grad_norm": 1.619654736340804,
"learning_rate": 3.6899164371068105e-08,
"loss": 0.4445,
"step": 25610
},
{
"epoch": 0.9655900199751253,
"grad_norm": 1.843793532222181,
"learning_rate": 3.610576047124614e-08,
"loss": 0.4668,
"step": 25620
},
{
"epoch": 0.9659669091320243,
"grad_norm": 2.1787530344860424,
"learning_rate": 3.5320948485625395e-08,
"loss": 0.4555,
"step": 25630
},
{
"epoch": 0.9663437982889233,
"grad_norm": 1.6664586339835825,
"learning_rate": 3.454472977262369e-08,
"loss": 0.4812,
"step": 25640
},
{
"epoch": 0.9667206874458222,
"grad_norm": 1.9221822760952356,
"learning_rate": 3.3777105675782965e-08,
"loss": 0.4762,
"step": 25650
},
{
"epoch": 0.9670975766027211,
"grad_norm": 1.5535350148707952,
"learning_rate": 3.3018077523769844e-08,
"loss": 0.4743,
"step": 25660
},
{
"epoch": 0.9674744657596202,
"grad_norm": 1.6826206259214103,
"learning_rate": 3.226764663037285e-08,
"loss": 0.4819,
"step": 25670
},
{
"epoch": 0.9678513549165191,
"grad_norm": 1.5372332783224485,
"learning_rate": 3.152581429449853e-08,
"loss": 0.4529,
"step": 25680
},
{
"epoch": 0.968228244073418,
"grad_norm": 1.6814181811804552,
"learning_rate": 3.079258180017142e-08,
"loss": 0.4824,
"step": 25690
},
{
"epoch": 0.9686051332303169,
"grad_norm": 1.7034619827760054,
"learning_rate": 3.006795041653021e-08,
"loss": 0.4618,
"step": 25700
},
{
"epoch": 0.9689820223872159,
"grad_norm": 2.003962281008904,
"learning_rate": 2.9351921397826055e-08,
"loss": 0.4614,
"step": 25710
},
{
"epoch": 0.9693589115441149,
"grad_norm": 1.787441426014292,
"learning_rate": 2.864449598342034e-08,
"loss": 0.4533,
"step": 25720
},
{
"epoch": 0.9697358007010138,
"grad_norm": 1.5820307731521495,
"learning_rate": 2.794567539778359e-08,
"loss": 0.4599,
"step": 25730
},
{
"epoch": 0.9701126898579128,
"grad_norm": 1.7447057484780453,
"learning_rate": 2.725546085049047e-08,
"loss": 0.4572,
"step": 25740
},
{
"epoch": 0.9704895790148117,
"grad_norm": 1.7232041715746915,
"learning_rate": 2.6573853536221992e-08,
"loss": 0.475,
"step": 25750
},
{
"epoch": 0.9708664681717107,
"grad_norm": 1.6685687883415614,
"learning_rate": 2.590085463475944e-08,
"loss": 0.4587,
"step": 25760
},
{
"epoch": 0.9712433573286097,
"grad_norm": 1.7482487866337566,
"learning_rate": 2.5236465310984336e-08,
"loss": 0.4702,
"step": 25770
},
{
"epoch": 0.9716202464855086,
"grad_norm": 1.7353631005801522,
"learning_rate": 2.458068671487568e-08,
"loss": 0.462,
"step": 25780
},
{
"epoch": 0.9719971356424075,
"grad_norm": 1.9350104216433035,
"learning_rate": 2.3933519981508834e-08,
"loss": 0.4588,
"step": 25790
},
{
"epoch": 0.9723740247993066,
"grad_norm": 1.6857080051807094,
"learning_rate": 2.3294966231053873e-08,
"loss": 0.468,
"step": 25800
},
{
"epoch": 0.9727509139562055,
"grad_norm": 1.7680489987031007,
"learning_rate": 2.266502656877001e-08,
"loss": 0.4694,
"step": 25810
},
{
"epoch": 0.9731278031131044,
"grad_norm": 1.4689352131990807,
"learning_rate": 2.2043702085010056e-08,
"loss": 0.4501,
"step": 25820
},
{
"epoch": 0.9735046922700034,
"grad_norm": 1.5690350494704872,
"learning_rate": 2.1430993855212635e-08,
"loss": 0.4789,
"step": 25830
},
{
"epoch": 0.9738815814269024,
"grad_norm": 1.4356673233784094,
"learning_rate": 2.0826902939903304e-08,
"loss": 0.4585,
"step": 25840
},
{
"epoch": 0.9742584705838013,
"grad_norm": 1.757515012502445,
"learning_rate": 2.023143038469233e-08,
"loss": 0.4944,
"step": 25850
},
{
"epoch": 0.9746353597407003,
"grad_norm": 1.6306453233507494,
"learning_rate": 1.9644577220271354e-08,
"loss": 0.4539,
"step": 25860
},
{
"epoch": 0.9750122488975992,
"grad_norm": 1.4314817446192643,
"learning_rate": 1.906634446241451e-08,
"loss": 0.4734,
"step": 25870
},
{
"epoch": 0.9753891380544981,
"grad_norm": 1.5806180685416475,
"learning_rate": 1.849673311197453e-08,
"loss": 0.4399,
"step": 25880
},
{
"epoch": 0.9757660272113972,
"grad_norm": 1.6520753364457346,
"learning_rate": 1.7935744154881087e-08,
"loss": 0.4566,
"step": 25890
},
{
"epoch": 0.9761429163682961,
"grad_norm": 1.810709467270653,
"learning_rate": 1.7383378562139674e-08,
"loss": 0.4487,
"step": 25900
},
{
"epoch": 0.976519805525195,
"grad_norm": 1.61252056021322,
"learning_rate": 1.683963728983051e-08,
"loss": 0.4715,
"step": 25910
},
{
"epoch": 0.976896694682094,
"grad_norm": 2.425481703892548,
"learning_rate": 1.630452127910409e-08,
"loss": 0.4637,
"step": 25920
},
{
"epoch": 0.977273583838993,
"grad_norm": 1.540972978097639,
"learning_rate": 1.5778031456184507e-08,
"loss": 0.4608,
"step": 25930
},
{
"epoch": 0.9776504729958919,
"grad_norm": 1.509850379830826,
"learning_rate": 1.5260168732362245e-08,
"loss": 0.4722,
"step": 25940
},
{
"epoch": 0.9780273621527908,
"grad_norm": 1.6782554143346065,
"learning_rate": 1.4750934003996965e-08,
"loss": 0.4587,
"step": 25950
},
{
"epoch": 0.9784042513096898,
"grad_norm": 1.5550524539915491,
"learning_rate": 1.4250328152514147e-08,
"loss": 0.4865,
"step": 25960
},
{
"epoch": 0.9787811404665888,
"grad_norm": 1.6682526527444668,
"learning_rate": 1.3758352044402345e-08,
"loss": 0.4975,
"step": 25970
},
{
"epoch": 0.9791580296234877,
"grad_norm": 1.613736685118058,
"learning_rate": 1.3275006531215384e-08,
"loss": 0.4624,
"step": 25980
},
{
"epoch": 0.9795349187803867,
"grad_norm": 1.6329195096750337,
"learning_rate": 1.2800292449566265e-08,
"loss": 0.4681,
"step": 25990
},
{
"epoch": 0.9799118079372856,
"grad_norm": 1.7759191946586195,
"learning_rate": 1.2334210621128827e-08,
"loss": 0.4862,
"step": 26000
},
{
"epoch": 0.9802886970941846,
"grad_norm": 1.6432481031917991,
"learning_rate": 1.1876761852636642e-08,
"loss": 0.472,
"step": 26010
},
{
"epoch": 0.9806655862510836,
"grad_norm": 1.902773292717396,
"learning_rate": 1.1427946935878009e-08,
"loss": 0.4545,
"step": 26020
},
{
"epoch": 0.9810424754079825,
"grad_norm": 1.6705242880751991,
"learning_rate": 1.0987766647699849e-08,
"loss": 0.4702,
"step": 26030
},
{
"epoch": 0.9814193645648814,
"grad_norm": 1.971413464598701,
"learning_rate": 1.055622175000104e-08,
"loss": 0.4794,
"step": 26040
},
{
"epoch": 0.9817962537217805,
"grad_norm": 1.6534906902045443,
"learning_rate": 1.01333129897363e-08,
"loss": 0.4578,
"step": 26050
},
{
"epoch": 0.9821731428786794,
"grad_norm": 1.54977127095544,
"learning_rate": 9.719041098909532e-09,
"loss": 0.4777,
"step": 26060
},
{
"epoch": 0.9825500320355783,
"grad_norm": 1.6720729481583012,
"learning_rate": 9.31340679457604e-09,
"loss": 0.4823,
"step": 26070
},
{
"epoch": 0.9829269211924773,
"grad_norm": 1.558146371421349,
"learning_rate": 8.916410778841978e-09,
"loss": 0.491,
"step": 26080
},
{
"epoch": 0.9833038103493762,
"grad_norm": 1.4251003076555135,
"learning_rate": 8.528053738860453e-09,
"loss": 0.4644,
"step": 26090
},
{
"epoch": 0.9836806995062752,
"grad_norm": 1.8009507558620492,
"learning_rate": 8.148336346830987e-09,
"loss": 0.4888,
"step": 26100
},
{
"epoch": 0.9840575886631742,
"grad_norm": 1.814480801176586,
"learning_rate": 7.777259259999503e-09,
"loss": 0.4629,
"step": 26110
},
{
"epoch": 0.9844344778200731,
"grad_norm": 2.196528914285027,
"learning_rate": 7.4148231206566665e-09,
"loss": 0.4731,
"step": 26120
},
{
"epoch": 0.984811366976972,
"grad_norm": 1.7187541640111528,
"learning_rate": 7.061028556136773e-09,
"loss": 0.4639,
"step": 26130
},
{
"epoch": 0.9851882561338711,
"grad_norm": 1.5937258075062353,
"learning_rate": 6.715876178816638e-09,
"loss": 0.4674,
"step": 26140
},
{
"epoch": 0.98556514529077,
"grad_norm": 1.8506500710664755,
"learning_rate": 6.379366586113933e-09,
"loss": 0.4652,
"step": 26150
},
{
"epoch": 0.9859420344476689,
"grad_norm": 1.6855411533697344,
"learning_rate": 6.051500360486628e-09,
"loss": 0.4527,
"step": 26160
},
{
"epoch": 0.9863189236045679,
"grad_norm": 1.5117238865140932,
"learning_rate": 5.732278069432995e-09,
"loss": 0.4779,
"step": 26170
},
{
"epoch": 0.9866958127614669,
"grad_norm": 1.6187884945554707,
"learning_rate": 5.421700265488827e-09,
"loss": 0.4611,
"step": 26180
},
{
"epoch": 0.9870727019183658,
"grad_norm": 1.3918686813033736,
"learning_rate": 5.119767486228e-09,
"loss": 0.4547,
"step": 26190
},
{
"epoch": 0.9874495910752648,
"grad_norm": 1.6724439609048405,
"learning_rate": 4.826480254259691e-09,
"loss": 0.4562,
"step": 26200
},
{
"epoch": 0.9878264802321637,
"grad_norm": 1.540870827083842,
"learning_rate": 4.541839077230048e-09,
"loss": 0.4569,
"step": 26210
},
{
"epoch": 0.9882033693890627,
"grad_norm": 1.5544229067227138,
"learning_rate": 4.265844447818856e-09,
"loss": 0.4467,
"step": 26220
},
{
"epoch": 0.9885802585459617,
"grad_norm": 1.6957676041843368,
"learning_rate": 3.9984968437406515e-09,
"loss": 0.4758,
"step": 26230
},
{
"epoch": 0.9889571477028606,
"grad_norm": 1.4992330073607234,
"learning_rate": 3.73979672774194e-09,
"loss": 0.4748,
"step": 26240
},
{
"epoch": 0.9893340368597595,
"grad_norm": 1.8025093840339432,
"learning_rate": 3.4897445476028692e-09,
"loss": 0.4879,
"step": 26250
},
{
"epoch": 0.9897109260166586,
"grad_norm": 1.8270840898752077,
"learning_rate": 3.2483407361338926e-09,
"loss": 0.4738,
"step": 26260
},
{
"epoch": 0.9900878151735575,
"grad_norm": 1.6829652388909495,
"learning_rate": 3.0155857111757724e-09,
"loss": 0.456,
"step": 26270
},
{
"epoch": 0.9904647043304564,
"grad_norm": 1.5636345290979268,
"learning_rate": 2.791479875600689e-09,
"loss": 0.454,
"step": 26280
},
{
"epoch": 0.9908415934873553,
"grad_norm": 1.4293143834926183,
"learning_rate": 2.5760236173094643e-09,
"loss": 0.4662,
"step": 26290
},
{
"epoch": 0.9912184826442543,
"grad_norm": 1.5866582767032813,
"learning_rate": 2.369217309231009e-09,
"loss": 0.4303,
"step": 26300
},
{
"epoch": 0.9915953718011533,
"grad_norm": 1.6824826996769227,
"learning_rate": 2.171061309322875e-09,
"loss": 0.4625,
"step": 26310
},
{
"epoch": 0.9919722609580522,
"grad_norm": 1.4469762688853414,
"learning_rate": 1.981555960569037e-09,
"loss": 0.4659,
"step": 26320
},
{
"epoch": 0.9923491501149512,
"grad_norm": 1.863209068946735,
"learning_rate": 1.8007015909815574e-09,
"loss": 0.4444,
"step": 26330
},
{
"epoch": 0.9927260392718501,
"grad_norm": 1.3864226632030934,
"learning_rate": 1.6284985135978093e-09,
"loss": 0.4594,
"step": 26340
},
{
"epoch": 0.9931029284287491,
"grad_norm": 1.8507882555475772,
"learning_rate": 1.4649470264810339e-09,
"loss": 0.47,
"step": 26350
},
{
"epoch": 0.9934798175856481,
"grad_norm": 1.727307667076328,
"learning_rate": 1.3100474127192285e-09,
"loss": 0.4829,
"step": 26360
},
{
"epoch": 0.993856706742547,
"grad_norm": 1.5872381530005595,
"learning_rate": 1.1637999404257027e-09,
"loss": 0.4752,
"step": 26370
},
{
"epoch": 0.9942335958994459,
"grad_norm": 1.7561750947121424,
"learning_rate": 1.0262048627374121e-09,
"loss": 0.4572,
"step": 26380
},
{
"epoch": 0.994610485056345,
"grad_norm": 1.523030077019712,
"learning_rate": 8.972624178149592e-10,
"loss": 0.4498,
"step": 26390
},
{
"epoch": 0.9949873742132439,
"grad_norm": 1.8336481206683812,
"learning_rate": 7.769728288420375e-10,
"loss": 0.4931,
"step": 26400
},
{
"epoch": 0.9953642633701428,
"grad_norm": 1.6093654004058522,
"learning_rate": 6.653363040270978e-10,
"loss": 0.4906,
"step": 26410
},
{
"epoch": 0.9957411525270418,
"grad_norm": 1.168615916694283,
"learning_rate": 5.62353036598351e-10,
"loss": 0.4779,
"step": 26420
},
{
"epoch": 0.9961180416839408,
"grad_norm": 1.55589136590255,
"learning_rate": 4.680232048087652e-10,
"loss": 0.4741,
"step": 26430
},
{
"epoch": 0.9964949308408397,
"grad_norm": 1.7754177648108063,
"learning_rate": 3.823469719316242e-10,
"loss": 0.4721,
"step": 26440
},
{
"epoch": 0.9968718199977387,
"grad_norm": 1.6083002564281654,
"learning_rate": 3.053244862616378e-10,
"loss": 0.4733,
"step": 26450
},
{
"epoch": 0.9972487091546376,
"grad_norm": 1.7559817189307763,
"learning_rate": 2.369558811171624e-10,
"loss": 0.4724,
"step": 26460
},
{
"epoch": 0.9976255983115366,
"grad_norm": 1.6315234881797243,
"learning_rate": 1.772412748352048e-10,
"loss": 0.4552,
"step": 26470
},
{
"epoch": 0.9980024874684356,
"grad_norm": 1.8229450451292533,
"learning_rate": 1.26180770774198e-10,
"loss": 0.475,
"step": 26480
},
{
"epoch": 0.9983793766253345,
"grad_norm": 1.6948887666570254,
"learning_rate": 8.377445731511114e-11,
"loss": 0.4713,
"step": 26490
},
{
"epoch": 0.9987562657822334,
"grad_norm": 1.6821945503509856,
"learning_rate": 5.002240785756396e-11,
"loss": 0.4639,
"step": 26500
},
{
"epoch": 0.9991331549391324,
"grad_norm": 1.5658822340781955,
"learning_rate": 2.4924680822602242e-11,
"loss": 0.4707,
"step": 26510
},
{
"epoch": 0.9995100440960314,
"grad_norm": 1.8251970618778395,
"learning_rate": 8.481319651032493e-12,
"loss": 0.4814,
"step": 26520
},
{
"epoch": 0.9998869332529303,
"grad_norm": 1.5466187547604036,
"learning_rate": 6.923528045321703e-13,
"loss": 0.4557,
"step": 26530
},
{
"epoch": 1.0,
"step": 26533,
"total_flos": 1663266888056832.0,
"train_loss": 0.5075337708668052,
"train_runtime": 63984.8167,
"train_samples_per_second": 3.317,
"train_steps_per_second": 0.415
}
],
"logging_steps": 10,
"max_steps": 26533,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 300,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1663266888056832.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}