| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 26533, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.000376889156898956, |
| "grad_norm": 3.3290820403013632, |
| "learning_rate": 3.391107761868877e-08, |
| "loss": 0.8407, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.000753778313797912, |
| "grad_norm": 2.5764460749760327, |
| "learning_rate": 7.15900527505652e-08, |
| "loss": 0.8487, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0011306674706968681, |
| "grad_norm": 3.1381979941321165, |
| "learning_rate": 1.092690278824416e-07, |
| "loss": 0.8437, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.001507556627595824, |
| "grad_norm": 2.9762629392340583, |
| "learning_rate": 1.4694800301431802e-07, |
| "loss": 0.8462, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0018844457844947801, |
| "grad_norm": 3.8702318858093343, |
| "learning_rate": 1.8462697814619442e-07, |
| "loss": 0.8557, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.0022613349413937362, |
| "grad_norm": 2.567740305342297, |
| "learning_rate": 2.2230595327807085e-07, |
| "loss": 0.7769, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.002638224098292692, |
| "grad_norm": 2.3831563848971293, |
| "learning_rate": 2.5998492840994723e-07, |
| "loss": 0.825, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.003015113255191648, |
| "grad_norm": 2.2771940101499366, |
| "learning_rate": 2.976639035418237e-07, |
| "loss": 0.8116, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.003392002412090604, |
| "grad_norm": 1.5832075179588132, |
| "learning_rate": 3.353428786737001e-07, |
| "loss": 0.7362, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.0037688915689895602, |
| "grad_norm": 2.3176022862671526, |
| "learning_rate": 3.7302185380557655e-07, |
| "loss": 0.776, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.004145780725888516, |
| "grad_norm": 2.3560080639273617, |
| "learning_rate": 4.107008289374529e-07, |
| "loss": 0.7496, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.0045226698827874725, |
| "grad_norm": 1.439480969015099, |
| "learning_rate": 4.4837980406932935e-07, |
| "loss": 0.7045, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.004899559039686428, |
| "grad_norm": 1.9309695894872856, |
| "learning_rate": 4.860587792012058e-07, |
| "loss": 0.7354, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.005276448196585384, |
| "grad_norm": 1.9566795366950147, |
| "learning_rate": 5.237377543330822e-07, |
| "loss": 0.7171, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.00565333735348434, |
| "grad_norm": 1.9730496032128708, |
| "learning_rate": 5.614167294649587e-07, |
| "loss": 0.7264, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.006030226510383296, |
| "grad_norm": 1.5644394630847538, |
| "learning_rate": 5.99095704596835e-07, |
| "loss": 0.6523, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.006407115667282253, |
| "grad_norm": 1.8613164375921365, |
| "learning_rate": 6.367746797287114e-07, |
| "loss": 0.6955, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.006784004824181208, |
| "grad_norm": 2.0520868176808023, |
| "learning_rate": 6.744536548605879e-07, |
| "loss": 0.7115, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.007160893981080164, |
| "grad_norm": 1.9416771515631597, |
| "learning_rate": 7.121326299924643e-07, |
| "loss": 0.6734, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.0075377831379791205, |
| "grad_norm": 2.4867970424407413, |
| "learning_rate": 7.498116051243407e-07, |
| "loss": 0.6785, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.007914672294878076, |
| "grad_norm": 1.7774834832854693, |
| "learning_rate": 7.874905802562172e-07, |
| "loss": 0.6227, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.008291561451777032, |
| "grad_norm": 1.9812937141799203, |
| "learning_rate": 8.251695553880935e-07, |
| "loss": 0.6806, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.00866845060867599, |
| "grad_norm": 2.921310201335651, |
| "learning_rate": 8.628485305199699e-07, |
| "loss": 0.6409, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.009045339765574945, |
| "grad_norm": 2.158350529344399, |
| "learning_rate": 9.005275056518463e-07, |
| "loss": 0.6757, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.0094222289224739, |
| "grad_norm": 2.0780037382003784, |
| "learning_rate": 9.382064807837228e-07, |
| "loss": 0.6759, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.009799118079372856, |
| "grad_norm": 1.9015358166448175, |
| "learning_rate": 9.758854559155991e-07, |
| "loss": 0.5856, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.010176007236271812, |
| "grad_norm": 1.7097668450833368, |
| "learning_rate": 1.0135644310474755e-06, |
| "loss": 0.6735, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.010552896393170768, |
| "grad_norm": 1.9842362284855692, |
| "learning_rate": 1.051243406179352e-06, |
| "loss": 0.6286, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.010929785550069725, |
| "grad_norm": 1.6319392350237532, |
| "learning_rate": 1.0889223813112285e-06, |
| "loss": 0.5993, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.01130667470696868, |
| "grad_norm": 1.9629194279960003, |
| "learning_rate": 1.126601356443105e-06, |
| "loss": 0.5989, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.011683563863867636, |
| "grad_norm": 1.635011424220465, |
| "learning_rate": 1.1642803315749811e-06, |
| "loss": 0.6292, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.012060453020766592, |
| "grad_norm": 2.7449123371579023, |
| "learning_rate": 1.2019593067068578e-06, |
| "loss": 0.6444, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.012437342177665548, |
| "grad_norm": 1.7611175230514056, |
| "learning_rate": 1.2396382818387342e-06, |
| "loss": 0.6001, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.012814231334564505, |
| "grad_norm": 1.7299587835974657, |
| "learning_rate": 1.2773172569706106e-06, |
| "loss": 0.6373, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.01319112049146346, |
| "grad_norm": 2.1190034707614367, |
| "learning_rate": 1.314996232102487e-06, |
| "loss": 0.6386, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.013568009648362417, |
| "grad_norm": 1.6507907146985035, |
| "learning_rate": 1.3526752072343632e-06, |
| "loss": 0.6025, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.013944898805261372, |
| "grad_norm": 1.6248644379536064, |
| "learning_rate": 1.3903541823662398e-06, |
| "loss": 0.633, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.014321787962160328, |
| "grad_norm": 1.8214328463856349, |
| "learning_rate": 1.4280331574981162e-06, |
| "loss": 0.6231, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.014698677119059285, |
| "grad_norm": 1.7233452084340422, |
| "learning_rate": 1.4657121326299926e-06, |
| "loss": 0.6086, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.015075566275958241, |
| "grad_norm": 1.9949384108347457, |
| "learning_rate": 1.503391107761869e-06, |
| "loss": 0.5944, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.015452455432857197, |
| "grad_norm": 1.7891389236926964, |
| "learning_rate": 1.5410700828937456e-06, |
| "loss": 0.5935, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.015829344589756152, |
| "grad_norm": 1.9798760306416079, |
| "learning_rate": 1.5787490580256218e-06, |
| "loss": 0.6405, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.01620623374665511, |
| "grad_norm": 1.8498442500498027, |
| "learning_rate": 1.6164280331574982e-06, |
| "loss": 0.5948, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.016583122903554064, |
| "grad_norm": 1.7813340027075553, |
| "learning_rate": 1.6541070082893746e-06, |
| "loss": 0.6369, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.01696001206045302, |
| "grad_norm": 1.9276634385297304, |
| "learning_rate": 1.691785983421251e-06, |
| "loss": 0.5752, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.01733690121735198, |
| "grad_norm": 1.6512215587375432, |
| "learning_rate": 1.7294649585531276e-06, |
| "loss": 0.5961, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.017713790374250932, |
| "grad_norm": 1.80925998198896, |
| "learning_rate": 1.7671439336850038e-06, |
| "loss": 0.6343, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.01809067953114989, |
| "grad_norm": 1.837322385048155, |
| "learning_rate": 1.8048229088168804e-06, |
| "loss": 0.6125, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.018467568688048844, |
| "grad_norm": 1.976044009740098, |
| "learning_rate": 1.8425018839487568e-06, |
| "loss": 0.5939, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.0188444578449478, |
| "grad_norm": 2.16596793862554, |
| "learning_rate": 1.880180859080633e-06, |
| "loss": 0.5832, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.01922134700184676, |
| "grad_norm": 1.7490489143874088, |
| "learning_rate": 1.9178598342125096e-06, |
| "loss": 0.6141, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.019598236158745713, |
| "grad_norm": 2.7984276241901394, |
| "learning_rate": 1.955538809344386e-06, |
| "loss": 0.5967, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.01997512531564467, |
| "grad_norm": 1.8460062687463663, |
| "learning_rate": 1.9932177844762624e-06, |
| "loss": 0.5855, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.020352014472543624, |
| "grad_norm": 3.071868251764471, |
| "learning_rate": 2.030896759608139e-06, |
| "loss": 0.5923, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.02072890362944258, |
| "grad_norm": 2.249558954464503, |
| "learning_rate": 2.0685757347400153e-06, |
| "loss": 0.6267, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.021105792786341535, |
| "grad_norm": 1.7739493406466322, |
| "learning_rate": 2.1062547098718917e-06, |
| "loss": 0.5733, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.021482681943240493, |
| "grad_norm": 1.9571521040237465, |
| "learning_rate": 2.143933685003768e-06, |
| "loss": 0.5901, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.02185957110013945, |
| "grad_norm": 1.8862454156341757, |
| "learning_rate": 2.1816126601356445e-06, |
| "loss": 0.5668, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.022236460257038404, |
| "grad_norm": 1.9035008144788996, |
| "learning_rate": 2.219291635267521e-06, |
| "loss": 0.6003, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.02261334941393736, |
| "grad_norm": 2.1401923526726114, |
| "learning_rate": 2.2569706103993973e-06, |
| "loss": 0.6069, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.022990238570836315, |
| "grad_norm": 1.8983635538399102, |
| "learning_rate": 2.2946495855312737e-06, |
| "loss": 0.5551, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.023367127727735273, |
| "grad_norm": 1.8341056062808996, |
| "learning_rate": 2.33232856066315e-06, |
| "loss": 0.6195, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.02374401688463423, |
| "grad_norm": 1.9001660208603897, |
| "learning_rate": 2.3700075357950265e-06, |
| "loss": 0.5787, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.024120906041533184, |
| "grad_norm": 2.024583452910895, |
| "learning_rate": 2.407686510926903e-06, |
| "loss": 0.6006, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.02449779519843214, |
| "grad_norm": 1.9671891126956806, |
| "learning_rate": 2.4453654860587793e-06, |
| "loss": 0.6045, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.024874684355331095, |
| "grad_norm": 1.9647192549218615, |
| "learning_rate": 2.483044461190656e-06, |
| "loss": 0.5632, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.025251573512230053, |
| "grad_norm": 1.6577621142537855, |
| "learning_rate": 2.5207234363225325e-06, |
| "loss": 0.5713, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.02562846266912901, |
| "grad_norm": 1.8104039197666246, |
| "learning_rate": 2.5584024114544085e-06, |
| "loss": 0.6227, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.026005351826027964, |
| "grad_norm": 1.8269715554295154, |
| "learning_rate": 2.596081386586285e-06, |
| "loss": 0.5576, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.02638224098292692, |
| "grad_norm": 2.132660241375296, |
| "learning_rate": 2.6337603617181617e-06, |
| "loss": 0.5985, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.026759130139825876, |
| "grad_norm": 1.9004356716576036, |
| "learning_rate": 2.671439336850038e-06, |
| "loss": 0.5741, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.027136019296724833, |
| "grad_norm": 1.9904870404606059, |
| "learning_rate": 2.709118311981914e-06, |
| "loss": 0.608, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.02751290845362379, |
| "grad_norm": 1.7934156140344455, |
| "learning_rate": 2.7467972871137905e-06, |
| "loss": 0.6041, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.027889797610522744, |
| "grad_norm": 1.9452679410394749, |
| "learning_rate": 2.7844762622456674e-06, |
| "loss": 0.569, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.028266686767421702, |
| "grad_norm": 1.9646256949492564, |
| "learning_rate": 2.8221552373775433e-06, |
| "loss": 0.5566, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.028643575924320656, |
| "grad_norm": 1.6964066105708113, |
| "learning_rate": 2.8598342125094197e-06, |
| "loss": 0.5771, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.029020465081219613, |
| "grad_norm": 1.626500474046293, |
| "learning_rate": 2.8975131876412966e-06, |
| "loss": 0.6028, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.02939735423811857, |
| "grad_norm": 1.705518791196053, |
| "learning_rate": 2.935192162773173e-06, |
| "loss": 0.598, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.029774243395017524, |
| "grad_norm": 2.023608938357471, |
| "learning_rate": 2.972871137905049e-06, |
| "loss": 0.599, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.030151132551916482, |
| "grad_norm": 1.9251923030741178, |
| "learning_rate": 3.0105501130369258e-06, |
| "loss": 0.5705, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.030528021708815436, |
| "grad_norm": 1.8099259001405903, |
| "learning_rate": 3.048229088168802e-06, |
| "loss": 0.5824, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.030904910865714393, |
| "grad_norm": 1.9287634604630308, |
| "learning_rate": 3.085908063300678e-06, |
| "loss": 0.5761, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.03128180002261335, |
| "grad_norm": 1.783048816707533, |
| "learning_rate": 3.123587038432555e-06, |
| "loss": 0.579, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.031658689179512305, |
| "grad_norm": 1.8166051797381484, |
| "learning_rate": 3.1612660135644314e-06, |
| "loss": 0.5877, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.03203557833641126, |
| "grad_norm": 2.038736220774468, |
| "learning_rate": 3.1989449886963074e-06, |
| "loss": 0.572, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.03241246749331022, |
| "grad_norm": 1.738601505380381, |
| "learning_rate": 3.236623963828184e-06, |
| "loss": 0.5777, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.03278935665020918, |
| "grad_norm": 1.6676309446260524, |
| "learning_rate": 3.2743029389600606e-06, |
| "loss": 0.5637, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.03316624580710813, |
| "grad_norm": 2.161220914250185, |
| "learning_rate": 3.311981914091937e-06, |
| "loss": 0.5862, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.033543134964007085, |
| "grad_norm": 1.4967702154384854, |
| "learning_rate": 3.3496608892238134e-06, |
| "loss": 0.5529, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.03392002412090604, |
| "grad_norm": 2.2279038438220877, |
| "learning_rate": 3.38733986435569e-06, |
| "loss": 0.5979, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.034296913277805, |
| "grad_norm": 1.8345971384276711, |
| "learning_rate": 3.4250188394875662e-06, |
| "loss": 0.5669, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.03467380243470396, |
| "grad_norm": 1.8142008601019335, |
| "learning_rate": 3.462697814619443e-06, |
| "loss": 0.6041, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.03505069159160291, |
| "grad_norm": 1.6693434904318734, |
| "learning_rate": 3.500376789751319e-06, |
| "loss": 0.5272, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.035427580748501865, |
| "grad_norm": 1.945433750394992, |
| "learning_rate": 3.5380557648831954e-06, |
| "loss": 0.558, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.03580446990540082, |
| "grad_norm": 2.1129599329826614, |
| "learning_rate": 3.5757347400150723e-06, |
| "loss": 0.5801, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.03618135906229978, |
| "grad_norm": 1.905120951845058, |
| "learning_rate": 3.6134137151469482e-06, |
| "loss": 0.572, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.03655824821919874, |
| "grad_norm": 1.972585987519169, |
| "learning_rate": 3.6510926902788246e-06, |
| "loss": 0.5995, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.03693513737609769, |
| "grad_norm": 1.9036055814375914, |
| "learning_rate": 3.688771665410701e-06, |
| "loss": 0.5993, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.037312026532996645, |
| "grad_norm": 1.8646464012423685, |
| "learning_rate": 3.7264506405425774e-06, |
| "loss": 0.5733, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.0376889156898956, |
| "grad_norm": 2.3089435664838933, |
| "learning_rate": 3.764129615674454e-06, |
| "loss": 0.5835, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.03806580484679456, |
| "grad_norm": 2.17191968568418, |
| "learning_rate": 3.8018085908063303e-06, |
| "loss": 0.5412, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.03844269400369352, |
| "grad_norm": 1.8949936547455895, |
| "learning_rate": 3.839487565938207e-06, |
| "loss": 0.5966, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.03881958316059247, |
| "grad_norm": 2.473982294301116, |
| "learning_rate": 3.877166541070083e-06, |
| "loss": 0.5615, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.039196472317491425, |
| "grad_norm": 1.7089999900943973, |
| "learning_rate": 3.9148455162019595e-06, |
| "loss": 0.5752, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.03957336147439038, |
| "grad_norm": 1.85392931543439, |
| "learning_rate": 3.952524491333836e-06, |
| "loss": 0.578, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.03995025063128934, |
| "grad_norm": 1.4905002182549854, |
| "learning_rate": 3.990203466465712e-06, |
| "loss": 0.553, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.04032713978818829, |
| "grad_norm": 1.7544817835449993, |
| "learning_rate": 4.027882441597589e-06, |
| "loss": 0.5564, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.04070402894508725, |
| "grad_norm": 1.798461388686054, |
| "learning_rate": 4.065561416729465e-06, |
| "loss": 0.5635, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.041080918101986205, |
| "grad_norm": 1.7675958852701312, |
| "learning_rate": 4.1032403918613415e-06, |
| "loss": 0.5683, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.04145780725888516, |
| "grad_norm": 2.1395567530774895, |
| "learning_rate": 4.140919366993218e-06, |
| "loss": 0.5678, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.04183469641578412, |
| "grad_norm": 1.703017057556802, |
| "learning_rate": 4.178598342125095e-06, |
| "loss": 0.552, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.04221158557268307, |
| "grad_norm": 1.6184789818203074, |
| "learning_rate": 4.216277317256971e-06, |
| "loss": 0.5655, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.04258847472958203, |
| "grad_norm": 2.0996907497711574, |
| "learning_rate": 4.253956292388847e-06, |
| "loss": 0.5525, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.042965363886480985, |
| "grad_norm": 1.8890470204414929, |
| "learning_rate": 4.291635267520724e-06, |
| "loss": 0.5608, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.04334225304337994, |
| "grad_norm": 2.080468775436257, |
| "learning_rate": 4.3293142426526e-06, |
| "loss": 0.5511, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.0437191422002789, |
| "grad_norm": 1.8224041675858285, |
| "learning_rate": 4.366993217784476e-06, |
| "loss": 0.5727, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.04409603135717785, |
| "grad_norm": 1.6149909377743559, |
| "learning_rate": 4.4046721929163536e-06, |
| "loss": 0.5448, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.04447292051407681, |
| "grad_norm": 1.9061771805744654, |
| "learning_rate": 4.442351168048229e-06, |
| "loss": 0.5637, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.044849809670975765, |
| "grad_norm": 1.807877264857849, |
| "learning_rate": 4.4800301431801055e-06, |
| "loss": 0.5511, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.04522669882787472, |
| "grad_norm": 2.0181868598887487, |
| "learning_rate": 4.517709118311983e-06, |
| "loss": 0.5582, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.04560358798477368, |
| "grad_norm": 1.573360107816235, |
| "learning_rate": 4.555388093443859e-06, |
| "loss": 0.5644, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.04598047714167263, |
| "grad_norm": 1.771584321526413, |
| "learning_rate": 4.593067068575735e-06, |
| "loss": 0.5689, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.04635736629857159, |
| "grad_norm": 2.0063138861207364, |
| "learning_rate": 4.630746043707611e-06, |
| "loss": 0.5724, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.046734255455470546, |
| "grad_norm": 1.6920600006872095, |
| "learning_rate": 4.668425018839488e-06, |
| "loss": 0.5566, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.0471111446123695, |
| "grad_norm": 1.8971593808774774, |
| "learning_rate": 4.706103993971364e-06, |
| "loss": 0.5699, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.04748803376926846, |
| "grad_norm": 1.587611604567327, |
| "learning_rate": 4.74378296910324e-06, |
| "loss": 0.5649, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.04786492292616741, |
| "grad_norm": 2.0265270332073246, |
| "learning_rate": 4.781461944235118e-06, |
| "loss": 0.5374, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.04824181208306637, |
| "grad_norm": 2.0590285975116083, |
| "learning_rate": 4.819140919366993e-06, |
| "loss": 0.576, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.048618701239965326, |
| "grad_norm": 1.7133539806651839, |
| "learning_rate": 4.8568198944988696e-06, |
| "loss": 0.5706, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.04899559039686428, |
| "grad_norm": 1.8988624169513535, |
| "learning_rate": 4.894498869630747e-06, |
| "loss": 0.5552, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.04937247955376324, |
| "grad_norm": 2.0372135320392086, |
| "learning_rate": 4.932177844762623e-06, |
| "loss": 0.5459, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.04974936871066219, |
| "grad_norm": 1.7794743693654322, |
| "learning_rate": 4.969856819894499e-06, |
| "loss": 0.5548, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.05012625786756115, |
| "grad_norm": 1.7352843761532744, |
| "learning_rate": 5.007535795026376e-06, |
| "loss": 0.571, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.050503147024460106, |
| "grad_norm": 1.8996845237311992, |
| "learning_rate": 5.0452147701582524e-06, |
| "loss": 0.5493, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.05088003618135906, |
| "grad_norm": 2.1612445033194962, |
| "learning_rate": 5.082893745290128e-06, |
| "loss": 0.5707, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.05125692533825802, |
| "grad_norm": 2.022115535247945, |
| "learning_rate": 5.120572720422004e-06, |
| "loss": 0.5541, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.05163381449515697, |
| "grad_norm": 1.714380161573395, |
| "learning_rate": 5.158251695553881e-06, |
| "loss": 0.5522, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.05201070365205593, |
| "grad_norm": 1.8375152499536425, |
| "learning_rate": 5.195930670685758e-06, |
| "loss": 0.5607, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.052387592808954886, |
| "grad_norm": 1.7211038349954213, |
| "learning_rate": 5.2336096458176345e-06, |
| "loss": 0.5429, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.05276448196585384, |
| "grad_norm": 1.9959664354157889, |
| "learning_rate": 5.271288620949511e-06, |
| "loss": 0.5656, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.0531413711227528, |
| "grad_norm": 1.7536569082906184, |
| "learning_rate": 5.308967596081387e-06, |
| "loss": 0.5452, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.05351826027965175, |
| "grad_norm": 1.7231610063386784, |
| "learning_rate": 5.346646571213263e-06, |
| "loss": 0.5353, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.05389514943655071, |
| "grad_norm": 1.8970119256356177, |
| "learning_rate": 5.384325546345139e-06, |
| "loss": 0.5302, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.054272038593449666, |
| "grad_norm": 1.5800174842538417, |
| "learning_rate": 5.4220045214770165e-06, |
| "loss": 0.5624, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.05464892775034862, |
| "grad_norm": 2.0740091270784053, |
| "learning_rate": 5.459683496608893e-06, |
| "loss": 0.5377, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.05502581690724758, |
| "grad_norm": 1.8294455280243227, |
| "learning_rate": 5.497362471740769e-06, |
| "loss": 0.5566, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.05540270606414653, |
| "grad_norm": 1.5838428291746807, |
| "learning_rate": 5.535041446872646e-06, |
| "loss": 0.5446, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.05577959522104549, |
| "grad_norm": 2.1056158207672233, |
| "learning_rate": 5.572720422004522e-06, |
| "loss": 0.5726, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.056156484377944446, |
| "grad_norm": 1.8677258424310819, |
| "learning_rate": 5.610399397136398e-06, |
| "loss": 0.5678, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.056533373534843404, |
| "grad_norm": 2.014882289331746, |
| "learning_rate": 5.648078372268275e-06, |
| "loss": 0.5623, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.05691026269174236, |
| "grad_norm": 1.8459621787433622, |
| "learning_rate": 5.685757347400151e-06, |
| "loss": 0.5775, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.05728715184864131, |
| "grad_norm": 1.7030968368942396, |
| "learning_rate": 5.723436322532028e-06, |
| "loss": 0.5456, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.05766404100554027, |
| "grad_norm": 1.7530142515014413, |
| "learning_rate": 5.761115297663904e-06, |
| "loss": 0.5493, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.058040930162439226, |
| "grad_norm": 1.7511504402842564, |
| "learning_rate": 5.7987942727957805e-06, |
| "loss": 0.5633, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.058417819319338184, |
| "grad_norm": 2.1931311143367607, |
| "learning_rate": 5.836473247927656e-06, |
| "loss": 0.5581, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.05879470847623714, |
| "grad_norm": 3.636694230456593, |
| "learning_rate": 5.874152223059534e-06, |
| "loss": 0.546, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.05917159763313609, |
| "grad_norm": 2.3094095827220302, |
| "learning_rate": 5.91183119819141e-06, |
| "loss": 0.5875, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.05954848679003505, |
| "grad_norm": 2.0011971290567794, |
| "learning_rate": 5.949510173323286e-06, |
| "loss": 0.5575, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.059925375946934006, |
| "grad_norm": 1.6650652223048363, |
| "learning_rate": 5.9871891484551625e-06, |
| "loss": 0.5525, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.060302265103832964, |
| "grad_norm": 1.9533169899266176, |
| "learning_rate": 6.024868123587039e-06, |
| "loss": 0.574, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.06067915426073192, |
| "grad_norm": 1.685943647766806, |
| "learning_rate": 6.062547098718915e-06, |
| "loss": 0.5149, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.06105604341763087, |
| "grad_norm": 1.3728725575465968, |
| "learning_rate": 6.100226073850791e-06, |
| "loss": 0.5436, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.06143293257452983, |
| "grad_norm": 1.8741392657740152, |
| "learning_rate": 6.137905048982669e-06, |
| "loss": 0.5628, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.061809821731428786, |
| "grad_norm": 1.9761429074276111, |
| "learning_rate": 6.1755840241145446e-06, |
| "loss": 0.5713, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.062186710888327744, |
| "grad_norm": 2.03436821453595, |
| "learning_rate": 6.213262999246421e-06, |
| "loss": 0.6002, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.0625636000452267, |
| "grad_norm": 1.832975464096823, |
| "learning_rate": 6.250941974378297e-06, |
| "loss": 0.5728, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.06294048920212565, |
| "grad_norm": 1.633511698263684, |
| "learning_rate": 6.288620949510174e-06, |
| "loss": 0.5552, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.06331737835902461, |
| "grad_norm": 2.090535151172006, |
| "learning_rate": 6.32629992464205e-06, |
| "loss": 0.531, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.06369426751592357, |
| "grad_norm": 1.6050052308654612, |
| "learning_rate": 6.363978899773927e-06, |
| "loss": 0.5891, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.06407115667282252, |
| "grad_norm": 2.212232256420831, |
| "learning_rate": 6.401657874905803e-06, |
| "loss": 0.5752, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.06444804582972148, |
| "grad_norm": 1.877674806453838, |
| "learning_rate": 6.439336850037679e-06, |
| "loss": 0.5778, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.06482493498662044, |
| "grad_norm": 1.8261529051865222, |
| "learning_rate": 6.477015825169556e-06, |
| "loss": 0.5437, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.0652018241435194, |
| "grad_norm": 2.1674429445191032, |
| "learning_rate": 6.514694800301432e-06, |
| "loss": 0.5439, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.06557871330041835, |
| "grad_norm": 1.7036520231489474, |
| "learning_rate": 6.552373775433309e-06, |
| "loss": 0.5716, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.0659556024573173, |
| "grad_norm": 2.031911396789848, |
| "learning_rate": 6.590052750565186e-06, |
| "loss": 0.5365, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.06633249161421625, |
| "grad_norm": 1.784884433990894, |
| "learning_rate": 6.627731725697062e-06, |
| "loss": 0.5556, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.06670938077111521, |
| "grad_norm": 1.7250914216810251, |
| "learning_rate": 6.665410700828938e-06, |
| "loss": 0.577, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.06708626992801417, |
| "grad_norm": 2.0118741851139417, |
| "learning_rate": 6.703089675960814e-06, |
| "loss": 0.5792, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.06746315908491313, |
| "grad_norm": 2.083914596235162, |
| "learning_rate": 6.740768651092691e-06, |
| "loss": 0.5817, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.06784004824181208, |
| "grad_norm": 1.8512470922958455, |
| "learning_rate": 6.778447626224567e-06, |
| "loss": 0.5421, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.06821693739871104, |
| "grad_norm": 1.8612678054005098, |
| "learning_rate": 6.816126601356444e-06, |
| "loss": 0.5392, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.06859382655561, |
| "grad_norm": 1.928991115625655, |
| "learning_rate": 6.853805576488321e-06, |
| "loss": 0.5819, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.06897071571250896, |
| "grad_norm": 1.8008419029962375, |
| "learning_rate": 6.891484551620197e-06, |
| "loss": 0.5547, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.06934760486940791, |
| "grad_norm": 2.0597436169161973, |
| "learning_rate": 6.929163526752073e-06, |
| "loss": 0.5599, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.06972449402630686, |
| "grad_norm": 2.0570020874528567, |
| "learning_rate": 6.966842501883949e-06, |
| "loss": 0.5694, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.07010138318320581, |
| "grad_norm": 1.5952035385299899, |
| "learning_rate": 7.0045214770158254e-06, |
| "loss": 0.5406, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.07047827234010477, |
| "grad_norm": 1.7271708038895457, |
| "learning_rate": 7.042200452147702e-06, |
| "loss": 0.555, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.07085516149700373, |
| "grad_norm": 2.0101886368535813, |
| "learning_rate": 7.079879427279579e-06, |
| "loss": 0.5716, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.07123205065390269, |
| "grad_norm": 1.6065477858483548, |
| "learning_rate": 7.1175584024114555e-06, |
| "loss": 0.5476, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.07160893981080164, |
| "grad_norm": 2.02286611730447, |
| "learning_rate": 7.155237377543331e-06, |
| "loss": 0.5383, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.0719858289677006, |
| "grad_norm": 2.037314841983119, |
| "learning_rate": 7.1929163526752075e-06, |
| "loss": 0.5587, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.07236271812459956, |
| "grad_norm": 1.8264192102516816, |
| "learning_rate": 7.230595327807084e-06, |
| "loss": 0.5553, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.07273960728149852, |
| "grad_norm": 1.9520150148970015, |
| "learning_rate": 7.26827430293896e-06, |
| "loss": 0.567, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.07311649643839747, |
| "grad_norm": 1.6905896085482799, |
| "learning_rate": 7.3059532780708375e-06, |
| "loss": 0.5252, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.07349338559529642, |
| "grad_norm": 1.8762657820598212, |
| "learning_rate": 7.343632253202714e-06, |
| "loss": 0.5696, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.07387027475219538, |
| "grad_norm": 1.7670005320740287, |
| "learning_rate": 7.38131122833459e-06, |
| "loss": 0.5444, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.07424716390909433, |
| "grad_norm": 1.7538737345614326, |
| "learning_rate": 7.418990203466466e-06, |
| "loss": 0.5884, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.07462405306599329, |
| "grad_norm": 1.515985834449344, |
| "learning_rate": 7.456669178598342e-06, |
| "loss": 0.5446, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.07500094222289225, |
| "grad_norm": 1.5815994265482278, |
| "learning_rate": 7.494348153730219e-06, |
| "loss": 0.5591, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.0753778313797912, |
| "grad_norm": 2.0488961746529486, |
| "learning_rate": 7.532027128862096e-06, |
| "loss": 0.5706, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.07575472053669016, |
| "grad_norm": 1.969269141686112, |
| "learning_rate": 7.569706103993972e-06, |
| "loss": 0.558, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.07613160969358912, |
| "grad_norm": 2.266330513109259, |
| "learning_rate": 7.607385079125849e-06, |
| "loss": 0.5396, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.07650849885048808, |
| "grad_norm": 1.8646005092443476, |
| "learning_rate": 7.645064054257724e-06, |
| "loss": 0.5423, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.07688538800738703, |
| "grad_norm": 1.7399985247018814, |
| "learning_rate": 7.682743029389602e-06, |
| "loss": 0.549, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.07726227716428598, |
| "grad_norm": 1.8255156942684898, |
| "learning_rate": 7.720422004521477e-06, |
| "loss": 0.5549, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.07763916632118494, |
| "grad_norm": 1.946851326618598, |
| "learning_rate": 7.758100979653354e-06, |
| "loss": 0.5666, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.07801605547808389, |
| "grad_norm": 1.9532587449674659, |
| "learning_rate": 7.795779954785232e-06, |
| "loss": 0.5711, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.07839294463498285, |
| "grad_norm": 1.7952693056787907, |
| "learning_rate": 7.833458929917107e-06, |
| "loss": 0.5386, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.07876983379188181, |
| "grad_norm": 1.8221658229163284, |
| "learning_rate": 7.871137905048983e-06, |
| "loss": 0.5731, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.07914672294878076, |
| "grad_norm": 1.833428780602818, |
| "learning_rate": 7.90881688018086e-06, |
| "loss": 0.5546, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.07952361210567972, |
| "grad_norm": 1.7570404308629375, |
| "learning_rate": 7.946495855312736e-06, |
| "loss": 0.5323, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.07990050126257868, |
| "grad_norm": 1.802778650409238, |
| "learning_rate": 7.984174830444613e-06, |
| "loss": 0.5422, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.08027739041947764, |
| "grad_norm": 1.6222520770975064, |
| "learning_rate": 8.02185380557649e-06, |
| "loss": 0.5573, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.08065427957637658, |
| "grad_norm": 1.9266827677857417, |
| "learning_rate": 8.059532780708366e-06, |
| "loss": 0.5559, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.08103116873327554, |
| "grad_norm": 2.050814084562181, |
| "learning_rate": 8.097211755840241e-06, |
| "loss": 0.551, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.0814080578901745, |
| "grad_norm": 1.7585001593613225, |
| "learning_rate": 8.134890730972118e-06, |
| "loss": 0.5364, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.08178494704707345, |
| "grad_norm": 1.8883866215853788, |
| "learning_rate": 8.172569706103994e-06, |
| "loss": 0.5888, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.08216183620397241, |
| "grad_norm": 1.758532254936197, |
| "learning_rate": 8.210248681235871e-06, |
| "loss": 0.5712, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.08253872536087137, |
| "grad_norm": 1.7452201238698966, |
| "learning_rate": 8.247927656367748e-06, |
| "loss": 0.562, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.08291561451777033, |
| "grad_norm": 2.0545039008121373, |
| "learning_rate": 8.285606631499624e-06, |
| "loss": 0.5326, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.08329250367466928, |
| "grad_norm": 1.7697141454393999, |
| "learning_rate": 8.323285606631501e-06, |
| "loss": 0.5777, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.08366939283156824, |
| "grad_norm": 1.9252812449566288, |
| "learning_rate": 8.360964581763377e-06, |
| "loss": 0.5747, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.0840462819884672, |
| "grad_norm": 1.8673410308838145, |
| "learning_rate": 8.398643556895252e-06, |
| "loss": 0.5352, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.08442317114536614, |
| "grad_norm": 1.649056430020697, |
| "learning_rate": 8.43632253202713e-06, |
| "loss": 0.5661, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.0848000603022651, |
| "grad_norm": 1.8813402816460187, |
| "learning_rate": 8.474001507159007e-06, |
| "loss": 0.5323, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.08517694945916406, |
| "grad_norm": 1.8849842368697092, |
| "learning_rate": 8.511680482290882e-06, |
| "loss": 0.5502, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.08555383861606301, |
| "grad_norm": 1.7894509940448735, |
| "learning_rate": 8.54935945742276e-06, |
| "loss": 0.5819, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.08593072777296197, |
| "grad_norm": 1.7627934782136696, |
| "learning_rate": 8.587038432554635e-06, |
| "loss": 0.5574, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.08630761692986093, |
| "grad_norm": 1.7684239573461413, |
| "learning_rate": 8.62471740768651e-06, |
| "loss": 0.5421, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.08668450608675989, |
| "grad_norm": 1.8420421564268776, |
| "learning_rate": 8.662396382818388e-06, |
| "loss": 0.5786, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.08706139524365884, |
| "grad_norm": 1.600074045318061, |
| "learning_rate": 8.700075357950264e-06, |
| "loss": 0.535, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.0874382844005578, |
| "grad_norm": 2.151450395002285, |
| "learning_rate": 8.737754333082141e-06, |
| "loss": 0.5821, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.08781517355745676, |
| "grad_norm": 1.805383691002577, |
| "learning_rate": 8.775433308214018e-06, |
| "loss": 0.5787, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.0881920627143557, |
| "grad_norm": 1.9600049150179448, |
| "learning_rate": 8.813112283345894e-06, |
| "loss": 0.5501, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.08856895187125466, |
| "grad_norm": 2.0847026403429485, |
| "learning_rate": 8.85079125847777e-06, |
| "loss": 0.5462, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.08894584102815362, |
| "grad_norm": 1.7768359780016918, |
| "learning_rate": 8.888470233609646e-06, |
| "loss": 0.5837, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.08932273018505257, |
| "grad_norm": 2.0158186623423386, |
| "learning_rate": 8.926149208741522e-06, |
| "loss": 0.5616, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.08969961934195153, |
| "grad_norm": 1.6581403784334712, |
| "learning_rate": 8.9638281838734e-06, |
| "loss": 0.5743, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.09007650849885049, |
| "grad_norm": 2.5790022328647044, |
| "learning_rate": 9.001507159005277e-06, |
| "loss": 0.5744, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.09045339765574945, |
| "grad_norm": 1.723560039585966, |
| "learning_rate": 9.039186134137152e-06, |
| "loss": 0.5602, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.0908302868126484, |
| "grad_norm": 1.351214600774913, |
| "learning_rate": 9.07686510926903e-06, |
| "loss": 0.5331, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.09120717596954736, |
| "grad_norm": 1.8039553055086206, |
| "learning_rate": 9.114544084400905e-06, |
| "loss": 0.5677, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.09158406512644632, |
| "grad_norm": 1.9123242622126195, |
| "learning_rate": 9.15222305953278e-06, |
| "loss": 0.5588, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.09196095428334526, |
| "grad_norm": 2.006300165478271, |
| "learning_rate": 9.189902034664658e-06, |
| "loss": 0.5812, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.09233784344024422, |
| "grad_norm": 1.9159468022645592, |
| "learning_rate": 9.227581009796535e-06, |
| "loss": 0.5372, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.09271473259714318, |
| "grad_norm": 1.7917577869195462, |
| "learning_rate": 9.26525998492841e-06, |
| "loss": 0.5675, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.09309162175404213, |
| "grad_norm": 5.4204153510281, |
| "learning_rate": 9.302938960060288e-06, |
| "loss": 0.5674, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.09346851091094109, |
| "grad_norm": 1.6618922792887658, |
| "learning_rate": 9.340617935192163e-06, |
| "loss": 0.5745, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.09384540006784005, |
| "grad_norm": 1.665306637026819, |
| "learning_rate": 9.378296910324039e-06, |
| "loss": 0.5556, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.094222289224739, |
| "grad_norm": 1.825366672374041, |
| "learning_rate": 9.415975885455916e-06, |
| "loss": 0.5318, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.09459917838163796, |
| "grad_norm": 1.9696174903266126, |
| "learning_rate": 9.453654860587793e-06, |
| "loss": 0.5411, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.09497606753853692, |
| "grad_norm": 1.7049399302783044, |
| "learning_rate": 9.491333835719669e-06, |
| "loss": 0.5706, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.09535295669543588, |
| "grad_norm": 1.7849968576762765, |
| "learning_rate": 9.529012810851546e-06, |
| "loss": 0.5665, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.09572984585233482, |
| "grad_norm": 1.353686035468744, |
| "learning_rate": 9.566691785983422e-06, |
| "loss": 0.5459, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.09610673500923378, |
| "grad_norm": 1.7623768145461352, |
| "learning_rate": 9.604370761115297e-06, |
| "loss": 0.5866, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.09648362416613274, |
| "grad_norm": 1.9641841168234557, |
| "learning_rate": 9.642049736247175e-06, |
| "loss": 0.5466, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.0968605133230317, |
| "grad_norm": 1.3736072242645836, |
| "learning_rate": 9.679728711379052e-06, |
| "loss": 0.5391, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.09723740247993065, |
| "grad_norm": 1.654177532173035, |
| "learning_rate": 9.717407686510927e-06, |
| "loss": 0.5592, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.09761429163682961, |
| "grad_norm": 1.675077992009493, |
| "learning_rate": 9.755086661642805e-06, |
| "loss": 0.5484, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.09799118079372857, |
| "grad_norm": 1.7745168797445137, |
| "learning_rate": 9.79276563677468e-06, |
| "loss": 0.5656, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.09836806995062752, |
| "grad_norm": 1.665358523138359, |
| "learning_rate": 9.830444611906557e-06, |
| "loss": 0.5544, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.09874495910752648, |
| "grad_norm": 2.373990573362573, |
| "learning_rate": 9.868123587038433e-06, |
| "loss": 0.5448, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.09912184826442544, |
| "grad_norm": 1.6903642288991887, |
| "learning_rate": 9.90580256217031e-06, |
| "loss": 0.5562, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.09949873742132438, |
| "grad_norm": 1.711502598162347, |
| "learning_rate": 9.943481537302186e-06, |
| "loss": 0.5582, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.09987562657822334, |
| "grad_norm": 1.7630192145087855, |
| "learning_rate": 9.981160512434063e-06, |
| "loss": 0.5568, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.1002525157351223, |
| "grad_norm": 1.9391272093641074, |
| "learning_rate": 9.999998918198758e-06, |
| "loss": 0.5814, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.10062940489202125, |
| "grad_norm": 1.1176379107900862, |
| "learning_rate": 9.999990263791625e-06, |
| "loss": 0.5327, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.10100629404892021, |
| "grad_norm": 1.7144355514907554, |
| "learning_rate": 9.99997295499234e-06, |
| "loss": 0.5424, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.10138318320581917, |
| "grad_norm": 1.3867888190170987, |
| "learning_rate": 9.99994699183086e-06, |
| "loss": 0.5498, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.10176007236271813, |
| "grad_norm": 1.6251131139875976, |
| "learning_rate": 9.999912374352125e-06, |
| "loss": 0.5526, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.10213696151961708, |
| "grad_norm": 1.758258285021875, |
| "learning_rate": 9.999869102616057e-06, |
| "loss": 0.5647, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.10251385067651604, |
| "grad_norm": 1.8922253129449746, |
| "learning_rate": 9.99981717669755e-06, |
| "loss": 0.569, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.102890739833415, |
| "grad_norm": 1.6492463045156762, |
| "learning_rate": 9.999756596686483e-06, |
| "loss": 0.5639, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.10326762899031394, |
| "grad_norm": 1.6884719814636986, |
| "learning_rate": 9.999687362687714e-06, |
| "loss": 0.5302, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.1036445181472129, |
| "grad_norm": 1.7663588108147437, |
| "learning_rate": 9.999609474821078e-06, |
| "loss": 0.5776, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.10402140730411186, |
| "grad_norm": 1.6023970608406795, |
| "learning_rate": 9.999522933221389e-06, |
| "loss": 0.5609, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.10439829646101081, |
| "grad_norm": 1.582018849874625, |
| "learning_rate": 9.999427738038438e-06, |
| "loss": 0.5432, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.10477518561790977, |
| "grad_norm": 1.8661113330406751, |
| "learning_rate": 9.999323889437004e-06, |
| "loss": 0.5505, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.10515207477480873, |
| "grad_norm": 2.4377666457132405, |
| "learning_rate": 9.99921138759683e-06, |
| "loss": 0.552, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.10552896393170769, |
| "grad_norm": 1.9769702999570735, |
| "learning_rate": 9.999090232712648e-06, |
| "loss": 0.5706, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.10590585308860664, |
| "grad_norm": 1.5852863285358036, |
| "learning_rate": 9.998960424994157e-06, |
| "loss": 0.5468, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.1062827422455056, |
| "grad_norm": 1.9487316640058903, |
| "learning_rate": 9.998821964666043e-06, |
| "loss": 0.5566, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.10665963140240456, |
| "grad_norm": 1.470091180676127, |
| "learning_rate": 9.998674851967965e-06, |
| "loss": 0.5375, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.1070365205593035, |
| "grad_norm": 1.8861024609523855, |
| "learning_rate": 9.998519087154555e-06, |
| "loss": 0.5714, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.10741340971620246, |
| "grad_norm": 1.8625015944443275, |
| "learning_rate": 9.998354670495426e-06, |
| "loss": 0.5718, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.10779029887310142, |
| "grad_norm": 1.9173353321183049, |
| "learning_rate": 9.99818160227516e-06, |
| "loss": 0.5591, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.10816718803000037, |
| "grad_norm": 1.6563786175924313, |
| "learning_rate": 9.997999882793323e-06, |
| "loss": 0.5644, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.10854407718689933, |
| "grad_norm": 1.8385719564959482, |
| "learning_rate": 9.997809512364447e-06, |
| "loss": 0.5574, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.10892096634379829, |
| "grad_norm": 1.7214173447884336, |
| "learning_rate": 9.99761049131804e-06, |
| "loss": 0.5575, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.10929785550069725, |
| "grad_norm": 2.263948770043836, |
| "learning_rate": 9.997402819998585e-06, |
| "loss": 0.5717, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.1096747446575962, |
| "grad_norm": 1.617474690577473, |
| "learning_rate": 9.997186498765537e-06, |
| "loss": 0.5381, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.11005163381449516, |
| "grad_norm": 1.9206922286733692, |
| "learning_rate": 9.996961527993322e-06, |
| "loss": 0.5403, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.11042852297139412, |
| "grad_norm": 1.6363891592183775, |
| "learning_rate": 9.996727908071337e-06, |
| "loss": 0.5504, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.11080541212829306, |
| "grad_norm": 1.7892900410710342, |
| "learning_rate": 9.99648563940395e-06, |
| "loss": 0.5398, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.11118230128519202, |
| "grad_norm": 1.837754723538967, |
| "learning_rate": 9.996234722410501e-06, |
| "loss": 0.5501, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.11155919044209098, |
| "grad_norm": 1.870686763606362, |
| "learning_rate": 9.995975157525298e-06, |
| "loss": 0.5697, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.11193607959898993, |
| "grad_norm": 1.8670194608446757, |
| "learning_rate": 9.995706945197616e-06, |
| "loss": 0.5819, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.11231296875588889, |
| "grad_norm": 1.764875781288927, |
| "learning_rate": 9.995430085891698e-06, |
| "loss": 0.5558, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.11268985791278785, |
| "grad_norm": 1.6461496237741704, |
| "learning_rate": 9.995144580086757e-06, |
| "loss": 0.5528, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.11306674706968681, |
| "grad_norm": 1.6525100198106808, |
| "learning_rate": 9.99485042827697e-06, |
| "loss": 0.5412, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.11344363622658576, |
| "grad_norm": 1.7111537561130798, |
| "learning_rate": 9.994547630971476e-06, |
| "loss": 0.556, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.11382052538348472, |
| "grad_norm": 1.6948046733289588, |
| "learning_rate": 9.994236188694384e-06, |
| "loss": 0.5634, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.11419741454038368, |
| "grad_norm": 1.7787138971691205, |
| "learning_rate": 9.99391610198476e-06, |
| "loss": 0.5099, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.11457430369728262, |
| "grad_norm": 1.6976810544764316, |
| "learning_rate": 9.993587371396642e-06, |
| "loss": 0.5526, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.11495119285418158, |
| "grad_norm": 1.8408592594962616, |
| "learning_rate": 9.99324999749902e-06, |
| "loss": 0.5328, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.11532808201108054, |
| "grad_norm": 1.7138365866573475, |
| "learning_rate": 9.992903980875849e-06, |
| "loss": 0.5233, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.1157049711679795, |
| "grad_norm": 1.6023041921101526, |
| "learning_rate": 9.992549322126044e-06, |
| "loss": 0.5562, |
| "step": 3070 |
| }, |
| { |
| "epoch": 0.11608186032487845, |
| "grad_norm": 1.7101785936588327, |
| "learning_rate": 9.992186021863475e-06, |
| "loss": 0.5625, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.11645874948177741, |
| "grad_norm": 1.7736884499183863, |
| "learning_rate": 9.991814080716974e-06, |
| "loss": 0.5624, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.11683563863867637, |
| "grad_norm": 2.1118966740309313, |
| "learning_rate": 9.991433499330326e-06, |
| "loss": 0.569, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.11721252779557532, |
| "grad_norm": 1.882358555790821, |
| "learning_rate": 9.991044278362274e-06, |
| "loss": 0.5556, |
| "step": 3110 |
| }, |
| { |
| "epoch": 0.11758941695247428, |
| "grad_norm": 1.4807120288858666, |
| "learning_rate": 9.990646418486512e-06, |
| "loss": 0.5332, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.11796630610937324, |
| "grad_norm": 1.9456842492772166, |
| "learning_rate": 9.990239920391687e-06, |
| "loss": 0.5456, |
| "step": 3130 |
| }, |
| { |
| "epoch": 0.11834319526627218, |
| "grad_norm": 1.9332110812656393, |
| "learning_rate": 9.9898247847814e-06, |
| "loss": 0.5914, |
| "step": 3140 |
| }, |
| { |
| "epoch": 0.11872008442317114, |
| "grad_norm": 1.819320948849462, |
| "learning_rate": 9.989401012374207e-06, |
| "loss": 0.5212, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.1190969735800701, |
| "grad_norm": 1.5937443837733034, |
| "learning_rate": 9.9889686039036e-06, |
| "loss": 0.5719, |
| "step": 3160 |
| }, |
| { |
| "epoch": 0.11947386273696906, |
| "grad_norm": 1.4724382207969984, |
| "learning_rate": 9.988527560118033e-06, |
| "loss": 0.5514, |
| "step": 3170 |
| }, |
| { |
| "epoch": 0.11985075189386801, |
| "grad_norm": 1.7128687990343496, |
| "learning_rate": 9.988077881780896e-06, |
| "loss": 0.5342, |
| "step": 3180 |
| }, |
| { |
| "epoch": 0.12022764105076697, |
| "grad_norm": 1.7843398826770018, |
| "learning_rate": 9.987619569670532e-06, |
| "loss": 0.5425, |
| "step": 3190 |
| }, |
| { |
| "epoch": 0.12060453020766593, |
| "grad_norm": 1.6747602178489263, |
| "learning_rate": 9.987152624580223e-06, |
| "loss": 0.5498, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.12098141936456488, |
| "grad_norm": 1.9740415780360288, |
| "learning_rate": 9.986677047318199e-06, |
| "loss": 0.5787, |
| "step": 3210 |
| }, |
| { |
| "epoch": 0.12135830852146384, |
| "grad_norm": 1.5401247418757107, |
| "learning_rate": 9.986192838707624e-06, |
| "loss": 0.529, |
| "step": 3220 |
| }, |
| { |
| "epoch": 0.1217351976783628, |
| "grad_norm": 1.6649769987957272, |
| "learning_rate": 9.98569999958661e-06, |
| "loss": 0.5532, |
| "step": 3230 |
| }, |
| { |
| "epoch": 0.12211208683526174, |
| "grad_norm": 2.0615017580666546, |
| "learning_rate": 9.9851985308082e-06, |
| "loss": 0.5306, |
| "step": 3240 |
| }, |
| { |
| "epoch": 0.1224889759921607, |
| "grad_norm": 1.7489740809134746, |
| "learning_rate": 9.98468843324038e-06, |
| "loss": 0.5728, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.12286586514905966, |
| "grad_norm": 1.9584508485487868, |
| "learning_rate": 9.984169707766065e-06, |
| "loss": 0.5497, |
| "step": 3260 |
| }, |
| { |
| "epoch": 0.12324275430595862, |
| "grad_norm": 1.8986564469950582, |
| "learning_rate": 9.98364235528311e-06, |
| "loss": 0.531, |
| "step": 3270 |
| }, |
| { |
| "epoch": 0.12361964346285757, |
| "grad_norm": 1.796715014709634, |
| "learning_rate": 9.983106376704299e-06, |
| "loss": 0.5604, |
| "step": 3280 |
| }, |
| { |
| "epoch": 0.12399653261975653, |
| "grad_norm": 1.4479882986534132, |
| "learning_rate": 9.98256177295735e-06, |
| "loss": 0.5441, |
| "step": 3290 |
| }, |
| { |
| "epoch": 0.12437342177665549, |
| "grad_norm": 1.9018212839289914, |
| "learning_rate": 9.982008544984902e-06, |
| "loss": 0.586, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.12475031093355445, |
| "grad_norm": 1.6731029649854103, |
| "learning_rate": 9.981446693744532e-06, |
| "loss": 0.5422, |
| "step": 3310 |
| }, |
| { |
| "epoch": 0.1251272000904534, |
| "grad_norm": 1.8224277036655119, |
| "learning_rate": 9.980876220208738e-06, |
| "loss": 0.574, |
| "step": 3320 |
| }, |
| { |
| "epoch": 0.12550408924735235, |
| "grad_norm": 1.7502821063481213, |
| "learning_rate": 9.980297125364939e-06, |
| "loss": 0.5182, |
| "step": 3330 |
| }, |
| { |
| "epoch": 0.1258809784042513, |
| "grad_norm": 1.6833110345844349, |
| "learning_rate": 9.979709410215483e-06, |
| "loss": 0.5732, |
| "step": 3340 |
| }, |
| { |
| "epoch": 0.12625786756115026, |
| "grad_norm": 2.1392787884736824, |
| "learning_rate": 9.979113075777636e-06, |
| "loss": 0.5421, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.12663475671804922, |
| "grad_norm": 1.7605201424192343, |
| "learning_rate": 9.978508123083579e-06, |
| "loss": 0.5693, |
| "step": 3360 |
| }, |
| { |
| "epoch": 0.12701164587494818, |
| "grad_norm": 1.814620805028767, |
| "learning_rate": 9.977894553180414e-06, |
| "loss": 0.5044, |
| "step": 3370 |
| }, |
| { |
| "epoch": 0.12738853503184713, |
| "grad_norm": 1.7333387641365299, |
| "learning_rate": 9.977272367130161e-06, |
| "loss": 0.5642, |
| "step": 3380 |
| }, |
| { |
| "epoch": 0.1277654241887461, |
| "grad_norm": 6.5511623986160705, |
| "learning_rate": 9.97664156600975e-06, |
| "loss": 0.539, |
| "step": 3390 |
| }, |
| { |
| "epoch": 0.12814231334564505, |
| "grad_norm": 1.8697849048435933, |
| "learning_rate": 9.97600215091102e-06, |
| "loss": 0.5723, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.128519202502544, |
| "grad_norm": 1.840910802370967, |
| "learning_rate": 9.975354122940725e-06, |
| "loss": 0.563, |
| "step": 3410 |
| }, |
| { |
| "epoch": 0.12889609165944296, |
| "grad_norm": 1.6208553393789018, |
| "learning_rate": 9.974697483220526e-06, |
| "loss": 0.569, |
| "step": 3420 |
| }, |
| { |
| "epoch": 0.12927298081634192, |
| "grad_norm": 1.7217470801750827, |
| "learning_rate": 9.974032232886988e-06, |
| "loss": 0.561, |
| "step": 3430 |
| }, |
| { |
| "epoch": 0.12964986997324088, |
| "grad_norm": 1.8150748187971744, |
| "learning_rate": 9.973358373091578e-06, |
| "loss": 0.5663, |
| "step": 3440 |
| }, |
| { |
| "epoch": 0.13002675913013984, |
| "grad_norm": 1.8613735972835608, |
| "learning_rate": 9.972675905000672e-06, |
| "loss": 0.5412, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.1304036482870388, |
| "grad_norm": 1.9863460603182592, |
| "learning_rate": 9.971984829795539e-06, |
| "loss": 0.5483, |
| "step": 3460 |
| }, |
| { |
| "epoch": 0.13078053744393775, |
| "grad_norm": 1.4584507302056484, |
| "learning_rate": 9.971285148672347e-06, |
| "loss": 0.5359, |
| "step": 3470 |
| }, |
| { |
| "epoch": 0.1311574266008367, |
| "grad_norm": 1.6661289692524046, |
| "learning_rate": 9.970576862842165e-06, |
| "loss": 0.5518, |
| "step": 3480 |
| }, |
| { |
| "epoch": 0.13153431575773564, |
| "grad_norm": 1.8117322128837796, |
| "learning_rate": 9.96985997353095e-06, |
| "loss": 0.5294, |
| "step": 3490 |
| }, |
| { |
| "epoch": 0.1319112049146346, |
| "grad_norm": 1.7990258695528125, |
| "learning_rate": 9.969134481979554e-06, |
| "loss": 0.5487, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.13228809407153355, |
| "grad_norm": 1.6917648391788271, |
| "learning_rate": 9.968400389443715e-06, |
| "loss": 0.5488, |
| "step": 3510 |
| }, |
| { |
| "epoch": 0.1326649832284325, |
| "grad_norm": 1.7815906576006262, |
| "learning_rate": 9.967657697194062e-06, |
| "loss": 0.5435, |
| "step": 3520 |
| }, |
| { |
| "epoch": 0.13304187238533147, |
| "grad_norm": 1.8019408287680707, |
| "learning_rate": 9.966906406516106e-06, |
| "loss": 0.5317, |
| "step": 3530 |
| }, |
| { |
| "epoch": 0.13341876154223042, |
| "grad_norm": 1.5650851056879715, |
| "learning_rate": 9.966146518710244e-06, |
| "loss": 0.5443, |
| "step": 3540 |
| }, |
| { |
| "epoch": 0.13379565069912938, |
| "grad_norm": 1.6846258897654576, |
| "learning_rate": 9.965378035091753e-06, |
| "loss": 0.5384, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.13417253985602834, |
| "grad_norm": 1.6977199699358678, |
| "learning_rate": 9.964600956990785e-06, |
| "loss": 0.5454, |
| "step": 3560 |
| }, |
| { |
| "epoch": 0.1345494290129273, |
| "grad_norm": 5.101071946029098, |
| "learning_rate": 9.963815285752369e-06, |
| "loss": 0.5174, |
| "step": 3570 |
| }, |
| { |
| "epoch": 0.13492631816982625, |
| "grad_norm": 1.5806913588105131, |
| "learning_rate": 9.963021022736413e-06, |
| "loss": 0.5455, |
| "step": 3580 |
| }, |
| { |
| "epoch": 0.1353032073267252, |
| "grad_norm": 1.540462795524182, |
| "learning_rate": 9.962218169317688e-06, |
| "loss": 0.5385, |
| "step": 3590 |
| }, |
| { |
| "epoch": 0.13568009648362417, |
| "grad_norm": 1.8117995998543306, |
| "learning_rate": 9.961406726885844e-06, |
| "loss": 0.547, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.13605698564052313, |
| "grad_norm": 1.702664311368578, |
| "learning_rate": 9.960586696845387e-06, |
| "loss": 0.5425, |
| "step": 3610 |
| }, |
| { |
| "epoch": 0.13643387479742208, |
| "grad_norm": 1.6558884575569928, |
| "learning_rate": 9.959758080615694e-06, |
| "loss": 0.5636, |
| "step": 3620 |
| }, |
| { |
| "epoch": 0.13681076395432104, |
| "grad_norm": 1.5110746291185808, |
| "learning_rate": 9.958920879631002e-06, |
| "loss": 0.5568, |
| "step": 3630 |
| }, |
| { |
| "epoch": 0.13718765311122, |
| "grad_norm": 2.139574069411458, |
| "learning_rate": 9.958075095340408e-06, |
| "loss": 0.505, |
| "step": 3640 |
| }, |
| { |
| "epoch": 0.13756454226811896, |
| "grad_norm": 1.9376102058687341, |
| "learning_rate": 9.957220729207862e-06, |
| "loss": 0.5498, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.1379414314250179, |
| "grad_norm": 2.2672765573662264, |
| "learning_rate": 9.95635778271217e-06, |
| "loss": 0.5435, |
| "step": 3660 |
| }, |
| { |
| "epoch": 0.13831832058191687, |
| "grad_norm": 1.5668715176963142, |
| "learning_rate": 9.955486257346995e-06, |
| "loss": 0.5589, |
| "step": 3670 |
| }, |
| { |
| "epoch": 0.13869520973881583, |
| "grad_norm": 1.7293817862316867, |
| "learning_rate": 9.954606154620841e-06, |
| "loss": 0.5536, |
| "step": 3680 |
| }, |
| { |
| "epoch": 0.13907209889571476, |
| "grad_norm": 1.5203042242279239, |
| "learning_rate": 9.953717476057062e-06, |
| "loss": 0.5157, |
| "step": 3690 |
| }, |
| { |
| "epoch": 0.13944898805261371, |
| "grad_norm": 1.6809131976448295, |
| "learning_rate": 9.952820223193856e-06, |
| "loss": 0.5581, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.13982587720951267, |
| "grad_norm": 1.6390460530372517, |
| "learning_rate": 9.951914397584262e-06, |
| "loss": 0.518, |
| "step": 3710 |
| }, |
| { |
| "epoch": 0.14020276636641163, |
| "grad_norm": 1.756920987537259, |
| "learning_rate": 9.951000000796159e-06, |
| "loss": 0.5561, |
| "step": 3720 |
| }, |
| { |
| "epoch": 0.1405796555233106, |
| "grad_norm": 1.6270655698945786, |
| "learning_rate": 9.950077034412254e-06, |
| "loss": 0.5417, |
| "step": 3730 |
| }, |
| { |
| "epoch": 0.14095654468020954, |
| "grad_norm": 1.7443369340079147, |
| "learning_rate": 9.949145500030099e-06, |
| "loss": 0.5546, |
| "step": 3740 |
| }, |
| { |
| "epoch": 0.1413334338371085, |
| "grad_norm": 1.9623236145537395, |
| "learning_rate": 9.948205399262066e-06, |
| "loss": 0.5614, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.14171032299400746, |
| "grad_norm": 1.8286963445588205, |
| "learning_rate": 9.94725673373536e-06, |
| "loss": 0.5194, |
| "step": 3760 |
| }, |
| { |
| "epoch": 0.14208721215090642, |
| "grad_norm": 1.6490782226850094, |
| "learning_rate": 9.946299505092008e-06, |
| "loss": 0.5133, |
| "step": 3770 |
| }, |
| { |
| "epoch": 0.14246410130780537, |
| "grad_norm": 1.459857790438441, |
| "learning_rate": 9.945333714988859e-06, |
| "loss": 0.5274, |
| "step": 3780 |
| }, |
| { |
| "epoch": 0.14284099046470433, |
| "grad_norm": 2.766851100980767, |
| "learning_rate": 9.944359365097584e-06, |
| "loss": 0.5637, |
| "step": 3790 |
| }, |
| { |
| "epoch": 0.1432178796216033, |
| "grad_norm": 1.773166799745728, |
| "learning_rate": 9.943376457104665e-06, |
| "loss": 0.5431, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.14359476877850225, |
| "grad_norm": 1.8707486173547303, |
| "learning_rate": 9.942384992711402e-06, |
| "loss": 0.5413, |
| "step": 3810 |
| }, |
| { |
| "epoch": 0.1439716579354012, |
| "grad_norm": 1.789830817687169, |
| "learning_rate": 9.9413849736339e-06, |
| "loss": 0.5488, |
| "step": 3820 |
| }, |
| { |
| "epoch": 0.14434854709230016, |
| "grad_norm": 1.7488430625940725, |
| "learning_rate": 9.940376401603077e-06, |
| "loss": 0.5345, |
| "step": 3830 |
| }, |
| { |
| "epoch": 0.14472543624919912, |
| "grad_norm": 1.5053305184156898, |
| "learning_rate": 9.939359278364648e-06, |
| "loss": 0.5323, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.14510232540609808, |
| "grad_norm": 1.7325706869085533, |
| "learning_rate": 9.938333605679137e-06, |
| "loss": 0.549, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.14547921456299703, |
| "grad_norm": 2.298650093349881, |
| "learning_rate": 9.937299385321858e-06, |
| "loss": 0.5629, |
| "step": 3860 |
| }, |
| { |
| "epoch": 0.145856103719896, |
| "grad_norm": 2.010120739235166, |
| "learning_rate": 9.936256619082928e-06, |
| "loss": 0.5546, |
| "step": 3870 |
| }, |
| { |
| "epoch": 0.14623299287679495, |
| "grad_norm": 1.6730215675721205, |
| "learning_rate": 9.935205308767251e-06, |
| "loss": 0.5592, |
| "step": 3880 |
| }, |
| { |
| "epoch": 0.14660988203369388, |
| "grad_norm": 1.623819818612049, |
| "learning_rate": 9.934145456194522e-06, |
| "loss": 0.5522, |
| "step": 3890 |
| }, |
| { |
| "epoch": 0.14698677119059284, |
| "grad_norm": 1.8737714445343006, |
| "learning_rate": 9.933077063199217e-06, |
| "loss": 0.5395, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.1473636603474918, |
| "grad_norm": 1.8896976945133552, |
| "learning_rate": 9.9320001316306e-06, |
| "loss": 0.5155, |
| "step": 3910 |
| }, |
| { |
| "epoch": 0.14774054950439075, |
| "grad_norm": 1.7999273816656425, |
| "learning_rate": 9.930914663352713e-06, |
| "loss": 0.552, |
| "step": 3920 |
| }, |
| { |
| "epoch": 0.1481174386612897, |
| "grad_norm": 1.5451191295487272, |
| "learning_rate": 9.929820660244372e-06, |
| "loss": 0.5308, |
| "step": 3930 |
| }, |
| { |
| "epoch": 0.14849432781818867, |
| "grad_norm": 1.6325314113099194, |
| "learning_rate": 9.928718124199168e-06, |
| "loss": 0.527, |
| "step": 3940 |
| }, |
| { |
| "epoch": 0.14887121697508762, |
| "grad_norm": 2.044279798697509, |
| "learning_rate": 9.927607057125461e-06, |
| "loss": 0.5257, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.14924810613198658, |
| "grad_norm": 1.6860219589179097, |
| "learning_rate": 9.926487460946375e-06, |
| "loss": 0.5179, |
| "step": 3960 |
| }, |
| { |
| "epoch": 0.14962499528888554, |
| "grad_norm": 1.8333348047186415, |
| "learning_rate": 9.9253593375998e-06, |
| "loss": 0.5467, |
| "step": 3970 |
| }, |
| { |
| "epoch": 0.1500018844457845, |
| "grad_norm": 1.8252914858844747, |
| "learning_rate": 9.924222689038384e-06, |
| "loss": 0.5698, |
| "step": 3980 |
| }, |
| { |
| "epoch": 0.15037877360268345, |
| "grad_norm": 1.636078330532333, |
| "learning_rate": 9.923077517229531e-06, |
| "loss": 0.5817, |
| "step": 3990 |
| }, |
| { |
| "epoch": 0.1507556627595824, |
| "grad_norm": 1.6310153659702036, |
| "learning_rate": 9.921923824155399e-06, |
| "loss": 0.5632, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.15113255191648137, |
| "grad_norm": 1.703545293572265, |
| "learning_rate": 9.920761611812892e-06, |
| "loss": 0.5362, |
| "step": 4010 |
| }, |
| { |
| "epoch": 0.15150944107338032, |
| "grad_norm": 1.5247266902845538, |
| "learning_rate": 9.919590882213666e-06, |
| "loss": 0.5847, |
| "step": 4020 |
| }, |
| { |
| "epoch": 0.15188633023027928, |
| "grad_norm": 1.8205620209169613, |
| "learning_rate": 9.91841163738411e-06, |
| "loss": 0.5758, |
| "step": 4030 |
| }, |
| { |
| "epoch": 0.15226321938717824, |
| "grad_norm": 1.6632230310524354, |
| "learning_rate": 9.917223879365366e-06, |
| "loss": 0.5373, |
| "step": 4040 |
| }, |
| { |
| "epoch": 0.1526401085440772, |
| "grad_norm": 1.6718013294159164, |
| "learning_rate": 9.916027610213293e-06, |
| "loss": 0.5557, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.15301699770097615, |
| "grad_norm": 1.5988376223664982, |
| "learning_rate": 9.914822831998498e-06, |
| "loss": 0.5453, |
| "step": 4060 |
| }, |
| { |
| "epoch": 0.1533938868578751, |
| "grad_norm": 1.659616299752992, |
| "learning_rate": 9.913609546806306e-06, |
| "loss": 0.5504, |
| "step": 4070 |
| }, |
| { |
| "epoch": 0.15377077601477407, |
| "grad_norm": 1.4037121435179074, |
| "learning_rate": 9.912387756736773e-06, |
| "loss": 0.5198, |
| "step": 4080 |
| }, |
| { |
| "epoch": 0.154147665171673, |
| "grad_norm": 1.5183583233165177, |
| "learning_rate": 9.911157463904673e-06, |
| "loss": 0.5566, |
| "step": 4090 |
| }, |
| { |
| "epoch": 0.15452455432857196, |
| "grad_norm": 1.680791972841532, |
| "learning_rate": 9.909918670439494e-06, |
| "loss": 0.5403, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.1549014434854709, |
| "grad_norm": 1.7482553689079507, |
| "learning_rate": 9.908671378485445e-06, |
| "loss": 0.5445, |
| "step": 4110 |
| }, |
| { |
| "epoch": 0.15527833264236987, |
| "grad_norm": 1.3584719888404202, |
| "learning_rate": 9.907415590201442e-06, |
| "loss": 0.5459, |
| "step": 4120 |
| }, |
| { |
| "epoch": 0.15565522179926883, |
| "grad_norm": 1.6501271620909048, |
| "learning_rate": 9.906151307761101e-06, |
| "loss": 0.5549, |
| "step": 4130 |
| }, |
| { |
| "epoch": 0.15603211095616779, |
| "grad_norm": 2.656483215686597, |
| "learning_rate": 9.90487853335275e-06, |
| "loss": 0.5504, |
| "step": 4140 |
| }, |
| { |
| "epoch": 0.15640900011306674, |
| "grad_norm": 1.9444522460365512, |
| "learning_rate": 9.903597269179406e-06, |
| "loss": 0.5713, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.1567858892699657, |
| "grad_norm": 1.8488494105763427, |
| "learning_rate": 9.902307517458791e-06, |
| "loss": 0.5422, |
| "step": 4160 |
| }, |
| { |
| "epoch": 0.15716277842686466, |
| "grad_norm": 1.8661983415681136, |
| "learning_rate": 9.90100928042331e-06, |
| "loss": 0.5326, |
| "step": 4170 |
| }, |
| { |
| "epoch": 0.15753966758376362, |
| "grad_norm": 1.5515586490627968, |
| "learning_rate": 9.89970256032006e-06, |
| "loss": 0.5282, |
| "step": 4180 |
| }, |
| { |
| "epoch": 0.15791655674066257, |
| "grad_norm": 1.7714176130932602, |
| "learning_rate": 9.898387359410817e-06, |
| "loss": 0.576, |
| "step": 4190 |
| }, |
| { |
| "epoch": 0.15829344589756153, |
| "grad_norm": 1.76322105124278, |
| "learning_rate": 9.89706367997204e-06, |
| "loss": 0.578, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.1586703350544605, |
| "grad_norm": 1.8101667561564285, |
| "learning_rate": 9.89573152429486e-06, |
| "loss": 0.5534, |
| "step": 4210 |
| }, |
| { |
| "epoch": 0.15904722421135944, |
| "grad_norm": 1.4490662091941684, |
| "learning_rate": 9.894390894685082e-06, |
| "loss": 0.5771, |
| "step": 4220 |
| }, |
| { |
| "epoch": 0.1594241133682584, |
| "grad_norm": 1.540551931413724, |
| "learning_rate": 9.893041793463176e-06, |
| "loss": 0.5581, |
| "step": 4230 |
| }, |
| { |
| "epoch": 0.15980100252515736, |
| "grad_norm": 1.7528415875090508, |
| "learning_rate": 9.89168422296428e-06, |
| "loss": 0.5631, |
| "step": 4240 |
| }, |
| { |
| "epoch": 0.16017789168205632, |
| "grad_norm": 1.8668229751083296, |
| "learning_rate": 9.890318185538183e-06, |
| "loss": 0.5586, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.16055478083895527, |
| "grad_norm": 1.490437975787403, |
| "learning_rate": 9.88894368354934e-06, |
| "loss": 0.5478, |
| "step": 4260 |
| }, |
| { |
| "epoch": 0.16093166999585423, |
| "grad_norm": 1.8869524648789562, |
| "learning_rate": 9.887560719376848e-06, |
| "loss": 0.5419, |
| "step": 4270 |
| }, |
| { |
| "epoch": 0.16130855915275316, |
| "grad_norm": 1.7852914761656666, |
| "learning_rate": 9.886169295414454e-06, |
| "loss": 0.5486, |
| "step": 4280 |
| }, |
| { |
| "epoch": 0.16168544830965212, |
| "grad_norm": 1.770938471455256, |
| "learning_rate": 9.884769414070551e-06, |
| "loss": 0.5546, |
| "step": 4290 |
| }, |
| { |
| "epoch": 0.16206233746655108, |
| "grad_norm": 2.005194363321807, |
| "learning_rate": 9.883361077768166e-06, |
| "loss": 0.5842, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.16243922662345003, |
| "grad_norm": 1.6800086790787019, |
| "learning_rate": 9.881944288944964e-06, |
| "loss": 0.5463, |
| "step": 4310 |
| }, |
| { |
| "epoch": 0.162816115780349, |
| "grad_norm": 2.1202763345448115, |
| "learning_rate": 9.880519050053239e-06, |
| "loss": 0.5609, |
| "step": 4320 |
| }, |
| { |
| "epoch": 0.16319300493724795, |
| "grad_norm": 1.7584386869370296, |
| "learning_rate": 9.879085363559911e-06, |
| "loss": 0.5503, |
| "step": 4330 |
| }, |
| { |
| "epoch": 0.1635698940941469, |
| "grad_norm": 1.6620875457762982, |
| "learning_rate": 9.87764323194652e-06, |
| "loss": 0.5751, |
| "step": 4340 |
| }, |
| { |
| "epoch": 0.16394678325104586, |
| "grad_norm": 1.9926548707738148, |
| "learning_rate": 9.876192657709227e-06, |
| "loss": 0.5921, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.16432367240794482, |
| "grad_norm": 1.820283969890227, |
| "learning_rate": 9.874733643358806e-06, |
| "loss": 0.5522, |
| "step": 4360 |
| }, |
| { |
| "epoch": 0.16470056156484378, |
| "grad_norm": 1.3361373532537333, |
| "learning_rate": 9.873266191420635e-06, |
| "loss": 0.5341, |
| "step": 4370 |
| }, |
| { |
| "epoch": 0.16507745072174274, |
| "grad_norm": 1.5975055201627169, |
| "learning_rate": 9.8717903044347e-06, |
| "loss": 0.5282, |
| "step": 4380 |
| }, |
| { |
| "epoch": 0.1654543398786417, |
| "grad_norm": 1.607578610847404, |
| "learning_rate": 9.870305984955591e-06, |
| "loss": 0.545, |
| "step": 4390 |
| }, |
| { |
| "epoch": 0.16583122903554065, |
| "grad_norm": 1.458228796237765, |
| "learning_rate": 9.868813235552485e-06, |
| "loss": 0.4811, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.1662081181924396, |
| "grad_norm": 1.6414986429735758, |
| "learning_rate": 9.86731205880916e-06, |
| "loss": 0.5616, |
| "step": 4410 |
| }, |
| { |
| "epoch": 0.16658500734933857, |
| "grad_norm": 1.8317176023383224, |
| "learning_rate": 9.86580245732397e-06, |
| "loss": 0.559, |
| "step": 4420 |
| }, |
| { |
| "epoch": 0.16696189650623752, |
| "grad_norm": 1.7709755238978837, |
| "learning_rate": 9.864284433709859e-06, |
| "loss": 0.549, |
| "step": 4430 |
| }, |
| { |
| "epoch": 0.16733878566313648, |
| "grad_norm": 1.982880798943511, |
| "learning_rate": 9.862757990594348e-06, |
| "loss": 0.5296, |
| "step": 4440 |
| }, |
| { |
| "epoch": 0.16771567482003544, |
| "grad_norm": 1.7614598486822042, |
| "learning_rate": 9.861223130619525e-06, |
| "loss": 0.575, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.1680925639769344, |
| "grad_norm": 1.3611867438499565, |
| "learning_rate": 9.859679856442058e-06, |
| "loss": 0.5224, |
| "step": 4460 |
| }, |
| { |
| "epoch": 0.16846945313383335, |
| "grad_norm": 4.687044934475357, |
| "learning_rate": 9.858128170733166e-06, |
| "loss": 0.5552, |
| "step": 4470 |
| }, |
| { |
| "epoch": 0.16884634229073228, |
| "grad_norm": 1.59571418195565, |
| "learning_rate": 9.856568076178637e-06, |
| "loss": 0.5229, |
| "step": 4480 |
| }, |
| { |
| "epoch": 0.16922323144763124, |
| "grad_norm": 2.3788819118048736, |
| "learning_rate": 9.85499957547881e-06, |
| "loss": 0.565, |
| "step": 4490 |
| }, |
| { |
| "epoch": 0.1696001206045302, |
| "grad_norm": 1.8329399955214953, |
| "learning_rate": 9.853422671348573e-06, |
| "loss": 0.5343, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.16997700976142915, |
| "grad_norm": 1.7468489996738379, |
| "learning_rate": 9.85183736651736e-06, |
| "loss": 0.5201, |
| "step": 4510 |
| }, |
| { |
| "epoch": 0.1703538989183281, |
| "grad_norm": 1.6120690658450187, |
| "learning_rate": 9.850243663729151e-06, |
| "loss": 0.5325, |
| "step": 4520 |
| }, |
| { |
| "epoch": 0.17073078807522707, |
| "grad_norm": 1.676405391419548, |
| "learning_rate": 9.848641565742451e-06, |
| "loss": 0.5358, |
| "step": 4530 |
| }, |
| { |
| "epoch": 0.17110767723212603, |
| "grad_norm": 1.5692375310803846, |
| "learning_rate": 9.847031075330305e-06, |
| "loss": 0.5469, |
| "step": 4540 |
| }, |
| { |
| "epoch": 0.17148456638902498, |
| "grad_norm": 1.7013387896565297, |
| "learning_rate": 9.845412195280283e-06, |
| "loss": 0.5415, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.17186145554592394, |
| "grad_norm": 1.6239671776400033, |
| "learning_rate": 9.843784928394473e-06, |
| "loss": 0.5337, |
| "step": 4560 |
| }, |
| { |
| "epoch": 0.1722383447028229, |
| "grad_norm": 1.7752808165423049, |
| "learning_rate": 9.842149277489482e-06, |
| "loss": 0.5581, |
| "step": 4570 |
| }, |
| { |
| "epoch": 0.17261523385972186, |
| "grad_norm": 1.6381139718605806, |
| "learning_rate": 9.840505245396428e-06, |
| "loss": 0.5512, |
| "step": 4580 |
| }, |
| { |
| "epoch": 0.1729921230166208, |
| "grad_norm": 1.3980621024689306, |
| "learning_rate": 9.838852834960937e-06, |
| "loss": 0.5475, |
| "step": 4590 |
| }, |
| { |
| "epoch": 0.17336901217351977, |
| "grad_norm": 1.8099458012944554, |
| "learning_rate": 9.837192049043138e-06, |
| "loss": 0.5288, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.17374590133041873, |
| "grad_norm": 1.7008772187496575, |
| "learning_rate": 9.83552289051765e-06, |
| "loss": 0.5559, |
| "step": 4610 |
| }, |
| { |
| "epoch": 0.17412279048731769, |
| "grad_norm": 7.078798933085531, |
| "learning_rate": 9.833845362273595e-06, |
| "loss": 0.538, |
| "step": 4620 |
| }, |
| { |
| "epoch": 0.17449967964421664, |
| "grad_norm": 1.8058836761237198, |
| "learning_rate": 9.832159467214571e-06, |
| "loss": 0.5366, |
| "step": 4630 |
| }, |
| { |
| "epoch": 0.1748765688011156, |
| "grad_norm": 1.6224697567837014, |
| "learning_rate": 9.830465208258667e-06, |
| "loss": 0.5315, |
| "step": 4640 |
| }, |
| { |
| "epoch": 0.17525345795801456, |
| "grad_norm": 1.9433130297018304, |
| "learning_rate": 9.828762588338442e-06, |
| "loss": 0.5412, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.17563034711491352, |
| "grad_norm": 1.6991522803860752, |
| "learning_rate": 9.827051610400933e-06, |
| "loss": 0.5403, |
| "step": 4660 |
| }, |
| { |
| "epoch": 0.17600723627181247, |
| "grad_norm": 1.705218216007172, |
| "learning_rate": 9.825332277407637e-06, |
| "loss": 0.5556, |
| "step": 4670 |
| }, |
| { |
| "epoch": 0.1763841254287114, |
| "grad_norm": 1.688592333353397, |
| "learning_rate": 9.823604592334519e-06, |
| "loss": 0.551, |
| "step": 4680 |
| }, |
| { |
| "epoch": 0.17676101458561036, |
| "grad_norm": 1.41612023446685, |
| "learning_rate": 9.821868558171996e-06, |
| "loss": 0.5173, |
| "step": 4690 |
| }, |
| { |
| "epoch": 0.17713790374250932, |
| "grad_norm": 1.6816541249040635, |
| "learning_rate": 9.820124177924939e-06, |
| "loss": 0.5428, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.17751479289940827, |
| "grad_norm": 1.3749122464689933, |
| "learning_rate": 9.818371454612664e-06, |
| "loss": 0.5341, |
| "step": 4710 |
| }, |
| { |
| "epoch": 0.17789168205630723, |
| "grad_norm": 1.426068903641529, |
| "learning_rate": 9.816610391268927e-06, |
| "loss": 0.4913, |
| "step": 4720 |
| }, |
| { |
| "epoch": 0.1782685712132062, |
| "grad_norm": 1.5924856232245717, |
| "learning_rate": 9.814840990941921e-06, |
| "loss": 0.541, |
| "step": 4730 |
| }, |
| { |
| "epoch": 0.17864546037010515, |
| "grad_norm": 1.4530573814344718, |
| "learning_rate": 9.813063256694268e-06, |
| "loss": 0.5088, |
| "step": 4740 |
| }, |
| { |
| "epoch": 0.1790223495270041, |
| "grad_norm": 1.8559077656986256, |
| "learning_rate": 9.811277191603018e-06, |
| "loss": 0.5432, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.17939923868390306, |
| "grad_norm": 1.668223036589219, |
| "learning_rate": 9.809482798759636e-06, |
| "loss": 0.5257, |
| "step": 4760 |
| }, |
| { |
| "epoch": 0.17977612784080202, |
| "grad_norm": 1.6197394995482917, |
| "learning_rate": 9.807680081270005e-06, |
| "loss": 0.5453, |
| "step": 4770 |
| }, |
| { |
| "epoch": 0.18015301699770098, |
| "grad_norm": 1.7676016632355371, |
| "learning_rate": 9.805869042254419e-06, |
| "loss": 0.5454, |
| "step": 4780 |
| }, |
| { |
| "epoch": 0.18052990615459993, |
| "grad_norm": 1.630694868729753, |
| "learning_rate": 9.804049684847566e-06, |
| "loss": 0.5327, |
| "step": 4790 |
| }, |
| { |
| "epoch": 0.1809067953114989, |
| "grad_norm": 1.7202050190497062, |
| "learning_rate": 9.802222012198543e-06, |
| "loss": 0.5609, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.18128368446839785, |
| "grad_norm": 1.6203695895840649, |
| "learning_rate": 9.800386027470836e-06, |
| "loss": 0.5196, |
| "step": 4810 |
| }, |
| { |
| "epoch": 0.1816605736252968, |
| "grad_norm": 1.7217972266536141, |
| "learning_rate": 9.798541733842315e-06, |
| "loss": 0.5357, |
| "step": 4820 |
| }, |
| { |
| "epoch": 0.18203746278219576, |
| "grad_norm": 1.653697053819612, |
| "learning_rate": 9.796689134505234e-06, |
| "loss": 0.5222, |
| "step": 4830 |
| }, |
| { |
| "epoch": 0.18241435193909472, |
| "grad_norm": 1.5596182552523383, |
| "learning_rate": 9.794828232666227e-06, |
| "loss": 0.5552, |
| "step": 4840 |
| }, |
| { |
| "epoch": 0.18279124109599368, |
| "grad_norm": 1.867472446965036, |
| "learning_rate": 9.79295903154629e-06, |
| "loss": 0.5339, |
| "step": 4850 |
| }, |
| { |
| "epoch": 0.18316813025289264, |
| "grad_norm": 1.6702383439481707, |
| "learning_rate": 9.791081534380796e-06, |
| "loss": 0.5151, |
| "step": 4860 |
| }, |
| { |
| "epoch": 0.1835450194097916, |
| "grad_norm": 1.3291743718971485, |
| "learning_rate": 9.789195744419463e-06, |
| "loss": 0.5607, |
| "step": 4870 |
| }, |
| { |
| "epoch": 0.18392190856669052, |
| "grad_norm": 1.5921907230443966, |
| "learning_rate": 9.787301664926376e-06, |
| "loss": 0.5339, |
| "step": 4880 |
| }, |
| { |
| "epoch": 0.18429879772358948, |
| "grad_norm": 1.8736268796358457, |
| "learning_rate": 9.78539929917996e-06, |
| "loss": 0.5302, |
| "step": 4890 |
| }, |
| { |
| "epoch": 0.18467568688048844, |
| "grad_norm": 1.8805034291822773, |
| "learning_rate": 9.783488650472988e-06, |
| "loss": 0.5445, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.1850525760373874, |
| "grad_norm": 1.7419614721480812, |
| "learning_rate": 9.781569722112564e-06, |
| "loss": 0.5428, |
| "step": 4910 |
| }, |
| { |
| "epoch": 0.18542946519428635, |
| "grad_norm": 1.9542910752412086, |
| "learning_rate": 9.779642517420129e-06, |
| "loss": 0.538, |
| "step": 4920 |
| }, |
| { |
| "epoch": 0.1858063543511853, |
| "grad_norm": 1.5668762657315158, |
| "learning_rate": 9.777707039731443e-06, |
| "loss": 0.5516, |
| "step": 4930 |
| }, |
| { |
| "epoch": 0.18618324350808427, |
| "grad_norm": 1.603854576231463, |
| "learning_rate": 9.775763292396591e-06, |
| "loss": 0.5535, |
| "step": 4940 |
| }, |
| { |
| "epoch": 0.18656013266498322, |
| "grad_norm": 1.675798754667643, |
| "learning_rate": 9.773811278779972e-06, |
| "loss": 0.524, |
| "step": 4950 |
| }, |
| { |
| "epoch": 0.18693702182188218, |
| "grad_norm": 2.348035421509291, |
| "learning_rate": 9.771851002260288e-06, |
| "loss": 0.5478, |
| "step": 4960 |
| }, |
| { |
| "epoch": 0.18731391097878114, |
| "grad_norm": 1.6072598485464797, |
| "learning_rate": 9.769882466230546e-06, |
| "loss": 0.5287, |
| "step": 4970 |
| }, |
| { |
| "epoch": 0.1876908001356801, |
| "grad_norm": 1.746528796476459, |
| "learning_rate": 9.767905674098051e-06, |
| "loss": 0.5475, |
| "step": 4980 |
| }, |
| { |
| "epoch": 0.18806768929257905, |
| "grad_norm": 1.386585738427929, |
| "learning_rate": 9.765920629284396e-06, |
| "loss": 0.5098, |
| "step": 4990 |
| }, |
| { |
| "epoch": 0.188444578449478, |
| "grad_norm": 1.8437041912808763, |
| "learning_rate": 9.763927335225458e-06, |
| "loss": 0.5286, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.18882146760637697, |
| "grad_norm": 1.9482474264160459, |
| "learning_rate": 9.761925795371394e-06, |
| "loss": 0.5442, |
| "step": 5010 |
| }, |
| { |
| "epoch": 0.18919835676327593, |
| "grad_norm": 1.7719952789842508, |
| "learning_rate": 9.759916013186633e-06, |
| "loss": 0.5461, |
| "step": 5020 |
| }, |
| { |
| "epoch": 0.18957524592017488, |
| "grad_norm": 1.6946989567888198, |
| "learning_rate": 9.757897992149868e-06, |
| "loss": 0.5798, |
| "step": 5030 |
| }, |
| { |
| "epoch": 0.18995213507707384, |
| "grad_norm": 1.4374415950854045, |
| "learning_rate": 9.755871735754058e-06, |
| "loss": 0.5406, |
| "step": 5040 |
| }, |
| { |
| "epoch": 0.1903290242339728, |
| "grad_norm": 1.6824526905803994, |
| "learning_rate": 9.753837247506415e-06, |
| "loss": 0.574, |
| "step": 5050 |
| }, |
| { |
| "epoch": 0.19070591339087176, |
| "grad_norm": 1.394126270724554, |
| "learning_rate": 9.751794530928394e-06, |
| "loss": 0.5327, |
| "step": 5060 |
| }, |
| { |
| "epoch": 0.1910828025477707, |
| "grad_norm": 1.8155055295556857, |
| "learning_rate": 9.749743589555696e-06, |
| "loss": 0.5551, |
| "step": 5070 |
| }, |
| { |
| "epoch": 0.19145969170466964, |
| "grad_norm": 1.6444154255861156, |
| "learning_rate": 9.747684426938259e-06, |
| "loss": 0.5324, |
| "step": 5080 |
| }, |
| { |
| "epoch": 0.1918365808615686, |
| "grad_norm": 1.693478657126227, |
| "learning_rate": 9.74561704664025e-06, |
| "loss": 0.5293, |
| "step": 5090 |
| }, |
| { |
| "epoch": 0.19221347001846756, |
| "grad_norm": 1.7991179686677223, |
| "learning_rate": 9.743541452240062e-06, |
| "loss": 0.5306, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.19259035917536652, |
| "grad_norm": 1.6938448201059806, |
| "learning_rate": 9.7414576473303e-06, |
| "loss": 0.5557, |
| "step": 5110 |
| }, |
| { |
| "epoch": 0.19296724833226547, |
| "grad_norm": 1.708193540793155, |
| "learning_rate": 9.739365635517786e-06, |
| "loss": 0.5527, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.19334413748916443, |
| "grad_norm": 1.623469387205484, |
| "learning_rate": 9.737265420423545e-06, |
| "loss": 0.5677, |
| "step": 5130 |
| }, |
| { |
| "epoch": 0.1937210266460634, |
| "grad_norm": 1.5336259453452732, |
| "learning_rate": 9.735157005682802e-06, |
| "loss": 0.5304, |
| "step": 5140 |
| }, |
| { |
| "epoch": 0.19409791580296235, |
| "grad_norm": 1.6029263301801304, |
| "learning_rate": 9.733040394944972e-06, |
| "loss": 0.5036, |
| "step": 5150 |
| }, |
| { |
| "epoch": 0.1944748049598613, |
| "grad_norm": 1.4027867586519995, |
| "learning_rate": 9.73091559187366e-06, |
| "loss": 0.5571, |
| "step": 5160 |
| }, |
| { |
| "epoch": 0.19485169411676026, |
| "grad_norm": 1.422018727809762, |
| "learning_rate": 9.728782600146646e-06, |
| "loss": 0.512, |
| "step": 5170 |
| }, |
| { |
| "epoch": 0.19522858327365922, |
| "grad_norm": 1.7086928414235798, |
| "learning_rate": 9.726641423455889e-06, |
| "loss": 0.5479, |
| "step": 5180 |
| }, |
| { |
| "epoch": 0.19560547243055817, |
| "grad_norm": 1.4865928775148851, |
| "learning_rate": 9.724492065507512e-06, |
| "loss": 0.5303, |
| "step": 5190 |
| }, |
| { |
| "epoch": 0.19598236158745713, |
| "grad_norm": 1.7652350174862295, |
| "learning_rate": 9.722334530021798e-06, |
| "loss": 0.568, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.1963592507443561, |
| "grad_norm": 1.6654751000696695, |
| "learning_rate": 9.720168820733189e-06, |
| "loss": 0.5425, |
| "step": 5210 |
| }, |
| { |
| "epoch": 0.19673613990125505, |
| "grad_norm": 1.6237026180449996, |
| "learning_rate": 9.717994941390269e-06, |
| "loss": 0.5256, |
| "step": 5220 |
| }, |
| { |
| "epoch": 0.197113029058154, |
| "grad_norm": 1.4809115306845473, |
| "learning_rate": 9.71581289575577e-06, |
| "loss": 0.5559, |
| "step": 5230 |
| }, |
| { |
| "epoch": 0.19748991821505296, |
| "grad_norm": 1.6736674947367551, |
| "learning_rate": 9.71362268760655e-06, |
| "loss": 0.5379, |
| "step": 5240 |
| }, |
| { |
| "epoch": 0.19786680737195192, |
| "grad_norm": 1.6771556923236743, |
| "learning_rate": 9.711424320733605e-06, |
| "loss": 0.5539, |
| "step": 5250 |
| }, |
| { |
| "epoch": 0.19824369652885088, |
| "grad_norm": 1.7283773691144386, |
| "learning_rate": 9.709217798942045e-06, |
| "loss": 0.5289, |
| "step": 5260 |
| }, |
| { |
| "epoch": 0.19862058568574983, |
| "grad_norm": 1.4438252244390533, |
| "learning_rate": 9.7070031260511e-06, |
| "loss": 0.5562, |
| "step": 5270 |
| }, |
| { |
| "epoch": 0.19899747484264876, |
| "grad_norm": 1.612252201951247, |
| "learning_rate": 9.704780305894107e-06, |
| "loss": 0.5508, |
| "step": 5280 |
| }, |
| { |
| "epoch": 0.19937436399954772, |
| "grad_norm": 1.520592481703519, |
| "learning_rate": 9.702549342318503e-06, |
| "loss": 0.522, |
| "step": 5290 |
| }, |
| { |
| "epoch": 0.19975125315644668, |
| "grad_norm": 1.6900037644390822, |
| "learning_rate": 9.700310239185824e-06, |
| "loss": 0.5495, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.20012814231334564, |
| "grad_norm": 1.709571109702106, |
| "learning_rate": 9.698063000371693e-06, |
| "loss": 0.5348, |
| "step": 5310 |
| }, |
| { |
| "epoch": 0.2005050314702446, |
| "grad_norm": 1.730178730299619, |
| "learning_rate": 9.695807629765815e-06, |
| "loss": 0.5141, |
| "step": 5320 |
| }, |
| { |
| "epoch": 0.20088192062714355, |
| "grad_norm": 1.6296835391691085, |
| "learning_rate": 9.69354413127197e-06, |
| "loss": 0.5361, |
| "step": 5330 |
| }, |
| { |
| "epoch": 0.2012588097840425, |
| "grad_norm": 1.6987810718130525, |
| "learning_rate": 9.691272508808006e-06, |
| "loss": 0.5452, |
| "step": 5340 |
| }, |
| { |
| "epoch": 0.20163569894094147, |
| "grad_norm": 1.650234117971004, |
| "learning_rate": 9.68899276630583e-06, |
| "loss": 0.5197, |
| "step": 5350 |
| }, |
| { |
| "epoch": 0.20201258809784042, |
| "grad_norm": 1.7189731868307763, |
| "learning_rate": 9.68670490771141e-06, |
| "loss": 0.5582, |
| "step": 5360 |
| }, |
| { |
| "epoch": 0.20238947725473938, |
| "grad_norm": 1.629642772885024, |
| "learning_rate": 9.68440893698476e-06, |
| "loss": 0.5467, |
| "step": 5370 |
| }, |
| { |
| "epoch": 0.20276636641163834, |
| "grad_norm": 1.714044057568933, |
| "learning_rate": 9.682104858099932e-06, |
| "loss": 0.5586, |
| "step": 5380 |
| }, |
| { |
| "epoch": 0.2031432555685373, |
| "grad_norm": 1.7776904536246745, |
| "learning_rate": 9.679792675045015e-06, |
| "loss": 0.5527, |
| "step": 5390 |
| }, |
| { |
| "epoch": 0.20352014472543625, |
| "grad_norm": 1.8482409209459776, |
| "learning_rate": 9.677472391822122e-06, |
| "loss": 0.5444, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.2038970338823352, |
| "grad_norm": 2.039065180414514, |
| "learning_rate": 9.675144012447393e-06, |
| "loss": 0.5495, |
| "step": 5410 |
| }, |
| { |
| "epoch": 0.20427392303923417, |
| "grad_norm": 1.6471101788933047, |
| "learning_rate": 9.672807540950976e-06, |
| "loss": 0.5304, |
| "step": 5420 |
| }, |
| { |
| "epoch": 0.20465081219613313, |
| "grad_norm": 1.7782838901225566, |
| "learning_rate": 9.670462981377024e-06, |
| "loss": 0.539, |
| "step": 5430 |
| }, |
| { |
| "epoch": 0.20502770135303208, |
| "grad_norm": 1.5599236676909736, |
| "learning_rate": 9.668110337783696e-06, |
| "loss": 0.5278, |
| "step": 5440 |
| }, |
| { |
| "epoch": 0.20540459050993104, |
| "grad_norm": 1.7551032509348412, |
| "learning_rate": 9.665749614243139e-06, |
| "loss": 0.5173, |
| "step": 5450 |
| }, |
| { |
| "epoch": 0.20578147966683, |
| "grad_norm": 1.5194286251590126, |
| "learning_rate": 9.663380814841487e-06, |
| "loss": 0.5209, |
| "step": 5460 |
| }, |
| { |
| "epoch": 0.20615836882372895, |
| "grad_norm": 1.9229370370271754, |
| "learning_rate": 9.66100394367885e-06, |
| "loss": 0.5366, |
| "step": 5470 |
| }, |
| { |
| "epoch": 0.20653525798062788, |
| "grad_norm": 1.6741605984890093, |
| "learning_rate": 9.658619004869314e-06, |
| "loss": 0.5479, |
| "step": 5480 |
| }, |
| { |
| "epoch": 0.20691214713752684, |
| "grad_norm": 1.5030012618627315, |
| "learning_rate": 9.656226002540923e-06, |
| "loss": 0.5151, |
| "step": 5490 |
| }, |
| { |
| "epoch": 0.2072890362944258, |
| "grad_norm": 1.8829102820108654, |
| "learning_rate": 9.653824940835683e-06, |
| "loss": 0.5398, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.20766592545132476, |
| "grad_norm": 1.7502902417744142, |
| "learning_rate": 9.651415823909547e-06, |
| "loss": 0.5356, |
| "step": 5510 |
| }, |
| { |
| "epoch": 0.20804281460822371, |
| "grad_norm": 1.8452401375252057, |
| "learning_rate": 9.648998655932414e-06, |
| "loss": 0.5392, |
| "step": 5520 |
| }, |
| { |
| "epoch": 0.20841970376512267, |
| "grad_norm": 1.6582272984116684, |
| "learning_rate": 9.646573441088112e-06, |
| "loss": 0.5128, |
| "step": 5530 |
| }, |
| { |
| "epoch": 0.20879659292202163, |
| "grad_norm": 1.5467497129190437, |
| "learning_rate": 9.644140183574407e-06, |
| "loss": 0.5551, |
| "step": 5540 |
| }, |
| { |
| "epoch": 0.2091734820789206, |
| "grad_norm": 1.9957256803945376, |
| "learning_rate": 9.641698887602973e-06, |
| "loss": 0.5658, |
| "step": 5550 |
| }, |
| { |
| "epoch": 0.20955037123581954, |
| "grad_norm": 1.6565250884502132, |
| "learning_rate": 9.63924955739941e-06, |
| "loss": 0.5103, |
| "step": 5560 |
| }, |
| { |
| "epoch": 0.2099272603927185, |
| "grad_norm": 1.730711290180862, |
| "learning_rate": 9.636792197203218e-06, |
| "loss": 0.5311, |
| "step": 5570 |
| }, |
| { |
| "epoch": 0.21030414954961746, |
| "grad_norm": 1.6469079302286813, |
| "learning_rate": 9.634326811267796e-06, |
| "loss": 0.558, |
| "step": 5580 |
| }, |
| { |
| "epoch": 0.21068103870651642, |
| "grad_norm": 2.1727142231298737, |
| "learning_rate": 9.631853403860437e-06, |
| "loss": 0.5467, |
| "step": 5590 |
| }, |
| { |
| "epoch": 0.21105792786341537, |
| "grad_norm": 1.7971977696668728, |
| "learning_rate": 9.629371979262314e-06, |
| "loss": 0.5043, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.21143481702031433, |
| "grad_norm": 10.80688874918979, |
| "learning_rate": 9.626882541768484e-06, |
| "loss": 0.5577, |
| "step": 5610 |
| }, |
| { |
| "epoch": 0.2118117061772133, |
| "grad_norm": 2.323690498795392, |
| "learning_rate": 9.624385095687865e-06, |
| "loss": 0.571, |
| "step": 5620 |
| }, |
| { |
| "epoch": 0.21218859533411225, |
| "grad_norm": 1.719967739675491, |
| "learning_rate": 9.621879645343245e-06, |
| "loss": 0.5411, |
| "step": 5630 |
| }, |
| { |
| "epoch": 0.2125654844910112, |
| "grad_norm": 1.532181810090769, |
| "learning_rate": 9.619366195071258e-06, |
| "loss": 0.5415, |
| "step": 5640 |
| }, |
| { |
| "epoch": 0.21294237364791016, |
| "grad_norm": 1.7442472684705252, |
| "learning_rate": 9.616844749222391e-06, |
| "loss": 0.555, |
| "step": 5650 |
| }, |
| { |
| "epoch": 0.21331926280480912, |
| "grad_norm": 1.8977510610017796, |
| "learning_rate": 9.614315312160972e-06, |
| "loss": 0.5259, |
| "step": 5660 |
| }, |
| { |
| "epoch": 0.21369615196170808, |
| "grad_norm": 1.5012868113061877, |
| "learning_rate": 9.611777888265153e-06, |
| "loss": 0.5602, |
| "step": 5670 |
| }, |
| { |
| "epoch": 0.214073041118607, |
| "grad_norm": 1.7027261474156072, |
| "learning_rate": 9.609232481926917e-06, |
| "loss": 0.4989, |
| "step": 5680 |
| }, |
| { |
| "epoch": 0.21444993027550596, |
| "grad_norm": 1.6913027886629168, |
| "learning_rate": 9.606679097552061e-06, |
| "loss": 0.5414, |
| "step": 5690 |
| }, |
| { |
| "epoch": 0.21482681943240492, |
| "grad_norm": 1.7822692679518921, |
| "learning_rate": 9.604117739560192e-06, |
| "loss": 0.5307, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.21520370858930388, |
| "grad_norm": 1.7521449233376833, |
| "learning_rate": 9.601548412384718e-06, |
| "loss": 0.5366, |
| "step": 5710 |
| }, |
| { |
| "epoch": 0.21558059774620283, |
| "grad_norm": 1.4927823655216998, |
| "learning_rate": 9.59897112047284e-06, |
| "loss": 0.5457, |
| "step": 5720 |
| }, |
| { |
| "epoch": 0.2159574869031018, |
| "grad_norm": 1.5719965609870012, |
| "learning_rate": 9.596385868285547e-06, |
| "loss": 0.5744, |
| "step": 5730 |
| }, |
| { |
| "epoch": 0.21633437606000075, |
| "grad_norm": 1.6228135876923027, |
| "learning_rate": 9.593792660297603e-06, |
| "loss": 0.5361, |
| "step": 5740 |
| }, |
| { |
| "epoch": 0.2167112652168997, |
| "grad_norm": 1.869501122929772, |
| "learning_rate": 9.591191500997545e-06, |
| "loss": 0.5824, |
| "step": 5750 |
| }, |
| { |
| "epoch": 0.21708815437379866, |
| "grad_norm": 1.6116057455831496, |
| "learning_rate": 9.588582394887674e-06, |
| "loss": 0.5208, |
| "step": 5760 |
| }, |
| { |
| "epoch": 0.21746504353069762, |
| "grad_norm": 1.7598646289067172, |
| "learning_rate": 9.58596534648404e-06, |
| "loss": 0.5172, |
| "step": 5770 |
| }, |
| { |
| "epoch": 0.21784193268759658, |
| "grad_norm": 1.459229389644524, |
| "learning_rate": 9.583340360316452e-06, |
| "loss": 0.5247, |
| "step": 5780 |
| }, |
| { |
| "epoch": 0.21821882184449554, |
| "grad_norm": 1.3090949493088406, |
| "learning_rate": 9.580707440928442e-06, |
| "loss": 0.5795, |
| "step": 5790 |
| }, |
| { |
| "epoch": 0.2185957110013945, |
| "grad_norm": 1.6277194603630043, |
| "learning_rate": 9.578066592877289e-06, |
| "loss": 0.5192, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.21897260015829345, |
| "grad_norm": 1.3790098723023494, |
| "learning_rate": 9.575417820733985e-06, |
| "loss": 0.5348, |
| "step": 5810 |
| }, |
| { |
| "epoch": 0.2193494893151924, |
| "grad_norm": 1.3968735415508085, |
| "learning_rate": 9.572761129083245e-06, |
| "loss": 0.512, |
| "step": 5820 |
| }, |
| { |
| "epoch": 0.21972637847209137, |
| "grad_norm": 1.7812282571098073, |
| "learning_rate": 9.570096522523484e-06, |
| "loss": 0.543, |
| "step": 5830 |
| }, |
| { |
| "epoch": 0.22010326762899032, |
| "grad_norm": 1.834347893716607, |
| "learning_rate": 9.567424005666825e-06, |
| "loss": 0.5329, |
| "step": 5840 |
| }, |
| { |
| "epoch": 0.22048015678588928, |
| "grad_norm": 1.8129108228485702, |
| "learning_rate": 9.564743583139076e-06, |
| "loss": 0.5384, |
| "step": 5850 |
| }, |
| { |
| "epoch": 0.22085704594278824, |
| "grad_norm": 1.6223887647007407, |
| "learning_rate": 9.562055259579731e-06, |
| "loss": 0.5268, |
| "step": 5860 |
| }, |
| { |
| "epoch": 0.2212339350996872, |
| "grad_norm": 1.606954920038498, |
| "learning_rate": 9.559359039641962e-06, |
| "loss": 0.5414, |
| "step": 5870 |
| }, |
| { |
| "epoch": 0.22161082425658613, |
| "grad_norm": 1.629843365300857, |
| "learning_rate": 9.556654927992609e-06, |
| "loss": 0.5242, |
| "step": 5880 |
| }, |
| { |
| "epoch": 0.22198771341348508, |
| "grad_norm": 1.7527089646543905, |
| "learning_rate": 9.553942929312166e-06, |
| "loss": 0.5272, |
| "step": 5890 |
| }, |
| { |
| "epoch": 0.22236460257038404, |
| "grad_norm": 1.6581171602617495, |
| "learning_rate": 9.551223048294785e-06, |
| "loss": 0.5116, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.222741491727283, |
| "grad_norm": 2.235545487251775, |
| "learning_rate": 9.548495289648254e-06, |
| "loss": 0.5385, |
| "step": 5910 |
| }, |
| { |
| "epoch": 0.22311838088418195, |
| "grad_norm": 2.0034168319325243, |
| "learning_rate": 9.545759658094007e-06, |
| "loss": 0.5557, |
| "step": 5920 |
| }, |
| { |
| "epoch": 0.2234952700410809, |
| "grad_norm": 1.4966777192122558, |
| "learning_rate": 9.543016158367093e-06, |
| "loss": 0.5405, |
| "step": 5930 |
| }, |
| { |
| "epoch": 0.22387215919797987, |
| "grad_norm": 1.7766848501696213, |
| "learning_rate": 9.540264795216192e-06, |
| "loss": 0.5104, |
| "step": 5940 |
| }, |
| { |
| "epoch": 0.22424904835487883, |
| "grad_norm": 1.6670384095637465, |
| "learning_rate": 9.537505573403582e-06, |
| "loss": 0.5436, |
| "step": 5950 |
| }, |
| { |
| "epoch": 0.22462593751177778, |
| "grad_norm": 1.5467554161425514, |
| "learning_rate": 9.534738497705153e-06, |
| "loss": 0.557, |
| "step": 5960 |
| }, |
| { |
| "epoch": 0.22500282666867674, |
| "grad_norm": 1.7813974332712823, |
| "learning_rate": 9.531963572910388e-06, |
| "loss": 0.559, |
| "step": 5970 |
| }, |
| { |
| "epoch": 0.2253797158255757, |
| "grad_norm": 1.7053744486978277, |
| "learning_rate": 9.529180803822351e-06, |
| "loss": 0.5489, |
| "step": 5980 |
| }, |
| { |
| "epoch": 0.22575660498247466, |
| "grad_norm": 1.243354982475666, |
| "learning_rate": 9.526390195257686e-06, |
| "loss": 0.4922, |
| "step": 5990 |
| }, |
| { |
| "epoch": 0.22613349413937361, |
| "grad_norm": 1.8099541389369207, |
| "learning_rate": 9.523591752046608e-06, |
| "loss": 0.5443, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.22651038329627257, |
| "grad_norm": 1.6063302471417757, |
| "learning_rate": 9.52078547903289e-06, |
| "loss": 0.519, |
| "step": 6010 |
| }, |
| { |
| "epoch": 0.22688727245317153, |
| "grad_norm": 1.480600706868146, |
| "learning_rate": 9.51797138107386e-06, |
| "loss": 0.516, |
| "step": 6020 |
| }, |
| { |
| "epoch": 0.2272641616100705, |
| "grad_norm": 1.4421735699209184, |
| "learning_rate": 9.51514946304039e-06, |
| "loss": 0.5507, |
| "step": 6030 |
| }, |
| { |
| "epoch": 0.22764105076696944, |
| "grad_norm": 1.6185750934964662, |
| "learning_rate": 9.512319729816886e-06, |
| "loss": 0.5155, |
| "step": 6040 |
| }, |
| { |
| "epoch": 0.2280179399238684, |
| "grad_norm": 1.6957212178717491, |
| "learning_rate": 9.50948218630128e-06, |
| "loss": 0.5272, |
| "step": 6050 |
| }, |
| { |
| "epoch": 0.22839482908076736, |
| "grad_norm": 1.7169594191615623, |
| "learning_rate": 9.506636837405025e-06, |
| "loss": 0.5314, |
| "step": 6060 |
| }, |
| { |
| "epoch": 0.22877171823766632, |
| "grad_norm": 1.5477125080399536, |
| "learning_rate": 9.503783688053085e-06, |
| "loss": 0.5431, |
| "step": 6070 |
| }, |
| { |
| "epoch": 0.22914860739456525, |
| "grad_norm": 1.6785046483352886, |
| "learning_rate": 9.500922743183922e-06, |
| "loss": 0.5126, |
| "step": 6080 |
| }, |
| { |
| "epoch": 0.2295254965514642, |
| "grad_norm": 1.890254359102995, |
| "learning_rate": 9.498054007749498e-06, |
| "loss": 0.5337, |
| "step": 6090 |
| }, |
| { |
| "epoch": 0.22990238570836316, |
| "grad_norm": 1.643088361181434, |
| "learning_rate": 9.495177486715247e-06, |
| "loss": 0.5793, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.23027927486526212, |
| "grad_norm": 1.735778264496379, |
| "learning_rate": 9.492293185060095e-06, |
| "loss": 0.5551, |
| "step": 6110 |
| }, |
| { |
| "epoch": 0.23065616402216108, |
| "grad_norm": 1.4390586582782128, |
| "learning_rate": 9.489401107776425e-06, |
| "loss": 0.5262, |
| "step": 6120 |
| }, |
| { |
| "epoch": 0.23103305317906003, |
| "grad_norm": 1.8374375522238977, |
| "learning_rate": 9.486501259870078e-06, |
| "loss": 0.5526, |
| "step": 6130 |
| }, |
| { |
| "epoch": 0.231409942335959, |
| "grad_norm": 1.279671922211445, |
| "learning_rate": 9.48359364636035e-06, |
| "loss": 0.5341, |
| "step": 6140 |
| }, |
| { |
| "epoch": 0.23178683149285795, |
| "grad_norm": 1.628189415008674, |
| "learning_rate": 9.480678272279976e-06, |
| "loss": 0.5088, |
| "step": 6150 |
| }, |
| { |
| "epoch": 0.2321637206497569, |
| "grad_norm": 1.6997013770455123, |
| "learning_rate": 9.477755142675125e-06, |
| "loss": 0.5629, |
| "step": 6160 |
| }, |
| { |
| "epoch": 0.23254060980665586, |
| "grad_norm": 3.5623010997302944, |
| "learning_rate": 9.474824262605386e-06, |
| "loss": 0.5488, |
| "step": 6170 |
| }, |
| { |
| "epoch": 0.23291749896355482, |
| "grad_norm": 1.7466850826897597, |
| "learning_rate": 9.47188563714377e-06, |
| "loss": 0.534, |
| "step": 6180 |
| }, |
| { |
| "epoch": 0.23329438812045378, |
| "grad_norm": 1.6695888230856442, |
| "learning_rate": 9.468939271376688e-06, |
| "loss": 0.5062, |
| "step": 6190 |
| }, |
| { |
| "epoch": 0.23367127727735273, |
| "grad_norm": 1.835339155823516, |
| "learning_rate": 9.46598517040395e-06, |
| "loss": 0.5274, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.2340481664342517, |
| "grad_norm": 1.6711258470843053, |
| "learning_rate": 9.463023339338758e-06, |
| "loss": 0.5395, |
| "step": 6210 |
| }, |
| { |
| "epoch": 0.23442505559115065, |
| "grad_norm": 1.7881856636235314, |
| "learning_rate": 9.46005378330769e-06, |
| "loss": 0.546, |
| "step": 6220 |
| }, |
| { |
| "epoch": 0.2348019447480496, |
| "grad_norm": 1.6790671962686543, |
| "learning_rate": 9.457076507450697e-06, |
| "loss": 0.5061, |
| "step": 6230 |
| }, |
| { |
| "epoch": 0.23517883390494856, |
| "grad_norm": 1.3732357055205477, |
| "learning_rate": 9.45409151692109e-06, |
| "loss": 0.521, |
| "step": 6240 |
| }, |
| { |
| "epoch": 0.23555572306184752, |
| "grad_norm": 1.7725150216025622, |
| "learning_rate": 9.451098816885538e-06, |
| "loss": 0.5429, |
| "step": 6250 |
| }, |
| { |
| "epoch": 0.23593261221874648, |
| "grad_norm": 1.714678055331704, |
| "learning_rate": 9.448098412524047e-06, |
| "loss": 0.5266, |
| "step": 6260 |
| }, |
| { |
| "epoch": 0.2363095013756454, |
| "grad_norm": 1.8750459001067499, |
| "learning_rate": 9.445090309029965e-06, |
| "loss": 0.5624, |
| "step": 6270 |
| }, |
| { |
| "epoch": 0.23668639053254437, |
| "grad_norm": 1.7380553727353028, |
| "learning_rate": 9.442074511609965e-06, |
| "loss": 0.5223, |
| "step": 6280 |
| }, |
| { |
| "epoch": 0.23706327968944332, |
| "grad_norm": 1.6267385236512102, |
| "learning_rate": 9.439051025484032e-06, |
| "loss": 0.5341, |
| "step": 6290 |
| }, |
| { |
| "epoch": 0.23744016884634228, |
| "grad_norm": 1.831598701186007, |
| "learning_rate": 9.436019855885466e-06, |
| "loss": 0.5406, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.23781705800324124, |
| "grad_norm": 2.6302827853025486, |
| "learning_rate": 9.432981008060861e-06, |
| "loss": 0.5331, |
| "step": 6310 |
| }, |
| { |
| "epoch": 0.2381939471601402, |
| "grad_norm": 1.2440710954030016, |
| "learning_rate": 9.429934487270105e-06, |
| "loss": 0.5129, |
| "step": 6320 |
| }, |
| { |
| "epoch": 0.23857083631703915, |
| "grad_norm": 1.637294830817524, |
| "learning_rate": 9.426880298786366e-06, |
| "loss": 0.539, |
| "step": 6330 |
| }, |
| { |
| "epoch": 0.2389477254739381, |
| "grad_norm": 1.446625909816762, |
| "learning_rate": 9.423818447896081e-06, |
| "loss": 0.505, |
| "step": 6340 |
| }, |
| { |
| "epoch": 0.23932461463083707, |
| "grad_norm": 2.0064803572085936, |
| "learning_rate": 9.420748939898955e-06, |
| "loss": 0.5741, |
| "step": 6350 |
| }, |
| { |
| "epoch": 0.23970150378773603, |
| "grad_norm": 1.9422133263523575, |
| "learning_rate": 9.417671780107941e-06, |
| "loss": 0.5398, |
| "step": 6360 |
| }, |
| { |
| "epoch": 0.24007839294463498, |
| "grad_norm": 1.7232411604328788, |
| "learning_rate": 9.414586973849241e-06, |
| "loss": 0.5293, |
| "step": 6370 |
| }, |
| { |
| "epoch": 0.24045528210153394, |
| "grad_norm": 1.6201469087371054, |
| "learning_rate": 9.411494526462286e-06, |
| "loss": 0.5269, |
| "step": 6380 |
| }, |
| { |
| "epoch": 0.2408321712584329, |
| "grad_norm": 1.70158816865489, |
| "learning_rate": 9.408394443299743e-06, |
| "loss": 0.5716, |
| "step": 6390 |
| }, |
| { |
| "epoch": 0.24120906041533186, |
| "grad_norm": 1.837108473006738, |
| "learning_rate": 9.405286729727483e-06, |
| "loss": 0.5208, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.2415859495722308, |
| "grad_norm": 1.7073086270634739, |
| "learning_rate": 9.402171391124597e-06, |
| "loss": 0.5428, |
| "step": 6410 |
| }, |
| { |
| "epoch": 0.24196283872912977, |
| "grad_norm": 3.051981097028862, |
| "learning_rate": 9.399048432883363e-06, |
| "loss": 0.5158, |
| "step": 6420 |
| }, |
| { |
| "epoch": 0.24233972788602873, |
| "grad_norm": 1.7864500392491454, |
| "learning_rate": 9.395917860409255e-06, |
| "loss": 0.5241, |
| "step": 6430 |
| }, |
| { |
| "epoch": 0.24271661704292768, |
| "grad_norm": 1.5968823208409386, |
| "learning_rate": 9.392779679120924e-06, |
| "loss": 0.5214, |
| "step": 6440 |
| }, |
| { |
| "epoch": 0.24309350619982664, |
| "grad_norm": 1.5815922668690165, |
| "learning_rate": 9.38963389445019e-06, |
| "loss": 0.5503, |
| "step": 6450 |
| }, |
| { |
| "epoch": 0.2434703953567256, |
| "grad_norm": 1.6823529848515881, |
| "learning_rate": 9.386480511842035e-06, |
| "loss": 0.5109, |
| "step": 6460 |
| }, |
| { |
| "epoch": 0.24384728451362453, |
| "grad_norm": 1.7524818808137763, |
| "learning_rate": 9.38331953675459e-06, |
| "loss": 0.5373, |
| "step": 6470 |
| }, |
| { |
| "epoch": 0.2442241736705235, |
| "grad_norm": 1.5806519372358918, |
| "learning_rate": 9.380150974659132e-06, |
| "loss": 0.5259, |
| "step": 6480 |
| }, |
| { |
| "epoch": 0.24460106282742244, |
| "grad_norm": 1.7916689002740533, |
| "learning_rate": 9.376974831040066e-06, |
| "loss": 0.5658, |
| "step": 6490 |
| }, |
| { |
| "epoch": 0.2449779519843214, |
| "grad_norm": 1.7760513276744359, |
| "learning_rate": 9.373791111394921e-06, |
| "loss": 0.5571, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.24535484114122036, |
| "grad_norm": 1.531656356531642, |
| "learning_rate": 9.37059982123434e-06, |
| "loss": 0.5324, |
| "step": 6510 |
| }, |
| { |
| "epoch": 0.24573173029811932, |
| "grad_norm": 1.7434004811232835, |
| "learning_rate": 9.367400966082067e-06, |
| "loss": 0.5402, |
| "step": 6520 |
| }, |
| { |
| "epoch": 0.24610861945501827, |
| "grad_norm": 1.9091316224501476, |
| "learning_rate": 9.364194551474947e-06, |
| "loss": 0.5314, |
| "step": 6530 |
| }, |
| { |
| "epoch": 0.24648550861191723, |
| "grad_norm": 1.4637280981673166, |
| "learning_rate": 9.3609805829629e-06, |
| "loss": 0.5214, |
| "step": 6540 |
| }, |
| { |
| "epoch": 0.2468623977688162, |
| "grad_norm": 1.6182959856878305, |
| "learning_rate": 9.357759066108928e-06, |
| "loss": 0.5547, |
| "step": 6550 |
| }, |
| { |
| "epoch": 0.24723928692571515, |
| "grad_norm": 1.9292790559109196, |
| "learning_rate": 9.354530006489093e-06, |
| "loss": 0.5423, |
| "step": 6560 |
| }, |
| { |
| "epoch": 0.2476161760826141, |
| "grad_norm": 2.1285426080176415, |
| "learning_rate": 9.351293409692519e-06, |
| "loss": 0.5643, |
| "step": 6570 |
| }, |
| { |
| "epoch": 0.24799306523951306, |
| "grad_norm": 1.8555701825811315, |
| "learning_rate": 9.34804928132137e-06, |
| "loss": 0.484, |
| "step": 6580 |
| }, |
| { |
| "epoch": 0.24836995439641202, |
| "grad_norm": 1.933502698125089, |
| "learning_rate": 9.344797626990851e-06, |
| "loss": 0.532, |
| "step": 6590 |
| }, |
| { |
| "epoch": 0.24874684355331098, |
| "grad_norm": 1.719405207009378, |
| "learning_rate": 9.341538452329191e-06, |
| "loss": 0.5411, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.24912373271020993, |
| "grad_norm": 1.6862541493581185, |
| "learning_rate": 9.338271762977633e-06, |
| "loss": 0.5173, |
| "step": 6610 |
| }, |
| { |
| "epoch": 0.2495006218671089, |
| "grad_norm": 1.5377963927275895, |
| "learning_rate": 9.334997564590434e-06, |
| "loss": 0.5156, |
| "step": 6620 |
| }, |
| { |
| "epoch": 0.24987751102400785, |
| "grad_norm": 1.677774421001133, |
| "learning_rate": 9.331715862834842e-06, |
| "loss": 0.5083, |
| "step": 6630 |
| }, |
| { |
| "epoch": 0.2502544001809068, |
| "grad_norm": 1.5490491612940884, |
| "learning_rate": 9.328426663391096e-06, |
| "loss": 0.5145, |
| "step": 6640 |
| }, |
| { |
| "epoch": 0.25063128933780576, |
| "grad_norm": 1.5642521485987577, |
| "learning_rate": 9.325129971952412e-06, |
| "loss": 0.5261, |
| "step": 6650 |
| }, |
| { |
| "epoch": 0.2510081784947047, |
| "grad_norm": 1.6706919162023979, |
| "learning_rate": 9.32182579422497e-06, |
| "loss": 0.529, |
| "step": 6660 |
| }, |
| { |
| "epoch": 0.2513850676516037, |
| "grad_norm": 1.5818205221721195, |
| "learning_rate": 9.318514135927916e-06, |
| "loss": 0.5369, |
| "step": 6670 |
| }, |
| { |
| "epoch": 0.2517619568085026, |
| "grad_norm": 2.0562318160924495, |
| "learning_rate": 9.315195002793335e-06, |
| "loss": 0.5234, |
| "step": 6680 |
| }, |
| { |
| "epoch": 0.2521388459654016, |
| "grad_norm": 1.67745815477172, |
| "learning_rate": 9.311868400566255e-06, |
| "loss": 0.5211, |
| "step": 6690 |
| }, |
| { |
| "epoch": 0.2525157351223005, |
| "grad_norm": 1.6591773189380454, |
| "learning_rate": 9.308534335004633e-06, |
| "loss": 0.5539, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.2528926242791995, |
| "grad_norm": 1.4697051603169902, |
| "learning_rate": 9.305192811879342e-06, |
| "loss": 0.5181, |
| "step": 6710 |
| }, |
| { |
| "epoch": 0.25326951343609844, |
| "grad_norm": 1.6681508210359333, |
| "learning_rate": 9.301843836974162e-06, |
| "loss": 0.5602, |
| "step": 6720 |
| }, |
| { |
| "epoch": 0.2536464025929974, |
| "grad_norm": 2.3108255647817724, |
| "learning_rate": 9.298487416085774e-06, |
| "loss": 0.5067, |
| "step": 6730 |
| }, |
| { |
| "epoch": 0.25402329174989635, |
| "grad_norm": 1.7726965314764336, |
| "learning_rate": 9.295123555023746e-06, |
| "loss": 0.5397, |
| "step": 6740 |
| }, |
| { |
| "epoch": 0.25440018090679534, |
| "grad_norm": 1.6529476204478943, |
| "learning_rate": 9.291752259610521e-06, |
| "loss": 0.5437, |
| "step": 6750 |
| }, |
| { |
| "epoch": 0.25477707006369427, |
| "grad_norm": 1.623414078675652, |
| "learning_rate": 9.288373535681417e-06, |
| "loss": 0.5206, |
| "step": 6760 |
| }, |
| { |
| "epoch": 0.2551539592205932, |
| "grad_norm": 1.4719649974173148, |
| "learning_rate": 9.284987389084602e-06, |
| "loss": 0.569, |
| "step": 6770 |
| }, |
| { |
| "epoch": 0.2555308483774922, |
| "grad_norm": 1.625628891396421, |
| "learning_rate": 9.281593825681102e-06, |
| "loss": 0.5398, |
| "step": 6780 |
| }, |
| { |
| "epoch": 0.2559077375343911, |
| "grad_norm": 1.792916883420293, |
| "learning_rate": 9.278192851344765e-06, |
| "loss": 0.5551, |
| "step": 6790 |
| }, |
| { |
| "epoch": 0.2562846266912901, |
| "grad_norm": 2.156619467272627, |
| "learning_rate": 9.274784471962283e-06, |
| "loss": 0.4974, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.256661515848189, |
| "grad_norm": 1.6687247274318509, |
| "learning_rate": 9.271368693433153e-06, |
| "loss": 0.5422, |
| "step": 6810 |
| }, |
| { |
| "epoch": 0.257038405005088, |
| "grad_norm": 1.7150335026062424, |
| "learning_rate": 9.267945521669687e-06, |
| "loss": 0.5179, |
| "step": 6820 |
| }, |
| { |
| "epoch": 0.25741529416198694, |
| "grad_norm": 1.5067882971197635, |
| "learning_rate": 9.264514962596989e-06, |
| "loss": 0.5207, |
| "step": 6830 |
| }, |
| { |
| "epoch": 0.2577921833188859, |
| "grad_norm": 2.1289911856188657, |
| "learning_rate": 9.261077022152953e-06, |
| "loss": 0.5437, |
| "step": 6840 |
| }, |
| { |
| "epoch": 0.25816907247578486, |
| "grad_norm": 1.9539761400644868, |
| "learning_rate": 9.257631706288246e-06, |
| "loss": 0.5517, |
| "step": 6850 |
| }, |
| { |
| "epoch": 0.25854596163268384, |
| "grad_norm": 1.9497689876502742, |
| "learning_rate": 9.254179020966303e-06, |
| "loss": 0.5288, |
| "step": 6860 |
| }, |
| { |
| "epoch": 0.25892285078958277, |
| "grad_norm": 1.5902800114741822, |
| "learning_rate": 9.250718972163312e-06, |
| "loss": 0.5244, |
| "step": 6870 |
| }, |
| { |
| "epoch": 0.25929973994648176, |
| "grad_norm": 1.7856883546213593, |
| "learning_rate": 9.247251565868214e-06, |
| "loss": 0.5054, |
| "step": 6880 |
| }, |
| { |
| "epoch": 0.2596766291033807, |
| "grad_norm": 1.5741551964446796, |
| "learning_rate": 9.243776808082675e-06, |
| "loss": 0.5371, |
| "step": 6890 |
| }, |
| { |
| "epoch": 0.26005351826027967, |
| "grad_norm": 1.648666621240082, |
| "learning_rate": 9.240294704821091e-06, |
| "loss": 0.5355, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.2604304074171786, |
| "grad_norm": 1.9842160004462486, |
| "learning_rate": 9.236805262110571e-06, |
| "loss": 0.531, |
| "step": 6910 |
| }, |
| { |
| "epoch": 0.2608072965740776, |
| "grad_norm": 1.6690746816867539, |
| "learning_rate": 9.233308485990929e-06, |
| "loss": 0.5199, |
| "step": 6920 |
| }, |
| { |
| "epoch": 0.2611841857309765, |
| "grad_norm": 2.000871754392917, |
| "learning_rate": 9.229804382514668e-06, |
| "loss": 0.5287, |
| "step": 6930 |
| }, |
| { |
| "epoch": 0.2615610748878755, |
| "grad_norm": 1.6694534259040406, |
| "learning_rate": 9.226292957746982e-06, |
| "loss": 0.5063, |
| "step": 6940 |
| }, |
| { |
| "epoch": 0.26193796404477443, |
| "grad_norm": 1.7676982056441022, |
| "learning_rate": 9.222774217765728e-06, |
| "loss": 0.542, |
| "step": 6950 |
| }, |
| { |
| "epoch": 0.2623148532016734, |
| "grad_norm": 1.4980337618111739, |
| "learning_rate": 9.21924816866143e-06, |
| "loss": 0.5138, |
| "step": 6960 |
| }, |
| { |
| "epoch": 0.26269174235857234, |
| "grad_norm": 1.5116056583689286, |
| "learning_rate": 9.215714816537265e-06, |
| "loss": 0.5265, |
| "step": 6970 |
| }, |
| { |
| "epoch": 0.2630686315154713, |
| "grad_norm": 1.4865966539266282, |
| "learning_rate": 9.212174167509044e-06, |
| "loss": 0.5403, |
| "step": 6980 |
| }, |
| { |
| "epoch": 0.26344552067237026, |
| "grad_norm": 1.6837642502881358, |
| "learning_rate": 9.208626227705212e-06, |
| "loss": 0.5434, |
| "step": 6990 |
| }, |
| { |
| "epoch": 0.2638224098292692, |
| "grad_norm": 1.7206211694739486, |
| "learning_rate": 9.205071003266838e-06, |
| "loss": 0.5293, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.2641992989861682, |
| "grad_norm": 1.5568568822181916, |
| "learning_rate": 9.201508500347592e-06, |
| "loss": 0.539, |
| "step": 7010 |
| }, |
| { |
| "epoch": 0.2645761881430671, |
| "grad_norm": 2.515135490034705, |
| "learning_rate": 9.197938725113745e-06, |
| "loss": 0.5273, |
| "step": 7020 |
| }, |
| { |
| "epoch": 0.2649530772999661, |
| "grad_norm": 1.7780698740100387, |
| "learning_rate": 9.194361683744156e-06, |
| "loss": 0.5249, |
| "step": 7030 |
| }, |
| { |
| "epoch": 0.265329966456865, |
| "grad_norm": 1.9741428022356151, |
| "learning_rate": 9.190777382430262e-06, |
| "loss": 0.5067, |
| "step": 7040 |
| }, |
| { |
| "epoch": 0.265706855613764, |
| "grad_norm": 1.5854337997969816, |
| "learning_rate": 9.187185827376065e-06, |
| "loss": 0.5162, |
| "step": 7050 |
| }, |
| { |
| "epoch": 0.26608374477066293, |
| "grad_norm": 1.8032750599953304, |
| "learning_rate": 9.183587024798122e-06, |
| "loss": 0.5337, |
| "step": 7060 |
| }, |
| { |
| "epoch": 0.2664606339275619, |
| "grad_norm": 1.6853602175583824, |
| "learning_rate": 9.179980980925533e-06, |
| "loss": 0.5619, |
| "step": 7070 |
| }, |
| { |
| "epoch": 0.26683752308446085, |
| "grad_norm": 1.668776536624248, |
| "learning_rate": 9.176367701999936e-06, |
| "loss": 0.5306, |
| "step": 7080 |
| }, |
| { |
| "epoch": 0.26721441224135983, |
| "grad_norm": 1.8011152218189725, |
| "learning_rate": 9.172747194275492e-06, |
| "loss": 0.5421, |
| "step": 7090 |
| }, |
| { |
| "epoch": 0.26759130139825876, |
| "grad_norm": 1.8708330614213387, |
| "learning_rate": 9.169119464018865e-06, |
| "loss": 0.5326, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.26796819055515775, |
| "grad_norm": 1.6632424343194352, |
| "learning_rate": 9.165484517509231e-06, |
| "loss": 0.5538, |
| "step": 7110 |
| }, |
| { |
| "epoch": 0.2683450797120567, |
| "grad_norm": 1.467226646436778, |
| "learning_rate": 9.161842361038255e-06, |
| "loss": 0.5068, |
| "step": 7120 |
| }, |
| { |
| "epoch": 0.26872196886895566, |
| "grad_norm": 1.6200121333407522, |
| "learning_rate": 9.158193000910078e-06, |
| "loss": 0.5388, |
| "step": 7130 |
| }, |
| { |
| "epoch": 0.2690988580258546, |
| "grad_norm": 1.7014337626263312, |
| "learning_rate": 9.15453644344131e-06, |
| "loss": 0.5293, |
| "step": 7140 |
| }, |
| { |
| "epoch": 0.2694757471827536, |
| "grad_norm": 1.7595131636999561, |
| "learning_rate": 9.15087269496102e-06, |
| "loss": 0.5448, |
| "step": 7150 |
| }, |
| { |
| "epoch": 0.2698526363396525, |
| "grad_norm": 1.8761488565801718, |
| "learning_rate": 9.147201761810722e-06, |
| "loss": 0.53, |
| "step": 7160 |
| }, |
| { |
| "epoch": 0.27022952549655144, |
| "grad_norm": 1.5345557406874837, |
| "learning_rate": 9.143523650344373e-06, |
| "loss": 0.5328, |
| "step": 7170 |
| }, |
| { |
| "epoch": 0.2706064146534504, |
| "grad_norm": 1.6268714379432996, |
| "learning_rate": 9.139838366928341e-06, |
| "loss": 0.5676, |
| "step": 7180 |
| }, |
| { |
| "epoch": 0.27098330381034935, |
| "grad_norm": 1.5406506948214973, |
| "learning_rate": 9.136145917941423e-06, |
| "loss": 0.5199, |
| "step": 7190 |
| }, |
| { |
| "epoch": 0.27136019296724834, |
| "grad_norm": 1.6991584106421929, |
| "learning_rate": 9.13244630977481e-06, |
| "loss": 0.5249, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.27173708212414727, |
| "grad_norm": 1.778739550927463, |
| "learning_rate": 9.128739548832084e-06, |
| "loss": 0.5493, |
| "step": 7210 |
| }, |
| { |
| "epoch": 0.27211397128104625, |
| "grad_norm": 1.5672069369770456, |
| "learning_rate": 9.125025641529212e-06, |
| "loss": 0.548, |
| "step": 7220 |
| }, |
| { |
| "epoch": 0.2724908604379452, |
| "grad_norm": 1.849271034046483, |
| "learning_rate": 9.121304594294526e-06, |
| "loss": 0.5069, |
| "step": 7230 |
| }, |
| { |
| "epoch": 0.27286774959484417, |
| "grad_norm": 1.6198818202816212, |
| "learning_rate": 9.117576413568726e-06, |
| "loss": 0.5491, |
| "step": 7240 |
| }, |
| { |
| "epoch": 0.2732446387517431, |
| "grad_norm": 1.7505219289954272, |
| "learning_rate": 9.113841105804843e-06, |
| "loss": 0.5331, |
| "step": 7250 |
| }, |
| { |
| "epoch": 0.2736215279086421, |
| "grad_norm": 1.5456684152965627, |
| "learning_rate": 9.110098677468258e-06, |
| "loss": 0.5304, |
| "step": 7260 |
| }, |
| { |
| "epoch": 0.273998417065541, |
| "grad_norm": 1.4875862331167544, |
| "learning_rate": 9.106349135036673e-06, |
| "loss": 0.5268, |
| "step": 7270 |
| }, |
| { |
| "epoch": 0.27437530622244, |
| "grad_norm": 1.7648342778101611, |
| "learning_rate": 9.102592485000101e-06, |
| "loss": 0.541, |
| "step": 7280 |
| }, |
| { |
| "epoch": 0.2747521953793389, |
| "grad_norm": 1.585590877192049, |
| "learning_rate": 9.09882873386086e-06, |
| "loss": 0.5159, |
| "step": 7290 |
| }, |
| { |
| "epoch": 0.2751290845362379, |
| "grad_norm": 1.8789850010967337, |
| "learning_rate": 9.095057888133557e-06, |
| "loss": 0.526, |
| "step": 7300 |
| }, |
| { |
| "epoch": 0.27550597369313684, |
| "grad_norm": 1.6350234581153913, |
| "learning_rate": 9.09127995434508e-06, |
| "loss": 0.5094, |
| "step": 7310 |
| }, |
| { |
| "epoch": 0.2758828628500358, |
| "grad_norm": 1.7747303027226409, |
| "learning_rate": 9.087494939034589e-06, |
| "loss": 0.5223, |
| "step": 7320 |
| }, |
| { |
| "epoch": 0.27625975200693476, |
| "grad_norm": 1.6705228626037893, |
| "learning_rate": 9.083702848753496e-06, |
| "loss": 0.5115, |
| "step": 7330 |
| }, |
| { |
| "epoch": 0.27663664116383374, |
| "grad_norm": 1.7334155324512346, |
| "learning_rate": 9.079903690065461e-06, |
| "loss": 0.536, |
| "step": 7340 |
| }, |
| { |
| "epoch": 0.27701353032073267, |
| "grad_norm": 1.7152387686021637, |
| "learning_rate": 9.076097469546378e-06, |
| "loss": 0.5174, |
| "step": 7350 |
| }, |
| { |
| "epoch": 0.27739041947763166, |
| "grad_norm": 2.7121339144898267, |
| "learning_rate": 9.072284193784366e-06, |
| "loss": 0.5278, |
| "step": 7360 |
| }, |
| { |
| "epoch": 0.2777673086345306, |
| "grad_norm": 1.4228173967742475, |
| "learning_rate": 9.068463869379755e-06, |
| "loss": 0.5315, |
| "step": 7370 |
| }, |
| { |
| "epoch": 0.2781441977914295, |
| "grad_norm": 1.5850322389947202, |
| "learning_rate": 9.064636502945074e-06, |
| "loss": 0.5437, |
| "step": 7380 |
| }, |
| { |
| "epoch": 0.2785210869483285, |
| "grad_norm": 1.462469278604766, |
| "learning_rate": 9.060802101105041e-06, |
| "loss": 0.5043, |
| "step": 7390 |
| }, |
| { |
| "epoch": 0.27889797610522743, |
| "grad_norm": 1.991811474599226, |
| "learning_rate": 9.056960670496555e-06, |
| "loss": 0.5347, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.2792748652621264, |
| "grad_norm": 1.5481573287316113, |
| "learning_rate": 9.053112217768675e-06, |
| "loss": 0.5317, |
| "step": 7410 |
| }, |
| { |
| "epoch": 0.27965175441902534, |
| "grad_norm": 1.9124383091203907, |
| "learning_rate": 9.049256749582621e-06, |
| "loss": 0.5363, |
| "step": 7420 |
| }, |
| { |
| "epoch": 0.28002864357592433, |
| "grad_norm": 1.8423235171267427, |
| "learning_rate": 9.045394272611752e-06, |
| "loss": 0.573, |
| "step": 7430 |
| }, |
| { |
| "epoch": 0.28040553273282326, |
| "grad_norm": 1.8356986741564718, |
| "learning_rate": 9.041524793541557e-06, |
| "loss": 0.527, |
| "step": 7440 |
| }, |
| { |
| "epoch": 0.28078242188972224, |
| "grad_norm": 1.709517929026637, |
| "learning_rate": 9.037648319069648e-06, |
| "loss": 0.5325, |
| "step": 7450 |
| }, |
| { |
| "epoch": 0.2811593110466212, |
| "grad_norm": 1.6511934273562192, |
| "learning_rate": 9.033764855905746e-06, |
| "loss": 0.5107, |
| "step": 7460 |
| }, |
| { |
| "epoch": 0.28153620020352016, |
| "grad_norm": 1.7382038263988389, |
| "learning_rate": 9.029874410771664e-06, |
| "loss": 0.5501, |
| "step": 7470 |
| }, |
| { |
| "epoch": 0.2819130893604191, |
| "grad_norm": 1.5833394804264111, |
| "learning_rate": 9.025976990401304e-06, |
| "loss": 0.5124, |
| "step": 7480 |
| }, |
| { |
| "epoch": 0.2822899785173181, |
| "grad_norm": 1.355478094878015, |
| "learning_rate": 9.022072601540642e-06, |
| "loss": 0.5425, |
| "step": 7490 |
| }, |
| { |
| "epoch": 0.282666867674217, |
| "grad_norm": 1.6618586288433348, |
| "learning_rate": 9.018161250947708e-06, |
| "loss": 0.5291, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.283043756831116, |
| "grad_norm": 1.273053488731193, |
| "learning_rate": 9.014242945392592e-06, |
| "loss": 0.4874, |
| "step": 7510 |
| }, |
| { |
| "epoch": 0.2834206459880149, |
| "grad_norm": 2.0289155247042383, |
| "learning_rate": 9.010317691657417e-06, |
| "loss": 0.4941, |
| "step": 7520 |
| }, |
| { |
| "epoch": 0.2837975351449139, |
| "grad_norm": 1.4934609822867562, |
| "learning_rate": 9.006385496536334e-06, |
| "loss": 0.5214, |
| "step": 7530 |
| }, |
| { |
| "epoch": 0.28417442430181283, |
| "grad_norm": 1.6263199918676976, |
| "learning_rate": 9.002446366835507e-06, |
| "loss": 0.5493, |
| "step": 7540 |
| }, |
| { |
| "epoch": 0.2845513134587118, |
| "grad_norm": 1.5893668854653922, |
| "learning_rate": 8.998500309373104e-06, |
| "loss": 0.4892, |
| "step": 7550 |
| }, |
| { |
| "epoch": 0.28492820261561075, |
| "grad_norm": 1.41806671358887, |
| "learning_rate": 8.994547330979281e-06, |
| "loss": 0.524, |
| "step": 7560 |
| }, |
| { |
| "epoch": 0.2853050917725097, |
| "grad_norm": 1.5553788303244762, |
| "learning_rate": 8.990587438496183e-06, |
| "loss": 0.5221, |
| "step": 7570 |
| }, |
| { |
| "epoch": 0.28568198092940866, |
| "grad_norm": 1.418345991264022, |
| "learning_rate": 8.986620638777911e-06, |
| "loss": 0.4997, |
| "step": 7580 |
| }, |
| { |
| "epoch": 0.2860588700863076, |
| "grad_norm": 1.508355836892063, |
| "learning_rate": 8.982646938690527e-06, |
| "loss": 0.5395, |
| "step": 7590 |
| }, |
| { |
| "epoch": 0.2864357592432066, |
| "grad_norm": 1.6201465695031843, |
| "learning_rate": 8.978666345112037e-06, |
| "loss": 0.5507, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.2868126484001055, |
| "grad_norm": 1.57811554793437, |
| "learning_rate": 8.974678864932379e-06, |
| "loss": 0.5004, |
| "step": 7610 |
| }, |
| { |
| "epoch": 0.2871895375570045, |
| "grad_norm": 1.5741977951369208, |
| "learning_rate": 8.970684505053407e-06, |
| "loss": 0.5116, |
| "step": 7620 |
| }, |
| { |
| "epoch": 0.2875664267139034, |
| "grad_norm": 1.676860663266555, |
| "learning_rate": 8.96668327238889e-06, |
| "loss": 0.5461, |
| "step": 7630 |
| }, |
| { |
| "epoch": 0.2879433158708024, |
| "grad_norm": 1.4656181144812006, |
| "learning_rate": 8.962675173864483e-06, |
| "loss": 0.513, |
| "step": 7640 |
| }, |
| { |
| "epoch": 0.28832020502770134, |
| "grad_norm": 1.7396852241340373, |
| "learning_rate": 8.958660216417735e-06, |
| "loss": 0.5341, |
| "step": 7650 |
| }, |
| { |
| "epoch": 0.2886970941846003, |
| "grad_norm": 1.4829830776509865, |
| "learning_rate": 8.954638406998062e-06, |
| "loss": 0.5364, |
| "step": 7660 |
| }, |
| { |
| "epoch": 0.28907398334149925, |
| "grad_norm": 2.003917803432353, |
| "learning_rate": 8.95060975256674e-06, |
| "loss": 0.5107, |
| "step": 7670 |
| }, |
| { |
| "epoch": 0.28945087249839824, |
| "grad_norm": 1.5880349728736471, |
| "learning_rate": 8.946574260096897e-06, |
| "loss": 0.5199, |
| "step": 7680 |
| }, |
| { |
| "epoch": 0.28982776165529717, |
| "grad_norm": 6.4085223096319295, |
| "learning_rate": 8.942531936573487e-06, |
| "loss": 0.5336, |
| "step": 7690 |
| }, |
| { |
| "epoch": 0.29020465081219615, |
| "grad_norm": 1.700617891186804, |
| "learning_rate": 8.9384827889933e-06, |
| "loss": 0.524, |
| "step": 7700 |
| }, |
| { |
| "epoch": 0.2905815399690951, |
| "grad_norm": 1.9072803846307507, |
| "learning_rate": 8.934426824364931e-06, |
| "loss": 0.4835, |
| "step": 7710 |
| }, |
| { |
| "epoch": 0.29095842912599407, |
| "grad_norm": 1.455274287063036, |
| "learning_rate": 8.93036404970877e-06, |
| "loss": 0.5229, |
| "step": 7720 |
| }, |
| { |
| "epoch": 0.291335318282893, |
| "grad_norm": 1.8160201738390154, |
| "learning_rate": 8.926294472057006e-06, |
| "loss": 0.5123, |
| "step": 7730 |
| }, |
| { |
| "epoch": 0.291712207439792, |
| "grad_norm": 1.865480030478559, |
| "learning_rate": 8.922218098453596e-06, |
| "loss": 0.5363, |
| "step": 7740 |
| }, |
| { |
| "epoch": 0.2920890965966909, |
| "grad_norm": 1.5712881260410958, |
| "learning_rate": 8.91813493595426e-06, |
| "loss": 0.5267, |
| "step": 7750 |
| }, |
| { |
| "epoch": 0.2924659857535899, |
| "grad_norm": 1.6468565924570966, |
| "learning_rate": 8.914044991626467e-06, |
| "loss": 0.5313, |
| "step": 7760 |
| }, |
| { |
| "epoch": 0.2928428749104888, |
| "grad_norm": 1.82716894869601, |
| "learning_rate": 8.90994827254943e-06, |
| "loss": 0.5278, |
| "step": 7770 |
| }, |
| { |
| "epoch": 0.29321976406738776, |
| "grad_norm": 1.5036064580532265, |
| "learning_rate": 8.905844785814086e-06, |
| "loss": 0.5086, |
| "step": 7780 |
| }, |
| { |
| "epoch": 0.29359665322428674, |
| "grad_norm": 1.5144138972292562, |
| "learning_rate": 8.901734538523083e-06, |
| "loss": 0.5081, |
| "step": 7790 |
| }, |
| { |
| "epoch": 0.29397354238118567, |
| "grad_norm": 1.7463912063578895, |
| "learning_rate": 8.897617537790775e-06, |
| "loss": 0.5414, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.29435043153808466, |
| "grad_norm": 1.4528500852259172, |
| "learning_rate": 8.893493790743205e-06, |
| "loss": 0.5077, |
| "step": 7810 |
| }, |
| { |
| "epoch": 0.2947273206949836, |
| "grad_norm": 1.6942113196018895, |
| "learning_rate": 8.889363304518088e-06, |
| "loss": 0.5399, |
| "step": 7820 |
| }, |
| { |
| "epoch": 0.29510420985188257, |
| "grad_norm": 1.5104690108892953, |
| "learning_rate": 8.88522608626481e-06, |
| "loss": 0.5459, |
| "step": 7830 |
| }, |
| { |
| "epoch": 0.2954810990087815, |
| "grad_norm": 1.8645243846646347, |
| "learning_rate": 8.881082143144405e-06, |
| "loss": 0.5134, |
| "step": 7840 |
| }, |
| { |
| "epoch": 0.2958579881656805, |
| "grad_norm": 1.6728891933665688, |
| "learning_rate": 8.876931482329554e-06, |
| "loss": 0.5177, |
| "step": 7850 |
| }, |
| { |
| "epoch": 0.2962348773225794, |
| "grad_norm": 1.2067983902923343, |
| "learning_rate": 8.872774111004553e-06, |
| "loss": 0.5047, |
| "step": 7860 |
| }, |
| { |
| "epoch": 0.2966117664794784, |
| "grad_norm": 1.5099148600629388, |
| "learning_rate": 8.868610036365324e-06, |
| "loss": 0.4986, |
| "step": 7870 |
| }, |
| { |
| "epoch": 0.29698865563637733, |
| "grad_norm": 2.34928629080023, |
| "learning_rate": 8.86443926561939e-06, |
| "loss": 0.5366, |
| "step": 7880 |
| }, |
| { |
| "epoch": 0.2973655447932763, |
| "grad_norm": 1.661924846466901, |
| "learning_rate": 8.860261805985857e-06, |
| "loss": 0.5272, |
| "step": 7890 |
| }, |
| { |
| "epoch": 0.29774243395017524, |
| "grad_norm": 1.6500446518789744, |
| "learning_rate": 8.856077664695418e-06, |
| "loss": 0.5247, |
| "step": 7900 |
| }, |
| { |
| "epoch": 0.29811932310707423, |
| "grad_norm": 1.6550912001046552, |
| "learning_rate": 8.851886848990326e-06, |
| "loss": 0.5073, |
| "step": 7910 |
| }, |
| { |
| "epoch": 0.29849621226397316, |
| "grad_norm": 2.4385332990829394, |
| "learning_rate": 8.847689366124387e-06, |
| "loss": 0.5224, |
| "step": 7920 |
| }, |
| { |
| "epoch": 0.29887310142087214, |
| "grad_norm": 1.7454181558614066, |
| "learning_rate": 8.843485223362947e-06, |
| "loss": 0.5404, |
| "step": 7930 |
| }, |
| { |
| "epoch": 0.2992499905777711, |
| "grad_norm": 1.6197152816080147, |
| "learning_rate": 8.839274427982883e-06, |
| "loss": 0.5249, |
| "step": 7940 |
| }, |
| { |
| "epoch": 0.29962687973467006, |
| "grad_norm": 1.6733433336543917, |
| "learning_rate": 8.835056987272581e-06, |
| "loss": 0.5369, |
| "step": 7950 |
| }, |
| { |
| "epoch": 0.300003768891569, |
| "grad_norm": 1.6407791190885486, |
| "learning_rate": 8.830832908531935e-06, |
| "loss": 0.5036, |
| "step": 7960 |
| }, |
| { |
| "epoch": 0.3003806580484679, |
| "grad_norm": 1.6837408609676576, |
| "learning_rate": 8.826602199072323e-06, |
| "loss": 0.5304, |
| "step": 7970 |
| }, |
| { |
| "epoch": 0.3007575472053669, |
| "grad_norm": 1.5824176969906407, |
| "learning_rate": 8.822364866216606e-06, |
| "loss": 0.4872, |
| "step": 7980 |
| }, |
| { |
| "epoch": 0.30113443636226583, |
| "grad_norm": 1.4977384439375079, |
| "learning_rate": 8.818120917299105e-06, |
| "loss": 0.4893, |
| "step": 7990 |
| }, |
| { |
| "epoch": 0.3015113255191648, |
| "grad_norm": 1.9173818601725474, |
| "learning_rate": 8.813870359665594e-06, |
| "loss": 0.515, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.30188821467606375, |
| "grad_norm": 1.224532027864391, |
| "learning_rate": 8.809613200673284e-06, |
| "loss": 0.4968, |
| "step": 8010 |
| }, |
| { |
| "epoch": 0.30226510383296273, |
| "grad_norm": 1.507185992220378, |
| "learning_rate": 8.805349447690819e-06, |
| "loss": 0.5283, |
| "step": 8020 |
| }, |
| { |
| "epoch": 0.30264199298986166, |
| "grad_norm": 1.5501471489535523, |
| "learning_rate": 8.801079108098247e-06, |
| "loss": 0.5297, |
| "step": 8030 |
| }, |
| { |
| "epoch": 0.30301888214676065, |
| "grad_norm": 1.6682468431261555, |
| "learning_rate": 8.796802189287021e-06, |
| "loss": 0.5169, |
| "step": 8040 |
| }, |
| { |
| "epoch": 0.3033957713036596, |
| "grad_norm": 1.5793954538427635, |
| "learning_rate": 8.792518698659985e-06, |
| "loss": 0.514, |
| "step": 8050 |
| }, |
| { |
| "epoch": 0.30377266046055856, |
| "grad_norm": 1.5869189857437231, |
| "learning_rate": 8.788228643631353e-06, |
| "loss": 0.5699, |
| "step": 8060 |
| }, |
| { |
| "epoch": 0.3041495496174575, |
| "grad_norm": 1.5017790526890054, |
| "learning_rate": 8.783932031626702e-06, |
| "loss": 0.5333, |
| "step": 8070 |
| }, |
| { |
| "epoch": 0.3045264387743565, |
| "grad_norm": 1.7418008438314914, |
| "learning_rate": 8.779628870082963e-06, |
| "loss": 0.5681, |
| "step": 8080 |
| }, |
| { |
| "epoch": 0.3049033279312554, |
| "grad_norm": 1.5384555545934482, |
| "learning_rate": 8.775319166448397e-06, |
| "loss": 0.4892, |
| "step": 8090 |
| }, |
| { |
| "epoch": 0.3052802170881544, |
| "grad_norm": 1.7242800734457064, |
| "learning_rate": 8.771002928182593e-06, |
| "loss": 0.5323, |
| "step": 8100 |
| }, |
| { |
| "epoch": 0.3056571062450533, |
| "grad_norm": 1.5545232251003573, |
| "learning_rate": 8.76668016275645e-06, |
| "loss": 0.5347, |
| "step": 8110 |
| }, |
| { |
| "epoch": 0.3060339954019523, |
| "grad_norm": 1.5864191351139438, |
| "learning_rate": 8.762350877652161e-06, |
| "loss": 0.5133, |
| "step": 8120 |
| }, |
| { |
| "epoch": 0.30641088455885124, |
| "grad_norm": 1.536374345634808, |
| "learning_rate": 8.758015080363209e-06, |
| "loss": 0.5429, |
| "step": 8130 |
| }, |
| { |
| "epoch": 0.3067877737157502, |
| "grad_norm": 1.3739374982043573, |
| "learning_rate": 8.753672778394348e-06, |
| "loss": 0.5071, |
| "step": 8140 |
| }, |
| { |
| "epoch": 0.30716466287264915, |
| "grad_norm": 1.9356831887412023, |
| "learning_rate": 8.749323979261586e-06, |
| "loss": 0.5598, |
| "step": 8150 |
| }, |
| { |
| "epoch": 0.30754155202954814, |
| "grad_norm": 1.8841720246001954, |
| "learning_rate": 8.744968690492183e-06, |
| "loss": 0.5251, |
| "step": 8160 |
| }, |
| { |
| "epoch": 0.30791844118644707, |
| "grad_norm": 1.4483138834088356, |
| "learning_rate": 8.740606919624628e-06, |
| "loss": 0.542, |
| "step": 8170 |
| }, |
| { |
| "epoch": 0.308295330343346, |
| "grad_norm": 1.739676604765352, |
| "learning_rate": 8.73623867420863e-06, |
| "loss": 0.5161, |
| "step": 8180 |
| }, |
| { |
| "epoch": 0.308672219500245, |
| "grad_norm": 1.4496329719941419, |
| "learning_rate": 8.731863961805108e-06, |
| "loss": 0.5142, |
| "step": 8190 |
| }, |
| { |
| "epoch": 0.3090491086571439, |
| "grad_norm": 1.586205834775067, |
| "learning_rate": 8.727482789986167e-06, |
| "loss": 0.5105, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.3094259978140429, |
| "grad_norm": 1.5822661719201925, |
| "learning_rate": 8.723095166335105e-06, |
| "loss": 0.4927, |
| "step": 8210 |
| }, |
| { |
| "epoch": 0.3098028869709418, |
| "grad_norm": 1.4744825299579403, |
| "learning_rate": 8.718701098446373e-06, |
| "loss": 0.5154, |
| "step": 8220 |
| }, |
| { |
| "epoch": 0.3101797761278408, |
| "grad_norm": 1.6760657484578674, |
| "learning_rate": 8.714300593925588e-06, |
| "loss": 0.5443, |
| "step": 8230 |
| }, |
| { |
| "epoch": 0.31055666528473974, |
| "grad_norm": 1.6507496475712244, |
| "learning_rate": 8.709893660389502e-06, |
| "loss": 0.5103, |
| "step": 8240 |
| }, |
| { |
| "epoch": 0.3109335544416387, |
| "grad_norm": 1.6267514125886644, |
| "learning_rate": 8.705480305465993e-06, |
| "loss": 0.5204, |
| "step": 8250 |
| }, |
| { |
| "epoch": 0.31131044359853766, |
| "grad_norm": 1.7139044318938548, |
| "learning_rate": 8.701060536794062e-06, |
| "loss": 0.5082, |
| "step": 8260 |
| }, |
| { |
| "epoch": 0.31168733275543664, |
| "grad_norm": 1.5364667287673175, |
| "learning_rate": 8.6966343620238e-06, |
| "loss": 0.5124, |
| "step": 8270 |
| }, |
| { |
| "epoch": 0.31206422191233557, |
| "grad_norm": 1.5973367114004897, |
| "learning_rate": 8.692201788816397e-06, |
| "loss": 0.5401, |
| "step": 8280 |
| }, |
| { |
| "epoch": 0.31244111106923456, |
| "grad_norm": 1.6171012597396603, |
| "learning_rate": 8.687762824844112e-06, |
| "loss": 0.5523, |
| "step": 8290 |
| }, |
| { |
| "epoch": 0.3128180002261335, |
| "grad_norm": 1.6798865965319494, |
| "learning_rate": 8.683317477790267e-06, |
| "loss": 0.5566, |
| "step": 8300 |
| }, |
| { |
| "epoch": 0.31319488938303247, |
| "grad_norm": 1.3965855622429122, |
| "learning_rate": 8.678865755349232e-06, |
| "loss": 0.5215, |
| "step": 8310 |
| }, |
| { |
| "epoch": 0.3135717785399314, |
| "grad_norm": 1.478475036458125, |
| "learning_rate": 8.674407665226412e-06, |
| "loss": 0.548, |
| "step": 8320 |
| }, |
| { |
| "epoch": 0.3139486676968304, |
| "grad_norm": 1.6537961782542907, |
| "learning_rate": 8.669943215138236e-06, |
| "loss": 0.5155, |
| "step": 8330 |
| }, |
| { |
| "epoch": 0.3143255568537293, |
| "grad_norm": 1.5083041356086178, |
| "learning_rate": 8.665472412812137e-06, |
| "loss": 0.5218, |
| "step": 8340 |
| }, |
| { |
| "epoch": 0.3147024460106283, |
| "grad_norm": 1.7094855096711425, |
| "learning_rate": 8.660995265986547e-06, |
| "loss": 0.4969, |
| "step": 8350 |
| }, |
| { |
| "epoch": 0.31507933516752723, |
| "grad_norm": 1.5448154600516473, |
| "learning_rate": 8.656511782410877e-06, |
| "loss": 0.5228, |
| "step": 8360 |
| }, |
| { |
| "epoch": 0.31545622432442616, |
| "grad_norm": 1.8392297958376145, |
| "learning_rate": 8.652021969845508e-06, |
| "loss": 0.5122, |
| "step": 8370 |
| }, |
| { |
| "epoch": 0.31583311348132515, |
| "grad_norm": 1.5057677282206323, |
| "learning_rate": 8.647525836061773e-06, |
| "loss": 0.5232, |
| "step": 8380 |
| }, |
| { |
| "epoch": 0.3162100026382241, |
| "grad_norm": 1.3387979618187744, |
| "learning_rate": 8.643023388841951e-06, |
| "loss": 0.4788, |
| "step": 8390 |
| }, |
| { |
| "epoch": 0.31658689179512306, |
| "grad_norm": 1.7299810103354585, |
| "learning_rate": 8.638514635979242e-06, |
| "loss": 0.5271, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.316963780952022, |
| "grad_norm": 1.5877959379659248, |
| "learning_rate": 8.633999585277769e-06, |
| "loss": 0.5492, |
| "step": 8410 |
| }, |
| { |
| "epoch": 0.317340670108921, |
| "grad_norm": 1.5926803639365381, |
| "learning_rate": 8.629478244552548e-06, |
| "loss": 0.4928, |
| "step": 8420 |
| }, |
| { |
| "epoch": 0.3177175592658199, |
| "grad_norm": 1.4153581295672915, |
| "learning_rate": 8.624950621629487e-06, |
| "loss": 0.5204, |
| "step": 8430 |
| }, |
| { |
| "epoch": 0.3180944484227189, |
| "grad_norm": 1.515638801797706, |
| "learning_rate": 8.620416724345365e-06, |
| "loss": 0.5266, |
| "step": 8440 |
| }, |
| { |
| "epoch": 0.3184713375796178, |
| "grad_norm": 1.7525263151023882, |
| "learning_rate": 8.615876560547822e-06, |
| "loss": 0.524, |
| "step": 8450 |
| }, |
| { |
| "epoch": 0.3188482267365168, |
| "grad_norm": 1.5277030246398249, |
| "learning_rate": 8.611330138095344e-06, |
| "loss": 0.5071, |
| "step": 8460 |
| }, |
| { |
| "epoch": 0.31922511589341573, |
| "grad_norm": 1.692436940543452, |
| "learning_rate": 8.606777464857254e-06, |
| "loss": 0.518, |
| "step": 8470 |
| }, |
| { |
| "epoch": 0.3196020050503147, |
| "grad_norm": 1.9842826236775062, |
| "learning_rate": 8.60221854871369e-06, |
| "loss": 0.5193, |
| "step": 8480 |
| }, |
| { |
| "epoch": 0.31997889420721365, |
| "grad_norm": 1.7689057994372488, |
| "learning_rate": 8.597653397555597e-06, |
| "loss": 0.5288, |
| "step": 8490 |
| }, |
| { |
| "epoch": 0.32035578336411263, |
| "grad_norm": 1.6259926395499629, |
| "learning_rate": 8.59308201928471e-06, |
| "loss": 0.5614, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.32073267252101156, |
| "grad_norm": 1.4818216820726773, |
| "learning_rate": 8.588504421813548e-06, |
| "loss": 0.519, |
| "step": 8510 |
| }, |
| { |
| "epoch": 0.32110956167791055, |
| "grad_norm": 1.4755097387688436, |
| "learning_rate": 8.583920613065389e-06, |
| "loss": 0.5363, |
| "step": 8520 |
| }, |
| { |
| "epoch": 0.3214864508348095, |
| "grad_norm": 1.4930239554129483, |
| "learning_rate": 8.579330600974263e-06, |
| "loss": 0.5245, |
| "step": 8530 |
| }, |
| { |
| "epoch": 0.32186333999170846, |
| "grad_norm": 1.5984210950118547, |
| "learning_rate": 8.57473439348494e-06, |
| "loss": 0.5069, |
| "step": 8540 |
| }, |
| { |
| "epoch": 0.3222402291486074, |
| "grad_norm": 1.667635983292806, |
| "learning_rate": 8.570131998552912e-06, |
| "loss": 0.5303, |
| "step": 8550 |
| }, |
| { |
| "epoch": 0.3226171183055063, |
| "grad_norm": 1.616690957226872, |
| "learning_rate": 8.56552342414438e-06, |
| "loss": 0.5231, |
| "step": 8560 |
| }, |
| { |
| "epoch": 0.3229940074624053, |
| "grad_norm": 1.8034906955411334, |
| "learning_rate": 8.560908678236243e-06, |
| "loss": 0.5207, |
| "step": 8570 |
| }, |
| { |
| "epoch": 0.32337089661930424, |
| "grad_norm": 1.629302217435465, |
| "learning_rate": 8.55628776881608e-06, |
| "loss": 0.5111, |
| "step": 8580 |
| }, |
| { |
| "epoch": 0.3237477857762032, |
| "grad_norm": 1.5524410794967876, |
| "learning_rate": 8.551660703882137e-06, |
| "loss": 0.5291, |
| "step": 8590 |
| }, |
| { |
| "epoch": 0.32412467493310215, |
| "grad_norm": 1.682436170015779, |
| "learning_rate": 8.547027491443319e-06, |
| "loss": 0.5203, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.32450156409000114, |
| "grad_norm": 1.4406822786435207, |
| "learning_rate": 8.542388139519166e-06, |
| "loss": 0.5147, |
| "step": 8610 |
| }, |
| { |
| "epoch": 0.32487845324690007, |
| "grad_norm": 1.7053903855885608, |
| "learning_rate": 8.537742656139854e-06, |
| "loss": 0.5221, |
| "step": 8620 |
| }, |
| { |
| "epoch": 0.32525534240379905, |
| "grad_norm": 1.6508021549322034, |
| "learning_rate": 8.533091049346158e-06, |
| "loss": 0.5087, |
| "step": 8630 |
| }, |
| { |
| "epoch": 0.325632231560698, |
| "grad_norm": 1.7227108959852215, |
| "learning_rate": 8.528433327189464e-06, |
| "loss": 0.5144, |
| "step": 8640 |
| }, |
| { |
| "epoch": 0.32600912071759697, |
| "grad_norm": 1.7003660286486464, |
| "learning_rate": 8.52376949773174e-06, |
| "loss": 0.5178, |
| "step": 8650 |
| }, |
| { |
| "epoch": 0.3263860098744959, |
| "grad_norm": 1.3401486704634595, |
| "learning_rate": 8.51909956904552e-06, |
| "loss": 0.5392, |
| "step": 8660 |
| }, |
| { |
| "epoch": 0.3267628990313949, |
| "grad_norm": 1.5872896977384134, |
| "learning_rate": 8.514423549213899e-06, |
| "loss": 0.5173, |
| "step": 8670 |
| }, |
| { |
| "epoch": 0.3271397881882938, |
| "grad_norm": 1.5745916384042906, |
| "learning_rate": 8.509741446330516e-06, |
| "loss": 0.5014, |
| "step": 8680 |
| }, |
| { |
| "epoch": 0.3275166773451928, |
| "grad_norm": 1.5616168597563413, |
| "learning_rate": 8.505053268499536e-06, |
| "loss": 0.5448, |
| "step": 8690 |
| }, |
| { |
| "epoch": 0.3278935665020917, |
| "grad_norm": 1.6773461052799608, |
| "learning_rate": 8.500359023835643e-06, |
| "loss": 0.5367, |
| "step": 8700 |
| }, |
| { |
| "epoch": 0.3282704556589907, |
| "grad_norm": 1.744777642171872, |
| "learning_rate": 8.49565872046402e-06, |
| "loss": 0.5294, |
| "step": 8710 |
| }, |
| { |
| "epoch": 0.32864734481588964, |
| "grad_norm": 1.4370066131355128, |
| "learning_rate": 8.490952366520332e-06, |
| "loss": 0.5104, |
| "step": 8720 |
| }, |
| { |
| "epoch": 0.3290242339727886, |
| "grad_norm": 1.5686171487455622, |
| "learning_rate": 8.486239970150726e-06, |
| "loss": 0.5082, |
| "step": 8730 |
| }, |
| { |
| "epoch": 0.32940112312968756, |
| "grad_norm": 1.729514880196974, |
| "learning_rate": 8.481521539511802e-06, |
| "loss": 0.5424, |
| "step": 8740 |
| }, |
| { |
| "epoch": 0.32977801228658654, |
| "grad_norm": 1.7026908866007286, |
| "learning_rate": 8.476797082770604e-06, |
| "loss": 0.5319, |
| "step": 8750 |
| }, |
| { |
| "epoch": 0.33015490144348547, |
| "grad_norm": 1.6815280081880184, |
| "learning_rate": 8.472066608104613e-06, |
| "loss": 0.497, |
| "step": 8760 |
| }, |
| { |
| "epoch": 0.3305317906003844, |
| "grad_norm": 1.3354916475471044, |
| "learning_rate": 8.467330123701713e-06, |
| "loss": 0.5199, |
| "step": 8770 |
| }, |
| { |
| "epoch": 0.3309086797572834, |
| "grad_norm": 1.6596093120910274, |
| "learning_rate": 8.462587637760207e-06, |
| "loss": 0.49, |
| "step": 8780 |
| }, |
| { |
| "epoch": 0.3312855689141823, |
| "grad_norm": 1.4490041698465326, |
| "learning_rate": 8.457839158488772e-06, |
| "loss": 0.5177, |
| "step": 8790 |
| }, |
| { |
| "epoch": 0.3316624580710813, |
| "grad_norm": 1.743520901799635, |
| "learning_rate": 8.453084694106468e-06, |
| "loss": 0.5554, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.33203934722798023, |
| "grad_norm": 1.3601593837886905, |
| "learning_rate": 8.448324252842708e-06, |
| "loss": 0.4939, |
| "step": 8810 |
| }, |
| { |
| "epoch": 0.3324162363848792, |
| "grad_norm": 1.0538420503197117, |
| "learning_rate": 8.443557842937257e-06, |
| "loss": 0.5129, |
| "step": 8820 |
| }, |
| { |
| "epoch": 0.33279312554177815, |
| "grad_norm": 1.580901795857022, |
| "learning_rate": 8.438785472640202e-06, |
| "loss": 0.523, |
| "step": 8830 |
| }, |
| { |
| "epoch": 0.33317001469867713, |
| "grad_norm": 1.6642518860804858, |
| "learning_rate": 8.434007150211957e-06, |
| "loss": 0.5148, |
| "step": 8840 |
| }, |
| { |
| "epoch": 0.33354690385557606, |
| "grad_norm": 1.5678025009658634, |
| "learning_rate": 8.42922288392323e-06, |
| "loss": 0.5189, |
| "step": 8850 |
| }, |
| { |
| "epoch": 0.33392379301247505, |
| "grad_norm": 1.5916235440951905, |
| "learning_rate": 8.424432682055022e-06, |
| "loss": 0.5072, |
| "step": 8860 |
| }, |
| { |
| "epoch": 0.334300682169374, |
| "grad_norm": 1.4657720753841874, |
| "learning_rate": 8.419636552898605e-06, |
| "loss": 0.5275, |
| "step": 8870 |
| }, |
| { |
| "epoch": 0.33467757132627296, |
| "grad_norm": 1.5520680065937231, |
| "learning_rate": 8.414834504755513e-06, |
| "loss": 0.5329, |
| "step": 8880 |
| }, |
| { |
| "epoch": 0.3350544604831719, |
| "grad_norm": 1.4122325295424663, |
| "learning_rate": 8.410026545937522e-06, |
| "loss": 0.5215, |
| "step": 8890 |
| }, |
| { |
| "epoch": 0.3354313496400709, |
| "grad_norm": 1.7208431430034201, |
| "learning_rate": 8.405212684766642e-06, |
| "loss": 0.5266, |
| "step": 8900 |
| }, |
| { |
| "epoch": 0.3358082387969698, |
| "grad_norm": 1.522595875256597, |
| "learning_rate": 8.400392929575098e-06, |
| "loss": 0.521, |
| "step": 8910 |
| }, |
| { |
| "epoch": 0.3361851279538688, |
| "grad_norm": 1.193362753628422, |
| "learning_rate": 8.395567288705315e-06, |
| "loss": 0.473, |
| "step": 8920 |
| }, |
| { |
| "epoch": 0.3365620171107677, |
| "grad_norm": 1.617816412749885, |
| "learning_rate": 8.390735770509909e-06, |
| "loss": 0.5367, |
| "step": 8930 |
| }, |
| { |
| "epoch": 0.3369389062676667, |
| "grad_norm": 3.7585583359479173, |
| "learning_rate": 8.385898383351662e-06, |
| "loss": 0.5273, |
| "step": 8940 |
| }, |
| { |
| "epoch": 0.33731579542456563, |
| "grad_norm": 1.570651388025663, |
| "learning_rate": 8.381055135603526e-06, |
| "loss": 0.5431, |
| "step": 8950 |
| }, |
| { |
| "epoch": 0.33769268458146456, |
| "grad_norm": 1.7069789377376001, |
| "learning_rate": 8.376206035648587e-06, |
| "loss": 0.5534, |
| "step": 8960 |
| }, |
| { |
| "epoch": 0.33806957373836355, |
| "grad_norm": 1.718778224322803, |
| "learning_rate": 8.371351091880064e-06, |
| "loss": 0.5132, |
| "step": 8970 |
| }, |
| { |
| "epoch": 0.3384464628952625, |
| "grad_norm": 1.5304391761250087, |
| "learning_rate": 8.366490312701292e-06, |
| "loss": 0.5227, |
| "step": 8980 |
| }, |
| { |
| "epoch": 0.33882335205216146, |
| "grad_norm": 1.5179430433625931, |
| "learning_rate": 8.361623706525703e-06, |
| "loss": 0.5131, |
| "step": 8990 |
| }, |
| { |
| "epoch": 0.3392002412090604, |
| "grad_norm": 1.7236639439764125, |
| "learning_rate": 8.356751281776818e-06, |
| "loss": 0.5392, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.3395771303659594, |
| "grad_norm": 1.5735386800178819, |
| "learning_rate": 8.35187304688823e-06, |
| "loss": 0.5347, |
| "step": 9010 |
| }, |
| { |
| "epoch": 0.3399540195228583, |
| "grad_norm": 1.8833870834225461, |
| "learning_rate": 8.346989010303586e-06, |
| "loss": 0.5209, |
| "step": 9020 |
| }, |
| { |
| "epoch": 0.3403309086797573, |
| "grad_norm": 1.4643995674675825, |
| "learning_rate": 8.342099180476575e-06, |
| "loss": 0.5049, |
| "step": 9030 |
| }, |
| { |
| "epoch": 0.3407077978366562, |
| "grad_norm": 1.8572382732988715, |
| "learning_rate": 8.337203565870915e-06, |
| "loss": 0.5256, |
| "step": 9040 |
| }, |
| { |
| "epoch": 0.3410846869935552, |
| "grad_norm": 1.7660917988302332, |
| "learning_rate": 8.332302174960336e-06, |
| "loss": 0.5416, |
| "step": 9050 |
| }, |
| { |
| "epoch": 0.34146157615045414, |
| "grad_norm": 1.8036514572139464, |
| "learning_rate": 8.327395016228567e-06, |
| "loss": 0.5649, |
| "step": 9060 |
| }, |
| { |
| "epoch": 0.3418384653073531, |
| "grad_norm": 1.3940242330097408, |
| "learning_rate": 8.32248209816932e-06, |
| "loss": 0.5114, |
| "step": 9070 |
| }, |
| { |
| "epoch": 0.34221535446425205, |
| "grad_norm": 1.6019993171028344, |
| "learning_rate": 8.317563429286274e-06, |
| "loss": 0.4947, |
| "step": 9080 |
| }, |
| { |
| "epoch": 0.34259224362115104, |
| "grad_norm": 2.247467137710252, |
| "learning_rate": 8.312639018093067e-06, |
| "loss": 0.5569, |
| "step": 9090 |
| }, |
| { |
| "epoch": 0.34296913277804997, |
| "grad_norm": 1.5951663374446334, |
| "learning_rate": 8.307708873113267e-06, |
| "loss": 0.4897, |
| "step": 9100 |
| }, |
| { |
| "epoch": 0.34334602193494895, |
| "grad_norm": 1.738333108559983, |
| "learning_rate": 8.302773002880377e-06, |
| "loss": 0.4987, |
| "step": 9110 |
| }, |
| { |
| "epoch": 0.3437229110918479, |
| "grad_norm": 1.5337169050623085, |
| "learning_rate": 8.297831415937802e-06, |
| "loss": 0.505, |
| "step": 9120 |
| }, |
| { |
| "epoch": 0.34409980024874687, |
| "grad_norm": 1.4472548665213318, |
| "learning_rate": 8.29288412083885e-06, |
| "loss": 0.512, |
| "step": 9130 |
| }, |
| { |
| "epoch": 0.3444766894056458, |
| "grad_norm": 1.4492396478807243, |
| "learning_rate": 8.287931126146696e-06, |
| "loss": 0.4886, |
| "step": 9140 |
| }, |
| { |
| "epoch": 0.3448535785625448, |
| "grad_norm": 1.7630091584083531, |
| "learning_rate": 8.282972440434393e-06, |
| "loss": 0.534, |
| "step": 9150 |
| }, |
| { |
| "epoch": 0.3452304677194437, |
| "grad_norm": 1.3895390793947004, |
| "learning_rate": 8.278008072284841e-06, |
| "loss": 0.4952, |
| "step": 9160 |
| }, |
| { |
| "epoch": 0.34560735687634264, |
| "grad_norm": 1.3777597188749056, |
| "learning_rate": 8.273038030290772e-06, |
| "loss": 0.5084, |
| "step": 9170 |
| }, |
| { |
| "epoch": 0.3459842460332416, |
| "grad_norm": 1.7939386012497827, |
| "learning_rate": 8.268062323054742e-06, |
| "loss": 0.5433, |
| "step": 9180 |
| }, |
| { |
| "epoch": 0.34636113519014056, |
| "grad_norm": 1.4962140700288487, |
| "learning_rate": 8.263080959189114e-06, |
| "loss": 0.5415, |
| "step": 9190 |
| }, |
| { |
| "epoch": 0.34673802434703954, |
| "grad_norm": 1.6671458345630203, |
| "learning_rate": 8.258093947316036e-06, |
| "loss": 0.5137, |
| "step": 9200 |
| }, |
| { |
| "epoch": 0.34711491350393847, |
| "grad_norm": 1.5818238301461247, |
| "learning_rate": 8.253101296067441e-06, |
| "loss": 0.5005, |
| "step": 9210 |
| }, |
| { |
| "epoch": 0.34749180266083746, |
| "grad_norm": 1.6485091022559941, |
| "learning_rate": 8.248103014085014e-06, |
| "loss": 0.526, |
| "step": 9220 |
| }, |
| { |
| "epoch": 0.3478686918177364, |
| "grad_norm": 1.6061788923973597, |
| "learning_rate": 8.243099110020191e-06, |
| "loss": 0.5299, |
| "step": 9230 |
| }, |
| { |
| "epoch": 0.34824558097463537, |
| "grad_norm": 1.5857303046251194, |
| "learning_rate": 8.238089592534143e-06, |
| "loss": 0.5272, |
| "step": 9240 |
| }, |
| { |
| "epoch": 0.3486224701315343, |
| "grad_norm": 1.5733908134969434, |
| "learning_rate": 8.233074470297746e-06, |
| "loss": 0.5027, |
| "step": 9250 |
| }, |
| { |
| "epoch": 0.3489993592884333, |
| "grad_norm": 1.2391810742375864, |
| "learning_rate": 8.228053751991586e-06, |
| "loss": 0.5147, |
| "step": 9260 |
| }, |
| { |
| "epoch": 0.3493762484453322, |
| "grad_norm": 1.6623967687120003, |
| "learning_rate": 8.223027446305939e-06, |
| "loss": 0.562, |
| "step": 9270 |
| }, |
| { |
| "epoch": 0.3497531376022312, |
| "grad_norm": 1.6535672620155857, |
| "learning_rate": 8.217995561940735e-06, |
| "loss": 0.5135, |
| "step": 9280 |
| }, |
| { |
| "epoch": 0.35013002675913013, |
| "grad_norm": 1.628954367480316, |
| "learning_rate": 8.21295810760558e-06, |
| "loss": 0.5491, |
| "step": 9290 |
| }, |
| { |
| "epoch": 0.3505069159160291, |
| "grad_norm": 1.6259980778856178, |
| "learning_rate": 8.207915092019709e-06, |
| "loss": 0.5277, |
| "step": 9300 |
| }, |
| { |
| "epoch": 0.35088380507292805, |
| "grad_norm": 1.7960271399406822, |
| "learning_rate": 8.202866523911985e-06, |
| "loss": 0.5026, |
| "step": 9310 |
| }, |
| { |
| "epoch": 0.35126069422982703, |
| "grad_norm": 1.5027810941773745, |
| "learning_rate": 8.197812412020882e-06, |
| "loss": 0.5219, |
| "step": 9320 |
| }, |
| { |
| "epoch": 0.35163758338672596, |
| "grad_norm": 1.4147518487518373, |
| "learning_rate": 8.192752765094474e-06, |
| "loss": 0.4946, |
| "step": 9330 |
| }, |
| { |
| "epoch": 0.35201447254362495, |
| "grad_norm": 1.7588868083859255, |
| "learning_rate": 8.18768759189041e-06, |
| "loss": 0.5214, |
| "step": 9340 |
| }, |
| { |
| "epoch": 0.3523913617005239, |
| "grad_norm": 1.9046948041639848, |
| "learning_rate": 8.182616901175904e-06, |
| "loss": 0.5327, |
| "step": 9350 |
| }, |
| { |
| "epoch": 0.3527682508574228, |
| "grad_norm": 1.8541351405292703, |
| "learning_rate": 8.177540701727725e-06, |
| "loss": 0.5332, |
| "step": 9360 |
| }, |
| { |
| "epoch": 0.3531451400143218, |
| "grad_norm": 1.4841893890474134, |
| "learning_rate": 8.172459002332174e-06, |
| "loss": 0.5198, |
| "step": 9370 |
| }, |
| { |
| "epoch": 0.3535220291712207, |
| "grad_norm": 1.6262991665753983, |
| "learning_rate": 8.16737181178507e-06, |
| "loss": 0.5204, |
| "step": 9380 |
| }, |
| { |
| "epoch": 0.3538989183281197, |
| "grad_norm": 1.9711118756578987, |
| "learning_rate": 8.16227913889174e-06, |
| "loss": 0.5227, |
| "step": 9390 |
| }, |
| { |
| "epoch": 0.35427580748501863, |
| "grad_norm": 1.6101287024416397, |
| "learning_rate": 8.157180992466999e-06, |
| "loss": 0.5213, |
| "step": 9400 |
| }, |
| { |
| "epoch": 0.3546526966419176, |
| "grad_norm": 1.4345161151869006, |
| "learning_rate": 8.152077381335136e-06, |
| "loss": 0.5156, |
| "step": 9410 |
| }, |
| { |
| "epoch": 0.35502958579881655, |
| "grad_norm": 1.3670300786401688, |
| "learning_rate": 8.146968314329897e-06, |
| "loss": 0.5415, |
| "step": 9420 |
| }, |
| { |
| "epoch": 0.35540647495571553, |
| "grad_norm": 2.0488645347215755, |
| "learning_rate": 8.141853800294474e-06, |
| "loss": 0.495, |
| "step": 9430 |
| }, |
| { |
| "epoch": 0.35578336411261446, |
| "grad_norm": 1.6748625828080144, |
| "learning_rate": 8.136733848081489e-06, |
| "loss": 0.5253, |
| "step": 9440 |
| }, |
| { |
| "epoch": 0.35616025326951345, |
| "grad_norm": 1.749008596784993, |
| "learning_rate": 8.131608466552968e-06, |
| "loss": 0.5306, |
| "step": 9450 |
| }, |
| { |
| "epoch": 0.3565371424264124, |
| "grad_norm": 1.4862728647070822, |
| "learning_rate": 8.126477664580347e-06, |
| "loss": 0.5124, |
| "step": 9460 |
| }, |
| { |
| "epoch": 0.35691403158331136, |
| "grad_norm": 1.6537416257961348, |
| "learning_rate": 8.121341451044433e-06, |
| "loss": 0.53, |
| "step": 9470 |
| }, |
| { |
| "epoch": 0.3572909207402103, |
| "grad_norm": 1.6263866458416927, |
| "learning_rate": 8.116199834835408e-06, |
| "loss": 0.4975, |
| "step": 9480 |
| }, |
| { |
| "epoch": 0.3576678098971093, |
| "grad_norm": 1.5395362965786443, |
| "learning_rate": 8.1110528248528e-06, |
| "loss": 0.5234, |
| "step": 9490 |
| }, |
| { |
| "epoch": 0.3580446990540082, |
| "grad_norm": 1.6712903285736607, |
| "learning_rate": 8.105900430005476e-06, |
| "loss": 0.5306, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.3584215882109072, |
| "grad_norm": 1.8324911808117614, |
| "learning_rate": 8.10074265921162e-06, |
| "loss": 0.5242, |
| "step": 9510 |
| }, |
| { |
| "epoch": 0.3587984773678061, |
| "grad_norm": 2.351195839028606, |
| "learning_rate": 8.095579521398727e-06, |
| "loss": 0.5219, |
| "step": 9520 |
| }, |
| { |
| "epoch": 0.3591753665247051, |
| "grad_norm": 1.7110044303010266, |
| "learning_rate": 8.090411025503576e-06, |
| "loss": 0.4978, |
| "step": 9530 |
| }, |
| { |
| "epoch": 0.35955225568160404, |
| "grad_norm": 1.6106891478057317, |
| "learning_rate": 8.085237180472222e-06, |
| "loss": 0.4827, |
| "step": 9540 |
| }, |
| { |
| "epoch": 0.359929144838503, |
| "grad_norm": 1.7506503522662198, |
| "learning_rate": 8.080057995259983e-06, |
| "loss": 0.5101, |
| "step": 9550 |
| }, |
| { |
| "epoch": 0.36030603399540195, |
| "grad_norm": 1.4401619042910059, |
| "learning_rate": 8.074873478831412e-06, |
| "loss": 0.4944, |
| "step": 9560 |
| }, |
| { |
| "epoch": 0.3606829231523009, |
| "grad_norm": 1.5447630644836872, |
| "learning_rate": 8.069683640160297e-06, |
| "loss": 0.5043, |
| "step": 9570 |
| }, |
| { |
| "epoch": 0.36105981230919987, |
| "grad_norm": 1.6594696899732704, |
| "learning_rate": 8.064488488229634e-06, |
| "loss": 0.5308, |
| "step": 9580 |
| }, |
| { |
| "epoch": 0.3614367014660988, |
| "grad_norm": 1.50896874784597, |
| "learning_rate": 8.059288032031616e-06, |
| "loss": 0.511, |
| "step": 9590 |
| }, |
| { |
| "epoch": 0.3618135906229978, |
| "grad_norm": 1.463806572060008, |
| "learning_rate": 8.05408228056762e-06, |
| "loss": 0.5112, |
| "step": 9600 |
| }, |
| { |
| "epoch": 0.3621904797798967, |
| "grad_norm": 2.3161399801404206, |
| "learning_rate": 8.048871242848186e-06, |
| "loss": 0.5094, |
| "step": 9610 |
| }, |
| { |
| "epoch": 0.3625673689367957, |
| "grad_norm": 1.6934623824302724, |
| "learning_rate": 8.043654927893003e-06, |
| "loss": 0.5302, |
| "step": 9620 |
| }, |
| { |
| "epoch": 0.3629442580936946, |
| "grad_norm": 1.669003654657236, |
| "learning_rate": 8.038433344730896e-06, |
| "loss": 0.5149, |
| "step": 9630 |
| }, |
| { |
| "epoch": 0.3633211472505936, |
| "grad_norm": 1.627539817842675, |
| "learning_rate": 8.033206502399811e-06, |
| "loss": 0.508, |
| "step": 9640 |
| }, |
| { |
| "epoch": 0.36369803640749254, |
| "grad_norm": 1.606477921617676, |
| "learning_rate": 8.027974409946791e-06, |
| "loss": 0.5388, |
| "step": 9650 |
| }, |
| { |
| "epoch": 0.3640749255643915, |
| "grad_norm": 1.5765425415250627, |
| "learning_rate": 8.02273707642797e-06, |
| "loss": 0.4904, |
| "step": 9660 |
| }, |
| { |
| "epoch": 0.36445181472129046, |
| "grad_norm": 1.377919727134035, |
| "learning_rate": 8.017494510908557e-06, |
| "loss": 0.4736, |
| "step": 9670 |
| }, |
| { |
| "epoch": 0.36482870387818944, |
| "grad_norm": 1.5390007963630454, |
| "learning_rate": 8.012246722462807e-06, |
| "loss": 0.5366, |
| "step": 9680 |
| }, |
| { |
| "epoch": 0.36520559303508837, |
| "grad_norm": 1.6016562038880642, |
| "learning_rate": 8.006993720174026e-06, |
| "loss": 0.5278, |
| "step": 9690 |
| }, |
| { |
| "epoch": 0.36558248219198736, |
| "grad_norm": 1.6926301666587609, |
| "learning_rate": 8.001735513134539e-06, |
| "loss": 0.524, |
| "step": 9700 |
| }, |
| { |
| "epoch": 0.3659593713488863, |
| "grad_norm": 1.4095598406907603, |
| "learning_rate": 7.996472110445682e-06, |
| "loss": 0.5248, |
| "step": 9710 |
| }, |
| { |
| "epoch": 0.36633626050578527, |
| "grad_norm": 1.931772269279146, |
| "learning_rate": 7.99120352121778e-06, |
| "loss": 0.5468, |
| "step": 9720 |
| }, |
| { |
| "epoch": 0.3667131496626842, |
| "grad_norm": 1.7501030367149104, |
| "learning_rate": 7.985929754570138e-06, |
| "loss": 0.522, |
| "step": 9730 |
| }, |
| { |
| "epoch": 0.3670900388195832, |
| "grad_norm": 1.526100066204629, |
| "learning_rate": 7.980650819631028e-06, |
| "loss": 0.5101, |
| "step": 9740 |
| }, |
| { |
| "epoch": 0.3674669279764821, |
| "grad_norm": 1.7385487243528495, |
| "learning_rate": 7.975366725537657e-06, |
| "loss": 0.5216, |
| "step": 9750 |
| }, |
| { |
| "epoch": 0.36784381713338105, |
| "grad_norm": 1.5749812078525176, |
| "learning_rate": 7.970077481436169e-06, |
| "loss": 0.5003, |
| "step": 9760 |
| }, |
| { |
| "epoch": 0.36822070629028003, |
| "grad_norm": 1.9001739721765867, |
| "learning_rate": 7.964783096481624e-06, |
| "loss": 0.5299, |
| "step": 9770 |
| }, |
| { |
| "epoch": 0.36859759544717896, |
| "grad_norm": 1.7061699416032134, |
| "learning_rate": 7.95948357983797e-06, |
| "loss": 0.5293, |
| "step": 9780 |
| }, |
| { |
| "epoch": 0.36897448460407795, |
| "grad_norm": 1.628434299258182, |
| "learning_rate": 7.954178940678048e-06, |
| "loss": 0.5365, |
| "step": 9790 |
| }, |
| { |
| "epoch": 0.3693513737609769, |
| "grad_norm": 1.5914358544888132, |
| "learning_rate": 7.94886918818356e-06, |
| "loss": 0.486, |
| "step": 9800 |
| }, |
| { |
| "epoch": 0.36972826291787586, |
| "grad_norm": 1.8260310549139975, |
| "learning_rate": 7.94355433154506e-06, |
| "loss": 0.5051, |
| "step": 9810 |
| }, |
| { |
| "epoch": 0.3701051520747748, |
| "grad_norm": 1.5743149246238135, |
| "learning_rate": 7.93823437996194e-06, |
| "loss": 0.4909, |
| "step": 9820 |
| }, |
| { |
| "epoch": 0.3704820412316738, |
| "grad_norm": 1.5885416947805906, |
| "learning_rate": 7.932909342642403e-06, |
| "loss": 0.5111, |
| "step": 9830 |
| }, |
| { |
| "epoch": 0.3708589303885727, |
| "grad_norm": 1.806111484800823, |
| "learning_rate": 7.92757922880346e-06, |
| "loss": 0.5213, |
| "step": 9840 |
| }, |
| { |
| "epoch": 0.3712358195454717, |
| "grad_norm": 1.622739814163259, |
| "learning_rate": 7.922244047670908e-06, |
| "loss": 0.5223, |
| "step": 9850 |
| }, |
| { |
| "epoch": 0.3716127087023706, |
| "grad_norm": 1.422381514282876, |
| "learning_rate": 7.916903808479316e-06, |
| "loss": 0.4954, |
| "step": 9860 |
| }, |
| { |
| "epoch": 0.3719895978592696, |
| "grad_norm": 1.4819588262177361, |
| "learning_rate": 7.911558520472007e-06, |
| "loss": 0.5041, |
| "step": 9870 |
| }, |
| { |
| "epoch": 0.37236648701616853, |
| "grad_norm": 1.6630721593622193, |
| "learning_rate": 7.906208192901043e-06, |
| "loss": 0.5031, |
| "step": 9880 |
| }, |
| { |
| "epoch": 0.3727433761730675, |
| "grad_norm": 1.5994053992133728, |
| "learning_rate": 7.900852835027207e-06, |
| "loss": 0.5212, |
| "step": 9890 |
| }, |
| { |
| "epoch": 0.37312026532996645, |
| "grad_norm": 1.6742248574206915, |
| "learning_rate": 7.89549245611999e-06, |
| "loss": 0.509, |
| "step": 9900 |
| }, |
| { |
| "epoch": 0.37349715448686543, |
| "grad_norm": 1.3401945002776963, |
| "learning_rate": 7.890127065457578e-06, |
| "loss": 0.4989, |
| "step": 9910 |
| }, |
| { |
| "epoch": 0.37387404364376436, |
| "grad_norm": 1.7744024916171353, |
| "learning_rate": 7.884756672326824e-06, |
| "loss": 0.5221, |
| "step": 9920 |
| }, |
| { |
| "epoch": 0.37425093280066335, |
| "grad_norm": 1.5151769409007294, |
| "learning_rate": 7.879381286023247e-06, |
| "loss": 0.4874, |
| "step": 9930 |
| }, |
| { |
| "epoch": 0.3746278219575623, |
| "grad_norm": 1.521150560489862, |
| "learning_rate": 7.874000915851e-06, |
| "loss": 0.5243, |
| "step": 9940 |
| }, |
| { |
| "epoch": 0.37500471111446126, |
| "grad_norm": 1.8681950791510447, |
| "learning_rate": 7.868615571122877e-06, |
| "loss": 0.5333, |
| "step": 9950 |
| }, |
| { |
| "epoch": 0.3753816002713602, |
| "grad_norm": 1.7696887020829462, |
| "learning_rate": 7.863225261160264e-06, |
| "loss": 0.5095, |
| "step": 9960 |
| }, |
| { |
| "epoch": 0.3757584894282591, |
| "grad_norm": 1.5476742902685527, |
| "learning_rate": 7.857829995293156e-06, |
| "loss": 0.5138, |
| "step": 9970 |
| }, |
| { |
| "epoch": 0.3761353785851581, |
| "grad_norm": 1.8113224519075866, |
| "learning_rate": 7.852429782860116e-06, |
| "loss": 0.5204, |
| "step": 9980 |
| }, |
| { |
| "epoch": 0.37651226774205704, |
| "grad_norm": 1.5771605683013104, |
| "learning_rate": 7.847024633208277e-06, |
| "loss": 0.5251, |
| "step": 9990 |
| }, |
| { |
| "epoch": 0.376889156898956, |
| "grad_norm": 1.7363856844555856, |
| "learning_rate": 7.841614555693315e-06, |
| "loss": 0.5374, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.37726604605585495, |
| "grad_norm": 1.4364933283426136, |
| "learning_rate": 7.83619955967943e-06, |
| "loss": 0.5058, |
| "step": 10010 |
| }, |
| { |
| "epoch": 0.37764293521275394, |
| "grad_norm": 1.7783682918591002, |
| "learning_rate": 7.830779654539347e-06, |
| "loss": 0.5219, |
| "step": 10020 |
| }, |
| { |
| "epoch": 0.37801982436965287, |
| "grad_norm": 1.502023352144145, |
| "learning_rate": 7.825354849654276e-06, |
| "loss": 0.5063, |
| "step": 10030 |
| }, |
| { |
| "epoch": 0.37839671352655185, |
| "grad_norm": 1.4662226669968323, |
| "learning_rate": 7.819925154413913e-06, |
| "loss": 0.519, |
| "step": 10040 |
| }, |
| { |
| "epoch": 0.3787736026834508, |
| "grad_norm": 1.7426260620841227, |
| "learning_rate": 7.814490578216418e-06, |
| "loss": 0.5139, |
| "step": 10050 |
| }, |
| { |
| "epoch": 0.37915049184034977, |
| "grad_norm": 1.392450084963678, |
| "learning_rate": 7.809051130468406e-06, |
| "loss": 0.5117, |
| "step": 10060 |
| }, |
| { |
| "epoch": 0.3795273809972487, |
| "grad_norm": 1.3645116377228637, |
| "learning_rate": 7.80360682058491e-06, |
| "loss": 0.4957, |
| "step": 10070 |
| }, |
| { |
| "epoch": 0.3799042701541477, |
| "grad_norm": 1.585986026276787, |
| "learning_rate": 7.798157657989393e-06, |
| "loss": 0.4932, |
| "step": 10080 |
| }, |
| { |
| "epoch": 0.3802811593110466, |
| "grad_norm": 1.732526223305691, |
| "learning_rate": 7.792703652113711e-06, |
| "loss": 0.489, |
| "step": 10090 |
| }, |
| { |
| "epoch": 0.3806580484679456, |
| "grad_norm": 1.5456835026999505, |
| "learning_rate": 7.7872448123981e-06, |
| "loss": 0.5113, |
| "step": 10100 |
| }, |
| { |
| "epoch": 0.3810349376248445, |
| "grad_norm": 1.691666553542104, |
| "learning_rate": 7.781781148291168e-06, |
| "loss": 0.5062, |
| "step": 10110 |
| }, |
| { |
| "epoch": 0.3814118267817435, |
| "grad_norm": 1.4393454733074624, |
| "learning_rate": 7.776312669249871e-06, |
| "loss": 0.5259, |
| "step": 10120 |
| }, |
| { |
| "epoch": 0.38178871593864244, |
| "grad_norm": 1.5606823602350173, |
| "learning_rate": 7.770839384739502e-06, |
| "loss": 0.481, |
| "step": 10130 |
| }, |
| { |
| "epoch": 0.3821656050955414, |
| "grad_norm": 1.5406731978486246, |
| "learning_rate": 7.765361304233669e-06, |
| "loss": 0.5022, |
| "step": 10140 |
| }, |
| { |
| "epoch": 0.38254249425244036, |
| "grad_norm": 1.5789632721608042, |
| "learning_rate": 7.759878437214279e-06, |
| "loss": 0.5205, |
| "step": 10150 |
| }, |
| { |
| "epoch": 0.3829193834093393, |
| "grad_norm": 1.4404684890173407, |
| "learning_rate": 7.75439079317153e-06, |
| "loss": 0.5194, |
| "step": 10160 |
| }, |
| { |
| "epoch": 0.38329627256623827, |
| "grad_norm": 1.5872838579011181, |
| "learning_rate": 7.748898381603885e-06, |
| "loss": 0.5209, |
| "step": 10170 |
| }, |
| { |
| "epoch": 0.3836731617231372, |
| "grad_norm": 1.7373659799447403, |
| "learning_rate": 7.743401212018058e-06, |
| "loss": 0.5338, |
| "step": 10180 |
| }, |
| { |
| "epoch": 0.3840500508800362, |
| "grad_norm": 1.5163208593264923, |
| "learning_rate": 7.737899293929e-06, |
| "loss": 0.5171, |
| "step": 10190 |
| }, |
| { |
| "epoch": 0.3844269400369351, |
| "grad_norm": 1.7459426641170264, |
| "learning_rate": 7.73239263685988e-06, |
| "loss": 0.514, |
| "step": 10200 |
| }, |
| { |
| "epoch": 0.3848038291938341, |
| "grad_norm": 1.4346771101184586, |
| "learning_rate": 7.726881250342072e-06, |
| "loss": 0.4932, |
| "step": 10210 |
| }, |
| { |
| "epoch": 0.38518071835073303, |
| "grad_norm": 1.5111843012047024, |
| "learning_rate": 7.721365143915134e-06, |
| "loss": 0.5125, |
| "step": 10220 |
| }, |
| { |
| "epoch": 0.385557607507632, |
| "grad_norm": 1.9962267341536073, |
| "learning_rate": 7.715844327126796e-06, |
| "loss": 0.5167, |
| "step": 10230 |
| }, |
| { |
| "epoch": 0.38593449666453095, |
| "grad_norm": 1.8161411611054894, |
| "learning_rate": 7.710318809532936e-06, |
| "loss": 0.5158, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.38631138582142993, |
| "grad_norm": 2.7639886954074973, |
| "learning_rate": 7.704788600697572e-06, |
| "loss": 0.5188, |
| "step": 10250 |
| }, |
| { |
| "epoch": 0.38668827497832886, |
| "grad_norm": 1.4820078773895478, |
| "learning_rate": 7.699253710192846e-06, |
| "loss": 0.5393, |
| "step": 10260 |
| }, |
| { |
| "epoch": 0.38706516413522785, |
| "grad_norm": 1.5410641109210454, |
| "learning_rate": 7.693714147598997e-06, |
| "loss": 0.5153, |
| "step": 10270 |
| }, |
| { |
| "epoch": 0.3874420532921268, |
| "grad_norm": 1.5522185520833145, |
| "learning_rate": 7.68816992250435e-06, |
| "loss": 0.501, |
| "step": 10280 |
| }, |
| { |
| "epoch": 0.38781894244902576, |
| "grad_norm": 1.5919453235813696, |
| "learning_rate": 7.682621044505307e-06, |
| "loss": 0.5129, |
| "step": 10290 |
| }, |
| { |
| "epoch": 0.3881958316059247, |
| "grad_norm": 1.8187432897104705, |
| "learning_rate": 7.67706752320632e-06, |
| "loss": 0.5119, |
| "step": 10300 |
| }, |
| { |
| "epoch": 0.3885727207628237, |
| "grad_norm": 1.7249561612487072, |
| "learning_rate": 7.671509368219876e-06, |
| "loss": 0.4994, |
| "step": 10310 |
| }, |
| { |
| "epoch": 0.3889496099197226, |
| "grad_norm": 1.6848328239504586, |
| "learning_rate": 7.665946589166487e-06, |
| "loss": 0.4925, |
| "step": 10320 |
| }, |
| { |
| "epoch": 0.3893264990766216, |
| "grad_norm": 1.5828091110560405, |
| "learning_rate": 7.660379195674661e-06, |
| "loss": 0.5038, |
| "step": 10330 |
| }, |
| { |
| "epoch": 0.3897033882335205, |
| "grad_norm": 1.5813911515185386, |
| "learning_rate": 7.654807197380905e-06, |
| "loss": 0.5195, |
| "step": 10340 |
| }, |
| { |
| "epoch": 0.3900802773904195, |
| "grad_norm": 1.6125111488328268, |
| "learning_rate": 7.649230603929682e-06, |
| "loss": 0.5015, |
| "step": 10350 |
| }, |
| { |
| "epoch": 0.39045716654731843, |
| "grad_norm": 1.3381134076751229, |
| "learning_rate": 7.643649424973423e-06, |
| "loss": 0.5088, |
| "step": 10360 |
| }, |
| { |
| "epoch": 0.39083405570421736, |
| "grad_norm": 1.7305681398298995, |
| "learning_rate": 7.638063670172484e-06, |
| "loss": 0.5185, |
| "step": 10370 |
| }, |
| { |
| "epoch": 0.39121094486111635, |
| "grad_norm": 1.4448524895747423, |
| "learning_rate": 7.632473349195148e-06, |
| "loss": 0.4951, |
| "step": 10380 |
| }, |
| { |
| "epoch": 0.3915878340180153, |
| "grad_norm": 1.4981197503697667, |
| "learning_rate": 7.626878471717601e-06, |
| "loss": 0.4968, |
| "step": 10390 |
| }, |
| { |
| "epoch": 0.39196472317491426, |
| "grad_norm": 1.7418645645645234, |
| "learning_rate": 7.621279047423913e-06, |
| "loss": 0.5008, |
| "step": 10400 |
| }, |
| { |
| "epoch": 0.3923416123318132, |
| "grad_norm": 1.5178888998524598, |
| "learning_rate": 7.615675086006027e-06, |
| "loss": 0.5289, |
| "step": 10410 |
| }, |
| { |
| "epoch": 0.3927185014887122, |
| "grad_norm": 1.830273252291855, |
| "learning_rate": 7.610066597163737e-06, |
| "loss": 0.5122, |
| "step": 10420 |
| }, |
| { |
| "epoch": 0.3930953906456111, |
| "grad_norm": 1.7128529156361387, |
| "learning_rate": 7.604453590604675e-06, |
| "loss": 0.5195, |
| "step": 10430 |
| }, |
| { |
| "epoch": 0.3934722798025101, |
| "grad_norm": 1.391617720005151, |
| "learning_rate": 7.5988360760442905e-06, |
| "loss": 0.5076, |
| "step": 10440 |
| }, |
| { |
| "epoch": 0.393849168959409, |
| "grad_norm": 1.6877860440671137, |
| "learning_rate": 7.5932140632058395e-06, |
| "loss": 0.4974, |
| "step": 10450 |
| }, |
| { |
| "epoch": 0.394226058116308, |
| "grad_norm": 1.8880275691787758, |
| "learning_rate": 7.587587561820357e-06, |
| "loss": 0.5285, |
| "step": 10460 |
| }, |
| { |
| "epoch": 0.39460294727320694, |
| "grad_norm": 2.1952687274271323, |
| "learning_rate": 7.581956581626659e-06, |
| "loss": 0.4788, |
| "step": 10470 |
| }, |
| { |
| "epoch": 0.3949798364301059, |
| "grad_norm": 1.4679354015620223, |
| "learning_rate": 7.5763211323713e-06, |
| "loss": 0.5148, |
| "step": 10480 |
| }, |
| { |
| "epoch": 0.39535672558700485, |
| "grad_norm": 5.552246755488179, |
| "learning_rate": 7.570681223808581e-06, |
| "loss": 0.5088, |
| "step": 10490 |
| }, |
| { |
| "epoch": 0.39573361474390384, |
| "grad_norm": 1.7064464108546398, |
| "learning_rate": 7.565036865700515e-06, |
| "loss": 0.5036, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.39611050390080277, |
| "grad_norm": 2.5811736419688125, |
| "learning_rate": 7.559388067816818e-06, |
| "loss": 0.4968, |
| "step": 10510 |
| }, |
| { |
| "epoch": 0.39648739305770175, |
| "grad_norm": 1.5411522330658454, |
| "learning_rate": 7.553734839934892e-06, |
| "loss": 0.5321, |
| "step": 10520 |
| }, |
| { |
| "epoch": 0.3968642822146007, |
| "grad_norm": 7.8958392331494265, |
| "learning_rate": 7.54807719183981e-06, |
| "loss": 0.5144, |
| "step": 10530 |
| }, |
| { |
| "epoch": 0.39724117137149967, |
| "grad_norm": 1.398958685780613, |
| "learning_rate": 7.5424151333242854e-06, |
| "loss": 0.5038, |
| "step": 10540 |
| }, |
| { |
| "epoch": 0.3976180605283986, |
| "grad_norm": 1.8158525212197818, |
| "learning_rate": 7.536748674188679e-06, |
| "loss": 0.5498, |
| "step": 10550 |
| }, |
| { |
| "epoch": 0.3979949496852975, |
| "grad_norm": 1.5804568739533704, |
| "learning_rate": 7.531077824240955e-06, |
| "loss": 0.4943, |
| "step": 10560 |
| }, |
| { |
| "epoch": 0.3983718388421965, |
| "grad_norm": 1.5226993293921613, |
| "learning_rate": 7.5254025932966915e-06, |
| "loss": 0.5197, |
| "step": 10570 |
| }, |
| { |
| "epoch": 0.39874872799909544, |
| "grad_norm": 1.4875198630376472, |
| "learning_rate": 7.519722991179037e-06, |
| "loss": 0.4911, |
| "step": 10580 |
| }, |
| { |
| "epoch": 0.3991256171559944, |
| "grad_norm": 1.4559827608231475, |
| "learning_rate": 7.514039027718714e-06, |
| "loss": 0.5369, |
| "step": 10590 |
| }, |
| { |
| "epoch": 0.39950250631289336, |
| "grad_norm": 1.6204937117910332, |
| "learning_rate": 7.50835071275399e-06, |
| "loss": 0.5126, |
| "step": 10600 |
| }, |
| { |
| "epoch": 0.39987939546979234, |
| "grad_norm": 1.3328838051236007, |
| "learning_rate": 7.502658056130667e-06, |
| "loss": 0.4924, |
| "step": 10610 |
| }, |
| { |
| "epoch": 0.40025628462669127, |
| "grad_norm": 1.6977987639267709, |
| "learning_rate": 7.496961067702061e-06, |
| "loss": 0.5299, |
| "step": 10620 |
| }, |
| { |
| "epoch": 0.40063317378359026, |
| "grad_norm": 1.7016949489371669, |
| "learning_rate": 7.491259757328986e-06, |
| "loss": 0.4688, |
| "step": 10630 |
| }, |
| { |
| "epoch": 0.4010100629404892, |
| "grad_norm": 1.7144533476268096, |
| "learning_rate": 7.4855541348797325e-06, |
| "loss": 0.496, |
| "step": 10640 |
| }, |
| { |
| "epoch": 0.40138695209738817, |
| "grad_norm": 1.6798605375904403, |
| "learning_rate": 7.479844210230063e-06, |
| "loss": 0.4848, |
| "step": 10650 |
| }, |
| { |
| "epoch": 0.4017638412542871, |
| "grad_norm": 1.6026398018607957, |
| "learning_rate": 7.474129993263181e-06, |
| "loss": 0.5436, |
| "step": 10660 |
| }, |
| { |
| "epoch": 0.4021407304111861, |
| "grad_norm": 2.1790501189406974, |
| "learning_rate": 7.468411493869719e-06, |
| "loss": 0.506, |
| "step": 10670 |
| }, |
| { |
| "epoch": 0.402517619568085, |
| "grad_norm": 1.5645837080947917, |
| "learning_rate": 7.462688721947724e-06, |
| "loss": 0.5218, |
| "step": 10680 |
| }, |
| { |
| "epoch": 0.402894508724984, |
| "grad_norm": 1.4088765793630955, |
| "learning_rate": 7.456961687402639e-06, |
| "loss": 0.5237, |
| "step": 10690 |
| }, |
| { |
| "epoch": 0.40327139788188293, |
| "grad_norm": 1.7292916232116071, |
| "learning_rate": 7.451230400147285e-06, |
| "loss": 0.5469, |
| "step": 10700 |
| }, |
| { |
| "epoch": 0.4036482870387819, |
| "grad_norm": 1.5186451953406783, |
| "learning_rate": 7.44549487010184e-06, |
| "loss": 0.4883, |
| "step": 10710 |
| }, |
| { |
| "epoch": 0.40402517619568085, |
| "grad_norm": 1.5087236868153622, |
| "learning_rate": 7.43975510719383e-06, |
| "loss": 0.5093, |
| "step": 10720 |
| }, |
| { |
| "epoch": 0.40440206535257983, |
| "grad_norm": 1.4677033417891505, |
| "learning_rate": 7.434011121358106e-06, |
| "loss": 0.5286, |
| "step": 10730 |
| }, |
| { |
| "epoch": 0.40477895450947876, |
| "grad_norm": 1.4802091202672563, |
| "learning_rate": 7.428262922536829e-06, |
| "loss": 0.5089, |
| "step": 10740 |
| }, |
| { |
| "epoch": 0.4051558436663777, |
| "grad_norm": 1.7355158540184825, |
| "learning_rate": 7.422510520679451e-06, |
| "loss": 0.4905, |
| "step": 10750 |
| }, |
| { |
| "epoch": 0.4055327328232767, |
| "grad_norm": 1.6236103109691675, |
| "learning_rate": 7.416753925742699e-06, |
| "loss": 0.5378, |
| "step": 10760 |
| }, |
| { |
| "epoch": 0.4059096219801756, |
| "grad_norm": 1.6609723115712154, |
| "learning_rate": 7.410993147690559e-06, |
| "loss": 0.5389, |
| "step": 10770 |
| }, |
| { |
| "epoch": 0.4062865111370746, |
| "grad_norm": 1.723187447121083, |
| "learning_rate": 7.405228196494258e-06, |
| "loss": 0.506, |
| "step": 10780 |
| }, |
| { |
| "epoch": 0.4066634002939735, |
| "grad_norm": 1.77371397243615, |
| "learning_rate": 7.399459082132245e-06, |
| "loss": 0.5513, |
| "step": 10790 |
| }, |
| { |
| "epoch": 0.4070402894508725, |
| "grad_norm": 1.61901847701063, |
| "learning_rate": 7.393685814590173e-06, |
| "loss": 0.5186, |
| "step": 10800 |
| }, |
| { |
| "epoch": 0.40741717860777144, |
| "grad_norm": 1.59625784282029, |
| "learning_rate": 7.387908403860888e-06, |
| "loss": 0.5185, |
| "step": 10810 |
| }, |
| { |
| "epoch": 0.4077940677646704, |
| "grad_norm": 1.6692144430702707, |
| "learning_rate": 7.382126859944404e-06, |
| "loss": 0.512, |
| "step": 10820 |
| }, |
| { |
| "epoch": 0.40817095692156935, |
| "grad_norm": 1.4908534978634642, |
| "learning_rate": 7.3763411928478905e-06, |
| "loss": 0.4861, |
| "step": 10830 |
| }, |
| { |
| "epoch": 0.40854784607846834, |
| "grad_norm": 1.8840748788280792, |
| "learning_rate": 7.370551412585653e-06, |
| "loss": 0.5155, |
| "step": 10840 |
| }, |
| { |
| "epoch": 0.40892473523536726, |
| "grad_norm": 1.6633231923886227, |
| "learning_rate": 7.364757529179116e-06, |
| "loss": 0.5222, |
| "step": 10850 |
| }, |
| { |
| "epoch": 0.40930162439226625, |
| "grad_norm": 1.484920688542688, |
| "learning_rate": 7.3589595526568105e-06, |
| "loss": 0.5058, |
| "step": 10860 |
| }, |
| { |
| "epoch": 0.4096785135491652, |
| "grad_norm": 1.9228281526896014, |
| "learning_rate": 7.353157493054342e-06, |
| "loss": 0.5304, |
| "step": 10870 |
| }, |
| { |
| "epoch": 0.41005540270606416, |
| "grad_norm": 1.8251544219160065, |
| "learning_rate": 7.347351360414396e-06, |
| "loss": 0.5211, |
| "step": 10880 |
| }, |
| { |
| "epoch": 0.4104322918629631, |
| "grad_norm": 1.6738905480970352, |
| "learning_rate": 7.341541164786701e-06, |
| "loss": 0.472, |
| "step": 10890 |
| }, |
| { |
| "epoch": 0.4108091810198621, |
| "grad_norm": 1.7440198392451363, |
| "learning_rate": 7.335726916228014e-06, |
| "loss": 0.5083, |
| "step": 10900 |
| }, |
| { |
| "epoch": 0.411186070176761, |
| "grad_norm": 1.6268369569369572, |
| "learning_rate": 7.329908624802118e-06, |
| "loss": 0.5208, |
| "step": 10910 |
| }, |
| { |
| "epoch": 0.41156295933366, |
| "grad_norm": 1.254862433004947, |
| "learning_rate": 7.3240863005797845e-06, |
| "loss": 0.4823, |
| "step": 10920 |
| }, |
| { |
| "epoch": 0.4119398484905589, |
| "grad_norm": 1.396064069324666, |
| "learning_rate": 7.3182599536387685e-06, |
| "loss": 0.5345, |
| "step": 10930 |
| }, |
| { |
| "epoch": 0.4123167376474579, |
| "grad_norm": 1.5132124149170976, |
| "learning_rate": 7.31242959406379e-06, |
| "loss": 0.5, |
| "step": 10940 |
| }, |
| { |
| "epoch": 0.41269362680435684, |
| "grad_norm": 1.6199593366062408, |
| "learning_rate": 7.306595231946509e-06, |
| "loss": 0.52, |
| "step": 10950 |
| }, |
| { |
| "epoch": 0.41307051596125577, |
| "grad_norm": 1.4096476517099632, |
| "learning_rate": 7.300756877385522e-06, |
| "loss": 0.5073, |
| "step": 10960 |
| }, |
| { |
| "epoch": 0.41344740511815475, |
| "grad_norm": 2.2637085400824266, |
| "learning_rate": 7.294914540486324e-06, |
| "loss": 0.5514, |
| "step": 10970 |
| }, |
| { |
| "epoch": 0.4138242942750537, |
| "grad_norm": 1.7076238981325191, |
| "learning_rate": 7.2890682313613145e-06, |
| "loss": 0.5193, |
| "step": 10980 |
| }, |
| { |
| "epoch": 0.41420118343195267, |
| "grad_norm": 1.404089053698958, |
| "learning_rate": 7.283217960129761e-06, |
| "loss": 0.4978, |
| "step": 10990 |
| }, |
| { |
| "epoch": 0.4145780725888516, |
| "grad_norm": 1.765699200284767, |
| "learning_rate": 7.277363736917793e-06, |
| "loss": 0.5045, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.4149549617457506, |
| "grad_norm": 1.2515597552306361, |
| "learning_rate": 7.271505571858378e-06, |
| "loss": 0.5161, |
| "step": 11010 |
| }, |
| { |
| "epoch": 0.4153318509026495, |
| "grad_norm": 1.5637990689131112, |
| "learning_rate": 7.265643475091308e-06, |
| "loss": 0.5056, |
| "step": 11020 |
| }, |
| { |
| "epoch": 0.4157087400595485, |
| "grad_norm": 1.6950097710271756, |
| "learning_rate": 7.25977745676318e-06, |
| "loss": 0.5305, |
| "step": 11030 |
| }, |
| { |
| "epoch": 0.41608562921644743, |
| "grad_norm": 1.3673559058916098, |
| "learning_rate": 7.253907527027377e-06, |
| "loss": 0.5156, |
| "step": 11040 |
| }, |
| { |
| "epoch": 0.4164625183733464, |
| "grad_norm": 1.7616801139174183, |
| "learning_rate": 7.2480336960440535e-06, |
| "loss": 0.5105, |
| "step": 11050 |
| }, |
| { |
| "epoch": 0.41683940753024534, |
| "grad_norm": 1.5601831166364712, |
| "learning_rate": 7.242155973980118e-06, |
| "loss": 0.4974, |
| "step": 11060 |
| }, |
| { |
| "epoch": 0.41721629668714433, |
| "grad_norm": 1.5959842163354248, |
| "learning_rate": 7.236274371009213e-06, |
| "loss": 0.5112, |
| "step": 11070 |
| }, |
| { |
| "epoch": 0.41759318584404326, |
| "grad_norm": 1.5739266782444512, |
| "learning_rate": 7.2303888973116955e-06, |
| "loss": 0.5146, |
| "step": 11080 |
| }, |
| { |
| "epoch": 0.41797007500094224, |
| "grad_norm": 1.4353518599364972, |
| "learning_rate": 7.224499563074627e-06, |
| "loss": 0.4896, |
| "step": 11090 |
| }, |
| { |
| "epoch": 0.4183469641578412, |
| "grad_norm": 1.6746462092510674, |
| "learning_rate": 7.218606378491748e-06, |
| "loss": 0.5083, |
| "step": 11100 |
| }, |
| { |
| "epoch": 0.41872385331474016, |
| "grad_norm": 1.4785054484483375, |
| "learning_rate": 7.2127093537634655e-06, |
| "loss": 0.504, |
| "step": 11110 |
| }, |
| { |
| "epoch": 0.4191007424716391, |
| "grad_norm": 2.5097265005422287, |
| "learning_rate": 7.20680849909683e-06, |
| "loss": 0.5089, |
| "step": 11120 |
| }, |
| { |
| "epoch": 0.4194776316285381, |
| "grad_norm": 1.936326911057867, |
| "learning_rate": 7.200903824705525e-06, |
| "loss": 0.4966, |
| "step": 11130 |
| }, |
| { |
| "epoch": 0.419854520785437, |
| "grad_norm": 1.3661039009657177, |
| "learning_rate": 7.194995340809845e-06, |
| "loss": 0.4992, |
| "step": 11140 |
| }, |
| { |
| "epoch": 0.42023140994233593, |
| "grad_norm": 1.7262176315985598, |
| "learning_rate": 7.189083057636677e-06, |
| "loss": 0.5468, |
| "step": 11150 |
| }, |
| { |
| "epoch": 0.4206082990992349, |
| "grad_norm": 1.4653322424571276, |
| "learning_rate": 7.183166985419482e-06, |
| "loss": 0.5141, |
| "step": 11160 |
| }, |
| { |
| "epoch": 0.42098518825613385, |
| "grad_norm": 1.401053993164947, |
| "learning_rate": 7.177247134398286e-06, |
| "loss": 0.4912, |
| "step": 11170 |
| }, |
| { |
| "epoch": 0.42136207741303283, |
| "grad_norm": 1.4861938807474284, |
| "learning_rate": 7.171323514819645e-06, |
| "loss": 0.5232, |
| "step": 11180 |
| }, |
| { |
| "epoch": 0.42173896656993176, |
| "grad_norm": 1.4194599929006113, |
| "learning_rate": 7.1653961369366495e-06, |
| "loss": 0.4814, |
| "step": 11190 |
| }, |
| { |
| "epoch": 0.42211585572683075, |
| "grad_norm": 1.76956116217899, |
| "learning_rate": 7.159465011008888e-06, |
| "loss": 0.4932, |
| "step": 11200 |
| }, |
| { |
| "epoch": 0.4224927448837297, |
| "grad_norm": 1.9109196300178812, |
| "learning_rate": 7.15353014730244e-06, |
| "loss": 0.5128, |
| "step": 11210 |
| }, |
| { |
| "epoch": 0.42286963404062866, |
| "grad_norm": 1.6509926306486402, |
| "learning_rate": 7.147591556089851e-06, |
| "loss": 0.505, |
| "step": 11220 |
| }, |
| { |
| "epoch": 0.4232465231975276, |
| "grad_norm": 1.6713792587181258, |
| "learning_rate": 7.141649247650122e-06, |
| "loss": 0.4887, |
| "step": 11230 |
| }, |
| { |
| "epoch": 0.4236234123544266, |
| "grad_norm": 1.668002839224313, |
| "learning_rate": 7.135703232268686e-06, |
| "loss": 0.4888, |
| "step": 11240 |
| }, |
| { |
| "epoch": 0.4240003015113255, |
| "grad_norm": 1.6242132187489502, |
| "learning_rate": 7.1297535202373935e-06, |
| "loss": 0.4965, |
| "step": 11250 |
| }, |
| { |
| "epoch": 0.4243771906682245, |
| "grad_norm": 1.4189185720393587, |
| "learning_rate": 7.1238001218544904e-06, |
| "loss": 0.4619, |
| "step": 11260 |
| }, |
| { |
| "epoch": 0.4247540798251234, |
| "grad_norm": 1.3574884914862415, |
| "learning_rate": 7.117843047424608e-06, |
| "loss": 0.5141, |
| "step": 11270 |
| }, |
| { |
| "epoch": 0.4251309689820224, |
| "grad_norm": 1.5298779197157868, |
| "learning_rate": 7.111882307258737e-06, |
| "loss": 0.4846, |
| "step": 11280 |
| }, |
| { |
| "epoch": 0.42550785813892134, |
| "grad_norm": 1.6463532463625445, |
| "learning_rate": 7.105917911674216e-06, |
| "loss": 0.52, |
| "step": 11290 |
| }, |
| { |
| "epoch": 0.4258847472958203, |
| "grad_norm": 1.8876680862122976, |
| "learning_rate": 7.099949870994706e-06, |
| "loss": 0.5022, |
| "step": 11300 |
| }, |
| { |
| "epoch": 0.42626163645271925, |
| "grad_norm": 1.6209020479441716, |
| "learning_rate": 7.093978195550181e-06, |
| "loss": 0.4856, |
| "step": 11310 |
| }, |
| { |
| "epoch": 0.42663852560961824, |
| "grad_norm": 1.4825939311414624, |
| "learning_rate": 7.088002895676905e-06, |
| "loss": 0.51, |
| "step": 11320 |
| }, |
| { |
| "epoch": 0.42701541476651717, |
| "grad_norm": 1.6454549725646832, |
| "learning_rate": 7.082023981717417e-06, |
| "loss": 0.5298, |
| "step": 11330 |
| }, |
| { |
| "epoch": 0.42739230392341615, |
| "grad_norm": 1.8515246881502785, |
| "learning_rate": 7.07604146402051e-06, |
| "loss": 0.5353, |
| "step": 11340 |
| }, |
| { |
| "epoch": 0.4277691930803151, |
| "grad_norm": 1.664094685534996, |
| "learning_rate": 7.0700553529412155e-06, |
| "loss": 0.5199, |
| "step": 11350 |
| }, |
| { |
| "epoch": 0.428146082237214, |
| "grad_norm": 2.1244311792060975, |
| "learning_rate": 7.064065658840782e-06, |
| "loss": 0.5145, |
| "step": 11360 |
| }, |
| { |
| "epoch": 0.428522971394113, |
| "grad_norm": 1.520255254828738, |
| "learning_rate": 7.058072392086663e-06, |
| "loss": 0.5159, |
| "step": 11370 |
| }, |
| { |
| "epoch": 0.4288998605510119, |
| "grad_norm": 1.6576637569684334, |
| "learning_rate": 7.052075563052496e-06, |
| "loss": 0.4984, |
| "step": 11380 |
| }, |
| { |
| "epoch": 0.4292767497079109, |
| "grad_norm": 1.5125466189919647, |
| "learning_rate": 7.0460751821180825e-06, |
| "loss": 0.5264, |
| "step": 11390 |
| }, |
| { |
| "epoch": 0.42965363886480984, |
| "grad_norm": 1.6676033896976015, |
| "learning_rate": 7.0400712596693735e-06, |
| "loss": 0.5229, |
| "step": 11400 |
| }, |
| { |
| "epoch": 0.4300305280217088, |
| "grad_norm": 1.7189105587329556, |
| "learning_rate": 7.034063806098447e-06, |
| "loss": 0.5207, |
| "step": 11410 |
| }, |
| { |
| "epoch": 0.43040741717860775, |
| "grad_norm": 1.7180985315889115, |
| "learning_rate": 7.0280528318034965e-06, |
| "loss": 0.5022, |
| "step": 11420 |
| }, |
| { |
| "epoch": 0.43078430633550674, |
| "grad_norm": 1.5059034039755612, |
| "learning_rate": 7.022038347188809e-06, |
| "loss": 0.5184, |
| "step": 11430 |
| }, |
| { |
| "epoch": 0.43116119549240567, |
| "grad_norm": 1.6528886008084185, |
| "learning_rate": 7.016020362664744e-06, |
| "loss": 0.5168, |
| "step": 11440 |
| }, |
| { |
| "epoch": 0.43153808464930465, |
| "grad_norm": 1.8617180030972997, |
| "learning_rate": 7.009998888647724e-06, |
| "loss": 0.502, |
| "step": 11450 |
| }, |
| { |
| "epoch": 0.4319149738062036, |
| "grad_norm": 1.3525198323198173, |
| "learning_rate": 7.003973935560206e-06, |
| "loss": 0.4869, |
| "step": 11460 |
| }, |
| { |
| "epoch": 0.43229186296310257, |
| "grad_norm": 1.677808325515248, |
| "learning_rate": 6.997945513830674e-06, |
| "loss": 0.5101, |
| "step": 11470 |
| }, |
| { |
| "epoch": 0.4326687521200015, |
| "grad_norm": 1.5676318820179806, |
| "learning_rate": 6.991913633893612e-06, |
| "loss": 0.5009, |
| "step": 11480 |
| }, |
| { |
| "epoch": 0.4330456412769005, |
| "grad_norm": 1.5538674193725228, |
| "learning_rate": 6.985878306189491e-06, |
| "loss": 0.4667, |
| "step": 11490 |
| }, |
| { |
| "epoch": 0.4334225304337994, |
| "grad_norm": 1.783690739137192, |
| "learning_rate": 6.979839541164754e-06, |
| "loss": 0.5322, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.4337994195906984, |
| "grad_norm": 1.6669015455087597, |
| "learning_rate": 6.973797349271783e-06, |
| "loss": 0.5029, |
| "step": 11510 |
| }, |
| { |
| "epoch": 0.43417630874759733, |
| "grad_norm": 1.4853940822920297, |
| "learning_rate": 6.967751740968902e-06, |
| "loss": 0.5118, |
| "step": 11520 |
| }, |
| { |
| "epoch": 0.4345531979044963, |
| "grad_norm": 1.6489294125281642, |
| "learning_rate": 6.9617027267203445e-06, |
| "loss": 0.5031, |
| "step": 11530 |
| }, |
| { |
| "epoch": 0.43493008706139524, |
| "grad_norm": 1.449669861501994, |
| "learning_rate": 6.955650316996236e-06, |
| "loss": 0.5022, |
| "step": 11540 |
| }, |
| { |
| "epoch": 0.4353069762182942, |
| "grad_norm": 1.8165282391809885, |
| "learning_rate": 6.949594522272587e-06, |
| "loss": 0.4935, |
| "step": 11550 |
| }, |
| { |
| "epoch": 0.43568386537519316, |
| "grad_norm": 1.5552590851069892, |
| "learning_rate": 6.943535353031258e-06, |
| "loss": 0.5157, |
| "step": 11560 |
| }, |
| { |
| "epoch": 0.4360607545320921, |
| "grad_norm": 1.6072285953371521, |
| "learning_rate": 6.937472819759959e-06, |
| "loss": 0.5416, |
| "step": 11570 |
| }, |
| { |
| "epoch": 0.4364376436889911, |
| "grad_norm": 1.7154718263219901, |
| "learning_rate": 6.931406932952216e-06, |
| "loss": 0.5112, |
| "step": 11580 |
| }, |
| { |
| "epoch": 0.43681453284589, |
| "grad_norm": 1.69530017912524, |
| "learning_rate": 6.92533770310736e-06, |
| "loss": 0.5259, |
| "step": 11590 |
| }, |
| { |
| "epoch": 0.437191422002789, |
| "grad_norm": 1.860148174972511, |
| "learning_rate": 6.919265140730514e-06, |
| "loss": 0.5106, |
| "step": 11600 |
| }, |
| { |
| "epoch": 0.4375683111596879, |
| "grad_norm": 1.5811409620351222, |
| "learning_rate": 6.913189256332566e-06, |
| "loss": 0.5198, |
| "step": 11610 |
| }, |
| { |
| "epoch": 0.4379452003165869, |
| "grad_norm": 1.4378401199137765, |
| "learning_rate": 6.9071100604301496e-06, |
| "loss": 0.5018, |
| "step": 11620 |
| }, |
| { |
| "epoch": 0.43832208947348583, |
| "grad_norm": 1.6434275760246106, |
| "learning_rate": 6.901027563545639e-06, |
| "loss": 0.5117, |
| "step": 11630 |
| }, |
| { |
| "epoch": 0.4386989786303848, |
| "grad_norm": 1.6446268399097024, |
| "learning_rate": 6.894941776207114e-06, |
| "loss": 0.5143, |
| "step": 11640 |
| }, |
| { |
| "epoch": 0.43907586778728375, |
| "grad_norm": 1.5857907742925326, |
| "learning_rate": 6.888852708948354e-06, |
| "loss": 0.5174, |
| "step": 11650 |
| }, |
| { |
| "epoch": 0.43945275694418273, |
| "grad_norm": 1.4020005504081066, |
| "learning_rate": 6.882760372308819e-06, |
| "loss": 0.5229, |
| "step": 11660 |
| }, |
| { |
| "epoch": 0.43982964610108166, |
| "grad_norm": 1.61069613644627, |
| "learning_rate": 6.876664776833616e-06, |
| "loss": 0.4927, |
| "step": 11670 |
| }, |
| { |
| "epoch": 0.44020653525798065, |
| "grad_norm": 1.7140951891727507, |
| "learning_rate": 6.870565933073505e-06, |
| "loss": 0.4936, |
| "step": 11680 |
| }, |
| { |
| "epoch": 0.4405834244148796, |
| "grad_norm": 1.4507770402959719, |
| "learning_rate": 6.864463851584863e-06, |
| "loss": 0.5296, |
| "step": 11690 |
| }, |
| { |
| "epoch": 0.44096031357177856, |
| "grad_norm": 1.460824925207524, |
| "learning_rate": 6.858358542929672e-06, |
| "loss": 0.5255, |
| "step": 11700 |
| }, |
| { |
| "epoch": 0.4413372027286775, |
| "grad_norm": 1.6825791692741976, |
| "learning_rate": 6.852250017675499e-06, |
| "loss": 0.5079, |
| "step": 11710 |
| }, |
| { |
| "epoch": 0.4417140918855765, |
| "grad_norm": 1.7652951082406643, |
| "learning_rate": 6.8461382863954786e-06, |
| "loss": 0.5281, |
| "step": 11720 |
| }, |
| { |
| "epoch": 0.4420909810424754, |
| "grad_norm": 1.6445005156801455, |
| "learning_rate": 6.840023359668297e-06, |
| "loss": 0.5007, |
| "step": 11730 |
| }, |
| { |
| "epoch": 0.4424678701993744, |
| "grad_norm": 3.0505367160131107, |
| "learning_rate": 6.833905248078168e-06, |
| "loss": 0.5027, |
| "step": 11740 |
| }, |
| { |
| "epoch": 0.4428447593562733, |
| "grad_norm": 1.7728481701439127, |
| "learning_rate": 6.82778396221482e-06, |
| "loss": 0.5067, |
| "step": 11750 |
| }, |
| { |
| "epoch": 0.44322164851317225, |
| "grad_norm": 1.4266982652231972, |
| "learning_rate": 6.8216595126734775e-06, |
| "loss": 0.4973, |
| "step": 11760 |
| }, |
| { |
| "epoch": 0.44359853767007124, |
| "grad_norm": 1.4465402471000843, |
| "learning_rate": 6.815531910054834e-06, |
| "loss": 0.5315, |
| "step": 11770 |
| }, |
| { |
| "epoch": 0.44397542682697017, |
| "grad_norm": 1.5371566334005535, |
| "learning_rate": 6.809401164965051e-06, |
| "loss": 0.4947, |
| "step": 11780 |
| }, |
| { |
| "epoch": 0.44435231598386915, |
| "grad_norm": 1.664248932365906, |
| "learning_rate": 6.803267288015718e-06, |
| "loss": 0.5267, |
| "step": 11790 |
| }, |
| { |
| "epoch": 0.4447292051407681, |
| "grad_norm": 1.6494483320654694, |
| "learning_rate": 6.7971302898238545e-06, |
| "loss": 0.4986, |
| "step": 11800 |
| }, |
| { |
| "epoch": 0.44510609429766707, |
| "grad_norm": 1.3037692393364804, |
| "learning_rate": 6.7909901810118785e-06, |
| "loss": 0.4724, |
| "step": 11810 |
| }, |
| { |
| "epoch": 0.445482983454566, |
| "grad_norm": 2.6200187135591713, |
| "learning_rate": 6.784846972207593e-06, |
| "loss": 0.5032, |
| "step": 11820 |
| }, |
| { |
| "epoch": 0.445859872611465, |
| "grad_norm": 1.6525404819183418, |
| "learning_rate": 6.778700674044164e-06, |
| "loss": 0.5413, |
| "step": 11830 |
| }, |
| { |
| "epoch": 0.4462367617683639, |
| "grad_norm": 1.480259604215167, |
| "learning_rate": 6.77255129716011e-06, |
| "loss": 0.5062, |
| "step": 11840 |
| }, |
| { |
| "epoch": 0.4466136509252629, |
| "grad_norm": 1.7299508446770775, |
| "learning_rate": 6.7663988521992744e-06, |
| "loss": 0.5062, |
| "step": 11850 |
| }, |
| { |
| "epoch": 0.4469905400821618, |
| "grad_norm": 1.389929475780395, |
| "learning_rate": 6.760243349810811e-06, |
| "loss": 0.5041, |
| "step": 11860 |
| }, |
| { |
| "epoch": 0.4473674292390608, |
| "grad_norm": 1.795064940876833, |
| "learning_rate": 6.754084800649169e-06, |
| "loss": 0.4923, |
| "step": 11870 |
| }, |
| { |
| "epoch": 0.44774431839595974, |
| "grad_norm": 1.5502942925119751, |
| "learning_rate": 6.747923215374068e-06, |
| "loss": 0.4851, |
| "step": 11880 |
| }, |
| { |
| "epoch": 0.4481212075528587, |
| "grad_norm": 1.635874220277167, |
| "learning_rate": 6.741758604650485e-06, |
| "loss": 0.5009, |
| "step": 11890 |
| }, |
| { |
| "epoch": 0.44849809670975765, |
| "grad_norm": 1.706031522666674, |
| "learning_rate": 6.735590979148629e-06, |
| "loss": 0.487, |
| "step": 11900 |
| }, |
| { |
| "epoch": 0.44887498586665664, |
| "grad_norm": 1.690364938457251, |
| "learning_rate": 6.729420349543934e-06, |
| "loss": 0.4947, |
| "step": 11910 |
| }, |
| { |
| "epoch": 0.44925187502355557, |
| "grad_norm": 1.7433823515896854, |
| "learning_rate": 6.7232467265170295e-06, |
| "loss": 0.5077, |
| "step": 11920 |
| }, |
| { |
| "epoch": 0.44962876418045455, |
| "grad_norm": 1.6126498482759561, |
| "learning_rate": 6.7170701207537285e-06, |
| "loss": 0.5043, |
| "step": 11930 |
| }, |
| { |
| "epoch": 0.4500056533373535, |
| "grad_norm": 1.5461921740338886, |
| "learning_rate": 6.7108905429450035e-06, |
| "loss": 0.5211, |
| "step": 11940 |
| }, |
| { |
| "epoch": 0.4503825424942524, |
| "grad_norm": 1.654741487368281, |
| "learning_rate": 6.704708003786974e-06, |
| "loss": 0.5162, |
| "step": 11950 |
| }, |
| { |
| "epoch": 0.4507594316511514, |
| "grad_norm": 1.5875885105401868, |
| "learning_rate": 6.698522513980884e-06, |
| "loss": 0.5076, |
| "step": 11960 |
| }, |
| { |
| "epoch": 0.45113632080805033, |
| "grad_norm": 1.501080589567821, |
| "learning_rate": 6.692334084233087e-06, |
| "loss": 0.5019, |
| "step": 11970 |
| }, |
| { |
| "epoch": 0.4515132099649493, |
| "grad_norm": 1.6323939585322438, |
| "learning_rate": 6.686142725255021e-06, |
| "loss": 0.5215, |
| "step": 11980 |
| }, |
| { |
| "epoch": 0.45189009912184824, |
| "grad_norm": 1.3624830176460956, |
| "learning_rate": 6.679948447763201e-06, |
| "loss": 0.5026, |
| "step": 11990 |
| }, |
| { |
| "epoch": 0.45226698827874723, |
| "grad_norm": 1.6944099137300341, |
| "learning_rate": 6.673751262479183e-06, |
| "loss": 0.5073, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.45264387743564616, |
| "grad_norm": 1.5860681363669702, |
| "learning_rate": 6.667551180129565e-06, |
| "loss": 0.5085, |
| "step": 12010 |
| }, |
| { |
| "epoch": 0.45302076659254514, |
| "grad_norm": 1.6331152902584547, |
| "learning_rate": 6.661348211445959e-06, |
| "loss": 0.5195, |
| "step": 12020 |
| }, |
| { |
| "epoch": 0.4533976557494441, |
| "grad_norm": 4.415183248235426, |
| "learning_rate": 6.655142367164967e-06, |
| "loss": 0.5005, |
| "step": 12030 |
| }, |
| { |
| "epoch": 0.45377454490634306, |
| "grad_norm": 1.5798349602106418, |
| "learning_rate": 6.648933658028174e-06, |
| "loss": 0.5045, |
| "step": 12040 |
| }, |
| { |
| "epoch": 0.454151434063242, |
| "grad_norm": 1.435313930078849, |
| "learning_rate": 6.642722094782121e-06, |
| "loss": 0.4641, |
| "step": 12050 |
| }, |
| { |
| "epoch": 0.454528323220141, |
| "grad_norm": 1.7147543461190684, |
| "learning_rate": 6.636507688178291e-06, |
| "loss": 0.5031, |
| "step": 12060 |
| }, |
| { |
| "epoch": 0.4549052123770399, |
| "grad_norm": 1.5410650369487786, |
| "learning_rate": 6.630290448973087e-06, |
| "loss": 0.5173, |
| "step": 12070 |
| }, |
| { |
| "epoch": 0.4552821015339389, |
| "grad_norm": 1.61040530239707, |
| "learning_rate": 6.624070387927811e-06, |
| "loss": 0.5205, |
| "step": 12080 |
| }, |
| { |
| "epoch": 0.4556589906908378, |
| "grad_norm": 1.4352463589168436, |
| "learning_rate": 6.61784751580866e-06, |
| "loss": 0.4747, |
| "step": 12090 |
| }, |
| { |
| "epoch": 0.4560358798477368, |
| "grad_norm": 1.8210569410817206, |
| "learning_rate": 6.611621843386684e-06, |
| "loss": 0.4858, |
| "step": 12100 |
| }, |
| { |
| "epoch": 0.45641276900463573, |
| "grad_norm": 1.9985993173284664, |
| "learning_rate": 6.605393381437792e-06, |
| "loss": 0.496, |
| "step": 12110 |
| }, |
| { |
| "epoch": 0.4567896581615347, |
| "grad_norm": 1.2939840914047982, |
| "learning_rate": 6.599162140742712e-06, |
| "loss": 0.4852, |
| "step": 12120 |
| }, |
| { |
| "epoch": 0.45716654731843365, |
| "grad_norm": 1.6530787032881842, |
| "learning_rate": 6.592928132086984e-06, |
| "loss": 0.5003, |
| "step": 12130 |
| }, |
| { |
| "epoch": 0.45754343647533263, |
| "grad_norm": 1.2909832653537678, |
| "learning_rate": 6.586691366260943e-06, |
| "loss": 0.5254, |
| "step": 12140 |
| }, |
| { |
| "epoch": 0.45792032563223156, |
| "grad_norm": 1.775707858274227, |
| "learning_rate": 6.580451854059693e-06, |
| "loss": 0.5033, |
| "step": 12150 |
| }, |
| { |
| "epoch": 0.4582972147891305, |
| "grad_norm": 1.5901192603045122, |
| "learning_rate": 6.574209606283089e-06, |
| "loss": 0.4797, |
| "step": 12160 |
| }, |
| { |
| "epoch": 0.4586741039460295, |
| "grad_norm": 1.5718931288158366, |
| "learning_rate": 6.56796463373573e-06, |
| "loss": 0.5122, |
| "step": 12170 |
| }, |
| { |
| "epoch": 0.4590509931029284, |
| "grad_norm": 1.5316527090504148, |
| "learning_rate": 6.561716947226918e-06, |
| "loss": 0.5096, |
| "step": 12180 |
| }, |
| { |
| "epoch": 0.4594278822598274, |
| "grad_norm": 1.6069192593187367, |
| "learning_rate": 6.555466557570666e-06, |
| "loss": 0.4749, |
| "step": 12190 |
| }, |
| { |
| "epoch": 0.4598047714167263, |
| "grad_norm": 1.6443701058606435, |
| "learning_rate": 6.549213475585657e-06, |
| "loss": 0.5315, |
| "step": 12200 |
| }, |
| { |
| "epoch": 0.4601816605736253, |
| "grad_norm": 1.6467073011805697, |
| "learning_rate": 6.542957712095236e-06, |
| "loss": 0.4864, |
| "step": 12210 |
| }, |
| { |
| "epoch": 0.46055854973052424, |
| "grad_norm": 1.6443944395717125, |
| "learning_rate": 6.536699277927393e-06, |
| "loss": 0.5069, |
| "step": 12220 |
| }, |
| { |
| "epoch": 0.4609354388874232, |
| "grad_norm": 1.6659779073083896, |
| "learning_rate": 6.530438183914735e-06, |
| "loss": 0.4887, |
| "step": 12230 |
| }, |
| { |
| "epoch": 0.46131232804432215, |
| "grad_norm": 1.802633741824681, |
| "learning_rate": 6.5241744408944776e-06, |
| "loss": 0.4967, |
| "step": 12240 |
| }, |
| { |
| "epoch": 0.46168921720122114, |
| "grad_norm": 1.6619516541296946, |
| "learning_rate": 6.517908059708417e-06, |
| "loss": 0.5076, |
| "step": 12250 |
| }, |
| { |
| "epoch": 0.46206610635812007, |
| "grad_norm": 1.9424205639529566, |
| "learning_rate": 6.511639051202922e-06, |
| "loss": 0.5228, |
| "step": 12260 |
| }, |
| { |
| "epoch": 0.46244299551501905, |
| "grad_norm": 1.864221195142991, |
| "learning_rate": 6.505367426228902e-06, |
| "loss": 0.5092, |
| "step": 12270 |
| }, |
| { |
| "epoch": 0.462819884671918, |
| "grad_norm": 1.5859581361435124, |
| "learning_rate": 6.499093195641801e-06, |
| "loss": 0.4919, |
| "step": 12280 |
| }, |
| { |
| "epoch": 0.46319677382881697, |
| "grad_norm": 1.5769900351453978, |
| "learning_rate": 6.49281637030157e-06, |
| "loss": 0.5003, |
| "step": 12290 |
| }, |
| { |
| "epoch": 0.4635736629857159, |
| "grad_norm": 1.962586555293073, |
| "learning_rate": 6.486536961072651e-06, |
| "loss": 0.5144, |
| "step": 12300 |
| }, |
| { |
| "epoch": 0.4639505521426149, |
| "grad_norm": 1.5960370769135634, |
| "learning_rate": 6.4802549788239585e-06, |
| "loss": 0.4968, |
| "step": 12310 |
| }, |
| { |
| "epoch": 0.4643274412995138, |
| "grad_norm": 1.4471637413402056, |
| "learning_rate": 6.473970434428865e-06, |
| "loss": 0.5133, |
| "step": 12320 |
| }, |
| { |
| "epoch": 0.4647043304564128, |
| "grad_norm": 1.5033062625403697, |
| "learning_rate": 6.467683338765169e-06, |
| "loss": 0.4983, |
| "step": 12330 |
| }, |
| { |
| "epoch": 0.4650812196133117, |
| "grad_norm": 1.8045255687380959, |
| "learning_rate": 6.461393702715093e-06, |
| "loss": 0.5212, |
| "step": 12340 |
| }, |
| { |
| "epoch": 0.46545810877021065, |
| "grad_norm": 1.5049755950938395, |
| "learning_rate": 6.455101537165251e-06, |
| "loss": 0.4964, |
| "step": 12350 |
| }, |
| { |
| "epoch": 0.46583499792710964, |
| "grad_norm": 1.307607429446019, |
| "learning_rate": 6.448806853006642e-06, |
| "loss": 0.4847, |
| "step": 12360 |
| }, |
| { |
| "epoch": 0.46621188708400857, |
| "grad_norm": 1.5148579248421497, |
| "learning_rate": 6.442509661134617e-06, |
| "loss": 0.4749, |
| "step": 12370 |
| }, |
| { |
| "epoch": 0.46658877624090755, |
| "grad_norm": 1.6520404081654247, |
| "learning_rate": 6.436209972448872e-06, |
| "loss": 0.5118, |
| "step": 12380 |
| }, |
| { |
| "epoch": 0.4669656653978065, |
| "grad_norm": 1.4969166628742265, |
| "learning_rate": 6.4299077978534215e-06, |
| "loss": 0.4866, |
| "step": 12390 |
| }, |
| { |
| "epoch": 0.46734255455470547, |
| "grad_norm": 1.688329647541456, |
| "learning_rate": 6.423603148256589e-06, |
| "loss": 0.5215, |
| "step": 12400 |
| }, |
| { |
| "epoch": 0.4677194437116044, |
| "grad_norm": 1.4741523126628993, |
| "learning_rate": 6.417296034570972e-06, |
| "loss": 0.4984, |
| "step": 12410 |
| }, |
| { |
| "epoch": 0.4680963328685034, |
| "grad_norm": 1.4962239650802354, |
| "learning_rate": 6.410986467713446e-06, |
| "loss": 0.5275, |
| "step": 12420 |
| }, |
| { |
| "epoch": 0.4684732220254023, |
| "grad_norm": 1.7317402848543209, |
| "learning_rate": 6.404674458605119e-06, |
| "loss": 0.5235, |
| "step": 12430 |
| }, |
| { |
| "epoch": 0.4688501111823013, |
| "grad_norm": 1.507370223846744, |
| "learning_rate": 6.398360018171335e-06, |
| "loss": 0.5065, |
| "step": 12440 |
| }, |
| { |
| "epoch": 0.46922700033920023, |
| "grad_norm": 1.6389685200877848, |
| "learning_rate": 6.392043157341645e-06, |
| "loss": 0.5139, |
| "step": 12450 |
| }, |
| { |
| "epoch": 0.4696038894960992, |
| "grad_norm": 4.587056243685229, |
| "learning_rate": 6.385723887049788e-06, |
| "loss": 0.5037, |
| "step": 12460 |
| }, |
| { |
| "epoch": 0.46998077865299814, |
| "grad_norm": 1.4469191726379287, |
| "learning_rate": 6.379402218233673e-06, |
| "loss": 0.5139, |
| "step": 12470 |
| }, |
| { |
| "epoch": 0.47035766780989713, |
| "grad_norm": 1.4631499108231787, |
| "learning_rate": 6.373078161835364e-06, |
| "loss": 0.5132, |
| "step": 12480 |
| }, |
| { |
| "epoch": 0.47073455696679606, |
| "grad_norm": 1.494757392645272, |
| "learning_rate": 6.366751728801051e-06, |
| "loss": 0.4952, |
| "step": 12490 |
| }, |
| { |
| "epoch": 0.47111144612369504, |
| "grad_norm": 1.6614354722718154, |
| "learning_rate": 6.360422930081045e-06, |
| "loss": 0.5236, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.471488335280594, |
| "grad_norm": 1.589063266340144, |
| "learning_rate": 6.3540917766297475e-06, |
| "loss": 0.4754, |
| "step": 12510 |
| }, |
| { |
| "epoch": 0.47186522443749296, |
| "grad_norm": 1.645641474309447, |
| "learning_rate": 6.347758279405636e-06, |
| "loss": 0.5105, |
| "step": 12520 |
| }, |
| { |
| "epoch": 0.4722421135943919, |
| "grad_norm": 1.842229947630362, |
| "learning_rate": 6.341422449371247e-06, |
| "loss": 0.5058, |
| "step": 12530 |
| }, |
| { |
| "epoch": 0.4726190027512908, |
| "grad_norm": 1.6305355750205157, |
| "learning_rate": 6.3350842974931526e-06, |
| "loss": 0.5387, |
| "step": 12540 |
| }, |
| { |
| "epoch": 0.4729958919081898, |
| "grad_norm": 1.618466891577579, |
| "learning_rate": 6.328743834741945e-06, |
| "loss": 0.4999, |
| "step": 12550 |
| }, |
| { |
| "epoch": 0.47337278106508873, |
| "grad_norm": 1.326825185706353, |
| "learning_rate": 6.322401072092216e-06, |
| "loss": 0.5027, |
| "step": 12560 |
| }, |
| { |
| "epoch": 0.4737496702219877, |
| "grad_norm": 1.5969546323969477, |
| "learning_rate": 6.316056020522538e-06, |
| "loss": 0.5222, |
| "step": 12570 |
| }, |
| { |
| "epoch": 0.47412655937888665, |
| "grad_norm": 1.5972771926737852, |
| "learning_rate": 6.309708691015443e-06, |
| "loss": 0.5015, |
| "step": 12580 |
| }, |
| { |
| "epoch": 0.47450344853578563, |
| "grad_norm": 1.5891933279912756, |
| "learning_rate": 6.303359094557411e-06, |
| "loss": 0.4977, |
| "step": 12590 |
| }, |
| { |
| "epoch": 0.47488033769268456, |
| "grad_norm": 1.7999033521409773, |
| "learning_rate": 6.297007242138842e-06, |
| "loss": 0.5161, |
| "step": 12600 |
| }, |
| { |
| "epoch": 0.47525722684958355, |
| "grad_norm": 1.5315463861951017, |
| "learning_rate": 6.290653144754043e-06, |
| "loss": 0.5215, |
| "step": 12610 |
| }, |
| { |
| "epoch": 0.4756341160064825, |
| "grad_norm": 1.6153094914746262, |
| "learning_rate": 6.2842968134012026e-06, |
| "loss": 0.4953, |
| "step": 12620 |
| }, |
| { |
| "epoch": 0.47601100516338146, |
| "grad_norm": 1.665498157343032, |
| "learning_rate": 6.277938259082382e-06, |
| "loss": 0.4995, |
| "step": 12630 |
| }, |
| { |
| "epoch": 0.4763878943202804, |
| "grad_norm": 1.3905032960020645, |
| "learning_rate": 6.271577492803486e-06, |
| "loss": 0.4796, |
| "step": 12640 |
| }, |
| { |
| "epoch": 0.4767647834771794, |
| "grad_norm": 1.6507279959491987, |
| "learning_rate": 6.265214525574248e-06, |
| "loss": 0.5157, |
| "step": 12650 |
| }, |
| { |
| "epoch": 0.4771416726340783, |
| "grad_norm": 1.2843410140916522, |
| "learning_rate": 6.258849368408213e-06, |
| "loss": 0.4858, |
| "step": 12660 |
| }, |
| { |
| "epoch": 0.4775185617909773, |
| "grad_norm": 1.8350039947211565, |
| "learning_rate": 6.252482032322716e-06, |
| "loss": 0.5029, |
| "step": 12670 |
| }, |
| { |
| "epoch": 0.4778954509478762, |
| "grad_norm": 1.5252536968641863, |
| "learning_rate": 6.246112528338864e-06, |
| "loss": 0.4954, |
| "step": 12680 |
| }, |
| { |
| "epoch": 0.4782723401047752, |
| "grad_norm": 15.626822494749767, |
| "learning_rate": 6.239740867481514e-06, |
| "loss": 0.4798, |
| "step": 12690 |
| }, |
| { |
| "epoch": 0.47864922926167414, |
| "grad_norm": 1.4368053909649985, |
| "learning_rate": 6.233367060779258e-06, |
| "loss": 0.4988, |
| "step": 12700 |
| }, |
| { |
| "epoch": 0.4790261184185731, |
| "grad_norm": 1.6672155662880033, |
| "learning_rate": 6.226991119264405e-06, |
| "loss": 0.501, |
| "step": 12710 |
| }, |
| { |
| "epoch": 0.47940300757547205, |
| "grad_norm": 1.7236312698986331, |
| "learning_rate": 6.22061305397295e-06, |
| "loss": 0.49, |
| "step": 12720 |
| }, |
| { |
| "epoch": 0.47977989673237104, |
| "grad_norm": 1.5777802364774114, |
| "learning_rate": 6.214232875944577e-06, |
| "loss": 0.4982, |
| "step": 12730 |
| }, |
| { |
| "epoch": 0.48015678588926997, |
| "grad_norm": 1.6768420216779125, |
| "learning_rate": 6.207850596222616e-06, |
| "loss": 0.4923, |
| "step": 12740 |
| }, |
| { |
| "epoch": 0.4805336750461689, |
| "grad_norm": 1.7758220533639664, |
| "learning_rate": 6.201466225854038e-06, |
| "loss": 0.5189, |
| "step": 12750 |
| }, |
| { |
| "epoch": 0.4809105642030679, |
| "grad_norm": 1.4580963940658098, |
| "learning_rate": 6.195079775889436e-06, |
| "loss": 0.4777, |
| "step": 12760 |
| }, |
| { |
| "epoch": 0.4812874533599668, |
| "grad_norm": 1.5845783588645914, |
| "learning_rate": 6.188691257382998e-06, |
| "loss": 0.5021, |
| "step": 12770 |
| }, |
| { |
| "epoch": 0.4816643425168658, |
| "grad_norm": 1.6276734887673494, |
| "learning_rate": 6.182300681392497e-06, |
| "loss": 0.5087, |
| "step": 12780 |
| }, |
| { |
| "epoch": 0.4820412316737647, |
| "grad_norm": 1.6758298613310316, |
| "learning_rate": 6.175908058979264e-06, |
| "loss": 0.4919, |
| "step": 12790 |
| }, |
| { |
| "epoch": 0.4824181208306637, |
| "grad_norm": 1.733797532127255, |
| "learning_rate": 6.169513401208169e-06, |
| "loss": 0.4979, |
| "step": 12800 |
| }, |
| { |
| "epoch": 0.48279500998756264, |
| "grad_norm": 1.497057452497915, |
| "learning_rate": 6.163116719147615e-06, |
| "loss": 0.5047, |
| "step": 12810 |
| }, |
| { |
| "epoch": 0.4831718991444616, |
| "grad_norm": 1.8210153084730252, |
| "learning_rate": 6.156718023869497e-06, |
| "loss": 0.4924, |
| "step": 12820 |
| }, |
| { |
| "epoch": 0.48354878830136055, |
| "grad_norm": 1.5847792515896284, |
| "learning_rate": 6.150317326449204e-06, |
| "loss": 0.4779, |
| "step": 12830 |
| }, |
| { |
| "epoch": 0.48392567745825954, |
| "grad_norm": 1.7086888789433332, |
| "learning_rate": 6.143914637965585e-06, |
| "loss": 0.5339, |
| "step": 12840 |
| }, |
| { |
| "epoch": 0.48430256661515847, |
| "grad_norm": 1.825053431015552, |
| "learning_rate": 6.137509969500936e-06, |
| "loss": 0.4735, |
| "step": 12850 |
| }, |
| { |
| "epoch": 0.48467945577205745, |
| "grad_norm": 1.4362907044279158, |
| "learning_rate": 6.131103332140983e-06, |
| "loss": 0.487, |
| "step": 12860 |
| }, |
| { |
| "epoch": 0.4850563449289564, |
| "grad_norm": 1.6070484026990965, |
| "learning_rate": 6.124694736974857e-06, |
| "loss": 0.5195, |
| "step": 12870 |
| }, |
| { |
| "epoch": 0.48543323408585537, |
| "grad_norm": 1.4921206455295302, |
| "learning_rate": 6.11828419509508e-06, |
| "loss": 0.4883, |
| "step": 12880 |
| }, |
| { |
| "epoch": 0.4858101232427543, |
| "grad_norm": 1.596099544918074, |
| "learning_rate": 6.111871717597542e-06, |
| "loss": 0.5001, |
| "step": 12890 |
| }, |
| { |
| "epoch": 0.4861870123996533, |
| "grad_norm": 1.6612200565392714, |
| "learning_rate": 6.10545731558148e-06, |
| "loss": 0.4955, |
| "step": 12900 |
| }, |
| { |
| "epoch": 0.4865639015565522, |
| "grad_norm": 1.5500638018436168, |
| "learning_rate": 6.09904100014947e-06, |
| "loss": 0.5034, |
| "step": 12910 |
| }, |
| { |
| "epoch": 0.4869407907134512, |
| "grad_norm": 1.4977064904230966, |
| "learning_rate": 6.092622782407395e-06, |
| "loss": 0.4963, |
| "step": 12920 |
| }, |
| { |
| "epoch": 0.48731767987035013, |
| "grad_norm": 1.556241953096911, |
| "learning_rate": 6.086202673464428e-06, |
| "loss": 0.4838, |
| "step": 12930 |
| }, |
| { |
| "epoch": 0.48769456902724906, |
| "grad_norm": 1.3784052149036907, |
| "learning_rate": 6.079780684433024e-06, |
| "loss": 0.5104, |
| "step": 12940 |
| }, |
| { |
| "epoch": 0.48807145818414804, |
| "grad_norm": 1.862515188918877, |
| "learning_rate": 6.0733568264288825e-06, |
| "loss": 0.4936, |
| "step": 12950 |
| }, |
| { |
| "epoch": 0.488448347341047, |
| "grad_norm": 1.4553084049722098, |
| "learning_rate": 6.066931110570946e-06, |
| "loss": 0.5048, |
| "step": 12960 |
| }, |
| { |
| "epoch": 0.48882523649794596, |
| "grad_norm": 1.8977016581962198, |
| "learning_rate": 6.0605035479813665e-06, |
| "loss": 0.51, |
| "step": 12970 |
| }, |
| { |
| "epoch": 0.4892021256548449, |
| "grad_norm": 1.4849897548302309, |
| "learning_rate": 6.054074149785495e-06, |
| "loss": 0.5085, |
| "step": 12980 |
| }, |
| { |
| "epoch": 0.4895790148117439, |
| "grad_norm": 1.5091872951995677, |
| "learning_rate": 6.047642927111861e-06, |
| "loss": 0.5341, |
| "step": 12990 |
| }, |
| { |
| "epoch": 0.4899559039686428, |
| "grad_norm": 1.6207478659154608, |
| "learning_rate": 6.04120989109215e-06, |
| "loss": 0.4925, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.4903327931255418, |
| "grad_norm": 1.7085152211604375, |
| "learning_rate": 6.0347750528611885e-06, |
| "loss": 0.5045, |
| "step": 13010 |
| }, |
| { |
| "epoch": 0.4907096822824407, |
| "grad_norm": 1.566132388020068, |
| "learning_rate": 6.028338423556921e-06, |
| "loss": 0.4953, |
| "step": 13020 |
| }, |
| { |
| "epoch": 0.4910865714393397, |
| "grad_norm": 5.227941826091356, |
| "learning_rate": 6.021900014320388e-06, |
| "loss": 0.4873, |
| "step": 13030 |
| }, |
| { |
| "epoch": 0.49146346059623863, |
| "grad_norm": 1.399863983250925, |
| "learning_rate": 6.015459836295719e-06, |
| "loss": 0.5148, |
| "step": 13040 |
| }, |
| { |
| "epoch": 0.4918403497531376, |
| "grad_norm": 1.7472942859455838, |
| "learning_rate": 6.0090179006301e-06, |
| "loss": 0.4913, |
| "step": 13050 |
| }, |
| { |
| "epoch": 0.49221723891003655, |
| "grad_norm": 1.6716498146417993, |
| "learning_rate": 6.002574218473759e-06, |
| "loss": 0.5056, |
| "step": 13060 |
| }, |
| { |
| "epoch": 0.49259412806693553, |
| "grad_norm": 1.4884093526859357, |
| "learning_rate": 5.996128800979949e-06, |
| "loss": 0.491, |
| "step": 13070 |
| }, |
| { |
| "epoch": 0.49297101722383446, |
| "grad_norm": 1.2765637587957481, |
| "learning_rate": 5.989681659304927e-06, |
| "loss": 0.5089, |
| "step": 13080 |
| }, |
| { |
| "epoch": 0.49334790638073345, |
| "grad_norm": 1.515026969598235, |
| "learning_rate": 5.9832328046079305e-06, |
| "loss": 0.5015, |
| "step": 13090 |
| }, |
| { |
| "epoch": 0.4937247955376324, |
| "grad_norm": 1.7949411377094697, |
| "learning_rate": 5.9767822480511685e-06, |
| "loss": 0.5161, |
| "step": 13100 |
| }, |
| { |
| "epoch": 0.49410168469453136, |
| "grad_norm": 1.4675750652458723, |
| "learning_rate": 5.970330000799787e-06, |
| "loss": 0.5104, |
| "step": 13110 |
| }, |
| { |
| "epoch": 0.4944785738514303, |
| "grad_norm": 2.0046846020153444, |
| "learning_rate": 5.963876074021868e-06, |
| "loss": 0.5292, |
| "step": 13120 |
| }, |
| { |
| "epoch": 0.4948554630083293, |
| "grad_norm": 1.5369389040150114, |
| "learning_rate": 5.957420478888393e-06, |
| "loss": 0.4792, |
| "step": 13130 |
| }, |
| { |
| "epoch": 0.4952323521652282, |
| "grad_norm": 1.4186555111290844, |
| "learning_rate": 5.950963226573237e-06, |
| "loss": 0.4947, |
| "step": 13140 |
| }, |
| { |
| "epoch": 0.49560924132212714, |
| "grad_norm": 1.8188924112261224, |
| "learning_rate": 5.944504328253137e-06, |
| "loss": 0.5111, |
| "step": 13150 |
| }, |
| { |
| "epoch": 0.4959861304790261, |
| "grad_norm": 1.5665015635774655, |
| "learning_rate": 5.9380437951076845e-06, |
| "loss": 0.487, |
| "step": 13160 |
| }, |
| { |
| "epoch": 0.49636301963592505, |
| "grad_norm": 1.408767806697384, |
| "learning_rate": 5.931581638319298e-06, |
| "loss": 0.5116, |
| "step": 13170 |
| }, |
| { |
| "epoch": 0.49673990879282404, |
| "grad_norm": 1.58728988540725, |
| "learning_rate": 5.925117869073208e-06, |
| "loss": 0.5059, |
| "step": 13180 |
| }, |
| { |
| "epoch": 0.49711679794972297, |
| "grad_norm": 1.6119437087711728, |
| "learning_rate": 5.918652498557434e-06, |
| "loss": 0.5047, |
| "step": 13190 |
| }, |
| { |
| "epoch": 0.49749368710662195, |
| "grad_norm": 1.6279721324019796, |
| "learning_rate": 5.91218553796277e-06, |
| "loss": 0.4935, |
| "step": 13200 |
| }, |
| { |
| "epoch": 0.4978705762635209, |
| "grad_norm": 1.459557111527884, |
| "learning_rate": 5.905716998482758e-06, |
| "loss": 0.4596, |
| "step": 13210 |
| }, |
| { |
| "epoch": 0.49824746542041987, |
| "grad_norm": 1.939844630405015, |
| "learning_rate": 5.899246891313678e-06, |
| "loss": 0.5423, |
| "step": 13220 |
| }, |
| { |
| "epoch": 0.4986243545773188, |
| "grad_norm": 2.0937651337717273, |
| "learning_rate": 5.892775227654518e-06, |
| "loss": 0.5223, |
| "step": 13230 |
| }, |
| { |
| "epoch": 0.4990012437342178, |
| "grad_norm": 1.6236113152239642, |
| "learning_rate": 5.886302018706964e-06, |
| "loss": 0.5015, |
| "step": 13240 |
| }, |
| { |
| "epoch": 0.4993781328911167, |
| "grad_norm": 1.701791434893844, |
| "learning_rate": 5.879827275675375e-06, |
| "loss": 0.5264, |
| "step": 13250 |
| }, |
| { |
| "epoch": 0.4997550220480157, |
| "grad_norm": 1.452378340458775, |
| "learning_rate": 5.8733510097667664e-06, |
| "loss": 0.4962, |
| "step": 13260 |
| }, |
| { |
| "epoch": 0.5001319112049146, |
| "grad_norm": 1.7488927537673415, |
| "learning_rate": 5.866873232190791e-06, |
| "loss": 0.5223, |
| "step": 13270 |
| }, |
| { |
| "epoch": 0.5005088003618136, |
| "grad_norm": 1.4944535300590263, |
| "learning_rate": 5.860393954159712e-06, |
| "loss": 0.4943, |
| "step": 13280 |
| }, |
| { |
| "epoch": 0.5008856895187126, |
| "grad_norm": 1.3543774753320355, |
| "learning_rate": 5.853913186888397e-06, |
| "loss": 0.5162, |
| "step": 13290 |
| }, |
| { |
| "epoch": 0.5012625786756115, |
| "grad_norm": 1.505410524383986, |
| "learning_rate": 5.847430941594287e-06, |
| "loss": 0.4926, |
| "step": 13300 |
| }, |
| { |
| "epoch": 0.5016394678325105, |
| "grad_norm": 1.8407394343276793, |
| "learning_rate": 5.840947229497382e-06, |
| "loss": 0.4958, |
| "step": 13310 |
| }, |
| { |
| "epoch": 0.5020163569894094, |
| "grad_norm": 1.6072319888354176, |
| "learning_rate": 5.834462061820223e-06, |
| "loss": 0.5221, |
| "step": 13320 |
| }, |
| { |
| "epoch": 0.5023932461463084, |
| "grad_norm": 1.4621312186704514, |
| "learning_rate": 5.827975449787868e-06, |
| "loss": 0.5361, |
| "step": 13330 |
| }, |
| { |
| "epoch": 0.5027701353032074, |
| "grad_norm": 1.478486209469283, |
| "learning_rate": 5.821487404627872e-06, |
| "loss": 0.4898, |
| "step": 13340 |
| }, |
| { |
| "epoch": 0.5031470244601063, |
| "grad_norm": 1.5900748132206215, |
| "learning_rate": 5.814997937570282e-06, |
| "loss": 0.4911, |
| "step": 13350 |
| }, |
| { |
| "epoch": 0.5035239136170052, |
| "grad_norm": 1.6106460671873601, |
| "learning_rate": 5.808507059847591e-06, |
| "loss": 0.5078, |
| "step": 13360 |
| }, |
| { |
| "epoch": 0.5039008027739041, |
| "grad_norm": 1.8201545845564078, |
| "learning_rate": 5.802014782694745e-06, |
| "loss": 0.5254, |
| "step": 13370 |
| }, |
| { |
| "epoch": 0.5042776919308032, |
| "grad_norm": 1.5604700154370892, |
| "learning_rate": 5.795521117349106e-06, |
| "loss": 0.5183, |
| "step": 13380 |
| }, |
| { |
| "epoch": 0.5046545810877021, |
| "grad_norm": 1.476477451573828, |
| "learning_rate": 5.789026075050445e-06, |
| "loss": 0.5158, |
| "step": 13390 |
| }, |
| { |
| "epoch": 0.505031470244601, |
| "grad_norm": 1.8253062366468507, |
| "learning_rate": 5.782529667040908e-06, |
| "loss": 0.5044, |
| "step": 13400 |
| }, |
| { |
| "epoch": 0.5054083594015, |
| "grad_norm": 1.4718401708375746, |
| "learning_rate": 5.7760319045650124e-06, |
| "loss": 0.4713, |
| "step": 13410 |
| }, |
| { |
| "epoch": 0.505785248558399, |
| "grad_norm": 2.0225600662739187, |
| "learning_rate": 5.769532798869617e-06, |
| "loss": 0.5435, |
| "step": 13420 |
| }, |
| { |
| "epoch": 0.5061621377152979, |
| "grad_norm": 1.7805238633538563, |
| "learning_rate": 5.763032361203904e-06, |
| "loss": 0.4927, |
| "step": 13430 |
| }, |
| { |
| "epoch": 0.5065390268721969, |
| "grad_norm": 1.6277608026896737, |
| "learning_rate": 5.756530602819363e-06, |
| "loss": 0.5066, |
| "step": 13440 |
| }, |
| { |
| "epoch": 0.5069159160290958, |
| "grad_norm": 1.4945525295342976, |
| "learning_rate": 5.750027534969771e-06, |
| "loss": 0.4921, |
| "step": 13450 |
| }, |
| { |
| "epoch": 0.5072928051859948, |
| "grad_norm": 1.4820174242789568, |
| "learning_rate": 5.743523168911167e-06, |
| "loss": 0.5075, |
| "step": 13460 |
| }, |
| { |
| "epoch": 0.5076696943428938, |
| "grad_norm": 1.3635089466658887, |
| "learning_rate": 5.7370175159018415e-06, |
| "loss": 0.5046, |
| "step": 13470 |
| }, |
| { |
| "epoch": 0.5080465834997927, |
| "grad_norm": 1.8437545665124715, |
| "learning_rate": 5.730510587202311e-06, |
| "loss": 0.515, |
| "step": 13480 |
| }, |
| { |
| "epoch": 0.5084234726566916, |
| "grad_norm": 1.786451934916694, |
| "learning_rate": 5.7240023940752984e-06, |
| "loss": 0.5066, |
| "step": 13490 |
| }, |
| { |
| "epoch": 0.5088003618135907, |
| "grad_norm": 1.7504343352240117, |
| "learning_rate": 5.71749294778572e-06, |
| "loss": 0.4949, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.5091772509704896, |
| "grad_norm": 1.4788751754375462, |
| "learning_rate": 5.710982259600656e-06, |
| "loss": 0.4816, |
| "step": 13510 |
| }, |
| { |
| "epoch": 0.5095541401273885, |
| "grad_norm": 1.5291626156746818, |
| "learning_rate": 5.704470340789335e-06, |
| "loss": 0.4725, |
| "step": 13520 |
| }, |
| { |
| "epoch": 0.5099310292842875, |
| "grad_norm": 1.6830268113490698, |
| "learning_rate": 5.697957202623126e-06, |
| "loss": 0.4755, |
| "step": 13530 |
| }, |
| { |
| "epoch": 0.5103079184411864, |
| "grad_norm": 1.495645523541403, |
| "learning_rate": 5.691442856375493e-06, |
| "loss": 0.4848, |
| "step": 13540 |
| }, |
| { |
| "epoch": 0.5106848075980854, |
| "grad_norm": 1.7364770312162106, |
| "learning_rate": 5.684927313322006e-06, |
| "loss": 0.4986, |
| "step": 13550 |
| }, |
| { |
| "epoch": 0.5110616967549844, |
| "grad_norm": 1.646648774368243, |
| "learning_rate": 5.678410584740296e-06, |
| "loss": 0.5062, |
| "step": 13560 |
| }, |
| { |
| "epoch": 0.5114385859118833, |
| "grad_norm": 1.4355497252644054, |
| "learning_rate": 5.671892681910052e-06, |
| "loss": 0.5043, |
| "step": 13570 |
| }, |
| { |
| "epoch": 0.5118154750687822, |
| "grad_norm": 1.5515113579431314, |
| "learning_rate": 5.6653736161129925e-06, |
| "loss": 0.5216, |
| "step": 13580 |
| }, |
| { |
| "epoch": 0.5121923642256813, |
| "grad_norm": 1.668318401565443, |
| "learning_rate": 5.658853398632849e-06, |
| "loss": 0.5192, |
| "step": 13590 |
| }, |
| { |
| "epoch": 0.5125692533825802, |
| "grad_norm": 1.4997427229219318, |
| "learning_rate": 5.6523320407553495e-06, |
| "loss": 0.4884, |
| "step": 13600 |
| }, |
| { |
| "epoch": 0.5129461425394791, |
| "grad_norm": 1.6101341859359553, |
| "learning_rate": 5.6458095537681924e-06, |
| "loss": 0.5099, |
| "step": 13610 |
| }, |
| { |
| "epoch": 0.513323031696378, |
| "grad_norm": 1.606887828085733, |
| "learning_rate": 5.63928594896103e-06, |
| "loss": 0.5299, |
| "step": 13620 |
| }, |
| { |
| "epoch": 0.5136999208532771, |
| "grad_norm": 1.416359664160664, |
| "learning_rate": 5.632761237625455e-06, |
| "loss": 0.4947, |
| "step": 13630 |
| }, |
| { |
| "epoch": 0.514076810010176, |
| "grad_norm": 1.4981521351869527, |
| "learning_rate": 5.626235431054968e-06, |
| "loss": 0.5176, |
| "step": 13640 |
| }, |
| { |
| "epoch": 0.514453699167075, |
| "grad_norm": 1.9146596793411172, |
| "learning_rate": 5.619708540544971e-06, |
| "loss": 0.4981, |
| "step": 13650 |
| }, |
| { |
| "epoch": 0.5148305883239739, |
| "grad_norm": 1.6248776563981115, |
| "learning_rate": 5.61318057739274e-06, |
| "loss": 0.5343, |
| "step": 13660 |
| }, |
| { |
| "epoch": 0.5152074774808729, |
| "grad_norm": 1.8392698085104202, |
| "learning_rate": 5.606651552897404e-06, |
| "loss": 0.4745, |
| "step": 13670 |
| }, |
| { |
| "epoch": 0.5155843666377719, |
| "grad_norm": 1.51974705908948, |
| "learning_rate": 5.6001214783599375e-06, |
| "loss": 0.484, |
| "step": 13680 |
| }, |
| { |
| "epoch": 0.5159612557946708, |
| "grad_norm": 1.5164194761342433, |
| "learning_rate": 5.593590365083126e-06, |
| "loss": 0.4973, |
| "step": 13690 |
| }, |
| { |
| "epoch": 0.5163381449515697, |
| "grad_norm": 1.571042115479235, |
| "learning_rate": 5.587058224371553e-06, |
| "loss": 0.4708, |
| "step": 13700 |
| }, |
| { |
| "epoch": 0.5167150341084688, |
| "grad_norm": 2.1629006116268683, |
| "learning_rate": 5.580525067531585e-06, |
| "loss": 0.4937, |
| "step": 13710 |
| }, |
| { |
| "epoch": 0.5170919232653677, |
| "grad_norm": 1.842138362917266, |
| "learning_rate": 5.57399090587134e-06, |
| "loss": 0.499, |
| "step": 13720 |
| }, |
| { |
| "epoch": 0.5174688124222666, |
| "grad_norm": 1.4590850155680444, |
| "learning_rate": 5.5674557507006846e-06, |
| "loss": 0.4989, |
| "step": 13730 |
| }, |
| { |
| "epoch": 0.5178457015791655, |
| "grad_norm": 1.5433721159408116, |
| "learning_rate": 5.560919613331197e-06, |
| "loss": 0.5099, |
| "step": 13740 |
| }, |
| { |
| "epoch": 0.5182225907360645, |
| "grad_norm": 1.8050635225380678, |
| "learning_rate": 5.554382505076157e-06, |
| "loss": 0.4918, |
| "step": 13750 |
| }, |
| { |
| "epoch": 0.5185994798929635, |
| "grad_norm": 1.3538997043392071, |
| "learning_rate": 5.54784443725053e-06, |
| "loss": 0.4787, |
| "step": 13760 |
| }, |
| { |
| "epoch": 0.5189763690498624, |
| "grad_norm": 1.7172296005933367, |
| "learning_rate": 5.541305421170936e-06, |
| "loss": 0.4926, |
| "step": 13770 |
| }, |
| { |
| "epoch": 0.5193532582067614, |
| "grad_norm": 1.682395972487519, |
| "learning_rate": 5.534765468155641e-06, |
| "loss": 0.4837, |
| "step": 13780 |
| }, |
| { |
| "epoch": 0.5197301473636603, |
| "grad_norm": 1.5176445024464842, |
| "learning_rate": 5.528224589524527e-06, |
| "loss": 0.4976, |
| "step": 13790 |
| }, |
| { |
| "epoch": 0.5201070365205593, |
| "grad_norm": 1.5257635716345566, |
| "learning_rate": 5.521682796599086e-06, |
| "loss": 0.496, |
| "step": 13800 |
| }, |
| { |
| "epoch": 0.5204839256774583, |
| "grad_norm": 1.5260587749295311, |
| "learning_rate": 5.515140100702385e-06, |
| "loss": 0.4884, |
| "step": 13810 |
| }, |
| { |
| "epoch": 0.5208608148343572, |
| "grad_norm": 1.6982387061332391, |
| "learning_rate": 5.508596513159059e-06, |
| "loss": 0.5005, |
| "step": 13820 |
| }, |
| { |
| "epoch": 0.5212377039912561, |
| "grad_norm": 1.4479971134584864, |
| "learning_rate": 5.502052045295286e-06, |
| "loss": 0.4982, |
| "step": 13830 |
| }, |
| { |
| "epoch": 0.5216145931481552, |
| "grad_norm": 1.857022364604521, |
| "learning_rate": 5.495506708438763e-06, |
| "loss": 0.5174, |
| "step": 13840 |
| }, |
| { |
| "epoch": 0.5219914823050541, |
| "grad_norm": 1.6885724207882848, |
| "learning_rate": 5.488960513918695e-06, |
| "loss": 0.522, |
| "step": 13850 |
| }, |
| { |
| "epoch": 0.522368371461953, |
| "grad_norm": 1.4128662344535574, |
| "learning_rate": 5.482413473065775e-06, |
| "loss": 0.5039, |
| "step": 13860 |
| }, |
| { |
| "epoch": 0.522745260618852, |
| "grad_norm": 1.9106751275521132, |
| "learning_rate": 5.475865597212152e-06, |
| "loss": 0.462, |
| "step": 13870 |
| }, |
| { |
| "epoch": 0.523122149775751, |
| "grad_norm": 1.5304732560419125, |
| "learning_rate": 5.469316897691428e-06, |
| "loss": 0.4906, |
| "step": 13880 |
| }, |
| { |
| "epoch": 0.5234990389326499, |
| "grad_norm": 1.6509200854352029, |
| "learning_rate": 5.4627673858386255e-06, |
| "loss": 0.4937, |
| "step": 13890 |
| }, |
| { |
| "epoch": 0.5238759280895489, |
| "grad_norm": 1.692631204971466, |
| "learning_rate": 5.456217072990178e-06, |
| "loss": 0.4782, |
| "step": 13900 |
| }, |
| { |
| "epoch": 0.5242528172464478, |
| "grad_norm": 1.5218720962530914, |
| "learning_rate": 5.4496659704839e-06, |
| "loss": 0.4917, |
| "step": 13910 |
| }, |
| { |
| "epoch": 0.5246297064033468, |
| "grad_norm": 1.50914557920104, |
| "learning_rate": 5.44311408965898e-06, |
| "loss": 0.5226, |
| "step": 13920 |
| }, |
| { |
| "epoch": 0.5250065955602458, |
| "grad_norm": 1.6708689941350754, |
| "learning_rate": 5.436561441855942e-06, |
| "loss": 0.4741, |
| "step": 13930 |
| }, |
| { |
| "epoch": 0.5253834847171447, |
| "grad_norm": 2.0239362250295336, |
| "learning_rate": 5.430008038416653e-06, |
| "loss": 0.4797, |
| "step": 13940 |
| }, |
| { |
| "epoch": 0.5257603738740436, |
| "grad_norm": 1.6524565432727731, |
| "learning_rate": 5.423453890684274e-06, |
| "loss": 0.5416, |
| "step": 13950 |
| }, |
| { |
| "epoch": 0.5261372630309425, |
| "grad_norm": 1.4470951746948137, |
| "learning_rate": 5.416899010003264e-06, |
| "loss": 0.4571, |
| "step": 13960 |
| }, |
| { |
| "epoch": 0.5265141521878416, |
| "grad_norm": 1.713191989373032, |
| "learning_rate": 5.410343407719343e-06, |
| "loss": 0.5086, |
| "step": 13970 |
| }, |
| { |
| "epoch": 0.5268910413447405, |
| "grad_norm": 1.511732776736868, |
| "learning_rate": 5.4037870951794856e-06, |
| "loss": 0.4623, |
| "step": 13980 |
| }, |
| { |
| "epoch": 0.5272679305016394, |
| "grad_norm": 1.6985510028227295, |
| "learning_rate": 5.397230083731894e-06, |
| "loss": 0.4956, |
| "step": 13990 |
| }, |
| { |
| "epoch": 0.5276448196585384, |
| "grad_norm": 1.8917478296516785, |
| "learning_rate": 5.390672384725979e-06, |
| "loss": 0.5007, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.5280217088154374, |
| "grad_norm": 1.45118679961553, |
| "learning_rate": 5.384114009512343e-06, |
| "loss": 0.4753, |
| "step": 14010 |
| }, |
| { |
| "epoch": 0.5283985979723363, |
| "grad_norm": 1.5887735261780631, |
| "learning_rate": 5.37755496944276e-06, |
| "loss": 0.5109, |
| "step": 14020 |
| }, |
| { |
| "epoch": 0.5287754871292353, |
| "grad_norm": 1.6042273339896562, |
| "learning_rate": 5.37099527587015e-06, |
| "loss": 0.5093, |
| "step": 14030 |
| }, |
| { |
| "epoch": 0.5291523762861342, |
| "grad_norm": 1.5185256947676165, |
| "learning_rate": 5.3644349401485695e-06, |
| "loss": 0.512, |
| "step": 14040 |
| }, |
| { |
| "epoch": 0.5295292654430332, |
| "grad_norm": 1.5903571571587558, |
| "learning_rate": 5.3578739736331846e-06, |
| "loss": 0.5145, |
| "step": 14050 |
| }, |
| { |
| "epoch": 0.5299061545999322, |
| "grad_norm": 1.6617724641502587, |
| "learning_rate": 5.351312387680249e-06, |
| "loss": 0.4734, |
| "step": 14060 |
| }, |
| { |
| "epoch": 0.5302830437568311, |
| "grad_norm": 1.5942708990624488, |
| "learning_rate": 5.344750193647097e-06, |
| "loss": 0.4822, |
| "step": 14070 |
| }, |
| { |
| "epoch": 0.53065993291373, |
| "grad_norm": 1.6417425014333669, |
| "learning_rate": 5.338187402892108e-06, |
| "loss": 0.5148, |
| "step": 14080 |
| }, |
| { |
| "epoch": 0.5310368220706291, |
| "grad_norm": 1.6747154379450402, |
| "learning_rate": 5.331624026774698e-06, |
| "loss": 0.4797, |
| "step": 14090 |
| }, |
| { |
| "epoch": 0.531413711227528, |
| "grad_norm": 1.7559506025087541, |
| "learning_rate": 5.325060076655295e-06, |
| "loss": 0.4998, |
| "step": 14100 |
| }, |
| { |
| "epoch": 0.5317906003844269, |
| "grad_norm": 1.5737701449749422, |
| "learning_rate": 5.3184955638953215e-06, |
| "loss": 0.4771, |
| "step": 14110 |
| }, |
| { |
| "epoch": 0.5321674895413259, |
| "grad_norm": 1.4261947265219483, |
| "learning_rate": 5.311930499857173e-06, |
| "loss": 0.483, |
| "step": 14120 |
| }, |
| { |
| "epoch": 0.5325443786982249, |
| "grad_norm": 1.5998345020742453, |
| "learning_rate": 5.3053648959041995e-06, |
| "loss": 0.4958, |
| "step": 14130 |
| }, |
| { |
| "epoch": 0.5329212678551238, |
| "grad_norm": 1.4631565618451698, |
| "learning_rate": 5.2987987634006845e-06, |
| "loss": 0.4755, |
| "step": 14140 |
| }, |
| { |
| "epoch": 0.5332981570120228, |
| "grad_norm": 1.5593631613031096, |
| "learning_rate": 5.2922321137118285e-06, |
| "loss": 0.4837, |
| "step": 14150 |
| }, |
| { |
| "epoch": 0.5336750461689217, |
| "grad_norm": 1.373456275581913, |
| "learning_rate": 5.285664958203723e-06, |
| "loss": 0.488, |
| "step": 14160 |
| }, |
| { |
| "epoch": 0.5340519353258206, |
| "grad_norm": 1.4401671838017023, |
| "learning_rate": 5.2790973082433415e-06, |
| "loss": 0.4953, |
| "step": 14170 |
| }, |
| { |
| "epoch": 0.5344288244827197, |
| "grad_norm": 1.7640373010765296, |
| "learning_rate": 5.2725291751985085e-06, |
| "loss": 0.464, |
| "step": 14180 |
| }, |
| { |
| "epoch": 0.5348057136396186, |
| "grad_norm": 3.309243521738535, |
| "learning_rate": 5.2659605704378855e-06, |
| "loss": 0.4997, |
| "step": 14190 |
| }, |
| { |
| "epoch": 0.5351826027965175, |
| "grad_norm": 1.7411014099972144, |
| "learning_rate": 5.259391505330952e-06, |
| "loss": 0.487, |
| "step": 14200 |
| }, |
| { |
| "epoch": 0.5355594919534165, |
| "grad_norm": 1.736020357499271, |
| "learning_rate": 5.252821991247983e-06, |
| "loss": 0.5079, |
| "step": 14210 |
| }, |
| { |
| "epoch": 0.5359363811103155, |
| "grad_norm": 1.608976151378783, |
| "learning_rate": 5.246252039560029e-06, |
| "loss": 0.4948, |
| "step": 14220 |
| }, |
| { |
| "epoch": 0.5363132702672144, |
| "grad_norm": 1.4342748366323024, |
| "learning_rate": 5.239681661638902e-06, |
| "loss": 0.4819, |
| "step": 14230 |
| }, |
| { |
| "epoch": 0.5366901594241134, |
| "grad_norm": 1.5495611294979035, |
| "learning_rate": 5.233110868857148e-06, |
| "loss": 0.4804, |
| "step": 14240 |
| }, |
| { |
| "epoch": 0.5370670485810123, |
| "grad_norm": 1.8546486328260592, |
| "learning_rate": 5.2265396725880354e-06, |
| "loss": 0.5026, |
| "step": 14250 |
| }, |
| { |
| "epoch": 0.5374439377379113, |
| "grad_norm": 1.4351730050068885, |
| "learning_rate": 5.219968084205525e-06, |
| "loss": 0.4984, |
| "step": 14260 |
| }, |
| { |
| "epoch": 0.5378208268948103, |
| "grad_norm": 1.5059171293734337, |
| "learning_rate": 5.213396115084261e-06, |
| "loss": 0.486, |
| "step": 14270 |
| }, |
| { |
| "epoch": 0.5381977160517092, |
| "grad_norm": 1.5964828714981016, |
| "learning_rate": 5.206823776599544e-06, |
| "loss": 0.4951, |
| "step": 14280 |
| }, |
| { |
| "epoch": 0.5385746052086081, |
| "grad_norm": 1.4533398425449158, |
| "learning_rate": 5.200251080127318e-06, |
| "loss": 0.5093, |
| "step": 14290 |
| }, |
| { |
| "epoch": 0.5389514943655072, |
| "grad_norm": 1.6285262584640723, |
| "learning_rate": 5.1936780370441395e-06, |
| "loss": 0.4896, |
| "step": 14300 |
| }, |
| { |
| "epoch": 0.5393283835224061, |
| "grad_norm": 1.6246946608117687, |
| "learning_rate": 5.187104658727173e-06, |
| "loss": 0.4775, |
| "step": 14310 |
| }, |
| { |
| "epoch": 0.539705272679305, |
| "grad_norm": 1.4908047445233454, |
| "learning_rate": 5.180530956554158e-06, |
| "loss": 0.4975, |
| "step": 14320 |
| }, |
| { |
| "epoch": 0.5400821618362039, |
| "grad_norm": 1.5621540920543053, |
| "learning_rate": 5.173956941903395e-06, |
| "loss": 0.4687, |
| "step": 14330 |
| }, |
| { |
| "epoch": 0.5404590509931029, |
| "grad_norm": 1.7060966317168311, |
| "learning_rate": 5.167382626153727e-06, |
| "loss": 0.5019, |
| "step": 14340 |
| }, |
| { |
| "epoch": 0.5408359401500019, |
| "grad_norm": 1.7020605336609178, |
| "learning_rate": 5.160808020684519e-06, |
| "loss": 0.4993, |
| "step": 14350 |
| }, |
| { |
| "epoch": 0.5412128293069008, |
| "grad_norm": 1.7730954600238917, |
| "learning_rate": 5.154233136875633e-06, |
| "loss": 0.485, |
| "step": 14360 |
| }, |
| { |
| "epoch": 0.5415897184637998, |
| "grad_norm": 1.9393987985330001, |
| "learning_rate": 5.147657986107417e-06, |
| "loss": 0.5375, |
| "step": 14370 |
| }, |
| { |
| "epoch": 0.5419666076206987, |
| "grad_norm": 1.6471427503287805, |
| "learning_rate": 5.1410825797606816e-06, |
| "loss": 0.4997, |
| "step": 14380 |
| }, |
| { |
| "epoch": 0.5423434967775977, |
| "grad_norm": 1.4369947353354569, |
| "learning_rate": 5.134506929216674e-06, |
| "loss": 0.4739, |
| "step": 14390 |
| }, |
| { |
| "epoch": 0.5427203859344967, |
| "grad_norm": 1.7681719081478857, |
| "learning_rate": 5.127931045857073e-06, |
| "loss": 0.4991, |
| "step": 14400 |
| }, |
| { |
| "epoch": 0.5430972750913956, |
| "grad_norm": 1.6691866607079882, |
| "learning_rate": 5.1213549410639515e-06, |
| "loss": 0.4623, |
| "step": 14410 |
| }, |
| { |
| "epoch": 0.5434741642482945, |
| "grad_norm": 1.4149702534749613, |
| "learning_rate": 5.114778626219772e-06, |
| "loss": 0.4828, |
| "step": 14420 |
| }, |
| { |
| "epoch": 0.5438510534051936, |
| "grad_norm": 1.6627823221828433, |
| "learning_rate": 5.108202112707357e-06, |
| "loss": 0.5027, |
| "step": 14430 |
| }, |
| { |
| "epoch": 0.5442279425620925, |
| "grad_norm": 1.250526612258918, |
| "learning_rate": 5.101625411909874e-06, |
| "loss": 0.4779, |
| "step": 14440 |
| }, |
| { |
| "epoch": 0.5446048317189914, |
| "grad_norm": 1.5283584890240594, |
| "learning_rate": 5.0950485352108145e-06, |
| "loss": 0.494, |
| "step": 14450 |
| }, |
| { |
| "epoch": 0.5449817208758904, |
| "grad_norm": 1.337913464919517, |
| "learning_rate": 5.088471493993977e-06, |
| "loss": 0.459, |
| "step": 14460 |
| }, |
| { |
| "epoch": 0.5453586100327894, |
| "grad_norm": 1.481063864916166, |
| "learning_rate": 5.081894299643439e-06, |
| "loss": 0.5148, |
| "step": 14470 |
| }, |
| { |
| "epoch": 0.5457354991896883, |
| "grad_norm": 1.5902256444957483, |
| "learning_rate": 5.07531696354355e-06, |
| "loss": 0.4776, |
| "step": 14480 |
| }, |
| { |
| "epoch": 0.5461123883465873, |
| "grad_norm": 1.443883382589487, |
| "learning_rate": 5.068739497078898e-06, |
| "loss": 0.5122, |
| "step": 14490 |
| }, |
| { |
| "epoch": 0.5464892775034862, |
| "grad_norm": 1.6573811027305527, |
| "learning_rate": 5.0621619116343e-06, |
| "loss": 0.5163, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.5468661666603852, |
| "grad_norm": 1.609253979586725, |
| "learning_rate": 5.055584218594782e-06, |
| "loss": 0.4862, |
| "step": 14510 |
| }, |
| { |
| "epoch": 0.5472430558172842, |
| "grad_norm": 1.7791228748733698, |
| "learning_rate": 5.049006429345552e-06, |
| "loss": 0.5079, |
| "step": 14520 |
| }, |
| { |
| "epoch": 0.5476199449741831, |
| "grad_norm": 1.5440262873205584, |
| "learning_rate": 5.0424285552719845e-06, |
| "loss": 0.4881, |
| "step": 14530 |
| }, |
| { |
| "epoch": 0.547996834131082, |
| "grad_norm": 1.67027687894764, |
| "learning_rate": 5.0358506077596035e-06, |
| "loss": 0.4948, |
| "step": 14540 |
| }, |
| { |
| "epoch": 0.548373723287981, |
| "grad_norm": 1.7084316467056804, |
| "learning_rate": 5.029272598194057e-06, |
| "loss": 0.4836, |
| "step": 14550 |
| }, |
| { |
| "epoch": 0.54875061244488, |
| "grad_norm": 1.6824534462487943, |
| "learning_rate": 5.022694537961105e-06, |
| "loss": 0.4904, |
| "step": 14560 |
| }, |
| { |
| "epoch": 0.5491275016017789, |
| "grad_norm": 1.684452437139124, |
| "learning_rate": 5.016116438446588e-06, |
| "loss": 0.5244, |
| "step": 14570 |
| }, |
| { |
| "epoch": 0.5495043907586779, |
| "grad_norm": 1.7169193584179046, |
| "learning_rate": 5.009538311036422e-06, |
| "loss": 0.5139, |
| "step": 14580 |
| }, |
| { |
| "epoch": 0.5498812799155768, |
| "grad_norm": 1.6319964128441222, |
| "learning_rate": 5.002960167116567e-06, |
| "loss": 0.4995, |
| "step": 14590 |
| }, |
| { |
| "epoch": 0.5502581690724758, |
| "grad_norm": 1.7828166102258067, |
| "learning_rate": 4.9963820180730125e-06, |
| "loss": 0.4847, |
| "step": 14600 |
| }, |
| { |
| "epoch": 0.5506350582293748, |
| "grad_norm": 1.4715536669658034, |
| "learning_rate": 4.989803875291759e-06, |
| "loss": 0.5032, |
| "step": 14610 |
| }, |
| { |
| "epoch": 0.5510119473862737, |
| "grad_norm": 1.6193456161052595, |
| "learning_rate": 4.983225750158789e-06, |
| "loss": 0.4738, |
| "step": 14620 |
| }, |
| { |
| "epoch": 0.5513888365431726, |
| "grad_norm": 1.6523635284605276, |
| "learning_rate": 4.976647654060064e-06, |
| "loss": 0.4697, |
| "step": 14630 |
| }, |
| { |
| "epoch": 0.5517657257000717, |
| "grad_norm": 1.5353363387196937, |
| "learning_rate": 4.970069598381489e-06, |
| "loss": 0.4766, |
| "step": 14640 |
| }, |
| { |
| "epoch": 0.5521426148569706, |
| "grad_norm": 1.599767794638566, |
| "learning_rate": 4.963491594508904e-06, |
| "loss": 0.486, |
| "step": 14650 |
| }, |
| { |
| "epoch": 0.5525195040138695, |
| "grad_norm": 1.5072462327621345, |
| "learning_rate": 4.956913653828051e-06, |
| "loss": 0.5029, |
| "step": 14660 |
| }, |
| { |
| "epoch": 0.5528963931707684, |
| "grad_norm": 1.576083520001881, |
| "learning_rate": 4.950335787724571e-06, |
| "loss": 0.5008, |
| "step": 14670 |
| }, |
| { |
| "epoch": 0.5532732823276675, |
| "grad_norm": 1.7206709649650131, |
| "learning_rate": 4.943758007583972e-06, |
| "loss": 0.4899, |
| "step": 14680 |
| }, |
| { |
| "epoch": 0.5536501714845664, |
| "grad_norm": 2.125129340848355, |
| "learning_rate": 4.937180324791616e-06, |
| "loss": 0.4858, |
| "step": 14690 |
| }, |
| { |
| "epoch": 0.5540270606414653, |
| "grad_norm": 1.776386341218708, |
| "learning_rate": 4.930602750732691e-06, |
| "loss": 0.5114, |
| "step": 14700 |
| }, |
| { |
| "epoch": 0.5544039497983643, |
| "grad_norm": 1.8272872066498904, |
| "learning_rate": 4.924025296792202e-06, |
| "loss": 0.4938, |
| "step": 14710 |
| }, |
| { |
| "epoch": 0.5547808389552633, |
| "grad_norm": 1.5539132471817085, |
| "learning_rate": 4.917447974354944e-06, |
| "loss": 0.5012, |
| "step": 14720 |
| }, |
| { |
| "epoch": 0.5551577281121622, |
| "grad_norm": 1.5058070102826557, |
| "learning_rate": 4.910870794805484e-06, |
| "loss": 0.4982, |
| "step": 14730 |
| }, |
| { |
| "epoch": 0.5555346172690612, |
| "grad_norm": 1.710828327508459, |
| "learning_rate": 4.904293769528146e-06, |
| "loss": 0.4756, |
| "step": 14740 |
| }, |
| { |
| "epoch": 0.5559115064259601, |
| "grad_norm": 1.8818911198800967, |
| "learning_rate": 4.8977169099069774e-06, |
| "loss": 0.4877, |
| "step": 14750 |
| }, |
| { |
| "epoch": 0.556288395582859, |
| "grad_norm": 1.5555517297318513, |
| "learning_rate": 4.891140227325749e-06, |
| "loss": 0.5072, |
| "step": 14760 |
| }, |
| { |
| "epoch": 0.5566652847397581, |
| "grad_norm": 1.3837969704058737, |
| "learning_rate": 4.884563733167921e-06, |
| "loss": 0.4493, |
| "step": 14770 |
| }, |
| { |
| "epoch": 0.557042173896657, |
| "grad_norm": 1.605935176191625, |
| "learning_rate": 4.877987438816626e-06, |
| "loss": 0.5043, |
| "step": 14780 |
| }, |
| { |
| "epoch": 0.5574190630535559, |
| "grad_norm": 1.4152291686855751, |
| "learning_rate": 4.8714113556546526e-06, |
| "loss": 0.4896, |
| "step": 14790 |
| }, |
| { |
| "epoch": 0.5577959522104549, |
| "grad_norm": 1.555671007575408, |
| "learning_rate": 4.864835495064422e-06, |
| "loss": 0.4948, |
| "step": 14800 |
| }, |
| { |
| "epoch": 0.5581728413673539, |
| "grad_norm": 1.7501287152105198, |
| "learning_rate": 4.858259868427975e-06, |
| "loss": 0.528, |
| "step": 14810 |
| }, |
| { |
| "epoch": 0.5585497305242528, |
| "grad_norm": 1.4977704984866138, |
| "learning_rate": 4.851684487126942e-06, |
| "loss": 0.4995, |
| "step": 14820 |
| }, |
| { |
| "epoch": 0.5589266196811518, |
| "grad_norm": 1.5881770020676744, |
| "learning_rate": 4.845109362542531e-06, |
| "loss": 0.5033, |
| "step": 14830 |
| }, |
| { |
| "epoch": 0.5593035088380507, |
| "grad_norm": 1.6759640529299542, |
| "learning_rate": 4.838534506055505e-06, |
| "loss": 0.5153, |
| "step": 14840 |
| }, |
| { |
| "epoch": 0.5596803979949497, |
| "grad_norm": 1.444873322408345, |
| "learning_rate": 4.8319599290461644e-06, |
| "loss": 0.4907, |
| "step": 14850 |
| }, |
| { |
| "epoch": 0.5600572871518487, |
| "grad_norm": 1.8264686526034841, |
| "learning_rate": 4.825385642894325e-06, |
| "loss": 0.5306, |
| "step": 14860 |
| }, |
| { |
| "epoch": 0.5604341763087476, |
| "grad_norm": 1.5497290772020473, |
| "learning_rate": 4.818811658979298e-06, |
| "loss": 0.5152, |
| "step": 14870 |
| }, |
| { |
| "epoch": 0.5608110654656465, |
| "grad_norm": 1.37844981662825, |
| "learning_rate": 4.8122379886798714e-06, |
| "loss": 0.4983, |
| "step": 14880 |
| }, |
| { |
| "epoch": 0.5611879546225456, |
| "grad_norm": 1.3444131954230294, |
| "learning_rate": 4.805664643374295e-06, |
| "loss": 0.4925, |
| "step": 14890 |
| }, |
| { |
| "epoch": 0.5615648437794445, |
| "grad_norm": 1.5532769744703083, |
| "learning_rate": 4.799091634440251e-06, |
| "loss": 0.5188, |
| "step": 14900 |
| }, |
| { |
| "epoch": 0.5619417329363434, |
| "grad_norm": 1.555196739372832, |
| "learning_rate": 4.7925189732548396e-06, |
| "loss": 0.4683, |
| "step": 14910 |
| }, |
| { |
| "epoch": 0.5623186220932423, |
| "grad_norm": 1.4684972837534254, |
| "learning_rate": 4.7859466711945616e-06, |
| "loss": 0.479, |
| "step": 14920 |
| }, |
| { |
| "epoch": 0.5626955112501413, |
| "grad_norm": 1.6202095473956792, |
| "learning_rate": 4.7793747396352945e-06, |
| "loss": 0.494, |
| "step": 14930 |
| }, |
| { |
| "epoch": 0.5630724004070403, |
| "grad_norm": 1.7528710855944911, |
| "learning_rate": 4.7728031899522775e-06, |
| "loss": 0.4671, |
| "step": 14940 |
| }, |
| { |
| "epoch": 0.5634492895639392, |
| "grad_norm": 1.8853181007805313, |
| "learning_rate": 4.7662320335200815e-06, |
| "loss": 0.4952, |
| "step": 14950 |
| }, |
| { |
| "epoch": 0.5638261787208382, |
| "grad_norm": 1.5382530586672782, |
| "learning_rate": 4.759661281712605e-06, |
| "loss": 0.4707, |
| "step": 14960 |
| }, |
| { |
| "epoch": 0.5642030678777371, |
| "grad_norm": 1.525234396856901, |
| "learning_rate": 4.753090945903043e-06, |
| "loss": 0.479, |
| "step": 14970 |
| }, |
| { |
| "epoch": 0.5645799570346361, |
| "grad_norm": 1.4131804151431138, |
| "learning_rate": 4.74652103746387e-06, |
| "loss": 0.5002, |
| "step": 14980 |
| }, |
| { |
| "epoch": 0.5649568461915351, |
| "grad_norm": 1.3413955318599295, |
| "learning_rate": 4.739951567766819e-06, |
| "loss": 0.5014, |
| "step": 14990 |
| }, |
| { |
| "epoch": 0.565333735348434, |
| "grad_norm": 1.707719627926162, |
| "learning_rate": 4.733382548182867e-06, |
| "loss": 0.4803, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.5657106245053329, |
| "grad_norm": 1.783248272136694, |
| "learning_rate": 4.726813990082208e-06, |
| "loss": 0.521, |
| "step": 15010 |
| }, |
| { |
| "epoch": 0.566087513662232, |
| "grad_norm": 1.6966076793346403, |
| "learning_rate": 4.720245904834247e-06, |
| "loss": 0.491, |
| "step": 15020 |
| }, |
| { |
| "epoch": 0.5664644028191309, |
| "grad_norm": 1.718872464504697, |
| "learning_rate": 4.713678303807554e-06, |
| "loss": 0.508, |
| "step": 15030 |
| }, |
| { |
| "epoch": 0.5668412919760298, |
| "grad_norm": 1.3577910290768516, |
| "learning_rate": 4.707111198369875e-06, |
| "loss": 0.4792, |
| "step": 15040 |
| }, |
| { |
| "epoch": 0.5672181811329288, |
| "grad_norm": 2.011765897335492, |
| "learning_rate": 4.700544599888092e-06, |
| "loss": 0.4792, |
| "step": 15050 |
| }, |
| { |
| "epoch": 0.5675950702898278, |
| "grad_norm": 1.5685090926240022, |
| "learning_rate": 4.693978519728214e-06, |
| "loss": 0.487, |
| "step": 15060 |
| }, |
| { |
| "epoch": 0.5679719594467267, |
| "grad_norm": 1.9465868567085707, |
| "learning_rate": 4.687412969255344e-06, |
| "loss": 0.5383, |
| "step": 15070 |
| }, |
| { |
| "epoch": 0.5683488486036257, |
| "grad_norm": 1.9601527980802476, |
| "learning_rate": 4.680847959833678e-06, |
| "loss": 0.5143, |
| "step": 15080 |
| }, |
| { |
| "epoch": 0.5687257377605246, |
| "grad_norm": 1.3360212285500883, |
| "learning_rate": 4.674283502826469e-06, |
| "loss": 0.5006, |
| "step": 15090 |
| }, |
| { |
| "epoch": 0.5691026269174236, |
| "grad_norm": 1.8144442698478411, |
| "learning_rate": 4.667719609596017e-06, |
| "loss": 0.4999, |
| "step": 15100 |
| }, |
| { |
| "epoch": 0.5694795160743226, |
| "grad_norm": 1.8698769948828864, |
| "learning_rate": 4.661156291503648e-06, |
| "loss": 0.4859, |
| "step": 15110 |
| }, |
| { |
| "epoch": 0.5698564052312215, |
| "grad_norm": 1.49460133455783, |
| "learning_rate": 4.654593559909686e-06, |
| "loss": 0.4845, |
| "step": 15120 |
| }, |
| { |
| "epoch": 0.5702332943881204, |
| "grad_norm": 1.4660243676447777, |
| "learning_rate": 4.648031426173445e-06, |
| "loss": 0.5125, |
| "step": 15130 |
| }, |
| { |
| "epoch": 0.5706101835450194, |
| "grad_norm": 1.3870825072557174, |
| "learning_rate": 4.641469901653202e-06, |
| "loss": 0.5016, |
| "step": 15140 |
| }, |
| { |
| "epoch": 0.5709870727019184, |
| "grad_norm": 1.6431723296673835, |
| "learning_rate": 4.634908997706185e-06, |
| "loss": 0.4987, |
| "step": 15150 |
| }, |
| { |
| "epoch": 0.5713639618588173, |
| "grad_norm": 1.7105138212254234, |
| "learning_rate": 4.628348725688535e-06, |
| "loss": 0.4854, |
| "step": 15160 |
| }, |
| { |
| "epoch": 0.5717408510157163, |
| "grad_norm": 1.9455745122203643, |
| "learning_rate": 4.621789096955314e-06, |
| "loss": 0.4913, |
| "step": 15170 |
| }, |
| { |
| "epoch": 0.5721177401726152, |
| "grad_norm": 1.6110602983128681, |
| "learning_rate": 4.615230122860463e-06, |
| "loss": 0.478, |
| "step": 15180 |
| }, |
| { |
| "epoch": 0.5724946293295142, |
| "grad_norm": 1.570236871676212, |
| "learning_rate": 4.608671814756789e-06, |
| "loss": 0.4966, |
| "step": 15190 |
| }, |
| { |
| "epoch": 0.5728715184864132, |
| "grad_norm": 1.6101020345767108, |
| "learning_rate": 4.60211418399595e-06, |
| "loss": 0.4921, |
| "step": 15200 |
| }, |
| { |
| "epoch": 0.5732484076433121, |
| "grad_norm": 1.3277725999257566, |
| "learning_rate": 4.595557241928428e-06, |
| "loss": 0.4492, |
| "step": 15210 |
| }, |
| { |
| "epoch": 0.573625296800211, |
| "grad_norm": 1.4944481859425578, |
| "learning_rate": 4.589000999903514e-06, |
| "loss": 0.508, |
| "step": 15220 |
| }, |
| { |
| "epoch": 0.5740021859571101, |
| "grad_norm": 1.949554647184248, |
| "learning_rate": 4.582445469269293e-06, |
| "loss": 0.4783, |
| "step": 15230 |
| }, |
| { |
| "epoch": 0.574379075114009, |
| "grad_norm": 1.6382300534433607, |
| "learning_rate": 4.575890661372608e-06, |
| "loss": 0.5262, |
| "step": 15240 |
| }, |
| { |
| "epoch": 0.5747559642709079, |
| "grad_norm": 1.7414910131507269, |
| "learning_rate": 4.569336587559058e-06, |
| "loss": 0.5097, |
| "step": 15250 |
| }, |
| { |
| "epoch": 0.5751328534278068, |
| "grad_norm": 1.5308534197031307, |
| "learning_rate": 4.562783259172972e-06, |
| "loss": 0.4743, |
| "step": 15260 |
| }, |
| { |
| "epoch": 0.5755097425847059, |
| "grad_norm": 1.544445790287268, |
| "learning_rate": 4.556230687557387e-06, |
| "loss": 0.4855, |
| "step": 15270 |
| }, |
| { |
| "epoch": 0.5758866317416048, |
| "grad_norm": 1.5649621936574907, |
| "learning_rate": 4.549678884054028e-06, |
| "loss": 0.4827, |
| "step": 15280 |
| }, |
| { |
| "epoch": 0.5762635208985037, |
| "grad_norm": 1.5699103732057733, |
| "learning_rate": 4.543127860003291e-06, |
| "loss": 0.5126, |
| "step": 15290 |
| }, |
| { |
| "epoch": 0.5766404100554027, |
| "grad_norm": 1.6479924327503963, |
| "learning_rate": 4.536577626744229e-06, |
| "loss": 0.5087, |
| "step": 15300 |
| }, |
| { |
| "epoch": 0.5770172992123017, |
| "grad_norm": 1.754860584177109, |
| "learning_rate": 4.53002819561452e-06, |
| "loss": 0.4868, |
| "step": 15310 |
| }, |
| { |
| "epoch": 0.5773941883692006, |
| "grad_norm": 1.538257113940453, |
| "learning_rate": 4.523479577950452e-06, |
| "loss": 0.4828, |
| "step": 15320 |
| }, |
| { |
| "epoch": 0.5777710775260996, |
| "grad_norm": 1.6723499369767234, |
| "learning_rate": 4.516931785086911e-06, |
| "loss": 0.4702, |
| "step": 15330 |
| }, |
| { |
| "epoch": 0.5781479666829985, |
| "grad_norm": 1.441994971838687, |
| "learning_rate": 4.510384828357352e-06, |
| "loss": 0.5074, |
| "step": 15340 |
| }, |
| { |
| "epoch": 0.5785248558398974, |
| "grad_norm": 1.676436195429064, |
| "learning_rate": 4.503838719093785e-06, |
| "loss": 0.498, |
| "step": 15350 |
| }, |
| { |
| "epoch": 0.5789017449967965, |
| "grad_norm": 1.5193324381147915, |
| "learning_rate": 4.4972934686267465e-06, |
| "loss": 0.488, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.5792786341536954, |
| "grad_norm": 1.644408204909188, |
| "learning_rate": 4.4907490882852945e-06, |
| "loss": 0.4658, |
| "step": 15370 |
| }, |
| { |
| "epoch": 0.5796555233105943, |
| "grad_norm": 1.3830347527818876, |
| "learning_rate": 4.484205589396979e-06, |
| "loss": 0.5044, |
| "step": 15380 |
| }, |
| { |
| "epoch": 0.5800324124674933, |
| "grad_norm": 1.5656512441657606, |
| "learning_rate": 4.477662983287823e-06, |
| "loss": 0.4713, |
| "step": 15390 |
| }, |
| { |
| "epoch": 0.5804093016243923, |
| "grad_norm": 1.8212831299721022, |
| "learning_rate": 4.4711212812823015e-06, |
| "loss": 0.5006, |
| "step": 15400 |
| }, |
| { |
| "epoch": 0.5807861907812912, |
| "grad_norm": 5.814852464154508, |
| "learning_rate": 4.46458049470333e-06, |
| "loss": 0.5273, |
| "step": 15410 |
| }, |
| { |
| "epoch": 0.5811630799381902, |
| "grad_norm": 1.5922280470024501, |
| "learning_rate": 4.458040634872234e-06, |
| "loss": 0.512, |
| "step": 15420 |
| }, |
| { |
| "epoch": 0.5815399690950891, |
| "grad_norm": 1.383810258386157, |
| "learning_rate": 4.451501713108744e-06, |
| "loss": 0.4864, |
| "step": 15430 |
| }, |
| { |
| "epoch": 0.5819168582519881, |
| "grad_norm": 1.5168290662535795, |
| "learning_rate": 4.444963740730953e-06, |
| "loss": 0.5101, |
| "step": 15440 |
| }, |
| { |
| "epoch": 0.5822937474088871, |
| "grad_norm": 1.628078473199888, |
| "learning_rate": 4.438426729055324e-06, |
| "loss": 0.4838, |
| "step": 15450 |
| }, |
| { |
| "epoch": 0.582670636565786, |
| "grad_norm": 1.3493978928610881, |
| "learning_rate": 4.431890689396649e-06, |
| "loss": 0.4719, |
| "step": 15460 |
| }, |
| { |
| "epoch": 0.5830475257226849, |
| "grad_norm": 1.740559583658373, |
| "learning_rate": 4.425355633068041e-06, |
| "loss": 0.5067, |
| "step": 15470 |
| }, |
| { |
| "epoch": 0.583424414879584, |
| "grad_norm": 1.7981946745212039, |
| "learning_rate": 4.418821571380911e-06, |
| "loss": 0.4932, |
| "step": 15480 |
| }, |
| { |
| "epoch": 0.5838013040364829, |
| "grad_norm": 1.5473002005079304, |
| "learning_rate": 4.4122885156449445e-06, |
| "loss": 0.4904, |
| "step": 15490 |
| }, |
| { |
| "epoch": 0.5841781931933818, |
| "grad_norm": 1.8784776516281418, |
| "learning_rate": 4.40575647716809e-06, |
| "loss": 0.5048, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.5845550823502808, |
| "grad_norm": 1.4719513927457284, |
| "learning_rate": 4.399225467256535e-06, |
| "loss": 0.4481, |
| "step": 15510 |
| }, |
| { |
| "epoch": 0.5849319715071798, |
| "grad_norm": 1.825241349848936, |
| "learning_rate": 4.392695497214688e-06, |
| "loss": 0.4983, |
| "step": 15520 |
| }, |
| { |
| "epoch": 0.5853088606640787, |
| "grad_norm": 1.5528305044356574, |
| "learning_rate": 4.38616657834515e-06, |
| "loss": 0.5041, |
| "step": 15530 |
| }, |
| { |
| "epoch": 0.5856857498209777, |
| "grad_norm": 1.6870571870401214, |
| "learning_rate": 4.3796387219487105e-06, |
| "loss": 0.4874, |
| "step": 15540 |
| }, |
| { |
| "epoch": 0.5860626389778766, |
| "grad_norm": 1.7920265982059933, |
| "learning_rate": 4.373111939324317e-06, |
| "loss": 0.4999, |
| "step": 15550 |
| }, |
| { |
| "epoch": 0.5864395281347755, |
| "grad_norm": 1.341117566474037, |
| "learning_rate": 4.366586241769061e-06, |
| "loss": 0.4557, |
| "step": 15560 |
| }, |
| { |
| "epoch": 0.5868164172916746, |
| "grad_norm": 1.3355923074651992, |
| "learning_rate": 4.36006164057815e-06, |
| "loss": 0.475, |
| "step": 15570 |
| }, |
| { |
| "epoch": 0.5871933064485735, |
| "grad_norm": 1.5410068065396823, |
| "learning_rate": 4.353538147044899e-06, |
| "loss": 0.4794, |
| "step": 15580 |
| }, |
| { |
| "epoch": 0.5875701956054724, |
| "grad_norm": 1.4096817479193515, |
| "learning_rate": 4.347015772460705e-06, |
| "loss": 0.5178, |
| "step": 15590 |
| }, |
| { |
| "epoch": 0.5879470847623713, |
| "grad_norm": 1.7430158797469388, |
| "learning_rate": 4.340494528115028e-06, |
| "loss": 0.4908, |
| "step": 15600 |
| }, |
| { |
| "epoch": 0.5883239739192704, |
| "grad_norm": 1.6723561324160858, |
| "learning_rate": 4.333974425295368e-06, |
| "loss": 0.4748, |
| "step": 15610 |
| }, |
| { |
| "epoch": 0.5887008630761693, |
| "grad_norm": 1.5864053334700638, |
| "learning_rate": 4.327455475287255e-06, |
| "loss": 0.4994, |
| "step": 15620 |
| }, |
| { |
| "epoch": 0.5890777522330682, |
| "grad_norm": 1.524780156258879, |
| "learning_rate": 4.3209376893742185e-06, |
| "loss": 0.4881, |
| "step": 15630 |
| }, |
| { |
| "epoch": 0.5894546413899672, |
| "grad_norm": 1.7296029856373045, |
| "learning_rate": 4.314421078837782e-06, |
| "loss": 0.5231, |
| "step": 15640 |
| }, |
| { |
| "epoch": 0.5898315305468662, |
| "grad_norm": 1.6109569171154967, |
| "learning_rate": 4.3079056549574185e-06, |
| "loss": 0.5001, |
| "step": 15650 |
| }, |
| { |
| "epoch": 0.5902084197037651, |
| "grad_norm": 1.3624010397586623, |
| "learning_rate": 4.301391429010563e-06, |
| "loss": 0.5082, |
| "step": 15660 |
| }, |
| { |
| "epoch": 0.5905853088606641, |
| "grad_norm": 1.7683758241128607, |
| "learning_rate": 4.2948784122725695e-06, |
| "loss": 0.4992, |
| "step": 15670 |
| }, |
| { |
| "epoch": 0.590962198017563, |
| "grad_norm": 1.8030632058982388, |
| "learning_rate": 4.2883666160167004e-06, |
| "loss": 0.4562, |
| "step": 15680 |
| }, |
| { |
| "epoch": 0.591339087174462, |
| "grad_norm": 1.4736588153729777, |
| "learning_rate": 4.281856051514104e-06, |
| "loss": 0.4598, |
| "step": 15690 |
| }, |
| { |
| "epoch": 0.591715976331361, |
| "grad_norm": 1.9040076493277438, |
| "learning_rate": 4.275346730033797e-06, |
| "loss": 0.5181, |
| "step": 15700 |
| }, |
| { |
| "epoch": 0.5920928654882599, |
| "grad_norm": 1.6217096093879626, |
| "learning_rate": 4.268838662842648e-06, |
| "loss": 0.493, |
| "step": 15710 |
| }, |
| { |
| "epoch": 0.5924697546451588, |
| "grad_norm": 1.4714622199259768, |
| "learning_rate": 4.262331861205353e-06, |
| "loss": 0.4911, |
| "step": 15720 |
| }, |
| { |
| "epoch": 0.5928466438020578, |
| "grad_norm": 1.6675985050238766, |
| "learning_rate": 4.255826336384413e-06, |
| "loss": 0.4839, |
| "step": 15730 |
| }, |
| { |
| "epoch": 0.5932235329589568, |
| "grad_norm": 1.730305207931142, |
| "learning_rate": 4.249322099640124e-06, |
| "loss": 0.4613, |
| "step": 15740 |
| }, |
| { |
| "epoch": 0.5936004221158557, |
| "grad_norm": 1.5875923671826002, |
| "learning_rate": 4.2428191622305515e-06, |
| "loss": 0.4882, |
| "step": 15750 |
| }, |
| { |
| "epoch": 0.5939773112727547, |
| "grad_norm": 1.7817428188939046, |
| "learning_rate": 4.2363175354115125e-06, |
| "loss": 0.4735, |
| "step": 15760 |
| }, |
| { |
| "epoch": 0.5943542004296536, |
| "grad_norm": 1.4446917245054236, |
| "learning_rate": 4.229817230436551e-06, |
| "loss": 0.4965, |
| "step": 15770 |
| }, |
| { |
| "epoch": 0.5947310895865526, |
| "grad_norm": 1.5351237013104135, |
| "learning_rate": 4.223318258556929e-06, |
| "loss": 0.4889, |
| "step": 15780 |
| }, |
| { |
| "epoch": 0.5951079787434516, |
| "grad_norm": 1.4127276518276473, |
| "learning_rate": 4.2168206310216e-06, |
| "loss": 0.4777, |
| "step": 15790 |
| }, |
| { |
| "epoch": 0.5954848679003505, |
| "grad_norm": 1.632038239139784, |
| "learning_rate": 4.210324359077188e-06, |
| "loss": 0.4863, |
| "step": 15800 |
| }, |
| { |
| "epoch": 0.5958617570572494, |
| "grad_norm": 1.58550770046582, |
| "learning_rate": 4.20382945396797e-06, |
| "loss": 0.4629, |
| "step": 15810 |
| }, |
| { |
| "epoch": 0.5962386462141485, |
| "grad_norm": 1.5604563182410427, |
| "learning_rate": 4.197335926935862e-06, |
| "loss": 0.4702, |
| "step": 15820 |
| }, |
| { |
| "epoch": 0.5966155353710474, |
| "grad_norm": 1.6694863244839393, |
| "learning_rate": 4.190843789220388e-06, |
| "loss": 0.4841, |
| "step": 15830 |
| }, |
| { |
| "epoch": 0.5969924245279463, |
| "grad_norm": 1.3812318270249444, |
| "learning_rate": 4.184353052058675e-06, |
| "loss": 0.4803, |
| "step": 15840 |
| }, |
| { |
| "epoch": 0.5973693136848452, |
| "grad_norm": 1.542874778740591, |
| "learning_rate": 4.177863726685422e-06, |
| "loss": 0.4774, |
| "step": 15850 |
| }, |
| { |
| "epoch": 0.5977462028417443, |
| "grad_norm": 1.5992161372449223, |
| "learning_rate": 4.1713758243328805e-06, |
| "loss": 0.4642, |
| "step": 15860 |
| }, |
| { |
| "epoch": 0.5981230919986432, |
| "grad_norm": 1.695522458255685, |
| "learning_rate": 4.164889356230845e-06, |
| "loss": 0.4984, |
| "step": 15870 |
| }, |
| { |
| "epoch": 0.5984999811555421, |
| "grad_norm": 1.5064263824602755, |
| "learning_rate": 4.158404333606624e-06, |
| "loss": 0.484, |
| "step": 15880 |
| }, |
| { |
| "epoch": 0.5988768703124411, |
| "grad_norm": 1.5539780173830111, |
| "learning_rate": 4.151920767685028e-06, |
| "loss": 0.5053, |
| "step": 15890 |
| }, |
| { |
| "epoch": 0.5992537594693401, |
| "grad_norm": 1.8156834275952407, |
| "learning_rate": 4.145438669688339e-06, |
| "loss": 0.4891, |
| "step": 15900 |
| }, |
| { |
| "epoch": 0.599630648626239, |
| "grad_norm": 1.6867172865153917, |
| "learning_rate": 4.138958050836305e-06, |
| "loss": 0.4933, |
| "step": 15910 |
| }, |
| { |
| "epoch": 0.600007537783138, |
| "grad_norm": 1.5069489029221794, |
| "learning_rate": 4.132478922346111e-06, |
| "loss": 0.4981, |
| "step": 15920 |
| }, |
| { |
| "epoch": 0.6003844269400369, |
| "grad_norm": 1.645715388170334, |
| "learning_rate": 4.126001295432362e-06, |
| "loss": 0.5092, |
| "step": 15930 |
| }, |
| { |
| "epoch": 0.6007613160969358, |
| "grad_norm": 1.5081305054375247, |
| "learning_rate": 4.119525181307065e-06, |
| "loss": 0.5007, |
| "step": 15940 |
| }, |
| { |
| "epoch": 0.6011382052538349, |
| "grad_norm": 1.7513431875588092, |
| "learning_rate": 4.113050591179608e-06, |
| "loss": 0.4818, |
| "step": 15950 |
| }, |
| { |
| "epoch": 0.6015150944107338, |
| "grad_norm": 1.4446843423733429, |
| "learning_rate": 4.10657753625674e-06, |
| "loss": 0.4891, |
| "step": 15960 |
| }, |
| { |
| "epoch": 0.6018919835676327, |
| "grad_norm": 1.5962355483326147, |
| "learning_rate": 4.100106027742559e-06, |
| "loss": 0.5487, |
| "step": 15970 |
| }, |
| { |
| "epoch": 0.6022688727245317, |
| "grad_norm": 1.5956138956675063, |
| "learning_rate": 4.093636076838474e-06, |
| "loss": 0.4953, |
| "step": 15980 |
| }, |
| { |
| "epoch": 0.6026457618814307, |
| "grad_norm": 1.6376372777266162, |
| "learning_rate": 4.087167694743209e-06, |
| "loss": 0.474, |
| "step": 15990 |
| }, |
| { |
| "epoch": 0.6030226510383296, |
| "grad_norm": 1.7797231365552175, |
| "learning_rate": 4.080700892652769e-06, |
| "loss": 0.5058, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.6033995401952286, |
| "grad_norm": 1.5483118782326815, |
| "learning_rate": 4.074235681760425e-06, |
| "loss": 0.4455, |
| "step": 16010 |
| }, |
| { |
| "epoch": 0.6037764293521275, |
| "grad_norm": 1.6389537384394115, |
| "learning_rate": 4.067772073256691e-06, |
| "loss": 0.4974, |
| "step": 16020 |
| }, |
| { |
| "epoch": 0.6041533185090265, |
| "grad_norm": 1.5711684676833029, |
| "learning_rate": 4.0613100783293085e-06, |
| "loss": 0.4745, |
| "step": 16030 |
| }, |
| { |
| "epoch": 0.6045302076659255, |
| "grad_norm": 1.583414482094785, |
| "learning_rate": 4.0548497081632275e-06, |
| "loss": 0.4832, |
| "step": 16040 |
| }, |
| { |
| "epoch": 0.6049070968228244, |
| "grad_norm": 1.9771396052065735, |
| "learning_rate": 4.04839097394059e-06, |
| "loss": 0.4997, |
| "step": 16050 |
| }, |
| { |
| "epoch": 0.6052839859797233, |
| "grad_norm": 1.4625015091341529, |
| "learning_rate": 4.0419338868406934e-06, |
| "loss": 0.4469, |
| "step": 16060 |
| }, |
| { |
| "epoch": 0.6056608751366224, |
| "grad_norm": 1.497067817501958, |
| "learning_rate": 4.035478458039998e-06, |
| "loss": 0.4967, |
| "step": 16070 |
| }, |
| { |
| "epoch": 0.6060377642935213, |
| "grad_norm": 1.5120714577785972, |
| "learning_rate": 4.029024698712085e-06, |
| "loss": 0.4829, |
| "step": 16080 |
| }, |
| { |
| "epoch": 0.6064146534504202, |
| "grad_norm": 1.37555735450956, |
| "learning_rate": 4.022572620027653e-06, |
| "loss": 0.4758, |
| "step": 16090 |
| }, |
| { |
| "epoch": 0.6067915426073192, |
| "grad_norm": 1.956108364099901, |
| "learning_rate": 4.016122233154483e-06, |
| "loss": 0.5154, |
| "step": 16100 |
| }, |
| { |
| "epoch": 0.6071684317642182, |
| "grad_norm": 1.426684210517623, |
| "learning_rate": 4.009673549257432e-06, |
| "loss": 0.5068, |
| "step": 16110 |
| }, |
| { |
| "epoch": 0.6075453209211171, |
| "grad_norm": 1.675741874097294, |
| "learning_rate": 4.0032265794984145e-06, |
| "loss": 0.4616, |
| "step": 16120 |
| }, |
| { |
| "epoch": 0.6079222100780161, |
| "grad_norm": 1.666536388372485, |
| "learning_rate": 3.99678133503637e-06, |
| "loss": 0.4775, |
| "step": 16130 |
| }, |
| { |
| "epoch": 0.608299099234915, |
| "grad_norm": 1.4010007944856846, |
| "learning_rate": 3.990337827027256e-06, |
| "loss": 0.5018, |
| "step": 16140 |
| }, |
| { |
| "epoch": 0.6086759883918139, |
| "grad_norm": 1.811100516226975, |
| "learning_rate": 3.983896066624021e-06, |
| "loss": 0.4643, |
| "step": 16150 |
| }, |
| { |
| "epoch": 0.609052877548713, |
| "grad_norm": 1.4452896868562695, |
| "learning_rate": 3.977456064976592e-06, |
| "loss": 0.5072, |
| "step": 16160 |
| }, |
| { |
| "epoch": 0.6094297667056119, |
| "grad_norm": 1.2863215835461435, |
| "learning_rate": 3.97101783323185e-06, |
| "loss": 0.4604, |
| "step": 16170 |
| }, |
| { |
| "epoch": 0.6098066558625108, |
| "grad_norm": 1.6495177612337615, |
| "learning_rate": 3.964581382533618e-06, |
| "loss": 0.4874, |
| "step": 16180 |
| }, |
| { |
| "epoch": 0.6101835450194097, |
| "grad_norm": 1.4527097080833093, |
| "learning_rate": 3.958146724022623e-06, |
| "loss": 0.4744, |
| "step": 16190 |
| }, |
| { |
| "epoch": 0.6105604341763088, |
| "grad_norm": 1.6146711317833942, |
| "learning_rate": 3.951713868836506e-06, |
| "loss": 0.4962, |
| "step": 16200 |
| }, |
| { |
| "epoch": 0.6109373233332077, |
| "grad_norm": 1.8235449585876042, |
| "learning_rate": 3.945282828109774e-06, |
| "loss": 0.4968, |
| "step": 16210 |
| }, |
| { |
| "epoch": 0.6113142124901066, |
| "grad_norm": 1.592201298917257, |
| "learning_rate": 3.938853612973801e-06, |
| "loss": 0.508, |
| "step": 16220 |
| }, |
| { |
| "epoch": 0.6116911016470056, |
| "grad_norm": 1.9031681135985004, |
| "learning_rate": 3.932426234556798e-06, |
| "loss": 0.4793, |
| "step": 16230 |
| }, |
| { |
| "epoch": 0.6120679908039046, |
| "grad_norm": 1.860838297311073, |
| "learning_rate": 3.926000703983795e-06, |
| "loss": 0.4691, |
| "step": 16240 |
| }, |
| { |
| "epoch": 0.6124448799608035, |
| "grad_norm": 1.7168068035412793, |
| "learning_rate": 3.919577032376628e-06, |
| "loss": 0.4927, |
| "step": 16250 |
| }, |
| { |
| "epoch": 0.6128217691177025, |
| "grad_norm": 1.856102414725057, |
| "learning_rate": 3.913155230853915e-06, |
| "loss": 0.4918, |
| "step": 16260 |
| }, |
| { |
| "epoch": 0.6131986582746014, |
| "grad_norm": 1.712306650139487, |
| "learning_rate": 3.906735310531033e-06, |
| "loss": 0.4969, |
| "step": 16270 |
| }, |
| { |
| "epoch": 0.6135755474315004, |
| "grad_norm": 1.6309354412368642, |
| "learning_rate": 3.900317282520104e-06, |
| "loss": 0.4693, |
| "step": 16280 |
| }, |
| { |
| "epoch": 0.6139524365883994, |
| "grad_norm": 1.4778531939363033, |
| "learning_rate": 3.893901157929979e-06, |
| "loss": 0.4735, |
| "step": 16290 |
| }, |
| { |
| "epoch": 0.6143293257452983, |
| "grad_norm": 1.8592722441894747, |
| "learning_rate": 3.8874869478662104e-06, |
| "loss": 0.4552, |
| "step": 16300 |
| }, |
| { |
| "epoch": 0.6147062149021972, |
| "grad_norm": 1.9102009395175383, |
| "learning_rate": 3.881074663431037e-06, |
| "loss": 0.4856, |
| "step": 16310 |
| }, |
| { |
| "epoch": 0.6150831040590963, |
| "grad_norm": 2.834088988880712, |
| "learning_rate": 3.874664315723363e-06, |
| "loss": 0.4862, |
| "step": 16320 |
| }, |
| { |
| "epoch": 0.6154599932159952, |
| "grad_norm": 1.5759267871394191, |
| "learning_rate": 3.8682559158387474e-06, |
| "loss": 0.4749, |
| "step": 16330 |
| }, |
| { |
| "epoch": 0.6158368823728941, |
| "grad_norm": 1.6563437776338659, |
| "learning_rate": 3.861849474869371e-06, |
| "loss": 0.4675, |
| "step": 16340 |
| }, |
| { |
| "epoch": 0.6162137715297931, |
| "grad_norm": 1.7936481213990962, |
| "learning_rate": 3.855445003904024e-06, |
| "loss": 0.4901, |
| "step": 16350 |
| }, |
| { |
| "epoch": 0.616590660686692, |
| "grad_norm": 1.6261629750224562, |
| "learning_rate": 3.849042514028091e-06, |
| "loss": 0.4817, |
| "step": 16360 |
| }, |
| { |
| "epoch": 0.616967549843591, |
| "grad_norm": 1.6014458715286652, |
| "learning_rate": 3.842642016323522e-06, |
| "loss": 0.4757, |
| "step": 16370 |
| }, |
| { |
| "epoch": 0.61734443900049, |
| "grad_norm": 1.6565625346460429, |
| "learning_rate": 3.836243521868828e-06, |
| "loss": 0.4849, |
| "step": 16380 |
| }, |
| { |
| "epoch": 0.6177213281573889, |
| "grad_norm": 1.6412335965766696, |
| "learning_rate": 3.82984704173904e-06, |
| "loss": 0.486, |
| "step": 16390 |
| }, |
| { |
| "epoch": 0.6180982173142878, |
| "grad_norm": 1.8138163964366028, |
| "learning_rate": 3.823452587005712e-06, |
| "loss": 0.5104, |
| "step": 16400 |
| }, |
| { |
| "epoch": 0.6184751064711869, |
| "grad_norm": 1.7953902099489745, |
| "learning_rate": 3.8170601687368905e-06, |
| "loss": 0.479, |
| "step": 16410 |
| }, |
| { |
| "epoch": 0.6188519956280858, |
| "grad_norm": 1.5270507793675938, |
| "learning_rate": 3.8106697979970952e-06, |
| "loss": 0.528, |
| "step": 16420 |
| }, |
| { |
| "epoch": 0.6192288847849847, |
| "grad_norm": 1.5641867404888987, |
| "learning_rate": 3.804281485847301e-06, |
| "loss": 0.4849, |
| "step": 16430 |
| }, |
| { |
| "epoch": 0.6196057739418837, |
| "grad_norm": 1.7507477091577552, |
| "learning_rate": 3.7978952433449223e-06, |
| "loss": 0.4832, |
| "step": 16440 |
| }, |
| { |
| "epoch": 0.6199826630987827, |
| "grad_norm": 1.6054491509073952, |
| "learning_rate": 3.7915110815437883e-06, |
| "loss": 0.4919, |
| "step": 16450 |
| }, |
| { |
| "epoch": 0.6203595522556816, |
| "grad_norm": 1.3576882864362545, |
| "learning_rate": 3.7851290114941335e-06, |
| "loss": 0.4842, |
| "step": 16460 |
| }, |
| { |
| "epoch": 0.6207364414125806, |
| "grad_norm": 1.5934327565220905, |
| "learning_rate": 3.77874904424256e-06, |
| "loss": 0.4787, |
| "step": 16470 |
| }, |
| { |
| "epoch": 0.6211133305694795, |
| "grad_norm": 1.4581770227833064, |
| "learning_rate": 3.7723711908320417e-06, |
| "loss": 0.4991, |
| "step": 16480 |
| }, |
| { |
| "epoch": 0.6214902197263785, |
| "grad_norm": 1.7598577411897167, |
| "learning_rate": 3.7659954623018875e-06, |
| "loss": 0.4655, |
| "step": 16490 |
| }, |
| { |
| "epoch": 0.6218671088832775, |
| "grad_norm": 1.5466125977952652, |
| "learning_rate": 3.759621869687731e-06, |
| "loss": 0.4763, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.6222439980401764, |
| "grad_norm": 1.5162276323975878, |
| "learning_rate": 3.753250424021506e-06, |
| "loss": 0.4791, |
| "step": 16510 |
| }, |
| { |
| "epoch": 0.6226208871970753, |
| "grad_norm": 1.6745379682639492, |
| "learning_rate": 3.746881136331431e-06, |
| "loss": 0.5151, |
| "step": 16520 |
| }, |
| { |
| "epoch": 0.6229977763539742, |
| "grad_norm": 1.532874094800718, |
| "learning_rate": 3.740514017641993e-06, |
| "loss": 0.4911, |
| "step": 16530 |
| }, |
| { |
| "epoch": 0.6233746655108733, |
| "grad_norm": 1.6604506659265745, |
| "learning_rate": 3.7341490789739205e-06, |
| "loss": 0.4906, |
| "step": 16540 |
| }, |
| { |
| "epoch": 0.6237515546677722, |
| "grad_norm": 1.6456103279322771, |
| "learning_rate": 3.727786331344171e-06, |
| "loss": 0.4923, |
| "step": 16550 |
| }, |
| { |
| "epoch": 0.6241284438246711, |
| "grad_norm": 1.4879389920554817, |
| "learning_rate": 3.7214257857659066e-06, |
| "loss": 0.489, |
| "step": 16560 |
| }, |
| { |
| "epoch": 0.6245053329815701, |
| "grad_norm": 2.2740003134139237, |
| "learning_rate": 3.715067453248481e-06, |
| "loss": 0.5154, |
| "step": 16570 |
| }, |
| { |
| "epoch": 0.6248822221384691, |
| "grad_norm": 1.7286954403484827, |
| "learning_rate": 3.7087113447974153e-06, |
| "loss": 0.4746, |
| "step": 16580 |
| }, |
| { |
| "epoch": 0.625259111295368, |
| "grad_norm": 1.291986369209173, |
| "learning_rate": 3.7023574714143858e-06, |
| "loss": 0.4625, |
| "step": 16590 |
| }, |
| { |
| "epoch": 0.625636000452267, |
| "grad_norm": 1.664446699881965, |
| "learning_rate": 3.69600584409719e-06, |
| "loss": 0.4541, |
| "step": 16600 |
| }, |
| { |
| "epoch": 0.6260128896091659, |
| "grad_norm": 1.5018038978083545, |
| "learning_rate": 3.6896564738397484e-06, |
| "loss": 0.4657, |
| "step": 16610 |
| }, |
| { |
| "epoch": 0.6263897787660649, |
| "grad_norm": 1.5823124760264218, |
| "learning_rate": 3.6833093716320693e-06, |
| "loss": 0.4768, |
| "step": 16620 |
| }, |
| { |
| "epoch": 0.6267666679229639, |
| "grad_norm": 1.9860629829549734, |
| "learning_rate": 3.6769645484602377e-06, |
| "loss": 0.4811, |
| "step": 16630 |
| }, |
| { |
| "epoch": 0.6271435570798628, |
| "grad_norm": 1.839944752571797, |
| "learning_rate": 3.6706220153063904e-06, |
| "loss": 0.48, |
| "step": 16640 |
| }, |
| { |
| "epoch": 0.6275204462367617, |
| "grad_norm": 1.7309100874405818, |
| "learning_rate": 3.664281783148702e-06, |
| "loss": 0.4722, |
| "step": 16650 |
| }, |
| { |
| "epoch": 0.6278973353936608, |
| "grad_norm": 1.581593496476517, |
| "learning_rate": 3.6579438629613682e-06, |
| "loss": 0.475, |
| "step": 16660 |
| }, |
| { |
| "epoch": 0.6282742245505597, |
| "grad_norm": 1.6548205842663573, |
| "learning_rate": 3.651608265714579e-06, |
| "loss": 0.4907, |
| "step": 16670 |
| }, |
| { |
| "epoch": 0.6286511137074586, |
| "grad_norm": 1.5516453686391054, |
| "learning_rate": 3.645275002374502e-06, |
| "loss": 0.4909, |
| "step": 16680 |
| }, |
| { |
| "epoch": 0.6290280028643576, |
| "grad_norm": 1.4834746500429061, |
| "learning_rate": 3.6389440839032687e-06, |
| "loss": 0.5048, |
| "step": 16690 |
| }, |
| { |
| "epoch": 0.6294048920212566, |
| "grad_norm": 1.7024990526554837, |
| "learning_rate": 3.6326155212589507e-06, |
| "loss": 0.4714, |
| "step": 16700 |
| }, |
| { |
| "epoch": 0.6297817811781555, |
| "grad_norm": 1.709920706597902, |
| "learning_rate": 3.6262893253955433e-06, |
| "loss": 0.4828, |
| "step": 16710 |
| }, |
| { |
| "epoch": 0.6301586703350545, |
| "grad_norm": 1.8336853543986507, |
| "learning_rate": 3.6199655072629415e-06, |
| "loss": 0.4847, |
| "step": 16720 |
| }, |
| { |
| "epoch": 0.6305355594919534, |
| "grad_norm": 1.4929651219976985, |
| "learning_rate": 3.613644077806927e-06, |
| "loss": 0.4786, |
| "step": 16730 |
| }, |
| { |
| "epoch": 0.6309124486488523, |
| "grad_norm": 1.7036171691777842, |
| "learning_rate": 3.607325047969149e-06, |
| "loss": 0.4957, |
| "step": 16740 |
| }, |
| { |
| "epoch": 0.6312893378057514, |
| "grad_norm": 1.73108529336121, |
| "learning_rate": 3.6010084286871017e-06, |
| "loss": 0.5082, |
| "step": 16750 |
| }, |
| { |
| "epoch": 0.6316662269626503, |
| "grad_norm": 1.6591602232622107, |
| "learning_rate": 3.5946942308941035e-06, |
| "loss": 0.4894, |
| "step": 16760 |
| }, |
| { |
| "epoch": 0.6320431161195492, |
| "grad_norm": 2.047186622437741, |
| "learning_rate": 3.5883824655192855e-06, |
| "loss": 0.4853, |
| "step": 16770 |
| }, |
| { |
| "epoch": 0.6324200052764481, |
| "grad_norm": 1.6832333116348397, |
| "learning_rate": 3.582073143487568e-06, |
| "loss": 0.5132, |
| "step": 16780 |
| }, |
| { |
| "epoch": 0.6327968944333472, |
| "grad_norm": 1.5899139760429426, |
| "learning_rate": 3.575766275719644e-06, |
| "loss": 0.5026, |
| "step": 16790 |
| }, |
| { |
| "epoch": 0.6331737835902461, |
| "grad_norm": 1.6765166894945724, |
| "learning_rate": 3.5694618731319507e-06, |
| "loss": 0.493, |
| "step": 16800 |
| }, |
| { |
| "epoch": 0.633550672747145, |
| "grad_norm": 1.5669282699472866, |
| "learning_rate": 3.5631599466366683e-06, |
| "loss": 0.4989, |
| "step": 16810 |
| }, |
| { |
| "epoch": 0.633927561904044, |
| "grad_norm": 1.6353842292836174, |
| "learning_rate": 3.556860507141685e-06, |
| "loss": 0.4842, |
| "step": 16820 |
| }, |
| { |
| "epoch": 0.634304451060943, |
| "grad_norm": 1.5863452326347784, |
| "learning_rate": 3.5505635655505877e-06, |
| "loss": 0.4808, |
| "step": 16830 |
| }, |
| { |
| "epoch": 0.634681340217842, |
| "grad_norm": 1.4750527987414617, |
| "learning_rate": 3.5442691327626354e-06, |
| "loss": 0.4833, |
| "step": 16840 |
| }, |
| { |
| "epoch": 0.6350582293747409, |
| "grad_norm": 1.5244616927874408, |
| "learning_rate": 3.5379772196727486e-06, |
| "loss": 0.4848, |
| "step": 16850 |
| }, |
| { |
| "epoch": 0.6354351185316398, |
| "grad_norm": 1.4503143224413209, |
| "learning_rate": 3.5316878371714838e-06, |
| "loss": 0.4737, |
| "step": 16860 |
| }, |
| { |
| "epoch": 0.6358120076885388, |
| "grad_norm": 1.631831674936825, |
| "learning_rate": 3.525400996145023e-06, |
| "loss": 0.4847, |
| "step": 16870 |
| }, |
| { |
| "epoch": 0.6361888968454378, |
| "grad_norm": 1.863352381042274, |
| "learning_rate": 3.5191167074751385e-06, |
| "loss": 0.461, |
| "step": 16880 |
| }, |
| { |
| "epoch": 0.6365657860023367, |
| "grad_norm": 1.6699698482794394, |
| "learning_rate": 3.512834982039196e-06, |
| "loss": 0.4629, |
| "step": 16890 |
| }, |
| { |
| "epoch": 0.6369426751592356, |
| "grad_norm": 1.487044517490085, |
| "learning_rate": 3.506555830710118e-06, |
| "loss": 0.5105, |
| "step": 16900 |
| }, |
| { |
| "epoch": 0.6373195643161347, |
| "grad_norm": 1.4479081048385236, |
| "learning_rate": 3.500279264356374e-06, |
| "loss": 0.5242, |
| "step": 16910 |
| }, |
| { |
| "epoch": 0.6376964534730336, |
| "grad_norm": 1.447171417720663, |
| "learning_rate": 3.4940052938419583e-06, |
| "loss": 0.508, |
| "step": 16920 |
| }, |
| { |
| "epoch": 0.6380733426299325, |
| "grad_norm": 1.6796141341080977, |
| "learning_rate": 3.4877339300263712e-06, |
| "loss": 0.5005, |
| "step": 16930 |
| }, |
| { |
| "epoch": 0.6384502317868315, |
| "grad_norm": 1.6662153763761518, |
| "learning_rate": 3.481465183764602e-06, |
| "loss": 0.4834, |
| "step": 16940 |
| }, |
| { |
| "epoch": 0.6388271209437304, |
| "grad_norm": 1.5754157381604186, |
| "learning_rate": 3.475199065907111e-06, |
| "loss": 0.4874, |
| "step": 16950 |
| }, |
| { |
| "epoch": 0.6392040101006294, |
| "grad_norm": 1.6041426691346072, |
| "learning_rate": 3.4689355872998085e-06, |
| "loss": 0.4865, |
| "step": 16960 |
| }, |
| { |
| "epoch": 0.6395808992575284, |
| "grad_norm": 1.6020448439122437, |
| "learning_rate": 3.4626747587840336e-06, |
| "loss": 0.4804, |
| "step": 16970 |
| }, |
| { |
| "epoch": 0.6399577884144273, |
| "grad_norm": 1.5691356747235539, |
| "learning_rate": 3.4564165911965407e-06, |
| "loss": 0.4491, |
| "step": 16980 |
| }, |
| { |
| "epoch": 0.6403346775713262, |
| "grad_norm": 1.6238593763575764, |
| "learning_rate": 3.4501610953694775e-06, |
| "loss": 0.4721, |
| "step": 16990 |
| }, |
| { |
| "epoch": 0.6407115667282253, |
| "grad_norm": 1.8524954109583753, |
| "learning_rate": 3.4439082821303723e-06, |
| "loss": 0.5065, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.6410884558851242, |
| "grad_norm": 1.5903092899398827, |
| "learning_rate": 3.4376581623020987e-06, |
| "loss": 0.4751, |
| "step": 17010 |
| }, |
| { |
| "epoch": 0.6414653450420231, |
| "grad_norm": 1.6223821940369376, |
| "learning_rate": 3.43141074670288e-06, |
| "loss": 0.482, |
| "step": 17020 |
| }, |
| { |
| "epoch": 0.6418422341989221, |
| "grad_norm": 1.7154442225202828, |
| "learning_rate": 3.425166046146254e-06, |
| "loss": 0.5018, |
| "step": 17030 |
| }, |
| { |
| "epoch": 0.6422191233558211, |
| "grad_norm": 1.4885853201247903, |
| "learning_rate": 3.4189240714410587e-06, |
| "loss": 0.4554, |
| "step": 17040 |
| }, |
| { |
| "epoch": 0.64259601251272, |
| "grad_norm": 1.7004544435150633, |
| "learning_rate": 3.412684833391413e-06, |
| "loss": 0.4831, |
| "step": 17050 |
| }, |
| { |
| "epoch": 0.642972901669619, |
| "grad_norm": 1.5122082088204818, |
| "learning_rate": 3.406448342796702e-06, |
| "loss": 0.4601, |
| "step": 17060 |
| }, |
| { |
| "epoch": 0.6433497908265179, |
| "grad_norm": 1.5886153641711012, |
| "learning_rate": 3.400214610451553e-06, |
| "loss": 0.5, |
| "step": 17070 |
| }, |
| { |
| "epoch": 0.6437266799834169, |
| "grad_norm": 1.4497829362440928, |
| "learning_rate": 3.393983647145823e-06, |
| "loss": 0.4886, |
| "step": 17080 |
| }, |
| { |
| "epoch": 0.6441035691403159, |
| "grad_norm": 1.8243610592206863, |
| "learning_rate": 3.387755463664567e-06, |
| "loss": 0.4844, |
| "step": 17090 |
| }, |
| { |
| "epoch": 0.6444804582972148, |
| "grad_norm": 1.9217986771041224, |
| "learning_rate": 3.3815300707880394e-06, |
| "loss": 0.4921, |
| "step": 17100 |
| }, |
| { |
| "epoch": 0.6448573474541137, |
| "grad_norm": 1.5791418592871644, |
| "learning_rate": 3.3753074792916574e-06, |
| "loss": 0.482, |
| "step": 17110 |
| }, |
| { |
| "epoch": 0.6452342366110126, |
| "grad_norm": 1.4431656921824283, |
| "learning_rate": 3.369087699945993e-06, |
| "loss": 0.4711, |
| "step": 17120 |
| }, |
| { |
| "epoch": 0.6456111257679117, |
| "grad_norm": 1.662138312216896, |
| "learning_rate": 3.3628707435167467e-06, |
| "loss": 0.4701, |
| "step": 17130 |
| }, |
| { |
| "epoch": 0.6459880149248106, |
| "grad_norm": 1.6549524822340256, |
| "learning_rate": 3.3566566207647354e-06, |
| "loss": 0.4761, |
| "step": 17140 |
| }, |
| { |
| "epoch": 0.6463649040817095, |
| "grad_norm": 1.7699738162116803, |
| "learning_rate": 3.350445342445874e-06, |
| "loss": 0.4945, |
| "step": 17150 |
| }, |
| { |
| "epoch": 0.6467417932386085, |
| "grad_norm": 2.1127375357724083, |
| "learning_rate": 3.344236919311149e-06, |
| "loss": 0.4765, |
| "step": 17160 |
| }, |
| { |
| "epoch": 0.6471186823955075, |
| "grad_norm": 1.5444316703665961, |
| "learning_rate": 3.338031362106607e-06, |
| "loss": 0.4936, |
| "step": 17170 |
| }, |
| { |
| "epoch": 0.6474955715524064, |
| "grad_norm": 1.6616572448362332, |
| "learning_rate": 3.3318286815733335e-06, |
| "loss": 0.4746, |
| "step": 17180 |
| }, |
| { |
| "epoch": 0.6478724607093054, |
| "grad_norm": 1.9913624341143157, |
| "learning_rate": 3.325628888447437e-06, |
| "loss": 0.5077, |
| "step": 17190 |
| }, |
| { |
| "epoch": 0.6482493498662043, |
| "grad_norm": 1.7915552215298538, |
| "learning_rate": 3.319431993460026e-06, |
| "loss": 0.504, |
| "step": 17200 |
| }, |
| { |
| "epoch": 0.6486262390231033, |
| "grad_norm": 1.732835360991205, |
| "learning_rate": 3.3132380073371926e-06, |
| "loss": 0.4925, |
| "step": 17210 |
| }, |
| { |
| "epoch": 0.6490031281800023, |
| "grad_norm": 1.3938136019682936, |
| "learning_rate": 3.3070469407999937e-06, |
| "loss": 0.4801, |
| "step": 17220 |
| }, |
| { |
| "epoch": 0.6493800173369012, |
| "grad_norm": 1.3212224075939478, |
| "learning_rate": 3.3008588045644357e-06, |
| "loss": 0.4609, |
| "step": 17230 |
| }, |
| { |
| "epoch": 0.6497569064938001, |
| "grad_norm": 1.7945950130121113, |
| "learning_rate": 3.2946736093414524e-06, |
| "loss": 0.467, |
| "step": 17240 |
| }, |
| { |
| "epoch": 0.6501337956506992, |
| "grad_norm": 1.7547142990915863, |
| "learning_rate": 3.288491365836881e-06, |
| "loss": 0.5079, |
| "step": 17250 |
| }, |
| { |
| "epoch": 0.6505106848075981, |
| "grad_norm": 1.6591091274236383, |
| "learning_rate": 3.2823120847514577e-06, |
| "loss": 0.4642, |
| "step": 17260 |
| }, |
| { |
| "epoch": 0.650887573964497, |
| "grad_norm": 1.5655640287361217, |
| "learning_rate": 3.2761357767807857e-06, |
| "loss": 0.4641, |
| "step": 17270 |
| }, |
| { |
| "epoch": 0.651264463121396, |
| "grad_norm": 1.7936591153526042, |
| "learning_rate": 3.269962452615326e-06, |
| "loss": 0.471, |
| "step": 17280 |
| }, |
| { |
| "epoch": 0.651641352278295, |
| "grad_norm": 1.5915717374592357, |
| "learning_rate": 3.2637921229403734e-06, |
| "loss": 0.4689, |
| "step": 17290 |
| }, |
| { |
| "epoch": 0.6520182414351939, |
| "grad_norm": 1.775887565559236, |
| "learning_rate": 3.2576247984360372e-06, |
| "loss": 0.4985, |
| "step": 17300 |
| }, |
| { |
| "epoch": 0.6523951305920929, |
| "grad_norm": 1.8241609832963868, |
| "learning_rate": 3.251460489777228e-06, |
| "loss": 0.4983, |
| "step": 17310 |
| }, |
| { |
| "epoch": 0.6527720197489918, |
| "grad_norm": 1.7201193138133724, |
| "learning_rate": 3.2452992076336356e-06, |
| "loss": 0.486, |
| "step": 17320 |
| }, |
| { |
| "epoch": 0.6531489089058907, |
| "grad_norm": 1.6087203461140804, |
| "learning_rate": 3.239140962669711e-06, |
| "loss": 0.4933, |
| "step": 17330 |
| }, |
| { |
| "epoch": 0.6535257980627898, |
| "grad_norm": 1.7218137998329464, |
| "learning_rate": 3.2329857655446483e-06, |
| "loss": 0.4932, |
| "step": 17340 |
| }, |
| { |
| "epoch": 0.6539026872196887, |
| "grad_norm": 1.5206186268024502, |
| "learning_rate": 3.2268336269123646e-06, |
| "loss": 0.4552, |
| "step": 17350 |
| }, |
| { |
| "epoch": 0.6542795763765876, |
| "grad_norm": 1.4491517030153769, |
| "learning_rate": 3.220684557421488e-06, |
| "loss": 0.4773, |
| "step": 17360 |
| }, |
| { |
| "epoch": 0.6546564655334866, |
| "grad_norm": 1.5069597772562586, |
| "learning_rate": 3.21453856771533e-06, |
| "loss": 0.4768, |
| "step": 17370 |
| }, |
| { |
| "epoch": 0.6550333546903856, |
| "grad_norm": 1.5693052306029478, |
| "learning_rate": 3.2083956684318708e-06, |
| "loss": 0.4817, |
| "step": 17380 |
| }, |
| { |
| "epoch": 0.6554102438472845, |
| "grad_norm": 1.6315582314566435, |
| "learning_rate": 3.2022558702037432e-06, |
| "loss": 0.4875, |
| "step": 17390 |
| }, |
| { |
| "epoch": 0.6557871330041835, |
| "grad_norm": 1.8830189051247035, |
| "learning_rate": 3.196119183658213e-06, |
| "loss": 0.5081, |
| "step": 17400 |
| }, |
| { |
| "epoch": 0.6561640221610824, |
| "grad_norm": 1.4994699080361438, |
| "learning_rate": 3.1899856194171607e-06, |
| "loss": 0.4637, |
| "step": 17410 |
| }, |
| { |
| "epoch": 0.6565409113179814, |
| "grad_norm": 1.5681733019464799, |
| "learning_rate": 3.183855188097057e-06, |
| "loss": 0.4754, |
| "step": 17420 |
| }, |
| { |
| "epoch": 0.6569178004748804, |
| "grad_norm": 1.7957738354956814, |
| "learning_rate": 3.177727900308958e-06, |
| "loss": 0.4834, |
| "step": 17430 |
| }, |
| { |
| "epoch": 0.6572946896317793, |
| "grad_norm": 1.4836118332583659, |
| "learning_rate": 3.171603766658472e-06, |
| "loss": 0.487, |
| "step": 17440 |
| }, |
| { |
| "epoch": 0.6576715787886782, |
| "grad_norm": 1.8415155422864224, |
| "learning_rate": 3.1654827977457526e-06, |
| "loss": 0.4965, |
| "step": 17450 |
| }, |
| { |
| "epoch": 0.6580484679455773, |
| "grad_norm": 1.48362840539746, |
| "learning_rate": 3.1593650041654716e-06, |
| "loss": 0.446, |
| "step": 17460 |
| }, |
| { |
| "epoch": 0.6584253571024762, |
| "grad_norm": 1.6394333815884048, |
| "learning_rate": 3.1532503965068073e-06, |
| "loss": 0.4687, |
| "step": 17470 |
| }, |
| { |
| "epoch": 0.6588022462593751, |
| "grad_norm": 1.483403229969005, |
| "learning_rate": 3.1471389853534217e-06, |
| "loss": 0.4596, |
| "step": 17480 |
| }, |
| { |
| "epoch": 0.659179135416274, |
| "grad_norm": 1.3730547797639057, |
| "learning_rate": 3.141030781283449e-06, |
| "loss": 0.4691, |
| "step": 17490 |
| }, |
| { |
| "epoch": 0.6595560245731731, |
| "grad_norm": 1.692266344168683, |
| "learning_rate": 3.134925794869463e-06, |
| "loss": 0.4649, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.659932913730072, |
| "grad_norm": 1.5190834038718941, |
| "learning_rate": 3.128824036678477e-06, |
| "loss": 0.4734, |
| "step": 17510 |
| }, |
| { |
| "epoch": 0.6603098028869709, |
| "grad_norm": 1.4798475487891651, |
| "learning_rate": 3.1227255172719127e-06, |
| "loss": 0.486, |
| "step": 17520 |
| }, |
| { |
| "epoch": 0.6606866920438699, |
| "grad_norm": 1.5780730702677566, |
| "learning_rate": 3.1166302472055873e-06, |
| "loss": 0.462, |
| "step": 17530 |
| }, |
| { |
| "epoch": 0.6610635812007688, |
| "grad_norm": 1.645103230020474, |
| "learning_rate": 3.11053823702969e-06, |
| "loss": 0.4983, |
| "step": 17540 |
| }, |
| { |
| "epoch": 0.6614404703576678, |
| "grad_norm": 1.6169975944237835, |
| "learning_rate": 3.104449497288772e-06, |
| "loss": 0.4693, |
| "step": 17550 |
| }, |
| { |
| "epoch": 0.6618173595145668, |
| "grad_norm": 1.6644257874809296, |
| "learning_rate": 3.0983640385217224e-06, |
| "loss": 0.537, |
| "step": 17560 |
| }, |
| { |
| "epoch": 0.6621942486714657, |
| "grad_norm": 1.5913845909506998, |
| "learning_rate": 3.092281871261752e-06, |
| "loss": 0.4874, |
| "step": 17570 |
| }, |
| { |
| "epoch": 0.6625711378283646, |
| "grad_norm": 1.6846273341706974, |
| "learning_rate": 3.086203006036371e-06, |
| "loss": 0.4407, |
| "step": 17580 |
| }, |
| { |
| "epoch": 0.6629480269852637, |
| "grad_norm": 1.4314112760740891, |
| "learning_rate": 3.0801274533673776e-06, |
| "loss": 0.4778, |
| "step": 17590 |
| }, |
| { |
| "epoch": 0.6633249161421626, |
| "grad_norm": 1.528021919193015, |
| "learning_rate": 3.0740552237708366e-06, |
| "loss": 0.5041, |
| "step": 17600 |
| }, |
| { |
| "epoch": 0.6637018052990615, |
| "grad_norm": 1.679731483162306, |
| "learning_rate": 3.0679863277570566e-06, |
| "loss": 0.4939, |
| "step": 17610 |
| }, |
| { |
| "epoch": 0.6640786944559605, |
| "grad_norm": 1.4342878274161748, |
| "learning_rate": 3.0619207758305848e-06, |
| "loss": 0.4895, |
| "step": 17620 |
| }, |
| { |
| "epoch": 0.6644555836128595, |
| "grad_norm": 1.7346799588693844, |
| "learning_rate": 3.0558585784901675e-06, |
| "loss": 0.486, |
| "step": 17630 |
| }, |
| { |
| "epoch": 0.6648324727697584, |
| "grad_norm": 1.783807850705795, |
| "learning_rate": 3.0497997462287566e-06, |
| "loss": 0.492, |
| "step": 17640 |
| }, |
| { |
| "epoch": 0.6652093619266574, |
| "grad_norm": 2.0262857478932634, |
| "learning_rate": 3.0437442895334734e-06, |
| "loss": 0.5014, |
| "step": 17650 |
| }, |
| { |
| "epoch": 0.6655862510835563, |
| "grad_norm": 1.5453260298900549, |
| "learning_rate": 3.037692218885599e-06, |
| "loss": 0.4923, |
| "step": 17660 |
| }, |
| { |
| "epoch": 0.6659631402404553, |
| "grad_norm": 2.203790859304095, |
| "learning_rate": 3.0316435447605495e-06, |
| "loss": 0.4846, |
| "step": 17670 |
| }, |
| { |
| "epoch": 0.6663400293973543, |
| "grad_norm": 1.546001802031686, |
| "learning_rate": 3.025598277627866e-06, |
| "loss": 0.4971, |
| "step": 17680 |
| }, |
| { |
| "epoch": 0.6667169185542532, |
| "grad_norm": 1.5128697377492308, |
| "learning_rate": 3.0195564279511925e-06, |
| "loss": 0.4797, |
| "step": 17690 |
| }, |
| { |
| "epoch": 0.6670938077111521, |
| "grad_norm": 2.0573665058922135, |
| "learning_rate": 3.0135180061882564e-06, |
| "loss": 0.4543, |
| "step": 17700 |
| }, |
| { |
| "epoch": 0.6674706968680512, |
| "grad_norm": 1.7400957327471658, |
| "learning_rate": 3.0074830227908514e-06, |
| "loss": 0.4574, |
| "step": 17710 |
| }, |
| { |
| "epoch": 0.6678475860249501, |
| "grad_norm": 1.3380341450114601, |
| "learning_rate": 3.0014514882048195e-06, |
| "loss": 0.5058, |
| "step": 17720 |
| }, |
| { |
| "epoch": 0.668224475181849, |
| "grad_norm": 2.076718786647805, |
| "learning_rate": 2.995423412870036e-06, |
| "loss": 0.4859, |
| "step": 17730 |
| }, |
| { |
| "epoch": 0.668601364338748, |
| "grad_norm": 1.4195821383830665, |
| "learning_rate": 2.9893988072203867e-06, |
| "loss": 0.492, |
| "step": 17740 |
| }, |
| { |
| "epoch": 0.6689782534956469, |
| "grad_norm": 1.8805532322048002, |
| "learning_rate": 2.98337768168375e-06, |
| "loss": 0.483, |
| "step": 17750 |
| }, |
| { |
| "epoch": 0.6693551426525459, |
| "grad_norm": 1.6317624518967453, |
| "learning_rate": 2.977360046681983e-06, |
| "loss": 0.478, |
| "step": 17760 |
| }, |
| { |
| "epoch": 0.6697320318094448, |
| "grad_norm": 2.0847886406534513, |
| "learning_rate": 2.971345912630902e-06, |
| "loss": 0.4872, |
| "step": 17770 |
| }, |
| { |
| "epoch": 0.6701089209663438, |
| "grad_norm": 1.7246432982403883, |
| "learning_rate": 2.965335289940263e-06, |
| "loss": 0.5001, |
| "step": 17780 |
| }, |
| { |
| "epoch": 0.6704858101232427, |
| "grad_norm": 1.6363668958491415, |
| "learning_rate": 2.9593281890137404e-06, |
| "loss": 0.4686, |
| "step": 17790 |
| }, |
| { |
| "epoch": 0.6708626992801417, |
| "grad_norm": 1.7985482020730335, |
| "learning_rate": 2.9533246202489173e-06, |
| "loss": 0.4969, |
| "step": 17800 |
| }, |
| { |
| "epoch": 0.6712395884370407, |
| "grad_norm": 1.6264485996061422, |
| "learning_rate": 2.9473245940372608e-06, |
| "loss": 0.4936, |
| "step": 17810 |
| }, |
| { |
| "epoch": 0.6716164775939396, |
| "grad_norm": 1.6890174531918563, |
| "learning_rate": 2.9413281207641114e-06, |
| "loss": 0.4799, |
| "step": 17820 |
| }, |
| { |
| "epoch": 0.6719933667508385, |
| "grad_norm": 1.5841849546055524, |
| "learning_rate": 2.9353352108086485e-06, |
| "loss": 0.4998, |
| "step": 17830 |
| }, |
| { |
| "epoch": 0.6723702559077376, |
| "grad_norm": 1.6089769681179087, |
| "learning_rate": 2.929345874543896e-06, |
| "loss": 0.4887, |
| "step": 17840 |
| }, |
| { |
| "epoch": 0.6727471450646365, |
| "grad_norm": 1.5674713179458315, |
| "learning_rate": 2.923360122336686e-06, |
| "loss": 0.4847, |
| "step": 17850 |
| }, |
| { |
| "epoch": 0.6731240342215354, |
| "grad_norm": 1.584156919079405, |
| "learning_rate": 2.9173779645476474e-06, |
| "loss": 0.4628, |
| "step": 17860 |
| }, |
| { |
| "epoch": 0.6735009233784344, |
| "grad_norm": 1.6929345771758708, |
| "learning_rate": 2.911399411531188e-06, |
| "loss": 0.4748, |
| "step": 17870 |
| }, |
| { |
| "epoch": 0.6738778125353334, |
| "grad_norm": 1.787787378046894, |
| "learning_rate": 2.9054244736354766e-06, |
| "loss": 0.4735, |
| "step": 17880 |
| }, |
| { |
| "epoch": 0.6742547016922323, |
| "grad_norm": 1.5201306101583867, |
| "learning_rate": 2.899453161202425e-06, |
| "loss": 0.4833, |
| "step": 17890 |
| }, |
| { |
| "epoch": 0.6746315908491313, |
| "grad_norm": 1.9695556108547445, |
| "learning_rate": 2.893485484567669e-06, |
| "loss": 0.4861, |
| "step": 17900 |
| }, |
| { |
| "epoch": 0.6750084800060302, |
| "grad_norm": 1.4351406646686231, |
| "learning_rate": 2.887521454060551e-06, |
| "loss": 0.4673, |
| "step": 17910 |
| }, |
| { |
| "epoch": 0.6753853691629291, |
| "grad_norm": 2.0856261193107732, |
| "learning_rate": 2.881561080004104e-06, |
| "loss": 0.4938, |
| "step": 17920 |
| }, |
| { |
| "epoch": 0.6757622583198282, |
| "grad_norm": 1.609969345939983, |
| "learning_rate": 2.8756043727150295e-06, |
| "loss": 0.4843, |
| "step": 17930 |
| }, |
| { |
| "epoch": 0.6761391474767271, |
| "grad_norm": 1.3392614292717313, |
| "learning_rate": 2.8696513425036874e-06, |
| "loss": 0.4819, |
| "step": 17940 |
| }, |
| { |
| "epoch": 0.676516036633626, |
| "grad_norm": 1.7102447232516564, |
| "learning_rate": 2.8637019996740624e-06, |
| "loss": 0.494, |
| "step": 17950 |
| }, |
| { |
| "epoch": 0.676892925790525, |
| "grad_norm": 1.6998269355745326, |
| "learning_rate": 2.8577563545237686e-06, |
| "loss": 0.4653, |
| "step": 17960 |
| }, |
| { |
| "epoch": 0.677269814947424, |
| "grad_norm": 1.4862506147625136, |
| "learning_rate": 2.8518144173440153e-06, |
| "loss": 0.4922, |
| "step": 17970 |
| }, |
| { |
| "epoch": 0.6776467041043229, |
| "grad_norm": 1.510405588964004, |
| "learning_rate": 2.8458761984195913e-06, |
| "loss": 0.4763, |
| "step": 17980 |
| }, |
| { |
| "epoch": 0.6780235932612219, |
| "grad_norm": 1.5288462993209506, |
| "learning_rate": 2.839941708028856e-06, |
| "loss": 0.4736, |
| "step": 17990 |
| }, |
| { |
| "epoch": 0.6784004824181208, |
| "grad_norm": 1.649944605527973, |
| "learning_rate": 2.8340109564437028e-06, |
| "loss": 0.5076, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.6787773715750198, |
| "grad_norm": 1.517180625990711, |
| "learning_rate": 2.8280839539295685e-06, |
| "loss": 0.4903, |
| "step": 18010 |
| }, |
| { |
| "epoch": 0.6791542607319188, |
| "grad_norm": 1.6911443889236912, |
| "learning_rate": 2.822160710745392e-06, |
| "loss": 0.4963, |
| "step": 18020 |
| }, |
| { |
| "epoch": 0.6795311498888177, |
| "grad_norm": 1.870392805720581, |
| "learning_rate": 2.8162412371436087e-06, |
| "loss": 0.4636, |
| "step": 18030 |
| }, |
| { |
| "epoch": 0.6799080390457166, |
| "grad_norm": 1.7551336831390325, |
| "learning_rate": 2.8103255433701238e-06, |
| "loss": 0.505, |
| "step": 18040 |
| }, |
| { |
| "epoch": 0.6802849282026157, |
| "grad_norm": 2.112747546430556, |
| "learning_rate": 2.804413639664306e-06, |
| "loss": 0.4923, |
| "step": 18050 |
| }, |
| { |
| "epoch": 0.6806618173595146, |
| "grad_norm": 1.389558866431419, |
| "learning_rate": 2.7985055362589597e-06, |
| "loss": 0.4746, |
| "step": 18060 |
| }, |
| { |
| "epoch": 0.6810387065164135, |
| "grad_norm": 1.8602009007028841, |
| "learning_rate": 2.792601243380321e-06, |
| "loss": 0.4935, |
| "step": 18070 |
| }, |
| { |
| "epoch": 0.6814155956733124, |
| "grad_norm": 1.8232438996039204, |
| "learning_rate": 2.7867007712480145e-06, |
| "loss": 0.4638, |
| "step": 18080 |
| }, |
| { |
| "epoch": 0.6817924848302115, |
| "grad_norm": 1.422637041046491, |
| "learning_rate": 2.780804130075064e-06, |
| "loss": 0.4859, |
| "step": 18090 |
| }, |
| { |
| "epoch": 0.6821693739871104, |
| "grad_norm": 1.79592896537598, |
| "learning_rate": 2.7749113300678576e-06, |
| "loss": 0.4994, |
| "step": 18100 |
| }, |
| { |
| "epoch": 0.6825462631440093, |
| "grad_norm": 1.7163110389066987, |
| "learning_rate": 2.7690223814261358e-06, |
| "loss": 0.4649, |
| "step": 18110 |
| }, |
| { |
| "epoch": 0.6829231523009083, |
| "grad_norm": 1.4437242473003, |
| "learning_rate": 2.7631372943429724e-06, |
| "loss": 0.4818, |
| "step": 18120 |
| }, |
| { |
| "epoch": 0.6833000414578072, |
| "grad_norm": 1.799293879805964, |
| "learning_rate": 2.757256079004758e-06, |
| "loss": 0.5199, |
| "step": 18130 |
| }, |
| { |
| "epoch": 0.6836769306147062, |
| "grad_norm": 1.8978824221262882, |
| "learning_rate": 2.751378745591181e-06, |
| "loss": 0.4743, |
| "step": 18140 |
| }, |
| { |
| "epoch": 0.6840538197716052, |
| "grad_norm": 1.7079497057612991, |
| "learning_rate": 2.74550530427521e-06, |
| "loss": 0.4572, |
| "step": 18150 |
| }, |
| { |
| "epoch": 0.6844307089285041, |
| "grad_norm": 1.6167362676629933, |
| "learning_rate": 2.739635765223079e-06, |
| "loss": 0.4645, |
| "step": 18160 |
| }, |
| { |
| "epoch": 0.684807598085403, |
| "grad_norm": 1.1239553035502026, |
| "learning_rate": 2.7337701385942655e-06, |
| "loss": 0.4916, |
| "step": 18170 |
| }, |
| { |
| "epoch": 0.6851844872423021, |
| "grad_norm": 1.80414731066518, |
| "learning_rate": 2.7279084345414765e-06, |
| "loss": 0.4832, |
| "step": 18180 |
| }, |
| { |
| "epoch": 0.685561376399201, |
| "grad_norm": 1.6220429458848973, |
| "learning_rate": 2.7220506632106304e-06, |
| "loss": 0.4637, |
| "step": 18190 |
| }, |
| { |
| "epoch": 0.6859382655560999, |
| "grad_norm": 1.6869049749820662, |
| "learning_rate": 2.7161968347408325e-06, |
| "loss": 0.4966, |
| "step": 18200 |
| }, |
| { |
| "epoch": 0.6863151547129989, |
| "grad_norm": 1.6657505964750594, |
| "learning_rate": 2.710346959264369e-06, |
| "loss": 0.4561, |
| "step": 18210 |
| }, |
| { |
| "epoch": 0.6866920438698979, |
| "grad_norm": 1.3973709297787218, |
| "learning_rate": 2.7045010469066864e-06, |
| "loss": 0.4933, |
| "step": 18220 |
| }, |
| { |
| "epoch": 0.6870689330267968, |
| "grad_norm": 1.5860018125512794, |
| "learning_rate": 2.6986591077863677e-06, |
| "loss": 0.4857, |
| "step": 18230 |
| }, |
| { |
| "epoch": 0.6874458221836958, |
| "grad_norm": 1.4618918574737128, |
| "learning_rate": 2.692821152015116e-06, |
| "loss": 0.4591, |
| "step": 18240 |
| }, |
| { |
| "epoch": 0.6878227113405947, |
| "grad_norm": 1.638643622605981, |
| "learning_rate": 2.686987189697744e-06, |
| "loss": 0.4951, |
| "step": 18250 |
| }, |
| { |
| "epoch": 0.6881996004974937, |
| "grad_norm": 1.7231621888652404, |
| "learning_rate": 2.6811572309321487e-06, |
| "loss": 0.4845, |
| "step": 18260 |
| }, |
| { |
| "epoch": 0.6885764896543927, |
| "grad_norm": 1.834141519895122, |
| "learning_rate": 2.6753312858093056e-06, |
| "loss": 0.4793, |
| "step": 18270 |
| }, |
| { |
| "epoch": 0.6889533788112916, |
| "grad_norm": 1.401487676831547, |
| "learning_rate": 2.669509364413232e-06, |
| "loss": 0.4864, |
| "step": 18280 |
| }, |
| { |
| "epoch": 0.6893302679681905, |
| "grad_norm": 1.6493898881695628, |
| "learning_rate": 2.6636914768209867e-06, |
| "loss": 0.4856, |
| "step": 18290 |
| }, |
| { |
| "epoch": 0.6897071571250896, |
| "grad_norm": 1.751769186322749, |
| "learning_rate": 2.6578776331026456e-06, |
| "loss": 0.5033, |
| "step": 18300 |
| }, |
| { |
| "epoch": 0.6900840462819885, |
| "grad_norm": 1.3871471388084073, |
| "learning_rate": 2.6520678433212854e-06, |
| "loss": 0.4726, |
| "step": 18310 |
| }, |
| { |
| "epoch": 0.6904609354388874, |
| "grad_norm": 1.5531242747701188, |
| "learning_rate": 2.646262117532966e-06, |
| "loss": 0.4818, |
| "step": 18320 |
| }, |
| { |
| "epoch": 0.6908378245957864, |
| "grad_norm": 1.5566017006302422, |
| "learning_rate": 2.640460465786711e-06, |
| "loss": 0.4743, |
| "step": 18330 |
| }, |
| { |
| "epoch": 0.6912147137526853, |
| "grad_norm": 1.7092683735724328, |
| "learning_rate": 2.634662898124495e-06, |
| "loss": 0.4473, |
| "step": 18340 |
| }, |
| { |
| "epoch": 0.6915916029095843, |
| "grad_norm": 1.7674173470502206, |
| "learning_rate": 2.6288694245812217e-06, |
| "loss": 0.5052, |
| "step": 18350 |
| }, |
| { |
| "epoch": 0.6919684920664833, |
| "grad_norm": 1.7113075330809617, |
| "learning_rate": 2.6230800551847096e-06, |
| "loss": 0.4968, |
| "step": 18360 |
| }, |
| { |
| "epoch": 0.6923453812233822, |
| "grad_norm": 1.666331092982349, |
| "learning_rate": 2.6172947999556723e-06, |
| "loss": 0.4852, |
| "step": 18370 |
| }, |
| { |
| "epoch": 0.6927222703802811, |
| "grad_norm": 1.7496234849403591, |
| "learning_rate": 2.6115136689077037e-06, |
| "loss": 0.4809, |
| "step": 18380 |
| }, |
| { |
| "epoch": 0.6930991595371802, |
| "grad_norm": 1.4830201051445864, |
| "learning_rate": 2.605736672047257e-06, |
| "loss": 0.4579, |
| "step": 18390 |
| }, |
| { |
| "epoch": 0.6934760486940791, |
| "grad_norm": 1.6817677793368029, |
| "learning_rate": 2.5999638193736337e-06, |
| "loss": 0.4922, |
| "step": 18400 |
| }, |
| { |
| "epoch": 0.693852937850978, |
| "grad_norm": 1.4954573467617007, |
| "learning_rate": 2.594195120878954e-06, |
| "loss": 0.5002, |
| "step": 18410 |
| }, |
| { |
| "epoch": 0.6942298270078769, |
| "grad_norm": 1.5504183128022453, |
| "learning_rate": 2.5884305865481572e-06, |
| "loss": 0.4834, |
| "step": 18420 |
| }, |
| { |
| "epoch": 0.694606716164776, |
| "grad_norm": 1.554112427963254, |
| "learning_rate": 2.582670226358971e-06, |
| "loss": 0.4486, |
| "step": 18430 |
| }, |
| { |
| "epoch": 0.6949836053216749, |
| "grad_norm": 1.4901063381514676, |
| "learning_rate": 2.576914050281899e-06, |
| "loss": 0.494, |
| "step": 18440 |
| }, |
| { |
| "epoch": 0.6953604944785738, |
| "grad_norm": 1.5107903863102292, |
| "learning_rate": 2.5711620682801973e-06, |
| "loss": 0.4662, |
| "step": 18450 |
| }, |
| { |
| "epoch": 0.6957373836354728, |
| "grad_norm": 1.4705557147554638, |
| "learning_rate": 2.56541429030987e-06, |
| "loss": 0.4955, |
| "step": 18460 |
| }, |
| { |
| "epoch": 0.6961142727923718, |
| "grad_norm": 1.5353961394271842, |
| "learning_rate": 2.5596707263196386e-06, |
| "loss": 0.4796, |
| "step": 18470 |
| }, |
| { |
| "epoch": 0.6964911619492707, |
| "grad_norm": 1.3895564519970267, |
| "learning_rate": 2.55393138625094e-06, |
| "loss": 0.4634, |
| "step": 18480 |
| }, |
| { |
| "epoch": 0.6968680511061697, |
| "grad_norm": 1.3450383420888639, |
| "learning_rate": 2.548196280037886e-06, |
| "loss": 0.4798, |
| "step": 18490 |
| }, |
| { |
| "epoch": 0.6972449402630686, |
| "grad_norm": 1.8244663013187354, |
| "learning_rate": 2.5424654176072714e-06, |
| "loss": 0.4746, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.6976218294199676, |
| "grad_norm": 1.5654399272762263, |
| "learning_rate": 2.5367388088785413e-06, |
| "loss": 0.4899, |
| "step": 18510 |
| }, |
| { |
| "epoch": 0.6979987185768666, |
| "grad_norm": 1.4654205360338723, |
| "learning_rate": 2.5310164637637773e-06, |
| "loss": 0.49, |
| "step": 18520 |
| }, |
| { |
| "epoch": 0.6983756077337655, |
| "grad_norm": 1.4467059234382345, |
| "learning_rate": 2.525298392167683e-06, |
| "loss": 0.4427, |
| "step": 18530 |
| }, |
| { |
| "epoch": 0.6987524968906644, |
| "grad_norm": 1.5408140891984066, |
| "learning_rate": 2.519584603987566e-06, |
| "loss": 0.4758, |
| "step": 18540 |
| }, |
| { |
| "epoch": 0.6991293860475634, |
| "grad_norm": 1.7821474974934417, |
| "learning_rate": 2.513875109113316e-06, |
| "loss": 0.4865, |
| "step": 18550 |
| }, |
| { |
| "epoch": 0.6995062752044624, |
| "grad_norm": 1.9953848627201554, |
| "learning_rate": 2.5081699174273955e-06, |
| "loss": 0.4808, |
| "step": 18560 |
| }, |
| { |
| "epoch": 0.6998831643613613, |
| "grad_norm": 1.741343264928784, |
| "learning_rate": 2.5024690388048154e-06, |
| "loss": 0.502, |
| "step": 18570 |
| }, |
| { |
| "epoch": 0.7002600535182603, |
| "grad_norm": 1.6437287473675368, |
| "learning_rate": 2.4967724831131244e-06, |
| "loss": 0.4969, |
| "step": 18580 |
| }, |
| { |
| "epoch": 0.7006369426751592, |
| "grad_norm": 1.6241010396370417, |
| "learning_rate": 2.4910802602123865e-06, |
| "loss": 0.5005, |
| "step": 18590 |
| }, |
| { |
| "epoch": 0.7010138318320582, |
| "grad_norm": 1.4898387628775849, |
| "learning_rate": 2.4853923799551677e-06, |
| "loss": 0.484, |
| "step": 18600 |
| }, |
| { |
| "epoch": 0.7013907209889572, |
| "grad_norm": 1.6400342378919166, |
| "learning_rate": 2.4797088521865138e-06, |
| "loss": 0.4722, |
| "step": 18610 |
| }, |
| { |
| "epoch": 0.7017676101458561, |
| "grad_norm": 1.479505953040376, |
| "learning_rate": 2.474029686743939e-06, |
| "loss": 0.5037, |
| "step": 18620 |
| }, |
| { |
| "epoch": 0.702144499302755, |
| "grad_norm": 1.4589932060948365, |
| "learning_rate": 2.4683548934574115e-06, |
| "loss": 0.4609, |
| "step": 18630 |
| }, |
| { |
| "epoch": 0.7025213884596541, |
| "grad_norm": 1.551678370051871, |
| "learning_rate": 2.462684482149327e-06, |
| "loss": 0.4801, |
| "step": 18640 |
| }, |
| { |
| "epoch": 0.702898277616553, |
| "grad_norm": 1.7192058552759832, |
| "learning_rate": 2.4570184626344944e-06, |
| "loss": 0.4644, |
| "step": 18650 |
| }, |
| { |
| "epoch": 0.7032751667734519, |
| "grad_norm": 1.631280002083957, |
| "learning_rate": 2.451356844720125e-06, |
| "loss": 0.4907, |
| "step": 18660 |
| }, |
| { |
| "epoch": 0.7036520559303509, |
| "grad_norm": 1.6750923039054713, |
| "learning_rate": 2.445699638205809e-06, |
| "loss": 0.5031, |
| "step": 18670 |
| }, |
| { |
| "epoch": 0.7040289450872499, |
| "grad_norm": 1.9691622944065195, |
| "learning_rate": 2.440046852883507e-06, |
| "loss": 0.4536, |
| "step": 18680 |
| }, |
| { |
| "epoch": 0.7044058342441488, |
| "grad_norm": 1.507322764879853, |
| "learning_rate": 2.4343984985375167e-06, |
| "loss": 0.4725, |
| "step": 18690 |
| }, |
| { |
| "epoch": 0.7047827234010478, |
| "grad_norm": 1.7740189927150876, |
| "learning_rate": 2.4287545849444747e-06, |
| "loss": 0.4698, |
| "step": 18700 |
| }, |
| { |
| "epoch": 0.7051596125579467, |
| "grad_norm": 1.5752161156901476, |
| "learning_rate": 2.423115121873328e-06, |
| "loss": 0.4571, |
| "step": 18710 |
| }, |
| { |
| "epoch": 0.7055365017148456, |
| "grad_norm": 1.9762064673353243, |
| "learning_rate": 2.4174801190853196e-06, |
| "loss": 0.4921, |
| "step": 18720 |
| }, |
| { |
| "epoch": 0.7059133908717447, |
| "grad_norm": 1.6174279577484105, |
| "learning_rate": 2.411849586333974e-06, |
| "loss": 0.4975, |
| "step": 18730 |
| }, |
| { |
| "epoch": 0.7062902800286436, |
| "grad_norm": 1.7193669580907476, |
| "learning_rate": 2.406223533365078e-06, |
| "loss": 0.4873, |
| "step": 18740 |
| }, |
| { |
| "epoch": 0.7066671691855425, |
| "grad_norm": 1.7235537874317444, |
| "learning_rate": 2.4006019699166643e-06, |
| "loss": 0.4685, |
| "step": 18750 |
| }, |
| { |
| "epoch": 0.7070440583424414, |
| "grad_norm": 1.63971711730806, |
| "learning_rate": 2.394984905718994e-06, |
| "loss": 0.4818, |
| "step": 18760 |
| }, |
| { |
| "epoch": 0.7074209474993405, |
| "grad_norm": 1.4461491095827899, |
| "learning_rate": 2.3893723504945425e-06, |
| "loss": 0.4914, |
| "step": 18770 |
| }, |
| { |
| "epoch": 0.7077978366562394, |
| "grad_norm": 1.4529129185509562, |
| "learning_rate": 2.3837643139579786e-06, |
| "loss": 0.4628, |
| "step": 18780 |
| }, |
| { |
| "epoch": 0.7081747258131383, |
| "grad_norm": 1.53376236798455, |
| "learning_rate": 2.378160805816151e-06, |
| "loss": 0.4969, |
| "step": 18790 |
| }, |
| { |
| "epoch": 0.7085516149700373, |
| "grad_norm": 1.6666468688910057, |
| "learning_rate": 2.3725618357680697e-06, |
| "loss": 0.4796, |
| "step": 18800 |
| }, |
| { |
| "epoch": 0.7089285041269363, |
| "grad_norm": 1.6506198653287087, |
| "learning_rate": 2.366967413504892e-06, |
| "loss": 0.4854, |
| "step": 18810 |
| }, |
| { |
| "epoch": 0.7093053932838352, |
| "grad_norm": 1.7178732497859122, |
| "learning_rate": 2.361377548709897e-06, |
| "loss": 0.4727, |
| "step": 18820 |
| }, |
| { |
| "epoch": 0.7096822824407342, |
| "grad_norm": 1.4181246211634644, |
| "learning_rate": 2.3557922510584837e-06, |
| "loss": 0.4836, |
| "step": 18830 |
| }, |
| { |
| "epoch": 0.7100591715976331, |
| "grad_norm": 1.5200708457900483, |
| "learning_rate": 2.3502115302181415e-06, |
| "loss": 0.4736, |
| "step": 18840 |
| }, |
| { |
| "epoch": 0.7104360607545321, |
| "grad_norm": 1.8352918119756971, |
| "learning_rate": 2.3446353958484404e-06, |
| "loss": 0.4932, |
| "step": 18850 |
| }, |
| { |
| "epoch": 0.7108129499114311, |
| "grad_norm": 1.6181777322718822, |
| "learning_rate": 2.339063857601006e-06, |
| "loss": 0.481, |
| "step": 18860 |
| }, |
| { |
| "epoch": 0.71118983906833, |
| "grad_norm": 1.6727732163944582, |
| "learning_rate": 2.3334969251195137e-06, |
| "loss": 0.4688, |
| "step": 18870 |
| }, |
| { |
| "epoch": 0.7115667282252289, |
| "grad_norm": 1.5382920829842202, |
| "learning_rate": 2.3279346080396652e-06, |
| "loss": 0.4799, |
| "step": 18880 |
| }, |
| { |
| "epoch": 0.711943617382128, |
| "grad_norm": 1.6126228289797586, |
| "learning_rate": 2.322376915989178e-06, |
| "loss": 0.4579, |
| "step": 18890 |
| }, |
| { |
| "epoch": 0.7123205065390269, |
| "grad_norm": 2.051868736900158, |
| "learning_rate": 2.3168238585877552e-06, |
| "loss": 0.5088, |
| "step": 18900 |
| }, |
| { |
| "epoch": 0.7126973956959258, |
| "grad_norm": 1.485292408430921, |
| "learning_rate": 2.3112754454470847e-06, |
| "loss": 0.4821, |
| "step": 18910 |
| }, |
| { |
| "epoch": 0.7130742848528248, |
| "grad_norm": 1.488486027715864, |
| "learning_rate": 2.305731686170814e-06, |
| "loss": 0.4498, |
| "step": 18920 |
| }, |
| { |
| "epoch": 0.7134511740097237, |
| "grad_norm": 1.5197855092868775, |
| "learning_rate": 2.300192590354534e-06, |
| "loss": 0.466, |
| "step": 18930 |
| }, |
| { |
| "epoch": 0.7138280631666227, |
| "grad_norm": 1.723349595195898, |
| "learning_rate": 2.2946581675857667e-06, |
| "loss": 0.4954, |
| "step": 18940 |
| }, |
| { |
| "epoch": 0.7142049523235217, |
| "grad_norm": 1.8046482293904333, |
| "learning_rate": 2.2891284274439424e-06, |
| "loss": 0.4798, |
| "step": 18950 |
| }, |
| { |
| "epoch": 0.7145818414804206, |
| "grad_norm": 1.5075542664335193, |
| "learning_rate": 2.2836033795003882e-06, |
| "loss": 0.4625, |
| "step": 18960 |
| }, |
| { |
| "epoch": 0.7149587306373195, |
| "grad_norm": 1.7547546524633186, |
| "learning_rate": 2.2780830333183086e-06, |
| "loss": 0.4916, |
| "step": 18970 |
| }, |
| { |
| "epoch": 0.7153356197942186, |
| "grad_norm": 1.5599589166635623, |
| "learning_rate": 2.2725673984527706e-06, |
| "loss": 0.4878, |
| "step": 18980 |
| }, |
| { |
| "epoch": 0.7157125089511175, |
| "grad_norm": 1.5938966644066468, |
| "learning_rate": 2.2670564844506863e-06, |
| "loss": 0.4914, |
| "step": 18990 |
| }, |
| { |
| "epoch": 0.7160893981080164, |
| "grad_norm": 1.758549701257888, |
| "learning_rate": 2.2615503008507965e-06, |
| "loss": 0.46, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.7164662872649153, |
| "grad_norm": 1.6218207774272693, |
| "learning_rate": 2.256048857183656e-06, |
| "loss": 0.4709, |
| "step": 19010 |
| }, |
| { |
| "epoch": 0.7168431764218144, |
| "grad_norm": 2.0658511464902447, |
| "learning_rate": 2.2505521629716095e-06, |
| "loss": 0.4902, |
| "step": 19020 |
| }, |
| { |
| "epoch": 0.7172200655787133, |
| "grad_norm": 1.6814039721656933, |
| "learning_rate": 2.245060227728785e-06, |
| "loss": 0.4711, |
| "step": 19030 |
| }, |
| { |
| "epoch": 0.7175969547356122, |
| "grad_norm": 1.7568224370312793, |
| "learning_rate": 2.2395730609610777e-06, |
| "loss": 0.4949, |
| "step": 19040 |
| }, |
| { |
| "epoch": 0.7179738438925112, |
| "grad_norm": 1.8189719042512071, |
| "learning_rate": 2.234090672166122e-06, |
| "loss": 0.5321, |
| "step": 19050 |
| }, |
| { |
| "epoch": 0.7183507330494102, |
| "grad_norm": 1.568239365153995, |
| "learning_rate": 2.2286130708332876e-06, |
| "loss": 0.4724, |
| "step": 19060 |
| }, |
| { |
| "epoch": 0.7187276222063091, |
| "grad_norm": 2.0389164815636325, |
| "learning_rate": 2.22314026644365e-06, |
| "loss": 0.4657, |
| "step": 19070 |
| }, |
| { |
| "epoch": 0.7191045113632081, |
| "grad_norm": 1.385299223521904, |
| "learning_rate": 2.2176722684699882e-06, |
| "loss": 0.4835, |
| "step": 19080 |
| }, |
| { |
| "epoch": 0.719481400520107, |
| "grad_norm": 1.4633858575844483, |
| "learning_rate": 2.2122090863767627e-06, |
| "loss": 0.4542, |
| "step": 19090 |
| }, |
| { |
| "epoch": 0.719858289677006, |
| "grad_norm": 1.7748405690606124, |
| "learning_rate": 2.206750729620097e-06, |
| "loss": 0.4782, |
| "step": 19100 |
| }, |
| { |
| "epoch": 0.720235178833905, |
| "grad_norm": 1.5545823449736953, |
| "learning_rate": 2.201297207647757e-06, |
| "loss": 0.4548, |
| "step": 19110 |
| }, |
| { |
| "epoch": 0.7206120679908039, |
| "grad_norm": 2.292728574012736, |
| "learning_rate": 2.195848529899147e-06, |
| "loss": 0.4722, |
| "step": 19120 |
| }, |
| { |
| "epoch": 0.7209889571477028, |
| "grad_norm": 1.716053797160062, |
| "learning_rate": 2.1904047058052842e-06, |
| "loss": 0.4828, |
| "step": 19130 |
| }, |
| { |
| "epoch": 0.7213658463046018, |
| "grad_norm": 1.714097262044458, |
| "learning_rate": 2.1849657447887847e-06, |
| "loss": 0.4735, |
| "step": 19140 |
| }, |
| { |
| "epoch": 0.7217427354615008, |
| "grad_norm": 1.7301553058622174, |
| "learning_rate": 2.1795316562638462e-06, |
| "loss": 0.485, |
| "step": 19150 |
| }, |
| { |
| "epoch": 0.7221196246183997, |
| "grad_norm": 2.437219895216966, |
| "learning_rate": 2.1741024496362344e-06, |
| "loss": 0.455, |
| "step": 19160 |
| }, |
| { |
| "epoch": 0.7224965137752987, |
| "grad_norm": 1.7131734298817816, |
| "learning_rate": 2.1686781343032647e-06, |
| "loss": 0.484, |
| "step": 19170 |
| }, |
| { |
| "epoch": 0.7228734029321976, |
| "grad_norm": 1.4800874315130503, |
| "learning_rate": 2.1632587196537853e-06, |
| "loss": 0.4646, |
| "step": 19180 |
| }, |
| { |
| "epoch": 0.7232502920890966, |
| "grad_norm": 1.6136385919031264, |
| "learning_rate": 2.1578442150681615e-06, |
| "loss": 0.4615, |
| "step": 19190 |
| }, |
| { |
| "epoch": 0.7236271812459956, |
| "grad_norm": 1.678439760720894, |
| "learning_rate": 2.1524346299182626e-06, |
| "loss": 0.4632, |
| "step": 19200 |
| }, |
| { |
| "epoch": 0.7240040704028945, |
| "grad_norm": 1.5309141919283373, |
| "learning_rate": 2.14702997356744e-06, |
| "loss": 0.4585, |
| "step": 19210 |
| }, |
| { |
| "epoch": 0.7243809595597934, |
| "grad_norm": 1.393910668679631, |
| "learning_rate": 2.1416302553705165e-06, |
| "loss": 0.4844, |
| "step": 19220 |
| }, |
| { |
| "epoch": 0.7247578487166925, |
| "grad_norm": 1.6745490665396063, |
| "learning_rate": 2.136235484673761e-06, |
| "loss": 0.4523, |
| "step": 19230 |
| }, |
| { |
| "epoch": 0.7251347378735914, |
| "grad_norm": 1.6646971203310925, |
| "learning_rate": 2.1308456708148896e-06, |
| "loss": 0.4777, |
| "step": 19240 |
| }, |
| { |
| "epoch": 0.7255116270304903, |
| "grad_norm": 1.7651217240185753, |
| "learning_rate": 2.1254608231230312e-06, |
| "loss": 0.4985, |
| "step": 19250 |
| }, |
| { |
| "epoch": 0.7258885161873893, |
| "grad_norm": 1.458188978373751, |
| "learning_rate": 2.120080950918722e-06, |
| "loss": 0.4609, |
| "step": 19260 |
| }, |
| { |
| "epoch": 0.7262654053442883, |
| "grad_norm": 1.3512368514891746, |
| "learning_rate": 2.1147060635138817e-06, |
| "loss": 0.4586, |
| "step": 19270 |
| }, |
| { |
| "epoch": 0.7266422945011872, |
| "grad_norm": 1.6354165388054114, |
| "learning_rate": 2.1093361702118065e-06, |
| "loss": 0.4673, |
| "step": 19280 |
| }, |
| { |
| "epoch": 0.7270191836580862, |
| "grad_norm": 1.6908458833172149, |
| "learning_rate": 2.103971280307146e-06, |
| "loss": 0.4652, |
| "step": 19290 |
| }, |
| { |
| "epoch": 0.7273960728149851, |
| "grad_norm": 1.5384882126868258, |
| "learning_rate": 2.098611403085895e-06, |
| "loss": 0.4553, |
| "step": 19300 |
| }, |
| { |
| "epoch": 0.727772961971884, |
| "grad_norm": 1.3879436105661611, |
| "learning_rate": 2.0932565478253624e-06, |
| "loss": 0.4606, |
| "step": 19310 |
| }, |
| { |
| "epoch": 0.728149851128783, |
| "grad_norm": 1.7839927445388464, |
| "learning_rate": 2.087906723794171e-06, |
| "loss": 0.4913, |
| "step": 19320 |
| }, |
| { |
| "epoch": 0.728526740285682, |
| "grad_norm": 1.5679146539305475, |
| "learning_rate": 2.0825619402522356e-06, |
| "loss": 0.4764, |
| "step": 19330 |
| }, |
| { |
| "epoch": 0.7289036294425809, |
| "grad_norm": 1.6427710422238193, |
| "learning_rate": 2.077222206450743e-06, |
| "loss": 0.4941, |
| "step": 19340 |
| }, |
| { |
| "epoch": 0.7292805185994798, |
| "grad_norm": 1.9163137570823914, |
| "learning_rate": 2.0718875316321413e-06, |
| "loss": 0.4728, |
| "step": 19350 |
| }, |
| { |
| "epoch": 0.7296574077563789, |
| "grad_norm": 1.7523481576736066, |
| "learning_rate": 2.066557925030123e-06, |
| "loss": 0.4837, |
| "step": 19360 |
| }, |
| { |
| "epoch": 0.7300342969132778, |
| "grad_norm": 1.6581516586288727, |
| "learning_rate": 2.0612333958696068e-06, |
| "loss": 0.4649, |
| "step": 19370 |
| }, |
| { |
| "epoch": 0.7304111860701767, |
| "grad_norm": 1.833543727989413, |
| "learning_rate": 2.0559139533667227e-06, |
| "loss": 0.4888, |
| "step": 19380 |
| }, |
| { |
| "epoch": 0.7307880752270757, |
| "grad_norm": 2.948145026805694, |
| "learning_rate": 2.050599606728798e-06, |
| "loss": 0.4679, |
| "step": 19390 |
| }, |
| { |
| "epoch": 0.7311649643839747, |
| "grad_norm": 1.6265548517672246, |
| "learning_rate": 2.045290365154338e-06, |
| "loss": 0.4561, |
| "step": 19400 |
| }, |
| { |
| "epoch": 0.7315418535408736, |
| "grad_norm": 1.5522637397176864, |
| "learning_rate": 2.039986237833012e-06, |
| "loss": 0.4481, |
| "step": 19410 |
| }, |
| { |
| "epoch": 0.7319187426977726, |
| "grad_norm": 1.5713552061547922, |
| "learning_rate": 2.0346872339456385e-06, |
| "loss": 0.4555, |
| "step": 19420 |
| }, |
| { |
| "epoch": 0.7322956318546715, |
| "grad_norm": 1.9565945231630675, |
| "learning_rate": 2.0293933626641677e-06, |
| "loss": 0.4705, |
| "step": 19430 |
| }, |
| { |
| "epoch": 0.7326725210115705, |
| "grad_norm": 1.4336544390295196, |
| "learning_rate": 2.0241046331516596e-06, |
| "loss": 0.475, |
| "step": 19440 |
| }, |
| { |
| "epoch": 0.7330494101684695, |
| "grad_norm": 1.9396889631970387, |
| "learning_rate": 2.018821054562286e-06, |
| "loss": 0.4681, |
| "step": 19450 |
| }, |
| { |
| "epoch": 0.7334262993253684, |
| "grad_norm": 1.567151805567963, |
| "learning_rate": 2.0135426360412945e-06, |
| "loss": 0.4835, |
| "step": 19460 |
| }, |
| { |
| "epoch": 0.7338031884822673, |
| "grad_norm": 1.9444973709798723, |
| "learning_rate": 2.008269386725006e-06, |
| "loss": 0.4698, |
| "step": 19470 |
| }, |
| { |
| "epoch": 0.7341800776391664, |
| "grad_norm": 1.6143737110809373, |
| "learning_rate": 2.003001315740788e-06, |
| "loss": 0.4718, |
| "step": 19480 |
| }, |
| { |
| "epoch": 0.7345569667960653, |
| "grad_norm": 1.4062299288988356, |
| "learning_rate": 1.997738432207048e-06, |
| "loss": 0.486, |
| "step": 19490 |
| }, |
| { |
| "epoch": 0.7349338559529642, |
| "grad_norm": 1.8345777555224132, |
| "learning_rate": 1.9924807452332203e-06, |
| "loss": 0.4982, |
| "step": 19500 |
| }, |
| { |
| "epoch": 0.7353107451098632, |
| "grad_norm": 1.3571830592619372, |
| "learning_rate": 1.9872282639197384e-06, |
| "loss": 0.4541, |
| "step": 19510 |
| }, |
| { |
| "epoch": 0.7356876342667621, |
| "grad_norm": 1.5972192547656638, |
| "learning_rate": 1.981980997358023e-06, |
| "loss": 0.4876, |
| "step": 19520 |
| }, |
| { |
| "epoch": 0.7360645234236611, |
| "grad_norm": 1.6793172446301055, |
| "learning_rate": 1.976738954630475e-06, |
| "loss": 0.4902, |
| "step": 19530 |
| }, |
| { |
| "epoch": 0.7364414125805601, |
| "grad_norm": 1.7207394609259994, |
| "learning_rate": 1.97150214481045e-06, |
| "loss": 0.4631, |
| "step": 19540 |
| }, |
| { |
| "epoch": 0.736818301737459, |
| "grad_norm": 1.7659650208337772, |
| "learning_rate": 1.9662705769622473e-06, |
| "loss": 0.475, |
| "step": 19550 |
| }, |
| { |
| "epoch": 0.7371951908943579, |
| "grad_norm": 2.259295033352332, |
| "learning_rate": 1.9610442601410924e-06, |
| "loss": 0.4748, |
| "step": 19560 |
| }, |
| { |
| "epoch": 0.737572080051257, |
| "grad_norm": 1.4373497805396753, |
| "learning_rate": 1.955823203393122e-06, |
| "loss": 0.4417, |
| "step": 19570 |
| }, |
| { |
| "epoch": 0.7379489692081559, |
| "grad_norm": 1.7830963074391613, |
| "learning_rate": 1.9506074157553674e-06, |
| "loss": 0.4995, |
| "step": 19580 |
| }, |
| { |
| "epoch": 0.7383258583650548, |
| "grad_norm": 1.6506977257382058, |
| "learning_rate": 1.9453969062557413e-06, |
| "loss": 0.4704, |
| "step": 19590 |
| }, |
| { |
| "epoch": 0.7387027475219538, |
| "grad_norm": 1.8345545514824426, |
| "learning_rate": 1.94019168391302e-06, |
| "loss": 0.4513, |
| "step": 19600 |
| }, |
| { |
| "epoch": 0.7390796366788528, |
| "grad_norm": 1.5681906345509673, |
| "learning_rate": 1.9349917577368278e-06, |
| "loss": 0.4633, |
| "step": 19610 |
| }, |
| { |
| "epoch": 0.7394565258357517, |
| "grad_norm": 1.1866445291178405, |
| "learning_rate": 1.929797136727622e-06, |
| "loss": 0.4482, |
| "step": 19620 |
| }, |
| { |
| "epoch": 0.7398334149926507, |
| "grad_norm": 1.7441782836688702, |
| "learning_rate": 1.924607829876679e-06, |
| "loss": 0.4656, |
| "step": 19630 |
| }, |
| { |
| "epoch": 0.7402103041495496, |
| "grad_norm": 1.5560618510253559, |
| "learning_rate": 1.9194238461660715e-06, |
| "loss": 0.4713, |
| "step": 19640 |
| }, |
| { |
| "epoch": 0.7405871933064486, |
| "grad_norm": 1.612264222679392, |
| "learning_rate": 1.9142451945686675e-06, |
| "loss": 0.4888, |
| "step": 19650 |
| }, |
| { |
| "epoch": 0.7409640824633476, |
| "grad_norm": 1.6525004938247971, |
| "learning_rate": 1.909071884048098e-06, |
| "loss": 0.4831, |
| "step": 19660 |
| }, |
| { |
| "epoch": 0.7413409716202465, |
| "grad_norm": 1.8991946449473516, |
| "learning_rate": 1.9039039235587549e-06, |
| "loss": 0.4793, |
| "step": 19670 |
| }, |
| { |
| "epoch": 0.7417178607771454, |
| "grad_norm": 1.661925662248117, |
| "learning_rate": 1.898741322045763e-06, |
| "loss": 0.468, |
| "step": 19680 |
| }, |
| { |
| "epoch": 0.7420947499340445, |
| "grad_norm": 1.6008784105624398, |
| "learning_rate": 1.8935840884449774e-06, |
| "loss": 0.4959, |
| "step": 19690 |
| }, |
| { |
| "epoch": 0.7424716390909434, |
| "grad_norm": 1.8462741745907572, |
| "learning_rate": 1.888432231682958e-06, |
| "loss": 0.447, |
| "step": 19700 |
| }, |
| { |
| "epoch": 0.7428485282478423, |
| "grad_norm": 1.6621308425745434, |
| "learning_rate": 1.8832857606769645e-06, |
| "loss": 0.454, |
| "step": 19710 |
| }, |
| { |
| "epoch": 0.7432254174047412, |
| "grad_norm": 1.4464720693906488, |
| "learning_rate": 1.8781446843349255e-06, |
| "loss": 0.4744, |
| "step": 19720 |
| }, |
| { |
| "epoch": 0.7436023065616402, |
| "grad_norm": 1.5289864438887075, |
| "learning_rate": 1.8730090115554377e-06, |
| "loss": 0.4847, |
| "step": 19730 |
| }, |
| { |
| "epoch": 0.7439791957185392, |
| "grad_norm": 1.499672820897478, |
| "learning_rate": 1.8678787512277441e-06, |
| "loss": 0.4696, |
| "step": 19740 |
| }, |
| { |
| "epoch": 0.7443560848754381, |
| "grad_norm": 1.9522488619546912, |
| "learning_rate": 1.8627539122317184e-06, |
| "loss": 0.51, |
| "step": 19750 |
| }, |
| { |
| "epoch": 0.7447329740323371, |
| "grad_norm": 1.578646901148932, |
| "learning_rate": 1.8576345034378518e-06, |
| "loss": 0.4626, |
| "step": 19760 |
| }, |
| { |
| "epoch": 0.745109863189236, |
| "grad_norm": 1.6897078881033194, |
| "learning_rate": 1.8525205337072356e-06, |
| "loss": 0.4986, |
| "step": 19770 |
| }, |
| { |
| "epoch": 0.745486752346135, |
| "grad_norm": 1.6933365658689057, |
| "learning_rate": 1.8474120118915468e-06, |
| "loss": 0.4989, |
| "step": 19780 |
| }, |
| { |
| "epoch": 0.745863641503034, |
| "grad_norm": 1.5425636755893886, |
| "learning_rate": 1.8423089468330323e-06, |
| "loss": 0.4755, |
| "step": 19790 |
| }, |
| { |
| "epoch": 0.7462405306599329, |
| "grad_norm": 1.7854964281266579, |
| "learning_rate": 1.8372113473644954e-06, |
| "loss": 0.4677, |
| "step": 19800 |
| }, |
| { |
| "epoch": 0.7466174198168318, |
| "grad_norm": 1.5758981880661738, |
| "learning_rate": 1.8321192223092783e-06, |
| "loss": 0.4596, |
| "step": 19810 |
| }, |
| { |
| "epoch": 0.7469943089737309, |
| "grad_norm": 1.7615382948562168, |
| "learning_rate": 1.8270325804812467e-06, |
| "loss": 0.4959, |
| "step": 19820 |
| }, |
| { |
| "epoch": 0.7473711981306298, |
| "grad_norm": 1.6845149695852346, |
| "learning_rate": 1.8219514306847769e-06, |
| "loss": 0.453, |
| "step": 19830 |
| }, |
| { |
| "epoch": 0.7477480872875287, |
| "grad_norm": 1.5622386509669985, |
| "learning_rate": 1.8168757817147408e-06, |
| "loss": 0.4636, |
| "step": 19840 |
| }, |
| { |
| "epoch": 0.7481249764444277, |
| "grad_norm": 1.659724890019801, |
| "learning_rate": 1.8118056423564807e-06, |
| "loss": 0.4773, |
| "step": 19850 |
| }, |
| { |
| "epoch": 0.7485018656013267, |
| "grad_norm": 1.6175612238786405, |
| "learning_rate": 1.8067410213858144e-06, |
| "loss": 0.4586, |
| "step": 19860 |
| }, |
| { |
| "epoch": 0.7488787547582256, |
| "grad_norm": 1.2204409557789897, |
| "learning_rate": 1.8016819275690005e-06, |
| "loss": 0.4876, |
| "step": 19870 |
| }, |
| { |
| "epoch": 0.7492556439151246, |
| "grad_norm": 1.5479726038649555, |
| "learning_rate": 1.7966283696627334e-06, |
| "loss": 0.4903, |
| "step": 19880 |
| }, |
| { |
| "epoch": 0.7496325330720235, |
| "grad_norm": 1.5825271274712456, |
| "learning_rate": 1.791580356414122e-06, |
| "loss": 0.48, |
| "step": 19890 |
| }, |
| { |
| "epoch": 0.7500094222289225, |
| "grad_norm": 1.6729980724400284, |
| "learning_rate": 1.7865378965606816e-06, |
| "loss": 0.4971, |
| "step": 19900 |
| }, |
| { |
| "epoch": 0.7503863113858215, |
| "grad_norm": 1.49855379749909, |
| "learning_rate": 1.7815009988303128e-06, |
| "loss": 0.4559, |
| "step": 19910 |
| }, |
| { |
| "epoch": 0.7507632005427204, |
| "grad_norm": 1.867864298503486, |
| "learning_rate": 1.7764696719412955e-06, |
| "loss": 0.4676, |
| "step": 19920 |
| }, |
| { |
| "epoch": 0.7511400896996193, |
| "grad_norm": 1.398138122231671, |
| "learning_rate": 1.7714439246022563e-06, |
| "loss": 0.4665, |
| "step": 19930 |
| }, |
| { |
| "epoch": 0.7515169788565182, |
| "grad_norm": 1.679272548470173, |
| "learning_rate": 1.7664237655121712e-06, |
| "loss": 0.4845, |
| "step": 19940 |
| }, |
| { |
| "epoch": 0.7518938680134173, |
| "grad_norm": 1.6071742957955386, |
| "learning_rate": 1.7614092033603435e-06, |
| "loss": 0.4762, |
| "step": 19950 |
| }, |
| { |
| "epoch": 0.7522707571703162, |
| "grad_norm": 2.0121603774745, |
| "learning_rate": 1.7564002468263864e-06, |
| "loss": 0.4983, |
| "step": 19960 |
| }, |
| { |
| "epoch": 0.7526476463272151, |
| "grad_norm": 1.6878110868707674, |
| "learning_rate": 1.7513969045802121e-06, |
| "loss": 0.4774, |
| "step": 19970 |
| }, |
| { |
| "epoch": 0.7530245354841141, |
| "grad_norm": 1.5812116606356919, |
| "learning_rate": 1.7463991852820146e-06, |
| "loss": 0.4694, |
| "step": 19980 |
| }, |
| { |
| "epoch": 0.7534014246410131, |
| "grad_norm": 1.6728801843104153, |
| "learning_rate": 1.741407097582255e-06, |
| "loss": 0.4812, |
| "step": 19990 |
| }, |
| { |
| "epoch": 0.753778313797912, |
| "grad_norm": 1.558056793965425, |
| "learning_rate": 1.7364206501216468e-06, |
| "loss": 0.4771, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.754155202954811, |
| "grad_norm": 1.572534621526344, |
| "learning_rate": 1.7314398515311425e-06, |
| "loss": 0.4825, |
| "step": 20010 |
| }, |
| { |
| "epoch": 0.7545320921117099, |
| "grad_norm": 1.7319010605390628, |
| "learning_rate": 1.7264647104319144e-06, |
| "loss": 0.4924, |
| "step": 20020 |
| }, |
| { |
| "epoch": 0.754908981268609, |
| "grad_norm": 1.8088713988535818, |
| "learning_rate": 1.7214952354353442e-06, |
| "loss": 0.5092, |
| "step": 20030 |
| }, |
| { |
| "epoch": 0.7552858704255079, |
| "grad_norm": 1.6367538520605005, |
| "learning_rate": 1.7165314351430073e-06, |
| "loss": 0.4853, |
| "step": 20040 |
| }, |
| { |
| "epoch": 0.7556627595824068, |
| "grad_norm": 1.8009115510393299, |
| "learning_rate": 1.7115733181466521e-06, |
| "loss": 0.4861, |
| "step": 20050 |
| }, |
| { |
| "epoch": 0.7560396487393057, |
| "grad_norm": 1.647359105340635, |
| "learning_rate": 1.706620893028193e-06, |
| "loss": 0.4872, |
| "step": 20060 |
| }, |
| { |
| "epoch": 0.7564165378962048, |
| "grad_norm": 1.7977265334517736, |
| "learning_rate": 1.7016741683596956e-06, |
| "loss": 0.4861, |
| "step": 20070 |
| }, |
| { |
| "epoch": 0.7567934270531037, |
| "grad_norm": 1.4658968392116936, |
| "learning_rate": 1.696733152703356e-06, |
| "loss": 0.4621, |
| "step": 20080 |
| }, |
| { |
| "epoch": 0.7571703162100026, |
| "grad_norm": 1.400405223101517, |
| "learning_rate": 1.6917978546114844e-06, |
| "loss": 0.4567, |
| "step": 20090 |
| }, |
| { |
| "epoch": 0.7575472053669016, |
| "grad_norm": 1.7826408832538072, |
| "learning_rate": 1.686868282626501e-06, |
| "loss": 0.455, |
| "step": 20100 |
| }, |
| { |
| "epoch": 0.7579240945238005, |
| "grad_norm": 1.369980544067672, |
| "learning_rate": 1.6819444452809097e-06, |
| "loss": 0.4498, |
| "step": 20110 |
| }, |
| { |
| "epoch": 0.7583009836806995, |
| "grad_norm": 1.606503602489862, |
| "learning_rate": 1.6770263510972967e-06, |
| "loss": 0.4784, |
| "step": 20120 |
| }, |
| { |
| "epoch": 0.7586778728375985, |
| "grad_norm": 1.7717643562693965, |
| "learning_rate": 1.6721140085882958e-06, |
| "loss": 0.4983, |
| "step": 20130 |
| }, |
| { |
| "epoch": 0.7590547619944974, |
| "grad_norm": 1.5525199466594755, |
| "learning_rate": 1.6672074262565935e-06, |
| "loss": 0.4724, |
| "step": 20140 |
| }, |
| { |
| "epoch": 0.7594316511513963, |
| "grad_norm": 1.8260818838800927, |
| "learning_rate": 1.6623066125949039e-06, |
| "loss": 0.4855, |
| "step": 20150 |
| }, |
| { |
| "epoch": 0.7598085403082954, |
| "grad_norm": 1.5847972521629938, |
| "learning_rate": 1.6574115760859565e-06, |
| "loss": 0.4962, |
| "step": 20160 |
| }, |
| { |
| "epoch": 0.7601854294651943, |
| "grad_norm": 1.9147538929254044, |
| "learning_rate": 1.6525223252024803e-06, |
| "loss": 0.4906, |
| "step": 20170 |
| }, |
| { |
| "epoch": 0.7605623186220932, |
| "grad_norm": 1.8192442389995647, |
| "learning_rate": 1.6476388684071904e-06, |
| "loss": 0.4461, |
| "step": 20180 |
| }, |
| { |
| "epoch": 0.7609392077789922, |
| "grad_norm": 1.6057172465162213, |
| "learning_rate": 1.6427612141527737e-06, |
| "loss": 0.4661, |
| "step": 20190 |
| }, |
| { |
| "epoch": 0.7613160969358912, |
| "grad_norm": 1.5700052558656017, |
| "learning_rate": 1.6378893708818737e-06, |
| "loss": 0.4579, |
| "step": 20200 |
| }, |
| { |
| "epoch": 0.7616929860927901, |
| "grad_norm": 2.4569585216041556, |
| "learning_rate": 1.6330233470270745e-06, |
| "loss": 0.4794, |
| "step": 20210 |
| }, |
| { |
| "epoch": 0.762069875249689, |
| "grad_norm": 1.889553496160301, |
| "learning_rate": 1.6281631510108886e-06, |
| "loss": 0.442, |
| "step": 20220 |
| }, |
| { |
| "epoch": 0.762446764406588, |
| "grad_norm": 1.5962321807838258, |
| "learning_rate": 1.6233087912457412e-06, |
| "loss": 0.4672, |
| "step": 20230 |
| }, |
| { |
| "epoch": 0.762823653563487, |
| "grad_norm": 1.5965451145332648, |
| "learning_rate": 1.618460276133954e-06, |
| "loss": 0.4676, |
| "step": 20240 |
| }, |
| { |
| "epoch": 0.763200542720386, |
| "grad_norm": 1.6567069219014103, |
| "learning_rate": 1.6136176140677368e-06, |
| "loss": 0.4783, |
| "step": 20250 |
| }, |
| { |
| "epoch": 0.7635774318772849, |
| "grad_norm": 1.4557357506093793, |
| "learning_rate": 1.6087808134291593e-06, |
| "loss": 0.4823, |
| "step": 20260 |
| }, |
| { |
| "epoch": 0.7639543210341838, |
| "grad_norm": 1.876565177528593, |
| "learning_rate": 1.6039498825901568e-06, |
| "loss": 0.4774, |
| "step": 20270 |
| }, |
| { |
| "epoch": 0.7643312101910829, |
| "grad_norm": 1.7263871602732919, |
| "learning_rate": 1.5991248299124978e-06, |
| "loss": 0.4738, |
| "step": 20280 |
| }, |
| { |
| "epoch": 0.7647080993479818, |
| "grad_norm": 1.6090188313058575, |
| "learning_rate": 1.5943056637477804e-06, |
| "loss": 0.4567, |
| "step": 20290 |
| }, |
| { |
| "epoch": 0.7650849885048807, |
| "grad_norm": 1.8159775563602787, |
| "learning_rate": 1.5894923924374077e-06, |
| "loss": 0.4996, |
| "step": 20300 |
| }, |
| { |
| "epoch": 0.7654618776617796, |
| "grad_norm": 1.694825125868995, |
| "learning_rate": 1.5846850243125856e-06, |
| "loss": 0.467, |
| "step": 20310 |
| }, |
| { |
| "epoch": 0.7658387668186786, |
| "grad_norm": 1.4834845730262398, |
| "learning_rate": 1.5798835676942976e-06, |
| "loss": 0.4733, |
| "step": 20320 |
| }, |
| { |
| "epoch": 0.7662156559755776, |
| "grad_norm": 1.6457346282769572, |
| "learning_rate": 1.5750880308933036e-06, |
| "loss": 0.4775, |
| "step": 20330 |
| }, |
| { |
| "epoch": 0.7665925451324765, |
| "grad_norm": 1.6332831296059798, |
| "learning_rate": 1.5702984222101053e-06, |
| "loss": 0.4598, |
| "step": 20340 |
| }, |
| { |
| "epoch": 0.7669694342893755, |
| "grad_norm": 2.0230516472784417, |
| "learning_rate": 1.565514749934951e-06, |
| "loss": 0.501, |
| "step": 20350 |
| }, |
| { |
| "epoch": 0.7673463234462744, |
| "grad_norm": 1.9624821551932368, |
| "learning_rate": 1.5607370223478118e-06, |
| "loss": 0.4863, |
| "step": 20360 |
| }, |
| { |
| "epoch": 0.7677232126031734, |
| "grad_norm": 1.3909923765612713, |
| "learning_rate": 1.5559652477183702e-06, |
| "loss": 0.4592, |
| "step": 20370 |
| }, |
| { |
| "epoch": 0.7681001017600724, |
| "grad_norm": 1.6465814513557648, |
| "learning_rate": 1.5511994343060033e-06, |
| "loss": 0.4755, |
| "step": 20380 |
| }, |
| { |
| "epoch": 0.7684769909169713, |
| "grad_norm": 1.610226051279983, |
| "learning_rate": 1.5464395903597713e-06, |
| "loss": 0.4469, |
| "step": 20390 |
| }, |
| { |
| "epoch": 0.7688538800738702, |
| "grad_norm": 1.7453869621269993, |
| "learning_rate": 1.5416857241184007e-06, |
| "loss": 0.4547, |
| "step": 20400 |
| }, |
| { |
| "epoch": 0.7692307692307693, |
| "grad_norm": 1.310381314751475, |
| "learning_rate": 1.5369378438102728e-06, |
| "loss": 0.4668, |
| "step": 20410 |
| }, |
| { |
| "epoch": 0.7696076583876682, |
| "grad_norm": 1.6872384131950084, |
| "learning_rate": 1.5321959576534073e-06, |
| "loss": 0.5217, |
| "step": 20420 |
| }, |
| { |
| "epoch": 0.7699845475445671, |
| "grad_norm": 1.6894678298188206, |
| "learning_rate": 1.527460073855448e-06, |
| "loss": 0.4665, |
| "step": 20430 |
| }, |
| { |
| "epoch": 0.7703614367014661, |
| "grad_norm": 1.8097958205422997, |
| "learning_rate": 1.52273020061365e-06, |
| "loss": 0.4564, |
| "step": 20440 |
| }, |
| { |
| "epoch": 0.7707383258583651, |
| "grad_norm": 1.7701718906608808, |
| "learning_rate": 1.5180063461148675e-06, |
| "loss": 0.4664, |
| "step": 20450 |
| }, |
| { |
| "epoch": 0.771115215015264, |
| "grad_norm": 1.2961421273522296, |
| "learning_rate": 1.5132885185355294e-06, |
| "loss": 0.4527, |
| "step": 20460 |
| }, |
| { |
| "epoch": 0.771492104172163, |
| "grad_norm": 1.8199021031458331, |
| "learning_rate": 1.5085767260416396e-06, |
| "loss": 0.4945, |
| "step": 20470 |
| }, |
| { |
| "epoch": 0.7718689933290619, |
| "grad_norm": 1.5671269899021723, |
| "learning_rate": 1.5038709767887548e-06, |
| "loss": 0.4816, |
| "step": 20480 |
| }, |
| { |
| "epoch": 0.7722458824859609, |
| "grad_norm": 1.818867610498672, |
| "learning_rate": 1.4991712789219714e-06, |
| "loss": 0.4778, |
| "step": 20490 |
| }, |
| { |
| "epoch": 0.7726227716428599, |
| "grad_norm": 1.5948143698764703, |
| "learning_rate": 1.4944776405759115e-06, |
| "loss": 0.4652, |
| "step": 20500 |
| }, |
| { |
| "epoch": 0.7729996607997588, |
| "grad_norm": 1.4821620142605674, |
| "learning_rate": 1.4897900698747047e-06, |
| "loss": 0.4422, |
| "step": 20510 |
| }, |
| { |
| "epoch": 0.7733765499566577, |
| "grad_norm": 1.8116026121890576, |
| "learning_rate": 1.4851085749319827e-06, |
| "loss": 0.469, |
| "step": 20520 |
| }, |
| { |
| "epoch": 0.7737534391135567, |
| "grad_norm": 1.523784502143446, |
| "learning_rate": 1.4804331638508623e-06, |
| "loss": 0.4773, |
| "step": 20530 |
| }, |
| { |
| "epoch": 0.7741303282704557, |
| "grad_norm": 1.8206430937956362, |
| "learning_rate": 1.4757638447239276e-06, |
| "loss": 0.4724, |
| "step": 20540 |
| }, |
| { |
| "epoch": 0.7745072174273546, |
| "grad_norm": 1.7588855241689347, |
| "learning_rate": 1.4711006256332156e-06, |
| "loss": 0.4382, |
| "step": 20550 |
| }, |
| { |
| "epoch": 0.7748841065842536, |
| "grad_norm": 1.584901601395231, |
| "learning_rate": 1.4664435146502083e-06, |
| "loss": 0.4674, |
| "step": 20560 |
| }, |
| { |
| "epoch": 0.7752609957411525, |
| "grad_norm": 1.7973543634379379, |
| "learning_rate": 1.461792519835814e-06, |
| "loss": 0.4826, |
| "step": 20570 |
| }, |
| { |
| "epoch": 0.7756378848980515, |
| "grad_norm": 1.673141519768556, |
| "learning_rate": 1.4571476492403563e-06, |
| "loss": 0.4753, |
| "step": 20580 |
| }, |
| { |
| "epoch": 0.7760147740549505, |
| "grad_norm": 1.5795162978217505, |
| "learning_rate": 1.452508910903556e-06, |
| "loss": 0.4965, |
| "step": 20590 |
| }, |
| { |
| "epoch": 0.7763916632118494, |
| "grad_norm": 1.71797844150418, |
| "learning_rate": 1.447876312854521e-06, |
| "loss": 0.466, |
| "step": 20600 |
| }, |
| { |
| "epoch": 0.7767685523687483, |
| "grad_norm": 1.6656630817840206, |
| "learning_rate": 1.4432498631117314e-06, |
| "loss": 0.43, |
| "step": 20610 |
| }, |
| { |
| "epoch": 0.7771454415256474, |
| "grad_norm": 1.5139791631000281, |
| "learning_rate": 1.438629569683025e-06, |
| "loss": 0.4855, |
| "step": 20620 |
| }, |
| { |
| "epoch": 0.7775223306825463, |
| "grad_norm": 1.6906826740177294, |
| "learning_rate": 1.4340154405655826e-06, |
| "loss": 0.456, |
| "step": 20630 |
| }, |
| { |
| "epoch": 0.7778992198394452, |
| "grad_norm": 1.6698594166197502, |
| "learning_rate": 1.4294074837459177e-06, |
| "loss": 0.4912, |
| "step": 20640 |
| }, |
| { |
| "epoch": 0.7782761089963441, |
| "grad_norm": 2.250059784898189, |
| "learning_rate": 1.4248057071998578e-06, |
| "loss": 0.4998, |
| "step": 20650 |
| }, |
| { |
| "epoch": 0.7786529981532432, |
| "grad_norm": 1.575246221565116, |
| "learning_rate": 1.420210118892536e-06, |
| "loss": 0.4768, |
| "step": 20660 |
| }, |
| { |
| "epoch": 0.7790298873101421, |
| "grad_norm": 1.5597959754124424, |
| "learning_rate": 1.4156207267783679e-06, |
| "loss": 0.4652, |
| "step": 20670 |
| }, |
| { |
| "epoch": 0.779406776467041, |
| "grad_norm": 1.628610962575096, |
| "learning_rate": 1.4110375388010538e-06, |
| "loss": 0.4876, |
| "step": 20680 |
| }, |
| { |
| "epoch": 0.77978366562394, |
| "grad_norm": 1.644374962717556, |
| "learning_rate": 1.4064605628935479e-06, |
| "loss": 0.4679, |
| "step": 20690 |
| }, |
| { |
| "epoch": 0.780160554780839, |
| "grad_norm": 1.8253088878455022, |
| "learning_rate": 1.4018898069780572e-06, |
| "loss": 0.4772, |
| "step": 20700 |
| }, |
| { |
| "epoch": 0.7805374439377379, |
| "grad_norm": 1.5207862619368617, |
| "learning_rate": 1.3973252789660158e-06, |
| "loss": 0.4574, |
| "step": 20710 |
| }, |
| { |
| "epoch": 0.7809143330946369, |
| "grad_norm": 1.7050624400247496, |
| "learning_rate": 1.3927669867580845e-06, |
| "loss": 0.4554, |
| "step": 20720 |
| }, |
| { |
| "epoch": 0.7812912222515358, |
| "grad_norm": 1.9273191609007996, |
| "learning_rate": 1.3882149382441262e-06, |
| "loss": 0.5077, |
| "step": 20730 |
| }, |
| { |
| "epoch": 0.7816681114084347, |
| "grad_norm": 1.4282728575268597, |
| "learning_rate": 1.3836691413032045e-06, |
| "loss": 0.4596, |
| "step": 20740 |
| }, |
| { |
| "epoch": 0.7820450005653338, |
| "grad_norm": 1.5766792437516781, |
| "learning_rate": 1.37912960380355e-06, |
| "loss": 0.475, |
| "step": 20750 |
| }, |
| { |
| "epoch": 0.7824218897222327, |
| "grad_norm": 1.588717645461143, |
| "learning_rate": 1.3745963336025692e-06, |
| "loss": 0.4701, |
| "step": 20760 |
| }, |
| { |
| "epoch": 0.7827987788791316, |
| "grad_norm": 1.628374898696195, |
| "learning_rate": 1.3700693385468156e-06, |
| "loss": 0.467, |
| "step": 20770 |
| }, |
| { |
| "epoch": 0.7831756680360306, |
| "grad_norm": 1.8101040525930028, |
| "learning_rate": 1.3655486264719832e-06, |
| "loss": 0.4655, |
| "step": 20780 |
| }, |
| { |
| "epoch": 0.7835525571929296, |
| "grad_norm": 1.5363813815541065, |
| "learning_rate": 1.3610342052028897e-06, |
| "loss": 0.4629, |
| "step": 20790 |
| }, |
| { |
| "epoch": 0.7839294463498285, |
| "grad_norm": 1.7639661489622709, |
| "learning_rate": 1.3565260825534653e-06, |
| "loss": 0.4502, |
| "step": 20800 |
| }, |
| { |
| "epoch": 0.7843063355067275, |
| "grad_norm": 1.5089833035960092, |
| "learning_rate": 1.3520242663267375e-06, |
| "loss": 0.4871, |
| "step": 20810 |
| }, |
| { |
| "epoch": 0.7846832246636264, |
| "grad_norm": 1.5073945769296926, |
| "learning_rate": 1.3475287643148178e-06, |
| "loss": 0.4786, |
| "step": 20820 |
| }, |
| { |
| "epoch": 0.7850601138205254, |
| "grad_norm": 1.5526068937170885, |
| "learning_rate": 1.3430395842988886e-06, |
| "loss": 0.4549, |
| "step": 20830 |
| }, |
| { |
| "epoch": 0.7854370029774244, |
| "grad_norm": 1.4425597365312917, |
| "learning_rate": 1.3385567340491901e-06, |
| "loss": 0.453, |
| "step": 20840 |
| }, |
| { |
| "epoch": 0.7858138921343233, |
| "grad_norm": 1.6718319416849392, |
| "learning_rate": 1.334080221325006e-06, |
| "loss": 0.4755, |
| "step": 20850 |
| }, |
| { |
| "epoch": 0.7861907812912222, |
| "grad_norm": 1.4577265197030453, |
| "learning_rate": 1.3296100538746514e-06, |
| "loss": 0.4454, |
| "step": 20860 |
| }, |
| { |
| "epoch": 0.7865676704481213, |
| "grad_norm": 1.3692713929909994, |
| "learning_rate": 1.3251462394354585e-06, |
| "loss": 0.4927, |
| "step": 20870 |
| }, |
| { |
| "epoch": 0.7869445596050202, |
| "grad_norm": 1.6442519408672833, |
| "learning_rate": 1.3206887857337586e-06, |
| "loss": 0.4492, |
| "step": 20880 |
| }, |
| { |
| "epoch": 0.7873214487619191, |
| "grad_norm": 1.6104969996274061, |
| "learning_rate": 1.3162377004848814e-06, |
| "loss": 0.4515, |
| "step": 20890 |
| }, |
| { |
| "epoch": 0.787698337918818, |
| "grad_norm": 1.7066857405575866, |
| "learning_rate": 1.3117929913931277e-06, |
| "loss": 0.4553, |
| "step": 20900 |
| }, |
| { |
| "epoch": 0.788075227075717, |
| "grad_norm": 1.731131533765537, |
| "learning_rate": 1.3073546661517655e-06, |
| "loss": 0.4822, |
| "step": 20910 |
| }, |
| { |
| "epoch": 0.788452116232616, |
| "grad_norm": 1.696849854193515, |
| "learning_rate": 1.3029227324430077e-06, |
| "loss": 0.4536, |
| "step": 20920 |
| }, |
| { |
| "epoch": 0.788829005389515, |
| "grad_norm": 1.6266173084112765, |
| "learning_rate": 1.298497197938008e-06, |
| "loss": 0.4464, |
| "step": 20930 |
| }, |
| { |
| "epoch": 0.7892058945464139, |
| "grad_norm": 1.5230400863153157, |
| "learning_rate": 1.2940780702968464e-06, |
| "loss": 0.4838, |
| "step": 20940 |
| }, |
| { |
| "epoch": 0.7895827837033128, |
| "grad_norm": 1.805355977994606, |
| "learning_rate": 1.2896653571685108e-06, |
| "loss": 0.4808, |
| "step": 20950 |
| }, |
| { |
| "epoch": 0.7899596728602118, |
| "grad_norm": 1.6563216644658791, |
| "learning_rate": 1.2852590661908826e-06, |
| "loss": 0.4793, |
| "step": 20960 |
| }, |
| { |
| "epoch": 0.7903365620171108, |
| "grad_norm": 1.7324681232443127, |
| "learning_rate": 1.280859204990732e-06, |
| "loss": 0.4659, |
| "step": 20970 |
| }, |
| { |
| "epoch": 0.7907134511740097, |
| "grad_norm": 1.6887405218479397, |
| "learning_rate": 1.2764657811836995e-06, |
| "loss": 0.4829, |
| "step": 20980 |
| }, |
| { |
| "epoch": 0.7910903403309086, |
| "grad_norm": 1.723570674808702, |
| "learning_rate": 1.2720788023742819e-06, |
| "loss": 0.495, |
| "step": 20990 |
| }, |
| { |
| "epoch": 0.7914672294878077, |
| "grad_norm": 1.7913544321855928, |
| "learning_rate": 1.267698276155821e-06, |
| "loss": 0.4626, |
| "step": 21000 |
| }, |
| { |
| "epoch": 0.7918441186447066, |
| "grad_norm": 1.7911338951212774, |
| "learning_rate": 1.2633242101104904e-06, |
| "loss": 0.4577, |
| "step": 21010 |
| }, |
| { |
| "epoch": 0.7922210078016055, |
| "grad_norm": 1.8879029948128403, |
| "learning_rate": 1.2589566118092805e-06, |
| "loss": 0.5013, |
| "step": 21020 |
| }, |
| { |
| "epoch": 0.7925978969585045, |
| "grad_norm": 1.602586178784185, |
| "learning_rate": 1.2545954888119882e-06, |
| "loss": 0.4872, |
| "step": 21030 |
| }, |
| { |
| "epoch": 0.7929747861154035, |
| "grad_norm": 1.816604176967041, |
| "learning_rate": 1.2502408486672018e-06, |
| "loss": 0.4898, |
| "step": 21040 |
| }, |
| { |
| "epoch": 0.7933516752723024, |
| "grad_norm": 1.6554081568794934, |
| "learning_rate": 1.2458926989122894e-06, |
| "loss": 0.4739, |
| "step": 21050 |
| }, |
| { |
| "epoch": 0.7937285644292014, |
| "grad_norm": 1.8212477680147816, |
| "learning_rate": 1.2415510470733832e-06, |
| "loss": 0.4574, |
| "step": 21060 |
| }, |
| { |
| "epoch": 0.7941054535861003, |
| "grad_norm": 1.2942280081366884, |
| "learning_rate": 1.2372159006653711e-06, |
| "loss": 0.4775, |
| "step": 21070 |
| }, |
| { |
| "epoch": 0.7944823427429993, |
| "grad_norm": 1.7198494071850723, |
| "learning_rate": 1.2328872671918752e-06, |
| "loss": 0.5035, |
| "step": 21080 |
| }, |
| { |
| "epoch": 0.7948592318998983, |
| "grad_norm": 1.717727877702444, |
| "learning_rate": 1.2285651541452526e-06, |
| "loss": 0.4602, |
| "step": 21090 |
| }, |
| { |
| "epoch": 0.7952361210567972, |
| "grad_norm": 1.7030104890295072, |
| "learning_rate": 1.2242495690065687e-06, |
| "loss": 0.4563, |
| "step": 21100 |
| }, |
| { |
| "epoch": 0.7956130102136961, |
| "grad_norm": 1.550857418455602, |
| "learning_rate": 1.219940519245592e-06, |
| "loss": 0.4906, |
| "step": 21110 |
| }, |
| { |
| "epoch": 0.795989899370595, |
| "grad_norm": 1.1895997553169184, |
| "learning_rate": 1.2156380123207761e-06, |
| "loss": 0.476, |
| "step": 21120 |
| }, |
| { |
| "epoch": 0.7963667885274941, |
| "grad_norm": 1.6911416575004463, |
| "learning_rate": 1.2113420556792539e-06, |
| "loss": 0.4591, |
| "step": 21130 |
| }, |
| { |
| "epoch": 0.796743677684393, |
| "grad_norm": 1.439704510030527, |
| "learning_rate": 1.2070526567568164e-06, |
| "loss": 0.4489, |
| "step": 21140 |
| }, |
| { |
| "epoch": 0.797120566841292, |
| "grad_norm": 1.6272324810392764, |
| "learning_rate": 1.20276982297791e-06, |
| "loss": 0.4763, |
| "step": 21150 |
| }, |
| { |
| "epoch": 0.7974974559981909, |
| "grad_norm": 1.661794252836065, |
| "learning_rate": 1.1984935617556104e-06, |
| "loss": 0.4652, |
| "step": 21160 |
| }, |
| { |
| "epoch": 0.7978743451550899, |
| "grad_norm": 1.6161485954397128, |
| "learning_rate": 1.1942238804916213e-06, |
| "loss": 0.478, |
| "step": 21170 |
| }, |
| { |
| "epoch": 0.7982512343119889, |
| "grad_norm": 1.5127230860148504, |
| "learning_rate": 1.1899607865762563e-06, |
| "loss": 0.4879, |
| "step": 21180 |
| }, |
| { |
| "epoch": 0.7986281234688878, |
| "grad_norm": 1.6559970652967313, |
| "learning_rate": 1.1857042873884272e-06, |
| "loss": 0.4557, |
| "step": 21190 |
| }, |
| { |
| "epoch": 0.7990050126257867, |
| "grad_norm": 1.6399527198328363, |
| "learning_rate": 1.181454390295631e-06, |
| "loss": 0.4691, |
| "step": 21200 |
| }, |
| { |
| "epoch": 0.7993819017826858, |
| "grad_norm": 1.5670234998624126, |
| "learning_rate": 1.1772111026539374e-06, |
| "loss": 0.454, |
| "step": 21210 |
| }, |
| { |
| "epoch": 0.7997587909395847, |
| "grad_norm": 1.7735935156287603, |
| "learning_rate": 1.172974431807975e-06, |
| "loss": 0.4637, |
| "step": 21220 |
| }, |
| { |
| "epoch": 0.8001356800964836, |
| "grad_norm": 1.6014800132057636, |
| "learning_rate": 1.1687443850909208e-06, |
| "loss": 0.4736, |
| "step": 21230 |
| }, |
| { |
| "epoch": 0.8005125692533825, |
| "grad_norm": 1.63595133372094, |
| "learning_rate": 1.1645209698244857e-06, |
| "loss": 0.4642, |
| "step": 21240 |
| }, |
| { |
| "epoch": 0.8008894584102816, |
| "grad_norm": 1.7792664084505718, |
| "learning_rate": 1.1603041933189024e-06, |
| "loss": 0.5004, |
| "step": 21250 |
| }, |
| { |
| "epoch": 0.8012663475671805, |
| "grad_norm": 1.7417841561819936, |
| "learning_rate": 1.1560940628729129e-06, |
| "loss": 0.4851, |
| "step": 21260 |
| }, |
| { |
| "epoch": 0.8016432367240794, |
| "grad_norm": 1.7474085529041983, |
| "learning_rate": 1.1518905857737544e-06, |
| "loss": 0.506, |
| "step": 21270 |
| }, |
| { |
| "epoch": 0.8020201258809784, |
| "grad_norm": 1.4496625205949163, |
| "learning_rate": 1.1476937692971508e-06, |
| "loss": 0.4375, |
| "step": 21280 |
| }, |
| { |
| "epoch": 0.8023970150378774, |
| "grad_norm": 1.6179305557255557, |
| "learning_rate": 1.1435036207072913e-06, |
| "loss": 0.448, |
| "step": 21290 |
| }, |
| { |
| "epoch": 0.8027739041947763, |
| "grad_norm": 1.6365956374761494, |
| "learning_rate": 1.1393201472568322e-06, |
| "loss": 0.4974, |
| "step": 21300 |
| }, |
| { |
| "epoch": 0.8031507933516753, |
| "grad_norm": 1.3450521505509172, |
| "learning_rate": 1.1351433561868697e-06, |
| "loss": 0.4508, |
| "step": 21310 |
| }, |
| { |
| "epoch": 0.8035276825085742, |
| "grad_norm": 1.4538600265225352, |
| "learning_rate": 1.130973254726937e-06, |
| "loss": 0.4614, |
| "step": 21320 |
| }, |
| { |
| "epoch": 0.8039045716654731, |
| "grad_norm": 1.587273756529849, |
| "learning_rate": 1.1268098500949843e-06, |
| "loss": 0.4577, |
| "step": 21330 |
| }, |
| { |
| "epoch": 0.8042814608223722, |
| "grad_norm": 1.6193327198648404, |
| "learning_rate": 1.122653149497373e-06, |
| "loss": 0.4548, |
| "step": 21340 |
| }, |
| { |
| "epoch": 0.8046583499792711, |
| "grad_norm": 1.7408766583273092, |
| "learning_rate": 1.1185031601288627e-06, |
| "loss": 0.4792, |
| "step": 21350 |
| }, |
| { |
| "epoch": 0.80503523913617, |
| "grad_norm": 1.745321600962186, |
| "learning_rate": 1.1143598891725948e-06, |
| "loss": 0.4503, |
| "step": 21360 |
| }, |
| { |
| "epoch": 0.805412128293069, |
| "grad_norm": 1.6321545726426894, |
| "learning_rate": 1.1102233438000786e-06, |
| "loss": 0.4755, |
| "step": 21370 |
| }, |
| { |
| "epoch": 0.805789017449968, |
| "grad_norm": 1.6372516624414968, |
| "learning_rate": 1.1060935311711873e-06, |
| "loss": 0.4491, |
| "step": 21380 |
| }, |
| { |
| "epoch": 0.8061659066068669, |
| "grad_norm": 1.8295447316903106, |
| "learning_rate": 1.1019704584341374e-06, |
| "loss": 0.4645, |
| "step": 21390 |
| }, |
| { |
| "epoch": 0.8065427957637659, |
| "grad_norm": 1.6650412672495543, |
| "learning_rate": 1.097854132725481e-06, |
| "loss": 0.4669, |
| "step": 21400 |
| }, |
| { |
| "epoch": 0.8069196849206648, |
| "grad_norm": 1.5629750029935003, |
| "learning_rate": 1.093744561170092e-06, |
| "loss": 0.469, |
| "step": 21410 |
| }, |
| { |
| "epoch": 0.8072965740775638, |
| "grad_norm": 1.421621390086491, |
| "learning_rate": 1.0896417508811518e-06, |
| "loss": 0.4753, |
| "step": 21420 |
| }, |
| { |
| "epoch": 0.8076734632344628, |
| "grad_norm": 1.6379330117232693, |
| "learning_rate": 1.0855457089601407e-06, |
| "loss": 0.4587, |
| "step": 21430 |
| }, |
| { |
| "epoch": 0.8080503523913617, |
| "grad_norm": 1.7407275341596244, |
| "learning_rate": 1.0814564424968226e-06, |
| "loss": 0.4966, |
| "step": 21440 |
| }, |
| { |
| "epoch": 0.8084272415482606, |
| "grad_norm": 1.846067209557093, |
| "learning_rate": 1.0773739585692356e-06, |
| "loss": 0.5118, |
| "step": 21450 |
| }, |
| { |
| "epoch": 0.8088041307051597, |
| "grad_norm": 1.456547037165189, |
| "learning_rate": 1.0732982642436757e-06, |
| "loss": 0.4466, |
| "step": 21460 |
| }, |
| { |
| "epoch": 0.8091810198620586, |
| "grad_norm": 1.7012035006791435, |
| "learning_rate": 1.0692293665746884e-06, |
| "loss": 0.4926, |
| "step": 21470 |
| }, |
| { |
| "epoch": 0.8095579090189575, |
| "grad_norm": 1.5902195620043145, |
| "learning_rate": 1.065167272605056e-06, |
| "loss": 0.4765, |
| "step": 21480 |
| }, |
| { |
| "epoch": 0.8099347981758565, |
| "grad_norm": 1.527190606227678, |
| "learning_rate": 1.061111989365779e-06, |
| "loss": 0.4694, |
| "step": 21490 |
| }, |
| { |
| "epoch": 0.8103116873327554, |
| "grad_norm": 1.6866265884761789, |
| "learning_rate": 1.0570635238760774e-06, |
| "loss": 0.4623, |
| "step": 21500 |
| }, |
| { |
| "epoch": 0.8106885764896544, |
| "grad_norm": 1.575765143637899, |
| "learning_rate": 1.0530218831433652e-06, |
| "loss": 0.4482, |
| "step": 21510 |
| }, |
| { |
| "epoch": 0.8110654656465534, |
| "grad_norm": 2.4146188153476995, |
| "learning_rate": 1.0489870741632456e-06, |
| "loss": 0.4633, |
| "step": 21520 |
| }, |
| { |
| "epoch": 0.8114423548034523, |
| "grad_norm": 1.6718557277711972, |
| "learning_rate": 1.044959103919494e-06, |
| "loss": 0.4811, |
| "step": 21530 |
| }, |
| { |
| "epoch": 0.8118192439603512, |
| "grad_norm": 1.7873992974852744, |
| "learning_rate": 1.0409379793840518e-06, |
| "loss": 0.4546, |
| "step": 21540 |
| }, |
| { |
| "epoch": 0.8121961331172503, |
| "grad_norm": 1.8256231709433406, |
| "learning_rate": 1.0369237075170091e-06, |
| "loss": 0.4743, |
| "step": 21550 |
| }, |
| { |
| "epoch": 0.8125730222741492, |
| "grad_norm": 1.6028991502176622, |
| "learning_rate": 1.0329162952666e-06, |
| "loss": 0.441, |
| "step": 21560 |
| }, |
| { |
| "epoch": 0.8129499114310481, |
| "grad_norm": 1.7523235315102614, |
| "learning_rate": 1.028915749569177e-06, |
| "loss": 0.4874, |
| "step": 21570 |
| }, |
| { |
| "epoch": 0.813326800587947, |
| "grad_norm": 1.574362879271632, |
| "learning_rate": 1.0249220773492142e-06, |
| "loss": 0.47, |
| "step": 21580 |
| }, |
| { |
| "epoch": 0.8137036897448461, |
| "grad_norm": 1.4217729969659767, |
| "learning_rate": 1.020935285519285e-06, |
| "loss": 0.4655, |
| "step": 21590 |
| }, |
| { |
| "epoch": 0.814080578901745, |
| "grad_norm": 1.7611978152515075, |
| "learning_rate": 1.0169553809800543e-06, |
| "loss": 0.4992, |
| "step": 21600 |
| }, |
| { |
| "epoch": 0.8144574680586439, |
| "grad_norm": 1.4580463296463873, |
| "learning_rate": 1.0129823706202696e-06, |
| "loss": 0.4516, |
| "step": 21610 |
| }, |
| { |
| "epoch": 0.8148343572155429, |
| "grad_norm": 1.5284529560123907, |
| "learning_rate": 1.0090162613167393e-06, |
| "loss": 0.4668, |
| "step": 21620 |
| }, |
| { |
| "epoch": 0.8152112463724419, |
| "grad_norm": 1.6642481846748813, |
| "learning_rate": 1.0050570599343302e-06, |
| "loss": 0.4533, |
| "step": 21630 |
| }, |
| { |
| "epoch": 0.8155881355293408, |
| "grad_norm": 1.3683583910657175, |
| "learning_rate": 1.0011047733259521e-06, |
| "loss": 0.4597, |
| "step": 21640 |
| }, |
| { |
| "epoch": 0.8159650246862398, |
| "grad_norm": 1.8875658078070612, |
| "learning_rate": 9.97159408332547e-07, |
| "loss": 0.4782, |
| "step": 21650 |
| }, |
| { |
| "epoch": 0.8163419138431387, |
| "grad_norm": 1.692047018964554, |
| "learning_rate": 9.932209717830744e-07, |
| "loss": 0.462, |
| "step": 21660 |
| }, |
| { |
| "epoch": 0.8167188030000377, |
| "grad_norm": 1.5378872143686608, |
| "learning_rate": 9.892894704945022e-07, |
| "loss": 0.4656, |
| "step": 21670 |
| }, |
| { |
| "epoch": 0.8170956921569367, |
| "grad_norm": 1.5613665579925253, |
| "learning_rate": 9.85364911271795e-07, |
| "loss": 0.476, |
| "step": 21680 |
| }, |
| { |
| "epoch": 0.8174725813138356, |
| "grad_norm": 1.6137324346165267, |
| "learning_rate": 9.814473009079017e-07, |
| "loss": 0.5071, |
| "step": 21690 |
| }, |
| { |
| "epoch": 0.8178494704707345, |
| "grad_norm": 1.614130393792285, |
| "learning_rate": 9.7753664618374e-07, |
| "loss": 0.4664, |
| "step": 21700 |
| }, |
| { |
| "epoch": 0.8182263596276335, |
| "grad_norm": 1.6240132524422568, |
| "learning_rate": 9.736329538681932e-07, |
| "loss": 0.4683, |
| "step": 21710 |
| }, |
| { |
| "epoch": 0.8186032487845325, |
| "grad_norm": 1.5296419317199343, |
| "learning_rate": 9.697362307180918e-07, |
| "loss": 0.4939, |
| "step": 21720 |
| }, |
| { |
| "epoch": 0.8189801379414314, |
| "grad_norm": 1.5321812462004734, |
| "learning_rate": 9.658464834782033e-07, |
| "loss": 0.4778, |
| "step": 21730 |
| }, |
| { |
| "epoch": 0.8193570270983304, |
| "grad_norm": 1.5185831568058594, |
| "learning_rate": 9.619637188812175e-07, |
| "loss": 0.46, |
| "step": 21740 |
| }, |
| { |
| "epoch": 0.8197339162552293, |
| "grad_norm": 1.6821769094096455, |
| "learning_rate": 9.58087943647743e-07, |
| "loss": 0.4879, |
| "step": 21750 |
| }, |
| { |
| "epoch": 0.8201108054121283, |
| "grad_norm": 1.467098506919944, |
| "learning_rate": 9.542191644862869e-07, |
| "loss": 0.4338, |
| "step": 21760 |
| }, |
| { |
| "epoch": 0.8204876945690273, |
| "grad_norm": 1.5832840990797274, |
| "learning_rate": 9.503573880932527e-07, |
| "loss": 0.4617, |
| "step": 21770 |
| }, |
| { |
| "epoch": 0.8208645837259262, |
| "grad_norm": 1.4725123019965634, |
| "learning_rate": 9.465026211529149e-07, |
| "loss": 0.4591, |
| "step": 21780 |
| }, |
| { |
| "epoch": 0.8212414728828251, |
| "grad_norm": 1.4443596976324486, |
| "learning_rate": 9.42654870337421e-07, |
| "loss": 0.4723, |
| "step": 21790 |
| }, |
| { |
| "epoch": 0.8216183620397242, |
| "grad_norm": 1.7715020744201238, |
| "learning_rate": 9.38814142306772e-07, |
| "loss": 0.4414, |
| "step": 21800 |
| }, |
| { |
| "epoch": 0.8219952511966231, |
| "grad_norm": 1.6263211713573036, |
| "learning_rate": 9.349804437088155e-07, |
| "loss": 0.4657, |
| "step": 21810 |
| }, |
| { |
| "epoch": 0.822372140353522, |
| "grad_norm": 2.675858370147406, |
| "learning_rate": 9.311537811792299e-07, |
| "loss": 0.4547, |
| "step": 21820 |
| }, |
| { |
| "epoch": 0.822749029510421, |
| "grad_norm": 1.6989825689942237, |
| "learning_rate": 9.273341613415155e-07, |
| "loss": 0.4486, |
| "step": 21830 |
| }, |
| { |
| "epoch": 0.82312591866732, |
| "grad_norm": 1.758883283595281, |
| "learning_rate": 9.235215908069828e-07, |
| "loss": 0.4752, |
| "step": 21840 |
| }, |
| { |
| "epoch": 0.8235028078242189, |
| "grad_norm": 1.6500955266665545, |
| "learning_rate": 9.197160761747415e-07, |
| "loss": 0.4578, |
| "step": 21850 |
| }, |
| { |
| "epoch": 0.8238796969811178, |
| "grad_norm": 1.4200589057349262, |
| "learning_rate": 9.159176240316869e-07, |
| "loss": 0.4292, |
| "step": 21860 |
| }, |
| { |
| "epoch": 0.8242565861380168, |
| "grad_norm": 1.7138258286525525, |
| "learning_rate": 9.121262409524906e-07, |
| "loss": 0.479, |
| "step": 21870 |
| }, |
| { |
| "epoch": 0.8246334752949158, |
| "grad_norm": 1.6629136940635971, |
| "learning_rate": 9.08341933499589e-07, |
| "loss": 0.4676, |
| "step": 21880 |
| }, |
| { |
| "epoch": 0.8250103644518147, |
| "grad_norm": 1.7116482891389613, |
| "learning_rate": 9.045647082231729e-07, |
| "loss": 0.4717, |
| "step": 21890 |
| }, |
| { |
| "epoch": 0.8253872536087137, |
| "grad_norm": 1.695232561294525, |
| "learning_rate": 9.007945716611688e-07, |
| "loss": 0.4766, |
| "step": 21900 |
| }, |
| { |
| "epoch": 0.8257641427656126, |
| "grad_norm": 1.7002498190688085, |
| "learning_rate": 8.970315303392379e-07, |
| "loss": 0.4901, |
| "step": 21910 |
| }, |
| { |
| "epoch": 0.8261410319225115, |
| "grad_norm": 1.492022987501469, |
| "learning_rate": 8.93275590770763e-07, |
| "loss": 0.4444, |
| "step": 21920 |
| }, |
| { |
| "epoch": 0.8265179210794106, |
| "grad_norm": 1.7137434822176278, |
| "learning_rate": 8.895267594568302e-07, |
| "loss": 0.4595, |
| "step": 21930 |
| }, |
| { |
| "epoch": 0.8268948102363095, |
| "grad_norm": 1.5218510147047475, |
| "learning_rate": 8.857850428862241e-07, |
| "loss": 0.4631, |
| "step": 21940 |
| }, |
| { |
| "epoch": 0.8272716993932084, |
| "grad_norm": 1.8826119986337635, |
| "learning_rate": 8.820504475354119e-07, |
| "loss": 0.5034, |
| "step": 21950 |
| }, |
| { |
| "epoch": 0.8276485885501074, |
| "grad_norm": 1.5920446251275133, |
| "learning_rate": 8.783229798685361e-07, |
| "loss": 0.467, |
| "step": 21960 |
| }, |
| { |
| "epoch": 0.8280254777070064, |
| "grad_norm": 1.7063227060767632, |
| "learning_rate": 8.746026463374058e-07, |
| "loss": 0.4541, |
| "step": 21970 |
| }, |
| { |
| "epoch": 0.8284023668639053, |
| "grad_norm": 1.618446816633231, |
| "learning_rate": 8.708894533814788e-07, |
| "loss": 0.4466, |
| "step": 21980 |
| }, |
| { |
| "epoch": 0.8287792560208043, |
| "grad_norm": 1.7677621425455639, |
| "learning_rate": 8.671834074278496e-07, |
| "loss": 0.4488, |
| "step": 21990 |
| }, |
| { |
| "epoch": 0.8291561451777032, |
| "grad_norm": 1.8004192808155495, |
| "learning_rate": 8.63484514891248e-07, |
| "loss": 0.4781, |
| "step": 22000 |
| }, |
| { |
| "epoch": 0.8295330343346022, |
| "grad_norm": 1.638331409201194, |
| "learning_rate": 8.597927821740188e-07, |
| "loss": 0.4847, |
| "step": 22010 |
| }, |
| { |
| "epoch": 0.8299099234915012, |
| "grad_norm": 1.547508824093798, |
| "learning_rate": 8.56108215666116e-07, |
| "loss": 0.467, |
| "step": 22020 |
| }, |
| { |
| "epoch": 0.8302868126484001, |
| "grad_norm": 1.6865585478017953, |
| "learning_rate": 8.524308217450883e-07, |
| "loss": 0.4976, |
| "step": 22030 |
| }, |
| { |
| "epoch": 0.830663701805299, |
| "grad_norm": 1.5407631720098116, |
| "learning_rate": 8.487606067760695e-07, |
| "loss": 0.4503, |
| "step": 22040 |
| }, |
| { |
| "epoch": 0.8310405909621981, |
| "grad_norm": 1.7278683855838521, |
| "learning_rate": 8.450975771117686e-07, |
| "loss": 0.4766, |
| "step": 22050 |
| }, |
| { |
| "epoch": 0.831417480119097, |
| "grad_norm": 1.6881225655115248, |
| "learning_rate": 8.414417390924567e-07, |
| "loss": 0.4529, |
| "step": 22060 |
| }, |
| { |
| "epoch": 0.8317943692759959, |
| "grad_norm": 1.5343097557556282, |
| "learning_rate": 8.37793099045957e-07, |
| "loss": 0.4654, |
| "step": 22070 |
| }, |
| { |
| "epoch": 0.8321712584328949, |
| "grad_norm": 1.5443651240466079, |
| "learning_rate": 8.341516632876345e-07, |
| "loss": 0.4725, |
| "step": 22080 |
| }, |
| { |
| "epoch": 0.8325481475897939, |
| "grad_norm": 1.709840635782194, |
| "learning_rate": 8.30517438120384e-07, |
| "loss": 0.4648, |
| "step": 22090 |
| }, |
| { |
| "epoch": 0.8329250367466928, |
| "grad_norm": 1.734867377598363, |
| "learning_rate": 8.268904298346215e-07, |
| "loss": 0.4862, |
| "step": 22100 |
| }, |
| { |
| "epoch": 0.8333019259035918, |
| "grad_norm": 1.5853579920685308, |
| "learning_rate": 8.232706447082644e-07, |
| "loss": 0.4554, |
| "step": 22110 |
| }, |
| { |
| "epoch": 0.8336788150604907, |
| "grad_norm": 1.8218704126887049, |
| "learning_rate": 8.196580890067379e-07, |
| "loss": 0.4712, |
| "step": 22120 |
| }, |
| { |
| "epoch": 0.8340557042173896, |
| "grad_norm": 1.7644284565478414, |
| "learning_rate": 8.160527689829473e-07, |
| "loss": 0.5028, |
| "step": 22130 |
| }, |
| { |
| "epoch": 0.8344325933742887, |
| "grad_norm": 1.6857508469766067, |
| "learning_rate": 8.124546908772768e-07, |
| "loss": 0.4622, |
| "step": 22140 |
| }, |
| { |
| "epoch": 0.8348094825311876, |
| "grad_norm": 1.6855492767191518, |
| "learning_rate": 8.088638609175719e-07, |
| "loss": 0.4948, |
| "step": 22150 |
| }, |
| { |
| "epoch": 0.8351863716880865, |
| "grad_norm": 1.3978162670227476, |
| "learning_rate": 8.052802853191355e-07, |
| "loss": 0.4743, |
| "step": 22160 |
| }, |
| { |
| "epoch": 0.8355632608449854, |
| "grad_norm": 1.6951740065317433, |
| "learning_rate": 8.01703970284713e-07, |
| "loss": 0.4754, |
| "step": 22170 |
| }, |
| { |
| "epoch": 0.8359401500018845, |
| "grad_norm": 1.7573161295973543, |
| "learning_rate": 7.98134922004486e-07, |
| "loss": 0.4871, |
| "step": 22180 |
| }, |
| { |
| "epoch": 0.8363170391587834, |
| "grad_norm": 1.4918859167709155, |
| "learning_rate": 7.945731466560519e-07, |
| "loss": 0.4794, |
| "step": 22190 |
| }, |
| { |
| "epoch": 0.8366939283156823, |
| "grad_norm": 1.7049198529331042, |
| "learning_rate": 7.910186504044237e-07, |
| "loss": 0.4611, |
| "step": 22200 |
| }, |
| { |
| "epoch": 0.8370708174725813, |
| "grad_norm": 1.4292962627440216, |
| "learning_rate": 7.874714394020145e-07, |
| "loss": 0.4175, |
| "step": 22210 |
| }, |
| { |
| "epoch": 0.8374477066294803, |
| "grad_norm": 2.0016660592227145, |
| "learning_rate": 7.839315197886277e-07, |
| "loss": 0.468, |
| "step": 22220 |
| }, |
| { |
| "epoch": 0.8378245957863792, |
| "grad_norm": 1.8041549181742595, |
| "learning_rate": 7.803988976914451e-07, |
| "loss": 0.4661, |
| "step": 22230 |
| }, |
| { |
| "epoch": 0.8382014849432782, |
| "grad_norm": 1.793146229523631, |
| "learning_rate": 7.768735792250176e-07, |
| "loss": 0.4874, |
| "step": 22240 |
| }, |
| { |
| "epoch": 0.8385783741001771, |
| "grad_norm": 1.8151183016027572, |
| "learning_rate": 7.73355570491256e-07, |
| "loss": 0.4688, |
| "step": 22250 |
| }, |
| { |
| "epoch": 0.8389552632570761, |
| "grad_norm": 1.6753537342016527, |
| "learning_rate": 7.698448775794171e-07, |
| "loss": 0.4918, |
| "step": 22260 |
| }, |
| { |
| "epoch": 0.8393321524139751, |
| "grad_norm": 1.5400170574876808, |
| "learning_rate": 7.663415065660951e-07, |
| "loss": 0.4734, |
| "step": 22270 |
| }, |
| { |
| "epoch": 0.839709041570874, |
| "grad_norm": 1.4967554695098977, |
| "learning_rate": 7.628454635152111e-07, |
| "loss": 0.4725, |
| "step": 22280 |
| }, |
| { |
| "epoch": 0.8400859307277729, |
| "grad_norm": 1.8581940585235912, |
| "learning_rate": 7.593567544780028e-07, |
| "loss": 0.4813, |
| "step": 22290 |
| }, |
| { |
| "epoch": 0.8404628198846719, |
| "grad_norm": 1.7617654233248232, |
| "learning_rate": 7.558753854930129e-07, |
| "loss": 0.4798, |
| "step": 22300 |
| }, |
| { |
| "epoch": 0.8408397090415709, |
| "grad_norm": 1.8228739549825173, |
| "learning_rate": 7.52401362586081e-07, |
| "loss": 0.4661, |
| "step": 22310 |
| }, |
| { |
| "epoch": 0.8412165981984698, |
| "grad_norm": 1.5249651201885777, |
| "learning_rate": 7.489346917703261e-07, |
| "loss": 0.4881, |
| "step": 22320 |
| }, |
| { |
| "epoch": 0.8415934873553688, |
| "grad_norm": 1.6186808625257767, |
| "learning_rate": 7.4547537904615e-07, |
| "loss": 0.4733, |
| "step": 22330 |
| }, |
| { |
| "epoch": 0.8419703765122677, |
| "grad_norm": 2.203606849350494, |
| "learning_rate": 7.420234304012119e-07, |
| "loss": 0.4488, |
| "step": 22340 |
| }, |
| { |
| "epoch": 0.8423472656691667, |
| "grad_norm": 1.791129399689499, |
| "learning_rate": 7.385788518104287e-07, |
| "loss": 0.4419, |
| "step": 22350 |
| }, |
| { |
| "epoch": 0.8427241548260657, |
| "grad_norm": 1.586850556241108, |
| "learning_rate": 7.351416492359564e-07, |
| "loss": 0.4632, |
| "step": 22360 |
| }, |
| { |
| "epoch": 0.8431010439829646, |
| "grad_norm": 1.4777261662520846, |
| "learning_rate": 7.317118286271869e-07, |
| "loss": 0.4428, |
| "step": 22370 |
| }, |
| { |
| "epoch": 0.8434779331398635, |
| "grad_norm": 1.6300982151377412, |
| "learning_rate": 7.282893959207354e-07, |
| "loss": 0.4844, |
| "step": 22380 |
| }, |
| { |
| "epoch": 0.8438548222967626, |
| "grad_norm": 1.7666022912394777, |
| "learning_rate": 7.248743570404293e-07, |
| "loss": 0.4825, |
| "step": 22390 |
| }, |
| { |
| "epoch": 0.8442317114536615, |
| "grad_norm": 1.5994223812698616, |
| "learning_rate": 7.214667178972951e-07, |
| "loss": 0.4678, |
| "step": 22400 |
| }, |
| { |
| "epoch": 0.8446086006105604, |
| "grad_norm": 1.5930748274830833, |
| "learning_rate": 7.180664843895536e-07, |
| "loss": 0.4825, |
| "step": 22410 |
| }, |
| { |
| "epoch": 0.8449854897674594, |
| "grad_norm": 1.8952726942929914, |
| "learning_rate": 7.146736624026073e-07, |
| "loss": 0.4619, |
| "step": 22420 |
| }, |
| { |
| "epoch": 0.8453623789243584, |
| "grad_norm": 1.6535903612300786, |
| "learning_rate": 7.112882578090308e-07, |
| "loss": 0.442, |
| "step": 22430 |
| }, |
| { |
| "epoch": 0.8457392680812573, |
| "grad_norm": 1.8113816961588787, |
| "learning_rate": 7.079102764685592e-07, |
| "loss": 0.4727, |
| "step": 22440 |
| }, |
| { |
| "epoch": 0.8461161572381563, |
| "grad_norm": 1.6381559303129163, |
| "learning_rate": 7.045397242280782e-07, |
| "loss": 0.4739, |
| "step": 22450 |
| }, |
| { |
| "epoch": 0.8464930463950552, |
| "grad_norm": 1.6420241463187408, |
| "learning_rate": 7.011766069216153e-07, |
| "loss": 0.4728, |
| "step": 22460 |
| }, |
| { |
| "epoch": 0.8468699355519542, |
| "grad_norm": 1.8579503334291299, |
| "learning_rate": 6.978209303703298e-07, |
| "loss": 0.467, |
| "step": 22470 |
| }, |
| { |
| "epoch": 0.8472468247088532, |
| "grad_norm": 1.8303598632267788, |
| "learning_rate": 6.944727003825014e-07, |
| "loss": 0.459, |
| "step": 22480 |
| }, |
| { |
| "epoch": 0.8476237138657521, |
| "grad_norm": 1.6128953601572278, |
| "learning_rate": 6.91131922753519e-07, |
| "loss": 0.4592, |
| "step": 22490 |
| }, |
| { |
| "epoch": 0.848000603022651, |
| "grad_norm": 1.7095865691859484, |
| "learning_rate": 6.877986032658751e-07, |
| "loss": 0.48, |
| "step": 22500 |
| }, |
| { |
| "epoch": 0.8483774921795499, |
| "grad_norm": 1.4938798824764543, |
| "learning_rate": 6.844727476891521e-07, |
| "loss": 0.4781, |
| "step": 22510 |
| }, |
| { |
| "epoch": 0.848754381336449, |
| "grad_norm": 1.6485957619451577, |
| "learning_rate": 6.811543617800104e-07, |
| "loss": 0.5045, |
| "step": 22520 |
| }, |
| { |
| "epoch": 0.8491312704933479, |
| "grad_norm": 1.837620635777043, |
| "learning_rate": 6.778434512821863e-07, |
| "loss": 0.4748, |
| "step": 22530 |
| }, |
| { |
| "epoch": 0.8495081596502468, |
| "grad_norm": 1.4879964061685897, |
| "learning_rate": 6.745400219264736e-07, |
| "loss": 0.4304, |
| "step": 22540 |
| }, |
| { |
| "epoch": 0.8498850488071458, |
| "grad_norm": 1.7156819983043117, |
| "learning_rate": 6.712440794307191e-07, |
| "loss": 0.466, |
| "step": 22550 |
| }, |
| { |
| "epoch": 0.8502619379640448, |
| "grad_norm": 1.3621122589006487, |
| "learning_rate": 6.67955629499808e-07, |
| "loss": 0.4704, |
| "step": 22560 |
| }, |
| { |
| "epoch": 0.8506388271209437, |
| "grad_norm": 2.0534498348755834, |
| "learning_rate": 6.646746778256591e-07, |
| "loss": 0.4854, |
| "step": 22570 |
| }, |
| { |
| "epoch": 0.8510157162778427, |
| "grad_norm": 1.6005946712317962, |
| "learning_rate": 6.614012300872108e-07, |
| "loss": 0.4695, |
| "step": 22580 |
| }, |
| { |
| "epoch": 0.8513926054347416, |
| "grad_norm": 1.5414012999559674, |
| "learning_rate": 6.581352919504175e-07, |
| "loss": 0.4707, |
| "step": 22590 |
| }, |
| { |
| "epoch": 0.8517694945916406, |
| "grad_norm": 1.8269075507153945, |
| "learning_rate": 6.548768690682295e-07, |
| "loss": 0.4661, |
| "step": 22600 |
| }, |
| { |
| "epoch": 0.8521463837485396, |
| "grad_norm": 1.8104476310135393, |
| "learning_rate": 6.516259670805914e-07, |
| "loss": 0.4715, |
| "step": 22610 |
| }, |
| { |
| "epoch": 0.8525232729054385, |
| "grad_norm": 1.5462248619686105, |
| "learning_rate": 6.483825916144315e-07, |
| "loss": 0.4634, |
| "step": 22620 |
| }, |
| { |
| "epoch": 0.8529001620623374, |
| "grad_norm": 1.7572378321726936, |
| "learning_rate": 6.451467482836493e-07, |
| "loss": 0.47, |
| "step": 22630 |
| }, |
| { |
| "epoch": 0.8532770512192365, |
| "grad_norm": 1.719401134457696, |
| "learning_rate": 6.419184426891062e-07, |
| "loss": 0.4417, |
| "step": 22640 |
| }, |
| { |
| "epoch": 0.8536539403761354, |
| "grad_norm": 1.8799617216498543, |
| "learning_rate": 6.386976804186185e-07, |
| "loss": 0.4543, |
| "step": 22650 |
| }, |
| { |
| "epoch": 0.8540308295330343, |
| "grad_norm": 1.7409813880394684, |
| "learning_rate": 6.354844670469446e-07, |
| "loss": 0.4555, |
| "step": 22660 |
| }, |
| { |
| "epoch": 0.8544077186899333, |
| "grad_norm": 1.7233766286902579, |
| "learning_rate": 6.322788081357767e-07, |
| "loss": 0.4894, |
| "step": 22670 |
| }, |
| { |
| "epoch": 0.8547846078468323, |
| "grad_norm": 1.822979772670787, |
| "learning_rate": 6.290807092337325e-07, |
| "loss": 0.456, |
| "step": 22680 |
| }, |
| { |
| "epoch": 0.8551614970037312, |
| "grad_norm": 1.7840609931799256, |
| "learning_rate": 6.258901758763425e-07, |
| "loss": 0.4467, |
| "step": 22690 |
| }, |
| { |
| "epoch": 0.8555383861606302, |
| "grad_norm": 1.7330476182654266, |
| "learning_rate": 6.227072135860424e-07, |
| "loss": 0.4887, |
| "step": 22700 |
| }, |
| { |
| "epoch": 0.8559152753175291, |
| "grad_norm": 1.634410872367023, |
| "learning_rate": 6.195318278721646e-07, |
| "loss": 0.4589, |
| "step": 22710 |
| }, |
| { |
| "epoch": 0.856292164474428, |
| "grad_norm": 1.8487043942748613, |
| "learning_rate": 6.163640242309271e-07, |
| "loss": 0.4514, |
| "step": 22720 |
| }, |
| { |
| "epoch": 0.8566690536313271, |
| "grad_norm": 1.6693062741828186, |
| "learning_rate": 6.132038081454206e-07, |
| "loss": 0.4654, |
| "step": 22730 |
| }, |
| { |
| "epoch": 0.857045942788226, |
| "grad_norm": 1.6243325415551528, |
| "learning_rate": 6.100511850856083e-07, |
| "loss": 0.4877, |
| "step": 22740 |
| }, |
| { |
| "epoch": 0.8574228319451249, |
| "grad_norm": 1.76930759153765, |
| "learning_rate": 6.069061605083076e-07, |
| "loss": 0.4678, |
| "step": 22750 |
| }, |
| { |
| "epoch": 0.8577997211020238, |
| "grad_norm": 1.6289028881940613, |
| "learning_rate": 6.037687398571846e-07, |
| "loss": 0.4247, |
| "step": 22760 |
| }, |
| { |
| "epoch": 0.8581766102589229, |
| "grad_norm": 1.6696350756219092, |
| "learning_rate": 6.006389285627423e-07, |
| "loss": 0.4883, |
| "step": 22770 |
| }, |
| { |
| "epoch": 0.8585534994158218, |
| "grad_norm": 1.513884778231805, |
| "learning_rate": 5.975167320423137e-07, |
| "loss": 0.4606, |
| "step": 22780 |
| }, |
| { |
| "epoch": 0.8589303885727207, |
| "grad_norm": 1.6184179790577693, |
| "learning_rate": 5.94402155700054e-07, |
| "loss": 0.4408, |
| "step": 22790 |
| }, |
| { |
| "epoch": 0.8593072777296197, |
| "grad_norm": 1.8267950570439317, |
| "learning_rate": 5.912952049269271e-07, |
| "loss": 0.4638, |
| "step": 22800 |
| }, |
| { |
| "epoch": 0.8596841668865187, |
| "grad_norm": 1.411486088307513, |
| "learning_rate": 5.881958851006952e-07, |
| "loss": 0.4581, |
| "step": 22810 |
| }, |
| { |
| "epoch": 0.8600610560434176, |
| "grad_norm": 1.4674642682660017, |
| "learning_rate": 5.851042015859154e-07, |
| "loss": 0.4565, |
| "step": 22820 |
| }, |
| { |
| "epoch": 0.8604379452003166, |
| "grad_norm": 1.3939142639115625, |
| "learning_rate": 5.82020159733927e-07, |
| "loss": 0.4704, |
| "step": 22830 |
| }, |
| { |
| "epoch": 0.8608148343572155, |
| "grad_norm": 1.5415763619326572, |
| "learning_rate": 5.789437648828411e-07, |
| "loss": 0.433, |
| "step": 22840 |
| }, |
| { |
| "epoch": 0.8611917235141145, |
| "grad_norm": 1.6303887934059382, |
| "learning_rate": 5.758750223575344e-07, |
| "loss": 0.4747, |
| "step": 22850 |
| }, |
| { |
| "epoch": 0.8615686126710135, |
| "grad_norm": 1.6198622243082812, |
| "learning_rate": 5.728139374696368e-07, |
| "loss": 0.4741, |
| "step": 22860 |
| }, |
| { |
| "epoch": 0.8619455018279124, |
| "grad_norm": 1.6527922202005432, |
| "learning_rate": 5.697605155175246e-07, |
| "loss": 0.441, |
| "step": 22870 |
| }, |
| { |
| "epoch": 0.8623223909848113, |
| "grad_norm": 1.6598779751043562, |
| "learning_rate": 5.667147617863106e-07, |
| "loss": 0.465, |
| "step": 22880 |
| }, |
| { |
| "epoch": 0.8626992801417104, |
| "grad_norm": 1.601592197543877, |
| "learning_rate": 5.636766815478346e-07, |
| "loss": 0.4664, |
| "step": 22890 |
| }, |
| { |
| "epoch": 0.8630761692986093, |
| "grad_norm": 1.8518020978894572, |
| "learning_rate": 5.606462800606538e-07, |
| "loss": 0.4765, |
| "step": 22900 |
| }, |
| { |
| "epoch": 0.8634530584555082, |
| "grad_norm": 1.8569990976395905, |
| "learning_rate": 5.576235625700355e-07, |
| "loss": 0.4947, |
| "step": 22910 |
| }, |
| { |
| "epoch": 0.8638299476124072, |
| "grad_norm": 1.78143629478858, |
| "learning_rate": 5.546085343079472e-07, |
| "loss": 0.4733, |
| "step": 22920 |
| }, |
| { |
| "epoch": 0.8642068367693061, |
| "grad_norm": 1.7977943755656154, |
| "learning_rate": 5.516012004930432e-07, |
| "loss": 0.4677, |
| "step": 22930 |
| }, |
| { |
| "epoch": 0.8645837259262051, |
| "grad_norm": 1.3894902301565615, |
| "learning_rate": 5.486015663306665e-07, |
| "loss": 0.4603, |
| "step": 22940 |
| }, |
| { |
| "epoch": 0.8649606150831041, |
| "grad_norm": 1.8030057987995889, |
| "learning_rate": 5.456096370128277e-07, |
| "loss": 0.438, |
| "step": 22950 |
| }, |
| { |
| "epoch": 0.865337504240003, |
| "grad_norm": 1.7403239254852352, |
| "learning_rate": 5.426254177182039e-07, |
| "loss": 0.4651, |
| "step": 22960 |
| }, |
| { |
| "epoch": 0.8657143933969019, |
| "grad_norm": 1.5972874364344118, |
| "learning_rate": 5.396489136121241e-07, |
| "loss": 0.4621, |
| "step": 22970 |
| }, |
| { |
| "epoch": 0.866091282553801, |
| "grad_norm": 1.9303613177891243, |
| "learning_rate": 5.366801298465662e-07, |
| "loss": 0.4713, |
| "step": 22980 |
| }, |
| { |
| "epoch": 0.8664681717106999, |
| "grad_norm": 1.6872934044203467, |
| "learning_rate": 5.337190715601426e-07, |
| "loss": 0.4905, |
| "step": 22990 |
| }, |
| { |
| "epoch": 0.8668450608675988, |
| "grad_norm": 1.7126679035862156, |
| "learning_rate": 5.307657438780988e-07, |
| "loss": 0.4803, |
| "step": 23000 |
| }, |
| { |
| "epoch": 0.8672219500244978, |
| "grad_norm": 1.7350738496917792, |
| "learning_rate": 5.278201519122922e-07, |
| "loss": 0.4845, |
| "step": 23010 |
| }, |
| { |
| "epoch": 0.8675988391813968, |
| "grad_norm": 1.787434922425744, |
| "learning_rate": 5.248823007611964e-07, |
| "loss": 0.4893, |
| "step": 23020 |
| }, |
| { |
| "epoch": 0.8679757283382957, |
| "grad_norm": 1.872251424045816, |
| "learning_rate": 5.219521955098833e-07, |
| "loss": 0.4786, |
| "step": 23030 |
| }, |
| { |
| "epoch": 0.8683526174951947, |
| "grad_norm": 1.7715299011721564, |
| "learning_rate": 5.190298412300182e-07, |
| "loss": 0.4791, |
| "step": 23040 |
| }, |
| { |
| "epoch": 0.8687295066520936, |
| "grad_norm": 1.8357529427154649, |
| "learning_rate": 5.161152429798538e-07, |
| "loss": 0.4772, |
| "step": 23050 |
| }, |
| { |
| "epoch": 0.8691063958089926, |
| "grad_norm": 1.6077824935411622, |
| "learning_rate": 5.13208405804213e-07, |
| "loss": 0.4591, |
| "step": 23060 |
| }, |
| { |
| "epoch": 0.8694832849658916, |
| "grad_norm": 2.0708649306446567, |
| "learning_rate": 5.103093347344872e-07, |
| "loss": 0.4645, |
| "step": 23070 |
| }, |
| { |
| "epoch": 0.8698601741227905, |
| "grad_norm": 1.4566190018720826, |
| "learning_rate": 5.074180347886265e-07, |
| "loss": 0.4395, |
| "step": 23080 |
| }, |
| { |
| "epoch": 0.8702370632796894, |
| "grad_norm": 1.8556474690031441, |
| "learning_rate": 5.045345109711291e-07, |
| "loss": 0.4897, |
| "step": 23090 |
| }, |
| { |
| "epoch": 0.8706139524365883, |
| "grad_norm": 1.8191111683824606, |
| "learning_rate": 5.016587682730339e-07, |
| "loss": 0.4656, |
| "step": 23100 |
| }, |
| { |
| "epoch": 0.8709908415934874, |
| "grad_norm": 1.8020154384020783, |
| "learning_rate": 4.98790811671912e-07, |
| "loss": 0.4805, |
| "step": 23110 |
| }, |
| { |
| "epoch": 0.8713677307503863, |
| "grad_norm": 1.6434790782350468, |
| "learning_rate": 4.959306461318563e-07, |
| "loss": 0.4614, |
| "step": 23120 |
| }, |
| { |
| "epoch": 0.8717446199072852, |
| "grad_norm": 1.820609481455034, |
| "learning_rate": 4.930782766034775e-07, |
| "loss": 0.461, |
| "step": 23130 |
| }, |
| { |
| "epoch": 0.8721215090641842, |
| "grad_norm": 1.5856064167725252, |
| "learning_rate": 4.902337080238862e-07, |
| "loss": 0.4856, |
| "step": 23140 |
| }, |
| { |
| "epoch": 0.8724983982210832, |
| "grad_norm": 1.7306241882494389, |
| "learning_rate": 4.873969453166982e-07, |
| "loss": 0.4874, |
| "step": 23150 |
| }, |
| { |
| "epoch": 0.8728752873779821, |
| "grad_norm": 1.5611552008896656, |
| "learning_rate": 4.845679933920122e-07, |
| "loss": 0.4528, |
| "step": 23160 |
| }, |
| { |
| "epoch": 0.8732521765348811, |
| "grad_norm": 1.4214411570399696, |
| "learning_rate": 4.817468571464118e-07, |
| "loss": 0.4712, |
| "step": 23170 |
| }, |
| { |
| "epoch": 0.87362906569178, |
| "grad_norm": 1.4281643083118118, |
| "learning_rate": 4.789335414629481e-07, |
| "loss": 0.4506, |
| "step": 23180 |
| }, |
| { |
| "epoch": 0.874005954848679, |
| "grad_norm": 1.737558818061735, |
| "learning_rate": 4.761280512111377e-07, |
| "loss": 0.4896, |
| "step": 23190 |
| }, |
| { |
| "epoch": 0.874382844005578, |
| "grad_norm": 1.9801433378045732, |
| "learning_rate": 4.733303912469545e-07, |
| "loss": 0.4325, |
| "step": 23200 |
| }, |
| { |
| "epoch": 0.8747597331624769, |
| "grad_norm": 1.5293237221434222, |
| "learning_rate": 4.7054056641281767e-07, |
| "loss": 0.4658, |
| "step": 23210 |
| }, |
| { |
| "epoch": 0.8751366223193758, |
| "grad_norm": 1.5764509216468041, |
| "learning_rate": 4.6775858153758237e-07, |
| "loss": 0.4849, |
| "step": 23220 |
| }, |
| { |
| "epoch": 0.8755135114762749, |
| "grad_norm": 1.4828757798168408, |
| "learning_rate": 4.649844414365357e-07, |
| "loss": 0.4399, |
| "step": 23230 |
| }, |
| { |
| "epoch": 0.8758904006331738, |
| "grad_norm": 1.6081292880236426, |
| "learning_rate": 4.622181509113871e-07, |
| "loss": 0.4631, |
| "step": 23240 |
| }, |
| { |
| "epoch": 0.8762672897900727, |
| "grad_norm": 1.6761532852837226, |
| "learning_rate": 4.594597147502583e-07, |
| "loss": 0.4814, |
| "step": 23250 |
| }, |
| { |
| "epoch": 0.8766441789469717, |
| "grad_norm": 1.7471109969458456, |
| "learning_rate": 4.5670913772767665e-07, |
| "loss": 0.4681, |
| "step": 23260 |
| }, |
| { |
| "epoch": 0.8770210681038707, |
| "grad_norm": 1.670160553063871, |
| "learning_rate": 4.53966424604565e-07, |
| "loss": 0.4832, |
| "step": 23270 |
| }, |
| { |
| "epoch": 0.8773979572607696, |
| "grad_norm": 2.012186736194834, |
| "learning_rate": 4.5123158012823577e-07, |
| "loss": 0.4324, |
| "step": 23280 |
| }, |
| { |
| "epoch": 0.8777748464176686, |
| "grad_norm": 1.6439554732480879, |
| "learning_rate": 4.4850460903238193e-07, |
| "loss": 0.4924, |
| "step": 23290 |
| }, |
| { |
| "epoch": 0.8781517355745675, |
| "grad_norm": 1.5734128031410948, |
| "learning_rate": 4.4578551603706834e-07, |
| "loss": 0.479, |
| "step": 23300 |
| }, |
| { |
| "epoch": 0.8785286247314664, |
| "grad_norm": 1.5385928479066775, |
| "learning_rate": 4.4307430584872247e-07, |
| "loss": 0.4493, |
| "step": 23310 |
| }, |
| { |
| "epoch": 0.8789055138883655, |
| "grad_norm": 1.495012993416791, |
| "learning_rate": 4.403709831601299e-07, |
| "loss": 0.4394, |
| "step": 23320 |
| }, |
| { |
| "epoch": 0.8792824030452644, |
| "grad_norm": 1.7809672218234174, |
| "learning_rate": 4.3767555265042283e-07, |
| "loss": 0.4492, |
| "step": 23330 |
| }, |
| { |
| "epoch": 0.8796592922021633, |
| "grad_norm": 1.7530873048425202, |
| "learning_rate": 4.3498801898507027e-07, |
| "loss": 0.4556, |
| "step": 23340 |
| }, |
| { |
| "epoch": 0.8800361813590623, |
| "grad_norm": 1.6880143734094333, |
| "learning_rate": 4.323083868158784e-07, |
| "loss": 0.4908, |
| "step": 23350 |
| }, |
| { |
| "epoch": 0.8804130705159613, |
| "grad_norm": 1.7763398568413087, |
| "learning_rate": 4.2963666078097267e-07, |
| "loss": 0.464, |
| "step": 23360 |
| }, |
| { |
| "epoch": 0.8807899596728602, |
| "grad_norm": 1.534011001228688, |
| "learning_rate": 4.26972845504795e-07, |
| "loss": 0.466, |
| "step": 23370 |
| }, |
| { |
| "epoch": 0.8811668488297592, |
| "grad_norm": 1.6994612004248872, |
| "learning_rate": 4.2431694559809554e-07, |
| "loss": 0.5048, |
| "step": 23380 |
| }, |
| { |
| "epoch": 0.8815437379866581, |
| "grad_norm": 1.7973858705958985, |
| "learning_rate": 4.216689656579215e-07, |
| "loss": 0.4866, |
| "step": 23390 |
| }, |
| { |
| "epoch": 0.8819206271435571, |
| "grad_norm": 1.7439539082800413, |
| "learning_rate": 4.1902891026761316e-07, |
| "loss": 0.4917, |
| "step": 23400 |
| }, |
| { |
| "epoch": 0.882297516300456, |
| "grad_norm": 1.8633764405786568, |
| "learning_rate": 4.1639678399679586e-07, |
| "loss": 0.474, |
| "step": 23410 |
| }, |
| { |
| "epoch": 0.882674405457355, |
| "grad_norm": 3.0062093045952927, |
| "learning_rate": 4.137725914013696e-07, |
| "loss": 0.4931, |
| "step": 23420 |
| }, |
| { |
| "epoch": 0.8830512946142539, |
| "grad_norm": 1.6834986673559424, |
| "learning_rate": 4.1115633702349943e-07, |
| "loss": 0.4785, |
| "step": 23430 |
| }, |
| { |
| "epoch": 0.883428183771153, |
| "grad_norm": 1.528153535189198, |
| "learning_rate": 4.0854802539161353e-07, |
| "loss": 0.4653, |
| "step": 23440 |
| }, |
| { |
| "epoch": 0.8838050729280519, |
| "grad_norm": 1.9539698847148672, |
| "learning_rate": 4.0594766102039e-07, |
| "loss": 0.4624, |
| "step": 23450 |
| }, |
| { |
| "epoch": 0.8841819620849508, |
| "grad_norm": 1.8627808627003384, |
| "learning_rate": 4.03355248410755e-07, |
| "loss": 0.4597, |
| "step": 23460 |
| }, |
| { |
| "epoch": 0.8845588512418497, |
| "grad_norm": 1.8122593075205515, |
| "learning_rate": 4.007707920498649e-07, |
| "loss": 0.456, |
| "step": 23470 |
| }, |
| { |
| "epoch": 0.8849357403987488, |
| "grad_norm": 1.6711500055529624, |
| "learning_rate": 3.9819429641111074e-07, |
| "loss": 0.4732, |
| "step": 23480 |
| }, |
| { |
| "epoch": 0.8853126295556477, |
| "grad_norm": 1.5192643064244584, |
| "learning_rate": 3.956257659541002e-07, |
| "loss": 0.468, |
| "step": 23490 |
| }, |
| { |
| "epoch": 0.8856895187125466, |
| "grad_norm": 1.813662608056834, |
| "learning_rate": 3.93065205124657e-07, |
| "loss": 0.4752, |
| "step": 23500 |
| }, |
| { |
| "epoch": 0.8860664078694456, |
| "grad_norm": 1.7914587106316096, |
| "learning_rate": 3.905126183548086e-07, |
| "loss": 0.45, |
| "step": 23510 |
| }, |
| { |
| "epoch": 0.8864432970263445, |
| "grad_norm": 1.634445231205134, |
| "learning_rate": 3.879680100627814e-07, |
| "loss": 0.4823, |
| "step": 23520 |
| }, |
| { |
| "epoch": 0.8868201861832435, |
| "grad_norm": 1.5155533043912437, |
| "learning_rate": 3.854313846529917e-07, |
| "loss": 0.4738, |
| "step": 23530 |
| }, |
| { |
| "epoch": 0.8871970753401425, |
| "grad_norm": 1.3069686366746593, |
| "learning_rate": 3.8290274651603844e-07, |
| "loss": 0.4295, |
| "step": 23540 |
| }, |
| { |
| "epoch": 0.8875739644970414, |
| "grad_norm": 1.6900219347834147, |
| "learning_rate": 3.803821000286939e-07, |
| "loss": 0.4713, |
| "step": 23550 |
| }, |
| { |
| "epoch": 0.8879508536539403, |
| "grad_norm": 1.7207203874610701, |
| "learning_rate": 3.7786944955390094e-07, |
| "loss": 0.4833, |
| "step": 23560 |
| }, |
| { |
| "epoch": 0.8883277428108394, |
| "grad_norm": 1.6381226224767984, |
| "learning_rate": 3.7536479944075946e-07, |
| "loss": 0.4789, |
| "step": 23570 |
| }, |
| { |
| "epoch": 0.8887046319677383, |
| "grad_norm": 1.468117460132038, |
| "learning_rate": 3.7286815402452436e-07, |
| "loss": 0.4666, |
| "step": 23580 |
| }, |
| { |
| "epoch": 0.8890815211246372, |
| "grad_norm": 1.5096914213423873, |
| "learning_rate": 3.703795176265912e-07, |
| "loss": 0.4633, |
| "step": 23590 |
| }, |
| { |
| "epoch": 0.8894584102815362, |
| "grad_norm": 1.8984329064476526, |
| "learning_rate": 3.678988945544976e-07, |
| "loss": 0.4806, |
| "step": 23600 |
| }, |
| { |
| "epoch": 0.8898352994384352, |
| "grad_norm": 1.941908868591099, |
| "learning_rate": 3.654262891019067e-07, |
| "loss": 0.4769, |
| "step": 23610 |
| }, |
| { |
| "epoch": 0.8902121885953341, |
| "grad_norm": 1.8348751421543537, |
| "learning_rate": 3.6296170554860954e-07, |
| "loss": 0.4828, |
| "step": 23620 |
| }, |
| { |
| "epoch": 0.8905890777522331, |
| "grad_norm": 1.5016426776021727, |
| "learning_rate": 3.60505148160506e-07, |
| "loss": 0.4548, |
| "step": 23630 |
| }, |
| { |
| "epoch": 0.890965966909132, |
| "grad_norm": 1.7145005520740269, |
| "learning_rate": 3.5805662118960747e-07, |
| "loss": 0.5038, |
| "step": 23640 |
| }, |
| { |
| "epoch": 0.891342856066031, |
| "grad_norm": 1.705443534960991, |
| "learning_rate": 3.5561612887402565e-07, |
| "loss": 0.4417, |
| "step": 23650 |
| }, |
| { |
| "epoch": 0.89171974522293, |
| "grad_norm": 1.4890162691299267, |
| "learning_rate": 3.531836754379625e-07, |
| "loss": 0.462, |
| "step": 23660 |
| }, |
| { |
| "epoch": 0.8920966343798289, |
| "grad_norm": 3.142113368721328, |
| "learning_rate": 3.507592650917091e-07, |
| "loss": 0.5031, |
| "step": 23670 |
| }, |
| { |
| "epoch": 0.8924735235367278, |
| "grad_norm": 1.6644517980326772, |
| "learning_rate": 3.48342902031632e-07, |
| "loss": 0.4791, |
| "step": 23680 |
| }, |
| { |
| "epoch": 0.8928504126936267, |
| "grad_norm": 1.7717700337165432, |
| "learning_rate": 3.459345904401712e-07, |
| "loss": 0.4588, |
| "step": 23690 |
| }, |
| { |
| "epoch": 0.8932273018505258, |
| "grad_norm": 1.7866048077368766, |
| "learning_rate": 3.435343344858283e-07, |
| "loss": 0.4523, |
| "step": 23700 |
| }, |
| { |
| "epoch": 0.8936041910074247, |
| "grad_norm": 1.6579103062766696, |
| "learning_rate": 3.411421383231628e-07, |
| "loss": 0.4613, |
| "step": 23710 |
| }, |
| { |
| "epoch": 0.8939810801643236, |
| "grad_norm": 1.2395669709919346, |
| "learning_rate": 3.387580060927842e-07, |
| "loss": 0.4635, |
| "step": 23720 |
| }, |
| { |
| "epoch": 0.8943579693212226, |
| "grad_norm": 1.637561199613229, |
| "learning_rate": 3.363819419213432e-07, |
| "loss": 0.4709, |
| "step": 23730 |
| }, |
| { |
| "epoch": 0.8947348584781216, |
| "grad_norm": 1.3235058842010576, |
| "learning_rate": 3.3401394992152615e-07, |
| "loss": 0.4477, |
| "step": 23740 |
| }, |
| { |
| "epoch": 0.8951117476350205, |
| "grad_norm": 1.9666315503654384, |
| "learning_rate": 3.316540341920477e-07, |
| "loss": 0.5028, |
| "step": 23750 |
| }, |
| { |
| "epoch": 0.8954886367919195, |
| "grad_norm": 1.5304868239681946, |
| "learning_rate": 3.293021988176409e-07, |
| "loss": 0.494, |
| "step": 23760 |
| }, |
| { |
| "epoch": 0.8958655259488184, |
| "grad_norm": 1.7286366317517883, |
| "learning_rate": 3.269584478690574e-07, |
| "loss": 0.4889, |
| "step": 23770 |
| }, |
| { |
| "epoch": 0.8962424151057174, |
| "grad_norm": 1.3521612831885137, |
| "learning_rate": 3.2462278540305205e-07, |
| "loss": 0.4572, |
| "step": 23780 |
| }, |
| { |
| "epoch": 0.8966193042626164, |
| "grad_norm": 1.5149236125626533, |
| "learning_rate": 3.2229521546238097e-07, |
| "loss": 0.4456, |
| "step": 23790 |
| }, |
| { |
| "epoch": 0.8969961934195153, |
| "grad_norm": 1.673486048314399, |
| "learning_rate": 3.199757420757915e-07, |
| "loss": 0.4624, |
| "step": 23800 |
| }, |
| { |
| "epoch": 0.8973730825764142, |
| "grad_norm": 1.6567774362547631, |
| "learning_rate": 3.176643692580184e-07, |
| "loss": 0.4809, |
| "step": 23810 |
| }, |
| { |
| "epoch": 0.8977499717333133, |
| "grad_norm": 1.8358040993755154, |
| "learning_rate": 3.1536110100977514e-07, |
| "loss": 0.4801, |
| "step": 23820 |
| }, |
| { |
| "epoch": 0.8981268608902122, |
| "grad_norm": 1.6459275011239465, |
| "learning_rate": 3.130659413177478e-07, |
| "loss": 0.4345, |
| "step": 23830 |
| }, |
| { |
| "epoch": 0.8985037500471111, |
| "grad_norm": 1.8349612600952543, |
| "learning_rate": 3.107788941545842e-07, |
| "loss": 0.4614, |
| "step": 23840 |
| }, |
| { |
| "epoch": 0.8988806392040101, |
| "grad_norm": 1.7411885119540658, |
| "learning_rate": 3.0849996347889434e-07, |
| "loss": 0.4839, |
| "step": 23850 |
| }, |
| { |
| "epoch": 0.8992575283609091, |
| "grad_norm": 1.9747342875954943, |
| "learning_rate": 3.0622915323523683e-07, |
| "loss": 0.472, |
| "step": 23860 |
| }, |
| { |
| "epoch": 0.899634417517808, |
| "grad_norm": 1.5726613697693645, |
| "learning_rate": 3.039664673541165e-07, |
| "loss": 0.4805, |
| "step": 23870 |
| }, |
| { |
| "epoch": 0.900011306674707, |
| "grad_norm": 1.7695993345825292, |
| "learning_rate": 3.0171190975197553e-07, |
| "loss": 0.4646, |
| "step": 23880 |
| }, |
| { |
| "epoch": 0.9003881958316059, |
| "grad_norm": 1.6476558150869725, |
| "learning_rate": 2.994654843311856e-07, |
| "loss": 0.4738, |
| "step": 23890 |
| }, |
| { |
| "epoch": 0.9007650849885048, |
| "grad_norm": 1.839852845526703, |
| "learning_rate": 2.972271949800443e-07, |
| "loss": 0.4871, |
| "step": 23900 |
| }, |
| { |
| "epoch": 0.9011419741454039, |
| "grad_norm": 1.6296225754693838, |
| "learning_rate": 2.949970455727652e-07, |
| "loss": 0.4948, |
| "step": 23910 |
| }, |
| { |
| "epoch": 0.9015188633023028, |
| "grad_norm": 1.446451999177883, |
| "learning_rate": 2.9277503996947453e-07, |
| "loss": 0.4458, |
| "step": 23920 |
| }, |
| { |
| "epoch": 0.9018957524592017, |
| "grad_norm": 1.5091359658108487, |
| "learning_rate": 2.905611820162002e-07, |
| "loss": 0.4676, |
| "step": 23930 |
| }, |
| { |
| "epoch": 0.9022726416161007, |
| "grad_norm": 1.4935637256342174, |
| "learning_rate": 2.883554755448692e-07, |
| "loss": 0.4402, |
| "step": 23940 |
| }, |
| { |
| "epoch": 0.9026495307729997, |
| "grad_norm": 1.7255741199144814, |
| "learning_rate": 2.861579243732993e-07, |
| "loss": 0.4814, |
| "step": 23950 |
| }, |
| { |
| "epoch": 0.9030264199298986, |
| "grad_norm": 1.7337660198803386, |
| "learning_rate": 2.8396853230518993e-07, |
| "loss": 0.4631, |
| "step": 23960 |
| }, |
| { |
| "epoch": 0.9034033090867976, |
| "grad_norm": 1.8762174059045174, |
| "learning_rate": 2.8178730313012215e-07, |
| "loss": 0.4773, |
| "step": 23970 |
| }, |
| { |
| "epoch": 0.9037801982436965, |
| "grad_norm": 1.7366313753924991, |
| "learning_rate": 2.796142406235447e-07, |
| "loss": 0.4423, |
| "step": 23980 |
| }, |
| { |
| "epoch": 0.9041570874005955, |
| "grad_norm": 1.6724834106832989, |
| "learning_rate": 2.7744934854677274e-07, |
| "loss": 0.4486, |
| "step": 23990 |
| }, |
| { |
| "epoch": 0.9045339765574945, |
| "grad_norm": 1.935280413678451, |
| "learning_rate": 2.752926306469772e-07, |
| "loss": 0.4678, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.9049108657143934, |
| "grad_norm": 1.714097876276615, |
| "learning_rate": 2.731440906571825e-07, |
| "loss": 0.4606, |
| "step": 24010 |
| }, |
| { |
| "epoch": 0.9052877548712923, |
| "grad_norm": 1.4926141470560026, |
| "learning_rate": 2.710037322962572e-07, |
| "loss": 0.4681, |
| "step": 24020 |
| }, |
| { |
| "epoch": 0.9056646440281914, |
| "grad_norm": 1.722145408901395, |
| "learning_rate": 2.688715592689101e-07, |
| "loss": 0.4426, |
| "step": 24030 |
| }, |
| { |
| "epoch": 0.9060415331850903, |
| "grad_norm": 1.9291301430266703, |
| "learning_rate": 2.6674757526567895e-07, |
| "loss": 0.4764, |
| "step": 24040 |
| }, |
| { |
| "epoch": 0.9064184223419892, |
| "grad_norm": 1.7351194729472024, |
| "learning_rate": 2.646317839629292e-07, |
| "loss": 0.4556, |
| "step": 24050 |
| }, |
| { |
| "epoch": 0.9067953114988881, |
| "grad_norm": 1.3921208085094439, |
| "learning_rate": 2.625241890228464e-07, |
| "loss": 0.4437, |
| "step": 24060 |
| }, |
| { |
| "epoch": 0.9071722006557872, |
| "grad_norm": 5.211286582046159, |
| "learning_rate": 2.6042479409342734e-07, |
| "loss": 0.4792, |
| "step": 24070 |
| }, |
| { |
| "epoch": 0.9075490898126861, |
| "grad_norm": 1.6508702793082282, |
| "learning_rate": 2.5833360280847707e-07, |
| "loss": 0.4854, |
| "step": 24080 |
| }, |
| { |
| "epoch": 0.907925978969585, |
| "grad_norm": 1.5904848357115617, |
| "learning_rate": 2.562506187876007e-07, |
| "loss": 0.4795, |
| "step": 24090 |
| }, |
| { |
| "epoch": 0.908302868126484, |
| "grad_norm": 1.6275384635774426, |
| "learning_rate": 2.5417584563619647e-07, |
| "loss": 0.4781, |
| "step": 24100 |
| }, |
| { |
| "epoch": 0.9086797572833829, |
| "grad_norm": 1.5228159132695211, |
| "learning_rate": 2.521092869454528e-07, |
| "loss": 0.47, |
| "step": 24110 |
| }, |
| { |
| "epoch": 0.909056646440282, |
| "grad_norm": 1.5131657204163729, |
| "learning_rate": 2.5005094629233726e-07, |
| "loss": 0.4746, |
| "step": 24120 |
| }, |
| { |
| "epoch": 0.9094335355971809, |
| "grad_norm": 1.803558175951124, |
| "learning_rate": 2.4800082723959505e-07, |
| "loss": 0.4672, |
| "step": 24130 |
| }, |
| { |
| "epoch": 0.9098104247540798, |
| "grad_norm": 1.5532182659903644, |
| "learning_rate": 2.459589333357393e-07, |
| "loss": 0.465, |
| "step": 24140 |
| }, |
| { |
| "epoch": 0.9101873139109787, |
| "grad_norm": 1.8682246003822802, |
| "learning_rate": 2.439252681150472e-07, |
| "loss": 0.4661, |
| "step": 24150 |
| }, |
| { |
| "epoch": 0.9105642030678778, |
| "grad_norm": 1.6186148451233608, |
| "learning_rate": 2.418998350975543e-07, |
| "loss": 0.4562, |
| "step": 24160 |
| }, |
| { |
| "epoch": 0.9109410922247767, |
| "grad_norm": 1.9076729059759836, |
| "learning_rate": 2.398826377890423e-07, |
| "loss": 0.4791, |
| "step": 24170 |
| }, |
| { |
| "epoch": 0.9113179813816756, |
| "grad_norm": 1.5131142108015385, |
| "learning_rate": 2.378736796810449e-07, |
| "loss": 0.4574, |
| "step": 24180 |
| }, |
| { |
| "epoch": 0.9116948705385746, |
| "grad_norm": 1.6267367473954135, |
| "learning_rate": 2.3587296425082894e-07, |
| "loss": 0.4434, |
| "step": 24190 |
| }, |
| { |
| "epoch": 0.9120717596954736, |
| "grad_norm": 1.756928999491046, |
| "learning_rate": 2.338804949613982e-07, |
| "loss": 0.4753, |
| "step": 24200 |
| }, |
| { |
| "epoch": 0.9124486488523725, |
| "grad_norm": 1.678600318807189, |
| "learning_rate": 2.3189627526148007e-07, |
| "loss": 0.4892, |
| "step": 24210 |
| }, |
| { |
| "epoch": 0.9128255380092715, |
| "grad_norm": 1.6170139181507344, |
| "learning_rate": 2.299203085855234e-07, |
| "loss": 0.4578, |
| "step": 24220 |
| }, |
| { |
| "epoch": 0.9132024271661704, |
| "grad_norm": 1.5717138547339975, |
| "learning_rate": 2.279525983536951e-07, |
| "loss": 0.4751, |
| "step": 24230 |
| }, |
| { |
| "epoch": 0.9135793163230694, |
| "grad_norm": 1.7504585367583836, |
| "learning_rate": 2.2599314797186857e-07, |
| "loss": 0.4996, |
| "step": 24240 |
| }, |
| { |
| "epoch": 0.9139562054799684, |
| "grad_norm": 1.8589504523667113, |
| "learning_rate": 2.2404196083161968e-07, |
| "loss": 0.464, |
| "step": 24250 |
| }, |
| { |
| "epoch": 0.9143330946368673, |
| "grad_norm": 1.5549339824814492, |
| "learning_rate": 2.2209904031022356e-07, |
| "loss": 0.474, |
| "step": 24260 |
| }, |
| { |
| "epoch": 0.9147099837937662, |
| "grad_norm": 1.5431790462672454, |
| "learning_rate": 2.2016438977064624e-07, |
| "loss": 0.4688, |
| "step": 24270 |
| }, |
| { |
| "epoch": 0.9150868729506653, |
| "grad_norm": 1.5231675790646082, |
| "learning_rate": 2.18238012561538e-07, |
| "loss": 0.4459, |
| "step": 24280 |
| }, |
| { |
| "epoch": 0.9154637621075642, |
| "grad_norm": 1.213036543371949, |
| "learning_rate": 2.1631991201723102e-07, |
| "loss": 0.4214, |
| "step": 24290 |
| }, |
| { |
| "epoch": 0.9158406512644631, |
| "grad_norm": 1.6789791239635883, |
| "learning_rate": 2.1441009145773074e-07, |
| "loss": 0.4608, |
| "step": 24300 |
| }, |
| { |
| "epoch": 0.916217540421362, |
| "grad_norm": 1.646949624163508, |
| "learning_rate": 2.1250855418871008e-07, |
| "loss": 0.4597, |
| "step": 24310 |
| }, |
| { |
| "epoch": 0.916594429578261, |
| "grad_norm": 1.8335341035244128, |
| "learning_rate": 2.106153035015057e-07, |
| "loss": 0.4875, |
| "step": 24320 |
| }, |
| { |
| "epoch": 0.91697131873516, |
| "grad_norm": 1.5713652425889508, |
| "learning_rate": 2.0873034267311131e-07, |
| "loss": 0.4644, |
| "step": 24330 |
| }, |
| { |
| "epoch": 0.917348207892059, |
| "grad_norm": 1.7316298479198824, |
| "learning_rate": 2.0685367496617037e-07, |
| "loss": 0.4834, |
| "step": 24340 |
| }, |
| { |
| "epoch": 0.9177250970489579, |
| "grad_norm": 1.7723989836411413, |
| "learning_rate": 2.0498530362897283e-07, |
| "loss": 0.4794, |
| "step": 24350 |
| }, |
| { |
| "epoch": 0.9181019862058568, |
| "grad_norm": 1.5053617798457812, |
| "learning_rate": 2.031252318954502e-07, |
| "loss": 0.468, |
| "step": 24360 |
| }, |
| { |
| "epoch": 0.9184788753627559, |
| "grad_norm": 1.6353066163088765, |
| "learning_rate": 2.0127346298516426e-07, |
| "loss": 0.5002, |
| "step": 24370 |
| }, |
| { |
| "epoch": 0.9188557645196548, |
| "grad_norm": 1.6543832590665188, |
| "learning_rate": 1.9943000010331005e-07, |
| "loss": 0.4606, |
| "step": 24380 |
| }, |
| { |
| "epoch": 0.9192326536765537, |
| "grad_norm": 1.4329476172975082, |
| "learning_rate": 1.9759484644070347e-07, |
| "loss": 0.4732, |
| "step": 24390 |
| }, |
| { |
| "epoch": 0.9196095428334526, |
| "grad_norm": 1.803480078827421, |
| "learning_rate": 1.9576800517377924e-07, |
| "loss": 0.4525, |
| "step": 24400 |
| }, |
| { |
| "epoch": 0.9199864319903517, |
| "grad_norm": 1.5271475875148783, |
| "learning_rate": 1.9394947946458243e-07, |
| "loss": 0.4276, |
| "step": 24410 |
| }, |
| { |
| "epoch": 0.9203633211472506, |
| "grad_norm": 1.7436847616380555, |
| "learning_rate": 1.9213927246076623e-07, |
| "loss": 0.4601, |
| "step": 24420 |
| }, |
| { |
| "epoch": 0.9207402103041495, |
| "grad_norm": 1.7878744772222794, |
| "learning_rate": 1.9033738729558437e-07, |
| "loss": 0.4736, |
| "step": 24430 |
| }, |
| { |
| "epoch": 0.9211170994610485, |
| "grad_norm": 1.6372286383071935, |
| "learning_rate": 1.8854382708788976e-07, |
| "loss": 0.4829, |
| "step": 24440 |
| }, |
| { |
| "epoch": 0.9214939886179475, |
| "grad_norm": 1.8458554633785587, |
| "learning_rate": 1.8675859494212078e-07, |
| "loss": 0.4831, |
| "step": 24450 |
| }, |
| { |
| "epoch": 0.9218708777748464, |
| "grad_norm": 1.7303090763015152, |
| "learning_rate": 1.84981693948304e-07, |
| "loss": 0.4581, |
| "step": 24460 |
| }, |
| { |
| "epoch": 0.9222477669317454, |
| "grad_norm": 1.6324828539105698, |
| "learning_rate": 1.8321312718204477e-07, |
| "loss": 0.4652, |
| "step": 24470 |
| }, |
| { |
| "epoch": 0.9226246560886443, |
| "grad_norm": 1.6706150940449451, |
| "learning_rate": 1.8145289770452323e-07, |
| "loss": 0.4862, |
| "step": 24480 |
| }, |
| { |
| "epoch": 0.9230015452455432, |
| "grad_norm": 1.6307388216734264, |
| "learning_rate": 1.7970100856248896e-07, |
| "loss": 0.4872, |
| "step": 24490 |
| }, |
| { |
| "epoch": 0.9233784344024423, |
| "grad_norm": 1.584467065772082, |
| "learning_rate": 1.7795746278825465e-07, |
| "loss": 0.476, |
| "step": 24500 |
| }, |
| { |
| "epoch": 0.9237553235593412, |
| "grad_norm": 1.5002376627559966, |
| "learning_rate": 1.7622226339969185e-07, |
| "loss": 0.4608, |
| "step": 24510 |
| }, |
| { |
| "epoch": 0.9241322127162401, |
| "grad_norm": 1.3947526048448347, |
| "learning_rate": 1.7449541340022526e-07, |
| "loss": 0.4641, |
| "step": 24520 |
| }, |
| { |
| "epoch": 0.9245091018731391, |
| "grad_norm": 1.6798334967661213, |
| "learning_rate": 1.7277691577882892e-07, |
| "loss": 0.4469, |
| "step": 24530 |
| }, |
| { |
| "epoch": 0.9248859910300381, |
| "grad_norm": 1.589068317695499, |
| "learning_rate": 1.7106677351001855e-07, |
| "loss": 0.458, |
| "step": 24540 |
| }, |
| { |
| "epoch": 0.925262880186937, |
| "grad_norm": 1.8869275914521926, |
| "learning_rate": 1.693649895538485e-07, |
| "loss": 0.505, |
| "step": 24550 |
| }, |
| { |
| "epoch": 0.925639769343836, |
| "grad_norm": 1.9569489786208991, |
| "learning_rate": 1.6767156685590536e-07, |
| "loss": 0.451, |
| "step": 24560 |
| }, |
| { |
| "epoch": 0.9260166585007349, |
| "grad_norm": 1.6847190222460249, |
| "learning_rate": 1.65986508347305e-07, |
| "loss": 0.4609, |
| "step": 24570 |
| }, |
| { |
| "epoch": 0.9263935476576339, |
| "grad_norm": 1.8408894133770082, |
| "learning_rate": 1.6430981694468162e-07, |
| "loss": 0.4766, |
| "step": 24580 |
| }, |
| { |
| "epoch": 0.9267704368145329, |
| "grad_norm": 1.674533122441784, |
| "learning_rate": 1.626414955501926e-07, |
| "loss": 0.45, |
| "step": 24590 |
| }, |
| { |
| "epoch": 0.9271473259714318, |
| "grad_norm": 1.8741720379805382, |
| "learning_rate": 1.6098154705150416e-07, |
| "loss": 0.4661, |
| "step": 24600 |
| }, |
| { |
| "epoch": 0.9275242151283307, |
| "grad_norm": 1.733162525973201, |
| "learning_rate": 1.593299743217913e-07, |
| "loss": 0.4568, |
| "step": 24610 |
| }, |
| { |
| "epoch": 0.9279011042852298, |
| "grad_norm": 4.339443575234999, |
| "learning_rate": 1.5768678021973016e-07, |
| "loss": 0.4862, |
| "step": 24620 |
| }, |
| { |
| "epoch": 0.9282779934421287, |
| "grad_norm": 1.750943994731508, |
| "learning_rate": 1.5605196758949614e-07, |
| "loss": 0.4562, |
| "step": 24630 |
| }, |
| { |
| "epoch": 0.9286548825990276, |
| "grad_norm": 1.5174387434569223, |
| "learning_rate": 1.5442553926075687e-07, |
| "loss": 0.4362, |
| "step": 24640 |
| }, |
| { |
| "epoch": 0.9290317717559265, |
| "grad_norm": 1.724853168012421, |
| "learning_rate": 1.528074980486677e-07, |
| "loss": 0.4665, |
| "step": 24650 |
| }, |
| { |
| "epoch": 0.9294086609128256, |
| "grad_norm": 2.2911044470949418, |
| "learning_rate": 1.5119784675386607e-07, |
| "loss": 0.4655, |
| "step": 24660 |
| }, |
| { |
| "epoch": 0.9297855500697245, |
| "grad_norm": 1.7545886816782916, |
| "learning_rate": 1.4959658816246836e-07, |
| "loss": 0.4788, |
| "step": 24670 |
| }, |
| { |
| "epoch": 0.9301624392266234, |
| "grad_norm": 1.7138768298267077, |
| "learning_rate": 1.480037250460642e-07, |
| "loss": 0.4483, |
| "step": 24680 |
| }, |
| { |
| "epoch": 0.9305393283835224, |
| "grad_norm": 1.5647732024525882, |
| "learning_rate": 1.4641926016171092e-07, |
| "loss": 0.4594, |
| "step": 24690 |
| }, |
| { |
| "epoch": 0.9309162175404213, |
| "grad_norm": 1.704892085076998, |
| "learning_rate": 1.4484319625193033e-07, |
| "loss": 0.4761, |
| "step": 24700 |
| }, |
| { |
| "epoch": 0.9312931066973203, |
| "grad_norm": 1.8079933221397861, |
| "learning_rate": 1.4327553604470246e-07, |
| "loss": 0.4568, |
| "step": 24710 |
| }, |
| { |
| "epoch": 0.9316699958542193, |
| "grad_norm": 3.18841995794234, |
| "learning_rate": 1.4171628225346234e-07, |
| "loss": 0.4503, |
| "step": 24720 |
| }, |
| { |
| "epoch": 0.9320468850111182, |
| "grad_norm": 1.645937879683613, |
| "learning_rate": 1.4016543757709332e-07, |
| "loss": 0.4734, |
| "step": 24730 |
| }, |
| { |
| "epoch": 0.9324237741680171, |
| "grad_norm": 1.6358083401976105, |
| "learning_rate": 1.3862300469992484e-07, |
| "loss": 0.4707, |
| "step": 24740 |
| }, |
| { |
| "epoch": 0.9328006633249162, |
| "grad_norm": 1.6561818821247405, |
| "learning_rate": 1.3708898629172518e-07, |
| "loss": 0.4728, |
| "step": 24750 |
| }, |
| { |
| "epoch": 0.9331775524818151, |
| "grad_norm": 1.6439915924189832, |
| "learning_rate": 1.3556338500769982e-07, |
| "loss": 0.4671, |
| "step": 24760 |
| }, |
| { |
| "epoch": 0.933554441638714, |
| "grad_norm": 1.3928410136757627, |
| "learning_rate": 1.3404620348848375e-07, |
| "loss": 0.4766, |
| "step": 24770 |
| }, |
| { |
| "epoch": 0.933931330795613, |
| "grad_norm": 1.6629613706509152, |
| "learning_rate": 1.325374443601385e-07, |
| "loss": 0.4634, |
| "step": 24780 |
| }, |
| { |
| "epoch": 0.934308219952512, |
| "grad_norm": 1.3229160640821136, |
| "learning_rate": 1.310371102341479e-07, |
| "loss": 0.4527, |
| "step": 24790 |
| }, |
| { |
| "epoch": 0.9346851091094109, |
| "grad_norm": 1.6985771267573568, |
| "learning_rate": 1.2954520370741408e-07, |
| "loss": 0.4449, |
| "step": 24800 |
| }, |
| { |
| "epoch": 0.9350619982663099, |
| "grad_norm": 1.776417571410387, |
| "learning_rate": 1.280617273622492e-07, |
| "loss": 0.4278, |
| "step": 24810 |
| }, |
| { |
| "epoch": 0.9354388874232088, |
| "grad_norm": 1.9248752747557138, |
| "learning_rate": 1.2658668376637705e-07, |
| "loss": 0.4785, |
| "step": 24820 |
| }, |
| { |
| "epoch": 0.9358157765801078, |
| "grad_norm": 1.5537222976277234, |
| "learning_rate": 1.251200754729226e-07, |
| "loss": 0.4956, |
| "step": 24830 |
| }, |
| { |
| "epoch": 0.9361926657370068, |
| "grad_norm": 1.6183831830487874, |
| "learning_rate": 1.2366190502041186e-07, |
| "loss": 0.4592, |
| "step": 24840 |
| }, |
| { |
| "epoch": 0.9365695548939057, |
| "grad_norm": 1.7490907041034809, |
| "learning_rate": 1.222121749327654e-07, |
| "loss": 0.4916, |
| "step": 24850 |
| }, |
| { |
| "epoch": 0.9369464440508046, |
| "grad_norm": 1.568020359678583, |
| "learning_rate": 1.2077088771929535e-07, |
| "loss": 0.459, |
| "step": 24860 |
| }, |
| { |
| "epoch": 0.9373233332077037, |
| "grad_norm": 1.6766487321093162, |
| "learning_rate": 1.1933804587469843e-07, |
| "loss": 0.473, |
| "step": 24870 |
| }, |
| { |
| "epoch": 0.9377002223646026, |
| "grad_norm": 1.6215811602508874, |
| "learning_rate": 1.1791365187905524e-07, |
| "loss": 0.451, |
| "step": 24880 |
| }, |
| { |
| "epoch": 0.9380771115215015, |
| "grad_norm": 1.7382912119631357, |
| "learning_rate": 1.1649770819782247e-07, |
| "loss": 0.4738, |
| "step": 24890 |
| }, |
| { |
| "epoch": 0.9384540006784005, |
| "grad_norm": 1.882585550263763, |
| "learning_rate": 1.1509021728183301e-07, |
| "loss": 0.4945, |
| "step": 24900 |
| }, |
| { |
| "epoch": 0.9388308898352994, |
| "grad_norm": 1.412211827907065, |
| "learning_rate": 1.1369118156728587e-07, |
| "loss": 0.458, |
| "step": 24910 |
| }, |
| { |
| "epoch": 0.9392077789921984, |
| "grad_norm": 1.8359718454808982, |
| "learning_rate": 1.1230060347574679e-07, |
| "loss": 0.4584, |
| "step": 24920 |
| }, |
| { |
| "epoch": 0.9395846681490974, |
| "grad_norm": 1.489126957012612, |
| "learning_rate": 1.1091848541414262e-07, |
| "loss": 0.4817, |
| "step": 24930 |
| }, |
| { |
| "epoch": 0.9399615573059963, |
| "grad_norm": 2.0450896007301416, |
| "learning_rate": 1.0954482977475533e-07, |
| "loss": 0.4906, |
| "step": 24940 |
| }, |
| { |
| "epoch": 0.9403384464628952, |
| "grad_norm": 1.8209636581396178, |
| "learning_rate": 1.0817963893522132e-07, |
| "loss": 0.4844, |
| "step": 24950 |
| }, |
| { |
| "epoch": 0.9407153356197943, |
| "grad_norm": 1.708902383560193, |
| "learning_rate": 1.0682291525852484e-07, |
| "loss": 0.4711, |
| "step": 24960 |
| }, |
| { |
| "epoch": 0.9410922247766932, |
| "grad_norm": 3.052634153804913, |
| "learning_rate": 1.0547466109299298e-07, |
| "loss": 0.4866, |
| "step": 24970 |
| }, |
| { |
| "epoch": 0.9414691139335921, |
| "grad_norm": 1.6048817510008635, |
| "learning_rate": 1.0413487877229566e-07, |
| "loss": 0.4614, |
| "step": 24980 |
| }, |
| { |
| "epoch": 0.941846003090491, |
| "grad_norm": 1.6211384980618553, |
| "learning_rate": 1.0280357061543622e-07, |
| "loss": 0.496, |
| "step": 24990 |
| }, |
| { |
| "epoch": 0.9422228922473901, |
| "grad_norm": 1.7208732672554823, |
| "learning_rate": 1.0148073892675358e-07, |
| "loss": 0.4515, |
| "step": 25000 |
| }, |
| { |
| "epoch": 0.942599781404289, |
| "grad_norm": 1.242397602277574, |
| "learning_rate": 1.0016638599591122e-07, |
| "loss": 0.4522, |
| "step": 25010 |
| }, |
| { |
| "epoch": 0.942976670561188, |
| "grad_norm": 1.864373295532734, |
| "learning_rate": 9.886051409790042e-08, |
| "loss": 0.4541, |
| "step": 25020 |
| }, |
| { |
| "epoch": 0.9433535597180869, |
| "grad_norm": 1.7432924180401044, |
| "learning_rate": 9.756312549302982e-08, |
| "loss": 0.4807, |
| "step": 25030 |
| }, |
| { |
| "epoch": 0.9437304488749859, |
| "grad_norm": 1.3858687614024912, |
| "learning_rate": 9.627422242692585e-08, |
| "loss": 0.4545, |
| "step": 25040 |
| }, |
| { |
| "epoch": 0.9441073380318848, |
| "grad_norm": 1.7270495665971028, |
| "learning_rate": 9.499380713052785e-08, |
| "loss": 0.4589, |
| "step": 25050 |
| }, |
| { |
| "epoch": 0.9444842271887838, |
| "grad_norm": 1.5950305399452118, |
| "learning_rate": 9.372188182008358e-08, |
| "loss": 0.4564, |
| "step": 25060 |
| }, |
| { |
| "epoch": 0.9448611163456827, |
| "grad_norm": 1.7644110153181087, |
| "learning_rate": 9.245844869714471e-08, |
| "loss": 0.4859, |
| "step": 25070 |
| }, |
| { |
| "epoch": 0.9452380055025816, |
| "grad_norm": 1.440552445480469, |
| "learning_rate": 9.120350994856475e-08, |
| "loss": 0.4771, |
| "step": 25080 |
| }, |
| { |
| "epoch": 0.9456148946594807, |
| "grad_norm": 3.239934112076224, |
| "learning_rate": 8.995706774649504e-08, |
| "loss": 0.4731, |
| "step": 25090 |
| }, |
| { |
| "epoch": 0.9459917838163796, |
| "grad_norm": 1.9158332648079843, |
| "learning_rate": 8.87191242483787e-08, |
| "loss": 0.4597, |
| "step": 25100 |
| }, |
| { |
| "epoch": 0.9463686729732785, |
| "grad_norm": 1.5922507680686822, |
| "learning_rate": 8.748968159695004e-08, |
| "loss": 0.4316, |
| "step": 25110 |
| }, |
| { |
| "epoch": 0.9467455621301775, |
| "grad_norm": 1.792335011843171, |
| "learning_rate": 8.626874192022905e-08, |
| "loss": 0.5036, |
| "step": 25120 |
| }, |
| { |
| "epoch": 0.9471224512870765, |
| "grad_norm": 1.4714034304995909, |
| "learning_rate": 8.505630733151803e-08, |
| "loss": 0.453, |
| "step": 25130 |
| }, |
| { |
| "epoch": 0.9474993404439754, |
| "grad_norm": 1.4587868542295341, |
| "learning_rate": 8.385237992939777e-08, |
| "loss": 0.4731, |
| "step": 25140 |
| }, |
| { |
| "epoch": 0.9478762296008744, |
| "grad_norm": 1.7817219201477925, |
| "learning_rate": 8.265696179772465e-08, |
| "loss": 0.4456, |
| "step": 25150 |
| }, |
| { |
| "epoch": 0.9482531187577733, |
| "grad_norm": 1.621641692161085, |
| "learning_rate": 8.147005500562577e-08, |
| "loss": 0.4435, |
| "step": 25160 |
| }, |
| { |
| "epoch": 0.9486300079146723, |
| "grad_norm": 1.6494310365282547, |
| "learning_rate": 8.029166160749668e-08, |
| "loss": 0.4665, |
| "step": 25170 |
| }, |
| { |
| "epoch": 0.9490068970715713, |
| "grad_norm": 1.4731323966452758, |
| "learning_rate": 7.912178364299694e-08, |
| "loss": 0.4633, |
| "step": 25180 |
| }, |
| { |
| "epoch": 0.9493837862284702, |
| "grad_norm": 1.4539950989850374, |
| "learning_rate": 7.796042313704733e-08, |
| "loss": 0.4761, |
| "step": 25190 |
| }, |
| { |
| "epoch": 0.9497606753853691, |
| "grad_norm": 1.7614354811781805, |
| "learning_rate": 7.680758209982541e-08, |
| "loss": 0.4874, |
| "step": 25200 |
| }, |
| { |
| "epoch": 0.9501375645422682, |
| "grad_norm": 1.5146197995816046, |
| "learning_rate": 7.566326252676226e-08, |
| "loss": 0.4565, |
| "step": 25210 |
| }, |
| { |
| "epoch": 0.9505144536991671, |
| "grad_norm": 1.4964331021798207, |
| "learning_rate": 7.452746639854069e-08, |
| "loss": 0.4732, |
| "step": 25220 |
| }, |
| { |
| "epoch": 0.950891342856066, |
| "grad_norm": 1.6531417972845415, |
| "learning_rate": 7.34001956810898e-08, |
| "loss": 0.4539, |
| "step": 25230 |
| }, |
| { |
| "epoch": 0.951268232012965, |
| "grad_norm": 1.4443997141550056, |
| "learning_rate": 7.228145232558048e-08, |
| "loss": 0.4651, |
| "step": 25240 |
| }, |
| { |
| "epoch": 0.951645121169864, |
| "grad_norm": 1.6488863792857795, |
| "learning_rate": 7.117123826842598e-08, |
| "loss": 0.4655, |
| "step": 25250 |
| }, |
| { |
| "epoch": 0.9520220103267629, |
| "grad_norm": 2.066125366240161, |
| "learning_rate": 7.00695554312758e-08, |
| "loss": 0.4818, |
| "step": 25260 |
| }, |
| { |
| "epoch": 0.9523988994836619, |
| "grad_norm": 1.7989280362677504, |
| "learning_rate": 6.897640572101294e-08, |
| "loss": 0.4592, |
| "step": 25270 |
| }, |
| { |
| "epoch": 0.9527757886405608, |
| "grad_norm": 1.5907695066167318, |
| "learning_rate": 6.789179102974996e-08, |
| "loss": 0.4615, |
| "step": 25280 |
| }, |
| { |
| "epoch": 0.9531526777974597, |
| "grad_norm": 1.6474294146755808, |
| "learning_rate": 6.681571323482628e-08, |
| "loss": 0.4745, |
| "step": 25290 |
| }, |
| { |
| "epoch": 0.9535295669543588, |
| "grad_norm": 1.8575461910907407, |
| "learning_rate": 6.574817419880586e-08, |
| "loss": 0.4843, |
| "step": 25300 |
| }, |
| { |
| "epoch": 0.9539064561112577, |
| "grad_norm": 1.5427872527092095, |
| "learning_rate": 6.468917576947287e-08, |
| "loss": 0.4616, |
| "step": 25310 |
| }, |
| { |
| "epoch": 0.9542833452681566, |
| "grad_norm": 2.4865362259718116, |
| "learning_rate": 6.363871977982827e-08, |
| "loss": 0.4617, |
| "step": 25320 |
| }, |
| { |
| "epoch": 0.9546602344250555, |
| "grad_norm": 1.7543026874769245, |
| "learning_rate": 6.259680804808654e-08, |
| "loss": 0.4666, |
| "step": 25330 |
| }, |
| { |
| "epoch": 0.9550371235819546, |
| "grad_norm": 1.604570679456589, |
| "learning_rate": 6.156344237767453e-08, |
| "loss": 0.494, |
| "step": 25340 |
| }, |
| { |
| "epoch": 0.9554140127388535, |
| "grad_norm": 1.8669803638424052, |
| "learning_rate": 6.053862455722593e-08, |
| "loss": 0.4654, |
| "step": 25350 |
| }, |
| { |
| "epoch": 0.9557909018957524, |
| "grad_norm": 1.3581166661184105, |
| "learning_rate": 5.952235636057902e-08, |
| "loss": 0.4403, |
| "step": 25360 |
| }, |
| { |
| "epoch": 0.9561677910526514, |
| "grad_norm": 1.5400521124436184, |
| "learning_rate": 5.851463954677394e-08, |
| "loss": 0.4653, |
| "step": 25370 |
| }, |
| { |
| "epoch": 0.9565446802095504, |
| "grad_norm": 1.8105900226878286, |
| "learning_rate": 5.7515475860049354e-08, |
| "loss": 0.4874, |
| "step": 25380 |
| }, |
| { |
| "epoch": 0.9569215693664493, |
| "grad_norm": 1.4382499444470058, |
| "learning_rate": 5.652486702984017e-08, |
| "loss": 0.4648, |
| "step": 25390 |
| }, |
| { |
| "epoch": 0.9572984585233483, |
| "grad_norm": 1.7631024504492547, |
| "learning_rate": 5.554281477077206e-08, |
| "loss": 0.4704, |
| "step": 25400 |
| }, |
| { |
| "epoch": 0.9576753476802472, |
| "grad_norm": 1.6429543944439136, |
| "learning_rate": 5.456932078266197e-08, |
| "loss": 0.4655, |
| "step": 25410 |
| }, |
| { |
| "epoch": 0.9580522368371462, |
| "grad_norm": 1.6616938198698554, |
| "learning_rate": 5.360438675051316e-08, |
| "loss": 0.4773, |
| "step": 25420 |
| }, |
| { |
| "epoch": 0.9584291259940452, |
| "grad_norm": 1.7874329868405086, |
| "learning_rate": 5.264801434451239e-08, |
| "loss": 0.4466, |
| "step": 25430 |
| }, |
| { |
| "epoch": 0.9588060151509441, |
| "grad_norm": 1.2294510476080451, |
| "learning_rate": 5.170020522002661e-08, |
| "loss": 0.4711, |
| "step": 25440 |
| }, |
| { |
| "epoch": 0.959182904307843, |
| "grad_norm": 1.9210918144126727, |
| "learning_rate": 5.0760961017602415e-08, |
| "loss": 0.4725, |
| "step": 25450 |
| }, |
| { |
| "epoch": 0.9595597934647421, |
| "grad_norm": 1.8227209874755572, |
| "learning_rate": 4.983028336295881e-08, |
| "loss": 0.4476, |
| "step": 25460 |
| }, |
| { |
| "epoch": 0.959936682621641, |
| "grad_norm": 1.788350611886206, |
| "learning_rate": 4.8908173866990535e-08, |
| "loss": 0.4567, |
| "step": 25470 |
| }, |
| { |
| "epoch": 0.9603135717785399, |
| "grad_norm": 1.775020734539053, |
| "learning_rate": 4.799463412575978e-08, |
| "loss": 0.5112, |
| "step": 25480 |
| }, |
| { |
| "epoch": 0.9606904609354389, |
| "grad_norm": 1.5375809927016806, |
| "learning_rate": 4.7089665720495e-08, |
| "loss": 0.4391, |
| "step": 25490 |
| }, |
| { |
| "epoch": 0.9610673500923378, |
| "grad_norm": 1.5639240748918983, |
| "learning_rate": 4.619327021759046e-08, |
| "loss": 0.4785, |
| "step": 25500 |
| }, |
| { |
| "epoch": 0.9614442392492368, |
| "grad_norm": 1.764963080417895, |
| "learning_rate": 4.5305449168600024e-08, |
| "loss": 0.4736, |
| "step": 25510 |
| }, |
| { |
| "epoch": 0.9618211284061358, |
| "grad_norm": 1.6562005841477103, |
| "learning_rate": 4.4426204110237794e-08, |
| "loss": 0.4747, |
| "step": 25520 |
| }, |
| { |
| "epoch": 0.9621980175630347, |
| "grad_norm": 1.6196235223477182, |
| "learning_rate": 4.35555365643725e-08, |
| "loss": 0.4522, |
| "step": 25530 |
| }, |
| { |
| "epoch": 0.9625749067199336, |
| "grad_norm": 1.5383045592640703, |
| "learning_rate": 4.2693448038026996e-08, |
| "loss": 0.4704, |
| "step": 25540 |
| }, |
| { |
| "epoch": 0.9629517958768327, |
| "grad_norm": 1.8859648544559755, |
| "learning_rate": 4.1839940023374884e-08, |
| "loss": 0.4595, |
| "step": 25550 |
| }, |
| { |
| "epoch": 0.9633286850337316, |
| "grad_norm": 1.703004713270723, |
| "learning_rate": 4.0995013997736644e-08, |
| "loss": 0.4767, |
| "step": 25560 |
| }, |
| { |
| "epoch": 0.9637055741906305, |
| "grad_norm": 1.76025981546061, |
| "learning_rate": 4.015867142358076e-08, |
| "loss": 0.4687, |
| "step": 25570 |
| }, |
| { |
| "epoch": 0.9640824633475295, |
| "grad_norm": 1.9694305314081857, |
| "learning_rate": 3.933091374851594e-08, |
| "loss": 0.4558, |
| "step": 25580 |
| }, |
| { |
| "epoch": 0.9644593525044285, |
| "grad_norm": 1.3980420080759448, |
| "learning_rate": 3.8511742405293875e-08, |
| "loss": 0.4618, |
| "step": 25590 |
| }, |
| { |
| "epoch": 0.9648362416613274, |
| "grad_norm": 1.6493052417006149, |
| "learning_rate": 3.7701158811803694e-08, |
| "loss": 0.504, |
| "step": 25600 |
| }, |
| { |
| "epoch": 0.9652131308182264, |
| "grad_norm": 1.619654736340804, |
| "learning_rate": 3.6899164371068105e-08, |
| "loss": 0.4445, |
| "step": 25610 |
| }, |
| { |
| "epoch": 0.9655900199751253, |
| "grad_norm": 1.843793532222181, |
| "learning_rate": 3.610576047124614e-08, |
| "loss": 0.4668, |
| "step": 25620 |
| }, |
| { |
| "epoch": 0.9659669091320243, |
| "grad_norm": 2.1787530344860424, |
| "learning_rate": 3.5320948485625395e-08, |
| "loss": 0.4555, |
| "step": 25630 |
| }, |
| { |
| "epoch": 0.9663437982889233, |
| "grad_norm": 1.6664586339835825, |
| "learning_rate": 3.454472977262369e-08, |
| "loss": 0.4812, |
| "step": 25640 |
| }, |
| { |
| "epoch": 0.9667206874458222, |
| "grad_norm": 1.9221822760952356, |
| "learning_rate": 3.3777105675782965e-08, |
| "loss": 0.4762, |
| "step": 25650 |
| }, |
| { |
| "epoch": 0.9670975766027211, |
| "grad_norm": 1.5535350148707952, |
| "learning_rate": 3.3018077523769844e-08, |
| "loss": 0.4743, |
| "step": 25660 |
| }, |
| { |
| "epoch": 0.9674744657596202, |
| "grad_norm": 1.6826206259214103, |
| "learning_rate": 3.226764663037285e-08, |
| "loss": 0.4819, |
| "step": 25670 |
| }, |
| { |
| "epoch": 0.9678513549165191, |
| "grad_norm": 1.5372332783224485, |
| "learning_rate": 3.152581429449853e-08, |
| "loss": 0.4529, |
| "step": 25680 |
| }, |
| { |
| "epoch": 0.968228244073418, |
| "grad_norm": 1.6814181811804552, |
| "learning_rate": 3.079258180017142e-08, |
| "loss": 0.4824, |
| "step": 25690 |
| }, |
| { |
| "epoch": 0.9686051332303169, |
| "grad_norm": 1.7034619827760054, |
| "learning_rate": 3.006795041653021e-08, |
| "loss": 0.4618, |
| "step": 25700 |
| }, |
| { |
| "epoch": 0.9689820223872159, |
| "grad_norm": 2.003962281008904, |
| "learning_rate": 2.9351921397826055e-08, |
| "loss": 0.4614, |
| "step": 25710 |
| }, |
| { |
| "epoch": 0.9693589115441149, |
| "grad_norm": 1.787441426014292, |
| "learning_rate": 2.864449598342034e-08, |
| "loss": 0.4533, |
| "step": 25720 |
| }, |
| { |
| "epoch": 0.9697358007010138, |
| "grad_norm": 1.5820307731521495, |
| "learning_rate": 2.794567539778359e-08, |
| "loss": 0.4599, |
| "step": 25730 |
| }, |
| { |
| "epoch": 0.9701126898579128, |
| "grad_norm": 1.7447057484780453, |
| "learning_rate": 2.725546085049047e-08, |
| "loss": 0.4572, |
| "step": 25740 |
| }, |
| { |
| "epoch": 0.9704895790148117, |
| "grad_norm": 1.7232041715746915, |
| "learning_rate": 2.6573853536221992e-08, |
| "loss": 0.475, |
| "step": 25750 |
| }, |
| { |
| "epoch": 0.9708664681717107, |
| "grad_norm": 1.6685687883415614, |
| "learning_rate": 2.590085463475944e-08, |
| "loss": 0.4587, |
| "step": 25760 |
| }, |
| { |
| "epoch": 0.9712433573286097, |
| "grad_norm": 1.7482487866337566, |
| "learning_rate": 2.5236465310984336e-08, |
| "loss": 0.4702, |
| "step": 25770 |
| }, |
| { |
| "epoch": 0.9716202464855086, |
| "grad_norm": 1.7353631005801522, |
| "learning_rate": 2.458068671487568e-08, |
| "loss": 0.462, |
| "step": 25780 |
| }, |
| { |
| "epoch": 0.9719971356424075, |
| "grad_norm": 1.9350104216433035, |
| "learning_rate": 2.3933519981508834e-08, |
| "loss": 0.4588, |
| "step": 25790 |
| }, |
| { |
| "epoch": 0.9723740247993066, |
| "grad_norm": 1.6857080051807094, |
| "learning_rate": 2.3294966231053873e-08, |
| "loss": 0.468, |
| "step": 25800 |
| }, |
| { |
| "epoch": 0.9727509139562055, |
| "grad_norm": 1.7680489987031007, |
| "learning_rate": 2.266502656877001e-08, |
| "loss": 0.4694, |
| "step": 25810 |
| }, |
| { |
| "epoch": 0.9731278031131044, |
| "grad_norm": 1.4689352131990807, |
| "learning_rate": 2.2043702085010056e-08, |
| "loss": 0.4501, |
| "step": 25820 |
| }, |
| { |
| "epoch": 0.9735046922700034, |
| "grad_norm": 1.5690350494704872, |
| "learning_rate": 2.1430993855212635e-08, |
| "loss": 0.4789, |
| "step": 25830 |
| }, |
| { |
| "epoch": 0.9738815814269024, |
| "grad_norm": 1.4356673233784094, |
| "learning_rate": 2.0826902939903304e-08, |
| "loss": 0.4585, |
| "step": 25840 |
| }, |
| { |
| "epoch": 0.9742584705838013, |
| "grad_norm": 1.757515012502445, |
| "learning_rate": 2.023143038469233e-08, |
| "loss": 0.4944, |
| "step": 25850 |
| }, |
| { |
| "epoch": 0.9746353597407003, |
| "grad_norm": 1.6306453233507494, |
| "learning_rate": 1.9644577220271354e-08, |
| "loss": 0.4539, |
| "step": 25860 |
| }, |
| { |
| "epoch": 0.9750122488975992, |
| "grad_norm": 1.4314817446192643, |
| "learning_rate": 1.906634446241451e-08, |
| "loss": 0.4734, |
| "step": 25870 |
| }, |
| { |
| "epoch": 0.9753891380544981, |
| "grad_norm": 1.5806180685416475, |
| "learning_rate": 1.849673311197453e-08, |
| "loss": 0.4399, |
| "step": 25880 |
| }, |
| { |
| "epoch": 0.9757660272113972, |
| "grad_norm": 1.6520753364457346, |
| "learning_rate": 1.7935744154881087e-08, |
| "loss": 0.4566, |
| "step": 25890 |
| }, |
| { |
| "epoch": 0.9761429163682961, |
| "grad_norm": 1.810709467270653, |
| "learning_rate": 1.7383378562139674e-08, |
| "loss": 0.4487, |
| "step": 25900 |
| }, |
| { |
| "epoch": 0.976519805525195, |
| "grad_norm": 1.61252056021322, |
| "learning_rate": 1.683963728983051e-08, |
| "loss": 0.4715, |
| "step": 25910 |
| }, |
| { |
| "epoch": 0.976896694682094, |
| "grad_norm": 2.425481703892548, |
| "learning_rate": 1.630452127910409e-08, |
| "loss": 0.4637, |
| "step": 25920 |
| }, |
| { |
| "epoch": 0.977273583838993, |
| "grad_norm": 1.540972978097639, |
| "learning_rate": 1.5778031456184507e-08, |
| "loss": 0.4608, |
| "step": 25930 |
| }, |
| { |
| "epoch": 0.9776504729958919, |
| "grad_norm": 1.509850379830826, |
| "learning_rate": 1.5260168732362245e-08, |
| "loss": 0.4722, |
| "step": 25940 |
| }, |
| { |
| "epoch": 0.9780273621527908, |
| "grad_norm": 1.6782554143346065, |
| "learning_rate": 1.4750934003996965e-08, |
| "loss": 0.4587, |
| "step": 25950 |
| }, |
| { |
| "epoch": 0.9784042513096898, |
| "grad_norm": 1.5550524539915491, |
| "learning_rate": 1.4250328152514147e-08, |
| "loss": 0.4865, |
| "step": 25960 |
| }, |
| { |
| "epoch": 0.9787811404665888, |
| "grad_norm": 1.6682526527444668, |
| "learning_rate": 1.3758352044402345e-08, |
| "loss": 0.4975, |
| "step": 25970 |
| }, |
| { |
| "epoch": 0.9791580296234877, |
| "grad_norm": 1.613736685118058, |
| "learning_rate": 1.3275006531215384e-08, |
| "loss": 0.4624, |
| "step": 25980 |
| }, |
| { |
| "epoch": 0.9795349187803867, |
| "grad_norm": 1.6329195096750337, |
| "learning_rate": 1.2800292449566265e-08, |
| "loss": 0.4681, |
| "step": 25990 |
| }, |
| { |
| "epoch": 0.9799118079372856, |
| "grad_norm": 1.7759191946586195, |
| "learning_rate": 1.2334210621128827e-08, |
| "loss": 0.4862, |
| "step": 26000 |
| }, |
| { |
| "epoch": 0.9802886970941846, |
| "grad_norm": 1.6432481031917991, |
| "learning_rate": 1.1876761852636642e-08, |
| "loss": 0.472, |
| "step": 26010 |
| }, |
| { |
| "epoch": 0.9806655862510836, |
| "grad_norm": 1.902773292717396, |
| "learning_rate": 1.1427946935878009e-08, |
| "loss": 0.4545, |
| "step": 26020 |
| }, |
| { |
| "epoch": 0.9810424754079825, |
| "grad_norm": 1.6705242880751991, |
| "learning_rate": 1.0987766647699849e-08, |
| "loss": 0.4702, |
| "step": 26030 |
| }, |
| { |
| "epoch": 0.9814193645648814, |
| "grad_norm": 1.971413464598701, |
| "learning_rate": 1.055622175000104e-08, |
| "loss": 0.4794, |
| "step": 26040 |
| }, |
| { |
| "epoch": 0.9817962537217805, |
| "grad_norm": 1.6534906902045443, |
| "learning_rate": 1.01333129897363e-08, |
| "loss": 0.4578, |
| "step": 26050 |
| }, |
| { |
| "epoch": 0.9821731428786794, |
| "grad_norm": 1.54977127095544, |
| "learning_rate": 9.719041098909532e-09, |
| "loss": 0.4777, |
| "step": 26060 |
| }, |
| { |
| "epoch": 0.9825500320355783, |
| "grad_norm": 1.6720729481583012, |
| "learning_rate": 9.31340679457604e-09, |
| "loss": 0.4823, |
| "step": 26070 |
| }, |
| { |
| "epoch": 0.9829269211924773, |
| "grad_norm": 1.558146371421349, |
| "learning_rate": 8.916410778841978e-09, |
| "loss": 0.491, |
| "step": 26080 |
| }, |
| { |
| "epoch": 0.9833038103493762, |
| "grad_norm": 1.4251003076555135, |
| "learning_rate": 8.528053738860453e-09, |
| "loss": 0.4644, |
| "step": 26090 |
| }, |
| { |
| "epoch": 0.9836806995062752, |
| "grad_norm": 1.8009507558620492, |
| "learning_rate": 8.148336346830987e-09, |
| "loss": 0.4888, |
| "step": 26100 |
| }, |
| { |
| "epoch": 0.9840575886631742, |
| "grad_norm": 1.814480801176586, |
| "learning_rate": 7.777259259999503e-09, |
| "loss": 0.4629, |
| "step": 26110 |
| }, |
| { |
| "epoch": 0.9844344778200731, |
| "grad_norm": 2.196528914285027, |
| "learning_rate": 7.4148231206566665e-09, |
| "loss": 0.4731, |
| "step": 26120 |
| }, |
| { |
| "epoch": 0.984811366976972, |
| "grad_norm": 1.7187541640111528, |
| "learning_rate": 7.061028556136773e-09, |
| "loss": 0.4639, |
| "step": 26130 |
| }, |
| { |
| "epoch": 0.9851882561338711, |
| "grad_norm": 1.5937258075062353, |
| "learning_rate": 6.715876178816638e-09, |
| "loss": 0.4674, |
| "step": 26140 |
| }, |
| { |
| "epoch": 0.98556514529077, |
| "grad_norm": 1.8506500710664755, |
| "learning_rate": 6.379366586113933e-09, |
| "loss": 0.4652, |
| "step": 26150 |
| }, |
| { |
| "epoch": 0.9859420344476689, |
| "grad_norm": 1.6855411533697344, |
| "learning_rate": 6.051500360486628e-09, |
| "loss": 0.4527, |
| "step": 26160 |
| }, |
| { |
| "epoch": 0.9863189236045679, |
| "grad_norm": 1.5117238865140932, |
| "learning_rate": 5.732278069432995e-09, |
| "loss": 0.4779, |
| "step": 26170 |
| }, |
| { |
| "epoch": 0.9866958127614669, |
| "grad_norm": 1.6187884945554707, |
| "learning_rate": 5.421700265488827e-09, |
| "loss": 0.4611, |
| "step": 26180 |
| }, |
| { |
| "epoch": 0.9870727019183658, |
| "grad_norm": 1.3918686813033736, |
| "learning_rate": 5.119767486228e-09, |
| "loss": 0.4547, |
| "step": 26190 |
| }, |
| { |
| "epoch": 0.9874495910752648, |
| "grad_norm": 1.6724439609048405, |
| "learning_rate": 4.826480254259691e-09, |
| "loss": 0.4562, |
| "step": 26200 |
| }, |
| { |
| "epoch": 0.9878264802321637, |
| "grad_norm": 1.540870827083842, |
| "learning_rate": 4.541839077230048e-09, |
| "loss": 0.4569, |
| "step": 26210 |
| }, |
| { |
| "epoch": 0.9882033693890627, |
| "grad_norm": 1.5544229067227138, |
| "learning_rate": 4.265844447818856e-09, |
| "loss": 0.4467, |
| "step": 26220 |
| }, |
| { |
| "epoch": 0.9885802585459617, |
| "grad_norm": 1.6957676041843368, |
| "learning_rate": 3.9984968437406515e-09, |
| "loss": 0.4758, |
| "step": 26230 |
| }, |
| { |
| "epoch": 0.9889571477028606, |
| "grad_norm": 1.4992330073607234, |
| "learning_rate": 3.73979672774194e-09, |
| "loss": 0.4748, |
| "step": 26240 |
| }, |
| { |
| "epoch": 0.9893340368597595, |
| "grad_norm": 1.8025093840339432, |
| "learning_rate": 3.4897445476028692e-09, |
| "loss": 0.4879, |
| "step": 26250 |
| }, |
| { |
| "epoch": 0.9897109260166586, |
| "grad_norm": 1.8270840898752077, |
| "learning_rate": 3.2483407361338926e-09, |
| "loss": 0.4738, |
| "step": 26260 |
| }, |
| { |
| "epoch": 0.9900878151735575, |
| "grad_norm": 1.6829652388909495, |
| "learning_rate": 3.0155857111757724e-09, |
| "loss": 0.456, |
| "step": 26270 |
| }, |
| { |
| "epoch": 0.9904647043304564, |
| "grad_norm": 1.5636345290979268, |
| "learning_rate": 2.791479875600689e-09, |
| "loss": 0.454, |
| "step": 26280 |
| }, |
| { |
| "epoch": 0.9908415934873553, |
| "grad_norm": 1.4293143834926183, |
| "learning_rate": 2.5760236173094643e-09, |
| "loss": 0.4662, |
| "step": 26290 |
| }, |
| { |
| "epoch": 0.9912184826442543, |
| "grad_norm": 1.5866582767032813, |
| "learning_rate": 2.369217309231009e-09, |
| "loss": 0.4303, |
| "step": 26300 |
| }, |
| { |
| "epoch": 0.9915953718011533, |
| "grad_norm": 1.6824826996769227, |
| "learning_rate": 2.171061309322875e-09, |
| "loss": 0.4625, |
| "step": 26310 |
| }, |
| { |
| "epoch": 0.9919722609580522, |
| "grad_norm": 1.4469762688853414, |
| "learning_rate": 1.981555960569037e-09, |
| "loss": 0.4659, |
| "step": 26320 |
| }, |
| { |
| "epoch": 0.9923491501149512, |
| "grad_norm": 1.863209068946735, |
| "learning_rate": 1.8007015909815574e-09, |
| "loss": 0.4444, |
| "step": 26330 |
| }, |
| { |
| "epoch": 0.9927260392718501, |
| "grad_norm": 1.3864226632030934, |
| "learning_rate": 1.6284985135978093e-09, |
| "loss": 0.4594, |
| "step": 26340 |
| }, |
| { |
| "epoch": 0.9931029284287491, |
| "grad_norm": 1.8507882555475772, |
| "learning_rate": 1.4649470264810339e-09, |
| "loss": 0.47, |
| "step": 26350 |
| }, |
| { |
| "epoch": 0.9934798175856481, |
| "grad_norm": 1.727307667076328, |
| "learning_rate": 1.3100474127192285e-09, |
| "loss": 0.4829, |
| "step": 26360 |
| }, |
| { |
| "epoch": 0.993856706742547, |
| "grad_norm": 1.5872381530005595, |
| "learning_rate": 1.1637999404257027e-09, |
| "loss": 0.4752, |
| "step": 26370 |
| }, |
| { |
| "epoch": 0.9942335958994459, |
| "grad_norm": 1.7561750947121424, |
| "learning_rate": 1.0262048627374121e-09, |
| "loss": 0.4572, |
| "step": 26380 |
| }, |
| { |
| "epoch": 0.994610485056345, |
| "grad_norm": 1.523030077019712, |
| "learning_rate": 8.972624178149592e-10, |
| "loss": 0.4498, |
| "step": 26390 |
| }, |
| { |
| "epoch": 0.9949873742132439, |
| "grad_norm": 1.8336481206683812, |
| "learning_rate": 7.769728288420375e-10, |
| "loss": 0.4931, |
| "step": 26400 |
| }, |
| { |
| "epoch": 0.9953642633701428, |
| "grad_norm": 1.6093654004058522, |
| "learning_rate": 6.653363040270978e-10, |
| "loss": 0.4906, |
| "step": 26410 |
| }, |
| { |
| "epoch": 0.9957411525270418, |
| "grad_norm": 1.168615916694283, |
| "learning_rate": 5.62353036598351e-10, |
| "loss": 0.4779, |
| "step": 26420 |
| }, |
| { |
| "epoch": 0.9961180416839408, |
| "grad_norm": 1.55589136590255, |
| "learning_rate": 4.680232048087652e-10, |
| "loss": 0.4741, |
| "step": 26430 |
| }, |
| { |
| "epoch": 0.9964949308408397, |
| "grad_norm": 1.7754177648108063, |
| "learning_rate": 3.823469719316242e-10, |
| "loss": 0.4721, |
| "step": 26440 |
| }, |
| { |
| "epoch": 0.9968718199977387, |
| "grad_norm": 1.6083002564281654, |
| "learning_rate": 3.053244862616378e-10, |
| "loss": 0.4733, |
| "step": 26450 |
| }, |
| { |
| "epoch": 0.9972487091546376, |
| "grad_norm": 1.7559817189307763, |
| "learning_rate": 2.369558811171624e-10, |
| "loss": 0.4724, |
| "step": 26460 |
| }, |
| { |
| "epoch": 0.9976255983115366, |
| "grad_norm": 1.6315234881797243, |
| "learning_rate": 1.772412748352048e-10, |
| "loss": 0.4552, |
| "step": 26470 |
| }, |
| { |
| "epoch": 0.9980024874684356, |
| "grad_norm": 1.8229450451292533, |
| "learning_rate": 1.26180770774198e-10, |
| "loss": 0.475, |
| "step": 26480 |
| }, |
| { |
| "epoch": 0.9983793766253345, |
| "grad_norm": 1.6948887666570254, |
| "learning_rate": 8.377445731511114e-11, |
| "loss": 0.4713, |
| "step": 26490 |
| }, |
| { |
| "epoch": 0.9987562657822334, |
| "grad_norm": 1.6821945503509856, |
| "learning_rate": 5.002240785756396e-11, |
| "loss": 0.4639, |
| "step": 26500 |
| }, |
| { |
| "epoch": 0.9991331549391324, |
| "grad_norm": 1.5658822340781955, |
| "learning_rate": 2.4924680822602242e-11, |
| "loss": 0.4707, |
| "step": 26510 |
| }, |
| { |
| "epoch": 0.9995100440960314, |
| "grad_norm": 1.8251970618778395, |
| "learning_rate": 8.481319651032493e-12, |
| "loss": 0.4814, |
| "step": 26520 |
| }, |
| { |
| "epoch": 0.9998869332529303, |
| "grad_norm": 1.5466187547604036, |
| "learning_rate": 6.923528045321703e-13, |
| "loss": 0.4557, |
| "step": 26530 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 26533, |
| "total_flos": 1663266888056832.0, |
| "train_loss": 0.5075337708668052, |
| "train_runtime": 63984.8167, |
| "train_samples_per_second": 3.317, |
| "train_steps_per_second": 0.415 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 26533, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 300, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1663266888056832.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
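The record above appears to be a complete `trainer_state.json` of the kind the Hugging Face `transformers` `Trainer` writes next to its checkpoints (suggested by the `log_history`, `stateful_callbacks.TrainerControl`, and `total_flos` fields). A minimal inspection sketch follows; it is not part of the original log. It assumes only the standard-library `json` module plus `matplotlib`, and the path `trainer_state.json` is a hypothetical placeholder for wherever this file is stored.

```python
import json

import matplotlib.pyplot as plt

# Hypothetical path: point this at the trainer_state.json shown above.
with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only the per-step entries; the last record in log_history is a run
# summary (train_loss, train_runtime, ...) with no per-step "loss" key.
logs = [e for e in state["log_history"] if "loss" in e]

steps = [e["step"] for e in logs]
losses = [e["loss"] for e in logs]
lrs = [e["learning_rate"] for e in logs]

fig, (ax_loss, ax_lr) = plt.subplots(2, 1, sharex=True, figsize=(8, 6))
ax_loss.plot(steps, losses)
ax_loss.set_ylabel("training loss")
ax_lr.plot(steps, lrs)
ax_lr.set_ylabel("learning rate")
ax_lr.set_xlabel("step")
fig.tight_layout()
fig.savefig("trainer_state_curves.png")

# The trailing record summarizes the whole run.
summary = state["log_history"][-1]
print("epochs:", summary["epoch"])
print("final train_loss:", summary["train_loss"])
print("runtime (s):", summary["train_runtime"])
```

Filtering on the presence of `loss` is what separates the per-step entries (logged every `logging_steps` = 10 steps) from the final summary entry, which instead carries the run-level `train_loss`, `train_runtime`, and throughput figures.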