CodCodingCode's picture
Upload folder using huggingface_hub
6b76bdb verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.9769954476479514,
"eval_steps": 500,
"global_step": 28500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0006936917407327119,
"grad_norm": 12224.0,
"learning_rate": 4.998434782608696e-06,
"loss": 10.7132,
"step": 10
},
{
"epoch": 0.0013873834814654238,
"grad_norm": 19712.0,
"learning_rate": 4.9966956521739135e-06,
"loss": 1.4256,
"step": 20
},
{
"epoch": 0.0020810752221981357,
"grad_norm": 21504.0,
"learning_rate": 4.994956521739131e-06,
"loss": 0.8714,
"step": 30
},
{
"epoch": 0.0027747669629308476,
"grad_norm": 2624.0,
"learning_rate": 4.993217391304348e-06,
"loss": 0.7117,
"step": 40
},
{
"epoch": 0.0034684587036635595,
"grad_norm": 23.875,
"learning_rate": 4.991478260869566e-06,
"loss": 0.3634,
"step": 50
},
{
"epoch": 0.004162150444396271,
"grad_norm": 31.375,
"learning_rate": 4.989739130434783e-06,
"loss": 0.7818,
"step": 60
},
{
"epoch": 0.004855842185128983,
"grad_norm": 8096.0,
"learning_rate": 4.988e-06,
"loss": 0.5493,
"step": 70
},
{
"epoch": 0.005549533925861695,
"grad_norm": 27.875,
"learning_rate": 4.986260869565218e-06,
"loss": 0.457,
"step": 80
},
{
"epoch": 0.006243225666594407,
"grad_norm": 37632.0,
"learning_rate": 4.984521739130435e-06,
"loss": 0.4123,
"step": 90
},
{
"epoch": 0.006936917407327119,
"grad_norm": 61.5,
"learning_rate": 4.9827826086956525e-06,
"loss": 0.3742,
"step": 100
},
{
"epoch": 0.007630609148059831,
"grad_norm": 34.25,
"learning_rate": 4.98104347826087e-06,
"loss": 0.2794,
"step": 110
},
{
"epoch": 0.008324300888792543,
"grad_norm": 61.5,
"learning_rate": 4.979304347826087e-06,
"loss": 0.2913,
"step": 120
},
{
"epoch": 0.009017992629525256,
"grad_norm": 8.5,
"learning_rate": 4.977565217391305e-06,
"loss": 0.3476,
"step": 130
},
{
"epoch": 0.009711684370257967,
"grad_norm": 76800.0,
"learning_rate": 4.975826086956522e-06,
"loss": 1.5003,
"step": 140
},
{
"epoch": 0.01040537611099068,
"grad_norm": 120.5,
"learning_rate": 4.97408695652174e-06,
"loss": 0.3051,
"step": 150
},
{
"epoch": 0.01109906785172339,
"grad_norm": 2.28125,
"learning_rate": 4.972347826086957e-06,
"loss": 0.2632,
"step": 160
},
{
"epoch": 0.011792759592456103,
"grad_norm": 12.6875,
"learning_rate": 4.970608695652174e-06,
"loss": 0.2847,
"step": 170
},
{
"epoch": 0.012486451333188814,
"grad_norm": 20.75,
"learning_rate": 4.9688695652173914e-06,
"loss": 0.3436,
"step": 180
},
{
"epoch": 0.013180143073921527,
"grad_norm": 234.0,
"learning_rate": 4.96713043478261e-06,
"loss": 0.2752,
"step": 190
},
{
"epoch": 0.013873834814654238,
"grad_norm": 10.25,
"learning_rate": 4.965391304347826e-06,
"loss": 0.3504,
"step": 200
},
{
"epoch": 0.01456752655538695,
"grad_norm": 8.0625,
"learning_rate": 4.9636521739130436e-06,
"loss": 0.2447,
"step": 210
},
{
"epoch": 0.015261218296119662,
"grad_norm": 3.640625,
"learning_rate": 4.961913043478262e-06,
"loss": 0.2864,
"step": 220
},
{
"epoch": 0.015954910036852375,
"grad_norm": 7.6875,
"learning_rate": 4.960173913043478e-06,
"loss": 0.2455,
"step": 230
},
{
"epoch": 0.016648601777585086,
"grad_norm": 10.25,
"learning_rate": 4.958434782608696e-06,
"loss": 0.2847,
"step": 240
},
{
"epoch": 0.017342293518317797,
"grad_norm": 6.75,
"learning_rate": 4.956695652173914e-06,
"loss": 0.2797,
"step": 250
},
{
"epoch": 0.01803598525905051,
"grad_norm": 9.4375,
"learning_rate": 4.954956521739131e-06,
"loss": 0.5388,
"step": 260
},
{
"epoch": 0.018729676999783222,
"grad_norm": 125.0,
"learning_rate": 4.953217391304348e-06,
"loss": 0.2508,
"step": 270
},
{
"epoch": 0.019423368740515933,
"grad_norm": 2.515625,
"learning_rate": 4.951478260869565e-06,
"loss": 0.263,
"step": 280
},
{
"epoch": 0.020117060481248644,
"grad_norm": 1.578125,
"learning_rate": 4.949739130434783e-06,
"loss": 0.2583,
"step": 290
},
{
"epoch": 0.02081075222198136,
"grad_norm": 4.53125,
"learning_rate": 4.948000000000001e-06,
"loss": 0.2684,
"step": 300
},
{
"epoch": 0.02150444396271407,
"grad_norm": 19.625,
"learning_rate": 4.946260869565217e-06,
"loss": 0.2318,
"step": 310
},
{
"epoch": 0.02219813570344678,
"grad_norm": 15.8125,
"learning_rate": 4.9445217391304355e-06,
"loss": 0.2509,
"step": 320
},
{
"epoch": 0.022891827444179492,
"grad_norm": 4.5625,
"learning_rate": 4.942782608695653e-06,
"loss": 0.2428,
"step": 330
},
{
"epoch": 0.023585519184912206,
"grad_norm": 20.75,
"learning_rate": 4.94104347826087e-06,
"loss": 0.2789,
"step": 340
},
{
"epoch": 0.024279210925644917,
"grad_norm": 1.59375,
"learning_rate": 4.939304347826087e-06,
"loss": 0.2624,
"step": 350
},
{
"epoch": 0.02497290266637763,
"grad_norm": 1.6640625,
"learning_rate": 4.937565217391305e-06,
"loss": 0.2712,
"step": 360
},
{
"epoch": 0.02566659440711034,
"grad_norm": 2.0625,
"learning_rate": 4.935826086956522e-06,
"loss": 0.2973,
"step": 370
},
{
"epoch": 0.026360286147843054,
"grad_norm": 2.4375,
"learning_rate": 4.93408695652174e-06,
"loss": 0.2769,
"step": 380
},
{
"epoch": 0.027053977888575765,
"grad_norm": 1.3203125,
"learning_rate": 4.932347826086957e-06,
"loss": 0.2675,
"step": 390
},
{
"epoch": 0.027747669629308476,
"grad_norm": 4.21875,
"learning_rate": 4.9306086956521744e-06,
"loss": 0.2663,
"step": 400
},
{
"epoch": 0.028441361370041187,
"grad_norm": 17.75,
"learning_rate": 4.928869565217392e-06,
"loss": 0.3639,
"step": 410
},
{
"epoch": 0.0291350531107739,
"grad_norm": 5.03125,
"learning_rate": 4.927130434782609e-06,
"loss": 0.3279,
"step": 420
},
{
"epoch": 0.029828744851506613,
"grad_norm": 2.21875,
"learning_rate": 4.9253913043478266e-06,
"loss": 0.2773,
"step": 430
},
{
"epoch": 0.030522436592239324,
"grad_norm": 6.625,
"learning_rate": 4.923652173913044e-06,
"loss": 0.2445,
"step": 440
},
{
"epoch": 0.031216128332972035,
"grad_norm": 3.84375,
"learning_rate": 4.921913043478261e-06,
"loss": 0.2256,
"step": 450
},
{
"epoch": 0.03190982007370475,
"grad_norm": 3.15625,
"learning_rate": 4.920173913043479e-06,
"loss": 0.2288,
"step": 460
},
{
"epoch": 0.03260351181443746,
"grad_norm": 52.0,
"learning_rate": 4.918434782608696e-06,
"loss": 0.2662,
"step": 470
},
{
"epoch": 0.03329720355517017,
"grad_norm": 13.4375,
"learning_rate": 4.916695652173913e-06,
"loss": 0.2476,
"step": 480
},
{
"epoch": 0.033990895295902886,
"grad_norm": 49.0,
"learning_rate": 4.914956521739131e-06,
"loss": 0.2328,
"step": 490
},
{
"epoch": 0.03468458703663559,
"grad_norm": 1.5546875,
"learning_rate": 4.913217391304348e-06,
"loss": 0.2463,
"step": 500
},
{
"epoch": 0.03537827877736831,
"grad_norm": 10.5625,
"learning_rate": 4.9114782608695655e-06,
"loss": 0.2657,
"step": 510
},
{
"epoch": 0.03607197051810102,
"grad_norm": 2.1875,
"learning_rate": 4.909739130434783e-06,
"loss": 0.268,
"step": 520
},
{
"epoch": 0.03676566225883373,
"grad_norm": 1.9921875,
"learning_rate": 4.908e-06,
"loss": 0.3324,
"step": 530
},
{
"epoch": 0.037459353999566444,
"grad_norm": 1.5703125,
"learning_rate": 4.906260869565218e-06,
"loss": 0.2086,
"step": 540
},
{
"epoch": 0.03815304574029915,
"grad_norm": 2.453125,
"learning_rate": 4.904521739130435e-06,
"loss": 0.2581,
"step": 550
},
{
"epoch": 0.038846737481031866,
"grad_norm": 1.4609375,
"learning_rate": 4.902782608695652e-06,
"loss": 0.2908,
"step": 560
},
{
"epoch": 0.03954042922176458,
"grad_norm": 4.28125,
"learning_rate": 4.90104347826087e-06,
"loss": 0.2192,
"step": 570
},
{
"epoch": 0.04023412096249729,
"grad_norm": 3.859375,
"learning_rate": 4.899304347826087e-06,
"loss": 0.2916,
"step": 580
},
{
"epoch": 0.04092781270323,
"grad_norm": 1.390625,
"learning_rate": 4.8975652173913045e-06,
"loss": 0.2388,
"step": 590
},
{
"epoch": 0.04162150444396272,
"grad_norm": 1.5859375,
"learning_rate": 4.895826086956522e-06,
"loss": 0.2509,
"step": 600
},
{
"epoch": 0.042315196184695425,
"grad_norm": 1.3515625,
"learning_rate": 4.89408695652174e-06,
"loss": 0.2736,
"step": 610
},
{
"epoch": 0.04300888792542814,
"grad_norm": 5.09375,
"learning_rate": 4.892347826086957e-06,
"loss": 0.329,
"step": 620
},
{
"epoch": 0.04370257966616085,
"grad_norm": 1.15625,
"learning_rate": 4.890608695652174e-06,
"loss": 0.2188,
"step": 630
},
{
"epoch": 0.04439627140689356,
"grad_norm": 1.640625,
"learning_rate": 4.888869565217391e-06,
"loss": 0.2942,
"step": 640
},
{
"epoch": 0.045089963147626276,
"grad_norm": 1.046875,
"learning_rate": 4.8871304347826096e-06,
"loss": 0.2324,
"step": 650
},
{
"epoch": 0.045783654888358984,
"grad_norm": 6.40625,
"learning_rate": 4.885391304347826e-06,
"loss": 0.2601,
"step": 660
},
{
"epoch": 0.0464773466290917,
"grad_norm": 1.859375,
"learning_rate": 4.8836521739130435e-06,
"loss": 0.2292,
"step": 670
},
{
"epoch": 0.04717103836982441,
"grad_norm": 1.84375,
"learning_rate": 4.881913043478262e-06,
"loss": 0.2719,
"step": 680
},
{
"epoch": 0.04786473011055712,
"grad_norm": 2.125,
"learning_rate": 4.880173913043479e-06,
"loss": 0.2875,
"step": 690
},
{
"epoch": 0.048558421851289835,
"grad_norm": 2.265625,
"learning_rate": 4.878434782608696e-06,
"loss": 0.2648,
"step": 700
},
{
"epoch": 0.04925211359202254,
"grad_norm": 1.625,
"learning_rate": 4.876695652173914e-06,
"loss": 0.2901,
"step": 710
},
{
"epoch": 0.04994580533275526,
"grad_norm": 1.2421875,
"learning_rate": 4.874956521739131e-06,
"loss": 0.2532,
"step": 720
},
{
"epoch": 0.05063949707348797,
"grad_norm": 1.953125,
"learning_rate": 4.8732173913043485e-06,
"loss": 0.2736,
"step": 730
},
{
"epoch": 0.05133318881422068,
"grad_norm": 2.390625,
"learning_rate": 4.871478260869565e-06,
"loss": 0.2531,
"step": 740
},
{
"epoch": 0.05202688055495339,
"grad_norm": 1.875,
"learning_rate": 4.869739130434783e-06,
"loss": 0.262,
"step": 750
},
{
"epoch": 0.05272057229568611,
"grad_norm": 2.171875,
"learning_rate": 4.868000000000001e-06,
"loss": 0.2242,
"step": 760
},
{
"epoch": 0.053414264036418815,
"grad_norm": 2.546875,
"learning_rate": 4.866260869565218e-06,
"loss": 0.2873,
"step": 770
},
{
"epoch": 0.05410795577715153,
"grad_norm": 1.4765625,
"learning_rate": 4.864521739130435e-06,
"loss": 0.2867,
"step": 780
},
{
"epoch": 0.05480164751788424,
"grad_norm": 1.40625,
"learning_rate": 4.862782608695653e-06,
"loss": 0.2815,
"step": 790
},
{
"epoch": 0.05549533925861695,
"grad_norm": 1.7265625,
"learning_rate": 4.86104347826087e-06,
"loss": 0.2943,
"step": 800
},
{
"epoch": 0.056189030999349666,
"grad_norm": 1.4375,
"learning_rate": 4.8593043478260875e-06,
"loss": 0.2723,
"step": 810
},
{
"epoch": 0.056882722740082374,
"grad_norm": 1.25,
"learning_rate": 4.857565217391305e-06,
"loss": 0.2441,
"step": 820
},
{
"epoch": 0.05757641448081509,
"grad_norm": 1.234375,
"learning_rate": 4.855826086956522e-06,
"loss": 0.239,
"step": 830
},
{
"epoch": 0.0582701062215478,
"grad_norm": 1.2421875,
"learning_rate": 4.85408695652174e-06,
"loss": 0.3108,
"step": 840
},
{
"epoch": 0.05896379796228051,
"grad_norm": 1.3515625,
"learning_rate": 4.852347826086957e-06,
"loss": 0.2425,
"step": 850
},
{
"epoch": 0.059657489703013225,
"grad_norm": 1.390625,
"learning_rate": 4.850608695652174e-06,
"loss": 0.3201,
"step": 860
},
{
"epoch": 0.06035118144374593,
"grad_norm": 1.859375,
"learning_rate": 4.848869565217392e-06,
"loss": 0.2886,
"step": 870
},
{
"epoch": 0.06104487318447865,
"grad_norm": 1.15625,
"learning_rate": 4.847130434782609e-06,
"loss": 0.2269,
"step": 880
},
{
"epoch": 0.06173856492521136,
"grad_norm": 1.21875,
"learning_rate": 4.8453913043478265e-06,
"loss": 0.247,
"step": 890
},
{
"epoch": 0.06243225666594407,
"grad_norm": 1.484375,
"learning_rate": 4.843652173913044e-06,
"loss": 0.2773,
"step": 900
},
{
"epoch": 0.06312594840667678,
"grad_norm": 1.1171875,
"learning_rate": 4.841913043478261e-06,
"loss": 0.2464,
"step": 910
},
{
"epoch": 0.0638196401474095,
"grad_norm": 1.703125,
"learning_rate": 4.840173913043479e-06,
"loss": 0.2407,
"step": 920
},
{
"epoch": 0.0645133318881422,
"grad_norm": 1.125,
"learning_rate": 4.838434782608696e-06,
"loss": 0.2772,
"step": 930
},
{
"epoch": 0.06520702362887491,
"grad_norm": 1.015625,
"learning_rate": 4.836695652173913e-06,
"loss": 0.252,
"step": 940
},
{
"epoch": 0.06590071536960763,
"grad_norm": 1.5703125,
"learning_rate": 4.834956521739131e-06,
"loss": 0.2426,
"step": 950
},
{
"epoch": 0.06659440711034034,
"grad_norm": 1.421875,
"learning_rate": 4.833217391304348e-06,
"loss": 0.2722,
"step": 960
},
{
"epoch": 0.06728809885107305,
"grad_norm": 1.7890625,
"learning_rate": 4.8314782608695655e-06,
"loss": 0.2661,
"step": 970
},
{
"epoch": 0.06798179059180577,
"grad_norm": 1.6015625,
"learning_rate": 4.829739130434783e-06,
"loss": 0.2329,
"step": 980
},
{
"epoch": 0.06867548233253848,
"grad_norm": 1.578125,
"learning_rate": 4.828e-06,
"loss": 0.2414,
"step": 990
},
{
"epoch": 0.06936917407327119,
"grad_norm": 1.9296875,
"learning_rate": 4.826260869565218e-06,
"loss": 0.264,
"step": 1000
},
{
"epoch": 0.07006286581400391,
"grad_norm": 1.5546875,
"learning_rate": 4.824521739130435e-06,
"loss": 0.2493,
"step": 1010
},
{
"epoch": 0.07075655755473662,
"grad_norm": 1.3984375,
"learning_rate": 4.822782608695652e-06,
"loss": 0.3118,
"step": 1020
},
{
"epoch": 0.07145024929546932,
"grad_norm": 1.8828125,
"learning_rate": 4.82104347826087e-06,
"loss": 0.2739,
"step": 1030
},
{
"epoch": 0.07214394103620204,
"grad_norm": 1.0234375,
"learning_rate": 4.819304347826088e-06,
"loss": 0.2743,
"step": 1040
},
{
"epoch": 0.07283763277693475,
"grad_norm": 1.1796875,
"learning_rate": 4.817565217391304e-06,
"loss": 0.2299,
"step": 1050
},
{
"epoch": 0.07353132451766746,
"grad_norm": 1.15625,
"learning_rate": 4.815826086956522e-06,
"loss": 0.2203,
"step": 1060
},
{
"epoch": 0.07422501625840017,
"grad_norm": 0.74609375,
"learning_rate": 4.81408695652174e-06,
"loss": 0.2155,
"step": 1070
},
{
"epoch": 0.07491870799913289,
"grad_norm": 1.421875,
"learning_rate": 4.812347826086957e-06,
"loss": 0.248,
"step": 1080
},
{
"epoch": 0.0756123997398656,
"grad_norm": 1.1640625,
"learning_rate": 4.810608695652174e-06,
"loss": 0.2366,
"step": 1090
},
{
"epoch": 0.0763060914805983,
"grad_norm": 1.0234375,
"learning_rate": 4.808869565217391e-06,
"loss": 0.2251,
"step": 1100
},
{
"epoch": 0.07699978322133103,
"grad_norm": 1.5859375,
"learning_rate": 4.8071304347826095e-06,
"loss": 0.2265,
"step": 1110
},
{
"epoch": 0.07769347496206373,
"grad_norm": 1.5546875,
"learning_rate": 4.805391304347827e-06,
"loss": 0.2876,
"step": 1120
},
{
"epoch": 0.07838716670279644,
"grad_norm": 1.3359375,
"learning_rate": 4.803652173913043e-06,
"loss": 0.2942,
"step": 1130
},
{
"epoch": 0.07908085844352916,
"grad_norm": 0.8984375,
"learning_rate": 4.801913043478262e-06,
"loss": 0.2531,
"step": 1140
},
{
"epoch": 0.07977455018426187,
"grad_norm": 2.265625,
"learning_rate": 4.800173913043479e-06,
"loss": 0.2473,
"step": 1150
},
{
"epoch": 0.08046824192499458,
"grad_norm": 1.2265625,
"learning_rate": 4.7984347826086955e-06,
"loss": 0.2106,
"step": 1160
},
{
"epoch": 0.0811619336657273,
"grad_norm": 1.0,
"learning_rate": 4.796695652173914e-06,
"loss": 0.2111,
"step": 1170
},
{
"epoch": 0.08185562540646,
"grad_norm": 1.734375,
"learning_rate": 4.794956521739131e-06,
"loss": 0.262,
"step": 1180
},
{
"epoch": 0.08254931714719271,
"grad_norm": 1.125,
"learning_rate": 4.7932173913043485e-06,
"loss": 0.2402,
"step": 1190
},
{
"epoch": 0.08324300888792543,
"grad_norm": 1.2421875,
"learning_rate": 4.791478260869565e-06,
"loss": 0.2325,
"step": 1200
},
{
"epoch": 0.08393670062865814,
"grad_norm": 1.296875,
"learning_rate": 4.789739130434783e-06,
"loss": 0.2355,
"step": 1210
},
{
"epoch": 0.08463039236939085,
"grad_norm": 1.234375,
"learning_rate": 4.7880000000000006e-06,
"loss": 0.2561,
"step": 1220
},
{
"epoch": 0.08532408411012356,
"grad_norm": 1.7890625,
"learning_rate": 4.786260869565218e-06,
"loss": 0.2351,
"step": 1230
},
{
"epoch": 0.08601777585085628,
"grad_norm": 1.2421875,
"learning_rate": 4.784521739130435e-06,
"loss": 0.2571,
"step": 1240
},
{
"epoch": 0.08671146759158899,
"grad_norm": 2.640625,
"learning_rate": 4.782782608695653e-06,
"loss": 0.2187,
"step": 1250
},
{
"epoch": 0.0874051593323217,
"grad_norm": 1.1640625,
"learning_rate": 4.78104347826087e-06,
"loss": 0.313,
"step": 1260
},
{
"epoch": 0.08809885107305442,
"grad_norm": 1.9140625,
"learning_rate": 4.7793043478260874e-06,
"loss": 0.3083,
"step": 1270
},
{
"epoch": 0.08879254281378712,
"grad_norm": 1.5859375,
"learning_rate": 4.777565217391305e-06,
"loss": 0.2748,
"step": 1280
},
{
"epoch": 0.08948623455451983,
"grad_norm": 1.640625,
"learning_rate": 4.775826086956522e-06,
"loss": 0.2393,
"step": 1290
},
{
"epoch": 0.09017992629525255,
"grad_norm": 1.3359375,
"learning_rate": 4.7740869565217395e-06,
"loss": 0.2355,
"step": 1300
},
{
"epoch": 0.09087361803598526,
"grad_norm": 1.1328125,
"learning_rate": 4.772347826086957e-06,
"loss": 0.2335,
"step": 1310
},
{
"epoch": 0.09156730977671797,
"grad_norm": 1.5859375,
"learning_rate": 4.770608695652174e-06,
"loss": 0.2321,
"step": 1320
},
{
"epoch": 0.09226100151745069,
"grad_norm": 1.4453125,
"learning_rate": 4.768869565217392e-06,
"loss": 0.3314,
"step": 1330
},
{
"epoch": 0.0929546932581834,
"grad_norm": 0.99609375,
"learning_rate": 4.767130434782609e-06,
"loss": 0.2046,
"step": 1340
},
{
"epoch": 0.0936483849989161,
"grad_norm": 1.328125,
"learning_rate": 4.765391304347826e-06,
"loss": 0.2461,
"step": 1350
},
{
"epoch": 0.09434207673964883,
"grad_norm": 1.140625,
"learning_rate": 4.763652173913044e-06,
"loss": 0.2129,
"step": 1360
},
{
"epoch": 0.09503576848038153,
"grad_norm": 1.2578125,
"learning_rate": 4.761913043478261e-06,
"loss": 0.2483,
"step": 1370
},
{
"epoch": 0.09572946022111424,
"grad_norm": 1.859375,
"learning_rate": 4.7601739130434785e-06,
"loss": 0.2285,
"step": 1380
},
{
"epoch": 0.09642315196184695,
"grad_norm": 1.0390625,
"learning_rate": 4.758434782608696e-06,
"loss": 0.2333,
"step": 1390
},
{
"epoch": 0.09711684370257967,
"grad_norm": 1.4375,
"learning_rate": 4.756695652173913e-06,
"loss": 0.2183,
"step": 1400
},
{
"epoch": 0.09781053544331238,
"grad_norm": 1.3828125,
"learning_rate": 4.754956521739131e-06,
"loss": 0.2621,
"step": 1410
},
{
"epoch": 0.09850422718404508,
"grad_norm": 1.5859375,
"learning_rate": 4.753217391304348e-06,
"loss": 0.2572,
"step": 1420
},
{
"epoch": 0.0991979189247778,
"grad_norm": 1.265625,
"learning_rate": 4.751478260869566e-06,
"loss": 0.2177,
"step": 1430
},
{
"epoch": 0.09989161066551051,
"grad_norm": 1.5703125,
"learning_rate": 4.749739130434783e-06,
"loss": 0.2542,
"step": 1440
},
{
"epoch": 0.10058530240624322,
"grad_norm": 1.4609375,
"learning_rate": 4.748e-06,
"loss": 0.2459,
"step": 1450
},
{
"epoch": 0.10127899414697594,
"grad_norm": 1.640625,
"learning_rate": 4.746260869565218e-06,
"loss": 0.2786,
"step": 1460
},
{
"epoch": 0.10197268588770865,
"grad_norm": 1.8359375,
"learning_rate": 4.744521739130435e-06,
"loss": 0.2875,
"step": 1470
},
{
"epoch": 0.10266637762844136,
"grad_norm": 1.9765625,
"learning_rate": 4.742782608695652e-06,
"loss": 0.2957,
"step": 1480
},
{
"epoch": 0.10336006936917408,
"grad_norm": 1.59375,
"learning_rate": 4.74104347826087e-06,
"loss": 0.2543,
"step": 1490
},
{
"epoch": 0.10405376110990679,
"grad_norm": 1.828125,
"learning_rate": 4.739304347826088e-06,
"loss": 0.2631,
"step": 1500
},
{
"epoch": 0.1047474528506395,
"grad_norm": 1.5703125,
"learning_rate": 4.737565217391304e-06,
"loss": 0.2462,
"step": 1510
},
{
"epoch": 0.10544114459137222,
"grad_norm": 1.203125,
"learning_rate": 4.735826086956522e-06,
"loss": 0.2237,
"step": 1520
},
{
"epoch": 0.10613483633210492,
"grad_norm": 1.1875,
"learning_rate": 4.73408695652174e-06,
"loss": 0.2206,
"step": 1530
},
{
"epoch": 0.10682852807283763,
"grad_norm": 1.59375,
"learning_rate": 4.732347826086957e-06,
"loss": 0.2533,
"step": 1540
},
{
"epoch": 0.10752221981357034,
"grad_norm": 1.203125,
"learning_rate": 4.730608695652174e-06,
"loss": 0.2136,
"step": 1550
},
{
"epoch": 0.10821591155430306,
"grad_norm": 2.015625,
"learning_rate": 4.728869565217391e-06,
"loss": 0.3101,
"step": 1560
},
{
"epoch": 0.10890960329503577,
"grad_norm": 2.4375,
"learning_rate": 4.727130434782609e-06,
"loss": 0.2574,
"step": 1570
},
{
"epoch": 0.10960329503576847,
"grad_norm": 1.25,
"learning_rate": 4.725391304347827e-06,
"loss": 0.2142,
"step": 1580
},
{
"epoch": 0.1102969867765012,
"grad_norm": 2.21875,
"learning_rate": 4.723652173913043e-06,
"loss": 0.1908,
"step": 1590
},
{
"epoch": 0.1109906785172339,
"grad_norm": 1.703125,
"learning_rate": 4.7219130434782615e-06,
"loss": 0.2437,
"step": 1600
},
{
"epoch": 0.11168437025796661,
"grad_norm": 1.25,
"learning_rate": 4.720173913043479e-06,
"loss": 0.2262,
"step": 1610
},
{
"epoch": 0.11237806199869933,
"grad_norm": 0.9921875,
"learning_rate": 4.718434782608696e-06,
"loss": 0.258,
"step": 1620
},
{
"epoch": 0.11307175373943204,
"grad_norm": 1.609375,
"learning_rate": 4.716695652173914e-06,
"loss": 0.2992,
"step": 1630
},
{
"epoch": 0.11376544548016475,
"grad_norm": 1.5703125,
"learning_rate": 4.714956521739131e-06,
"loss": 0.247,
"step": 1640
},
{
"epoch": 0.11445913722089747,
"grad_norm": 1.1484375,
"learning_rate": 4.713217391304348e-06,
"loss": 0.262,
"step": 1650
},
{
"epoch": 0.11515282896163018,
"grad_norm": 1.2890625,
"learning_rate": 4.711478260869566e-06,
"loss": 0.2604,
"step": 1660
},
{
"epoch": 0.11584652070236288,
"grad_norm": 1.0390625,
"learning_rate": 4.709739130434783e-06,
"loss": 0.2101,
"step": 1670
},
{
"epoch": 0.1165402124430956,
"grad_norm": 1.8671875,
"learning_rate": 4.7080000000000005e-06,
"loss": 0.2641,
"step": 1680
},
{
"epoch": 0.11723390418382831,
"grad_norm": 1.3984375,
"learning_rate": 4.706260869565218e-06,
"loss": 0.2134,
"step": 1690
},
{
"epoch": 0.11792759592456102,
"grad_norm": 1.0546875,
"learning_rate": 4.704521739130435e-06,
"loss": 0.2305,
"step": 1700
},
{
"epoch": 0.11862128766529373,
"grad_norm": 1.2265625,
"learning_rate": 4.702782608695653e-06,
"loss": 0.2542,
"step": 1710
},
{
"epoch": 0.11931497940602645,
"grad_norm": 1.328125,
"learning_rate": 4.70104347826087e-06,
"loss": 0.2722,
"step": 1720
},
{
"epoch": 0.12000867114675916,
"grad_norm": 1.015625,
"learning_rate": 4.699304347826087e-06,
"loss": 0.2172,
"step": 1730
},
{
"epoch": 0.12070236288749187,
"grad_norm": 1.1953125,
"learning_rate": 4.697565217391305e-06,
"loss": 0.3591,
"step": 1740
},
{
"epoch": 0.12139605462822459,
"grad_norm": 1.3359375,
"learning_rate": 4.695826086956522e-06,
"loss": 0.2487,
"step": 1750
},
{
"epoch": 0.1220897463689573,
"grad_norm": 1.234375,
"learning_rate": 4.6940869565217395e-06,
"loss": 0.2619,
"step": 1760
},
{
"epoch": 0.12278343810969,
"grad_norm": 1.0078125,
"learning_rate": 4.692347826086957e-06,
"loss": 0.2614,
"step": 1770
},
{
"epoch": 0.12347712985042272,
"grad_norm": 1.0625,
"learning_rate": 4.690608695652174e-06,
"loss": 0.2354,
"step": 1780
},
{
"epoch": 0.12417082159115543,
"grad_norm": 1.109375,
"learning_rate": 4.688869565217392e-06,
"loss": 0.2302,
"step": 1790
},
{
"epoch": 0.12486451333188814,
"grad_norm": 1.203125,
"learning_rate": 4.687130434782609e-06,
"loss": 0.2436,
"step": 1800
},
{
"epoch": 0.12555820507262086,
"grad_norm": 1.9140625,
"learning_rate": 4.685391304347826e-06,
"loss": 0.2219,
"step": 1810
},
{
"epoch": 0.12625189681335355,
"grad_norm": 1.3125,
"learning_rate": 4.683652173913044e-06,
"loss": 0.237,
"step": 1820
},
{
"epoch": 0.12694558855408627,
"grad_norm": 1.1640625,
"learning_rate": 4.681913043478261e-06,
"loss": 0.2562,
"step": 1830
},
{
"epoch": 0.127639280294819,
"grad_norm": 1.2734375,
"learning_rate": 4.6801739130434784e-06,
"loss": 0.2683,
"step": 1840
},
{
"epoch": 0.1283329720355517,
"grad_norm": 1.421875,
"learning_rate": 4.678434782608696e-06,
"loss": 0.2653,
"step": 1850
},
{
"epoch": 0.1290266637762844,
"grad_norm": 1.2421875,
"learning_rate": 4.676695652173913e-06,
"loss": 0.2423,
"step": 1860
},
{
"epoch": 0.12972035551701713,
"grad_norm": 0.98828125,
"learning_rate": 4.6749565217391305e-06,
"loss": 0.2275,
"step": 1870
},
{
"epoch": 0.13041404725774983,
"grad_norm": 1.046875,
"learning_rate": 4.673217391304348e-06,
"loss": 0.229,
"step": 1880
},
{
"epoch": 0.13110773899848255,
"grad_norm": 1.453125,
"learning_rate": 4.671478260869566e-06,
"loss": 0.2491,
"step": 1890
},
{
"epoch": 0.13180143073921527,
"grad_norm": 1.3359375,
"learning_rate": 4.669739130434783e-06,
"loss": 0.2106,
"step": 1900
},
{
"epoch": 0.13249512247994796,
"grad_norm": 1.7265625,
"learning_rate": 4.668e-06,
"loss": 0.3183,
"step": 1910
},
{
"epoch": 0.13318881422068068,
"grad_norm": 1.5625,
"learning_rate": 4.666260869565218e-06,
"loss": 0.2252,
"step": 1920
},
{
"epoch": 0.1338825059614134,
"grad_norm": 1.078125,
"learning_rate": 4.664521739130436e-06,
"loss": 0.2986,
"step": 1930
},
{
"epoch": 0.1345761977021461,
"grad_norm": 1.6328125,
"learning_rate": 4.662782608695652e-06,
"loss": 0.2311,
"step": 1940
},
{
"epoch": 0.13526988944287882,
"grad_norm": 2.3125,
"learning_rate": 4.6610434782608695e-06,
"loss": 0.2383,
"step": 1950
},
{
"epoch": 0.13596358118361154,
"grad_norm": 1.4609375,
"learning_rate": 4.659304347826088e-06,
"loss": 0.2228,
"step": 1960
},
{
"epoch": 0.13665727292434424,
"grad_norm": 1.6484375,
"learning_rate": 4.657565217391305e-06,
"loss": 0.2704,
"step": 1970
},
{
"epoch": 0.13735096466507696,
"grad_norm": 1.265625,
"learning_rate": 4.655826086956522e-06,
"loss": 0.2858,
"step": 1980
},
{
"epoch": 0.13804465640580968,
"grad_norm": 1.046875,
"learning_rate": 4.65408695652174e-06,
"loss": 0.2813,
"step": 1990
},
{
"epoch": 0.13873834814654237,
"grad_norm": 1.25,
"learning_rate": 4.652347826086957e-06,
"loss": 0.2458,
"step": 2000
},
{
"epoch": 0.1394320398872751,
"grad_norm": 1.1171875,
"learning_rate": 4.650608695652175e-06,
"loss": 0.2186,
"step": 2010
},
{
"epoch": 0.14012573162800782,
"grad_norm": 2.546875,
"learning_rate": 4.648869565217391e-06,
"loss": 0.2789,
"step": 2020
},
{
"epoch": 0.1408194233687405,
"grad_norm": 1.59375,
"learning_rate": 4.647130434782609e-06,
"loss": 0.2564,
"step": 2030
},
{
"epoch": 0.14151311510947323,
"grad_norm": 1.28125,
"learning_rate": 4.645391304347827e-06,
"loss": 0.2318,
"step": 2040
},
{
"epoch": 0.14220680685020595,
"grad_norm": 1.703125,
"learning_rate": 4.643652173913044e-06,
"loss": 0.2513,
"step": 2050
},
{
"epoch": 0.14290049859093865,
"grad_norm": 0.89453125,
"learning_rate": 4.6419130434782614e-06,
"loss": 0.2211,
"step": 2060
},
{
"epoch": 0.14359419033167137,
"grad_norm": 1.1328125,
"learning_rate": 4.640173913043479e-06,
"loss": 0.2939,
"step": 2070
},
{
"epoch": 0.1442878820724041,
"grad_norm": 1.296875,
"learning_rate": 4.638434782608696e-06,
"loss": 0.2321,
"step": 2080
},
{
"epoch": 0.14498157381313678,
"grad_norm": 1.484375,
"learning_rate": 4.6366956521739136e-06,
"loss": 0.226,
"step": 2090
},
{
"epoch": 0.1456752655538695,
"grad_norm": 1.0625,
"learning_rate": 4.634956521739131e-06,
"loss": 0.2262,
"step": 2100
},
{
"epoch": 0.1463689572946022,
"grad_norm": 1.328125,
"learning_rate": 4.633217391304348e-06,
"loss": 0.2198,
"step": 2110
},
{
"epoch": 0.14706264903533492,
"grad_norm": 1.421875,
"learning_rate": 4.631478260869566e-06,
"loss": 0.2517,
"step": 2120
},
{
"epoch": 0.14775634077606764,
"grad_norm": 1.125,
"learning_rate": 4.629739130434783e-06,
"loss": 0.2736,
"step": 2130
},
{
"epoch": 0.14845003251680033,
"grad_norm": 1.4609375,
"learning_rate": 4.628e-06,
"loss": 0.2483,
"step": 2140
},
{
"epoch": 0.14914372425753306,
"grad_norm": 1.1328125,
"learning_rate": 4.626260869565218e-06,
"loss": 0.2138,
"step": 2150
},
{
"epoch": 0.14983741599826578,
"grad_norm": 1.578125,
"learning_rate": 4.624521739130435e-06,
"loss": 0.2202,
"step": 2160
},
{
"epoch": 0.15053110773899847,
"grad_norm": 1.359375,
"learning_rate": 4.6227826086956525e-06,
"loss": 0.2468,
"step": 2170
},
{
"epoch": 0.1512247994797312,
"grad_norm": 1.171875,
"learning_rate": 4.62104347826087e-06,
"loss": 0.2218,
"step": 2180
},
{
"epoch": 0.1519184912204639,
"grad_norm": 1.6328125,
"learning_rate": 4.619304347826087e-06,
"loss": 0.2741,
"step": 2190
},
{
"epoch": 0.1526121829611966,
"grad_norm": 1.078125,
"learning_rate": 4.617565217391305e-06,
"loss": 0.2553,
"step": 2200
},
{
"epoch": 0.15330587470192933,
"grad_norm": 1.2265625,
"learning_rate": 4.615826086956522e-06,
"loss": 0.234,
"step": 2210
},
{
"epoch": 0.15399956644266205,
"grad_norm": 1.4453125,
"learning_rate": 4.614086956521739e-06,
"loss": 0.2321,
"step": 2220
},
{
"epoch": 0.15469325818339474,
"grad_norm": 1.2265625,
"learning_rate": 4.612347826086957e-06,
"loss": 0.2222,
"step": 2230
},
{
"epoch": 0.15538694992412747,
"grad_norm": 1.40625,
"learning_rate": 4.610608695652174e-06,
"loss": 0.2726,
"step": 2240
},
{
"epoch": 0.1560806416648602,
"grad_norm": 1.0078125,
"learning_rate": 4.6088695652173915e-06,
"loss": 0.2418,
"step": 2250
},
{
"epoch": 0.15677433340559288,
"grad_norm": 1.3828125,
"learning_rate": 4.607130434782609e-06,
"loss": 0.2499,
"step": 2260
},
{
"epoch": 0.1574680251463256,
"grad_norm": 0.984375,
"learning_rate": 4.605391304347826e-06,
"loss": 0.2741,
"step": 2270
},
{
"epoch": 0.15816171688705832,
"grad_norm": 1.3515625,
"learning_rate": 4.6036521739130445e-06,
"loss": 0.2061,
"step": 2280
},
{
"epoch": 0.15885540862779102,
"grad_norm": 1.3046875,
"learning_rate": 4.601913043478261e-06,
"loss": 0.2696,
"step": 2290
},
{
"epoch": 0.15954910036852374,
"grad_norm": 1.359375,
"learning_rate": 4.600173913043478e-06,
"loss": 0.2767,
"step": 2300
},
{
"epoch": 0.16024279210925646,
"grad_norm": 1.3515625,
"learning_rate": 4.598434782608696e-06,
"loss": 0.2485,
"step": 2310
},
{
"epoch": 0.16093648384998915,
"grad_norm": 1.2578125,
"learning_rate": 4.596695652173914e-06,
"loss": 0.2485,
"step": 2320
},
{
"epoch": 0.16163017559072188,
"grad_norm": 1.8046875,
"learning_rate": 4.5949565217391305e-06,
"loss": 0.2586,
"step": 2330
},
{
"epoch": 0.1623238673314546,
"grad_norm": 1.0390625,
"learning_rate": 4.593217391304348e-06,
"loss": 0.1902,
"step": 2340
},
{
"epoch": 0.1630175590721873,
"grad_norm": 1.7734375,
"learning_rate": 4.591478260869566e-06,
"loss": 0.3126,
"step": 2350
},
{
"epoch": 0.16371125081292,
"grad_norm": 1.8828125,
"learning_rate": 4.5897391304347834e-06,
"loss": 0.285,
"step": 2360
},
{
"epoch": 0.16440494255365273,
"grad_norm": 1.1484375,
"learning_rate": 4.588e-06,
"loss": 0.2054,
"step": 2370
},
{
"epoch": 0.16509863429438543,
"grad_norm": 2.921875,
"learning_rate": 4.586260869565218e-06,
"loss": 0.2095,
"step": 2380
},
{
"epoch": 0.16579232603511815,
"grad_norm": 1.046875,
"learning_rate": 4.5845217391304355e-06,
"loss": 0.234,
"step": 2390
},
{
"epoch": 0.16648601777585087,
"grad_norm": 1.4765625,
"learning_rate": 4.582782608695652e-06,
"loss": 0.2567,
"step": 2400
},
{
"epoch": 0.16717970951658356,
"grad_norm": 0.94140625,
"learning_rate": 4.5810434782608694e-06,
"loss": 0.2976,
"step": 2410
},
{
"epoch": 0.16787340125731628,
"grad_norm": 1.3203125,
"learning_rate": 4.579304347826088e-06,
"loss": 0.2411,
"step": 2420
},
{
"epoch": 0.16856709299804898,
"grad_norm": 1.125,
"learning_rate": 4.577565217391305e-06,
"loss": 0.2256,
"step": 2430
},
{
"epoch": 0.1692607847387817,
"grad_norm": 1.1796875,
"learning_rate": 4.5758260869565215e-06,
"loss": 0.2514,
"step": 2440
},
{
"epoch": 0.16995447647951442,
"grad_norm": 1.0625,
"learning_rate": 4.57408695652174e-06,
"loss": 0.2754,
"step": 2450
},
{
"epoch": 0.17064816822024712,
"grad_norm": 1.2890625,
"learning_rate": 4.572347826086957e-06,
"loss": 0.2486,
"step": 2460
},
{
"epoch": 0.17134185996097984,
"grad_norm": 1.1015625,
"learning_rate": 4.5706086956521745e-06,
"loss": 0.2643,
"step": 2470
},
{
"epoch": 0.17203555170171256,
"grad_norm": 1.4453125,
"learning_rate": 4.568869565217391e-06,
"loss": 0.2401,
"step": 2480
},
{
"epoch": 0.17272924344244525,
"grad_norm": 1.3671875,
"learning_rate": 4.567130434782609e-06,
"loss": 0.2471,
"step": 2490
},
{
"epoch": 0.17342293518317797,
"grad_norm": 1.6015625,
"learning_rate": 4.565391304347827e-06,
"loss": 0.2813,
"step": 2500
},
{
"epoch": 0.1741166269239107,
"grad_norm": 2.109375,
"learning_rate": 4.563652173913044e-06,
"loss": 0.2199,
"step": 2510
},
{
"epoch": 0.1748103186646434,
"grad_norm": 1.0859375,
"learning_rate": 4.561913043478261e-06,
"loss": 0.2346,
"step": 2520
},
{
"epoch": 0.1755040104053761,
"grad_norm": 0.90234375,
"learning_rate": 4.560173913043479e-06,
"loss": 0.3302,
"step": 2530
},
{
"epoch": 0.17619770214610883,
"grad_norm": 1.234375,
"learning_rate": 4.558434782608696e-06,
"loss": 0.2683,
"step": 2540
},
{
"epoch": 0.17689139388684152,
"grad_norm": 2.34375,
"learning_rate": 4.5566956521739135e-06,
"loss": 0.2297,
"step": 2550
},
{
"epoch": 0.17758508562757425,
"grad_norm": 1.3203125,
"learning_rate": 4.554956521739131e-06,
"loss": 0.2285,
"step": 2560
},
{
"epoch": 0.17827877736830697,
"grad_norm": 1.1796875,
"learning_rate": 4.553217391304348e-06,
"loss": 0.2914,
"step": 2570
},
{
"epoch": 0.17897246910903966,
"grad_norm": 1.3359375,
"learning_rate": 4.551478260869566e-06,
"loss": 0.2983,
"step": 2580
},
{
"epoch": 0.17966616084977238,
"grad_norm": 1.1484375,
"learning_rate": 4.549739130434783e-06,
"loss": 0.2334,
"step": 2590
},
{
"epoch": 0.1803598525905051,
"grad_norm": 1.4609375,
"learning_rate": 4.548e-06,
"loss": 0.2318,
"step": 2600
},
{
"epoch": 0.1810535443312378,
"grad_norm": 1.4609375,
"learning_rate": 4.546260869565218e-06,
"loss": 0.2391,
"step": 2610
},
{
"epoch": 0.18174723607197052,
"grad_norm": 0.80859375,
"learning_rate": 4.544521739130435e-06,
"loss": 0.2697,
"step": 2620
},
{
"epoch": 0.18244092781270324,
"grad_norm": 1.0703125,
"learning_rate": 4.5427826086956524e-06,
"loss": 0.263,
"step": 2630
},
{
"epoch": 0.18313461955343593,
"grad_norm": 1.6015625,
"learning_rate": 4.54104347826087e-06,
"loss": 0.2569,
"step": 2640
},
{
"epoch": 0.18382831129416866,
"grad_norm": 1.8203125,
"learning_rate": 4.539304347826087e-06,
"loss": 0.2997,
"step": 2650
},
{
"epoch": 0.18452200303490138,
"grad_norm": 1.4765625,
"learning_rate": 4.5375652173913046e-06,
"loss": 0.2352,
"step": 2660
},
{
"epoch": 0.18521569477563407,
"grad_norm": 2.0625,
"learning_rate": 4.535826086956523e-06,
"loss": 0.2896,
"step": 2670
},
{
"epoch": 0.1859093865163668,
"grad_norm": 1.921875,
"learning_rate": 4.534086956521739e-06,
"loss": 0.2807,
"step": 2680
},
{
"epoch": 0.18660307825709951,
"grad_norm": 1.2578125,
"learning_rate": 4.532347826086957e-06,
"loss": 0.2258,
"step": 2690
},
{
"epoch": 0.1872967699978322,
"grad_norm": 0.91015625,
"learning_rate": 4.530608695652174e-06,
"loss": 0.2467,
"step": 2700
},
{
"epoch": 0.18799046173856493,
"grad_norm": 1.1796875,
"learning_rate": 4.528869565217391e-06,
"loss": 0.2375,
"step": 2710
},
{
"epoch": 0.18868415347929765,
"grad_norm": 1.0859375,
"learning_rate": 4.527130434782609e-06,
"loss": 0.2648,
"step": 2720
},
{
"epoch": 0.18937784522003034,
"grad_norm": 1.1640625,
"learning_rate": 4.525391304347826e-06,
"loss": 0.241,
"step": 2730
},
{
"epoch": 0.19007153696076307,
"grad_norm": 1.59375,
"learning_rate": 4.523652173913044e-06,
"loss": 0.2694,
"step": 2740
},
{
"epoch": 0.19076522870149576,
"grad_norm": 1.0390625,
"learning_rate": 4.521913043478261e-06,
"loss": 0.213,
"step": 2750
},
{
"epoch": 0.19145892044222848,
"grad_norm": 1.0546875,
"learning_rate": 4.520173913043478e-06,
"loss": 0.2127,
"step": 2760
},
{
"epoch": 0.1921526121829612,
"grad_norm": 1.4765625,
"learning_rate": 4.518434782608696e-06,
"loss": 0.26,
"step": 2770
},
{
"epoch": 0.1928463039236939,
"grad_norm": 1.28125,
"learning_rate": 4.516695652173914e-06,
"loss": 0.2297,
"step": 2780
},
{
"epoch": 0.19353999566442662,
"grad_norm": 1.40625,
"learning_rate": 4.51495652173913e-06,
"loss": 0.2087,
"step": 2790
},
{
"epoch": 0.19423368740515934,
"grad_norm": 1.1328125,
"learning_rate": 4.513217391304348e-06,
"loss": 0.2179,
"step": 2800
},
{
"epoch": 0.19492737914589203,
"grad_norm": 1.5625,
"learning_rate": 4.511478260869566e-06,
"loss": 0.2698,
"step": 2810
},
{
"epoch": 0.19562107088662475,
"grad_norm": 1.0625,
"learning_rate": 4.509739130434783e-06,
"loss": 0.2297,
"step": 2820
},
{
"epoch": 0.19631476262735748,
"grad_norm": 1.578125,
"learning_rate": 4.508e-06,
"loss": 0.2154,
"step": 2830
},
{
"epoch": 0.19700845436809017,
"grad_norm": 1.1171875,
"learning_rate": 4.506260869565218e-06,
"loss": 0.236,
"step": 2840
},
{
"epoch": 0.1977021461088229,
"grad_norm": 1.109375,
"learning_rate": 4.5045217391304355e-06,
"loss": 0.281,
"step": 2850
},
{
"epoch": 0.1983958378495556,
"grad_norm": 1.1171875,
"learning_rate": 4.502782608695653e-06,
"loss": 0.2373,
"step": 2860
},
{
"epoch": 0.1990895295902883,
"grad_norm": 1.375,
"learning_rate": 4.501043478260869e-06,
"loss": 0.2899,
"step": 2870
},
{
"epoch": 0.19978322133102103,
"grad_norm": 1.421875,
"learning_rate": 4.4993043478260876e-06,
"loss": 0.2459,
"step": 2880
},
{
"epoch": 0.20047691307175375,
"grad_norm": 1.1015625,
"learning_rate": 4.497565217391305e-06,
"loss": 0.2292,
"step": 2890
},
{
"epoch": 0.20117060481248644,
"grad_norm": 1.0546875,
"learning_rate": 4.495826086956522e-06,
"loss": 0.2465,
"step": 2900
},
{
"epoch": 0.20186429655321916,
"grad_norm": 1.3671875,
"learning_rate": 4.49408695652174e-06,
"loss": 0.2323,
"step": 2910
},
{
"epoch": 0.20255798829395188,
"grad_norm": 2.234375,
"learning_rate": 4.492347826086957e-06,
"loss": 0.3579,
"step": 2920
},
{
"epoch": 0.20325168003468458,
"grad_norm": 1.28125,
"learning_rate": 4.4906086956521744e-06,
"loss": 0.3302,
"step": 2930
},
{
"epoch": 0.2039453717754173,
"grad_norm": 1.4375,
"learning_rate": 4.488869565217392e-06,
"loss": 0.2271,
"step": 2940
},
{
"epoch": 0.20463906351615002,
"grad_norm": 1.1796875,
"learning_rate": 4.487130434782609e-06,
"loss": 0.2119,
"step": 2950
},
{
"epoch": 0.20533275525688272,
"grad_norm": 1.140625,
"learning_rate": 4.4853913043478265e-06,
"loss": 0.2446,
"step": 2960
},
{
"epoch": 0.20602644699761544,
"grad_norm": 0.9375,
"learning_rate": 4.483652173913044e-06,
"loss": 0.2289,
"step": 2970
},
{
"epoch": 0.20672013873834816,
"grad_norm": 1.1484375,
"learning_rate": 4.481913043478261e-06,
"loss": 0.2335,
"step": 2980
},
{
"epoch": 0.20741383047908085,
"grad_norm": 0.9609375,
"learning_rate": 4.480173913043479e-06,
"loss": 0.2208,
"step": 2990
},
{
"epoch": 0.20810752221981357,
"grad_norm": 1.5859375,
"learning_rate": 4.478434782608696e-06,
"loss": 0.2224,
"step": 3000
},
{
"epoch": 0.2088012139605463,
"grad_norm": 1.234375,
"learning_rate": 4.476695652173913e-06,
"loss": 0.2543,
"step": 3010
},
{
"epoch": 0.209494905701279,
"grad_norm": 1.171875,
"learning_rate": 4.474956521739131e-06,
"loss": 0.2274,
"step": 3020
},
{
"epoch": 0.2101885974420117,
"grad_norm": 1.3046875,
"learning_rate": 4.473217391304348e-06,
"loss": 0.2999,
"step": 3030
},
{
"epoch": 0.21088228918274443,
"grad_norm": 1.1640625,
"learning_rate": 4.4714782608695655e-06,
"loss": 0.2384,
"step": 3040
},
{
"epoch": 0.21157598092347712,
"grad_norm": 1.3984375,
"learning_rate": 4.469739130434783e-06,
"loss": 0.2618,
"step": 3050
},
{
"epoch": 0.21226967266420985,
"grad_norm": 1.453125,
"learning_rate": 4.468e-06,
"loss": 0.2545,
"step": 3060
},
{
"epoch": 0.21296336440494254,
"grad_norm": 1.1171875,
"learning_rate": 4.466260869565218e-06,
"loss": 0.2794,
"step": 3070
},
{
"epoch": 0.21365705614567526,
"grad_norm": 1.0234375,
"learning_rate": 4.464521739130435e-06,
"loss": 0.2537,
"step": 3080
},
{
"epoch": 0.21435074788640798,
"grad_norm": 1.4921875,
"learning_rate": 4.462782608695652e-06,
"loss": 0.2936,
"step": 3090
},
{
"epoch": 0.21504443962714068,
"grad_norm": 1.2890625,
"learning_rate": 4.46104347826087e-06,
"loss": 0.2446,
"step": 3100
},
{
"epoch": 0.2157381313678734,
"grad_norm": 1.171875,
"learning_rate": 4.459304347826087e-06,
"loss": 0.2443,
"step": 3110
},
{
"epoch": 0.21643182310860612,
"grad_norm": 1.2578125,
"learning_rate": 4.4575652173913045e-06,
"loss": 0.2338,
"step": 3120
},
{
"epoch": 0.2171255148493388,
"grad_norm": 1.0,
"learning_rate": 4.455826086956523e-06,
"loss": 0.2197,
"step": 3130
},
{
"epoch": 0.21781920659007153,
"grad_norm": 0.96875,
"learning_rate": 4.454086956521739e-06,
"loss": 0.2222,
"step": 3140
},
{
"epoch": 0.21851289833080426,
"grad_norm": 1.15625,
"learning_rate": 4.452347826086957e-06,
"loss": 0.2494,
"step": 3150
},
{
"epoch": 0.21920659007153695,
"grad_norm": 1.2890625,
"learning_rate": 4.450608695652174e-06,
"loss": 0.2529,
"step": 3160
},
{
"epoch": 0.21990028181226967,
"grad_norm": 0.9921875,
"learning_rate": 4.448869565217392e-06,
"loss": 0.185,
"step": 3170
},
{
"epoch": 0.2205939735530024,
"grad_norm": 1.6015625,
"learning_rate": 4.447130434782609e-06,
"loss": 0.2576,
"step": 3180
},
{
"epoch": 0.2212876652937351,
"grad_norm": 1.84375,
"learning_rate": 4.445391304347826e-06,
"loss": 0.2218,
"step": 3190
},
{
"epoch": 0.2219813570344678,
"grad_norm": 1.546875,
"learning_rate": 4.443652173913044e-06,
"loss": 0.2442,
"step": 3200
},
{
"epoch": 0.22267504877520053,
"grad_norm": 1.0859375,
"learning_rate": 4.441913043478262e-06,
"loss": 0.269,
"step": 3210
},
{
"epoch": 0.22336874051593322,
"grad_norm": 1.2578125,
"learning_rate": 4.440173913043478e-06,
"loss": 0.239,
"step": 3220
},
{
"epoch": 0.22406243225666594,
"grad_norm": 0.94140625,
"learning_rate": 4.4384347826086956e-06,
"loss": 0.2284,
"step": 3230
},
{
"epoch": 0.22475612399739867,
"grad_norm": 1.2265625,
"learning_rate": 4.436695652173914e-06,
"loss": 0.2405,
"step": 3240
},
{
"epoch": 0.22544981573813136,
"grad_norm": 1.5703125,
"learning_rate": 4.434956521739131e-06,
"loss": 0.2391,
"step": 3250
},
{
"epoch": 0.22614350747886408,
"grad_norm": 1.171875,
"learning_rate": 4.433217391304348e-06,
"loss": 0.2244,
"step": 3260
},
{
"epoch": 0.2268371992195968,
"grad_norm": 1.5,
"learning_rate": 4.431478260869566e-06,
"loss": 0.2313,
"step": 3270
},
{
"epoch": 0.2275308909603295,
"grad_norm": 1.71875,
"learning_rate": 4.429739130434783e-06,
"loss": 0.2855,
"step": 3280
},
{
"epoch": 0.22822458270106222,
"grad_norm": 1.4921875,
"learning_rate": 4.428000000000001e-06,
"loss": 0.2532,
"step": 3290
},
{
"epoch": 0.22891827444179494,
"grad_norm": 1.2890625,
"learning_rate": 4.426260869565218e-06,
"loss": 0.2226,
"step": 3300
},
{
"epoch": 0.22961196618252763,
"grad_norm": 1.3203125,
"learning_rate": 4.424521739130435e-06,
"loss": 0.2389,
"step": 3310
},
{
"epoch": 0.23030565792326035,
"grad_norm": 1.1484375,
"learning_rate": 4.422782608695653e-06,
"loss": 0.2067,
"step": 3320
},
{
"epoch": 0.23099934966399308,
"grad_norm": 1.2890625,
"learning_rate": 4.421043478260869e-06,
"loss": 0.1951,
"step": 3330
},
{
"epoch": 0.23169304140472577,
"grad_norm": 1.2265625,
"learning_rate": 4.4193043478260875e-06,
"loss": 0.2054,
"step": 3340
},
{
"epoch": 0.2323867331454585,
"grad_norm": 1.125,
"learning_rate": 4.417565217391305e-06,
"loss": 0.2557,
"step": 3350
},
{
"epoch": 0.2330804248861912,
"grad_norm": 1.203125,
"learning_rate": 4.415826086956522e-06,
"loss": 0.229,
"step": 3360
},
{
"epoch": 0.2337741166269239,
"grad_norm": 1.4921875,
"learning_rate": 4.41408695652174e-06,
"loss": 0.2895,
"step": 3370
},
{
"epoch": 0.23446780836765663,
"grad_norm": 1.0859375,
"learning_rate": 4.412347826086957e-06,
"loss": 0.299,
"step": 3380
},
{
"epoch": 0.23516150010838932,
"grad_norm": 1.1015625,
"learning_rate": 4.410608695652174e-06,
"loss": 0.2132,
"step": 3390
},
{
"epoch": 0.23585519184912204,
"grad_norm": 1.078125,
"learning_rate": 4.408869565217392e-06,
"loss": 0.2573,
"step": 3400
},
{
"epoch": 0.23654888358985476,
"grad_norm": 1.109375,
"learning_rate": 4.407130434782609e-06,
"loss": 0.2225,
"step": 3410
},
{
"epoch": 0.23724257533058746,
"grad_norm": 1.2109375,
"learning_rate": 4.4053913043478265e-06,
"loss": 0.2154,
"step": 3420
},
{
"epoch": 0.23793626707132018,
"grad_norm": 0.91015625,
"learning_rate": 4.403652173913044e-06,
"loss": 0.3088,
"step": 3430
},
{
"epoch": 0.2386299588120529,
"grad_norm": 1.9375,
"learning_rate": 4.401913043478261e-06,
"loss": 0.2877,
"step": 3440
},
{
"epoch": 0.2393236505527856,
"grad_norm": 1.984375,
"learning_rate": 4.4001739130434786e-06,
"loss": 0.2145,
"step": 3450
},
{
"epoch": 0.24001734229351832,
"grad_norm": 0.8515625,
"learning_rate": 4.398434782608696e-06,
"loss": 0.2338,
"step": 3460
},
{
"epoch": 0.24071103403425104,
"grad_norm": 1.328125,
"learning_rate": 4.396695652173913e-06,
"loss": 0.2488,
"step": 3470
},
{
"epoch": 0.24140472577498373,
"grad_norm": 1.0234375,
"learning_rate": 4.394956521739131e-06,
"loss": 0.2444,
"step": 3480
},
{
"epoch": 0.24209841751571645,
"grad_norm": 1.234375,
"learning_rate": 4.393217391304348e-06,
"loss": 0.2326,
"step": 3490
},
{
"epoch": 0.24279210925644917,
"grad_norm": 0.99609375,
"learning_rate": 4.3914782608695654e-06,
"loss": 0.2431,
"step": 3500
},
{
"epoch": 0.24348580099718187,
"grad_norm": 1.4375,
"learning_rate": 4.389739130434783e-06,
"loss": 0.2479,
"step": 3510
},
{
"epoch": 0.2441794927379146,
"grad_norm": 1.46875,
"learning_rate": 4.388e-06,
"loss": 0.2517,
"step": 3520
},
{
"epoch": 0.2448731844786473,
"grad_norm": 1.1640625,
"learning_rate": 4.3862608695652175e-06,
"loss": 0.2808,
"step": 3530
},
{
"epoch": 0.24556687621938,
"grad_norm": 1.1796875,
"learning_rate": 4.384521739130435e-06,
"loss": 0.2571,
"step": 3540
},
{
"epoch": 0.24626056796011273,
"grad_norm": 1.9921875,
"learning_rate": 4.382782608695652e-06,
"loss": 0.302,
"step": 3550
},
{
"epoch": 0.24695425970084545,
"grad_norm": 1.28125,
"learning_rate": 4.3810434782608705e-06,
"loss": 0.2128,
"step": 3560
},
{
"epoch": 0.24764795144157814,
"grad_norm": 0.96875,
"learning_rate": 4.379304347826087e-06,
"loss": 0.2285,
"step": 3570
},
{
"epoch": 0.24834164318231086,
"grad_norm": 1.3671875,
"learning_rate": 4.377565217391304e-06,
"loss": 0.2401,
"step": 3580
},
{
"epoch": 0.24903533492304358,
"grad_norm": 1.109375,
"learning_rate": 4.375826086956523e-06,
"loss": 0.2543,
"step": 3590
},
{
"epoch": 0.24972902666377628,
"grad_norm": 1.2109375,
"learning_rate": 4.37408695652174e-06,
"loss": 0.2598,
"step": 3600
},
{
"epoch": 0.25042271840450897,
"grad_norm": 1.34375,
"learning_rate": 4.3723478260869565e-06,
"loss": 0.3157,
"step": 3610
},
{
"epoch": 0.2511164101452417,
"grad_norm": 1.3671875,
"learning_rate": 4.370608695652174e-06,
"loss": 0.1943,
"step": 3620
},
{
"epoch": 0.2518101018859744,
"grad_norm": 1.1953125,
"learning_rate": 4.368869565217392e-06,
"loss": 0.2737,
"step": 3630
},
{
"epoch": 0.2525037936267071,
"grad_norm": 0.9375,
"learning_rate": 4.367130434782609e-06,
"loss": 0.2737,
"step": 3640
},
{
"epoch": 0.25319748536743986,
"grad_norm": 1.078125,
"learning_rate": 4.365391304347826e-06,
"loss": 0.2254,
"step": 3650
},
{
"epoch": 0.25389117710817255,
"grad_norm": 1.0859375,
"learning_rate": 4.363652173913044e-06,
"loss": 0.2407,
"step": 3660
},
{
"epoch": 0.25458486884890524,
"grad_norm": 1.453125,
"learning_rate": 4.361913043478262e-06,
"loss": 0.2414,
"step": 3670
},
{
"epoch": 0.255278560589638,
"grad_norm": 0.97265625,
"learning_rate": 4.360173913043478e-06,
"loss": 0.2899,
"step": 3680
},
{
"epoch": 0.2559722523303707,
"grad_norm": 1.125,
"learning_rate": 4.3584347826086955e-06,
"loss": 0.2452,
"step": 3690
},
{
"epoch": 0.2566659440711034,
"grad_norm": 1.2109375,
"learning_rate": 4.356695652173914e-06,
"loss": 0.2251,
"step": 3700
},
{
"epoch": 0.25735963581183613,
"grad_norm": 1.421875,
"learning_rate": 4.354956521739131e-06,
"loss": 0.2506,
"step": 3710
},
{
"epoch": 0.2580533275525688,
"grad_norm": 1.3125,
"learning_rate": 4.353217391304348e-06,
"loss": 0.2592,
"step": 3720
},
{
"epoch": 0.2587470192933015,
"grad_norm": 1.3984375,
"learning_rate": 4.351478260869566e-06,
"loss": 0.2523,
"step": 3730
},
{
"epoch": 0.25944071103403427,
"grad_norm": 1.3515625,
"learning_rate": 4.349739130434783e-06,
"loss": 0.2409,
"step": 3740
},
{
"epoch": 0.26013440277476696,
"grad_norm": 1.3359375,
"learning_rate": 4.3480000000000006e-06,
"loss": 0.253,
"step": 3750
},
{
"epoch": 0.26082809451549965,
"grad_norm": 1.8515625,
"learning_rate": 4.346260869565218e-06,
"loss": 0.2862,
"step": 3760
},
{
"epoch": 0.2615217862562324,
"grad_norm": 1.359375,
"learning_rate": 4.344521739130435e-06,
"loss": 0.2504,
"step": 3770
},
{
"epoch": 0.2622154779969651,
"grad_norm": 0.984375,
"learning_rate": 4.342782608695653e-06,
"loss": 0.2537,
"step": 3780
},
{
"epoch": 0.2629091697376978,
"grad_norm": 1.0078125,
"learning_rate": 4.34104347826087e-06,
"loss": 0.2099,
"step": 3790
},
{
"epoch": 0.26360286147843054,
"grad_norm": 1.0625,
"learning_rate": 4.339304347826087e-06,
"loss": 0.2753,
"step": 3800
},
{
"epoch": 0.26429655321916323,
"grad_norm": 1.125,
"learning_rate": 4.337565217391305e-06,
"loss": 0.2202,
"step": 3810
},
{
"epoch": 0.2649902449598959,
"grad_norm": 1.1484375,
"learning_rate": 4.335826086956522e-06,
"loss": 0.2382,
"step": 3820
},
{
"epoch": 0.2656839367006287,
"grad_norm": 1.09375,
"learning_rate": 4.3340869565217395e-06,
"loss": 0.2092,
"step": 3830
},
{
"epoch": 0.26637762844136137,
"grad_norm": 1.1328125,
"learning_rate": 4.332347826086957e-06,
"loss": 0.3343,
"step": 3840
},
{
"epoch": 0.26707132018209406,
"grad_norm": 1.265625,
"learning_rate": 4.330608695652174e-06,
"loss": 0.2324,
"step": 3850
},
{
"epoch": 0.2677650119228268,
"grad_norm": 1.1875,
"learning_rate": 4.328869565217392e-06,
"loss": 0.2864,
"step": 3860
},
{
"epoch": 0.2684587036635595,
"grad_norm": 1.46875,
"learning_rate": 4.327130434782609e-06,
"loss": 0.2883,
"step": 3870
},
{
"epoch": 0.2691523954042922,
"grad_norm": 1.046875,
"learning_rate": 4.325391304347826e-06,
"loss": 0.2376,
"step": 3880
},
{
"epoch": 0.26984608714502495,
"grad_norm": 1.5,
"learning_rate": 4.323652173913044e-06,
"loss": 0.2556,
"step": 3890
},
{
"epoch": 0.27053977888575764,
"grad_norm": 1.3359375,
"learning_rate": 4.321913043478261e-06,
"loss": 0.3276,
"step": 3900
},
{
"epoch": 0.27123347062649034,
"grad_norm": 1.375,
"learning_rate": 4.3201739130434785e-06,
"loss": 0.2472,
"step": 3910
},
{
"epoch": 0.2719271623672231,
"grad_norm": 1.1640625,
"learning_rate": 4.318434782608696e-06,
"loss": 0.2264,
"step": 3920
},
{
"epoch": 0.2726208541079558,
"grad_norm": 1.3515625,
"learning_rate": 4.316695652173913e-06,
"loss": 0.2494,
"step": 3930
},
{
"epoch": 0.2733145458486885,
"grad_norm": 1.59375,
"learning_rate": 4.314956521739131e-06,
"loss": 0.2339,
"step": 3940
},
{
"epoch": 0.2740082375894212,
"grad_norm": 0.89453125,
"learning_rate": 4.313217391304348e-06,
"loss": 0.2365,
"step": 3950
},
{
"epoch": 0.2747019293301539,
"grad_norm": 1.359375,
"learning_rate": 4.311478260869565e-06,
"loss": 0.2584,
"step": 3960
},
{
"epoch": 0.2753956210708866,
"grad_norm": 1.28125,
"learning_rate": 4.309739130434783e-06,
"loss": 0.2279,
"step": 3970
},
{
"epoch": 0.27608931281161936,
"grad_norm": 1.3984375,
"learning_rate": 4.308000000000001e-06,
"loss": 0.2542,
"step": 3980
},
{
"epoch": 0.27678300455235205,
"grad_norm": 1.15625,
"learning_rate": 4.3062608695652175e-06,
"loss": 0.2315,
"step": 3990
},
{
"epoch": 0.27747669629308475,
"grad_norm": 1.5,
"learning_rate": 4.304521739130435e-06,
"loss": 0.2557,
"step": 4000
},
{
"epoch": 0.2781703880338175,
"grad_norm": 0.90625,
"learning_rate": 4.302782608695652e-06,
"loss": 0.2569,
"step": 4010
},
{
"epoch": 0.2788640797745502,
"grad_norm": 1.1875,
"learning_rate": 4.30104347826087e-06,
"loss": 0.2823,
"step": 4020
},
{
"epoch": 0.2795577715152829,
"grad_norm": 0.9921875,
"learning_rate": 4.299304347826087e-06,
"loss": 0.2826,
"step": 4030
},
{
"epoch": 0.28025146325601563,
"grad_norm": 1.234375,
"learning_rate": 4.297565217391304e-06,
"loss": 0.2421,
"step": 4040
},
{
"epoch": 0.2809451549967483,
"grad_norm": 1.375,
"learning_rate": 4.2958260869565225e-06,
"loss": 0.236,
"step": 4050
},
{
"epoch": 0.281638846737481,
"grad_norm": 1.0625,
"learning_rate": 4.29408695652174e-06,
"loss": 0.2304,
"step": 4060
},
{
"epoch": 0.28233253847821377,
"grad_norm": 1.2578125,
"learning_rate": 4.2923478260869564e-06,
"loss": 0.2403,
"step": 4070
},
{
"epoch": 0.28302623021894646,
"grad_norm": 2.03125,
"learning_rate": 4.290608695652174e-06,
"loss": 0.3514,
"step": 4080
},
{
"epoch": 0.28371992195967916,
"grad_norm": 1.328125,
"learning_rate": 4.288869565217392e-06,
"loss": 0.2904,
"step": 4090
},
{
"epoch": 0.2844136137004119,
"grad_norm": 1.140625,
"learning_rate": 4.287130434782609e-06,
"loss": 0.2514,
"step": 4100
},
{
"epoch": 0.2851073054411446,
"grad_norm": 1.1953125,
"learning_rate": 4.285391304347826e-06,
"loss": 0.2677,
"step": 4110
},
{
"epoch": 0.2858009971818773,
"grad_norm": 1.46875,
"learning_rate": 4.283652173913044e-06,
"loss": 0.2288,
"step": 4120
},
{
"epoch": 0.28649468892261004,
"grad_norm": 1.328125,
"learning_rate": 4.2819130434782615e-06,
"loss": 0.2551,
"step": 4130
},
{
"epoch": 0.28718838066334273,
"grad_norm": 0.98828125,
"learning_rate": 4.280173913043479e-06,
"loss": 0.2538,
"step": 4140
},
{
"epoch": 0.28788207240407543,
"grad_norm": 1.3671875,
"learning_rate": 4.278434782608696e-06,
"loss": 0.2679,
"step": 4150
},
{
"epoch": 0.2885757641448082,
"grad_norm": 1.296875,
"learning_rate": 4.276695652173914e-06,
"loss": 0.2682,
"step": 4160
},
{
"epoch": 0.28926945588554087,
"grad_norm": 1.8046875,
"learning_rate": 4.274956521739131e-06,
"loss": 0.2981,
"step": 4170
},
{
"epoch": 0.28996314762627357,
"grad_norm": 2.015625,
"learning_rate": 4.273217391304348e-06,
"loss": 0.2792,
"step": 4180
},
{
"epoch": 0.29065683936700626,
"grad_norm": 1.234375,
"learning_rate": 4.271478260869566e-06,
"loss": 0.243,
"step": 4190
},
{
"epoch": 0.291350531107739,
"grad_norm": 1.078125,
"learning_rate": 4.269739130434783e-06,
"loss": 0.217,
"step": 4200
},
{
"epoch": 0.2920442228484717,
"grad_norm": 1.28125,
"learning_rate": 4.2680000000000005e-06,
"loss": 0.2287,
"step": 4210
},
{
"epoch": 0.2927379145892044,
"grad_norm": 1.265625,
"learning_rate": 4.266260869565218e-06,
"loss": 0.2609,
"step": 4220
},
{
"epoch": 0.29343160632993714,
"grad_norm": 1.4453125,
"learning_rate": 4.264521739130435e-06,
"loss": 0.2283,
"step": 4230
},
{
"epoch": 0.29412529807066984,
"grad_norm": 1.0078125,
"learning_rate": 4.262782608695653e-06,
"loss": 0.2076,
"step": 4240
},
{
"epoch": 0.29481898981140253,
"grad_norm": 1.6953125,
"learning_rate": 4.26104347826087e-06,
"loss": 0.2701,
"step": 4250
},
{
"epoch": 0.2955126815521353,
"grad_norm": 1.34375,
"learning_rate": 4.259304347826087e-06,
"loss": 0.2239,
"step": 4260
},
{
"epoch": 0.296206373292868,
"grad_norm": 1.015625,
"learning_rate": 4.257565217391305e-06,
"loss": 0.2263,
"step": 4270
},
{
"epoch": 0.29690006503360067,
"grad_norm": 1.0234375,
"learning_rate": 4.255826086956522e-06,
"loss": 0.2316,
"step": 4280
},
{
"epoch": 0.2975937567743334,
"grad_norm": 1.4375,
"learning_rate": 4.2540869565217394e-06,
"loss": 0.2467,
"step": 4290
},
{
"epoch": 0.2982874485150661,
"grad_norm": 1.359375,
"learning_rate": 4.252347826086957e-06,
"loss": 0.2514,
"step": 4300
},
{
"epoch": 0.2989811402557988,
"grad_norm": 1.1796875,
"learning_rate": 4.250608695652174e-06,
"loss": 0.257,
"step": 4310
},
{
"epoch": 0.29967483199653155,
"grad_norm": 1.453125,
"learning_rate": 4.2488695652173916e-06,
"loss": 0.2062,
"step": 4320
},
{
"epoch": 0.30036852373726425,
"grad_norm": 1.140625,
"learning_rate": 4.247130434782609e-06,
"loss": 0.197,
"step": 4330
},
{
"epoch": 0.30106221547799694,
"grad_norm": 0.9609375,
"learning_rate": 4.245391304347826e-06,
"loss": 0.2319,
"step": 4340
},
{
"epoch": 0.3017559072187297,
"grad_norm": 1.171875,
"learning_rate": 4.243652173913044e-06,
"loss": 0.2404,
"step": 4350
},
{
"epoch": 0.3024495989594624,
"grad_norm": 1.09375,
"learning_rate": 4.241913043478261e-06,
"loss": 0.2156,
"step": 4360
},
{
"epoch": 0.3031432907001951,
"grad_norm": 1.3046875,
"learning_rate": 4.240173913043478e-06,
"loss": 0.2299,
"step": 4370
},
{
"epoch": 0.3038369824409278,
"grad_norm": 1.375,
"learning_rate": 4.238434782608696e-06,
"loss": 0.2404,
"step": 4380
},
{
"epoch": 0.3045306741816605,
"grad_norm": 1.4140625,
"learning_rate": 4.236695652173913e-06,
"loss": 0.1988,
"step": 4390
},
{
"epoch": 0.3052243659223932,
"grad_norm": 0.87890625,
"learning_rate": 4.2349565217391305e-06,
"loss": 0.2215,
"step": 4400
},
{
"epoch": 0.30591805766312596,
"grad_norm": 1.0390625,
"learning_rate": 4.233217391304349e-06,
"loss": 0.1986,
"step": 4410
},
{
"epoch": 0.30661174940385866,
"grad_norm": 1.296875,
"learning_rate": 4.231478260869565e-06,
"loss": 0.2359,
"step": 4420
},
{
"epoch": 0.30730544114459135,
"grad_norm": 1.2109375,
"learning_rate": 4.229739130434783e-06,
"loss": 0.2575,
"step": 4430
},
{
"epoch": 0.3079991328853241,
"grad_norm": 1.2734375,
"learning_rate": 4.228000000000001e-06,
"loss": 0.2429,
"step": 4440
},
{
"epoch": 0.3086928246260568,
"grad_norm": 1.28125,
"learning_rate": 4.226260869565218e-06,
"loss": 0.2587,
"step": 4450
},
{
"epoch": 0.3093865163667895,
"grad_norm": 1.6171875,
"learning_rate": 4.224521739130435e-06,
"loss": 0.348,
"step": 4460
},
{
"epoch": 0.31008020810752224,
"grad_norm": 1.2265625,
"learning_rate": 4.222782608695652e-06,
"loss": 0.2771,
"step": 4470
},
{
"epoch": 0.31077389984825493,
"grad_norm": 1.1015625,
"learning_rate": 4.22104347826087e-06,
"loss": 0.2308,
"step": 4480
},
{
"epoch": 0.3114675915889876,
"grad_norm": 1.015625,
"learning_rate": 4.219304347826088e-06,
"loss": 0.2672,
"step": 4490
},
{
"epoch": 0.3121612833297204,
"grad_norm": 1.359375,
"learning_rate": 4.217565217391304e-06,
"loss": 0.2603,
"step": 4500
},
{
"epoch": 0.31285497507045307,
"grad_norm": 0.8671875,
"learning_rate": 4.2158260869565225e-06,
"loss": 0.2212,
"step": 4510
},
{
"epoch": 0.31354866681118576,
"grad_norm": 1.1875,
"learning_rate": 4.21408695652174e-06,
"loss": 0.2073,
"step": 4520
},
{
"epoch": 0.3142423585519185,
"grad_norm": 1.546875,
"learning_rate": 4.212347826086957e-06,
"loss": 0.2602,
"step": 4530
},
{
"epoch": 0.3149360502926512,
"grad_norm": 1.140625,
"learning_rate": 4.210608695652174e-06,
"loss": 0.2279,
"step": 4540
},
{
"epoch": 0.3156297420333839,
"grad_norm": 1.40625,
"learning_rate": 4.208869565217392e-06,
"loss": 0.2433,
"step": 4550
},
{
"epoch": 0.31632343377411665,
"grad_norm": 1.171875,
"learning_rate": 4.207130434782609e-06,
"loss": 0.3271,
"step": 4560
},
{
"epoch": 0.31701712551484934,
"grad_norm": 1.046875,
"learning_rate": 4.205391304347826e-06,
"loss": 0.3058,
"step": 4570
},
{
"epoch": 0.31771081725558203,
"grad_norm": 1.921875,
"learning_rate": 4.203652173913044e-06,
"loss": 0.2724,
"step": 4580
},
{
"epoch": 0.3184045089963148,
"grad_norm": 1.09375,
"learning_rate": 4.201913043478261e-06,
"loss": 0.2538,
"step": 4590
},
{
"epoch": 0.3190982007370475,
"grad_norm": 1.328125,
"learning_rate": 4.200173913043479e-06,
"loss": 0.2394,
"step": 4600
},
{
"epoch": 0.31979189247778017,
"grad_norm": 1.265625,
"learning_rate": 4.198434782608696e-06,
"loss": 0.2433,
"step": 4610
},
{
"epoch": 0.3204855842185129,
"grad_norm": 1.2890625,
"learning_rate": 4.1966956521739135e-06,
"loss": 0.2457,
"step": 4620
},
{
"epoch": 0.3211792759592456,
"grad_norm": 1.0390625,
"learning_rate": 4.194956521739131e-06,
"loss": 0.2291,
"step": 4630
},
{
"epoch": 0.3218729676999783,
"grad_norm": 1.40625,
"learning_rate": 4.193217391304348e-06,
"loss": 0.3308,
"step": 4640
},
{
"epoch": 0.32256665944071106,
"grad_norm": 1.234375,
"learning_rate": 4.191478260869566e-06,
"loss": 0.2558,
"step": 4650
},
{
"epoch": 0.32326035118144375,
"grad_norm": 1.2265625,
"learning_rate": 4.189739130434783e-06,
"loss": 0.308,
"step": 4660
},
{
"epoch": 0.32395404292217644,
"grad_norm": 1.578125,
"learning_rate": 4.188e-06,
"loss": 0.2524,
"step": 4670
},
{
"epoch": 0.3246477346629092,
"grad_norm": 1.125,
"learning_rate": 4.186260869565218e-06,
"loss": 0.2373,
"step": 4680
},
{
"epoch": 0.3253414264036419,
"grad_norm": 1.4609375,
"learning_rate": 4.184521739130435e-06,
"loss": 0.2355,
"step": 4690
},
{
"epoch": 0.3260351181443746,
"grad_norm": 1.0234375,
"learning_rate": 4.1827826086956525e-06,
"loss": 0.2094,
"step": 4700
},
{
"epoch": 0.32672880988510733,
"grad_norm": 1.1328125,
"learning_rate": 4.18104347826087e-06,
"loss": 0.2346,
"step": 4710
},
{
"epoch": 0.32742250162584,
"grad_norm": 1.109375,
"learning_rate": 4.179304347826087e-06,
"loss": 0.2531,
"step": 4720
},
{
"epoch": 0.3281161933665727,
"grad_norm": 1.6015625,
"learning_rate": 4.177565217391305e-06,
"loss": 0.2837,
"step": 4730
},
{
"epoch": 0.32880988510730547,
"grad_norm": 1.1484375,
"learning_rate": 4.175826086956522e-06,
"loss": 0.2844,
"step": 4740
},
{
"epoch": 0.32950357684803816,
"grad_norm": 1.25,
"learning_rate": 4.174086956521739e-06,
"loss": 0.2293,
"step": 4750
},
{
"epoch": 0.33019726858877085,
"grad_norm": 1.234375,
"learning_rate": 4.172347826086957e-06,
"loss": 0.2447,
"step": 4760
},
{
"epoch": 0.3308909603295036,
"grad_norm": 1.4453125,
"learning_rate": 4.170608695652174e-06,
"loss": 0.2422,
"step": 4770
},
{
"epoch": 0.3315846520702363,
"grad_norm": 1.2578125,
"learning_rate": 4.1688695652173915e-06,
"loss": 0.2318,
"step": 4780
},
{
"epoch": 0.332278343810969,
"grad_norm": 1.7734375,
"learning_rate": 4.167130434782609e-06,
"loss": 0.2632,
"step": 4790
},
{
"epoch": 0.33297203555170174,
"grad_norm": 0.91796875,
"learning_rate": 4.165391304347827e-06,
"loss": 0.2217,
"step": 4800
},
{
"epoch": 0.33366572729243443,
"grad_norm": 1.34375,
"learning_rate": 4.163652173913044e-06,
"loss": 0.2919,
"step": 4810
},
{
"epoch": 0.3343594190331671,
"grad_norm": 1.109375,
"learning_rate": 4.161913043478261e-06,
"loss": 0.2504,
"step": 4820
},
{
"epoch": 0.3350531107738999,
"grad_norm": 0.890625,
"learning_rate": 4.160173913043478e-06,
"loss": 0.2633,
"step": 4830
},
{
"epoch": 0.33574680251463257,
"grad_norm": 1.03125,
"learning_rate": 4.1584347826086965e-06,
"loss": 0.2394,
"step": 4840
},
{
"epoch": 0.33644049425536526,
"grad_norm": 1.1484375,
"learning_rate": 4.156695652173913e-06,
"loss": 0.2634,
"step": 4850
},
{
"epoch": 0.33713418599609796,
"grad_norm": 1.265625,
"learning_rate": 4.1549565217391304e-06,
"loss": 0.2994,
"step": 4860
},
{
"epoch": 0.3378278777368307,
"grad_norm": 1.0,
"learning_rate": 4.153217391304349e-06,
"loss": 0.2641,
"step": 4870
},
{
"epoch": 0.3385215694775634,
"grad_norm": 1.1484375,
"learning_rate": 4.151478260869565e-06,
"loss": 0.233,
"step": 4880
},
{
"epoch": 0.3392152612182961,
"grad_norm": 1.0703125,
"learning_rate": 4.1497391304347826e-06,
"loss": 0.2218,
"step": 4890
},
{
"epoch": 0.33990895295902884,
"grad_norm": 1.5546875,
"learning_rate": 4.148000000000001e-06,
"loss": 0.2219,
"step": 4900
},
{
"epoch": 0.34060264469976154,
"grad_norm": 1.1015625,
"learning_rate": 4.146260869565218e-06,
"loss": 0.2372,
"step": 4910
},
{
"epoch": 0.34129633644049423,
"grad_norm": 1.3828125,
"learning_rate": 4.144521739130435e-06,
"loss": 0.2405,
"step": 4920
},
{
"epoch": 0.341990028181227,
"grad_norm": 1.1640625,
"learning_rate": 4.142782608695652e-06,
"loss": 0.2585,
"step": 4930
},
{
"epoch": 0.3426837199219597,
"grad_norm": 1.09375,
"learning_rate": 4.14104347826087e-06,
"loss": 0.2515,
"step": 4940
},
{
"epoch": 0.34337741166269237,
"grad_norm": 1.09375,
"learning_rate": 4.139304347826088e-06,
"loss": 0.3092,
"step": 4950
},
{
"epoch": 0.3440711034034251,
"grad_norm": 1.28125,
"learning_rate": 4.137565217391304e-06,
"loss": 0.237,
"step": 4960
},
{
"epoch": 0.3447647951441578,
"grad_norm": 1.359375,
"learning_rate": 4.135826086956522e-06,
"loss": 0.2279,
"step": 4970
},
{
"epoch": 0.3454584868848905,
"grad_norm": 1.3203125,
"learning_rate": 4.13408695652174e-06,
"loss": 0.2481,
"step": 4980
},
{
"epoch": 0.34615217862562325,
"grad_norm": 1.046875,
"learning_rate": 4.132347826086957e-06,
"loss": 0.2375,
"step": 4990
},
{
"epoch": 0.34684587036635595,
"grad_norm": 1.3125,
"learning_rate": 4.130608695652174e-06,
"loss": 0.2408,
"step": 5000
},
{
"epoch": 0.34753956210708864,
"grad_norm": 1.203125,
"learning_rate": 4.128869565217392e-06,
"loss": 0.2326,
"step": 5010
},
{
"epoch": 0.3482332538478214,
"grad_norm": 1.0234375,
"learning_rate": 4.127130434782609e-06,
"loss": 0.2321,
"step": 5020
},
{
"epoch": 0.3489269455885541,
"grad_norm": 1.078125,
"learning_rate": 4.125391304347827e-06,
"loss": 0.2579,
"step": 5030
},
{
"epoch": 0.3496206373292868,
"grad_norm": 1.0390625,
"learning_rate": 4.123652173913044e-06,
"loss": 0.2407,
"step": 5040
},
{
"epoch": 0.3503143290700195,
"grad_norm": 1.53125,
"learning_rate": 4.121913043478261e-06,
"loss": 0.2519,
"step": 5050
},
{
"epoch": 0.3510080208107522,
"grad_norm": 1.1953125,
"learning_rate": 4.120173913043479e-06,
"loss": 0.2598,
"step": 5060
},
{
"epoch": 0.3517017125514849,
"grad_norm": 1.203125,
"learning_rate": 4.118434782608696e-06,
"loss": 0.2603,
"step": 5070
},
{
"epoch": 0.35239540429221766,
"grad_norm": 1.296875,
"learning_rate": 4.1166956521739135e-06,
"loss": 0.2179,
"step": 5080
},
{
"epoch": 0.35308909603295036,
"grad_norm": 0.98828125,
"learning_rate": 4.114956521739131e-06,
"loss": 0.2368,
"step": 5090
},
{
"epoch": 0.35378278777368305,
"grad_norm": 1.1015625,
"learning_rate": 4.113217391304348e-06,
"loss": 0.2639,
"step": 5100
},
{
"epoch": 0.3544764795144158,
"grad_norm": 1.03125,
"learning_rate": 4.1114782608695656e-06,
"loss": 0.224,
"step": 5110
},
{
"epoch": 0.3551701712551485,
"grad_norm": 1.2578125,
"learning_rate": 4.109739130434783e-06,
"loss": 0.2546,
"step": 5120
},
{
"epoch": 0.3558638629958812,
"grad_norm": 1.3671875,
"learning_rate": 4.108e-06,
"loss": 0.2738,
"step": 5130
},
{
"epoch": 0.35655755473661394,
"grad_norm": 1.125,
"learning_rate": 4.106260869565218e-06,
"loss": 0.2176,
"step": 5140
},
{
"epoch": 0.35725124647734663,
"grad_norm": 1.125,
"learning_rate": 4.104521739130435e-06,
"loss": 0.2345,
"step": 5150
},
{
"epoch": 0.3579449382180793,
"grad_norm": 1.25,
"learning_rate": 4.102782608695652e-06,
"loss": 0.2391,
"step": 5160
},
{
"epoch": 0.35863862995881207,
"grad_norm": 1.328125,
"learning_rate": 4.10104347826087e-06,
"loss": 0.2476,
"step": 5170
},
{
"epoch": 0.35933232169954477,
"grad_norm": 1.390625,
"learning_rate": 4.099304347826087e-06,
"loss": 0.2305,
"step": 5180
},
{
"epoch": 0.36002601344027746,
"grad_norm": 1.3984375,
"learning_rate": 4.0975652173913045e-06,
"loss": 0.2072,
"step": 5190
},
{
"epoch": 0.3607197051810102,
"grad_norm": 1.3828125,
"learning_rate": 4.095826086956522e-06,
"loss": 0.2452,
"step": 5200
},
{
"epoch": 0.3614133969217429,
"grad_norm": 1.453125,
"learning_rate": 4.094086956521739e-06,
"loss": 0.2279,
"step": 5210
},
{
"epoch": 0.3621070886624756,
"grad_norm": 0.8984375,
"learning_rate": 4.092347826086957e-06,
"loss": 0.2563,
"step": 5220
},
{
"epoch": 0.36280078040320834,
"grad_norm": 1.1953125,
"learning_rate": 4.090608695652174e-06,
"loss": 0.2403,
"step": 5230
},
{
"epoch": 0.36349447214394104,
"grad_norm": 1.4375,
"learning_rate": 4.088869565217391e-06,
"loss": 0.2423,
"step": 5240
},
{
"epoch": 0.36418816388467373,
"grad_norm": 1.0234375,
"learning_rate": 4.087130434782609e-06,
"loss": 0.2361,
"step": 5250
},
{
"epoch": 0.3648818556254065,
"grad_norm": 1.125,
"learning_rate": 4.085391304347827e-06,
"loss": 0.2578,
"step": 5260
},
{
"epoch": 0.3655755473661392,
"grad_norm": 1.5703125,
"learning_rate": 4.0836521739130435e-06,
"loss": 0.2747,
"step": 5270
},
{
"epoch": 0.36626923910687187,
"grad_norm": 0.91015625,
"learning_rate": 4.081913043478261e-06,
"loss": 0.2621,
"step": 5280
},
{
"epoch": 0.3669629308476046,
"grad_norm": 1.2265625,
"learning_rate": 4.080173913043478e-06,
"loss": 0.2146,
"step": 5290
},
{
"epoch": 0.3676566225883373,
"grad_norm": 1.2734375,
"learning_rate": 4.0784347826086965e-06,
"loss": 0.2239,
"step": 5300
},
{
"epoch": 0.36835031432907,
"grad_norm": 1.0078125,
"learning_rate": 4.076695652173913e-06,
"loss": 0.2409,
"step": 5310
},
{
"epoch": 0.36904400606980275,
"grad_norm": 1.5078125,
"learning_rate": 4.07495652173913e-06,
"loss": 0.2346,
"step": 5320
},
{
"epoch": 0.36973769781053545,
"grad_norm": 1.1796875,
"learning_rate": 4.073217391304349e-06,
"loss": 0.2461,
"step": 5330
},
{
"epoch": 0.37043138955126814,
"grad_norm": 1.328125,
"learning_rate": 4.071478260869566e-06,
"loss": 0.3231,
"step": 5340
},
{
"epoch": 0.3711250812920009,
"grad_norm": 1.0546875,
"learning_rate": 4.0697391304347825e-06,
"loss": 0.2363,
"step": 5350
},
{
"epoch": 0.3718187730327336,
"grad_norm": 1.21875,
"learning_rate": 4.068000000000001e-06,
"loss": 0.2485,
"step": 5360
},
{
"epoch": 0.3725124647734663,
"grad_norm": 1.3046875,
"learning_rate": 4.066260869565218e-06,
"loss": 0.2416,
"step": 5370
},
{
"epoch": 0.37320615651419903,
"grad_norm": 1.34375,
"learning_rate": 4.0645217391304354e-06,
"loss": 0.2117,
"step": 5380
},
{
"epoch": 0.3738998482549317,
"grad_norm": 1.1640625,
"learning_rate": 4.062782608695652e-06,
"loss": 0.2054,
"step": 5390
},
{
"epoch": 0.3745935399956644,
"grad_norm": 1.078125,
"learning_rate": 4.06104347826087e-06,
"loss": 0.216,
"step": 5400
},
{
"epoch": 0.37528723173639716,
"grad_norm": 1.0390625,
"learning_rate": 4.0593043478260875e-06,
"loss": 0.251,
"step": 5410
},
{
"epoch": 0.37598092347712986,
"grad_norm": 0.92578125,
"learning_rate": 4.057565217391305e-06,
"loss": 0.2377,
"step": 5420
},
{
"epoch": 0.37667461521786255,
"grad_norm": 1.2890625,
"learning_rate": 4.055826086956522e-06,
"loss": 0.2643,
"step": 5430
},
{
"epoch": 0.3773683069585953,
"grad_norm": 2.03125,
"learning_rate": 4.05408695652174e-06,
"loss": 0.3051,
"step": 5440
},
{
"epoch": 0.378061998699328,
"grad_norm": 0.96875,
"learning_rate": 4.052347826086957e-06,
"loss": 0.2471,
"step": 5450
},
{
"epoch": 0.3787556904400607,
"grad_norm": 1.015625,
"learning_rate": 4.050608695652174e-06,
"loss": 0.2804,
"step": 5460
},
{
"epoch": 0.37944938218079344,
"grad_norm": 1.078125,
"learning_rate": 4.048869565217392e-06,
"loss": 0.2892,
"step": 5470
},
{
"epoch": 0.38014307392152613,
"grad_norm": 1.015625,
"learning_rate": 4.047130434782609e-06,
"loss": 0.2323,
"step": 5480
},
{
"epoch": 0.3808367656622588,
"grad_norm": 1.0859375,
"learning_rate": 4.0453913043478265e-06,
"loss": 0.2362,
"step": 5490
},
{
"epoch": 0.3815304574029915,
"grad_norm": 0.96875,
"learning_rate": 4.043652173913044e-06,
"loss": 0.2262,
"step": 5500
},
{
"epoch": 0.38222414914372427,
"grad_norm": 1.1015625,
"learning_rate": 4.041913043478261e-06,
"loss": 0.2667,
"step": 5510
},
{
"epoch": 0.38291784088445696,
"grad_norm": 1.03125,
"learning_rate": 4.040173913043479e-06,
"loss": 0.2642,
"step": 5520
},
{
"epoch": 0.38361153262518966,
"grad_norm": 1.4140625,
"learning_rate": 4.038434782608696e-06,
"loss": 0.2579,
"step": 5530
},
{
"epoch": 0.3843052243659224,
"grad_norm": 0.921875,
"learning_rate": 4.036695652173913e-06,
"loss": 0.2491,
"step": 5540
},
{
"epoch": 0.3849989161066551,
"grad_norm": 1.7890625,
"learning_rate": 4.034956521739131e-06,
"loss": 0.2521,
"step": 5550
},
{
"epoch": 0.3856926078473878,
"grad_norm": 1.9453125,
"learning_rate": 4.033217391304348e-06,
"loss": 0.3052,
"step": 5560
},
{
"epoch": 0.38638629958812054,
"grad_norm": 1.1171875,
"learning_rate": 4.0314782608695655e-06,
"loss": 0.2403,
"step": 5570
},
{
"epoch": 0.38707999132885323,
"grad_norm": 1.1328125,
"learning_rate": 4.029739130434783e-06,
"loss": 0.2278,
"step": 5580
},
{
"epoch": 0.38777368306958593,
"grad_norm": 1.15625,
"learning_rate": 4.028e-06,
"loss": 0.2863,
"step": 5590
},
{
"epoch": 0.3884673748103187,
"grad_norm": 1.46875,
"learning_rate": 4.026260869565218e-06,
"loss": 0.2414,
"step": 5600
},
{
"epoch": 0.38916106655105137,
"grad_norm": 1.4375,
"learning_rate": 4.024521739130435e-06,
"loss": 0.2932,
"step": 5610
},
{
"epoch": 0.38985475829178406,
"grad_norm": 1.7421875,
"learning_rate": 4.022782608695652e-06,
"loss": 0.2212,
"step": 5620
},
{
"epoch": 0.3905484500325168,
"grad_norm": 1.28125,
"learning_rate": 4.02104347826087e-06,
"loss": 0.2225,
"step": 5630
},
{
"epoch": 0.3912421417732495,
"grad_norm": 0.9296875,
"learning_rate": 4.019304347826087e-06,
"loss": 0.2289,
"step": 5640
},
{
"epoch": 0.3919358335139822,
"grad_norm": 1.25,
"learning_rate": 4.017565217391305e-06,
"loss": 0.3125,
"step": 5650
},
{
"epoch": 0.39262952525471495,
"grad_norm": 1.7578125,
"learning_rate": 4.015826086956522e-06,
"loss": 0.278,
"step": 5660
},
{
"epoch": 0.39332321699544764,
"grad_norm": 1.1875,
"learning_rate": 4.014086956521739e-06,
"loss": 0.2555,
"step": 5670
},
{
"epoch": 0.39401690873618034,
"grad_norm": 1.1171875,
"learning_rate": 4.0123478260869566e-06,
"loss": 0.2313,
"step": 5680
},
{
"epoch": 0.3947106004769131,
"grad_norm": 1.1953125,
"learning_rate": 4.010608695652175e-06,
"loss": 0.2275,
"step": 5690
},
{
"epoch": 0.3954042922176458,
"grad_norm": 1.0390625,
"learning_rate": 4.008869565217391e-06,
"loss": 0.219,
"step": 5700
},
{
"epoch": 0.3960979839583785,
"grad_norm": 1.21875,
"learning_rate": 4.007130434782609e-06,
"loss": 0.2823,
"step": 5710
},
{
"epoch": 0.3967916756991112,
"grad_norm": 1.4296875,
"learning_rate": 4.005391304347827e-06,
"loss": 0.2273,
"step": 5720
},
{
"epoch": 0.3974853674398439,
"grad_norm": 1.0234375,
"learning_rate": 4.003652173913044e-06,
"loss": 0.2356,
"step": 5730
},
{
"epoch": 0.3981790591805766,
"grad_norm": 1.0859375,
"learning_rate": 4.001913043478261e-06,
"loss": 0.2267,
"step": 5740
},
{
"epoch": 0.39887275092130936,
"grad_norm": 1.203125,
"learning_rate": 4.000173913043478e-06,
"loss": 0.2469,
"step": 5750
},
{
"epoch": 0.39956644266204205,
"grad_norm": 1.3515625,
"learning_rate": 3.998434782608696e-06,
"loss": 0.2549,
"step": 5760
},
{
"epoch": 0.40026013440277475,
"grad_norm": 0.7578125,
"learning_rate": 3.996695652173914e-06,
"loss": 0.23,
"step": 5770
},
{
"epoch": 0.4009538261435075,
"grad_norm": 1.2734375,
"learning_rate": 3.99495652173913e-06,
"loss": 0.2393,
"step": 5780
},
{
"epoch": 0.4016475178842402,
"grad_norm": 1.328125,
"learning_rate": 3.9932173913043485e-06,
"loss": 0.3191,
"step": 5790
},
{
"epoch": 0.4023412096249729,
"grad_norm": 1.09375,
"learning_rate": 3.991478260869566e-06,
"loss": 0.2143,
"step": 5800
},
{
"epoch": 0.40303490136570563,
"grad_norm": 1.4296875,
"learning_rate": 3.989739130434782e-06,
"loss": 0.2801,
"step": 5810
},
{
"epoch": 0.4037285931064383,
"grad_norm": 1.4140625,
"learning_rate": 3.988000000000001e-06,
"loss": 0.2921,
"step": 5820
},
{
"epoch": 0.404422284847171,
"grad_norm": 1.2265625,
"learning_rate": 3.986260869565218e-06,
"loss": 0.3027,
"step": 5830
},
{
"epoch": 0.40511597658790377,
"grad_norm": 1.53125,
"learning_rate": 3.984521739130435e-06,
"loss": 0.2357,
"step": 5840
},
{
"epoch": 0.40580966832863646,
"grad_norm": 0.99609375,
"learning_rate": 3.982782608695652e-06,
"loss": 0.2426,
"step": 5850
},
{
"epoch": 0.40650336006936916,
"grad_norm": 1.3515625,
"learning_rate": 3.98104347826087e-06,
"loss": 0.2232,
"step": 5860
},
{
"epoch": 0.4071970518101019,
"grad_norm": 1.296875,
"learning_rate": 3.9793043478260875e-06,
"loss": 0.2597,
"step": 5870
},
{
"epoch": 0.4078907435508346,
"grad_norm": 1.03125,
"learning_rate": 3.977565217391305e-06,
"loss": 0.2848,
"step": 5880
},
{
"epoch": 0.4085844352915673,
"grad_norm": 1.3203125,
"learning_rate": 3.975826086956522e-06,
"loss": 0.2033,
"step": 5890
},
{
"epoch": 0.40927812703230004,
"grad_norm": 1.2578125,
"learning_rate": 3.97408695652174e-06,
"loss": 0.2851,
"step": 5900
},
{
"epoch": 0.40997181877303274,
"grad_norm": 1.078125,
"learning_rate": 3.972347826086957e-06,
"loss": 0.2773,
"step": 5910
},
{
"epoch": 0.41066551051376543,
"grad_norm": 1.375,
"learning_rate": 3.970608695652174e-06,
"loss": 0.3003,
"step": 5920
},
{
"epoch": 0.4113592022544982,
"grad_norm": 1.328125,
"learning_rate": 3.968869565217392e-06,
"loss": 0.2621,
"step": 5930
},
{
"epoch": 0.4120528939952309,
"grad_norm": 1.2890625,
"learning_rate": 3.967130434782609e-06,
"loss": 0.2287,
"step": 5940
},
{
"epoch": 0.41274658573596357,
"grad_norm": 1.1640625,
"learning_rate": 3.9653913043478264e-06,
"loss": 0.2912,
"step": 5950
},
{
"epoch": 0.4134402774766963,
"grad_norm": 1.1171875,
"learning_rate": 3.963652173913044e-06,
"loss": 0.1962,
"step": 5960
},
{
"epoch": 0.414133969217429,
"grad_norm": 0.88671875,
"learning_rate": 3.961913043478261e-06,
"loss": 0.2444,
"step": 5970
},
{
"epoch": 0.4148276609581617,
"grad_norm": 1.1328125,
"learning_rate": 3.9601739130434785e-06,
"loss": 0.2362,
"step": 5980
},
{
"epoch": 0.41552135269889445,
"grad_norm": 1.15625,
"learning_rate": 3.958434782608696e-06,
"loss": 0.2421,
"step": 5990
},
{
"epoch": 0.41621504443962715,
"grad_norm": 1.0390625,
"learning_rate": 3.956695652173913e-06,
"loss": 0.2565,
"step": 6000
},
{
"epoch": 0.41690873618035984,
"grad_norm": 1.53125,
"learning_rate": 3.954956521739131e-06,
"loss": 0.2551,
"step": 6010
},
{
"epoch": 0.4176024279210926,
"grad_norm": 1.234375,
"learning_rate": 3.953217391304348e-06,
"loss": 0.2542,
"step": 6020
},
{
"epoch": 0.4182961196618253,
"grad_norm": 1.2421875,
"learning_rate": 3.951478260869565e-06,
"loss": 0.2514,
"step": 6030
},
{
"epoch": 0.418989811402558,
"grad_norm": 1.6875,
"learning_rate": 3.949739130434783e-06,
"loss": 0.2156,
"step": 6040
},
{
"epoch": 0.4196835031432907,
"grad_norm": 0.96484375,
"learning_rate": 3.948e-06,
"loss": 0.2041,
"step": 6050
},
{
"epoch": 0.4203771948840234,
"grad_norm": 1.5859375,
"learning_rate": 3.9462608695652175e-06,
"loss": 0.245,
"step": 6060
},
{
"epoch": 0.4210708866247561,
"grad_norm": 1.125,
"learning_rate": 3.944521739130435e-06,
"loss": 0.2536,
"step": 6070
},
{
"epoch": 0.42176457836548886,
"grad_norm": 1.28125,
"learning_rate": 3.942782608695653e-06,
"loss": 0.2117,
"step": 6080
},
{
"epoch": 0.42245827010622156,
"grad_norm": 1.359375,
"learning_rate": 3.94104347826087e-06,
"loss": 0.2369,
"step": 6090
},
{
"epoch": 0.42315196184695425,
"grad_norm": 1.484375,
"learning_rate": 3.939304347826087e-06,
"loss": 0.2449,
"step": 6100
},
{
"epoch": 0.423845653587687,
"grad_norm": 1.4921875,
"learning_rate": 3.937565217391305e-06,
"loss": 0.3172,
"step": 6110
},
{
"epoch": 0.4245393453284197,
"grad_norm": 0.984375,
"learning_rate": 3.935826086956522e-06,
"loss": 0.2349,
"step": 6120
},
{
"epoch": 0.4252330370691524,
"grad_norm": 1.09375,
"learning_rate": 3.934086956521739e-06,
"loss": 0.2547,
"step": 6130
},
{
"epoch": 0.4259267288098851,
"grad_norm": 1.125,
"learning_rate": 3.9323478260869565e-06,
"loss": 0.2688,
"step": 6140
},
{
"epoch": 0.42662042055061783,
"grad_norm": 1.46875,
"learning_rate": 3.930608695652175e-06,
"loss": 0.3595,
"step": 6150
},
{
"epoch": 0.4273141122913505,
"grad_norm": 1.125,
"learning_rate": 3.928869565217391e-06,
"loss": 0.2317,
"step": 6160
},
{
"epoch": 0.4280078040320832,
"grad_norm": 1.6328125,
"learning_rate": 3.927130434782609e-06,
"loss": 0.2359,
"step": 6170
},
{
"epoch": 0.42870149577281597,
"grad_norm": 1.125,
"learning_rate": 3.925391304347827e-06,
"loss": 0.2333,
"step": 6180
},
{
"epoch": 0.42939518751354866,
"grad_norm": 1.1328125,
"learning_rate": 3.923652173913044e-06,
"loss": 0.2054,
"step": 6190
},
{
"epoch": 0.43008887925428135,
"grad_norm": 1.40625,
"learning_rate": 3.921913043478261e-06,
"loss": 0.2491,
"step": 6200
},
{
"epoch": 0.4307825709950141,
"grad_norm": 1.328125,
"learning_rate": 3.920173913043478e-06,
"loss": 0.2249,
"step": 6210
},
{
"epoch": 0.4314762627357468,
"grad_norm": 1.078125,
"learning_rate": 3.918434782608696e-06,
"loss": 0.2962,
"step": 6220
},
{
"epoch": 0.4321699544764795,
"grad_norm": 1.59375,
"learning_rate": 3.916695652173914e-06,
"loss": 0.3596,
"step": 6230
},
{
"epoch": 0.43286364621721224,
"grad_norm": 1.28125,
"learning_rate": 3.91495652173913e-06,
"loss": 0.2359,
"step": 6240
},
{
"epoch": 0.43355733795794493,
"grad_norm": 1.4140625,
"learning_rate": 3.913217391304348e-06,
"loss": 0.3381,
"step": 6250
},
{
"epoch": 0.4342510296986776,
"grad_norm": 1.171875,
"learning_rate": 3.911478260869566e-06,
"loss": 0.24,
"step": 6260
},
{
"epoch": 0.4349447214394104,
"grad_norm": 1.21875,
"learning_rate": 3.909739130434783e-06,
"loss": 0.2566,
"step": 6270
},
{
"epoch": 0.43563841318014307,
"grad_norm": 1.359375,
"learning_rate": 3.9080000000000005e-06,
"loss": 0.2599,
"step": 6280
},
{
"epoch": 0.43633210492087576,
"grad_norm": 1.234375,
"learning_rate": 3.906260869565218e-06,
"loss": 0.2257,
"step": 6290
},
{
"epoch": 0.4370257966616085,
"grad_norm": 1.078125,
"learning_rate": 3.904521739130435e-06,
"loss": 0.229,
"step": 6300
},
{
"epoch": 0.4377194884023412,
"grad_norm": 1.2109375,
"learning_rate": 3.902782608695653e-06,
"loss": 0.2464,
"step": 6310
},
{
"epoch": 0.4384131801430739,
"grad_norm": 1.34375,
"learning_rate": 3.90104347826087e-06,
"loss": 0.2392,
"step": 6320
},
{
"epoch": 0.43910687188380665,
"grad_norm": 1.6015625,
"learning_rate": 3.899304347826087e-06,
"loss": 0.2661,
"step": 6330
},
{
"epoch": 0.43980056362453934,
"grad_norm": 0.94140625,
"learning_rate": 3.897565217391305e-06,
"loss": 0.2971,
"step": 6340
},
{
"epoch": 0.44049425536527204,
"grad_norm": 1.2421875,
"learning_rate": 3.895826086956522e-06,
"loss": 0.2578,
"step": 6350
},
{
"epoch": 0.4411879471060048,
"grad_norm": 1.125,
"learning_rate": 3.8940869565217395e-06,
"loss": 0.2107,
"step": 6360
},
{
"epoch": 0.4418816388467375,
"grad_norm": 1.1640625,
"learning_rate": 3.892347826086957e-06,
"loss": 0.213,
"step": 6370
},
{
"epoch": 0.4425753305874702,
"grad_norm": 1.4375,
"learning_rate": 3.890608695652174e-06,
"loss": 0.3,
"step": 6380
},
{
"epoch": 0.4432690223282029,
"grad_norm": 1.21875,
"learning_rate": 3.888869565217392e-06,
"loss": 0.2388,
"step": 6390
},
{
"epoch": 0.4439627140689356,
"grad_norm": 1.0859375,
"learning_rate": 3.887130434782609e-06,
"loss": 0.2557,
"step": 6400
},
{
"epoch": 0.4446564058096683,
"grad_norm": 0.71484375,
"learning_rate": 3.885391304347826e-06,
"loss": 0.2638,
"step": 6410
},
{
"epoch": 0.44535009755040106,
"grad_norm": 1.5234375,
"learning_rate": 3.883652173913044e-06,
"loss": 0.2884,
"step": 6420
},
{
"epoch": 0.44604378929113375,
"grad_norm": 1.2265625,
"learning_rate": 3.881913043478261e-06,
"loss": 0.2356,
"step": 6430
},
{
"epoch": 0.44673748103186645,
"grad_norm": 1.65625,
"learning_rate": 3.8801739130434785e-06,
"loss": 0.2654,
"step": 6440
},
{
"epoch": 0.4474311727725992,
"grad_norm": 1.0390625,
"learning_rate": 3.878434782608696e-06,
"loss": 0.2445,
"step": 6450
},
{
"epoch": 0.4481248645133319,
"grad_norm": 1.1796875,
"learning_rate": 3.876695652173913e-06,
"loss": 0.2363,
"step": 6460
},
{
"epoch": 0.4488185562540646,
"grad_norm": 1.234375,
"learning_rate": 3.874956521739131e-06,
"loss": 0.2402,
"step": 6470
},
{
"epoch": 0.44951224799479733,
"grad_norm": 1.0546875,
"learning_rate": 3.873217391304348e-06,
"loss": 0.2362,
"step": 6480
},
{
"epoch": 0.45020593973553,
"grad_norm": 1.2890625,
"learning_rate": 3.871478260869565e-06,
"loss": 0.3399,
"step": 6490
},
{
"epoch": 0.4508996314762627,
"grad_norm": 1.2421875,
"learning_rate": 3.869739130434783e-06,
"loss": 0.2556,
"step": 6500
},
{
"epoch": 0.45159332321699547,
"grad_norm": 1.0390625,
"learning_rate": 3.868e-06,
"loss": 0.2391,
"step": 6510
},
{
"epoch": 0.45228701495772816,
"grad_norm": 1.1875,
"learning_rate": 3.8662608695652174e-06,
"loss": 0.315,
"step": 6520
},
{
"epoch": 0.45298070669846086,
"grad_norm": 1.5546875,
"learning_rate": 3.864521739130435e-06,
"loss": 0.2522,
"step": 6530
},
{
"epoch": 0.4536743984391936,
"grad_norm": 1.2109375,
"learning_rate": 3.862782608695653e-06,
"loss": 0.2416,
"step": 6540
},
{
"epoch": 0.4543680901799263,
"grad_norm": 1.328125,
"learning_rate": 3.8610434782608696e-06,
"loss": 0.2644,
"step": 6550
},
{
"epoch": 0.455061781920659,
"grad_norm": 1.140625,
"learning_rate": 3.859304347826087e-06,
"loss": 0.2374,
"step": 6560
},
{
"epoch": 0.45575547366139174,
"grad_norm": 1.375,
"learning_rate": 3.857565217391305e-06,
"loss": 0.2737,
"step": 6570
},
{
"epoch": 0.45644916540212443,
"grad_norm": 1.015625,
"learning_rate": 3.8558260869565225e-06,
"loss": 0.2772,
"step": 6580
},
{
"epoch": 0.45714285714285713,
"grad_norm": 1.2109375,
"learning_rate": 3.854086956521739e-06,
"loss": 0.2583,
"step": 6590
},
{
"epoch": 0.4578365488835899,
"grad_norm": 1.546875,
"learning_rate": 3.852347826086956e-06,
"loss": 0.2847,
"step": 6600
},
{
"epoch": 0.45853024062432257,
"grad_norm": 1.1875,
"learning_rate": 3.850608695652175e-06,
"loss": 0.213,
"step": 6610
},
{
"epoch": 0.45922393236505527,
"grad_norm": 0.98046875,
"learning_rate": 3.848869565217392e-06,
"loss": 0.2157,
"step": 6620
},
{
"epoch": 0.459917624105788,
"grad_norm": 1.375,
"learning_rate": 3.8471304347826085e-06,
"loss": 0.2696,
"step": 6630
},
{
"epoch": 0.4606113158465207,
"grad_norm": 1.046875,
"learning_rate": 3.845391304347827e-06,
"loss": 0.2729,
"step": 6640
},
{
"epoch": 0.4613050075872534,
"grad_norm": 1.1796875,
"learning_rate": 3.843652173913044e-06,
"loss": 0.2258,
"step": 6650
},
{
"epoch": 0.46199869932798615,
"grad_norm": 1.125,
"learning_rate": 3.8419130434782615e-06,
"loss": 0.2469,
"step": 6660
},
{
"epoch": 0.46269239106871884,
"grad_norm": 0.94140625,
"learning_rate": 3.840173913043478e-06,
"loss": 0.2038,
"step": 6670
},
{
"epoch": 0.46338608280945154,
"grad_norm": 1.1171875,
"learning_rate": 3.838434782608696e-06,
"loss": 0.315,
"step": 6680
},
{
"epoch": 0.4640797745501843,
"grad_norm": 1.453125,
"learning_rate": 3.836695652173914e-06,
"loss": 0.2534,
"step": 6690
},
{
"epoch": 0.464773466290917,
"grad_norm": 1.265625,
"learning_rate": 3.834956521739131e-06,
"loss": 0.2372,
"step": 6700
},
{
"epoch": 0.4654671580316497,
"grad_norm": 1.234375,
"learning_rate": 3.833217391304348e-06,
"loss": 0.246,
"step": 6710
},
{
"epoch": 0.4661608497723824,
"grad_norm": 1.1171875,
"learning_rate": 3.831478260869566e-06,
"loss": 0.2586,
"step": 6720
},
{
"epoch": 0.4668545415131151,
"grad_norm": 1.25,
"learning_rate": 3.829739130434783e-06,
"loss": 0.2605,
"step": 6730
},
{
"epoch": 0.4675482332538478,
"grad_norm": 1.5625,
"learning_rate": 3.8280000000000004e-06,
"loss": 0.2853,
"step": 6740
},
{
"epoch": 0.46824192499458056,
"grad_norm": 1.375,
"learning_rate": 3.826260869565218e-06,
"loss": 0.2116,
"step": 6750
},
{
"epoch": 0.46893561673531325,
"grad_norm": 1.09375,
"learning_rate": 3.824521739130435e-06,
"loss": 0.3067,
"step": 6760
},
{
"epoch": 0.46962930847604595,
"grad_norm": 1.1953125,
"learning_rate": 3.8227826086956526e-06,
"loss": 0.2128,
"step": 6770
},
{
"epoch": 0.47032300021677864,
"grad_norm": 0.85546875,
"learning_rate": 3.82104347826087e-06,
"loss": 0.2067,
"step": 6780
},
{
"epoch": 0.4710166919575114,
"grad_norm": 1.296875,
"learning_rate": 3.819304347826087e-06,
"loss": 0.2094,
"step": 6790
},
{
"epoch": 0.4717103836982441,
"grad_norm": 1.4140625,
"learning_rate": 3.817565217391305e-06,
"loss": 0.2455,
"step": 6800
},
{
"epoch": 0.4724040754389768,
"grad_norm": 1.1796875,
"learning_rate": 3.815826086956522e-06,
"loss": 0.2471,
"step": 6810
},
{
"epoch": 0.4730977671797095,
"grad_norm": 1.5546875,
"learning_rate": 3.8140869565217394e-06,
"loss": 0.2839,
"step": 6820
},
{
"epoch": 0.4737914589204422,
"grad_norm": 1.5703125,
"learning_rate": 3.812347826086957e-06,
"loss": 0.2218,
"step": 6830
},
{
"epoch": 0.4744851506611749,
"grad_norm": 1.1875,
"learning_rate": 3.810608695652174e-06,
"loss": 0.2793,
"step": 6840
},
{
"epoch": 0.47517884240190766,
"grad_norm": 1.9453125,
"learning_rate": 3.808869565217392e-06,
"loss": 0.3007,
"step": 6850
},
{
"epoch": 0.47587253414264036,
"grad_norm": 0.99609375,
"learning_rate": 3.807130434782609e-06,
"loss": 0.1971,
"step": 6860
},
{
"epoch": 0.47656622588337305,
"grad_norm": 0.89453125,
"learning_rate": 3.8053913043478263e-06,
"loss": 0.2195,
"step": 6870
},
{
"epoch": 0.4772599176241058,
"grad_norm": 1.140625,
"learning_rate": 3.803652173913044e-06,
"loss": 0.2397,
"step": 6880
},
{
"epoch": 0.4779536093648385,
"grad_norm": 1.5625,
"learning_rate": 3.8019130434782614e-06,
"loss": 0.2547,
"step": 6890
},
{
"epoch": 0.4786473011055712,
"grad_norm": 1.1015625,
"learning_rate": 3.8001739130434784e-06,
"loss": 0.2687,
"step": 6900
},
{
"epoch": 0.47934099284630394,
"grad_norm": 1.46875,
"learning_rate": 3.7984347826086958e-06,
"loss": 0.2853,
"step": 6910
},
{
"epoch": 0.48003468458703663,
"grad_norm": 1.140625,
"learning_rate": 3.7966956521739136e-06,
"loss": 0.2279,
"step": 6920
},
{
"epoch": 0.4807283763277693,
"grad_norm": 1.5078125,
"learning_rate": 3.794956521739131e-06,
"loss": 0.2167,
"step": 6930
},
{
"epoch": 0.4814220680685021,
"grad_norm": 1.1875,
"learning_rate": 3.793217391304348e-06,
"loss": 0.3339,
"step": 6940
},
{
"epoch": 0.48211575980923477,
"grad_norm": 1.46875,
"learning_rate": 3.7914782608695657e-06,
"loss": 0.2604,
"step": 6950
},
{
"epoch": 0.48280945154996746,
"grad_norm": 1.0078125,
"learning_rate": 3.789739130434783e-06,
"loss": 0.2045,
"step": 6960
},
{
"epoch": 0.4835031432907002,
"grad_norm": 1.40625,
"learning_rate": 3.7880000000000004e-06,
"loss": 0.2629,
"step": 6970
},
{
"epoch": 0.4841968350314329,
"grad_norm": 1.1484375,
"learning_rate": 3.7862608695652174e-06,
"loss": 0.2602,
"step": 6980
},
{
"epoch": 0.4848905267721656,
"grad_norm": 1.6171875,
"learning_rate": 3.784521739130435e-06,
"loss": 0.3032,
"step": 6990
},
{
"epoch": 0.48558421851289835,
"grad_norm": 0.9453125,
"learning_rate": 3.7827826086956525e-06,
"loss": 0.238,
"step": 7000
},
{
"epoch": 0.48627791025363104,
"grad_norm": 1.3203125,
"learning_rate": 3.7810434782608703e-06,
"loss": 0.2422,
"step": 7010
},
{
"epoch": 0.48697160199436373,
"grad_norm": 1.0234375,
"learning_rate": 3.7793043478260873e-06,
"loss": 0.2349,
"step": 7020
},
{
"epoch": 0.4876652937350965,
"grad_norm": 1.3125,
"learning_rate": 3.7775652173913046e-06,
"loss": 0.23,
"step": 7030
},
{
"epoch": 0.4883589854758292,
"grad_norm": 1.03125,
"learning_rate": 3.775826086956522e-06,
"loss": 0.2634,
"step": 7040
},
{
"epoch": 0.48905267721656187,
"grad_norm": 1.453125,
"learning_rate": 3.7740869565217394e-06,
"loss": 0.2912,
"step": 7050
},
{
"epoch": 0.4897463689572946,
"grad_norm": 1.0234375,
"learning_rate": 3.7723478260869567e-06,
"loss": 0.232,
"step": 7060
},
{
"epoch": 0.4904400606980273,
"grad_norm": 1.0390625,
"learning_rate": 3.770608695652174e-06,
"loss": 0.2252,
"step": 7070
},
{
"epoch": 0.49113375243876,
"grad_norm": 0.8125,
"learning_rate": 3.768869565217392e-06,
"loss": 0.2311,
"step": 7080
},
{
"epoch": 0.49182744417949276,
"grad_norm": 1.234375,
"learning_rate": 3.767130434782609e-06,
"loss": 0.2493,
"step": 7090
},
{
"epoch": 0.49252113592022545,
"grad_norm": 1.3671875,
"learning_rate": 3.7653913043478262e-06,
"loss": 0.2665,
"step": 7100
},
{
"epoch": 0.49321482766095814,
"grad_norm": 1.328125,
"learning_rate": 3.763652173913044e-06,
"loss": 0.2158,
"step": 7110
},
{
"epoch": 0.4939085194016909,
"grad_norm": 1.1328125,
"learning_rate": 3.7619130434782614e-06,
"loss": 0.2555,
"step": 7120
},
{
"epoch": 0.4946022111424236,
"grad_norm": 0.9296875,
"learning_rate": 3.7601739130434783e-06,
"loss": 0.2437,
"step": 7130
},
{
"epoch": 0.4952959028831563,
"grad_norm": 1.15625,
"learning_rate": 3.7584347826086957e-06,
"loss": 0.2703,
"step": 7140
},
{
"epoch": 0.49598959462388903,
"grad_norm": 1.203125,
"learning_rate": 3.7566956521739135e-06,
"loss": 0.2314,
"step": 7150
},
{
"epoch": 0.4966832863646217,
"grad_norm": 1.265625,
"learning_rate": 3.754956521739131e-06,
"loss": 0.2534,
"step": 7160
},
{
"epoch": 0.4973769781053544,
"grad_norm": 1.2578125,
"learning_rate": 3.753217391304348e-06,
"loss": 0.2763,
"step": 7170
},
{
"epoch": 0.49807066984608717,
"grad_norm": 1.578125,
"learning_rate": 3.7514782608695656e-06,
"loss": 0.2734,
"step": 7180
},
{
"epoch": 0.49876436158681986,
"grad_norm": 1.1640625,
"learning_rate": 3.749739130434783e-06,
"loss": 0.2158,
"step": 7190
},
{
"epoch": 0.49945805332755255,
"grad_norm": 1.4765625,
"learning_rate": 3.7480000000000004e-06,
"loss": 0.2429,
"step": 7200
},
{
"epoch": 0.5001517450682853,
"grad_norm": 1.2421875,
"learning_rate": 3.7462608695652173e-06,
"loss": 0.2961,
"step": 7210
},
{
"epoch": 0.5008454368090179,
"grad_norm": 1.4375,
"learning_rate": 3.744521739130435e-06,
"loss": 0.2368,
"step": 7220
},
{
"epoch": 0.5015391285497507,
"grad_norm": 1.5625,
"learning_rate": 3.7427826086956525e-06,
"loss": 0.2605,
"step": 7230
},
{
"epoch": 0.5022328202904834,
"grad_norm": 0.83984375,
"learning_rate": 3.7410434782608703e-06,
"loss": 0.2194,
"step": 7240
},
{
"epoch": 0.5029265120312161,
"grad_norm": 1.1953125,
"learning_rate": 3.7393043478260872e-06,
"loss": 0.2153,
"step": 7250
},
{
"epoch": 0.5036202037719488,
"grad_norm": 1.671875,
"learning_rate": 3.7375652173913046e-06,
"loss": 0.2329,
"step": 7260
},
{
"epoch": 0.5043138955126816,
"grad_norm": 1.8203125,
"learning_rate": 3.735826086956522e-06,
"loss": 0.2821,
"step": 7270
},
{
"epoch": 0.5050075872534142,
"grad_norm": 1.3984375,
"learning_rate": 3.7340869565217398e-06,
"loss": 0.263,
"step": 7280
},
{
"epoch": 0.505701278994147,
"grad_norm": 1.3046875,
"learning_rate": 3.7323478260869567e-06,
"loss": 0.2428,
"step": 7290
},
{
"epoch": 0.5063949707348797,
"grad_norm": 1.015625,
"learning_rate": 3.730608695652174e-06,
"loss": 0.2353,
"step": 7300
},
{
"epoch": 0.5070886624756124,
"grad_norm": 1.0234375,
"learning_rate": 3.728869565217392e-06,
"loss": 0.2616,
"step": 7310
},
{
"epoch": 0.5077823542163451,
"grad_norm": 1.3203125,
"learning_rate": 3.7271304347826092e-06,
"loss": 0.2314,
"step": 7320
},
{
"epoch": 0.5084760459570778,
"grad_norm": 1.171875,
"learning_rate": 3.725391304347826e-06,
"loss": 0.2484,
"step": 7330
},
{
"epoch": 0.5091697376978105,
"grad_norm": 1.1796875,
"learning_rate": 3.723652173913044e-06,
"loss": 0.2182,
"step": 7340
},
{
"epoch": 0.5098634294385432,
"grad_norm": 1.296875,
"learning_rate": 3.7219130434782614e-06,
"loss": 0.1906,
"step": 7350
},
{
"epoch": 0.510557121179276,
"grad_norm": 1.2421875,
"learning_rate": 3.7201739130434783e-06,
"loss": 0.2971,
"step": 7360
},
{
"epoch": 0.5112508129200086,
"grad_norm": 1.359375,
"learning_rate": 3.7184347826086957e-06,
"loss": 0.2281,
"step": 7370
},
{
"epoch": 0.5119445046607414,
"grad_norm": 1.4375,
"learning_rate": 3.7166956521739135e-06,
"loss": 0.2597,
"step": 7380
},
{
"epoch": 0.5126381964014741,
"grad_norm": 1.3125,
"learning_rate": 3.714956521739131e-06,
"loss": 0.2685,
"step": 7390
},
{
"epoch": 0.5133318881422068,
"grad_norm": 0.98046875,
"learning_rate": 3.713217391304348e-06,
"loss": 0.2763,
"step": 7400
},
{
"epoch": 0.5140255798829395,
"grad_norm": 1.3203125,
"learning_rate": 3.7114782608695656e-06,
"loss": 0.2088,
"step": 7410
},
{
"epoch": 0.5147192716236723,
"grad_norm": 4.9375,
"learning_rate": 3.709739130434783e-06,
"loss": 0.2388,
"step": 7420
},
{
"epoch": 0.5154129633644049,
"grad_norm": 1.3046875,
"learning_rate": 3.7080000000000003e-06,
"loss": 0.2594,
"step": 7430
},
{
"epoch": 0.5161066551051376,
"grad_norm": 1.7265625,
"learning_rate": 3.7062608695652173e-06,
"loss": 0.2595,
"step": 7440
},
{
"epoch": 0.5168003468458704,
"grad_norm": 1.2578125,
"learning_rate": 3.704521739130435e-06,
"loss": 0.2404,
"step": 7450
},
{
"epoch": 0.517494038586603,
"grad_norm": 1.25,
"learning_rate": 3.7027826086956524e-06,
"loss": 0.2635,
"step": 7460
},
{
"epoch": 0.5181877303273358,
"grad_norm": 0.875,
"learning_rate": 3.7010434782608702e-06,
"loss": 0.245,
"step": 7470
},
{
"epoch": 0.5188814220680685,
"grad_norm": 1.171875,
"learning_rate": 3.699304347826087e-06,
"loss": 0.2676,
"step": 7480
},
{
"epoch": 0.5195751138088012,
"grad_norm": 1.21875,
"learning_rate": 3.6975652173913046e-06,
"loss": 0.2699,
"step": 7490
},
{
"epoch": 0.5202688055495339,
"grad_norm": 1.234375,
"learning_rate": 3.695826086956522e-06,
"loss": 0.244,
"step": 7500
},
{
"epoch": 0.5209624972902667,
"grad_norm": 1.390625,
"learning_rate": 3.6940869565217397e-06,
"loss": 0.2438,
"step": 7510
},
{
"epoch": 0.5216561890309993,
"grad_norm": 1.2890625,
"learning_rate": 3.6923478260869567e-06,
"loss": 0.2516,
"step": 7520
},
{
"epoch": 0.5223498807717321,
"grad_norm": 1.0625,
"learning_rate": 3.690608695652174e-06,
"loss": 0.2458,
"step": 7530
},
{
"epoch": 0.5230435725124648,
"grad_norm": 1.375,
"learning_rate": 3.688869565217392e-06,
"loss": 0.2696,
"step": 7540
},
{
"epoch": 0.5237372642531974,
"grad_norm": 1.453125,
"learning_rate": 3.687130434782609e-06,
"loss": 0.2365,
"step": 7550
},
{
"epoch": 0.5244309559939302,
"grad_norm": 1.609375,
"learning_rate": 3.685391304347826e-06,
"loss": 0.2247,
"step": 7560
},
{
"epoch": 0.5251246477346629,
"grad_norm": 1.390625,
"learning_rate": 3.683652173913044e-06,
"loss": 0.2388,
"step": 7570
},
{
"epoch": 0.5258183394753956,
"grad_norm": 1.171875,
"learning_rate": 3.6819130434782613e-06,
"loss": 0.2709,
"step": 7580
},
{
"epoch": 0.5265120312161283,
"grad_norm": 1.2578125,
"learning_rate": 3.6801739130434787e-06,
"loss": 0.3069,
"step": 7590
},
{
"epoch": 0.5272057229568611,
"grad_norm": 1.046875,
"learning_rate": 3.6784347826086956e-06,
"loss": 0.2254,
"step": 7600
},
{
"epoch": 0.5278994146975937,
"grad_norm": 1.0390625,
"learning_rate": 3.6766956521739134e-06,
"loss": 0.2332,
"step": 7610
},
{
"epoch": 0.5285931064383265,
"grad_norm": 1.3359375,
"learning_rate": 3.674956521739131e-06,
"loss": 0.2917,
"step": 7620
},
{
"epoch": 0.5292867981790592,
"grad_norm": 1.203125,
"learning_rate": 3.6732173913043486e-06,
"loss": 0.2575,
"step": 7630
},
{
"epoch": 0.5299804899197919,
"grad_norm": 0.96875,
"learning_rate": 3.6714782608695655e-06,
"loss": 0.2636,
"step": 7640
},
{
"epoch": 0.5306741816605246,
"grad_norm": 1.0390625,
"learning_rate": 3.669739130434783e-06,
"loss": 0.2782,
"step": 7650
},
{
"epoch": 0.5313678734012574,
"grad_norm": 1.4296875,
"learning_rate": 3.6680000000000003e-06,
"loss": 0.2155,
"step": 7660
},
{
"epoch": 0.53206156514199,
"grad_norm": 1.96875,
"learning_rate": 3.6662608695652172e-06,
"loss": 0.2726,
"step": 7670
},
{
"epoch": 0.5327552568827227,
"grad_norm": 1.453125,
"learning_rate": 3.664521739130435e-06,
"loss": 0.2456,
"step": 7680
},
{
"epoch": 0.5334489486234555,
"grad_norm": 1.328125,
"learning_rate": 3.6627826086956524e-06,
"loss": 0.2691,
"step": 7690
},
{
"epoch": 0.5341426403641881,
"grad_norm": 1.421875,
"learning_rate": 3.66104347826087e-06,
"loss": 0.2618,
"step": 7700
},
{
"epoch": 0.5348363321049209,
"grad_norm": 1.3359375,
"learning_rate": 3.659304347826087e-06,
"loss": 0.2684,
"step": 7710
},
{
"epoch": 0.5355300238456536,
"grad_norm": 0.8828125,
"learning_rate": 3.6575652173913045e-06,
"loss": 0.2165,
"step": 7720
},
{
"epoch": 0.5362237155863863,
"grad_norm": 0.79296875,
"learning_rate": 3.655826086956522e-06,
"loss": 0.2917,
"step": 7730
},
{
"epoch": 0.536917407327119,
"grad_norm": 1.203125,
"learning_rate": 3.6540869565217397e-06,
"loss": 0.2629,
"step": 7740
},
{
"epoch": 0.5376110990678518,
"grad_norm": 1.046875,
"learning_rate": 3.6523478260869566e-06,
"loss": 0.2612,
"step": 7750
},
{
"epoch": 0.5383047908085844,
"grad_norm": 1.1953125,
"learning_rate": 3.650608695652174e-06,
"loss": 0.3481,
"step": 7760
},
{
"epoch": 0.5389984825493171,
"grad_norm": 1.5859375,
"learning_rate": 3.648869565217392e-06,
"loss": 0.2939,
"step": 7770
},
{
"epoch": 0.5396921742900499,
"grad_norm": 1.1015625,
"learning_rate": 3.647130434782609e-06,
"loss": 0.232,
"step": 7780
},
{
"epoch": 0.5403858660307825,
"grad_norm": 1.3359375,
"learning_rate": 3.645391304347826e-06,
"loss": 0.2843,
"step": 7790
},
{
"epoch": 0.5410795577715153,
"grad_norm": 1.0,
"learning_rate": 3.643652173913044e-06,
"loss": 0.2311,
"step": 7800
},
{
"epoch": 0.541773249512248,
"grad_norm": 1.078125,
"learning_rate": 3.6419130434782613e-06,
"loss": 0.2587,
"step": 7810
},
{
"epoch": 0.5424669412529807,
"grad_norm": 0.95703125,
"learning_rate": 3.6401739130434786e-06,
"loss": 0.2894,
"step": 7820
},
{
"epoch": 0.5431606329937134,
"grad_norm": 1.0078125,
"learning_rate": 3.6384347826086956e-06,
"loss": 0.2337,
"step": 7830
},
{
"epoch": 0.5438543247344462,
"grad_norm": 0.9921875,
"learning_rate": 3.6366956521739134e-06,
"loss": 0.2619,
"step": 7840
},
{
"epoch": 0.5445480164751788,
"grad_norm": 1.15625,
"learning_rate": 3.6349565217391308e-06,
"loss": 0.3071,
"step": 7850
},
{
"epoch": 0.5452417082159116,
"grad_norm": 0.87109375,
"learning_rate": 3.6332173913043486e-06,
"loss": 0.2436,
"step": 7860
},
{
"epoch": 0.5459353999566443,
"grad_norm": 0.98046875,
"learning_rate": 3.6314782608695655e-06,
"loss": 0.316,
"step": 7870
},
{
"epoch": 0.546629091697377,
"grad_norm": 1.6328125,
"learning_rate": 3.629739130434783e-06,
"loss": 0.2135,
"step": 7880
},
{
"epoch": 0.5473227834381097,
"grad_norm": 1.3671875,
"learning_rate": 3.6280000000000002e-06,
"loss": 0.2578,
"step": 7890
},
{
"epoch": 0.5480164751788424,
"grad_norm": 1.2578125,
"learning_rate": 3.626260869565218e-06,
"loss": 0.2587,
"step": 7900
},
{
"epoch": 0.5487101669195751,
"grad_norm": 1.3828125,
"learning_rate": 3.624521739130435e-06,
"loss": 0.2425,
"step": 7910
},
{
"epoch": 0.5494038586603078,
"grad_norm": 1.421875,
"learning_rate": 3.6227826086956524e-06,
"loss": 0.2497,
"step": 7920
},
{
"epoch": 0.5500975504010406,
"grad_norm": 1.0546875,
"learning_rate": 3.62104347826087e-06,
"loss": 0.1969,
"step": 7930
},
{
"epoch": 0.5507912421417732,
"grad_norm": 1.25,
"learning_rate": 3.6193043478260875e-06,
"loss": 0.2794,
"step": 7940
},
{
"epoch": 0.551484933882506,
"grad_norm": 1.4921875,
"learning_rate": 3.6175652173913045e-06,
"loss": 0.2452,
"step": 7950
},
{
"epoch": 0.5521786256232387,
"grad_norm": 1.1640625,
"learning_rate": 3.615826086956522e-06,
"loss": 0.2181,
"step": 7960
},
{
"epoch": 0.5528723173639714,
"grad_norm": 1.2265625,
"learning_rate": 3.6140869565217396e-06,
"loss": 0.2407,
"step": 7970
},
{
"epoch": 0.5535660091047041,
"grad_norm": 1.0703125,
"learning_rate": 3.6123478260869566e-06,
"loss": 0.2556,
"step": 7980
},
{
"epoch": 0.5542597008454369,
"grad_norm": 1.234375,
"learning_rate": 3.610608695652174e-06,
"loss": 0.2783,
"step": 7990
},
{
"epoch": 0.5549533925861695,
"grad_norm": 1.59375,
"learning_rate": 3.6088695652173918e-06,
"loss": 0.2702,
"step": 8000
},
{
"epoch": 0.5556470843269022,
"grad_norm": 1.359375,
"learning_rate": 3.607130434782609e-06,
"loss": 0.2606,
"step": 8010
},
{
"epoch": 0.556340776067635,
"grad_norm": 1.21875,
"learning_rate": 3.605391304347826e-06,
"loss": 0.2103,
"step": 8020
},
{
"epoch": 0.5570344678083676,
"grad_norm": 1.34375,
"learning_rate": 3.603652173913044e-06,
"loss": 0.2451,
"step": 8030
},
{
"epoch": 0.5577281595491004,
"grad_norm": 1.2734375,
"learning_rate": 3.6019130434782612e-06,
"loss": 0.2489,
"step": 8040
},
{
"epoch": 0.5584218512898331,
"grad_norm": 1.2578125,
"learning_rate": 3.6001739130434786e-06,
"loss": 0.2254,
"step": 8050
},
{
"epoch": 0.5591155430305658,
"grad_norm": 1.5625,
"learning_rate": 3.5984347826086956e-06,
"loss": 0.2404,
"step": 8060
},
{
"epoch": 0.5598092347712985,
"grad_norm": 1.3125,
"learning_rate": 3.5966956521739134e-06,
"loss": 0.3151,
"step": 8070
},
{
"epoch": 0.5605029265120313,
"grad_norm": 1.4296875,
"learning_rate": 3.5949565217391307e-06,
"loss": 0.2554,
"step": 8080
},
{
"epoch": 0.5611966182527639,
"grad_norm": 1.234375,
"learning_rate": 3.5932173913043485e-06,
"loss": 0.25,
"step": 8090
},
{
"epoch": 0.5618903099934967,
"grad_norm": 1.390625,
"learning_rate": 3.5914782608695655e-06,
"loss": 0.2968,
"step": 8100
},
{
"epoch": 0.5625840017342294,
"grad_norm": 1.2109375,
"learning_rate": 3.589739130434783e-06,
"loss": 0.2523,
"step": 8110
},
{
"epoch": 0.563277693474962,
"grad_norm": 1.2421875,
"learning_rate": 3.588e-06,
"loss": 0.2254,
"step": 8120
},
{
"epoch": 0.5639713852156948,
"grad_norm": 1.578125,
"learning_rate": 3.586260869565218e-06,
"loss": 0.272,
"step": 8130
},
{
"epoch": 0.5646650769564275,
"grad_norm": 1.6953125,
"learning_rate": 3.584521739130435e-06,
"loss": 0.3524,
"step": 8140
},
{
"epoch": 0.5653587686971602,
"grad_norm": 1.1640625,
"learning_rate": 3.5827826086956523e-06,
"loss": 0.2291,
"step": 8150
},
{
"epoch": 0.5660524604378929,
"grad_norm": 1.34375,
"learning_rate": 3.58104347826087e-06,
"loss": 0.27,
"step": 8160
},
{
"epoch": 0.5667461521786257,
"grad_norm": 1.1640625,
"learning_rate": 3.5793043478260875e-06,
"loss": 0.324,
"step": 8170
},
{
"epoch": 0.5674398439193583,
"grad_norm": 1.015625,
"learning_rate": 3.5775652173913044e-06,
"loss": 0.3121,
"step": 8180
},
{
"epoch": 0.5681335356600911,
"grad_norm": 1.0078125,
"learning_rate": 3.575826086956522e-06,
"loss": 0.2035,
"step": 8190
},
{
"epoch": 0.5688272274008238,
"grad_norm": 1.1015625,
"learning_rate": 3.5740869565217396e-06,
"loss": 0.2779,
"step": 8200
},
{
"epoch": 0.5695209191415564,
"grad_norm": 1.46875,
"learning_rate": 3.572347826086957e-06,
"loss": 0.2545,
"step": 8210
},
{
"epoch": 0.5702146108822892,
"grad_norm": 1.2734375,
"learning_rate": 3.570608695652174e-06,
"loss": 0.3188,
"step": 8220
},
{
"epoch": 0.570908302623022,
"grad_norm": 1.1875,
"learning_rate": 3.5688695652173917e-06,
"loss": 0.2356,
"step": 8230
},
{
"epoch": 0.5716019943637546,
"grad_norm": 1.1484375,
"learning_rate": 3.567130434782609e-06,
"loss": 0.2316,
"step": 8240
},
{
"epoch": 0.5722956861044873,
"grad_norm": 1.5390625,
"learning_rate": 3.5653913043478265e-06,
"loss": 0.2724,
"step": 8250
},
{
"epoch": 0.5729893778452201,
"grad_norm": 1.34375,
"learning_rate": 3.563652173913044e-06,
"loss": 0.2535,
"step": 8260
},
{
"epoch": 0.5736830695859527,
"grad_norm": 1.125,
"learning_rate": 3.561913043478261e-06,
"loss": 0.2413,
"step": 8270
},
{
"epoch": 0.5743767613266855,
"grad_norm": 0.98046875,
"learning_rate": 3.5601739130434786e-06,
"loss": 0.2488,
"step": 8280
},
{
"epoch": 0.5750704530674182,
"grad_norm": 1.140625,
"learning_rate": 3.5584347826086955e-06,
"loss": 0.2248,
"step": 8290
},
{
"epoch": 0.5757641448081509,
"grad_norm": 1.1640625,
"learning_rate": 3.5566956521739133e-06,
"loss": 0.2339,
"step": 8300
},
{
"epoch": 0.5764578365488836,
"grad_norm": 2.25,
"learning_rate": 3.5549565217391307e-06,
"loss": 0.3776,
"step": 8310
},
{
"epoch": 0.5771515282896164,
"grad_norm": 1.2109375,
"learning_rate": 3.5532173913043485e-06,
"loss": 0.2102,
"step": 8320
},
{
"epoch": 0.577845220030349,
"grad_norm": 0.9921875,
"learning_rate": 3.5514782608695654e-06,
"loss": 0.2692,
"step": 8330
},
{
"epoch": 0.5785389117710817,
"grad_norm": 1.6796875,
"learning_rate": 3.549739130434783e-06,
"loss": 0.3611,
"step": 8340
},
{
"epoch": 0.5792326035118145,
"grad_norm": 1.15625,
"learning_rate": 3.548e-06,
"loss": 0.227,
"step": 8350
},
{
"epoch": 0.5799262952525471,
"grad_norm": 1.015625,
"learning_rate": 3.546260869565218e-06,
"loss": 0.242,
"step": 8360
},
{
"epoch": 0.5806199869932799,
"grad_norm": 1.09375,
"learning_rate": 3.544521739130435e-06,
"loss": 0.3021,
"step": 8370
},
{
"epoch": 0.5813136787340125,
"grad_norm": 1.2265625,
"learning_rate": 3.5427826086956523e-06,
"loss": 0.2542,
"step": 8380
},
{
"epoch": 0.5820073704747453,
"grad_norm": 1.2734375,
"learning_rate": 3.54104347826087e-06,
"loss": 0.24,
"step": 8390
},
{
"epoch": 0.582701062215478,
"grad_norm": 1.046875,
"learning_rate": 3.5393043478260874e-06,
"loss": 0.2421,
"step": 8400
},
{
"epoch": 0.5833947539562107,
"grad_norm": 0.984375,
"learning_rate": 3.5375652173913044e-06,
"loss": 0.212,
"step": 8410
},
{
"epoch": 0.5840884456969434,
"grad_norm": 1.0390625,
"learning_rate": 3.5358260869565218e-06,
"loss": 0.2281,
"step": 8420
},
{
"epoch": 0.5847821374376762,
"grad_norm": 1.40625,
"learning_rate": 3.5340869565217396e-06,
"loss": 0.2296,
"step": 8430
},
{
"epoch": 0.5854758291784088,
"grad_norm": 1.640625,
"learning_rate": 3.532347826086957e-06,
"loss": 0.2546,
"step": 8440
},
{
"epoch": 0.5861695209191415,
"grad_norm": 1.7109375,
"learning_rate": 3.530608695652174e-06,
"loss": 0.2691,
"step": 8450
},
{
"epoch": 0.5868632126598743,
"grad_norm": 1.5078125,
"learning_rate": 3.5288695652173917e-06,
"loss": 0.2716,
"step": 8460
},
{
"epoch": 0.5875569044006069,
"grad_norm": 1.3046875,
"learning_rate": 3.527130434782609e-06,
"loss": 0.3,
"step": 8470
},
{
"epoch": 0.5882505961413397,
"grad_norm": 0.98046875,
"learning_rate": 3.5253913043478264e-06,
"loss": 0.2811,
"step": 8480
},
{
"epoch": 0.5889442878820724,
"grad_norm": 1.265625,
"learning_rate": 3.5236521739130438e-06,
"loss": 0.2327,
"step": 8490
},
{
"epoch": 0.5896379796228051,
"grad_norm": 1.03125,
"learning_rate": 3.521913043478261e-06,
"loss": 0.2393,
"step": 8500
},
{
"epoch": 0.5903316713635378,
"grad_norm": 1.03125,
"learning_rate": 3.5201739130434785e-06,
"loss": 0.2633,
"step": 8510
},
{
"epoch": 0.5910253631042706,
"grad_norm": 1.1015625,
"learning_rate": 3.5184347826086963e-06,
"loss": 0.2459,
"step": 8520
},
{
"epoch": 0.5917190548450032,
"grad_norm": 1.1328125,
"learning_rate": 3.5166956521739133e-06,
"loss": 0.2063,
"step": 8530
},
{
"epoch": 0.592412746585736,
"grad_norm": 0.88671875,
"learning_rate": 3.5149565217391306e-06,
"loss": 0.2193,
"step": 8540
},
{
"epoch": 0.5931064383264687,
"grad_norm": 1.3828125,
"learning_rate": 3.5132173913043484e-06,
"loss": 0.3143,
"step": 8550
},
{
"epoch": 0.5938001300672013,
"grad_norm": 1.2421875,
"learning_rate": 3.511478260869566e-06,
"loss": 0.2205,
"step": 8560
},
{
"epoch": 0.5944938218079341,
"grad_norm": 1.2734375,
"learning_rate": 3.5097391304347828e-06,
"loss": 0.2392,
"step": 8570
},
{
"epoch": 0.5951875135486668,
"grad_norm": 1.421875,
"learning_rate": 3.508e-06,
"loss": 0.252,
"step": 8580
},
{
"epoch": 0.5958812052893995,
"grad_norm": 1.4296875,
"learning_rate": 3.506260869565218e-06,
"loss": 0.318,
"step": 8590
},
{
"epoch": 0.5965748970301322,
"grad_norm": 1.078125,
"learning_rate": 3.504521739130435e-06,
"loss": 0.2799,
"step": 8600
},
{
"epoch": 0.597268588770865,
"grad_norm": 1.25,
"learning_rate": 3.5027826086956522e-06,
"loss": 0.2301,
"step": 8610
},
{
"epoch": 0.5979622805115976,
"grad_norm": 1.28125,
"learning_rate": 3.50104347826087e-06,
"loss": 0.2516,
"step": 8620
},
{
"epoch": 0.5986559722523304,
"grad_norm": 1.375,
"learning_rate": 3.4993043478260874e-06,
"loss": 0.2338,
"step": 8630
},
{
"epoch": 0.5993496639930631,
"grad_norm": 1.09375,
"learning_rate": 3.4975652173913044e-06,
"loss": 0.2511,
"step": 8640
},
{
"epoch": 0.6000433557337957,
"grad_norm": 1.15625,
"learning_rate": 3.4958260869565217e-06,
"loss": 0.2401,
"step": 8650
},
{
"epoch": 0.6007370474745285,
"grad_norm": 1.09375,
"learning_rate": 3.4940869565217395e-06,
"loss": 0.2961,
"step": 8660
},
{
"epoch": 0.6014307392152612,
"grad_norm": 1.1953125,
"learning_rate": 3.492347826086957e-06,
"loss": 0.2639,
"step": 8670
},
{
"epoch": 0.6021244309559939,
"grad_norm": 1.2578125,
"learning_rate": 3.490608695652174e-06,
"loss": 0.2283,
"step": 8680
},
{
"epoch": 0.6028181226967266,
"grad_norm": 1.0703125,
"learning_rate": 3.4888695652173916e-06,
"loss": 0.2404,
"step": 8690
},
{
"epoch": 0.6035118144374594,
"grad_norm": 1.375,
"learning_rate": 3.487130434782609e-06,
"loss": 0.2559,
"step": 8700
},
{
"epoch": 0.604205506178192,
"grad_norm": 1.0390625,
"learning_rate": 3.4853913043478264e-06,
"loss": 0.24,
"step": 8710
},
{
"epoch": 0.6048991979189248,
"grad_norm": 1.078125,
"learning_rate": 3.4836521739130437e-06,
"loss": 0.2297,
"step": 8720
},
{
"epoch": 0.6055928896596575,
"grad_norm": 1.296875,
"learning_rate": 3.481913043478261e-06,
"loss": 0.2517,
"step": 8730
},
{
"epoch": 0.6062865814003902,
"grad_norm": 1.1875,
"learning_rate": 3.4801739130434785e-06,
"loss": 0.2096,
"step": 8740
},
{
"epoch": 0.6069802731411229,
"grad_norm": 1.4609375,
"learning_rate": 3.4784347826086963e-06,
"loss": 0.2116,
"step": 8750
},
{
"epoch": 0.6076739648818557,
"grad_norm": 0.79296875,
"learning_rate": 3.4766956521739132e-06,
"loss": 0.1944,
"step": 8760
},
{
"epoch": 0.6083676566225883,
"grad_norm": 1.3125,
"learning_rate": 3.4749565217391306e-06,
"loss": 0.2687,
"step": 8770
},
{
"epoch": 0.609061348363321,
"grad_norm": 1.2421875,
"learning_rate": 3.4732173913043484e-06,
"loss": 0.283,
"step": 8780
},
{
"epoch": 0.6097550401040538,
"grad_norm": 1.3359375,
"learning_rate": 3.4714782608695658e-06,
"loss": 0.2059,
"step": 8790
},
{
"epoch": 0.6104487318447864,
"grad_norm": 0.9765625,
"learning_rate": 3.4697391304347827e-06,
"loss": 0.3626,
"step": 8800
},
{
"epoch": 0.6111424235855192,
"grad_norm": 1.2578125,
"learning_rate": 3.468e-06,
"loss": 0.2557,
"step": 8810
},
{
"epoch": 0.6118361153262519,
"grad_norm": 1.3203125,
"learning_rate": 3.466260869565218e-06,
"loss": 0.2391,
"step": 8820
},
{
"epoch": 0.6125298070669846,
"grad_norm": 1.265625,
"learning_rate": 3.4645217391304353e-06,
"loss": 0.296,
"step": 8830
},
{
"epoch": 0.6132234988077173,
"grad_norm": 1.046875,
"learning_rate": 3.462782608695652e-06,
"loss": 0.2482,
"step": 8840
},
{
"epoch": 0.6139171905484501,
"grad_norm": 1.359375,
"learning_rate": 3.46104347826087e-06,
"loss": 0.2761,
"step": 8850
},
{
"epoch": 0.6146108822891827,
"grad_norm": 1.2890625,
"learning_rate": 3.4593043478260874e-06,
"loss": 0.2493,
"step": 8860
},
{
"epoch": 0.6153045740299155,
"grad_norm": 1.1328125,
"learning_rate": 3.4575652173913047e-06,
"loss": 0.2301,
"step": 8870
},
{
"epoch": 0.6159982657706482,
"grad_norm": 0.99609375,
"learning_rate": 3.4558260869565217e-06,
"loss": 0.2608,
"step": 8880
},
{
"epoch": 0.6166919575113808,
"grad_norm": 0.92578125,
"learning_rate": 3.4540869565217395e-06,
"loss": 0.2481,
"step": 8890
},
{
"epoch": 0.6173856492521136,
"grad_norm": 1.4140625,
"learning_rate": 3.452347826086957e-06,
"loss": 0.2292,
"step": 8900
},
{
"epoch": 0.6180793409928463,
"grad_norm": 1.1015625,
"learning_rate": 3.450608695652174e-06,
"loss": 0.2091,
"step": 8910
},
{
"epoch": 0.618773032733579,
"grad_norm": 1.2421875,
"learning_rate": 3.4488695652173916e-06,
"loss": 0.2997,
"step": 8920
},
{
"epoch": 0.6194667244743117,
"grad_norm": 1.453125,
"learning_rate": 3.447130434782609e-06,
"loss": 0.3502,
"step": 8930
},
{
"epoch": 0.6201604162150445,
"grad_norm": 1.7578125,
"learning_rate": 3.4453913043478263e-06,
"loss": 0.313,
"step": 8940
},
{
"epoch": 0.6208541079557771,
"grad_norm": 1.2109375,
"learning_rate": 3.4436521739130437e-06,
"loss": 0.2215,
"step": 8950
},
{
"epoch": 0.6215477996965099,
"grad_norm": 1.3203125,
"learning_rate": 3.441913043478261e-06,
"loss": 0.2398,
"step": 8960
},
{
"epoch": 0.6222414914372426,
"grad_norm": 1.046875,
"learning_rate": 3.4401739130434784e-06,
"loss": 0.2002,
"step": 8970
},
{
"epoch": 0.6229351831779752,
"grad_norm": 1.484375,
"learning_rate": 3.4384347826086962e-06,
"loss": 0.2549,
"step": 8980
},
{
"epoch": 0.623628874918708,
"grad_norm": 1.1328125,
"learning_rate": 3.436695652173913e-06,
"loss": 0.2009,
"step": 8990
},
{
"epoch": 0.6243225666594407,
"grad_norm": 0.9375,
"learning_rate": 3.4349565217391306e-06,
"loss": 0.22,
"step": 9000
},
{
"epoch": 0.6250162584001734,
"grad_norm": 1.1875,
"learning_rate": 3.4332173913043484e-06,
"loss": 0.2436,
"step": 9010
},
{
"epoch": 0.6257099501409061,
"grad_norm": 0.96484375,
"learning_rate": 3.4314782608695657e-06,
"loss": 0.2411,
"step": 9020
},
{
"epoch": 0.6264036418816389,
"grad_norm": 1.15625,
"learning_rate": 3.4297391304347827e-06,
"loss": 0.2155,
"step": 9030
},
{
"epoch": 0.6270973336223715,
"grad_norm": 1.765625,
"learning_rate": 3.428e-06,
"loss": 0.26,
"step": 9040
},
{
"epoch": 0.6277910253631043,
"grad_norm": 1.296875,
"learning_rate": 3.426260869565218e-06,
"loss": 0.2602,
"step": 9050
},
{
"epoch": 0.628484717103837,
"grad_norm": 1.0859375,
"learning_rate": 3.424521739130435e-06,
"loss": 0.2459,
"step": 9060
},
{
"epoch": 0.6291784088445697,
"grad_norm": 1.515625,
"learning_rate": 3.422782608695652e-06,
"loss": 0.3279,
"step": 9070
},
{
"epoch": 0.6298721005853024,
"grad_norm": 1.09375,
"learning_rate": 3.42104347826087e-06,
"loss": 0.2208,
"step": 9080
},
{
"epoch": 0.6305657923260352,
"grad_norm": 1.046875,
"learning_rate": 3.4193043478260873e-06,
"loss": 0.2717,
"step": 9090
},
{
"epoch": 0.6312594840667678,
"grad_norm": 1.0546875,
"learning_rate": 3.4175652173913047e-06,
"loss": 0.2052,
"step": 9100
},
{
"epoch": 0.6319531758075005,
"grad_norm": 1.1171875,
"learning_rate": 3.4158260869565216e-06,
"loss": 0.2791,
"step": 9110
},
{
"epoch": 0.6326468675482333,
"grad_norm": 1.109375,
"learning_rate": 3.4140869565217394e-06,
"loss": 0.2904,
"step": 9120
},
{
"epoch": 0.6333405592889659,
"grad_norm": 1.046875,
"learning_rate": 3.412347826086957e-06,
"loss": 0.2522,
"step": 9130
},
{
"epoch": 0.6340342510296987,
"grad_norm": 1.4140625,
"learning_rate": 3.4106086956521746e-06,
"loss": 0.2051,
"step": 9140
},
{
"epoch": 0.6347279427704314,
"grad_norm": 1.515625,
"learning_rate": 3.4088695652173915e-06,
"loss": 0.2321,
"step": 9150
},
{
"epoch": 0.6354216345111641,
"grad_norm": 1.203125,
"learning_rate": 3.407130434782609e-06,
"loss": 0.2734,
"step": 9160
},
{
"epoch": 0.6361153262518968,
"grad_norm": 1.1796875,
"learning_rate": 3.4053913043478263e-06,
"loss": 0.2302,
"step": 9170
},
{
"epoch": 0.6368090179926296,
"grad_norm": 1.40625,
"learning_rate": 3.403652173913044e-06,
"loss": 0.2541,
"step": 9180
},
{
"epoch": 0.6375027097333622,
"grad_norm": 1.5,
"learning_rate": 3.401913043478261e-06,
"loss": 0.2151,
"step": 9190
},
{
"epoch": 0.638196401474095,
"grad_norm": 1.2421875,
"learning_rate": 3.4001739130434784e-06,
"loss": 0.2795,
"step": 9200
},
{
"epoch": 0.6388900932148277,
"grad_norm": 1.2734375,
"learning_rate": 3.398434782608696e-06,
"loss": 0.2498,
"step": 9210
},
{
"epoch": 0.6395837849555603,
"grad_norm": 2.0625,
"learning_rate": 3.396695652173913e-06,
"loss": 0.2836,
"step": 9220
},
{
"epoch": 0.6402774766962931,
"grad_norm": 1.125,
"learning_rate": 3.3949565217391305e-06,
"loss": 0.2346,
"step": 9230
},
{
"epoch": 0.6409711684370258,
"grad_norm": 1.03125,
"learning_rate": 3.3932173913043483e-06,
"loss": 0.2447,
"step": 9240
},
{
"epoch": 0.6416648601777585,
"grad_norm": 1.2109375,
"learning_rate": 3.3914782608695657e-06,
"loss": 0.2775,
"step": 9250
},
{
"epoch": 0.6423585519184912,
"grad_norm": 1.4296875,
"learning_rate": 3.3897391304347826e-06,
"loss": 0.2225,
"step": 9260
},
{
"epoch": 0.643052243659224,
"grad_norm": 1.4609375,
"learning_rate": 3.388e-06,
"loss": 0.2327,
"step": 9270
},
{
"epoch": 0.6437459353999566,
"grad_norm": 1.078125,
"learning_rate": 3.386260869565218e-06,
"loss": 0.2091,
"step": 9280
},
{
"epoch": 0.6444396271406894,
"grad_norm": 1.484375,
"learning_rate": 3.384521739130435e-06,
"loss": 0.244,
"step": 9290
},
{
"epoch": 0.6451333188814221,
"grad_norm": 1.25,
"learning_rate": 3.382782608695652e-06,
"loss": 0.2169,
"step": 9300
},
{
"epoch": 0.6458270106221548,
"grad_norm": 1.4296875,
"learning_rate": 3.38104347826087e-06,
"loss": 0.3279,
"step": 9310
},
{
"epoch": 0.6465207023628875,
"grad_norm": 1.046875,
"learning_rate": 3.3793043478260873e-06,
"loss": 0.2176,
"step": 9320
},
{
"epoch": 0.6472143941036202,
"grad_norm": 1.96875,
"learning_rate": 3.3775652173913047e-06,
"loss": 0.315,
"step": 9330
},
{
"epoch": 0.6479080858443529,
"grad_norm": 1.2421875,
"learning_rate": 3.3758260869565216e-06,
"loss": 0.2778,
"step": 9340
},
{
"epoch": 0.6486017775850856,
"grad_norm": 0.92578125,
"learning_rate": 3.3740869565217394e-06,
"loss": 0.2681,
"step": 9350
},
{
"epoch": 0.6492954693258184,
"grad_norm": 1.078125,
"learning_rate": 3.3723478260869568e-06,
"loss": 0.2078,
"step": 9360
},
{
"epoch": 0.649989161066551,
"grad_norm": 1.8984375,
"learning_rate": 3.3706086956521746e-06,
"loss": 0.3201,
"step": 9370
},
{
"epoch": 0.6506828528072838,
"grad_norm": 1.078125,
"learning_rate": 3.3688695652173915e-06,
"loss": 0.2226,
"step": 9380
},
{
"epoch": 0.6513765445480165,
"grad_norm": 1.171875,
"learning_rate": 3.367130434782609e-06,
"loss": 0.2847,
"step": 9390
},
{
"epoch": 0.6520702362887492,
"grad_norm": 0.99609375,
"learning_rate": 3.3653913043478263e-06,
"loss": 0.2418,
"step": 9400
},
{
"epoch": 0.6527639280294819,
"grad_norm": 2.078125,
"learning_rate": 3.363652173913044e-06,
"loss": 0.2898,
"step": 9410
},
{
"epoch": 0.6534576197702147,
"grad_norm": 1.109375,
"learning_rate": 3.361913043478261e-06,
"loss": 0.2522,
"step": 9420
},
{
"epoch": 0.6541513115109473,
"grad_norm": 1.203125,
"learning_rate": 3.3601739130434784e-06,
"loss": 0.2297,
"step": 9430
},
{
"epoch": 0.65484500325168,
"grad_norm": 1.1484375,
"learning_rate": 3.358434782608696e-06,
"loss": 0.2395,
"step": 9440
},
{
"epoch": 0.6555386949924128,
"grad_norm": 1.4453125,
"learning_rate": 3.3566956521739135e-06,
"loss": 0.2372,
"step": 9450
},
{
"epoch": 0.6562323867331454,
"grad_norm": 1.265625,
"learning_rate": 3.3549565217391305e-06,
"loss": 0.2412,
"step": 9460
},
{
"epoch": 0.6569260784738782,
"grad_norm": 1.28125,
"learning_rate": 3.3532173913043483e-06,
"loss": 0.2586,
"step": 9470
},
{
"epoch": 0.6576197702146109,
"grad_norm": 1.5703125,
"learning_rate": 3.3514782608695656e-06,
"loss": 0.269,
"step": 9480
},
{
"epoch": 0.6583134619553436,
"grad_norm": 1.1796875,
"learning_rate": 3.349739130434783e-06,
"loss": 0.2298,
"step": 9490
},
{
"epoch": 0.6590071536960763,
"grad_norm": 0.9609375,
"learning_rate": 3.348e-06,
"loss": 0.2737,
"step": 9500
},
{
"epoch": 0.6597008454368091,
"grad_norm": 1.4453125,
"learning_rate": 3.3462608695652178e-06,
"loss": 0.2729,
"step": 9510
},
{
"epoch": 0.6603945371775417,
"grad_norm": 1.0625,
"learning_rate": 3.344521739130435e-06,
"loss": 0.2285,
"step": 9520
},
{
"epoch": 0.6610882289182745,
"grad_norm": 1.0390625,
"learning_rate": 3.342782608695652e-06,
"loss": 0.2581,
"step": 9530
},
{
"epoch": 0.6617819206590072,
"grad_norm": 1.21875,
"learning_rate": 3.34104347826087e-06,
"loss": 0.3278,
"step": 9540
},
{
"epoch": 0.6624756123997398,
"grad_norm": 0.9765625,
"learning_rate": 3.3393043478260872e-06,
"loss": 0.2526,
"step": 9550
},
{
"epoch": 0.6631693041404726,
"grad_norm": 1.265625,
"learning_rate": 3.3375652173913046e-06,
"loss": 0.2401,
"step": 9560
},
{
"epoch": 0.6638629958812053,
"grad_norm": 0.9765625,
"learning_rate": 3.3358260869565216e-06,
"loss": 0.2359,
"step": 9570
},
{
"epoch": 0.664556687621938,
"grad_norm": 1.2265625,
"learning_rate": 3.3340869565217394e-06,
"loss": 0.2697,
"step": 9580
},
{
"epoch": 0.6652503793626707,
"grad_norm": 1.3203125,
"learning_rate": 3.3323478260869567e-06,
"loss": 0.2191,
"step": 9590
},
{
"epoch": 0.6659440711034035,
"grad_norm": 1.0859375,
"learning_rate": 3.3306086956521745e-06,
"loss": 0.2142,
"step": 9600
},
{
"epoch": 0.6666377628441361,
"grad_norm": 1.2578125,
"learning_rate": 3.3288695652173915e-06,
"loss": 0.2708,
"step": 9610
},
{
"epoch": 0.6673314545848689,
"grad_norm": 1.265625,
"learning_rate": 3.327130434782609e-06,
"loss": 0.2285,
"step": 9620
},
{
"epoch": 0.6680251463256016,
"grad_norm": 1.4609375,
"learning_rate": 3.325391304347826e-06,
"loss": 0.2216,
"step": 9630
},
{
"epoch": 0.6687188380663343,
"grad_norm": 1.1796875,
"learning_rate": 3.323652173913044e-06,
"loss": 0.2208,
"step": 9640
},
{
"epoch": 0.669412529807067,
"grad_norm": 0.86328125,
"learning_rate": 3.321913043478261e-06,
"loss": 0.1785,
"step": 9650
},
{
"epoch": 0.6701062215477998,
"grad_norm": 1.0390625,
"learning_rate": 3.3201739130434783e-06,
"loss": 0.2453,
"step": 9660
},
{
"epoch": 0.6707999132885324,
"grad_norm": 1.09375,
"learning_rate": 3.318434782608696e-06,
"loss": 0.2761,
"step": 9670
},
{
"epoch": 0.6714936050292651,
"grad_norm": 1.1015625,
"learning_rate": 3.3166956521739135e-06,
"loss": 0.2701,
"step": 9680
},
{
"epoch": 0.6721872967699978,
"grad_norm": 1.5546875,
"learning_rate": 3.3149565217391304e-06,
"loss": 0.258,
"step": 9690
},
{
"epoch": 0.6728809885107305,
"grad_norm": 1.21875,
"learning_rate": 3.3132173913043482e-06,
"loss": 0.2648,
"step": 9700
},
{
"epoch": 0.6735746802514633,
"grad_norm": 1.3125,
"learning_rate": 3.3114782608695656e-06,
"loss": 0.2885,
"step": 9710
},
{
"epoch": 0.6742683719921959,
"grad_norm": 1.234375,
"learning_rate": 3.309739130434783e-06,
"loss": 0.264,
"step": 9720
},
{
"epoch": 0.6749620637329287,
"grad_norm": 1.09375,
"learning_rate": 3.308e-06,
"loss": 0.2387,
"step": 9730
},
{
"epoch": 0.6756557554736614,
"grad_norm": 1.375,
"learning_rate": 3.3062608695652177e-06,
"loss": 0.3259,
"step": 9740
},
{
"epoch": 0.676349447214394,
"grad_norm": 1.421875,
"learning_rate": 3.304521739130435e-06,
"loss": 0.251,
"step": 9750
},
{
"epoch": 0.6770431389551268,
"grad_norm": 1.0859375,
"learning_rate": 3.302782608695653e-06,
"loss": 0.2134,
"step": 9760
},
{
"epoch": 0.6777368306958595,
"grad_norm": 1.1171875,
"learning_rate": 3.30104347826087e-06,
"loss": 0.2788,
"step": 9770
},
{
"epoch": 0.6784305224365922,
"grad_norm": 1.265625,
"learning_rate": 3.299304347826087e-06,
"loss": 0.2509,
"step": 9780
},
{
"epoch": 0.6791242141773249,
"grad_norm": 1.3359375,
"learning_rate": 3.2975652173913046e-06,
"loss": 0.2613,
"step": 9790
},
{
"epoch": 0.6798179059180577,
"grad_norm": 1.1328125,
"learning_rate": 3.2958260869565224e-06,
"loss": 0.2278,
"step": 9800
},
{
"epoch": 0.6805115976587903,
"grad_norm": 1.0859375,
"learning_rate": 3.2940869565217393e-06,
"loss": 0.2223,
"step": 9810
},
{
"epoch": 0.6812052893995231,
"grad_norm": 1.484375,
"learning_rate": 3.2923478260869567e-06,
"loss": 0.2233,
"step": 9820
},
{
"epoch": 0.6818989811402558,
"grad_norm": 1.234375,
"learning_rate": 3.2906086956521745e-06,
"loss": 0.2087,
"step": 9830
},
{
"epoch": 0.6825926728809885,
"grad_norm": 1.125,
"learning_rate": 3.2888695652173914e-06,
"loss": 0.2244,
"step": 9840
},
{
"epoch": 0.6832863646217212,
"grad_norm": 1.03125,
"learning_rate": 3.287130434782609e-06,
"loss": 0.2528,
"step": 9850
},
{
"epoch": 0.683980056362454,
"grad_norm": 1.3203125,
"learning_rate": 3.2853913043478266e-06,
"loss": 0.2369,
"step": 9860
},
{
"epoch": 0.6846737481031866,
"grad_norm": 1.1875,
"learning_rate": 3.283652173913044e-06,
"loss": 0.2412,
"step": 9870
},
{
"epoch": 0.6853674398439193,
"grad_norm": 1.9296875,
"learning_rate": 3.281913043478261e-06,
"loss": 0.2932,
"step": 9880
},
{
"epoch": 0.6860611315846521,
"grad_norm": 1.4296875,
"learning_rate": 3.2801739130434783e-06,
"loss": 0.2391,
"step": 9890
},
{
"epoch": 0.6867548233253847,
"grad_norm": 1.015625,
"learning_rate": 3.278434782608696e-06,
"loss": 0.2181,
"step": 9900
},
{
"epoch": 0.6874485150661175,
"grad_norm": 0.890625,
"learning_rate": 3.2766956521739134e-06,
"loss": 0.2253,
"step": 9910
},
{
"epoch": 0.6881422068068502,
"grad_norm": 1.46875,
"learning_rate": 3.2749565217391304e-06,
"loss": 0.2034,
"step": 9920
},
{
"epoch": 0.6888358985475829,
"grad_norm": 1.265625,
"learning_rate": 3.273217391304348e-06,
"loss": 0.2552,
"step": 9930
},
{
"epoch": 0.6895295902883156,
"grad_norm": 1.6171875,
"learning_rate": 3.2714782608695656e-06,
"loss": 0.3157,
"step": 9940
},
{
"epoch": 0.6902232820290484,
"grad_norm": 0.96875,
"learning_rate": 3.269739130434783e-06,
"loss": 0.218,
"step": 9950
},
{
"epoch": 0.690916973769781,
"grad_norm": 1.2265625,
"learning_rate": 3.268e-06,
"loss": 0.2232,
"step": 9960
},
{
"epoch": 0.6916106655105138,
"grad_norm": 1.296875,
"learning_rate": 3.2662608695652177e-06,
"loss": 0.2971,
"step": 9970
},
{
"epoch": 0.6923043572512465,
"grad_norm": 1.1953125,
"learning_rate": 3.264521739130435e-06,
"loss": 0.2534,
"step": 9980
},
{
"epoch": 0.6929980489919791,
"grad_norm": 1.1328125,
"learning_rate": 3.262782608695653e-06,
"loss": 0.2167,
"step": 9990
},
{
"epoch": 0.6936917407327119,
"grad_norm": 0.98828125,
"learning_rate": 3.26104347826087e-06,
"loss": 0.2506,
"step": 10000
},
{
"epoch": 0.6943854324734446,
"grad_norm": 1.1484375,
"learning_rate": 3.259304347826087e-06,
"loss": 0.2381,
"step": 10010
},
{
"epoch": 0.6950791242141773,
"grad_norm": 1.3515625,
"learning_rate": 3.2575652173913045e-06,
"loss": 0.2228,
"step": 10020
},
{
"epoch": 0.69577281595491,
"grad_norm": 1.0078125,
"learning_rate": 3.2558260869565223e-06,
"loss": 0.299,
"step": 10030
},
{
"epoch": 0.6964665076956428,
"grad_norm": 1.125,
"learning_rate": 3.2540869565217393e-06,
"loss": 0.2691,
"step": 10040
},
{
"epoch": 0.6971601994363754,
"grad_norm": 1.0625,
"learning_rate": 3.2523478260869566e-06,
"loss": 0.2412,
"step": 10050
},
{
"epoch": 0.6978538911771082,
"grad_norm": 1.453125,
"learning_rate": 3.2506086956521744e-06,
"loss": 0.2672,
"step": 10060
},
{
"epoch": 0.6985475829178409,
"grad_norm": 1.2734375,
"learning_rate": 3.248869565217392e-06,
"loss": 0.2148,
"step": 10070
},
{
"epoch": 0.6992412746585736,
"grad_norm": 1.28125,
"learning_rate": 3.2471304347826088e-06,
"loss": 0.246,
"step": 10080
},
{
"epoch": 0.6999349663993063,
"grad_norm": 0.8984375,
"learning_rate": 3.2453913043478266e-06,
"loss": 0.2934,
"step": 10090
},
{
"epoch": 0.700628658140039,
"grad_norm": 1.1875,
"learning_rate": 3.243652173913044e-06,
"loss": 0.2409,
"step": 10100
},
{
"epoch": 0.7013223498807717,
"grad_norm": 1.046875,
"learning_rate": 3.241913043478261e-06,
"loss": 0.2713,
"step": 10110
},
{
"epoch": 0.7020160416215044,
"grad_norm": 1.15625,
"learning_rate": 3.2401739130434782e-06,
"loss": 0.2528,
"step": 10120
},
{
"epoch": 0.7027097333622372,
"grad_norm": 1.21875,
"learning_rate": 3.238434782608696e-06,
"loss": 0.2619,
"step": 10130
},
{
"epoch": 0.7034034251029698,
"grad_norm": 1.1875,
"learning_rate": 3.2366956521739134e-06,
"loss": 0.2569,
"step": 10140
},
{
"epoch": 0.7040971168437026,
"grad_norm": 1.2265625,
"learning_rate": 3.2349565217391304e-06,
"loss": 0.2033,
"step": 10150
},
{
"epoch": 0.7047908085844353,
"grad_norm": 1.4765625,
"learning_rate": 3.233217391304348e-06,
"loss": 0.2447,
"step": 10160
},
{
"epoch": 0.705484500325168,
"grad_norm": 0.83203125,
"learning_rate": 3.2314782608695655e-06,
"loss": 0.2063,
"step": 10170
},
{
"epoch": 0.7061781920659007,
"grad_norm": 1.234375,
"learning_rate": 3.229739130434783e-06,
"loss": 0.235,
"step": 10180
},
{
"epoch": 0.7068718838066335,
"grad_norm": 1.375,
"learning_rate": 3.228e-06,
"loss": 0.2407,
"step": 10190
},
{
"epoch": 0.7075655755473661,
"grad_norm": 1.2421875,
"learning_rate": 3.2262608695652176e-06,
"loss": 0.2973,
"step": 10200
},
{
"epoch": 0.7082592672880988,
"grad_norm": 1.34375,
"learning_rate": 3.224521739130435e-06,
"loss": 0.2291,
"step": 10210
},
{
"epoch": 0.7089529590288316,
"grad_norm": 1.1328125,
"learning_rate": 3.222782608695653e-06,
"loss": 0.2552,
"step": 10220
},
{
"epoch": 0.7096466507695642,
"grad_norm": 1.1953125,
"learning_rate": 3.2210434782608697e-06,
"loss": 0.2343,
"step": 10230
},
{
"epoch": 0.710340342510297,
"grad_norm": 1.265625,
"learning_rate": 3.219304347826087e-06,
"loss": 0.2158,
"step": 10240
},
{
"epoch": 0.7110340342510297,
"grad_norm": 1.0703125,
"learning_rate": 3.2175652173913045e-06,
"loss": 0.2512,
"step": 10250
},
{
"epoch": 0.7117277259917624,
"grad_norm": 0.890625,
"learning_rate": 3.2158260869565223e-06,
"loss": 0.2346,
"step": 10260
},
{
"epoch": 0.7124214177324951,
"grad_norm": 1.0078125,
"learning_rate": 3.2140869565217392e-06,
"loss": 0.2133,
"step": 10270
},
{
"epoch": 0.7131151094732279,
"grad_norm": 1.15625,
"learning_rate": 3.2123478260869566e-06,
"loss": 0.2386,
"step": 10280
},
{
"epoch": 0.7138088012139605,
"grad_norm": 1.109375,
"learning_rate": 3.2106086956521744e-06,
"loss": 0.1876,
"step": 10290
},
{
"epoch": 0.7145024929546933,
"grad_norm": 0.953125,
"learning_rate": 3.2088695652173918e-06,
"loss": 0.2198,
"step": 10300
},
{
"epoch": 0.715196184695426,
"grad_norm": 0.9453125,
"learning_rate": 3.2071304347826087e-06,
"loss": 0.218,
"step": 10310
},
{
"epoch": 0.7158898764361586,
"grad_norm": 0.97265625,
"learning_rate": 3.2053913043478265e-06,
"loss": 0.2749,
"step": 10320
},
{
"epoch": 0.7165835681768914,
"grad_norm": 0.99609375,
"learning_rate": 3.203652173913044e-06,
"loss": 0.2199,
"step": 10330
},
{
"epoch": 0.7172772599176241,
"grad_norm": 0.9921875,
"learning_rate": 3.2019130434782613e-06,
"loss": 0.2523,
"step": 10340
},
{
"epoch": 0.7179709516583568,
"grad_norm": 0.98828125,
"learning_rate": 3.200173913043478e-06,
"loss": 0.2313,
"step": 10350
},
{
"epoch": 0.7186646433990895,
"grad_norm": 1.25,
"learning_rate": 3.198434782608696e-06,
"loss": 0.3035,
"step": 10360
},
{
"epoch": 0.7193583351398223,
"grad_norm": 1.6015625,
"learning_rate": 3.1966956521739134e-06,
"loss": 0.3116,
"step": 10370
},
{
"epoch": 0.7200520268805549,
"grad_norm": 1.21875,
"learning_rate": 3.194956521739131e-06,
"loss": 0.2449,
"step": 10380
},
{
"epoch": 0.7207457186212877,
"grad_norm": 1.796875,
"learning_rate": 3.193217391304348e-06,
"loss": 0.3198,
"step": 10390
},
{
"epoch": 0.7214394103620204,
"grad_norm": 1.25,
"learning_rate": 3.1914782608695655e-06,
"loss": 0.2539,
"step": 10400
},
{
"epoch": 0.722133102102753,
"grad_norm": 1.125,
"learning_rate": 3.189739130434783e-06,
"loss": 0.3254,
"step": 10410
},
{
"epoch": 0.7228267938434858,
"grad_norm": 1.375,
"learning_rate": 3.188e-06,
"loss": 0.2163,
"step": 10420
},
{
"epoch": 0.7235204855842186,
"grad_norm": 1.2109375,
"learning_rate": 3.1862608695652176e-06,
"loss": 0.253,
"step": 10430
},
{
"epoch": 0.7242141773249512,
"grad_norm": 1.0234375,
"learning_rate": 3.184521739130435e-06,
"loss": 0.2237,
"step": 10440
},
{
"epoch": 0.7249078690656839,
"grad_norm": 1.234375,
"learning_rate": 3.1827826086956528e-06,
"loss": 0.2336,
"step": 10450
},
{
"epoch": 0.7256015608064167,
"grad_norm": 1.046875,
"learning_rate": 3.1810434782608697e-06,
"loss": 0.2501,
"step": 10460
},
{
"epoch": 0.7262952525471493,
"grad_norm": 1.3515625,
"learning_rate": 3.179304347826087e-06,
"loss": 0.2205,
"step": 10470
},
{
"epoch": 0.7269889442878821,
"grad_norm": 1.515625,
"learning_rate": 3.1775652173913045e-06,
"loss": 0.2509,
"step": 10480
},
{
"epoch": 0.7276826360286148,
"grad_norm": 1.109375,
"learning_rate": 3.1758260869565222e-06,
"loss": 0.2539,
"step": 10490
},
{
"epoch": 0.7283763277693475,
"grad_norm": 1.0703125,
"learning_rate": 3.174086956521739e-06,
"loss": 0.2217,
"step": 10500
},
{
"epoch": 0.7290700195100802,
"grad_norm": 1.4375,
"learning_rate": 3.1723478260869566e-06,
"loss": 0.26,
"step": 10510
},
{
"epoch": 0.729763711250813,
"grad_norm": 1.0703125,
"learning_rate": 3.1706086956521744e-06,
"loss": 0.2377,
"step": 10520
},
{
"epoch": 0.7304574029915456,
"grad_norm": 1.265625,
"learning_rate": 3.1688695652173917e-06,
"loss": 0.2349,
"step": 10530
},
{
"epoch": 0.7311510947322784,
"grad_norm": 0.953125,
"learning_rate": 3.1671304347826087e-06,
"loss": 0.2118,
"step": 10540
},
{
"epoch": 0.7318447864730111,
"grad_norm": 1.328125,
"learning_rate": 3.1653913043478265e-06,
"loss": 0.2557,
"step": 10550
},
{
"epoch": 0.7325384782137437,
"grad_norm": 1.125,
"learning_rate": 3.163652173913044e-06,
"loss": 0.272,
"step": 10560
},
{
"epoch": 0.7332321699544765,
"grad_norm": 1.078125,
"learning_rate": 3.1619130434782612e-06,
"loss": 0.2569,
"step": 10570
},
{
"epoch": 0.7339258616952092,
"grad_norm": 0.77734375,
"learning_rate": 3.160173913043478e-06,
"loss": 0.2273,
"step": 10580
},
{
"epoch": 0.7346195534359419,
"grad_norm": 1.109375,
"learning_rate": 3.158434782608696e-06,
"loss": 0.2184,
"step": 10590
},
{
"epoch": 0.7353132451766746,
"grad_norm": 1.15625,
"learning_rate": 3.1566956521739133e-06,
"loss": 0.2226,
"step": 10600
},
{
"epoch": 0.7360069369174074,
"grad_norm": 0.8984375,
"learning_rate": 3.154956521739131e-06,
"loss": 0.2256,
"step": 10610
},
{
"epoch": 0.73670062865814,
"grad_norm": 1.578125,
"learning_rate": 3.153217391304348e-06,
"loss": 0.2162,
"step": 10620
},
{
"epoch": 0.7373943203988728,
"grad_norm": 1.15625,
"learning_rate": 3.1514782608695654e-06,
"loss": 0.2247,
"step": 10630
},
{
"epoch": 0.7380880121396055,
"grad_norm": 1.6015625,
"learning_rate": 3.149739130434783e-06,
"loss": 0.2441,
"step": 10640
},
{
"epoch": 0.7387817038803381,
"grad_norm": 1.265625,
"learning_rate": 3.1480000000000006e-06,
"loss": 0.2476,
"step": 10650
},
{
"epoch": 0.7394753956210709,
"grad_norm": 1.1953125,
"learning_rate": 3.1462608695652176e-06,
"loss": 0.301,
"step": 10660
},
{
"epoch": 0.7401690873618036,
"grad_norm": 1.296875,
"learning_rate": 3.144521739130435e-06,
"loss": 0.267,
"step": 10670
},
{
"epoch": 0.7408627791025363,
"grad_norm": 1.1328125,
"learning_rate": 3.1427826086956527e-06,
"loss": 0.2188,
"step": 10680
},
{
"epoch": 0.741556470843269,
"grad_norm": 1.1875,
"learning_rate": 3.14104347826087e-06,
"loss": 0.2561,
"step": 10690
},
{
"epoch": 0.7422501625840018,
"grad_norm": 1.171875,
"learning_rate": 3.139304347826087e-06,
"loss": 0.2115,
"step": 10700
},
{
"epoch": 0.7429438543247344,
"grad_norm": 1.40625,
"learning_rate": 3.1375652173913044e-06,
"loss": 0.2606,
"step": 10710
},
{
"epoch": 0.7436375460654672,
"grad_norm": 1.0625,
"learning_rate": 3.135826086956522e-06,
"loss": 0.3279,
"step": 10720
},
{
"epoch": 0.7443312378061999,
"grad_norm": 1.59375,
"learning_rate": 3.134086956521739e-06,
"loss": 0.2229,
"step": 10730
},
{
"epoch": 0.7450249295469326,
"grad_norm": 1.546875,
"learning_rate": 3.1323478260869565e-06,
"loss": 0.3244,
"step": 10740
},
{
"epoch": 0.7457186212876653,
"grad_norm": 1.28125,
"learning_rate": 3.1306086956521743e-06,
"loss": 0.2381,
"step": 10750
},
{
"epoch": 0.7464123130283981,
"grad_norm": 0.9765625,
"learning_rate": 3.1288695652173917e-06,
"loss": 0.1984,
"step": 10760
},
{
"epoch": 0.7471060047691307,
"grad_norm": 1.140625,
"learning_rate": 3.1271304347826086e-06,
"loss": 0.2426,
"step": 10770
},
{
"epoch": 0.7477996965098634,
"grad_norm": 1.2734375,
"learning_rate": 3.1253913043478264e-06,
"loss": 0.2656,
"step": 10780
},
{
"epoch": 0.7484933882505962,
"grad_norm": 1.359375,
"learning_rate": 3.123652173913044e-06,
"loss": 0.2148,
"step": 10790
},
{
"epoch": 0.7491870799913288,
"grad_norm": 1.28125,
"learning_rate": 3.121913043478261e-06,
"loss": 0.2359,
"step": 10800
},
{
"epoch": 0.7498807717320616,
"grad_norm": 1.25,
"learning_rate": 3.120173913043478e-06,
"loss": 0.2798,
"step": 10810
},
{
"epoch": 0.7505744634727943,
"grad_norm": 1.203125,
"learning_rate": 3.118434782608696e-06,
"loss": 0.2446,
"step": 10820
},
{
"epoch": 0.751268155213527,
"grad_norm": 1.140625,
"learning_rate": 3.1166956521739133e-06,
"loss": 0.2492,
"step": 10830
},
{
"epoch": 0.7519618469542597,
"grad_norm": 1.0234375,
"learning_rate": 3.114956521739131e-06,
"loss": 0.2174,
"step": 10840
},
{
"epoch": 0.7526555386949925,
"grad_norm": 1.0234375,
"learning_rate": 3.113217391304348e-06,
"loss": 0.2919,
"step": 10850
},
{
"epoch": 0.7533492304357251,
"grad_norm": 0.97265625,
"learning_rate": 3.1114782608695654e-06,
"loss": 0.2294,
"step": 10860
},
{
"epoch": 0.7540429221764579,
"grad_norm": 1.171875,
"learning_rate": 3.1097391304347828e-06,
"loss": 0.2915,
"step": 10870
},
{
"epoch": 0.7547366139171906,
"grad_norm": 1.5234375,
"learning_rate": 3.1080000000000006e-06,
"loss": 0.2996,
"step": 10880
},
{
"epoch": 0.7554303056579232,
"grad_norm": 1.140625,
"learning_rate": 3.1062608695652175e-06,
"loss": 0.3066,
"step": 10890
},
{
"epoch": 0.756123997398656,
"grad_norm": 1.3671875,
"learning_rate": 3.104521739130435e-06,
"loss": 0.2634,
"step": 10900
},
{
"epoch": 0.7568176891393887,
"grad_norm": 1.265625,
"learning_rate": 3.1027826086956527e-06,
"loss": 0.2178,
"step": 10910
},
{
"epoch": 0.7575113808801214,
"grad_norm": 0.91015625,
"learning_rate": 3.10104347826087e-06,
"loss": 0.2464,
"step": 10920
},
{
"epoch": 0.7582050726208541,
"grad_norm": 1.375,
"learning_rate": 3.099304347826087e-06,
"loss": 0.2308,
"step": 10930
},
{
"epoch": 0.7588987643615869,
"grad_norm": 1.1015625,
"learning_rate": 3.0975652173913044e-06,
"loss": 0.2179,
"step": 10940
},
{
"epoch": 0.7595924561023195,
"grad_norm": 1.203125,
"learning_rate": 3.095826086956522e-06,
"loss": 0.2317,
"step": 10950
},
{
"epoch": 0.7602861478430523,
"grad_norm": 0.86328125,
"learning_rate": 3.0940869565217395e-06,
"loss": 0.2443,
"step": 10960
},
{
"epoch": 0.7609798395837849,
"grad_norm": 1.3515625,
"learning_rate": 3.0923478260869565e-06,
"loss": 0.3217,
"step": 10970
},
{
"epoch": 0.7616735313245176,
"grad_norm": 1.3359375,
"learning_rate": 3.0906086956521743e-06,
"loss": 0.2126,
"step": 10980
},
{
"epoch": 0.7623672230652504,
"grad_norm": 1.1953125,
"learning_rate": 3.0888695652173916e-06,
"loss": 0.2326,
"step": 10990
},
{
"epoch": 0.763060914805983,
"grad_norm": 1.296875,
"learning_rate": 3.087130434782609e-06,
"loss": 0.2266,
"step": 11000
},
{
"epoch": 0.7637546065467158,
"grad_norm": 1.078125,
"learning_rate": 3.0853913043478264e-06,
"loss": 0.2807,
"step": 11010
},
{
"epoch": 0.7644482982874485,
"grad_norm": 1.2890625,
"learning_rate": 3.0836521739130438e-06,
"loss": 0.3183,
"step": 11020
},
{
"epoch": 0.7651419900281812,
"grad_norm": 1.2890625,
"learning_rate": 3.081913043478261e-06,
"loss": 0.2456,
"step": 11030
},
{
"epoch": 0.7658356817689139,
"grad_norm": 1.203125,
"learning_rate": 3.080173913043478e-06,
"loss": 0.2157,
"step": 11040
},
{
"epoch": 0.7665293735096467,
"grad_norm": 1.171875,
"learning_rate": 3.078434782608696e-06,
"loss": 0.2612,
"step": 11050
},
{
"epoch": 0.7672230652503793,
"grad_norm": 1.1953125,
"learning_rate": 3.0766956521739132e-06,
"loss": 0.253,
"step": 11060
},
{
"epoch": 0.7679167569911121,
"grad_norm": 0.90625,
"learning_rate": 3.074956521739131e-06,
"loss": 0.225,
"step": 11070
},
{
"epoch": 0.7686104487318448,
"grad_norm": 1.703125,
"learning_rate": 3.073217391304348e-06,
"loss": 0.2247,
"step": 11080
},
{
"epoch": 0.7693041404725774,
"grad_norm": 1.2109375,
"learning_rate": 3.0714782608695654e-06,
"loss": 0.2493,
"step": 11090
},
{
"epoch": 0.7699978322133102,
"grad_norm": 1.390625,
"learning_rate": 3.0697391304347827e-06,
"loss": 0.3239,
"step": 11100
},
{
"epoch": 0.770691523954043,
"grad_norm": 1.1015625,
"learning_rate": 3.0680000000000005e-06,
"loss": 0.2053,
"step": 11110
},
{
"epoch": 0.7713852156947756,
"grad_norm": 1.2578125,
"learning_rate": 3.0662608695652175e-06,
"loss": 0.2254,
"step": 11120
},
{
"epoch": 0.7720789074355083,
"grad_norm": 1.140625,
"learning_rate": 3.064521739130435e-06,
"loss": 0.2715,
"step": 11130
},
{
"epoch": 0.7727725991762411,
"grad_norm": 1.234375,
"learning_rate": 3.0627826086956526e-06,
"loss": 0.2294,
"step": 11140
},
{
"epoch": 0.7734662909169737,
"grad_norm": 1.390625,
"learning_rate": 3.06104347826087e-06,
"loss": 0.2423,
"step": 11150
},
{
"epoch": 0.7741599826577065,
"grad_norm": 1.453125,
"learning_rate": 3.059304347826087e-06,
"loss": 0.2477,
"step": 11160
},
{
"epoch": 0.7748536743984392,
"grad_norm": 1.2578125,
"learning_rate": 3.0575652173913043e-06,
"loss": 0.2103,
"step": 11170
},
{
"epoch": 0.7755473661391719,
"grad_norm": 1.484375,
"learning_rate": 3.055826086956522e-06,
"loss": 0.2501,
"step": 11180
},
{
"epoch": 0.7762410578799046,
"grad_norm": 1.0859375,
"learning_rate": 3.0540869565217395e-06,
"loss": 0.2741,
"step": 11190
},
{
"epoch": 0.7769347496206374,
"grad_norm": 1.4453125,
"learning_rate": 3.0523478260869564e-06,
"loss": 0.2483,
"step": 11200
},
{
"epoch": 0.77762844136137,
"grad_norm": 1.53125,
"learning_rate": 3.0506086956521742e-06,
"loss": 0.2672,
"step": 11210
},
{
"epoch": 0.7783221331021027,
"grad_norm": 1.21875,
"learning_rate": 3.0488695652173916e-06,
"loss": 0.225,
"step": 11220
},
{
"epoch": 0.7790158248428355,
"grad_norm": 1.21875,
"learning_rate": 3.047130434782609e-06,
"loss": 0.2435,
"step": 11230
},
{
"epoch": 0.7797095165835681,
"grad_norm": 1.1484375,
"learning_rate": 3.0453913043478264e-06,
"loss": 0.251,
"step": 11240
},
{
"epoch": 0.7804032083243009,
"grad_norm": 1.1015625,
"learning_rate": 3.0436521739130437e-06,
"loss": 0.2056,
"step": 11250
},
{
"epoch": 0.7810969000650336,
"grad_norm": 1.4921875,
"learning_rate": 3.041913043478261e-06,
"loss": 0.2781,
"step": 11260
},
{
"epoch": 0.7817905918057663,
"grad_norm": 1.6171875,
"learning_rate": 3.040173913043479e-06,
"loss": 0.2197,
"step": 11270
},
{
"epoch": 0.782484283546499,
"grad_norm": 1.171875,
"learning_rate": 3.038434782608696e-06,
"loss": 0.2486,
"step": 11280
},
{
"epoch": 0.7831779752872318,
"grad_norm": 1.0,
"learning_rate": 3.036695652173913e-06,
"loss": 0.2142,
"step": 11290
},
{
"epoch": 0.7838716670279644,
"grad_norm": 1.5,
"learning_rate": 3.034956521739131e-06,
"loss": 0.2634,
"step": 11300
},
{
"epoch": 0.7845653587686972,
"grad_norm": 1.3984375,
"learning_rate": 3.0332173913043484e-06,
"loss": 0.2851,
"step": 11310
},
{
"epoch": 0.7852590505094299,
"grad_norm": 1.0859375,
"learning_rate": 3.0314782608695653e-06,
"loss": 0.2177,
"step": 11320
},
{
"epoch": 0.7859527422501625,
"grad_norm": 1.15625,
"learning_rate": 3.0297391304347827e-06,
"loss": 0.2772,
"step": 11330
},
{
"epoch": 0.7866464339908953,
"grad_norm": 1.40625,
"learning_rate": 3.0280000000000005e-06,
"loss": 0.2819,
"step": 11340
},
{
"epoch": 0.787340125731628,
"grad_norm": 0.87890625,
"learning_rate": 3.0262608695652174e-06,
"loss": 0.2356,
"step": 11350
},
{
"epoch": 0.7880338174723607,
"grad_norm": 0.859375,
"learning_rate": 3.024521739130435e-06,
"loss": 0.2149,
"step": 11360
},
{
"epoch": 0.7887275092130934,
"grad_norm": 1.265625,
"learning_rate": 3.0227826086956526e-06,
"loss": 0.2252,
"step": 11370
},
{
"epoch": 0.7894212009538262,
"grad_norm": 1.8515625,
"learning_rate": 3.02104347826087e-06,
"loss": 0.2917,
"step": 11380
},
{
"epoch": 0.7901148926945588,
"grad_norm": 1.4296875,
"learning_rate": 3.019304347826087e-06,
"loss": 0.257,
"step": 11390
},
{
"epoch": 0.7908085844352916,
"grad_norm": 1.296875,
"learning_rate": 3.0175652173913043e-06,
"loss": 0.2266,
"step": 11400
},
{
"epoch": 0.7915022761760243,
"grad_norm": 1.0546875,
"learning_rate": 3.015826086956522e-06,
"loss": 0.2351,
"step": 11410
},
{
"epoch": 0.792195967916757,
"grad_norm": 1.46875,
"learning_rate": 3.0140869565217395e-06,
"loss": 0.2555,
"step": 11420
},
{
"epoch": 0.7928896596574897,
"grad_norm": 1.078125,
"learning_rate": 3.0123478260869564e-06,
"loss": 0.2709,
"step": 11430
},
{
"epoch": 0.7935833513982224,
"grad_norm": 1.3359375,
"learning_rate": 3.010608695652174e-06,
"loss": 0.2159,
"step": 11440
},
{
"epoch": 0.7942770431389551,
"grad_norm": 1.4375,
"learning_rate": 3.0088695652173916e-06,
"loss": 0.3059,
"step": 11450
},
{
"epoch": 0.7949707348796878,
"grad_norm": 1.125,
"learning_rate": 3.007130434782609e-06,
"loss": 0.2167,
"step": 11460
},
{
"epoch": 0.7956644266204206,
"grad_norm": 0.84765625,
"learning_rate": 3.0053913043478263e-06,
"loss": 0.2494,
"step": 11470
},
{
"epoch": 0.7963581183611532,
"grad_norm": 1.2421875,
"learning_rate": 3.0036521739130437e-06,
"loss": 0.2852,
"step": 11480
},
{
"epoch": 0.797051810101886,
"grad_norm": 1.171875,
"learning_rate": 3.001913043478261e-06,
"loss": 0.2671,
"step": 11490
},
{
"epoch": 0.7977455018426187,
"grad_norm": 1.265625,
"learning_rate": 3.000173913043479e-06,
"loss": 0.2252,
"step": 11500
},
{
"epoch": 0.7984391935833514,
"grad_norm": 1.171875,
"learning_rate": 2.998434782608696e-06,
"loss": 0.2481,
"step": 11510
},
{
"epoch": 0.7991328853240841,
"grad_norm": 1.2578125,
"learning_rate": 2.996695652173913e-06,
"loss": 0.2194,
"step": 11520
},
{
"epoch": 0.7998265770648169,
"grad_norm": 1.125,
"learning_rate": 2.994956521739131e-06,
"loss": 0.2264,
"step": 11530
},
{
"epoch": 0.8005202688055495,
"grad_norm": 1.3828125,
"learning_rate": 2.9932173913043483e-06,
"loss": 0.2368,
"step": 11540
},
{
"epoch": 0.8012139605462822,
"grad_norm": 1.109375,
"learning_rate": 2.9914782608695653e-06,
"loss": 0.2505,
"step": 11550
},
{
"epoch": 0.801907652287015,
"grad_norm": 1.0546875,
"learning_rate": 2.9897391304347827e-06,
"loss": 0.2175,
"step": 11560
},
{
"epoch": 0.8026013440277476,
"grad_norm": 1.0703125,
"learning_rate": 2.9880000000000004e-06,
"loss": 0.2256,
"step": 11570
},
{
"epoch": 0.8032950357684804,
"grad_norm": 1.140625,
"learning_rate": 2.986260869565218e-06,
"loss": 0.2488,
"step": 11580
},
{
"epoch": 0.8039887275092131,
"grad_norm": 1.3828125,
"learning_rate": 2.9845217391304348e-06,
"loss": 0.2574,
"step": 11590
},
{
"epoch": 0.8046824192499458,
"grad_norm": 1.0234375,
"learning_rate": 2.9827826086956526e-06,
"loss": 0.2263,
"step": 11600
},
{
"epoch": 0.8053761109906785,
"grad_norm": 1.3203125,
"learning_rate": 2.98104347826087e-06,
"loss": 0.2127,
"step": 11610
},
{
"epoch": 0.8060698027314113,
"grad_norm": 1.2109375,
"learning_rate": 2.9793043478260873e-06,
"loss": 0.2328,
"step": 11620
},
{
"epoch": 0.8067634944721439,
"grad_norm": 1.25,
"learning_rate": 2.9775652173913042e-06,
"loss": 0.279,
"step": 11630
},
{
"epoch": 0.8074571862128767,
"grad_norm": 1.265625,
"learning_rate": 2.975826086956522e-06,
"loss": 0.1984,
"step": 11640
},
{
"epoch": 0.8081508779536094,
"grad_norm": 1.203125,
"learning_rate": 2.9740869565217394e-06,
"loss": 0.2232,
"step": 11650
},
{
"epoch": 0.808844569694342,
"grad_norm": 1.3359375,
"learning_rate": 2.9723478260869564e-06,
"loss": 0.2565,
"step": 11660
},
{
"epoch": 0.8095382614350748,
"grad_norm": 1.25,
"learning_rate": 2.970608695652174e-06,
"loss": 0.2786,
"step": 11670
},
{
"epoch": 0.8102319531758075,
"grad_norm": 1.3828125,
"learning_rate": 2.9688695652173915e-06,
"loss": 0.326,
"step": 11680
},
{
"epoch": 0.8109256449165402,
"grad_norm": 1.171875,
"learning_rate": 2.967130434782609e-06,
"loss": 0.3024,
"step": 11690
},
{
"epoch": 0.8116193366572729,
"grad_norm": 1.1328125,
"learning_rate": 2.9653913043478263e-06,
"loss": 0.2609,
"step": 11700
},
{
"epoch": 0.8123130283980057,
"grad_norm": 1.2109375,
"learning_rate": 2.9636521739130436e-06,
"loss": 0.2344,
"step": 11710
},
{
"epoch": 0.8130067201387383,
"grad_norm": 1.4921875,
"learning_rate": 2.961913043478261e-06,
"loss": 0.2705,
"step": 11720
},
{
"epoch": 0.8137004118794711,
"grad_norm": 1.390625,
"learning_rate": 2.960173913043479e-06,
"loss": 0.2354,
"step": 11730
},
{
"epoch": 0.8143941036202038,
"grad_norm": 0.94140625,
"learning_rate": 2.9584347826086958e-06,
"loss": 0.2062,
"step": 11740
},
{
"epoch": 0.8150877953609365,
"grad_norm": 1.3515625,
"learning_rate": 2.956695652173913e-06,
"loss": 0.2292,
"step": 11750
},
{
"epoch": 0.8157814871016692,
"grad_norm": 1.296875,
"learning_rate": 2.954956521739131e-06,
"loss": 0.2944,
"step": 11760
},
{
"epoch": 0.816475178842402,
"grad_norm": 1.2421875,
"learning_rate": 2.9532173913043483e-06,
"loss": 0.2349,
"step": 11770
},
{
"epoch": 0.8171688705831346,
"grad_norm": 1.0859375,
"learning_rate": 2.9514782608695652e-06,
"loss": 0.2075,
"step": 11780
},
{
"epoch": 0.8178625623238673,
"grad_norm": 1.171875,
"learning_rate": 2.9497391304347826e-06,
"loss": 0.2725,
"step": 11790
},
{
"epoch": 0.8185562540646001,
"grad_norm": 0.92578125,
"learning_rate": 2.9480000000000004e-06,
"loss": 0.2386,
"step": 11800
},
{
"epoch": 0.8192499458053327,
"grad_norm": 1.3515625,
"learning_rate": 2.9462608695652178e-06,
"loss": 0.2515,
"step": 11810
},
{
"epoch": 0.8199436375460655,
"grad_norm": 1.296875,
"learning_rate": 2.9445217391304347e-06,
"loss": 0.2432,
"step": 11820
},
{
"epoch": 0.8206373292867982,
"grad_norm": 1.5703125,
"learning_rate": 2.9427826086956525e-06,
"loss": 0.2537,
"step": 11830
},
{
"epoch": 0.8213310210275309,
"grad_norm": 1.234375,
"learning_rate": 2.94104347826087e-06,
"loss": 0.2072,
"step": 11840
},
{
"epoch": 0.8220247127682636,
"grad_norm": 1.0703125,
"learning_rate": 2.9393043478260873e-06,
"loss": 0.215,
"step": 11850
},
{
"epoch": 0.8227184045089964,
"grad_norm": 1.171875,
"learning_rate": 2.937565217391304e-06,
"loss": 0.2244,
"step": 11860
},
{
"epoch": 0.823412096249729,
"grad_norm": 1.1328125,
"learning_rate": 2.935826086956522e-06,
"loss": 0.2119,
"step": 11870
},
{
"epoch": 0.8241057879904617,
"grad_norm": 1.625,
"learning_rate": 2.9340869565217394e-06,
"loss": 0.2654,
"step": 11880
},
{
"epoch": 0.8247994797311945,
"grad_norm": 2.21875,
"learning_rate": 2.932347826086957e-06,
"loss": 0.2935,
"step": 11890
},
{
"epoch": 0.8254931714719271,
"grad_norm": 1.171875,
"learning_rate": 2.930608695652174e-06,
"loss": 0.3218,
"step": 11900
},
{
"epoch": 0.8261868632126599,
"grad_norm": 1.3046875,
"learning_rate": 2.9288695652173915e-06,
"loss": 0.2326,
"step": 11910
},
{
"epoch": 0.8268805549533926,
"grad_norm": 1.3203125,
"learning_rate": 2.927130434782609e-06,
"loss": 0.265,
"step": 11920
},
{
"epoch": 0.8275742466941253,
"grad_norm": 1.3515625,
"learning_rate": 2.9253913043478267e-06,
"loss": 0.2347,
"step": 11930
},
{
"epoch": 0.828267938434858,
"grad_norm": 1.0625,
"learning_rate": 2.9236521739130436e-06,
"loss": 0.2534,
"step": 11940
},
{
"epoch": 0.8289616301755908,
"grad_norm": 1.09375,
"learning_rate": 2.921913043478261e-06,
"loss": 0.2471,
"step": 11950
},
{
"epoch": 0.8296553219163234,
"grad_norm": 1.46875,
"learning_rate": 2.9201739130434788e-06,
"loss": 0.2258,
"step": 11960
},
{
"epoch": 0.8303490136570562,
"grad_norm": 1.0078125,
"learning_rate": 2.9184347826086957e-06,
"loss": 0.2336,
"step": 11970
},
{
"epoch": 0.8310427053977889,
"grad_norm": 0.9296875,
"learning_rate": 2.916695652173913e-06,
"loss": 0.1999,
"step": 11980
},
{
"epoch": 0.8317363971385215,
"grad_norm": 1.3828125,
"learning_rate": 2.914956521739131e-06,
"loss": 0.2423,
"step": 11990
},
{
"epoch": 0.8324300888792543,
"grad_norm": 0.98046875,
"learning_rate": 2.9132173913043483e-06,
"loss": 0.3134,
"step": 12000
},
{
"epoch": 0.833123780619987,
"grad_norm": 1.2109375,
"learning_rate": 2.911478260869565e-06,
"loss": 0.2421,
"step": 12010
},
{
"epoch": 0.8338174723607197,
"grad_norm": 1.2578125,
"learning_rate": 2.9097391304347826e-06,
"loss": 0.2408,
"step": 12020
},
{
"epoch": 0.8345111641014524,
"grad_norm": 1.09375,
"learning_rate": 2.9080000000000004e-06,
"loss": 0.3132,
"step": 12030
},
{
"epoch": 0.8352048558421852,
"grad_norm": 1.4921875,
"learning_rate": 2.9062608695652177e-06,
"loss": 0.3039,
"step": 12040
},
{
"epoch": 0.8358985475829178,
"grad_norm": 1.3046875,
"learning_rate": 2.9045217391304347e-06,
"loss": 0.2368,
"step": 12050
},
{
"epoch": 0.8365922393236506,
"grad_norm": 1.1015625,
"learning_rate": 2.9027826086956525e-06,
"loss": 0.2429,
"step": 12060
},
{
"epoch": 0.8372859310643833,
"grad_norm": 1.34375,
"learning_rate": 2.90104347826087e-06,
"loss": 0.2199,
"step": 12070
},
{
"epoch": 0.837979622805116,
"grad_norm": 0.9609375,
"learning_rate": 2.8993043478260872e-06,
"loss": 0.2216,
"step": 12080
},
{
"epoch": 0.8386733145458487,
"grad_norm": 1.1171875,
"learning_rate": 2.897565217391304e-06,
"loss": 0.2114,
"step": 12090
},
{
"epoch": 0.8393670062865815,
"grad_norm": 1.0859375,
"learning_rate": 2.895826086956522e-06,
"loss": 0.2314,
"step": 12100
},
{
"epoch": 0.8400606980273141,
"grad_norm": 1.2421875,
"learning_rate": 2.8940869565217393e-06,
"loss": 0.2591,
"step": 12110
},
{
"epoch": 0.8407543897680468,
"grad_norm": 1.1953125,
"learning_rate": 2.892347826086957e-06,
"loss": 0.2981,
"step": 12120
},
{
"epoch": 0.8414480815087796,
"grad_norm": 1.2734375,
"learning_rate": 2.890608695652174e-06,
"loss": 0.2615,
"step": 12130
},
{
"epoch": 0.8421417732495122,
"grad_norm": 1.1328125,
"learning_rate": 2.8888695652173914e-06,
"loss": 0.2214,
"step": 12140
},
{
"epoch": 0.842835464990245,
"grad_norm": 1.46875,
"learning_rate": 2.887130434782609e-06,
"loss": 0.2462,
"step": 12150
},
{
"epoch": 0.8435291567309777,
"grad_norm": 1.2109375,
"learning_rate": 2.8853913043478266e-06,
"loss": 0.2383,
"step": 12160
},
{
"epoch": 0.8442228484717104,
"grad_norm": 1.1484375,
"learning_rate": 2.8836521739130436e-06,
"loss": 0.2856,
"step": 12170
},
{
"epoch": 0.8449165402124431,
"grad_norm": 1.4375,
"learning_rate": 2.881913043478261e-06,
"loss": 0.2847,
"step": 12180
},
{
"epoch": 0.8456102319531759,
"grad_norm": 1.15625,
"learning_rate": 2.8801739130434787e-06,
"loss": 0.2508,
"step": 12190
},
{
"epoch": 0.8463039236939085,
"grad_norm": 1.34375,
"learning_rate": 2.878434782608696e-06,
"loss": 0.2317,
"step": 12200
},
{
"epoch": 0.8469976154346412,
"grad_norm": 1.3359375,
"learning_rate": 2.876695652173913e-06,
"loss": 0.3621,
"step": 12210
},
{
"epoch": 0.847691307175374,
"grad_norm": 1.109375,
"learning_rate": 2.874956521739131e-06,
"loss": 0.2664,
"step": 12220
},
{
"epoch": 0.8483849989161066,
"grad_norm": 0.86328125,
"learning_rate": 2.873217391304348e-06,
"loss": 0.2206,
"step": 12230
},
{
"epoch": 0.8490786906568394,
"grad_norm": 1.6484375,
"learning_rate": 2.8714782608695656e-06,
"loss": 0.3451,
"step": 12240
},
{
"epoch": 0.849772382397572,
"grad_norm": 1.5390625,
"learning_rate": 2.8697391304347825e-06,
"loss": 0.2597,
"step": 12250
},
{
"epoch": 0.8504660741383048,
"grad_norm": 1.1328125,
"learning_rate": 2.8680000000000003e-06,
"loss": 0.2391,
"step": 12260
},
{
"epoch": 0.8511597658790375,
"grad_norm": 0.92578125,
"learning_rate": 2.8662608695652177e-06,
"loss": 0.2241,
"step": 12270
},
{
"epoch": 0.8518534576197702,
"grad_norm": 1.1796875,
"learning_rate": 2.8645217391304346e-06,
"loss": 0.2928,
"step": 12280
},
{
"epoch": 0.8525471493605029,
"grad_norm": 1.1640625,
"learning_rate": 2.8627826086956524e-06,
"loss": 0.2468,
"step": 12290
},
{
"epoch": 0.8532408411012357,
"grad_norm": 1.0546875,
"learning_rate": 2.86104347826087e-06,
"loss": 0.2234,
"step": 12300
},
{
"epoch": 0.8539345328419683,
"grad_norm": 1.2421875,
"learning_rate": 2.859304347826087e-06,
"loss": 0.2329,
"step": 12310
},
{
"epoch": 0.854628224582701,
"grad_norm": 1.390625,
"learning_rate": 2.857565217391304e-06,
"loss": 0.2267,
"step": 12320
},
{
"epoch": 0.8553219163234338,
"grad_norm": 1.3203125,
"learning_rate": 2.855826086956522e-06,
"loss": 0.2083,
"step": 12330
},
{
"epoch": 0.8560156080641664,
"grad_norm": 1.0546875,
"learning_rate": 2.8540869565217393e-06,
"loss": 0.2738,
"step": 12340
},
{
"epoch": 0.8567092998048992,
"grad_norm": 1.421875,
"learning_rate": 2.852347826086957e-06,
"loss": 0.2128,
"step": 12350
},
{
"epoch": 0.8574029915456319,
"grad_norm": 1.515625,
"learning_rate": 2.850608695652174e-06,
"loss": 0.2226,
"step": 12360
},
{
"epoch": 0.8580966832863646,
"grad_norm": 1.40625,
"learning_rate": 2.8488695652173914e-06,
"loss": 0.2691,
"step": 12370
},
{
"epoch": 0.8587903750270973,
"grad_norm": 1.3359375,
"learning_rate": 2.8471304347826088e-06,
"loss": 0.2323,
"step": 12380
},
{
"epoch": 0.8594840667678301,
"grad_norm": 0.9921875,
"learning_rate": 2.8453913043478266e-06,
"loss": 0.228,
"step": 12390
},
{
"epoch": 0.8601777585085627,
"grad_norm": 0.96484375,
"learning_rate": 2.8436521739130435e-06,
"loss": 0.2741,
"step": 12400
},
{
"epoch": 0.8608714502492955,
"grad_norm": 1.3046875,
"learning_rate": 2.841913043478261e-06,
"loss": 0.2483,
"step": 12410
},
{
"epoch": 0.8615651419900282,
"grad_norm": 1.3515625,
"learning_rate": 2.8401739130434787e-06,
"loss": 0.2552,
"step": 12420
},
{
"epoch": 0.8622588337307608,
"grad_norm": 1.3828125,
"learning_rate": 2.838434782608696e-06,
"loss": 0.2283,
"step": 12430
},
{
"epoch": 0.8629525254714936,
"grad_norm": 1.2109375,
"learning_rate": 2.836695652173913e-06,
"loss": 0.226,
"step": 12440
},
{
"epoch": 0.8636462172122263,
"grad_norm": 1.3828125,
"learning_rate": 2.834956521739131e-06,
"loss": 0.2357,
"step": 12450
},
{
"epoch": 0.864339908952959,
"grad_norm": 1.1484375,
"learning_rate": 2.833217391304348e-06,
"loss": 0.2832,
"step": 12460
},
{
"epoch": 0.8650336006936917,
"grad_norm": 0.9375,
"learning_rate": 2.8314782608695655e-06,
"loss": 0.2512,
"step": 12470
},
{
"epoch": 0.8657272924344245,
"grad_norm": 1.1015625,
"learning_rate": 2.8297391304347825e-06,
"loss": 0.1749,
"step": 12480
},
{
"epoch": 0.8664209841751571,
"grad_norm": 1.578125,
"learning_rate": 2.8280000000000003e-06,
"loss": 0.3021,
"step": 12490
},
{
"epoch": 0.8671146759158899,
"grad_norm": 1.015625,
"learning_rate": 2.8262608695652177e-06,
"loss": 0.257,
"step": 12500
},
{
"epoch": 0.8678083676566226,
"grad_norm": 1.1328125,
"learning_rate": 2.8245217391304354e-06,
"loss": 0.229,
"step": 12510
},
{
"epoch": 0.8685020593973553,
"grad_norm": 1.0234375,
"learning_rate": 2.8227826086956524e-06,
"loss": 0.2163,
"step": 12520
},
{
"epoch": 0.869195751138088,
"grad_norm": 1.4609375,
"learning_rate": 2.8210434782608698e-06,
"loss": 0.264,
"step": 12530
},
{
"epoch": 0.8698894428788208,
"grad_norm": 1.1171875,
"learning_rate": 2.819304347826087e-06,
"loss": 0.2647,
"step": 12540
},
{
"epoch": 0.8705831346195534,
"grad_norm": 1.25,
"learning_rate": 2.817565217391305e-06,
"loss": 0.2262,
"step": 12550
},
{
"epoch": 0.8712768263602861,
"grad_norm": 1.2578125,
"learning_rate": 2.815826086956522e-06,
"loss": 0.2163,
"step": 12560
},
{
"epoch": 0.8719705181010189,
"grad_norm": 1.3515625,
"learning_rate": 2.8140869565217393e-06,
"loss": 0.2299,
"step": 12570
},
{
"epoch": 0.8726642098417515,
"grad_norm": 1.0703125,
"learning_rate": 2.812347826086957e-06,
"loss": 0.2756,
"step": 12580
},
{
"epoch": 0.8733579015824843,
"grad_norm": 1.3125,
"learning_rate": 2.810608695652174e-06,
"loss": 0.2429,
"step": 12590
},
{
"epoch": 0.874051593323217,
"grad_norm": 1.328125,
"learning_rate": 2.8088695652173914e-06,
"loss": 0.2404,
"step": 12600
},
{
"epoch": 0.8747452850639497,
"grad_norm": 1.1640625,
"learning_rate": 2.8071304347826087e-06,
"loss": 0.2412,
"step": 12610
},
{
"epoch": 0.8754389768046824,
"grad_norm": 1.1875,
"learning_rate": 2.8053913043478265e-06,
"loss": 0.2682,
"step": 12620
},
{
"epoch": 0.8761326685454152,
"grad_norm": 1.203125,
"learning_rate": 2.8036521739130435e-06,
"loss": 0.2255,
"step": 12630
},
{
"epoch": 0.8768263602861478,
"grad_norm": 1.1875,
"learning_rate": 2.801913043478261e-06,
"loss": 0.2766,
"step": 12640
},
{
"epoch": 0.8775200520268805,
"grad_norm": 1.1796875,
"learning_rate": 2.8001739130434786e-06,
"loss": 0.2353,
"step": 12650
},
{
"epoch": 0.8782137437676133,
"grad_norm": 0.8671875,
"learning_rate": 2.798434782608696e-06,
"loss": 0.1858,
"step": 12660
},
{
"epoch": 0.8789074355083459,
"grad_norm": 1.2890625,
"learning_rate": 2.796695652173913e-06,
"loss": 0.214,
"step": 12670
},
{
"epoch": 0.8796011272490787,
"grad_norm": 1.40625,
"learning_rate": 2.7949565217391308e-06,
"loss": 0.2371,
"step": 12680
},
{
"epoch": 0.8802948189898114,
"grad_norm": 1.2734375,
"learning_rate": 2.793217391304348e-06,
"loss": 0.23,
"step": 12690
},
{
"epoch": 0.8809885107305441,
"grad_norm": 1.28125,
"learning_rate": 2.7914782608695655e-06,
"loss": 0.2453,
"step": 12700
},
{
"epoch": 0.8816822024712768,
"grad_norm": 1.265625,
"learning_rate": 2.7897391304347824e-06,
"loss": 0.2131,
"step": 12710
},
{
"epoch": 0.8823758942120096,
"grad_norm": 1.390625,
"learning_rate": 2.7880000000000002e-06,
"loss": 0.242,
"step": 12720
},
{
"epoch": 0.8830695859527422,
"grad_norm": 0.953125,
"learning_rate": 2.7862608695652176e-06,
"loss": 0.3201,
"step": 12730
},
{
"epoch": 0.883763277693475,
"grad_norm": 0.94140625,
"learning_rate": 2.7845217391304354e-06,
"loss": 0.2541,
"step": 12740
},
{
"epoch": 0.8844569694342077,
"grad_norm": 1.0859375,
"learning_rate": 2.7827826086956524e-06,
"loss": 0.2767,
"step": 12750
},
{
"epoch": 0.8851506611749403,
"grad_norm": 1.28125,
"learning_rate": 2.7810434782608697e-06,
"loss": 0.2321,
"step": 12760
},
{
"epoch": 0.8858443529156731,
"grad_norm": 1.203125,
"learning_rate": 2.779304347826087e-06,
"loss": 0.2789,
"step": 12770
},
{
"epoch": 0.8865380446564058,
"grad_norm": 1.5546875,
"learning_rate": 2.777565217391305e-06,
"loss": 0.2188,
"step": 12780
},
{
"epoch": 0.8872317363971385,
"grad_norm": 1.21875,
"learning_rate": 2.775826086956522e-06,
"loss": 0.2593,
"step": 12790
},
{
"epoch": 0.8879254281378712,
"grad_norm": 1.015625,
"learning_rate": 2.774086956521739e-06,
"loss": 0.215,
"step": 12800
},
{
"epoch": 0.888619119878604,
"grad_norm": 1.3203125,
"learning_rate": 2.772347826086957e-06,
"loss": 0.2497,
"step": 12810
},
{
"epoch": 0.8893128116193366,
"grad_norm": 0.984375,
"learning_rate": 2.7706086956521744e-06,
"loss": 0.2003,
"step": 12820
},
{
"epoch": 0.8900065033600694,
"grad_norm": 1.171875,
"learning_rate": 2.7688695652173913e-06,
"loss": 0.2473,
"step": 12830
},
{
"epoch": 0.8907001951008021,
"grad_norm": 0.90234375,
"learning_rate": 2.7671304347826087e-06,
"loss": 0.2628,
"step": 12840
},
{
"epoch": 0.8913938868415348,
"grad_norm": 1.0546875,
"learning_rate": 2.7653913043478265e-06,
"loss": 0.2674,
"step": 12850
},
{
"epoch": 0.8920875785822675,
"grad_norm": 1.3828125,
"learning_rate": 2.763652173913044e-06,
"loss": 0.2468,
"step": 12860
},
{
"epoch": 0.8927812703230003,
"grad_norm": 1.109375,
"learning_rate": 2.761913043478261e-06,
"loss": 0.2161,
"step": 12870
},
{
"epoch": 0.8934749620637329,
"grad_norm": 1.1484375,
"learning_rate": 2.7601739130434786e-06,
"loss": 0.2284,
"step": 12880
},
{
"epoch": 0.8941686538044656,
"grad_norm": 1.1640625,
"learning_rate": 2.758434782608696e-06,
"loss": 0.2432,
"step": 12890
},
{
"epoch": 0.8948623455451984,
"grad_norm": 1.3359375,
"learning_rate": 2.756695652173913e-06,
"loss": 0.2964,
"step": 12900
},
{
"epoch": 0.895556037285931,
"grad_norm": 1.375,
"learning_rate": 2.7549565217391307e-06,
"loss": 0.2177,
"step": 12910
},
{
"epoch": 0.8962497290266638,
"grad_norm": 1.484375,
"learning_rate": 2.753217391304348e-06,
"loss": 0.2864,
"step": 12920
},
{
"epoch": 0.8969434207673965,
"grad_norm": 1.078125,
"learning_rate": 2.7514782608695655e-06,
"loss": 0.265,
"step": 12930
},
{
"epoch": 0.8976371125081292,
"grad_norm": 1.53125,
"learning_rate": 2.7497391304347824e-06,
"loss": 0.3763,
"step": 12940
},
{
"epoch": 0.8983308042488619,
"grad_norm": 1.1640625,
"learning_rate": 2.748e-06,
"loss": 0.2525,
"step": 12950
},
{
"epoch": 0.8990244959895947,
"grad_norm": 1.3984375,
"learning_rate": 2.7462608695652176e-06,
"loss": 0.2358,
"step": 12960
},
{
"epoch": 0.8997181877303273,
"grad_norm": 1.40625,
"learning_rate": 2.7445217391304354e-06,
"loss": 0.2315,
"step": 12970
},
{
"epoch": 0.90041187947106,
"grad_norm": 1.328125,
"learning_rate": 2.7427826086956523e-06,
"loss": 0.2688,
"step": 12980
},
{
"epoch": 0.9011055712117928,
"grad_norm": 1.4296875,
"learning_rate": 2.7410434782608697e-06,
"loss": 0.2491,
"step": 12990
},
{
"epoch": 0.9017992629525254,
"grad_norm": 1.3203125,
"learning_rate": 2.739304347826087e-06,
"loss": 0.2402,
"step": 13000
},
{
"epoch": 0.9024929546932582,
"grad_norm": 1.15625,
"learning_rate": 2.737565217391305e-06,
"loss": 0.258,
"step": 13010
},
{
"epoch": 0.9031866464339909,
"grad_norm": 1.109375,
"learning_rate": 2.735826086956522e-06,
"loss": 0.263,
"step": 13020
},
{
"epoch": 0.9038803381747236,
"grad_norm": 1.203125,
"learning_rate": 2.734086956521739e-06,
"loss": 0.2312,
"step": 13030
},
{
"epoch": 0.9045740299154563,
"grad_norm": 1.1875,
"learning_rate": 2.732347826086957e-06,
"loss": 0.2835,
"step": 13040
},
{
"epoch": 0.9052677216561891,
"grad_norm": 1.1796875,
"learning_rate": 2.7306086956521743e-06,
"loss": 0.2328,
"step": 13050
},
{
"epoch": 0.9059614133969217,
"grad_norm": 1.2734375,
"learning_rate": 2.7288695652173913e-06,
"loss": 0.2367,
"step": 13060
},
{
"epoch": 0.9066551051376545,
"grad_norm": 1.6796875,
"learning_rate": 2.7271304347826087e-06,
"loss": 0.2629,
"step": 13070
},
{
"epoch": 0.9073487968783872,
"grad_norm": 1.1171875,
"learning_rate": 2.7253913043478264e-06,
"loss": 0.2833,
"step": 13080
},
{
"epoch": 0.9080424886191198,
"grad_norm": 1.078125,
"learning_rate": 2.723652173913044e-06,
"loss": 0.2077,
"step": 13090
},
{
"epoch": 0.9087361803598526,
"grad_norm": 1.2734375,
"learning_rate": 2.7219130434782608e-06,
"loss": 0.213,
"step": 13100
},
{
"epoch": 0.9094298721005853,
"grad_norm": 1.3515625,
"learning_rate": 2.7201739130434786e-06,
"loss": 0.2784,
"step": 13110
},
{
"epoch": 0.910123563841318,
"grad_norm": 1.3203125,
"learning_rate": 2.718434782608696e-06,
"loss": 0.2505,
"step": 13120
},
{
"epoch": 0.9108172555820507,
"grad_norm": 1.171875,
"learning_rate": 2.7166956521739133e-06,
"loss": 0.2348,
"step": 13130
},
{
"epoch": 0.9115109473227835,
"grad_norm": 1.828125,
"learning_rate": 2.7149565217391307e-06,
"loss": 0.2753,
"step": 13140
},
{
"epoch": 0.9122046390635161,
"grad_norm": 1.5625,
"learning_rate": 2.713217391304348e-06,
"loss": 0.263,
"step": 13150
},
{
"epoch": 0.9128983308042489,
"grad_norm": 1.1875,
"learning_rate": 2.7114782608695654e-06,
"loss": 0.2445,
"step": 13160
},
{
"epoch": 0.9135920225449816,
"grad_norm": 1.5078125,
"learning_rate": 2.7097391304347832e-06,
"loss": 0.2877,
"step": 13170
},
{
"epoch": 0.9142857142857143,
"grad_norm": 1.0859375,
"learning_rate": 2.708e-06,
"loss": 0.2414,
"step": 13180
},
{
"epoch": 0.914979406026447,
"grad_norm": 1.171875,
"learning_rate": 2.7062608695652175e-06,
"loss": 0.2208,
"step": 13190
},
{
"epoch": 0.9156730977671798,
"grad_norm": 1.21875,
"learning_rate": 2.7045217391304353e-06,
"loss": 0.2381,
"step": 13200
},
{
"epoch": 0.9163667895079124,
"grad_norm": 1.5078125,
"learning_rate": 2.7027826086956523e-06,
"loss": 0.2036,
"step": 13210
},
{
"epoch": 0.9170604812486451,
"grad_norm": 1.2109375,
"learning_rate": 2.7010434782608696e-06,
"loss": 0.2259,
"step": 13220
},
{
"epoch": 0.9177541729893779,
"grad_norm": 1.0078125,
"learning_rate": 2.699304347826087e-06,
"loss": 0.2317,
"step": 13230
},
{
"epoch": 0.9184478647301105,
"grad_norm": 1.359375,
"learning_rate": 2.697565217391305e-06,
"loss": 0.2817,
"step": 13240
},
{
"epoch": 0.9191415564708433,
"grad_norm": 1.46875,
"learning_rate": 2.6958260869565218e-06,
"loss": 0.2527,
"step": 13250
},
{
"epoch": 0.919835248211576,
"grad_norm": 1.15625,
"learning_rate": 2.694086956521739e-06,
"loss": 0.2398,
"step": 13260
},
{
"epoch": 0.9205289399523087,
"grad_norm": 1.203125,
"learning_rate": 2.692347826086957e-06,
"loss": 0.2632,
"step": 13270
},
{
"epoch": 0.9212226316930414,
"grad_norm": 1.2109375,
"learning_rate": 2.6906086956521743e-06,
"loss": 0.218,
"step": 13280
},
{
"epoch": 0.9219163234337742,
"grad_norm": 1.296875,
"learning_rate": 2.6888695652173912e-06,
"loss": 0.2906,
"step": 13290
},
{
"epoch": 0.9226100151745068,
"grad_norm": 1.21875,
"learning_rate": 2.6871304347826086e-06,
"loss": 0.233,
"step": 13300
},
{
"epoch": 0.9233037069152396,
"grad_norm": 1.1875,
"learning_rate": 2.6853913043478264e-06,
"loss": 0.2417,
"step": 13310
},
{
"epoch": 0.9239973986559723,
"grad_norm": 1.3671875,
"learning_rate": 2.6836521739130438e-06,
"loss": 0.3028,
"step": 13320
},
{
"epoch": 0.9246910903967049,
"grad_norm": 1.2578125,
"learning_rate": 2.6819130434782607e-06,
"loss": 0.2826,
"step": 13330
},
{
"epoch": 0.9253847821374377,
"grad_norm": 1.3515625,
"learning_rate": 2.6801739130434785e-06,
"loss": 0.238,
"step": 13340
},
{
"epoch": 0.9260784738781704,
"grad_norm": 1.0234375,
"learning_rate": 2.678434782608696e-06,
"loss": 0.2322,
"step": 13350
},
{
"epoch": 0.9267721656189031,
"grad_norm": 1.3515625,
"learning_rate": 2.6766956521739133e-06,
"loss": 0.2983,
"step": 13360
},
{
"epoch": 0.9274658573596358,
"grad_norm": 0.9609375,
"learning_rate": 2.6749565217391306e-06,
"loss": 0.2692,
"step": 13370
},
{
"epoch": 0.9281595491003686,
"grad_norm": 1.28125,
"learning_rate": 2.673217391304348e-06,
"loss": 0.2064,
"step": 13380
},
{
"epoch": 0.9288532408411012,
"grad_norm": 1.265625,
"learning_rate": 2.6714782608695654e-06,
"loss": 0.2225,
"step": 13390
},
{
"epoch": 0.929546932581834,
"grad_norm": 1.375,
"learning_rate": 2.669739130434783e-06,
"loss": 0.2397,
"step": 13400
},
{
"epoch": 0.9302406243225667,
"grad_norm": 1.28125,
"learning_rate": 2.668e-06,
"loss": 0.238,
"step": 13410
},
{
"epoch": 0.9309343160632993,
"grad_norm": 1.8828125,
"learning_rate": 2.6662608695652175e-06,
"loss": 0.2753,
"step": 13420
},
{
"epoch": 0.9316280078040321,
"grad_norm": 0.8671875,
"learning_rate": 2.6645217391304353e-06,
"loss": 0.225,
"step": 13430
},
{
"epoch": 0.9323216995447648,
"grad_norm": 1.2421875,
"learning_rate": 2.6627826086956527e-06,
"loss": 0.2449,
"step": 13440
},
{
"epoch": 0.9330153912854975,
"grad_norm": 0.98046875,
"learning_rate": 2.6610434782608696e-06,
"loss": 0.2416,
"step": 13450
},
{
"epoch": 0.9337090830262302,
"grad_norm": 1.0703125,
"learning_rate": 2.659304347826087e-06,
"loss": 0.217,
"step": 13460
},
{
"epoch": 0.934402774766963,
"grad_norm": 1.5078125,
"learning_rate": 2.6575652173913048e-06,
"loss": 0.2213,
"step": 13470
},
{
"epoch": 0.9350964665076956,
"grad_norm": 1.0390625,
"learning_rate": 2.655826086956522e-06,
"loss": 0.3161,
"step": 13480
},
{
"epoch": 0.9357901582484284,
"grad_norm": 1.5234375,
"learning_rate": 2.654086956521739e-06,
"loss": 0.2487,
"step": 13490
},
{
"epoch": 0.9364838499891611,
"grad_norm": 1.6796875,
"learning_rate": 2.652347826086957e-06,
"loss": 0.3213,
"step": 13500
},
{
"epoch": 0.9371775417298938,
"grad_norm": 1.1875,
"learning_rate": 2.6506086956521743e-06,
"loss": 0.2486,
"step": 13510
},
{
"epoch": 0.9378712334706265,
"grad_norm": 1.5078125,
"learning_rate": 2.648869565217391e-06,
"loss": 0.2348,
"step": 13520
},
{
"epoch": 0.9385649252113591,
"grad_norm": 1.0859375,
"learning_rate": 2.6471304347826086e-06,
"loss": 0.2137,
"step": 13530
},
{
"epoch": 0.9392586169520919,
"grad_norm": 1.1640625,
"learning_rate": 2.6453913043478264e-06,
"loss": 0.2431,
"step": 13540
},
{
"epoch": 0.9399523086928246,
"grad_norm": 0.82421875,
"learning_rate": 2.6436521739130437e-06,
"loss": 0.2795,
"step": 13550
},
{
"epoch": 0.9406460004335573,
"grad_norm": 1.109375,
"learning_rate": 2.6419130434782607e-06,
"loss": 0.2674,
"step": 13560
},
{
"epoch": 0.94133969217429,
"grad_norm": 1.0859375,
"learning_rate": 2.6401739130434785e-06,
"loss": 0.2346,
"step": 13570
},
{
"epoch": 0.9420333839150228,
"grad_norm": 1.3125,
"learning_rate": 2.638434782608696e-06,
"loss": 0.2416,
"step": 13580
},
{
"epoch": 0.9427270756557554,
"grad_norm": 1.703125,
"learning_rate": 2.6366956521739132e-06,
"loss": 0.2999,
"step": 13590
},
{
"epoch": 0.9434207673964882,
"grad_norm": 1.1015625,
"learning_rate": 2.6349565217391306e-06,
"loss": 0.1903,
"step": 13600
},
{
"epoch": 0.9441144591372209,
"grad_norm": 0.96875,
"learning_rate": 2.633217391304348e-06,
"loss": 0.2281,
"step": 13610
},
{
"epoch": 0.9448081508779536,
"grad_norm": 1.203125,
"learning_rate": 2.6314782608695653e-06,
"loss": 0.2115,
"step": 13620
},
{
"epoch": 0.9455018426186863,
"grad_norm": 1.2578125,
"learning_rate": 2.629739130434783e-06,
"loss": 0.2522,
"step": 13630
},
{
"epoch": 0.946195534359419,
"grad_norm": 1.1796875,
"learning_rate": 2.628e-06,
"loss": 0.2353,
"step": 13640
},
{
"epoch": 0.9468892261001517,
"grad_norm": 1.3671875,
"learning_rate": 2.6262608695652175e-06,
"loss": 0.2585,
"step": 13650
},
{
"epoch": 0.9475829178408844,
"grad_norm": 1.0546875,
"learning_rate": 2.6245217391304352e-06,
"loss": 0.2067,
"step": 13660
},
{
"epoch": 0.9482766095816172,
"grad_norm": 1.4609375,
"learning_rate": 2.6227826086956526e-06,
"loss": 0.2414,
"step": 13670
},
{
"epoch": 0.9489703013223498,
"grad_norm": 0.9921875,
"learning_rate": 2.6210434782608696e-06,
"loss": 0.2992,
"step": 13680
},
{
"epoch": 0.9496639930630826,
"grad_norm": 1.578125,
"learning_rate": 2.619304347826087e-06,
"loss": 0.2543,
"step": 13690
},
{
"epoch": 0.9503576848038153,
"grad_norm": 1.3046875,
"learning_rate": 2.6175652173913047e-06,
"loss": 0.2452,
"step": 13700
},
{
"epoch": 0.951051376544548,
"grad_norm": 1.2421875,
"learning_rate": 2.615826086956522e-06,
"loss": 0.2564,
"step": 13710
},
{
"epoch": 0.9517450682852807,
"grad_norm": 1.2109375,
"learning_rate": 2.614086956521739e-06,
"loss": 0.2446,
"step": 13720
},
{
"epoch": 0.9524387600260135,
"grad_norm": 1.3515625,
"learning_rate": 2.612347826086957e-06,
"loss": 0.2343,
"step": 13730
},
{
"epoch": 0.9531324517667461,
"grad_norm": 1.0546875,
"learning_rate": 2.6106086956521742e-06,
"loss": 0.2255,
"step": 13740
},
{
"epoch": 0.9538261435074789,
"grad_norm": 1.6328125,
"learning_rate": 2.6088695652173916e-06,
"loss": 0.2345,
"step": 13750
},
{
"epoch": 0.9545198352482116,
"grad_norm": 1.4921875,
"learning_rate": 2.6071304347826085e-06,
"loss": 0.2453,
"step": 13760
},
{
"epoch": 0.9552135269889442,
"grad_norm": 1.2421875,
"learning_rate": 2.6053913043478263e-06,
"loss": 0.2377,
"step": 13770
},
{
"epoch": 0.955907218729677,
"grad_norm": 0.88671875,
"learning_rate": 2.6036521739130437e-06,
"loss": 0.2412,
"step": 13780
},
{
"epoch": 0.9566009104704097,
"grad_norm": 1.265625,
"learning_rate": 2.6019130434782615e-06,
"loss": 0.3032,
"step": 13790
},
{
"epoch": 0.9572946022111424,
"grad_norm": 1.5703125,
"learning_rate": 2.6001739130434784e-06,
"loss": 0.2317,
"step": 13800
},
{
"epoch": 0.9579882939518751,
"grad_norm": 1.125,
"learning_rate": 2.598434782608696e-06,
"loss": 0.2275,
"step": 13810
},
{
"epoch": 0.9586819856926079,
"grad_norm": 1.25,
"learning_rate": 2.596695652173913e-06,
"loss": 0.2155,
"step": 13820
},
{
"epoch": 0.9593756774333405,
"grad_norm": 1.2734375,
"learning_rate": 2.5949565217391306e-06,
"loss": 0.2261,
"step": 13830
},
{
"epoch": 0.9600693691740733,
"grad_norm": 1.3203125,
"learning_rate": 2.593217391304348e-06,
"loss": 0.2183,
"step": 13840
},
{
"epoch": 0.960763060914806,
"grad_norm": 1.328125,
"learning_rate": 2.5914782608695653e-06,
"loss": 0.2203,
"step": 13850
},
{
"epoch": 0.9614567526555386,
"grad_norm": 1.2421875,
"learning_rate": 2.589739130434783e-06,
"loss": 0.2219,
"step": 13860
},
{
"epoch": 0.9621504443962714,
"grad_norm": 1.0546875,
"learning_rate": 2.588e-06,
"loss": 0.25,
"step": 13870
},
{
"epoch": 0.9628441361370041,
"grad_norm": 0.921875,
"learning_rate": 2.5862608695652174e-06,
"loss": 0.2036,
"step": 13880
},
{
"epoch": 0.9635378278777368,
"grad_norm": 1.015625,
"learning_rate": 2.584521739130435e-06,
"loss": 0.2385,
"step": 13890
},
{
"epoch": 0.9642315196184695,
"grad_norm": 1.2421875,
"learning_rate": 2.5827826086956526e-06,
"loss": 0.2036,
"step": 13900
},
{
"epoch": 0.9649252113592023,
"grad_norm": 1.3203125,
"learning_rate": 2.5810434782608695e-06,
"loss": 0.2155,
"step": 13910
},
{
"epoch": 0.9656189030999349,
"grad_norm": 0.93359375,
"learning_rate": 2.579304347826087e-06,
"loss": 0.238,
"step": 13920
},
{
"epoch": 0.9663125948406677,
"grad_norm": 1.1640625,
"learning_rate": 2.5775652173913047e-06,
"loss": 0.2178,
"step": 13930
},
{
"epoch": 0.9670062865814004,
"grad_norm": 1.15625,
"learning_rate": 2.575826086956522e-06,
"loss": 0.2891,
"step": 13940
},
{
"epoch": 0.9676999783221331,
"grad_norm": 1.140625,
"learning_rate": 2.574086956521739e-06,
"loss": 0.2354,
"step": 13950
},
{
"epoch": 0.9683936700628658,
"grad_norm": 1.40625,
"learning_rate": 2.572347826086957e-06,
"loss": 0.2305,
"step": 13960
},
{
"epoch": 0.9690873618035986,
"grad_norm": 1.0625,
"learning_rate": 2.570608695652174e-06,
"loss": 0.2175,
"step": 13970
},
{
"epoch": 0.9697810535443312,
"grad_norm": 1.1484375,
"learning_rate": 2.5688695652173915e-06,
"loss": 0.232,
"step": 13980
},
{
"epoch": 0.9704747452850639,
"grad_norm": 1.03125,
"learning_rate": 2.5671304347826085e-06,
"loss": 0.2387,
"step": 13990
},
{
"epoch": 0.9711684370257967,
"grad_norm": 0.8671875,
"learning_rate": 2.5653913043478263e-06,
"loss": 0.3122,
"step": 14000
},
{
"epoch": 0.9718621287665293,
"grad_norm": 1.1171875,
"learning_rate": 2.5636521739130437e-06,
"loss": 0.2158,
"step": 14010
},
{
"epoch": 0.9725558205072621,
"grad_norm": 0.75390625,
"learning_rate": 2.5619130434782615e-06,
"loss": 0.222,
"step": 14020
},
{
"epoch": 0.9732495122479948,
"grad_norm": 1.125,
"learning_rate": 2.5601739130434784e-06,
"loss": 0.2692,
"step": 14030
},
{
"epoch": 0.9739432039887275,
"grad_norm": 1.4453125,
"learning_rate": 2.5584347826086958e-06,
"loss": 0.2721,
"step": 14040
},
{
"epoch": 0.9746368957294602,
"grad_norm": 1.5234375,
"learning_rate": 2.5566956521739136e-06,
"loss": 0.2335,
"step": 14050
},
{
"epoch": 0.975330587470193,
"grad_norm": 1.0703125,
"learning_rate": 2.554956521739131e-06,
"loss": 0.2132,
"step": 14060
},
{
"epoch": 0.9760242792109256,
"grad_norm": 1.25,
"learning_rate": 2.553217391304348e-06,
"loss": 0.2424,
"step": 14070
},
{
"epoch": 0.9767179709516584,
"grad_norm": 1.140625,
"learning_rate": 2.5514782608695653e-06,
"loss": 0.2583,
"step": 14080
},
{
"epoch": 0.9774116626923911,
"grad_norm": 1.21875,
"learning_rate": 2.549739130434783e-06,
"loss": 0.2165,
"step": 14090
},
{
"epoch": 0.9781053544331237,
"grad_norm": 1.2734375,
"learning_rate": 2.5480000000000004e-06,
"loss": 0.2442,
"step": 14100
},
{
"epoch": 0.9787990461738565,
"grad_norm": 1.21875,
"learning_rate": 2.5462608695652174e-06,
"loss": 0.3018,
"step": 14110
},
{
"epoch": 0.9794927379145892,
"grad_norm": 1.1640625,
"learning_rate": 2.544521739130435e-06,
"loss": 0.2094,
"step": 14120
},
{
"epoch": 0.9801864296553219,
"grad_norm": 1.0859375,
"learning_rate": 2.5427826086956525e-06,
"loss": 0.2546,
"step": 14130
},
{
"epoch": 0.9808801213960546,
"grad_norm": 1.3515625,
"learning_rate": 2.5410434782608695e-06,
"loss": 0.2204,
"step": 14140
},
{
"epoch": 0.9815738131367874,
"grad_norm": 1.3515625,
"learning_rate": 2.539304347826087e-06,
"loss": 0.176,
"step": 14150
},
{
"epoch": 0.98226750487752,
"grad_norm": 1.015625,
"learning_rate": 2.5375652173913046e-06,
"loss": 0.2255,
"step": 14160
},
{
"epoch": 0.9829611966182528,
"grad_norm": 0.9765625,
"learning_rate": 2.535826086956522e-06,
"loss": 0.228,
"step": 14170
},
{
"epoch": 0.9836548883589855,
"grad_norm": 1.421875,
"learning_rate": 2.534086956521739e-06,
"loss": 0.2245,
"step": 14180
},
{
"epoch": 0.9843485800997182,
"grad_norm": 1.0703125,
"learning_rate": 2.5323478260869568e-06,
"loss": 0.2602,
"step": 14190
},
{
"epoch": 0.9850422718404509,
"grad_norm": 1.453125,
"learning_rate": 2.530608695652174e-06,
"loss": 0.3022,
"step": 14200
},
{
"epoch": 0.9857359635811836,
"grad_norm": 1.2421875,
"learning_rate": 2.5288695652173915e-06,
"loss": 0.2025,
"step": 14210
},
{
"epoch": 0.9864296553219163,
"grad_norm": 1.140625,
"learning_rate": 2.527130434782609e-06,
"loss": 0.2147,
"step": 14220
},
{
"epoch": 0.987123347062649,
"grad_norm": 1.28125,
"learning_rate": 2.5253913043478262e-06,
"loss": 0.2519,
"step": 14230
},
{
"epoch": 0.9878170388033818,
"grad_norm": 1.15625,
"learning_rate": 2.5236521739130436e-06,
"loss": 0.2693,
"step": 14240
},
{
"epoch": 0.9885107305441144,
"grad_norm": 0.90625,
"learning_rate": 2.5219130434782614e-06,
"loss": 0.246,
"step": 14250
},
{
"epoch": 0.9892044222848472,
"grad_norm": 1.125,
"learning_rate": 2.5201739130434784e-06,
"loss": 0.235,
"step": 14260
},
{
"epoch": 0.9898981140255799,
"grad_norm": 1.1640625,
"learning_rate": 2.5184347826086957e-06,
"loss": 0.2056,
"step": 14270
},
{
"epoch": 0.9905918057663126,
"grad_norm": 1.2578125,
"learning_rate": 2.5166956521739135e-06,
"loss": 0.2063,
"step": 14280
},
{
"epoch": 0.9912854975070453,
"grad_norm": 1.484375,
"learning_rate": 2.514956521739131e-06,
"loss": 0.2341,
"step": 14290
},
{
"epoch": 0.9919791892477781,
"grad_norm": 1.3671875,
"learning_rate": 2.513217391304348e-06,
"loss": 0.21,
"step": 14300
},
{
"epoch": 0.9926728809885107,
"grad_norm": 1.4140625,
"learning_rate": 2.5114782608695652e-06,
"loss": 0.2236,
"step": 14310
},
{
"epoch": 0.9933665727292434,
"grad_norm": 1.2109375,
"learning_rate": 2.509739130434783e-06,
"loss": 0.2257,
"step": 14320
},
{
"epoch": 0.9940602644699762,
"grad_norm": 1.4453125,
"learning_rate": 2.5080000000000004e-06,
"loss": 0.2403,
"step": 14330
},
{
"epoch": 0.9947539562107088,
"grad_norm": 1.0390625,
"learning_rate": 2.5062608695652173e-06,
"loss": 0.2051,
"step": 14340
},
{
"epoch": 0.9954476479514416,
"grad_norm": 0.94921875,
"learning_rate": 2.504521739130435e-06,
"loss": 0.2264,
"step": 14350
},
{
"epoch": 0.9961413396921743,
"grad_norm": 1.1953125,
"learning_rate": 2.5027826086956525e-06,
"loss": 0.2702,
"step": 14360
},
{
"epoch": 0.996835031432907,
"grad_norm": 0.92578125,
"learning_rate": 2.50104347826087e-06,
"loss": 0.2241,
"step": 14370
},
{
"epoch": 0.9975287231736397,
"grad_norm": 1.0625,
"learning_rate": 2.4993043478260872e-06,
"loss": 0.2191,
"step": 14380
},
{
"epoch": 0.9982224149143725,
"grad_norm": 1.5546875,
"learning_rate": 2.4975652173913046e-06,
"loss": 0.3005,
"step": 14390
},
{
"epoch": 0.9989161066551051,
"grad_norm": 1.2578125,
"learning_rate": 2.495826086956522e-06,
"loss": 0.3011,
"step": 14400
},
{
"epoch": 0.9996097983958379,
"grad_norm": 1.7109375,
"learning_rate": 2.4940869565217394e-06,
"loss": 0.2558,
"step": 14410
},
{
"epoch": 1.0002774766962932,
"grad_norm": 1.484375,
"learning_rate": 2.4923478260869567e-06,
"loss": 0.2414,
"step": 14420
},
{
"epoch": 1.0009711684370257,
"grad_norm": 1.03125,
"learning_rate": 2.490608695652174e-06,
"loss": 0.2269,
"step": 14430
},
{
"epoch": 1.0016648601777585,
"grad_norm": 1.203125,
"learning_rate": 2.4888695652173915e-06,
"loss": 0.2052,
"step": 14440
},
{
"epoch": 1.0023585519184912,
"grad_norm": 1.09375,
"learning_rate": 2.487130434782609e-06,
"loss": 0.2343,
"step": 14450
},
{
"epoch": 1.003052243659224,
"grad_norm": 1.046875,
"learning_rate": 2.485391304347826e-06,
"loss": 0.2279,
"step": 14460
},
{
"epoch": 1.0037459353999567,
"grad_norm": 0.94921875,
"learning_rate": 2.4836521739130436e-06,
"loss": 0.2713,
"step": 14470
},
{
"epoch": 1.0044396271406895,
"grad_norm": 1.2109375,
"learning_rate": 2.481913043478261e-06,
"loss": 0.2522,
"step": 14480
},
{
"epoch": 1.005133318881422,
"grad_norm": 1.3046875,
"learning_rate": 2.4801739130434783e-06,
"loss": 0.2336,
"step": 14490
},
{
"epoch": 1.0058270106221547,
"grad_norm": 1.140625,
"learning_rate": 2.4784347826086957e-06,
"loss": 0.228,
"step": 14500
},
{
"epoch": 1.0065207023628875,
"grad_norm": 1.1640625,
"learning_rate": 2.4766956521739135e-06,
"loss": 0.2082,
"step": 14510
},
{
"epoch": 1.0072143941036202,
"grad_norm": 1.25,
"learning_rate": 2.4749565217391304e-06,
"loss": 0.2847,
"step": 14520
},
{
"epoch": 1.007908085844353,
"grad_norm": 0.9453125,
"learning_rate": 2.4732173913043482e-06,
"loss": 0.2132,
"step": 14530
},
{
"epoch": 1.0086017775850857,
"grad_norm": 1.3671875,
"learning_rate": 2.471478260869565e-06,
"loss": 0.2656,
"step": 14540
},
{
"epoch": 1.0092954693258183,
"grad_norm": 1.3046875,
"learning_rate": 2.469739130434783e-06,
"loss": 0.2238,
"step": 14550
},
{
"epoch": 1.009989161066551,
"grad_norm": 1.6640625,
"learning_rate": 2.468e-06,
"loss": 0.3057,
"step": 14560
},
{
"epoch": 1.0106828528072838,
"grad_norm": 1.1484375,
"learning_rate": 2.4662608695652177e-06,
"loss": 0.2365,
"step": 14570
},
{
"epoch": 1.0113765445480165,
"grad_norm": 0.98046875,
"learning_rate": 2.464521739130435e-06,
"loss": 0.2215,
"step": 14580
},
{
"epoch": 1.0120702362887493,
"grad_norm": 1.4921875,
"learning_rate": 2.4627826086956525e-06,
"loss": 0.2266,
"step": 14590
},
{
"epoch": 1.012763928029482,
"grad_norm": 2.265625,
"learning_rate": 2.46104347826087e-06,
"loss": 0.2978,
"step": 14600
},
{
"epoch": 1.0134576197702145,
"grad_norm": 1.328125,
"learning_rate": 2.459304347826087e-06,
"loss": 0.2397,
"step": 14610
},
{
"epoch": 1.0141513115109473,
"grad_norm": 1.2109375,
"learning_rate": 2.4575652173913046e-06,
"loss": 0.3155,
"step": 14620
},
{
"epoch": 1.01484500325168,
"grad_norm": 1.3828125,
"learning_rate": 2.455826086956522e-06,
"loss": 0.2873,
"step": 14630
},
{
"epoch": 1.0155386949924128,
"grad_norm": 1.7734375,
"learning_rate": 2.4540869565217393e-06,
"loss": 0.2612,
"step": 14640
},
{
"epoch": 1.0162323867331455,
"grad_norm": 1.1171875,
"learning_rate": 2.4523478260869567e-06,
"loss": 0.2231,
"step": 14650
},
{
"epoch": 1.0169260784738783,
"grad_norm": 1.5234375,
"learning_rate": 2.450608695652174e-06,
"loss": 0.3073,
"step": 14660
},
{
"epoch": 1.0176197702146108,
"grad_norm": 1.1875,
"learning_rate": 2.4488695652173914e-06,
"loss": 0.3001,
"step": 14670
},
{
"epoch": 1.0183134619553436,
"grad_norm": 1.2109375,
"learning_rate": 2.447130434782609e-06,
"loss": 0.2108,
"step": 14680
},
{
"epoch": 1.0190071536960763,
"grad_norm": 1.1953125,
"learning_rate": 2.4453913043478266e-06,
"loss": 0.2234,
"step": 14690
},
{
"epoch": 1.019700845436809,
"grad_norm": 1.25,
"learning_rate": 2.4436521739130435e-06,
"loss": 0.235,
"step": 14700
},
{
"epoch": 1.0203945371775418,
"grad_norm": 1.1171875,
"learning_rate": 2.4419130434782613e-06,
"loss": 0.2167,
"step": 14710
},
{
"epoch": 1.0210882289182743,
"grad_norm": 0.97265625,
"learning_rate": 2.4401739130434783e-06,
"loss": 0.2383,
"step": 14720
},
{
"epoch": 1.021781920659007,
"grad_norm": 1.375,
"learning_rate": 2.438434782608696e-06,
"loss": 0.2186,
"step": 14730
},
{
"epoch": 1.0224756123997398,
"grad_norm": 1.1015625,
"learning_rate": 2.4366956521739134e-06,
"loss": 0.2027,
"step": 14740
},
{
"epoch": 1.0231693041404726,
"grad_norm": 1.078125,
"learning_rate": 2.4349565217391304e-06,
"loss": 0.2397,
"step": 14750
},
{
"epoch": 1.0238629958812053,
"grad_norm": 0.9921875,
"learning_rate": 2.433217391304348e-06,
"loss": 0.2239,
"step": 14760
},
{
"epoch": 1.024556687621938,
"grad_norm": 1.21875,
"learning_rate": 2.431478260869565e-06,
"loss": 0.2252,
"step": 14770
},
{
"epoch": 1.0252503793626706,
"grad_norm": 1.296875,
"learning_rate": 2.429739130434783e-06,
"loss": 0.2448,
"step": 14780
},
{
"epoch": 1.0259440711034034,
"grad_norm": 1.375,
"learning_rate": 2.428e-06,
"loss": 0.2628,
"step": 14790
},
{
"epoch": 1.026637762844136,
"grad_norm": 1.5703125,
"learning_rate": 2.4262608695652177e-06,
"loss": 0.2583,
"step": 14800
},
{
"epoch": 1.0273314545848689,
"grad_norm": 1.171875,
"learning_rate": 2.424521739130435e-06,
"loss": 0.2329,
"step": 14810
},
{
"epoch": 1.0280251463256016,
"grad_norm": 1.171875,
"learning_rate": 2.4227826086956524e-06,
"loss": 0.2318,
"step": 14820
},
{
"epoch": 1.0287188380663344,
"grad_norm": 1.46875,
"learning_rate": 2.4210434782608698e-06,
"loss": 0.2351,
"step": 14830
},
{
"epoch": 1.0294125298070669,
"grad_norm": 1.078125,
"learning_rate": 2.419304347826087e-06,
"loss": 0.2533,
"step": 14840
},
{
"epoch": 1.0301062215477996,
"grad_norm": 1.6328125,
"learning_rate": 2.4175652173913045e-06,
"loss": 0.2254,
"step": 14850
},
{
"epoch": 1.0307999132885324,
"grad_norm": 0.90234375,
"learning_rate": 2.415826086956522e-06,
"loss": 0.2149,
"step": 14860
},
{
"epoch": 1.0314936050292651,
"grad_norm": 1.4453125,
"learning_rate": 2.4140869565217393e-06,
"loss": 0.2698,
"step": 14870
},
{
"epoch": 1.0321872967699979,
"grad_norm": 1.40625,
"learning_rate": 2.4123478260869566e-06,
"loss": 0.1998,
"step": 14880
},
{
"epoch": 1.0328809885107306,
"grad_norm": 1.1484375,
"learning_rate": 2.410608695652174e-06,
"loss": 0.241,
"step": 14890
},
{
"epoch": 1.0335746802514632,
"grad_norm": 1.0859375,
"learning_rate": 2.4088695652173914e-06,
"loss": 0.2182,
"step": 14900
},
{
"epoch": 1.034268371992196,
"grad_norm": 1.3359375,
"learning_rate": 2.4071304347826088e-06,
"loss": 0.2561,
"step": 14910
},
{
"epoch": 1.0349620637329286,
"grad_norm": 1.0859375,
"learning_rate": 2.4053913043478265e-06,
"loss": 0.2321,
"step": 14920
},
{
"epoch": 1.0356557554736614,
"grad_norm": 1.3828125,
"learning_rate": 2.4036521739130435e-06,
"loss": 0.2393,
"step": 14930
},
{
"epoch": 1.0363494472143941,
"grad_norm": 0.8984375,
"learning_rate": 2.4019130434782613e-06,
"loss": 0.23,
"step": 14940
},
{
"epoch": 1.037043138955127,
"grad_norm": 1.125,
"learning_rate": 2.4001739130434782e-06,
"loss": 0.2397,
"step": 14950
},
{
"epoch": 1.0377368306958594,
"grad_norm": 1.078125,
"learning_rate": 2.398434782608696e-06,
"loss": 0.3271,
"step": 14960
},
{
"epoch": 1.0384305224365922,
"grad_norm": 1.1953125,
"learning_rate": 2.3966956521739134e-06,
"loss": 0.2324,
"step": 14970
},
{
"epoch": 1.039124214177325,
"grad_norm": 1.265625,
"learning_rate": 2.3949565217391308e-06,
"loss": 0.2342,
"step": 14980
},
{
"epoch": 1.0398179059180577,
"grad_norm": 1.0625,
"learning_rate": 2.393217391304348e-06,
"loss": 0.2481,
"step": 14990
},
{
"epoch": 1.0405115976587904,
"grad_norm": 1.1875,
"learning_rate": 2.3914782608695655e-06,
"loss": 0.2157,
"step": 15000
},
{
"epoch": 1.0412052893995232,
"grad_norm": 1.515625,
"learning_rate": 2.389739130434783e-06,
"loss": 0.2339,
"step": 15010
},
{
"epoch": 1.0418989811402557,
"grad_norm": 1.1484375,
"learning_rate": 2.3880000000000003e-06,
"loss": 0.2366,
"step": 15020
},
{
"epoch": 1.0425926728809884,
"grad_norm": 1.0859375,
"learning_rate": 2.3862608695652176e-06,
"loss": 0.227,
"step": 15030
},
{
"epoch": 1.0432863646217212,
"grad_norm": 1.1796875,
"learning_rate": 2.384521739130435e-06,
"loss": 0.2355,
"step": 15040
},
{
"epoch": 1.043980056362454,
"grad_norm": 1.25,
"learning_rate": 2.3827826086956524e-06,
"loss": 0.2874,
"step": 15050
},
{
"epoch": 1.0446737481031867,
"grad_norm": 1.0703125,
"learning_rate": 2.3810434782608697e-06,
"loss": 0.235,
"step": 15060
},
{
"epoch": 1.0453674398439194,
"grad_norm": 1.0546875,
"learning_rate": 2.379304347826087e-06,
"loss": 0.2298,
"step": 15070
},
{
"epoch": 1.046061131584652,
"grad_norm": 1.328125,
"learning_rate": 2.3775652173913045e-06,
"loss": 0.2549,
"step": 15080
},
{
"epoch": 1.0467548233253847,
"grad_norm": 1.0859375,
"learning_rate": 2.375826086956522e-06,
"loss": 0.1974,
"step": 15090
},
{
"epoch": 1.0474485150661175,
"grad_norm": 1.359375,
"learning_rate": 2.3740869565217392e-06,
"loss": 0.2338,
"step": 15100
},
{
"epoch": 1.0481422068068502,
"grad_norm": 1.640625,
"learning_rate": 2.3723478260869566e-06,
"loss": 0.2567,
"step": 15110
},
{
"epoch": 1.048835898547583,
"grad_norm": 1.453125,
"learning_rate": 2.370608695652174e-06,
"loss": 0.2979,
"step": 15120
},
{
"epoch": 1.0495295902883157,
"grad_norm": 1.328125,
"learning_rate": 2.3688695652173913e-06,
"loss": 0.2831,
"step": 15130
},
{
"epoch": 1.0502232820290482,
"grad_norm": 1.15625,
"learning_rate": 2.3671304347826087e-06,
"loss": 0.2648,
"step": 15140
},
{
"epoch": 1.050916973769781,
"grad_norm": 1.1015625,
"learning_rate": 2.3653913043478265e-06,
"loss": 0.2652,
"step": 15150
},
{
"epoch": 1.0516106655105137,
"grad_norm": 0.89453125,
"learning_rate": 2.3636521739130435e-06,
"loss": 0.1974,
"step": 15160
},
{
"epoch": 1.0523043572512465,
"grad_norm": 1.0,
"learning_rate": 2.3619130434782613e-06,
"loss": 0.2288,
"step": 15170
},
{
"epoch": 1.0529980489919792,
"grad_norm": 1.0546875,
"learning_rate": 2.360173913043478e-06,
"loss": 0.2257,
"step": 15180
},
{
"epoch": 1.053691740732712,
"grad_norm": 1.0078125,
"learning_rate": 2.358434782608696e-06,
"loss": 0.2343,
"step": 15190
},
{
"epoch": 1.0543854324734445,
"grad_norm": 1.78125,
"learning_rate": 2.3566956521739134e-06,
"loss": 0.2327,
"step": 15200
},
{
"epoch": 1.0550791242141773,
"grad_norm": 1.1875,
"learning_rate": 2.3549565217391307e-06,
"loss": 0.2529,
"step": 15210
},
{
"epoch": 1.05577281595491,
"grad_norm": 1.09375,
"learning_rate": 2.353217391304348e-06,
"loss": 0.2498,
"step": 15220
},
{
"epoch": 1.0564665076956428,
"grad_norm": 1.046875,
"learning_rate": 2.3514782608695655e-06,
"loss": 0.2349,
"step": 15230
},
{
"epoch": 1.0571601994363755,
"grad_norm": 0.9765625,
"learning_rate": 2.349739130434783e-06,
"loss": 0.2656,
"step": 15240
},
{
"epoch": 1.0578538911771083,
"grad_norm": 1.25,
"learning_rate": 2.3480000000000002e-06,
"loss": 0.2531,
"step": 15250
},
{
"epoch": 1.0585475829178408,
"grad_norm": 1.6171875,
"learning_rate": 2.3462608695652176e-06,
"loss": 0.2521,
"step": 15260
},
{
"epoch": 1.0592412746585735,
"grad_norm": 1.375,
"learning_rate": 2.344521739130435e-06,
"loss": 0.266,
"step": 15270
},
{
"epoch": 1.0599349663993063,
"grad_norm": 1.171875,
"learning_rate": 2.3427826086956523e-06,
"loss": 0.2564,
"step": 15280
},
{
"epoch": 1.060628658140039,
"grad_norm": 1.1171875,
"learning_rate": 2.3410434782608697e-06,
"loss": 0.2466,
"step": 15290
},
{
"epoch": 1.0613223498807718,
"grad_norm": 1.125,
"learning_rate": 2.339304347826087e-06,
"loss": 0.2282,
"step": 15300
},
{
"epoch": 1.0620160416215045,
"grad_norm": 1.1796875,
"learning_rate": 2.3375652173913044e-06,
"loss": 0.3059,
"step": 15310
},
{
"epoch": 1.062709733362237,
"grad_norm": 1.265625,
"learning_rate": 2.335826086956522e-06,
"loss": 0.3653,
"step": 15320
},
{
"epoch": 1.0634034251029698,
"grad_norm": 1.265625,
"learning_rate": 2.3340869565217396e-06,
"loss": 0.228,
"step": 15330
},
{
"epoch": 1.0640971168437026,
"grad_norm": 1.0390625,
"learning_rate": 2.3323478260869566e-06,
"loss": 0.2498,
"step": 15340
},
{
"epoch": 1.0647908085844353,
"grad_norm": 1.0078125,
"learning_rate": 2.3306086956521744e-06,
"loss": 0.2772,
"step": 15350
},
{
"epoch": 1.065484500325168,
"grad_norm": 1.1015625,
"learning_rate": 2.3288695652173913e-06,
"loss": 0.2754,
"step": 15360
},
{
"epoch": 1.0661781920659008,
"grad_norm": 1.140625,
"learning_rate": 2.3271304347826087e-06,
"loss": 0.2358,
"step": 15370
},
{
"epoch": 1.0668718838066333,
"grad_norm": 1.265625,
"learning_rate": 2.3253913043478265e-06,
"loss": 0.3299,
"step": 15380
},
{
"epoch": 1.067565575547366,
"grad_norm": 1.1171875,
"learning_rate": 2.3236521739130434e-06,
"loss": 0.2327,
"step": 15390
},
{
"epoch": 1.0682592672880988,
"grad_norm": 1.1328125,
"learning_rate": 2.321913043478261e-06,
"loss": 0.2344,
"step": 15400
},
{
"epoch": 1.0689529590288316,
"grad_norm": 1.3203125,
"learning_rate": 2.320173913043478e-06,
"loss": 0.2527,
"step": 15410
},
{
"epoch": 1.0696466507695643,
"grad_norm": 1.625,
"learning_rate": 2.318434782608696e-06,
"loss": 0.2592,
"step": 15420
},
{
"epoch": 1.070340342510297,
"grad_norm": 1.3203125,
"learning_rate": 2.3166956521739133e-06,
"loss": 0.271,
"step": 15430
},
{
"epoch": 1.0710340342510296,
"grad_norm": 1.2109375,
"learning_rate": 2.3149565217391307e-06,
"loss": 0.2486,
"step": 15440
},
{
"epoch": 1.0717277259917624,
"grad_norm": 1.09375,
"learning_rate": 2.313217391304348e-06,
"loss": 0.2046,
"step": 15450
},
{
"epoch": 1.072421417732495,
"grad_norm": 1.3984375,
"learning_rate": 2.3114782608695654e-06,
"loss": 0.2947,
"step": 15460
},
{
"epoch": 1.0731151094732279,
"grad_norm": 1.078125,
"learning_rate": 2.309739130434783e-06,
"loss": 0.2328,
"step": 15470
},
{
"epoch": 1.0738088012139606,
"grad_norm": 1.2734375,
"learning_rate": 2.308e-06,
"loss": 0.2694,
"step": 15480
},
{
"epoch": 1.0745024929546934,
"grad_norm": 1.171875,
"learning_rate": 2.3062608695652176e-06,
"loss": 0.2282,
"step": 15490
},
{
"epoch": 1.0751961846954259,
"grad_norm": 1.15625,
"learning_rate": 2.304521739130435e-06,
"loss": 0.2505,
"step": 15500
},
{
"epoch": 1.0758898764361586,
"grad_norm": 1.34375,
"learning_rate": 2.3027826086956523e-06,
"loss": 0.219,
"step": 15510
},
{
"epoch": 1.0765835681768914,
"grad_norm": 1.2578125,
"learning_rate": 2.3010434782608697e-06,
"loss": 0.2595,
"step": 15520
},
{
"epoch": 1.0772772599176241,
"grad_norm": 0.9609375,
"learning_rate": 2.299304347826087e-06,
"loss": 0.2773,
"step": 15530
},
{
"epoch": 1.0779709516583569,
"grad_norm": 1.0546875,
"learning_rate": 2.2975652173913044e-06,
"loss": 0.2845,
"step": 15540
},
{
"epoch": 1.0786646433990896,
"grad_norm": 1.3203125,
"learning_rate": 2.2958260869565218e-06,
"loss": 0.2097,
"step": 15550
},
{
"epoch": 1.0793583351398222,
"grad_norm": 1.390625,
"learning_rate": 2.2940869565217396e-06,
"loss": 0.2575,
"step": 15560
},
{
"epoch": 1.080052026880555,
"grad_norm": 1.421875,
"learning_rate": 2.2923478260869565e-06,
"loss": 0.2349,
"step": 15570
},
{
"epoch": 1.0807457186212877,
"grad_norm": 1.203125,
"learning_rate": 2.2906086956521743e-06,
"loss": 0.2144,
"step": 15580
},
{
"epoch": 1.0814394103620204,
"grad_norm": 1.5234375,
"learning_rate": 2.2888695652173913e-06,
"loss": 0.2695,
"step": 15590
},
{
"epoch": 1.0821331021027532,
"grad_norm": 1.171875,
"learning_rate": 2.287130434782609e-06,
"loss": 0.2336,
"step": 15600
},
{
"epoch": 1.082826793843486,
"grad_norm": 1.046875,
"learning_rate": 2.2853913043478264e-06,
"loss": 0.2254,
"step": 15610
},
{
"epoch": 1.0835204855842184,
"grad_norm": 1.0546875,
"learning_rate": 2.283652173913044e-06,
"loss": 0.1932,
"step": 15620
},
{
"epoch": 1.0842141773249512,
"grad_norm": 1.40625,
"learning_rate": 2.281913043478261e-06,
"loss": 0.2222,
"step": 15630
},
{
"epoch": 1.084907869065684,
"grad_norm": 1.1640625,
"learning_rate": 2.2801739130434785e-06,
"loss": 0.197,
"step": 15640
},
{
"epoch": 1.0856015608064167,
"grad_norm": 1.203125,
"learning_rate": 2.278434782608696e-06,
"loss": 0.2951,
"step": 15650
},
{
"epoch": 1.0862952525471494,
"grad_norm": 1.1953125,
"learning_rate": 2.2766956521739133e-06,
"loss": 0.2323,
"step": 15660
},
{
"epoch": 1.0869889442878822,
"grad_norm": 1.2109375,
"learning_rate": 2.2749565217391307e-06,
"loss": 0.2331,
"step": 15670
},
{
"epoch": 1.0876826360286147,
"grad_norm": 1.0234375,
"learning_rate": 2.273217391304348e-06,
"loss": 0.2251,
"step": 15680
},
{
"epoch": 1.0883763277693475,
"grad_norm": 1.2265625,
"learning_rate": 2.2714782608695654e-06,
"loss": 0.2431,
"step": 15690
},
{
"epoch": 1.0890700195100802,
"grad_norm": 1.2109375,
"learning_rate": 2.2697391304347828e-06,
"loss": 0.2459,
"step": 15700
},
{
"epoch": 1.089763711250813,
"grad_norm": 1.046875,
"learning_rate": 2.268e-06,
"loss": 0.1969,
"step": 15710
},
{
"epoch": 1.0904574029915457,
"grad_norm": 1.3125,
"learning_rate": 2.2662608695652175e-06,
"loss": 0.2647,
"step": 15720
},
{
"epoch": 1.0911510947322784,
"grad_norm": 1.0546875,
"learning_rate": 2.264521739130435e-06,
"loss": 0.2552,
"step": 15730
},
{
"epoch": 1.091844786473011,
"grad_norm": 1.0625,
"learning_rate": 2.2627826086956523e-06,
"loss": 0.2258,
"step": 15740
},
{
"epoch": 1.0925384782137437,
"grad_norm": 1.203125,
"learning_rate": 2.2610434782608696e-06,
"loss": 0.268,
"step": 15750
},
{
"epoch": 1.0932321699544765,
"grad_norm": 1.09375,
"learning_rate": 2.259304347826087e-06,
"loss": 0.2232,
"step": 15760
},
{
"epoch": 1.0939258616952092,
"grad_norm": 1.625,
"learning_rate": 2.2575652173913044e-06,
"loss": 0.2888,
"step": 15770
},
{
"epoch": 1.094619553435942,
"grad_norm": 1.03125,
"learning_rate": 2.2558260869565217e-06,
"loss": 0.2236,
"step": 15780
},
{
"epoch": 1.0953132451766745,
"grad_norm": 1.3515625,
"learning_rate": 2.2540869565217395e-06,
"loss": 0.2405,
"step": 15790
},
{
"epoch": 1.0960069369174072,
"grad_norm": 1.484375,
"learning_rate": 2.2523478260869565e-06,
"loss": 0.2135,
"step": 15800
},
{
"epoch": 1.09670062865814,
"grad_norm": 1.171875,
"learning_rate": 2.2506086956521743e-06,
"loss": 0.2673,
"step": 15810
},
{
"epoch": 1.0973943203988727,
"grad_norm": 1.0,
"learning_rate": 2.2488695652173912e-06,
"loss": 0.2108,
"step": 15820
},
{
"epoch": 1.0980880121396055,
"grad_norm": 1.3046875,
"learning_rate": 2.247130434782609e-06,
"loss": 0.2335,
"step": 15830
},
{
"epoch": 1.0987817038803382,
"grad_norm": 1.3828125,
"learning_rate": 2.2453913043478264e-06,
"loss": 0.2494,
"step": 15840
},
{
"epoch": 1.099475395621071,
"grad_norm": 1.8125,
"learning_rate": 2.2436521739130438e-06,
"loss": 0.371,
"step": 15850
},
{
"epoch": 1.1001690873618035,
"grad_norm": 1.1171875,
"learning_rate": 2.241913043478261e-06,
"loss": 0.3074,
"step": 15860
},
{
"epoch": 1.1008627791025363,
"grad_norm": 1.15625,
"learning_rate": 2.2401739130434785e-06,
"loss": 0.2505,
"step": 15870
},
{
"epoch": 1.101556470843269,
"grad_norm": 1.0859375,
"learning_rate": 2.238434782608696e-06,
"loss": 0.2206,
"step": 15880
},
{
"epoch": 1.1022501625840018,
"grad_norm": 1.1015625,
"learning_rate": 2.2366956521739132e-06,
"loss": 0.28,
"step": 15890
},
{
"epoch": 1.1029438543247345,
"grad_norm": 1.1484375,
"learning_rate": 2.2349565217391306e-06,
"loss": 0.2172,
"step": 15900
},
{
"epoch": 1.103637546065467,
"grad_norm": 1.140625,
"learning_rate": 2.233217391304348e-06,
"loss": 0.2868,
"step": 15910
},
{
"epoch": 1.1043312378061998,
"grad_norm": 0.91796875,
"learning_rate": 2.2314782608695654e-06,
"loss": 0.2454,
"step": 15920
},
{
"epoch": 1.1050249295469325,
"grad_norm": 1.4609375,
"learning_rate": 2.2297391304347827e-06,
"loss": 0.2951,
"step": 15930
},
{
"epoch": 1.1057186212876653,
"grad_norm": 1.1640625,
"learning_rate": 2.228e-06,
"loss": 0.2183,
"step": 15940
},
{
"epoch": 1.106412313028398,
"grad_norm": 1.0546875,
"learning_rate": 2.226260869565218e-06,
"loss": 0.2182,
"step": 15950
},
{
"epoch": 1.1071060047691308,
"grad_norm": 1.1875,
"learning_rate": 2.224521739130435e-06,
"loss": 0.1977,
"step": 15960
},
{
"epoch": 1.1077996965098635,
"grad_norm": 1.4140625,
"learning_rate": 2.2227826086956526e-06,
"loss": 0.2098,
"step": 15970
},
{
"epoch": 1.108493388250596,
"grad_norm": 0.9375,
"learning_rate": 2.2210434782608696e-06,
"loss": 0.2466,
"step": 15980
},
{
"epoch": 1.1091870799913288,
"grad_norm": 1.0390625,
"learning_rate": 2.219304347826087e-06,
"loss": 0.2097,
"step": 15990
},
{
"epoch": 1.1098807717320616,
"grad_norm": 1.125,
"learning_rate": 2.2175652173913043e-06,
"loss": 0.2058,
"step": 16000
},
{
"epoch": 1.1105744634727943,
"grad_norm": 1.703125,
"learning_rate": 2.2158260869565217e-06,
"loss": 0.2597,
"step": 16010
},
{
"epoch": 1.111268155213527,
"grad_norm": 1.28125,
"learning_rate": 2.2140869565217395e-06,
"loss": 0.2305,
"step": 16020
},
{
"epoch": 1.1119618469542596,
"grad_norm": 1.2265625,
"learning_rate": 2.2123478260869564e-06,
"loss": 0.2129,
"step": 16030
},
{
"epoch": 1.1126555386949923,
"grad_norm": 0.984375,
"learning_rate": 2.2106086956521742e-06,
"loss": 0.2537,
"step": 16040
},
{
"epoch": 1.113349230435725,
"grad_norm": 1.0703125,
"learning_rate": 2.208869565217391e-06,
"loss": 0.2468,
"step": 16050
},
{
"epoch": 1.1140429221764578,
"grad_norm": 1.078125,
"learning_rate": 2.207130434782609e-06,
"loss": 0.2024,
"step": 16060
},
{
"epoch": 1.1147366139171906,
"grad_norm": 1.4375,
"learning_rate": 2.2053913043478263e-06,
"loss": 0.2803,
"step": 16070
},
{
"epoch": 1.1154303056579233,
"grad_norm": 1.2421875,
"learning_rate": 2.2036521739130437e-06,
"loss": 0.2135,
"step": 16080
},
{
"epoch": 1.116123997398656,
"grad_norm": 1.0078125,
"learning_rate": 2.201913043478261e-06,
"loss": 0.2603,
"step": 16090
},
{
"epoch": 1.1168176891393886,
"grad_norm": 1.0703125,
"learning_rate": 2.2001739130434785e-06,
"loss": 0.2369,
"step": 16100
},
{
"epoch": 1.1175113808801214,
"grad_norm": 1.1953125,
"learning_rate": 2.198434782608696e-06,
"loss": 0.2354,
"step": 16110
},
{
"epoch": 1.1182050726208541,
"grad_norm": 1.109375,
"learning_rate": 2.196695652173913e-06,
"loss": 0.1965,
"step": 16120
},
{
"epoch": 1.1188987643615869,
"grad_norm": 1.0546875,
"learning_rate": 2.1949565217391306e-06,
"loss": 0.2309,
"step": 16130
},
{
"epoch": 1.1195924561023196,
"grad_norm": 1.140625,
"learning_rate": 2.193217391304348e-06,
"loss": 0.244,
"step": 16140
},
{
"epoch": 1.1202861478430521,
"grad_norm": 1.3984375,
"learning_rate": 2.1914782608695653e-06,
"loss": 0.2881,
"step": 16150
},
{
"epoch": 1.1209798395837849,
"grad_norm": 1.109375,
"learning_rate": 2.1897391304347827e-06,
"loss": 0.2484,
"step": 16160
},
{
"epoch": 1.1216735313245176,
"grad_norm": 1.0703125,
"learning_rate": 2.188e-06,
"loss": 0.2577,
"step": 16170
},
{
"epoch": 1.1223672230652504,
"grad_norm": 1.25,
"learning_rate": 2.186260869565218e-06,
"loss": 0.2607,
"step": 16180
},
{
"epoch": 1.1230609148059831,
"grad_norm": 1.7109375,
"learning_rate": 2.184521739130435e-06,
"loss": 0.2481,
"step": 16190
},
{
"epoch": 1.1237546065467159,
"grad_norm": 1.0234375,
"learning_rate": 2.1827826086956526e-06,
"loss": 0.2204,
"step": 16200
},
{
"epoch": 1.1244482982874484,
"grad_norm": 1.328125,
"learning_rate": 2.1810434782608695e-06,
"loss": 0.2596,
"step": 16210
},
{
"epoch": 1.1251419900281812,
"grad_norm": 1.109375,
"learning_rate": 2.1793043478260873e-06,
"loss": 0.2827,
"step": 16220
},
{
"epoch": 1.125835681768914,
"grad_norm": 1.109375,
"learning_rate": 2.1775652173913047e-06,
"loss": 0.2533,
"step": 16230
},
{
"epoch": 1.1265293735096467,
"grad_norm": 0.953125,
"learning_rate": 2.175826086956522e-06,
"loss": 0.2578,
"step": 16240
},
{
"epoch": 1.1272230652503794,
"grad_norm": 1.125,
"learning_rate": 2.1740869565217395e-06,
"loss": 0.2142,
"step": 16250
},
{
"epoch": 1.1279167569911122,
"grad_norm": 1.1953125,
"learning_rate": 2.172347826086957e-06,
"loss": 0.2088,
"step": 16260
},
{
"epoch": 1.1286104487318447,
"grad_norm": 1.109375,
"learning_rate": 2.170608695652174e-06,
"loss": 0.2214,
"step": 16270
},
{
"epoch": 1.1293041404725774,
"grad_norm": 1.0703125,
"learning_rate": 2.1688695652173916e-06,
"loss": 0.2313,
"step": 16280
},
{
"epoch": 1.1299978322133102,
"grad_norm": 1.0859375,
"learning_rate": 2.167130434782609e-06,
"loss": 0.2488,
"step": 16290
},
{
"epoch": 1.130691523954043,
"grad_norm": 1.1171875,
"learning_rate": 2.1653913043478263e-06,
"loss": 0.2474,
"step": 16300
},
{
"epoch": 1.1313852156947757,
"grad_norm": 1.2421875,
"learning_rate": 2.1636521739130437e-06,
"loss": 0.2476,
"step": 16310
},
{
"epoch": 1.1320789074355084,
"grad_norm": 1.03125,
"learning_rate": 2.161913043478261e-06,
"loss": 0.2353,
"step": 16320
},
{
"epoch": 1.1327725991762412,
"grad_norm": 0.796875,
"learning_rate": 2.1601739130434784e-06,
"loss": 0.2156,
"step": 16330
},
{
"epoch": 1.1334662909169737,
"grad_norm": 1.1640625,
"learning_rate": 2.158434782608696e-06,
"loss": 0.2233,
"step": 16340
},
{
"epoch": 1.1341599826577065,
"grad_norm": 1.375,
"learning_rate": 2.156695652173913e-06,
"loss": 0.2461,
"step": 16350
},
{
"epoch": 1.1348536743984392,
"grad_norm": 0.96875,
"learning_rate": 2.1549565217391305e-06,
"loss": 0.2388,
"step": 16360
},
{
"epoch": 1.135547366139172,
"grad_norm": 1.140625,
"learning_rate": 2.153217391304348e-06,
"loss": 0.1843,
"step": 16370
},
{
"epoch": 1.1362410578799047,
"grad_norm": 1.328125,
"learning_rate": 2.1514782608695653e-06,
"loss": 0.2327,
"step": 16380
},
{
"epoch": 1.1369347496206372,
"grad_norm": 1.2421875,
"learning_rate": 2.1497391304347826e-06,
"loss": 0.364,
"step": 16390
},
{
"epoch": 1.13762844136137,
"grad_norm": 1.078125,
"learning_rate": 2.148e-06,
"loss": 0.2377,
"step": 16400
},
{
"epoch": 1.1383221331021027,
"grad_norm": 1.2734375,
"learning_rate": 2.146260869565218e-06,
"loss": 0.2195,
"step": 16410
},
{
"epoch": 1.1390158248428355,
"grad_norm": 1.2265625,
"learning_rate": 2.1445217391304348e-06,
"loss": 0.2238,
"step": 16420
},
{
"epoch": 1.1397095165835682,
"grad_norm": 1.1875,
"learning_rate": 2.1427826086956526e-06,
"loss": 0.2505,
"step": 16430
},
{
"epoch": 1.140403208324301,
"grad_norm": 1.0625,
"learning_rate": 2.1410434782608695e-06,
"loss": 0.2871,
"step": 16440
},
{
"epoch": 1.1410969000650335,
"grad_norm": 0.98828125,
"learning_rate": 2.1393043478260873e-06,
"loss": 0.248,
"step": 16450
},
{
"epoch": 1.1417905918057663,
"grad_norm": 0.98828125,
"learning_rate": 2.1375652173913047e-06,
"loss": 0.2396,
"step": 16460
},
{
"epoch": 1.142484283546499,
"grad_norm": 1.171875,
"learning_rate": 2.135826086956522e-06,
"loss": 0.225,
"step": 16470
},
{
"epoch": 1.1431779752872318,
"grad_norm": 1.0390625,
"learning_rate": 2.1340869565217394e-06,
"loss": 0.2223,
"step": 16480
},
{
"epoch": 1.1438716670279645,
"grad_norm": 1.2109375,
"learning_rate": 2.1323478260869568e-06,
"loss": 0.2289,
"step": 16490
},
{
"epoch": 1.1445653587686972,
"grad_norm": 1.2734375,
"learning_rate": 2.130608695652174e-06,
"loss": 0.2196,
"step": 16500
},
{
"epoch": 1.1452590505094298,
"grad_norm": 1.375,
"learning_rate": 2.1288695652173915e-06,
"loss": 0.3,
"step": 16510
},
{
"epoch": 1.1459527422501625,
"grad_norm": 1.078125,
"learning_rate": 2.127130434782609e-06,
"loss": 0.2349,
"step": 16520
},
{
"epoch": 1.1466464339908953,
"grad_norm": 1.2578125,
"learning_rate": 2.1253913043478263e-06,
"loss": 0.2661,
"step": 16530
},
{
"epoch": 1.147340125731628,
"grad_norm": 1.0546875,
"learning_rate": 2.1236521739130436e-06,
"loss": 0.2279,
"step": 16540
},
{
"epoch": 1.1480338174723608,
"grad_norm": 0.9375,
"learning_rate": 2.121913043478261e-06,
"loss": 0.1994,
"step": 16550
},
{
"epoch": 1.1487275092130935,
"grad_norm": 1.1328125,
"learning_rate": 2.1201739130434784e-06,
"loss": 0.2183,
"step": 16560
},
{
"epoch": 1.149421200953826,
"grad_norm": 1.2109375,
"learning_rate": 2.1184347826086957e-06,
"loss": 0.213,
"step": 16570
},
{
"epoch": 1.1501148926945588,
"grad_norm": 1.140625,
"learning_rate": 2.116695652173913e-06,
"loss": 0.2149,
"step": 16580
},
{
"epoch": 1.1508085844352915,
"grad_norm": 1.171875,
"learning_rate": 2.114956521739131e-06,
"loss": 0.2344,
"step": 16590
},
{
"epoch": 1.1515022761760243,
"grad_norm": 1.1640625,
"learning_rate": 2.113217391304348e-06,
"loss": 0.307,
"step": 16600
},
{
"epoch": 1.152195967916757,
"grad_norm": 1.125,
"learning_rate": 2.1114782608695652e-06,
"loss": 0.2201,
"step": 16610
},
{
"epoch": 1.1528896596574898,
"grad_norm": 1.234375,
"learning_rate": 2.1097391304347826e-06,
"loss": 0.2571,
"step": 16620
},
{
"epoch": 1.1535833513982223,
"grad_norm": 1.4140625,
"learning_rate": 2.108e-06,
"loss": 0.2089,
"step": 16630
},
{
"epoch": 1.154277043138955,
"grad_norm": 1.25,
"learning_rate": 2.1062608695652178e-06,
"loss": 0.3009,
"step": 16640
},
{
"epoch": 1.1549707348796878,
"grad_norm": 1.15625,
"learning_rate": 2.1045217391304347e-06,
"loss": 0.2993,
"step": 16650
},
{
"epoch": 1.1556644266204206,
"grad_norm": 1.03125,
"learning_rate": 2.1027826086956525e-06,
"loss": 0.2074,
"step": 16660
},
{
"epoch": 1.1563581183611533,
"grad_norm": 1.2578125,
"learning_rate": 2.1010434782608695e-06,
"loss": 0.2703,
"step": 16670
},
{
"epoch": 1.157051810101886,
"grad_norm": 0.91796875,
"learning_rate": 2.0993043478260873e-06,
"loss": 0.2232,
"step": 16680
},
{
"epoch": 1.1577455018426186,
"grad_norm": 1.1484375,
"learning_rate": 2.0975652173913046e-06,
"loss": 0.2249,
"step": 16690
},
{
"epoch": 1.1584391935833513,
"grad_norm": 1.3984375,
"learning_rate": 2.095826086956522e-06,
"loss": 0.2514,
"step": 16700
},
{
"epoch": 1.159132885324084,
"grad_norm": 1.6171875,
"learning_rate": 2.0940869565217394e-06,
"loss": 0.2142,
"step": 16710
},
{
"epoch": 1.1598265770648168,
"grad_norm": 1.1640625,
"learning_rate": 2.0923478260869567e-06,
"loss": 0.2381,
"step": 16720
},
{
"epoch": 1.1605202688055496,
"grad_norm": 1.359375,
"learning_rate": 2.090608695652174e-06,
"loss": 0.2305,
"step": 16730
},
{
"epoch": 1.1612139605462823,
"grad_norm": 1.203125,
"learning_rate": 2.0888695652173915e-06,
"loss": 0.2416,
"step": 16740
},
{
"epoch": 1.1619076522870149,
"grad_norm": 1.5625,
"learning_rate": 2.087130434782609e-06,
"loss": 0.3016,
"step": 16750
},
{
"epoch": 1.1626013440277476,
"grad_norm": 1.9765625,
"learning_rate": 2.0853913043478262e-06,
"loss": 0.3109,
"step": 16760
},
{
"epoch": 1.1632950357684804,
"grad_norm": 1.2734375,
"learning_rate": 2.0836521739130436e-06,
"loss": 0.2107,
"step": 16770
},
{
"epoch": 1.1639887275092131,
"grad_norm": 1.2421875,
"learning_rate": 2.081913043478261e-06,
"loss": 0.2386,
"step": 16780
},
{
"epoch": 1.1646824192499459,
"grad_norm": 1.1640625,
"learning_rate": 2.0801739130434783e-06,
"loss": 0.2904,
"step": 16790
},
{
"epoch": 1.1653761109906786,
"grad_norm": 1.078125,
"learning_rate": 2.0784347826086957e-06,
"loss": 0.2027,
"step": 16800
},
{
"epoch": 1.1660698027314111,
"grad_norm": 1.3046875,
"learning_rate": 2.076695652173913e-06,
"loss": 0.2178,
"step": 16810
},
{
"epoch": 1.166763494472144,
"grad_norm": 1.3828125,
"learning_rate": 2.074956521739131e-06,
"loss": 0.2418,
"step": 16820
},
{
"epoch": 1.1674571862128766,
"grad_norm": 1.4453125,
"learning_rate": 2.073217391304348e-06,
"loss": 0.2702,
"step": 16830
},
{
"epoch": 1.1681508779536094,
"grad_norm": 1.484375,
"learning_rate": 2.0714782608695656e-06,
"loss": 0.2903,
"step": 16840
},
{
"epoch": 1.1688445696943421,
"grad_norm": 1.7109375,
"learning_rate": 2.0697391304347826e-06,
"loss": 0.2985,
"step": 16850
},
{
"epoch": 1.1695382614350747,
"grad_norm": 1.0625,
"learning_rate": 2.0680000000000004e-06,
"loss": 0.2587,
"step": 16860
},
{
"epoch": 1.1702319531758074,
"grad_norm": 1.1171875,
"learning_rate": 2.0662608695652177e-06,
"loss": 0.2319,
"step": 16870
},
{
"epoch": 1.1709256449165402,
"grad_norm": 1.1796875,
"learning_rate": 2.064521739130435e-06,
"loss": 0.2573,
"step": 16880
},
{
"epoch": 1.171619336657273,
"grad_norm": 1.3515625,
"learning_rate": 2.0627826086956525e-06,
"loss": 0.2489,
"step": 16890
},
{
"epoch": 1.1723130283980057,
"grad_norm": 1.3828125,
"learning_rate": 2.06104347826087e-06,
"loss": 0.2214,
"step": 16900
},
{
"epoch": 1.1730067201387384,
"grad_norm": 1.15625,
"learning_rate": 2.0593043478260872e-06,
"loss": 0.2204,
"step": 16910
},
{
"epoch": 1.1737004118794712,
"grad_norm": 1.234375,
"learning_rate": 2.0575652173913046e-06,
"loss": 0.2512,
"step": 16920
},
{
"epoch": 1.1743941036202037,
"grad_norm": 1.3359375,
"learning_rate": 2.055826086956522e-06,
"loss": 0.2447,
"step": 16930
},
{
"epoch": 1.1750877953609364,
"grad_norm": 1.296875,
"learning_rate": 2.0540869565217393e-06,
"loss": 0.2006,
"step": 16940
},
{
"epoch": 1.1757814871016692,
"grad_norm": 1.09375,
"learning_rate": 2.0523478260869567e-06,
"loss": 0.2405,
"step": 16950
},
{
"epoch": 1.176475178842402,
"grad_norm": 1.5703125,
"learning_rate": 2.050608695652174e-06,
"loss": 0.2739,
"step": 16960
},
{
"epoch": 1.1771688705831347,
"grad_norm": 1.1171875,
"learning_rate": 2.0488695652173914e-06,
"loss": 0.2256,
"step": 16970
},
{
"epoch": 1.1778625623238672,
"grad_norm": 1.1640625,
"learning_rate": 2.047130434782609e-06,
"loss": 0.2264,
"step": 16980
},
{
"epoch": 1.1785562540646,
"grad_norm": 1.59375,
"learning_rate": 2.045391304347826e-06,
"loss": 0.2409,
"step": 16990
},
{
"epoch": 1.1792499458053327,
"grad_norm": 1.265625,
"learning_rate": 2.0436521739130436e-06,
"loss": 0.2711,
"step": 17000
},
{
"epoch": 1.1799436375460655,
"grad_norm": 1.1328125,
"learning_rate": 2.041913043478261e-06,
"loss": 0.2456,
"step": 17010
},
{
"epoch": 1.1806373292867982,
"grad_norm": 1.171875,
"learning_rate": 2.0401739130434783e-06,
"loss": 0.244,
"step": 17020
},
{
"epoch": 1.181331021027531,
"grad_norm": 1.5390625,
"learning_rate": 2.0384347826086957e-06,
"loss": 0.2305,
"step": 17030
},
{
"epoch": 1.1820247127682637,
"grad_norm": 0.91796875,
"learning_rate": 2.036695652173913e-06,
"loss": 0.2297,
"step": 17040
},
{
"epoch": 1.1827184045089962,
"grad_norm": 1.09375,
"learning_rate": 2.034956521739131e-06,
"loss": 0.2263,
"step": 17050
},
{
"epoch": 1.183412096249729,
"grad_norm": 1.6328125,
"learning_rate": 2.0332173913043478e-06,
"loss": 0.2718,
"step": 17060
},
{
"epoch": 1.1841057879904617,
"grad_norm": 1.3359375,
"learning_rate": 2.0314782608695656e-06,
"loss": 0.2617,
"step": 17070
},
{
"epoch": 1.1847994797311945,
"grad_norm": 1.3515625,
"learning_rate": 2.0297391304347825e-06,
"loss": 0.2891,
"step": 17080
},
{
"epoch": 1.1854931714719272,
"grad_norm": 1.25,
"learning_rate": 2.0280000000000003e-06,
"loss": 0.2944,
"step": 17090
},
{
"epoch": 1.1861868632126598,
"grad_norm": 1.140625,
"learning_rate": 2.0262608695652177e-06,
"loss": 0.2248,
"step": 17100
},
{
"epoch": 1.1868805549533925,
"grad_norm": 1.078125,
"learning_rate": 2.024521739130435e-06,
"loss": 0.2692,
"step": 17110
},
{
"epoch": 1.1875742466941253,
"grad_norm": 1.328125,
"learning_rate": 2.0227826086956524e-06,
"loss": 0.2349,
"step": 17120
},
{
"epoch": 1.188267938434858,
"grad_norm": 1.234375,
"learning_rate": 2.02104347826087e-06,
"loss": 0.2191,
"step": 17130
},
{
"epoch": 1.1889616301755908,
"grad_norm": 1.0078125,
"learning_rate": 2.019304347826087e-06,
"loss": 0.2398,
"step": 17140
},
{
"epoch": 1.1896553219163235,
"grad_norm": 1.2578125,
"learning_rate": 2.0175652173913045e-06,
"loss": 0.2761,
"step": 17150
},
{
"epoch": 1.1903490136570563,
"grad_norm": 1.171875,
"learning_rate": 2.015826086956522e-06,
"loss": 0.2036,
"step": 17160
},
{
"epoch": 1.1910427053977888,
"grad_norm": 1.609375,
"learning_rate": 2.0140869565217393e-06,
"loss": 0.2639,
"step": 17170
},
{
"epoch": 1.1917363971385215,
"grad_norm": 1.25,
"learning_rate": 2.0123478260869567e-06,
"loss": 0.195,
"step": 17180
},
{
"epoch": 1.1924300888792543,
"grad_norm": 1.3125,
"learning_rate": 2.010608695652174e-06,
"loss": 0.2145,
"step": 17190
},
{
"epoch": 1.193123780619987,
"grad_norm": 1.84375,
"learning_rate": 2.0088695652173914e-06,
"loss": 0.3457,
"step": 17200
},
{
"epoch": 1.1938174723607198,
"grad_norm": 1.28125,
"learning_rate": 2.007130434782609e-06,
"loss": 0.2548,
"step": 17210
},
{
"epoch": 1.1945111641014523,
"grad_norm": 1.0234375,
"learning_rate": 2.005391304347826e-06,
"loss": 0.24,
"step": 17220
},
{
"epoch": 1.195204855842185,
"grad_norm": 1.1640625,
"learning_rate": 2.0036521739130435e-06,
"loss": 0.2872,
"step": 17230
},
{
"epoch": 1.1958985475829178,
"grad_norm": 1.296875,
"learning_rate": 2.001913043478261e-06,
"loss": 0.231,
"step": 17240
},
{
"epoch": 1.1965922393236506,
"grad_norm": 1.3515625,
"learning_rate": 2.0001739130434783e-06,
"loss": 0.2923,
"step": 17250
},
{
"epoch": 1.1972859310643833,
"grad_norm": 1.2109375,
"learning_rate": 1.9984347826086956e-06,
"loss": 0.2832,
"step": 17260
},
{
"epoch": 1.197979622805116,
"grad_norm": 1.4921875,
"learning_rate": 1.996695652173913e-06,
"loss": 0.2875,
"step": 17270
},
{
"epoch": 1.1986733145458488,
"grad_norm": 1.0390625,
"learning_rate": 1.994956521739131e-06,
"loss": 0.2527,
"step": 17280
},
{
"epoch": 1.1993670062865813,
"grad_norm": 1.265625,
"learning_rate": 1.9932173913043477e-06,
"loss": 0.2425,
"step": 17290
},
{
"epoch": 1.200060698027314,
"grad_norm": 1.265625,
"learning_rate": 1.9914782608695655e-06,
"loss": 0.2546,
"step": 17300
},
{
"epoch": 1.2007543897680468,
"grad_norm": 1.4140625,
"learning_rate": 1.9897391304347825e-06,
"loss": 0.255,
"step": 17310
},
{
"epoch": 1.2014480815087796,
"grad_norm": 0.9609375,
"learning_rate": 1.9880000000000003e-06,
"loss": 0.2478,
"step": 17320
},
{
"epoch": 1.2021417732495123,
"grad_norm": 1.0390625,
"learning_rate": 1.9862608695652176e-06,
"loss": 0.2083,
"step": 17330
},
{
"epoch": 1.2028354649902449,
"grad_norm": 1.5078125,
"learning_rate": 1.984521739130435e-06,
"loss": 0.2753,
"step": 17340
},
{
"epoch": 1.2035291567309776,
"grad_norm": 1.2265625,
"learning_rate": 1.9827826086956524e-06,
"loss": 0.3128,
"step": 17350
},
{
"epoch": 1.2042228484717103,
"grad_norm": 1.0,
"learning_rate": 1.9810434782608698e-06,
"loss": 0.2478,
"step": 17360
},
{
"epoch": 1.204916540212443,
"grad_norm": 1.15625,
"learning_rate": 1.979304347826087e-06,
"loss": 0.328,
"step": 17370
},
{
"epoch": 1.2056102319531758,
"grad_norm": 1.1875,
"learning_rate": 1.9775652173913045e-06,
"loss": 0.2781,
"step": 17380
},
{
"epoch": 1.2063039236939086,
"grad_norm": 1.203125,
"learning_rate": 1.975826086956522e-06,
"loss": 0.3133,
"step": 17390
},
{
"epoch": 1.2069976154346413,
"grad_norm": 1.140625,
"learning_rate": 1.9740869565217392e-06,
"loss": 0.212,
"step": 17400
},
{
"epoch": 1.2076913071753739,
"grad_norm": 1.3515625,
"learning_rate": 1.9723478260869566e-06,
"loss": 0.2839,
"step": 17410
},
{
"epoch": 1.2083849989161066,
"grad_norm": 1.34375,
"learning_rate": 1.970608695652174e-06,
"loss": 0.2442,
"step": 17420
},
{
"epoch": 1.2090786906568394,
"grad_norm": 1.28125,
"learning_rate": 1.9688695652173914e-06,
"loss": 0.2751,
"step": 17430
},
{
"epoch": 1.2097723823975721,
"grad_norm": 1.265625,
"learning_rate": 1.967130434782609e-06,
"loss": 0.2529,
"step": 17440
},
{
"epoch": 1.2104660741383049,
"grad_norm": 1.3125,
"learning_rate": 1.965391304347826e-06,
"loss": 0.2791,
"step": 17450
},
{
"epoch": 1.2111597658790374,
"grad_norm": 1.2109375,
"learning_rate": 1.963652173913044e-06,
"loss": 0.2276,
"step": 17460
},
{
"epoch": 1.2118534576197701,
"grad_norm": 1.7890625,
"learning_rate": 1.961913043478261e-06,
"loss": 0.2515,
"step": 17470
},
{
"epoch": 1.212547149360503,
"grad_norm": 1.140625,
"learning_rate": 1.9601739130434786e-06,
"loss": 0.2579,
"step": 17480
},
{
"epoch": 1.2132408411012356,
"grad_norm": 0.98046875,
"learning_rate": 1.9584347826086956e-06,
"loss": 0.2222,
"step": 17490
},
{
"epoch": 1.2139345328419684,
"grad_norm": 0.98046875,
"learning_rate": 1.9566956521739134e-06,
"loss": 0.2072,
"step": 17500
},
{
"epoch": 1.2146282245827011,
"grad_norm": 1.203125,
"learning_rate": 1.9549565217391308e-06,
"loss": 0.2278,
"step": 17510
},
{
"epoch": 1.215321916323434,
"grad_norm": 1.0390625,
"learning_rate": 1.953217391304348e-06,
"loss": 0.2338,
"step": 17520
},
{
"epoch": 1.2160156080641664,
"grad_norm": 1.109375,
"learning_rate": 1.9514782608695655e-06,
"loss": 0.235,
"step": 17530
},
{
"epoch": 1.2167092998048992,
"grad_norm": 1.40625,
"learning_rate": 1.9497391304347824e-06,
"loss": 0.2458,
"step": 17540
},
{
"epoch": 1.217402991545632,
"grad_norm": 1.21875,
"learning_rate": 1.9480000000000002e-06,
"loss": 0.2322,
"step": 17550
},
{
"epoch": 1.2180966832863647,
"grad_norm": 0.8671875,
"learning_rate": 1.9462608695652176e-06,
"loss": 0.2301,
"step": 17560
},
{
"epoch": 1.2187903750270974,
"grad_norm": 1.015625,
"learning_rate": 1.944521739130435e-06,
"loss": 0.2707,
"step": 17570
},
{
"epoch": 1.21948406676783,
"grad_norm": 1.3046875,
"learning_rate": 1.9427826086956524e-06,
"loss": 0.2158,
"step": 17580
},
{
"epoch": 1.2201777585085627,
"grad_norm": 1.28125,
"learning_rate": 1.9410434782608697e-06,
"loss": 0.2404,
"step": 17590
},
{
"epoch": 1.2208714502492954,
"grad_norm": 1.1484375,
"learning_rate": 1.939304347826087e-06,
"loss": 0.2562,
"step": 17600
},
{
"epoch": 1.2215651419900282,
"grad_norm": 1.2109375,
"learning_rate": 1.9375652173913045e-06,
"loss": 0.2154,
"step": 17610
},
{
"epoch": 1.222258833730761,
"grad_norm": 1.2890625,
"learning_rate": 1.935826086956522e-06,
"loss": 0.2309,
"step": 17620
},
{
"epoch": 1.2229525254714937,
"grad_norm": 1.296875,
"learning_rate": 1.934086956521739e-06,
"loss": 0.229,
"step": 17630
},
{
"epoch": 1.2236462172122264,
"grad_norm": 1.453125,
"learning_rate": 1.9323478260869566e-06,
"loss": 0.2428,
"step": 17640
},
{
"epoch": 1.224339908952959,
"grad_norm": 1.1171875,
"learning_rate": 1.930608695652174e-06,
"loss": 0.2227,
"step": 17650
},
{
"epoch": 1.2250336006936917,
"grad_norm": 1.59375,
"learning_rate": 1.9288695652173913e-06,
"loss": 0.3012,
"step": 17660
},
{
"epoch": 1.2257272924344245,
"grad_norm": 0.9765625,
"learning_rate": 1.927130434782609e-06,
"loss": 0.2796,
"step": 17670
},
{
"epoch": 1.2264209841751572,
"grad_norm": 0.953125,
"learning_rate": 1.925391304347826e-06,
"loss": 0.2393,
"step": 17680
},
{
"epoch": 1.22711467591589,
"grad_norm": 1.3984375,
"learning_rate": 1.923652173913044e-06,
"loss": 0.2685,
"step": 17690
},
{
"epoch": 1.2278083676566225,
"grad_norm": 1.15625,
"learning_rate": 1.921913043478261e-06,
"loss": 0.2598,
"step": 17700
},
{
"epoch": 1.2285020593973552,
"grad_norm": 1.0625,
"learning_rate": 1.9201739130434786e-06,
"loss": 0.237,
"step": 17710
},
{
"epoch": 1.229195751138088,
"grad_norm": 0.9765625,
"learning_rate": 1.9184347826086955e-06,
"loss": 0.2421,
"step": 17720
},
{
"epoch": 1.2298894428788207,
"grad_norm": 1.1484375,
"learning_rate": 1.9166956521739133e-06,
"loss": 0.2501,
"step": 17730
},
{
"epoch": 1.2305831346195535,
"grad_norm": 1.1640625,
"learning_rate": 1.9149565217391307e-06,
"loss": 0.21,
"step": 17740
},
{
"epoch": 1.2312768263602862,
"grad_norm": 1.1328125,
"learning_rate": 1.913217391304348e-06,
"loss": 0.2558,
"step": 17750
},
{
"epoch": 1.2319705181010188,
"grad_norm": 1.203125,
"learning_rate": 1.9114782608695655e-06,
"loss": 0.2314,
"step": 17760
},
{
"epoch": 1.2326642098417515,
"grad_norm": 1.6171875,
"learning_rate": 1.909739130434783e-06,
"loss": 0.2358,
"step": 17770
},
{
"epoch": 1.2333579015824843,
"grad_norm": 1.0234375,
"learning_rate": 1.908e-06,
"loss": 0.2434,
"step": 17780
},
{
"epoch": 1.234051593323217,
"grad_norm": 1.3515625,
"learning_rate": 1.9062608695652176e-06,
"loss": 0.2677,
"step": 17790
},
{
"epoch": 1.2347452850639498,
"grad_norm": 1.21875,
"learning_rate": 1.904521739130435e-06,
"loss": 0.2215,
"step": 17800
},
{
"epoch": 1.2354389768046825,
"grad_norm": 1.0,
"learning_rate": 1.9027826086956525e-06,
"loss": 0.2329,
"step": 17810
},
{
"epoch": 1.236132668545415,
"grad_norm": 1.203125,
"learning_rate": 1.9010434782608697e-06,
"loss": 0.2311,
"step": 17820
},
{
"epoch": 1.2368263602861478,
"grad_norm": 1.0390625,
"learning_rate": 1.8993043478260873e-06,
"loss": 0.2237,
"step": 17830
},
{
"epoch": 1.2375200520268805,
"grad_norm": 1.3984375,
"learning_rate": 1.8975652173913044e-06,
"loss": 0.254,
"step": 17840
},
{
"epoch": 1.2382137437676133,
"grad_norm": 1.171875,
"learning_rate": 1.8958260869565218e-06,
"loss": 0.2176,
"step": 17850
},
{
"epoch": 1.238907435508346,
"grad_norm": 1.3671875,
"learning_rate": 1.8940869565217394e-06,
"loss": 0.2342,
"step": 17860
},
{
"epoch": 1.2396011272490788,
"grad_norm": 1.0703125,
"learning_rate": 1.8923478260869565e-06,
"loss": 0.2417,
"step": 17870
},
{
"epoch": 1.2402948189898113,
"grad_norm": 1.2109375,
"learning_rate": 1.8906086956521741e-06,
"loss": 0.2707,
"step": 17880
},
{
"epoch": 1.240988510730544,
"grad_norm": 1.484375,
"learning_rate": 1.8888695652173913e-06,
"loss": 0.2278,
"step": 17890
},
{
"epoch": 1.2416822024712768,
"grad_norm": 1.203125,
"learning_rate": 1.8871304347826089e-06,
"loss": 0.2455,
"step": 17900
},
{
"epoch": 1.2423758942120096,
"grad_norm": 1.34375,
"learning_rate": 1.8853913043478262e-06,
"loss": 0.2575,
"step": 17910
},
{
"epoch": 1.2430695859527423,
"grad_norm": 1.5,
"learning_rate": 1.8836521739130436e-06,
"loss": 0.2959,
"step": 17920
},
{
"epoch": 1.243763277693475,
"grad_norm": 1.234375,
"learning_rate": 1.881913043478261e-06,
"loss": 0.2936,
"step": 17930
},
{
"epoch": 1.2444569694342076,
"grad_norm": 1.2265625,
"learning_rate": 1.8801739130434786e-06,
"loss": 0.3073,
"step": 17940
},
{
"epoch": 1.2451506611749403,
"grad_norm": 1.3125,
"learning_rate": 1.8784347826086957e-06,
"loss": 0.2216,
"step": 17950
},
{
"epoch": 1.245844352915673,
"grad_norm": 1.328125,
"learning_rate": 1.8766956521739133e-06,
"loss": 0.2443,
"step": 17960
},
{
"epoch": 1.2465380446564058,
"grad_norm": 1.15625,
"learning_rate": 1.8749565217391305e-06,
"loss": 0.2522,
"step": 17970
},
{
"epoch": 1.2472317363971386,
"grad_norm": 1.0546875,
"learning_rate": 1.873217391304348e-06,
"loss": 0.2404,
"step": 17980
},
{
"epoch": 1.2479254281378713,
"grad_norm": 1.3046875,
"learning_rate": 1.8714782608695652e-06,
"loss": 0.2511,
"step": 17990
},
{
"epoch": 1.2486191198786039,
"grad_norm": 0.95703125,
"learning_rate": 1.8697391304347828e-06,
"loss": 0.3144,
"step": 18000
},
{
"epoch": 1.2493128116193366,
"grad_norm": 1.0703125,
"learning_rate": 1.8680000000000002e-06,
"loss": 0.2274,
"step": 18010
},
{
"epoch": 1.2500065033600694,
"grad_norm": 1.2421875,
"learning_rate": 1.8662608695652175e-06,
"loss": 0.3148,
"step": 18020
},
{
"epoch": 1.250700195100802,
"grad_norm": 1.0546875,
"learning_rate": 1.864521739130435e-06,
"loss": 0.2123,
"step": 18030
},
{
"epoch": 1.2513938868415349,
"grad_norm": 1.25,
"learning_rate": 1.8627826086956525e-06,
"loss": 0.2383,
"step": 18040
},
{
"epoch": 1.2520875785822674,
"grad_norm": 1.140625,
"learning_rate": 1.8610434782608696e-06,
"loss": 0.2308,
"step": 18050
},
{
"epoch": 1.2527812703230001,
"grad_norm": 1.1328125,
"learning_rate": 1.8593043478260872e-06,
"loss": 0.2131,
"step": 18060
},
{
"epoch": 1.2534749620637329,
"grad_norm": 1.1640625,
"learning_rate": 1.8575652173913044e-06,
"loss": 0.264,
"step": 18070
},
{
"epoch": 1.2541686538044656,
"grad_norm": 1.1953125,
"learning_rate": 1.855826086956522e-06,
"loss": 0.2561,
"step": 18080
},
{
"epoch": 1.2548623455451984,
"grad_norm": 1.5078125,
"learning_rate": 1.8540869565217393e-06,
"loss": 0.3174,
"step": 18090
},
{
"epoch": 1.2555560372859311,
"grad_norm": 1.1015625,
"learning_rate": 1.8523478260869567e-06,
"loss": 0.2361,
"step": 18100
},
{
"epoch": 1.2562497290266639,
"grad_norm": 1.4765625,
"learning_rate": 1.850608695652174e-06,
"loss": 0.2928,
"step": 18110
},
{
"epoch": 1.2569434207673966,
"grad_norm": 1.21875,
"learning_rate": 1.8488695652173917e-06,
"loss": 0.1963,
"step": 18120
},
{
"epoch": 1.2576371125081292,
"grad_norm": 1.25,
"learning_rate": 1.8471304347826088e-06,
"loss": 0.261,
"step": 18130
},
{
"epoch": 1.258330804248862,
"grad_norm": 0.95703125,
"learning_rate": 1.8453913043478264e-06,
"loss": 0.2202,
"step": 18140
},
{
"epoch": 1.2590244959895946,
"grad_norm": 1.21875,
"learning_rate": 1.8436521739130436e-06,
"loss": 0.3148,
"step": 18150
},
{
"epoch": 1.2597181877303274,
"grad_norm": 1.1484375,
"learning_rate": 1.841913043478261e-06,
"loss": 0.2427,
"step": 18160
},
{
"epoch": 1.26041187947106,
"grad_norm": 1.125,
"learning_rate": 1.8401739130434785e-06,
"loss": 0.2313,
"step": 18170
},
{
"epoch": 1.2611055712117927,
"grad_norm": 1.40625,
"learning_rate": 1.8384347826086957e-06,
"loss": 0.2316,
"step": 18180
},
{
"epoch": 1.2617992629525254,
"grad_norm": 1.3203125,
"learning_rate": 1.8366956521739133e-06,
"loss": 0.2298,
"step": 18190
},
{
"epoch": 1.2624929546932582,
"grad_norm": 1.0234375,
"learning_rate": 1.8349565217391304e-06,
"loss": 0.2165,
"step": 18200
},
{
"epoch": 1.263186646433991,
"grad_norm": 1.296875,
"learning_rate": 1.833217391304348e-06,
"loss": 0.2387,
"step": 18210
},
{
"epoch": 1.2638803381747237,
"grad_norm": 1.0703125,
"learning_rate": 1.8314782608695652e-06,
"loss": 0.2883,
"step": 18220
},
{
"epoch": 1.2645740299154564,
"grad_norm": 1.6015625,
"learning_rate": 1.8297391304347827e-06,
"loss": 0.2552,
"step": 18230
},
{
"epoch": 1.265267721656189,
"grad_norm": 1.2734375,
"learning_rate": 1.8280000000000001e-06,
"loss": 0.2563,
"step": 18240
},
{
"epoch": 1.2659614133969217,
"grad_norm": 1.0859375,
"learning_rate": 1.8262608695652175e-06,
"loss": 0.2093,
"step": 18250
},
{
"epoch": 1.2666551051376544,
"grad_norm": 1.2265625,
"learning_rate": 1.8245217391304349e-06,
"loss": 0.241,
"step": 18260
},
{
"epoch": 1.2673487968783872,
"grad_norm": 1.4140625,
"learning_rate": 1.8227826086956524e-06,
"loss": 0.2887,
"step": 18270
},
{
"epoch": 1.26804248861912,
"grad_norm": 1.203125,
"learning_rate": 1.8210434782608696e-06,
"loss": 0.2639,
"step": 18280
},
{
"epoch": 1.2687361803598525,
"grad_norm": 1.1640625,
"learning_rate": 1.8193043478260872e-06,
"loss": 0.2598,
"step": 18290
},
{
"epoch": 1.2694298721005852,
"grad_norm": 1.1640625,
"learning_rate": 1.8175652173913043e-06,
"loss": 0.248,
"step": 18300
},
{
"epoch": 1.270123563841318,
"grad_norm": 1.296875,
"learning_rate": 1.815826086956522e-06,
"loss": 0.1977,
"step": 18310
},
{
"epoch": 1.2708172555820507,
"grad_norm": 1.5859375,
"learning_rate": 1.8140869565217393e-06,
"loss": 0.2536,
"step": 18320
},
{
"epoch": 1.2715109473227835,
"grad_norm": 1.1484375,
"learning_rate": 1.8123478260869567e-06,
"loss": 0.2557,
"step": 18330
},
{
"epoch": 1.2722046390635162,
"grad_norm": 1.3828125,
"learning_rate": 1.810608695652174e-06,
"loss": 0.3069,
"step": 18340
},
{
"epoch": 1.272898330804249,
"grad_norm": 1.0234375,
"learning_rate": 1.8088695652173916e-06,
"loss": 0.2315,
"step": 18350
},
{
"epoch": 1.2735920225449815,
"grad_norm": 1.2890625,
"learning_rate": 1.8071304347826088e-06,
"loss": 0.202,
"step": 18360
},
{
"epoch": 1.2742857142857142,
"grad_norm": 1.1796875,
"learning_rate": 1.8053913043478264e-06,
"loss": 0.304,
"step": 18370
},
{
"epoch": 1.274979406026447,
"grad_norm": 1.59375,
"learning_rate": 1.8036521739130435e-06,
"loss": 0.259,
"step": 18380
},
{
"epoch": 1.2756730977671797,
"grad_norm": 1.2578125,
"learning_rate": 1.8019130434782611e-06,
"loss": 0.2263,
"step": 18390
},
{
"epoch": 1.2763667895079125,
"grad_norm": 1.1328125,
"learning_rate": 1.8001739130434785e-06,
"loss": 0.2381,
"step": 18400
},
{
"epoch": 1.277060481248645,
"grad_norm": 1.5,
"learning_rate": 1.7984347826086958e-06,
"loss": 0.2154,
"step": 18410
},
{
"epoch": 1.2777541729893778,
"grad_norm": 1.28125,
"learning_rate": 1.7966956521739132e-06,
"loss": 0.2191,
"step": 18420
},
{
"epoch": 1.2784478647301105,
"grad_norm": 1.0,
"learning_rate": 1.7949565217391308e-06,
"loss": 0.2516,
"step": 18430
},
{
"epoch": 1.2791415564708433,
"grad_norm": 1.2109375,
"learning_rate": 1.793217391304348e-06,
"loss": 0.2271,
"step": 18440
},
{
"epoch": 1.279835248211576,
"grad_norm": 1.1015625,
"learning_rate": 1.7914782608695655e-06,
"loss": 0.2308,
"step": 18450
},
{
"epoch": 1.2805289399523088,
"grad_norm": 1.25,
"learning_rate": 1.7897391304347827e-06,
"loss": 0.3075,
"step": 18460
},
{
"epoch": 1.2812226316930415,
"grad_norm": 1.4609375,
"learning_rate": 1.788e-06,
"loss": 0.2446,
"step": 18470
},
{
"epoch": 1.281916323433774,
"grad_norm": 1.1953125,
"learning_rate": 1.7862608695652174e-06,
"loss": 0.2359,
"step": 18480
},
{
"epoch": 1.2826100151745068,
"grad_norm": 2.171875,
"learning_rate": 1.7845217391304348e-06,
"loss": 0.3021,
"step": 18490
},
{
"epoch": 1.2833037069152395,
"grad_norm": 1.4609375,
"learning_rate": 1.7827826086956524e-06,
"loss": 0.2255,
"step": 18500
},
{
"epoch": 1.2839973986559723,
"grad_norm": 1.2578125,
"learning_rate": 1.7810434782608696e-06,
"loss": 0.2562,
"step": 18510
},
{
"epoch": 1.284691090396705,
"grad_norm": 1.3515625,
"learning_rate": 1.7793043478260871e-06,
"loss": 0.2513,
"step": 18520
},
{
"epoch": 1.2853847821374376,
"grad_norm": 1.4140625,
"learning_rate": 1.7775652173913043e-06,
"loss": 0.2296,
"step": 18530
},
{
"epoch": 1.2860784738781703,
"grad_norm": 1.3046875,
"learning_rate": 1.7758260869565219e-06,
"loss": 0.2612,
"step": 18540
},
{
"epoch": 1.286772165618903,
"grad_norm": 1.1171875,
"learning_rate": 1.7740869565217393e-06,
"loss": 0.2088,
"step": 18550
},
{
"epoch": 1.2874658573596358,
"grad_norm": 1.046875,
"learning_rate": 1.7723478260869566e-06,
"loss": 0.2278,
"step": 18560
},
{
"epoch": 1.2881595491003686,
"grad_norm": 1.015625,
"learning_rate": 1.770608695652174e-06,
"loss": 0.2695,
"step": 18570
},
{
"epoch": 1.2888532408411013,
"grad_norm": 0.921875,
"learning_rate": 1.7688695652173916e-06,
"loss": 0.237,
"step": 18580
},
{
"epoch": 1.289546932581834,
"grad_norm": 1.53125,
"learning_rate": 1.7671304347826087e-06,
"loss": 0.3094,
"step": 18590
},
{
"epoch": 1.2902406243225666,
"grad_norm": 1.03125,
"learning_rate": 1.7653913043478263e-06,
"loss": 0.2231,
"step": 18600
},
{
"epoch": 1.2909343160632993,
"grad_norm": 1.1328125,
"learning_rate": 1.7636521739130435e-06,
"loss": 0.2858,
"step": 18610
},
{
"epoch": 1.291628007804032,
"grad_norm": 1.1484375,
"learning_rate": 1.761913043478261e-06,
"loss": 0.2832,
"step": 18620
},
{
"epoch": 1.2923216995447648,
"grad_norm": 1.3359375,
"learning_rate": 1.7601739130434784e-06,
"loss": 0.2447,
"step": 18630
},
{
"epoch": 1.2930153912854976,
"grad_norm": 1.1484375,
"learning_rate": 1.7584347826086958e-06,
"loss": 0.2911,
"step": 18640
},
{
"epoch": 1.2937090830262301,
"grad_norm": 1.078125,
"learning_rate": 1.7566956521739132e-06,
"loss": 0.261,
"step": 18650
},
{
"epoch": 1.2944027747669629,
"grad_norm": 1.5,
"learning_rate": 1.7549565217391308e-06,
"loss": 0.2486,
"step": 18660
},
{
"epoch": 1.2950964665076956,
"grad_norm": 0.9765625,
"learning_rate": 1.753217391304348e-06,
"loss": 0.236,
"step": 18670
},
{
"epoch": 1.2957901582484284,
"grad_norm": 1.390625,
"learning_rate": 1.7514782608695655e-06,
"loss": 0.2216,
"step": 18680
},
{
"epoch": 1.296483849989161,
"grad_norm": 1.125,
"learning_rate": 1.7497391304347827e-06,
"loss": 0.2348,
"step": 18690
},
{
"epoch": 1.2971775417298939,
"grad_norm": 1.1328125,
"learning_rate": 1.7480000000000002e-06,
"loss": 0.2821,
"step": 18700
},
{
"epoch": 1.2978712334706266,
"grad_norm": 1.0703125,
"learning_rate": 1.7462608695652174e-06,
"loss": 0.2281,
"step": 18710
},
{
"epoch": 1.2985649252113591,
"grad_norm": 1.21875,
"learning_rate": 1.744521739130435e-06,
"loss": 0.3438,
"step": 18720
},
{
"epoch": 1.2992586169520919,
"grad_norm": 1.8515625,
"learning_rate": 1.7427826086956524e-06,
"loss": 0.307,
"step": 18730
},
{
"epoch": 1.2999523086928246,
"grad_norm": 1.3125,
"learning_rate": 1.7410434782608697e-06,
"loss": 0.2792,
"step": 18740
},
{
"epoch": 1.3006460004335574,
"grad_norm": 1.328125,
"learning_rate": 1.739304347826087e-06,
"loss": 0.246,
"step": 18750
},
{
"epoch": 1.3013396921742901,
"grad_norm": 1.203125,
"learning_rate": 1.7375652173913047e-06,
"loss": 0.2229,
"step": 18760
},
{
"epoch": 1.3020333839150227,
"grad_norm": 1.4921875,
"learning_rate": 1.7358260869565218e-06,
"loss": 0.2921,
"step": 18770
},
{
"epoch": 1.3027270756557554,
"grad_norm": 0.9921875,
"learning_rate": 1.7340869565217392e-06,
"loss": 0.2882,
"step": 18780
},
{
"epoch": 1.3034207673964882,
"grad_norm": 1.2265625,
"learning_rate": 1.7323478260869566e-06,
"loss": 0.2465,
"step": 18790
},
{
"epoch": 1.304114459137221,
"grad_norm": 1.21875,
"learning_rate": 1.730608695652174e-06,
"loss": 0.2151,
"step": 18800
},
{
"epoch": 1.3048081508779537,
"grad_norm": 1.1640625,
"learning_rate": 1.7288695652173915e-06,
"loss": 0.2568,
"step": 18810
},
{
"epoch": 1.3055018426186864,
"grad_norm": 1.5703125,
"learning_rate": 1.7271304347826087e-06,
"loss": 0.2507,
"step": 18820
},
{
"epoch": 1.3061955343594192,
"grad_norm": 1.21875,
"learning_rate": 1.7253913043478263e-06,
"loss": 0.2791,
"step": 18830
},
{
"epoch": 1.3068892261001517,
"grad_norm": 1.5390625,
"learning_rate": 1.7236521739130434e-06,
"loss": 0.2841,
"step": 18840
},
{
"epoch": 1.3075829178408844,
"grad_norm": 0.9609375,
"learning_rate": 1.721913043478261e-06,
"loss": 0.2459,
"step": 18850
},
{
"epoch": 1.3082766095816172,
"grad_norm": 1.078125,
"learning_rate": 1.7201739130434784e-06,
"loss": 0.2317,
"step": 18860
},
{
"epoch": 1.30897030132235,
"grad_norm": 1.328125,
"learning_rate": 1.7184347826086958e-06,
"loss": 0.2455,
"step": 18870
},
{
"epoch": 1.3096639930630825,
"grad_norm": 1.6953125,
"learning_rate": 1.7166956521739131e-06,
"loss": 0.2476,
"step": 18880
},
{
"epoch": 1.3103576848038152,
"grad_norm": 1.484375,
"learning_rate": 1.7149565217391307e-06,
"loss": 0.2529,
"step": 18890
},
{
"epoch": 1.311051376544548,
"grad_norm": 1.2734375,
"learning_rate": 1.7132173913043479e-06,
"loss": 0.2066,
"step": 18900
},
{
"epoch": 1.3117450682852807,
"grad_norm": 1.2109375,
"learning_rate": 1.7114782608695655e-06,
"loss": 0.2273,
"step": 18910
},
{
"epoch": 1.3124387600260135,
"grad_norm": 1.1171875,
"learning_rate": 1.7097391304347826e-06,
"loss": 0.1843,
"step": 18920
},
{
"epoch": 1.3131324517667462,
"grad_norm": 1.21875,
"learning_rate": 1.7080000000000002e-06,
"loss": 0.2144,
"step": 18930
},
{
"epoch": 1.313826143507479,
"grad_norm": 1.1875,
"learning_rate": 1.7062608695652174e-06,
"loss": 0.2223,
"step": 18940
},
{
"epoch": 1.3145198352482117,
"grad_norm": 1.296875,
"learning_rate": 1.704521739130435e-06,
"loss": 0.2149,
"step": 18950
},
{
"epoch": 1.3152135269889442,
"grad_norm": 1.28125,
"learning_rate": 1.7027826086956523e-06,
"loss": 0.2343,
"step": 18960
},
{
"epoch": 1.315907218729677,
"grad_norm": 1.59375,
"learning_rate": 1.7010434782608697e-06,
"loss": 0.2878,
"step": 18970
},
{
"epoch": 1.3166009104704097,
"grad_norm": 1.484375,
"learning_rate": 1.699304347826087e-06,
"loss": 0.2503,
"step": 18980
},
{
"epoch": 1.3172946022111425,
"grad_norm": 1.328125,
"learning_rate": 1.6975652173913046e-06,
"loss": 0.2683,
"step": 18990
},
{
"epoch": 1.317988293951875,
"grad_norm": 0.9453125,
"learning_rate": 1.6958260869565218e-06,
"loss": 0.226,
"step": 19000
},
{
"epoch": 1.3186819856926077,
"grad_norm": 1.3203125,
"learning_rate": 1.6940869565217394e-06,
"loss": 0.2354,
"step": 19010
},
{
"epoch": 1.3193756774333405,
"grad_norm": 1.3359375,
"learning_rate": 1.6923478260869565e-06,
"loss": 0.2512,
"step": 19020
},
{
"epoch": 1.3200693691740732,
"grad_norm": 1.1171875,
"learning_rate": 1.6906086956521741e-06,
"loss": 0.215,
"step": 19030
},
{
"epoch": 1.320763060914806,
"grad_norm": 1.0703125,
"learning_rate": 1.6888695652173915e-06,
"loss": 0.2324,
"step": 19040
},
{
"epoch": 1.3214567526555387,
"grad_norm": 1.0390625,
"learning_rate": 1.6871304347826089e-06,
"loss": 0.2898,
"step": 19050
},
{
"epoch": 1.3221504443962715,
"grad_norm": 1.4453125,
"learning_rate": 1.6853913043478262e-06,
"loss": 0.2415,
"step": 19060
},
{
"epoch": 1.3228441361370042,
"grad_norm": 1.46875,
"learning_rate": 1.6836521739130438e-06,
"loss": 0.2365,
"step": 19070
},
{
"epoch": 1.3235378278777368,
"grad_norm": 1.2421875,
"learning_rate": 1.681913043478261e-06,
"loss": 0.2251,
"step": 19080
},
{
"epoch": 1.3242315196184695,
"grad_norm": 0.9921875,
"learning_rate": 1.6801739130434784e-06,
"loss": 0.2137,
"step": 19090
},
{
"epoch": 1.3249252113592023,
"grad_norm": 1.3203125,
"learning_rate": 1.6784347826086957e-06,
"loss": 0.2239,
"step": 19100
},
{
"epoch": 1.325618903099935,
"grad_norm": 1.4609375,
"learning_rate": 1.676695652173913e-06,
"loss": 0.2748,
"step": 19110
},
{
"epoch": 1.3263125948406675,
"grad_norm": 1.21875,
"learning_rate": 1.6749565217391307e-06,
"loss": 0.2748,
"step": 19120
},
{
"epoch": 1.3270062865814003,
"grad_norm": 1.0078125,
"learning_rate": 1.6732173913043478e-06,
"loss": 0.2845,
"step": 19130
},
{
"epoch": 1.327699978322133,
"grad_norm": 1.265625,
"learning_rate": 1.6714782608695654e-06,
"loss": 0.2227,
"step": 19140
},
{
"epoch": 1.3283936700628658,
"grad_norm": 1.140625,
"learning_rate": 1.6697391304347826e-06,
"loss": 0.2701,
"step": 19150
},
{
"epoch": 1.3290873618035985,
"grad_norm": 1.1796875,
"learning_rate": 1.6680000000000002e-06,
"loss": 0.3026,
"step": 19160
},
{
"epoch": 1.3297810535443313,
"grad_norm": 1.34375,
"learning_rate": 1.6662608695652175e-06,
"loss": 0.2401,
"step": 19170
},
{
"epoch": 1.330474745285064,
"grad_norm": 1.28125,
"learning_rate": 1.664521739130435e-06,
"loss": 0.2907,
"step": 19180
},
{
"epoch": 1.3311684370257968,
"grad_norm": 1.09375,
"learning_rate": 1.6627826086956523e-06,
"loss": 0.2404,
"step": 19190
},
{
"epoch": 1.3318621287665293,
"grad_norm": 1.3359375,
"learning_rate": 1.6610434782608699e-06,
"loss": 0.2548,
"step": 19200
},
{
"epoch": 1.332555820507262,
"grad_norm": 1.2109375,
"learning_rate": 1.659304347826087e-06,
"loss": 0.3054,
"step": 19210
},
{
"epoch": 1.3332495122479948,
"grad_norm": 1.0859375,
"learning_rate": 1.6575652173913046e-06,
"loss": 0.2174,
"step": 19220
},
{
"epoch": 1.3339432039887276,
"grad_norm": 1.2734375,
"learning_rate": 1.6558260869565218e-06,
"loss": 0.2712,
"step": 19230
},
{
"epoch": 1.33463689572946,
"grad_norm": 1.5234375,
"learning_rate": 1.6540869565217393e-06,
"loss": 0.2191,
"step": 19240
},
{
"epoch": 1.3353305874701928,
"grad_norm": 1.0859375,
"learning_rate": 1.6523478260869565e-06,
"loss": 0.2889,
"step": 19250
},
{
"epoch": 1.3360242792109256,
"grad_norm": 1.21875,
"learning_rate": 1.650608695652174e-06,
"loss": 0.2662,
"step": 19260
},
{
"epoch": 1.3367179709516583,
"grad_norm": 1.1328125,
"learning_rate": 1.6488695652173915e-06,
"loss": 0.2417,
"step": 19270
},
{
"epoch": 1.337411662692391,
"grad_norm": 1.0703125,
"learning_rate": 1.6471304347826088e-06,
"loss": 0.2668,
"step": 19280
},
{
"epoch": 1.3381053544331238,
"grad_norm": 1.0234375,
"learning_rate": 1.6453913043478262e-06,
"loss": 0.2028,
"step": 19290
},
{
"epoch": 1.3387990461738566,
"grad_norm": 1.3203125,
"learning_rate": 1.6436521739130438e-06,
"loss": 0.2831,
"step": 19300
},
{
"epoch": 1.3394927379145893,
"grad_norm": 1.0625,
"learning_rate": 1.641913043478261e-06,
"loss": 0.2186,
"step": 19310
},
{
"epoch": 1.3401864296553219,
"grad_norm": 1.359375,
"learning_rate": 1.6401739130434785e-06,
"loss": 0.2243,
"step": 19320
},
{
"epoch": 1.3408801213960546,
"grad_norm": 1.09375,
"learning_rate": 1.6384347826086957e-06,
"loss": 0.2012,
"step": 19330
},
{
"epoch": 1.3415738131367874,
"grad_norm": 1.3046875,
"learning_rate": 1.6366956521739133e-06,
"loss": 0.2255,
"step": 19340
},
{
"epoch": 1.3422675048775201,
"grad_norm": 1.046875,
"learning_rate": 1.6349565217391306e-06,
"loss": 0.2735,
"step": 19350
},
{
"epoch": 1.3429611966182526,
"grad_norm": 1.90625,
"learning_rate": 1.633217391304348e-06,
"loss": 0.3033,
"step": 19360
},
{
"epoch": 1.3436548883589854,
"grad_norm": 1.3046875,
"learning_rate": 1.6314782608695654e-06,
"loss": 0.2539,
"step": 19370
},
{
"epoch": 1.3443485800997181,
"grad_norm": 1.1171875,
"learning_rate": 1.629739130434783e-06,
"loss": 0.2277,
"step": 19380
},
{
"epoch": 1.3450422718404509,
"grad_norm": 1.09375,
"learning_rate": 1.6280000000000001e-06,
"loss": 0.2215,
"step": 19390
},
{
"epoch": 1.3457359635811836,
"grad_norm": 1.03125,
"learning_rate": 1.6262608695652175e-06,
"loss": 0.2031,
"step": 19400
},
{
"epoch": 1.3464296553219164,
"grad_norm": 1.2734375,
"learning_rate": 1.6245217391304349e-06,
"loss": 0.2154,
"step": 19410
},
{
"epoch": 1.3471233470626491,
"grad_norm": 1.359375,
"learning_rate": 1.6227826086956522e-06,
"loss": 0.2311,
"step": 19420
},
{
"epoch": 1.3478170388033819,
"grad_norm": 1.0546875,
"learning_rate": 1.6210434782608698e-06,
"loss": 0.2294,
"step": 19430
},
{
"epoch": 1.3485107305441144,
"grad_norm": 1.2421875,
"learning_rate": 1.619304347826087e-06,
"loss": 0.3163,
"step": 19440
},
{
"epoch": 1.3492044222848472,
"grad_norm": 1.0078125,
"learning_rate": 1.6175652173913046e-06,
"loss": 0.1935,
"step": 19450
},
{
"epoch": 1.34989811402558,
"grad_norm": 1.453125,
"learning_rate": 1.6158260869565217e-06,
"loss": 0.2372,
"step": 19460
},
{
"epoch": 1.3505918057663127,
"grad_norm": 1.1484375,
"learning_rate": 1.6140869565217393e-06,
"loss": 0.3196,
"step": 19470
},
{
"epoch": 1.3512854975070452,
"grad_norm": 1.21875,
"learning_rate": 1.6123478260869565e-06,
"loss": 0.2137,
"step": 19480
},
{
"epoch": 1.351979189247778,
"grad_norm": 1.0078125,
"learning_rate": 1.610608695652174e-06,
"loss": 0.2553,
"step": 19490
},
{
"epoch": 1.3526728809885107,
"grad_norm": 1.0859375,
"learning_rate": 1.6088695652173914e-06,
"loss": 0.2622,
"step": 19500
},
{
"epoch": 1.3533665727292434,
"grad_norm": 0.98046875,
"learning_rate": 1.6071304347826088e-06,
"loss": 0.2121,
"step": 19510
},
{
"epoch": 1.3540602644699762,
"grad_norm": 1.953125,
"learning_rate": 1.6053913043478262e-06,
"loss": 0.2425,
"step": 19520
},
{
"epoch": 1.354753956210709,
"grad_norm": 1.0625,
"learning_rate": 1.6036521739130437e-06,
"loss": 0.2537,
"step": 19530
},
{
"epoch": 1.3554476479514417,
"grad_norm": 1.7890625,
"learning_rate": 1.601913043478261e-06,
"loss": 0.3255,
"step": 19540
},
{
"epoch": 1.3561413396921742,
"grad_norm": 1.46875,
"learning_rate": 1.6001739130434785e-06,
"loss": 0.2601,
"step": 19550
},
{
"epoch": 1.356835031432907,
"grad_norm": 1.296875,
"learning_rate": 1.5984347826086956e-06,
"loss": 0.2258,
"step": 19560
},
{
"epoch": 1.3575287231736397,
"grad_norm": 1.3203125,
"learning_rate": 1.5966956521739132e-06,
"loss": 0.2909,
"step": 19570
},
{
"epoch": 1.3582224149143725,
"grad_norm": 1.1328125,
"learning_rate": 1.5949565217391306e-06,
"loss": 0.254,
"step": 19580
},
{
"epoch": 1.3589161066551052,
"grad_norm": 0.94140625,
"learning_rate": 1.593217391304348e-06,
"loss": 0.2798,
"step": 19590
},
{
"epoch": 1.3596097983958377,
"grad_norm": 1.2890625,
"learning_rate": 1.5914782608695653e-06,
"loss": 0.2097,
"step": 19600
},
{
"epoch": 1.3603034901365705,
"grad_norm": 1.171875,
"learning_rate": 1.589739130434783e-06,
"loss": 0.3109,
"step": 19610
},
{
"epoch": 1.3609971818773032,
"grad_norm": 0.94140625,
"learning_rate": 1.588e-06,
"loss": 0.3035,
"step": 19620
},
{
"epoch": 1.361690873618036,
"grad_norm": 1.3359375,
"learning_rate": 1.5862608695652177e-06,
"loss": 0.3036,
"step": 19630
},
{
"epoch": 1.3623845653587687,
"grad_norm": 1.203125,
"learning_rate": 1.5845217391304348e-06,
"loss": 0.2218,
"step": 19640
},
{
"epoch": 1.3630782570995015,
"grad_norm": 1.2890625,
"learning_rate": 1.5827826086956524e-06,
"loss": 0.2462,
"step": 19650
},
{
"epoch": 1.3637719488402342,
"grad_norm": 0.984375,
"learning_rate": 1.5810434782608698e-06,
"loss": 0.2062,
"step": 19660
},
{
"epoch": 1.3644656405809668,
"grad_norm": 1.1875,
"learning_rate": 1.5793043478260872e-06,
"loss": 0.2235,
"step": 19670
},
{
"epoch": 1.3651593323216995,
"grad_norm": 1.5703125,
"learning_rate": 1.5775652173913045e-06,
"loss": 0.2577,
"step": 19680
},
{
"epoch": 1.3658530240624323,
"grad_norm": 1.0625,
"learning_rate": 1.5758260869565221e-06,
"loss": 0.2769,
"step": 19690
},
{
"epoch": 1.366546715803165,
"grad_norm": 1.1171875,
"learning_rate": 1.5740869565217393e-06,
"loss": 0.222,
"step": 19700
},
{
"epoch": 1.3672404075438978,
"grad_norm": 1.1640625,
"learning_rate": 1.5723478260869564e-06,
"loss": 0.2199,
"step": 19710
},
{
"epoch": 1.3679340992846303,
"grad_norm": 1.1015625,
"learning_rate": 1.570608695652174e-06,
"loss": 0.2524,
"step": 19720
},
{
"epoch": 1.368627791025363,
"grad_norm": 1.0390625,
"learning_rate": 1.5688695652173914e-06,
"loss": 0.2201,
"step": 19730
},
{
"epoch": 1.3693214827660958,
"grad_norm": 1.0078125,
"learning_rate": 1.5671304347826088e-06,
"loss": 0.3256,
"step": 19740
},
{
"epoch": 1.3700151745068285,
"grad_norm": 1.546875,
"learning_rate": 1.5653913043478261e-06,
"loss": 0.2929,
"step": 19750
},
{
"epoch": 1.3707088662475613,
"grad_norm": 1.2421875,
"learning_rate": 1.5636521739130437e-06,
"loss": 0.2536,
"step": 19760
},
{
"epoch": 1.371402557988294,
"grad_norm": 1.3828125,
"learning_rate": 1.5619130434782609e-06,
"loss": 0.2349,
"step": 19770
},
{
"epoch": 1.3720962497290268,
"grad_norm": 1.203125,
"learning_rate": 1.5601739130434784e-06,
"loss": 0.2195,
"step": 19780
},
{
"epoch": 1.3727899414697593,
"grad_norm": 1.171875,
"learning_rate": 1.5584347826086956e-06,
"loss": 0.278,
"step": 19790
},
{
"epoch": 1.373483633210492,
"grad_norm": 1.6953125,
"learning_rate": 1.5566956521739132e-06,
"loss": 0.2423,
"step": 19800
},
{
"epoch": 1.3741773249512248,
"grad_norm": 1.140625,
"learning_rate": 1.5549565217391306e-06,
"loss": 0.2768,
"step": 19810
},
{
"epoch": 1.3748710166919575,
"grad_norm": 1.0078125,
"learning_rate": 1.553217391304348e-06,
"loss": 0.2441,
"step": 19820
},
{
"epoch": 1.3755647084326903,
"grad_norm": 1.6015625,
"learning_rate": 1.5514782608695653e-06,
"loss": 0.2551,
"step": 19830
},
{
"epoch": 1.3762584001734228,
"grad_norm": 1.90625,
"learning_rate": 1.5497391304347829e-06,
"loss": 0.2861,
"step": 19840
},
{
"epoch": 1.3769520919141556,
"grad_norm": 1.5,
"learning_rate": 1.548e-06,
"loss": 0.2289,
"step": 19850
},
{
"epoch": 1.3776457836548883,
"grad_norm": 1.3828125,
"learning_rate": 1.5462608695652176e-06,
"loss": 0.2622,
"step": 19860
},
{
"epoch": 1.378339475395621,
"grad_norm": 1.046875,
"learning_rate": 1.5445217391304348e-06,
"loss": 0.3027,
"step": 19870
},
{
"epoch": 1.3790331671363538,
"grad_norm": 1.2421875,
"learning_rate": 1.5427826086956524e-06,
"loss": 0.2128,
"step": 19880
},
{
"epoch": 1.3797268588770866,
"grad_norm": 1.1015625,
"learning_rate": 1.5410434782608697e-06,
"loss": 0.2341,
"step": 19890
},
{
"epoch": 1.3804205506178193,
"grad_norm": 1.078125,
"learning_rate": 1.5393043478260871e-06,
"loss": 0.2186,
"step": 19900
},
{
"epoch": 1.3811142423585518,
"grad_norm": 1.21875,
"learning_rate": 1.5375652173913045e-06,
"loss": 0.2346,
"step": 19910
},
{
"epoch": 1.3818079340992846,
"grad_norm": 1.15625,
"learning_rate": 1.535826086956522e-06,
"loss": 0.2231,
"step": 19920
},
{
"epoch": 1.3825016258400173,
"grad_norm": 1.1796875,
"learning_rate": 1.5340869565217392e-06,
"loss": 0.2264,
"step": 19930
},
{
"epoch": 1.38319531758075,
"grad_norm": 1.1640625,
"learning_rate": 1.5323478260869568e-06,
"loss": 0.2376,
"step": 19940
},
{
"epoch": 1.3838890093214828,
"grad_norm": 1.34375,
"learning_rate": 1.530608695652174e-06,
"loss": 0.2588,
"step": 19950
},
{
"epoch": 1.3845827010622154,
"grad_norm": 1.2578125,
"learning_rate": 1.5288695652173916e-06,
"loss": 0.2747,
"step": 19960
},
{
"epoch": 1.3852763928029481,
"grad_norm": 1.421875,
"learning_rate": 1.5271304347826087e-06,
"loss": 0.2317,
"step": 19970
},
{
"epoch": 1.3859700845436809,
"grad_norm": 1.296875,
"learning_rate": 1.5253913043478263e-06,
"loss": 0.2596,
"step": 19980
},
{
"epoch": 1.3866637762844136,
"grad_norm": 1.328125,
"learning_rate": 1.5236521739130437e-06,
"loss": 0.2658,
"step": 19990
},
{
"epoch": 1.3873574680251464,
"grad_norm": 1.140625,
"learning_rate": 1.521913043478261e-06,
"loss": 0.2576,
"step": 20000
},
{
"epoch": 1.3880511597658791,
"grad_norm": 1.5390625,
"learning_rate": 1.5201739130434784e-06,
"loss": 0.237,
"step": 20010
},
{
"epoch": 1.3887448515066119,
"grad_norm": 1.1953125,
"learning_rate": 1.5184347826086956e-06,
"loss": 0.2249,
"step": 20020
},
{
"epoch": 1.3894385432473444,
"grad_norm": 1.3359375,
"learning_rate": 1.5166956521739131e-06,
"loss": 0.2541,
"step": 20030
},
{
"epoch": 1.3901322349880771,
"grad_norm": 0.87109375,
"learning_rate": 1.5149565217391305e-06,
"loss": 0.2328,
"step": 20040
},
{
"epoch": 1.39082592672881,
"grad_norm": 0.8515625,
"learning_rate": 1.5132173913043479e-06,
"loss": 0.2299,
"step": 20050
},
{
"epoch": 1.3915196184695426,
"grad_norm": 1.21875,
"learning_rate": 1.5114782608695653e-06,
"loss": 0.2728,
"step": 20060
},
{
"epoch": 1.3922133102102754,
"grad_norm": 0.94140625,
"learning_rate": 1.5097391304347828e-06,
"loss": 0.2113,
"step": 20070
},
{
"epoch": 1.392907001951008,
"grad_norm": 1.1953125,
"learning_rate": 1.508e-06,
"loss": 0.2075,
"step": 20080
},
{
"epoch": 1.3936006936917407,
"grad_norm": 1.1484375,
"learning_rate": 1.5062608695652176e-06,
"loss": 0.2242,
"step": 20090
},
{
"epoch": 1.3942943854324734,
"grad_norm": 0.90625,
"learning_rate": 1.5045217391304347e-06,
"loss": 0.2404,
"step": 20100
},
{
"epoch": 1.3949880771732062,
"grad_norm": 1.25,
"learning_rate": 1.5027826086956523e-06,
"loss": 0.2921,
"step": 20110
},
{
"epoch": 1.395681768913939,
"grad_norm": 1.3359375,
"learning_rate": 1.5010434782608697e-06,
"loss": 0.2469,
"step": 20120
},
{
"epoch": 1.3963754606546717,
"grad_norm": 1.53125,
"learning_rate": 1.499304347826087e-06,
"loss": 0.2326,
"step": 20130
},
{
"epoch": 1.3970691523954044,
"grad_norm": 1.140625,
"learning_rate": 1.4975652173913044e-06,
"loss": 0.2378,
"step": 20140
},
{
"epoch": 1.397762844136137,
"grad_norm": 1.34375,
"learning_rate": 1.495826086956522e-06,
"loss": 0.2893,
"step": 20150
},
{
"epoch": 1.3984565358768697,
"grad_norm": 0.7890625,
"learning_rate": 1.4940869565217392e-06,
"loss": 0.2289,
"step": 20160
},
{
"epoch": 1.3991502276176024,
"grad_norm": 1.3046875,
"learning_rate": 1.4923478260869568e-06,
"loss": 0.2284,
"step": 20170
},
{
"epoch": 1.3998439193583352,
"grad_norm": 0.953125,
"learning_rate": 1.490608695652174e-06,
"loss": 0.2175,
"step": 20180
},
{
"epoch": 1.4005376110990677,
"grad_norm": 1.2421875,
"learning_rate": 1.4888695652173915e-06,
"loss": 0.213,
"step": 20190
},
{
"epoch": 1.4012313028398005,
"grad_norm": 1.6484375,
"learning_rate": 1.4871304347826087e-06,
"loss": 0.2365,
"step": 20200
},
{
"epoch": 1.4019249945805332,
"grad_norm": 1.515625,
"learning_rate": 1.4853913043478263e-06,
"loss": 0.238,
"step": 20210
},
{
"epoch": 1.402618686321266,
"grad_norm": 1.4609375,
"learning_rate": 1.4836521739130436e-06,
"loss": 0.227,
"step": 20220
},
{
"epoch": 1.4033123780619987,
"grad_norm": 1.0546875,
"learning_rate": 1.481913043478261e-06,
"loss": 0.2311,
"step": 20230
},
{
"epoch": 1.4040060698027315,
"grad_norm": 1.0390625,
"learning_rate": 1.4801739130434784e-06,
"loss": 0.2447,
"step": 20240
},
{
"epoch": 1.4046997615434642,
"grad_norm": 1.4140625,
"learning_rate": 1.478434782608696e-06,
"loss": 0.2528,
"step": 20250
},
{
"epoch": 1.405393453284197,
"grad_norm": 1.2734375,
"learning_rate": 1.4766956521739131e-06,
"loss": 0.2466,
"step": 20260
},
{
"epoch": 1.4060871450249295,
"grad_norm": 1.515625,
"learning_rate": 1.4749565217391307e-06,
"loss": 0.2668,
"step": 20270
},
{
"epoch": 1.4067808367656622,
"grad_norm": 1.25,
"learning_rate": 1.4732173913043478e-06,
"loss": 0.2382,
"step": 20280
},
{
"epoch": 1.407474528506395,
"grad_norm": 1.2265625,
"learning_rate": 1.4714782608695654e-06,
"loss": 0.2393,
"step": 20290
},
{
"epoch": 1.4081682202471277,
"grad_norm": 0.828125,
"learning_rate": 1.4697391304347828e-06,
"loss": 0.2099,
"step": 20300
},
{
"epoch": 1.4088619119878603,
"grad_norm": 1.1328125,
"learning_rate": 1.4680000000000002e-06,
"loss": 0.2382,
"step": 20310
},
{
"epoch": 1.409555603728593,
"grad_norm": 1.5703125,
"learning_rate": 1.4662608695652175e-06,
"loss": 0.3246,
"step": 20320
},
{
"epoch": 1.4102492954693258,
"grad_norm": 1.0703125,
"learning_rate": 1.4645217391304347e-06,
"loss": 0.3255,
"step": 20330
},
{
"epoch": 1.4109429872100585,
"grad_norm": 1.0234375,
"learning_rate": 1.4627826086956523e-06,
"loss": 0.1941,
"step": 20340
},
{
"epoch": 1.4116366789507913,
"grad_norm": 1.0859375,
"learning_rate": 1.4610434782608697e-06,
"loss": 0.2243,
"step": 20350
},
{
"epoch": 1.412330370691524,
"grad_norm": 1.140625,
"learning_rate": 1.459304347826087e-06,
"loss": 0.219,
"step": 20360
},
{
"epoch": 1.4130240624322568,
"grad_norm": 1.203125,
"learning_rate": 1.4575652173913044e-06,
"loss": 0.225,
"step": 20370
},
{
"epoch": 1.4137177541729895,
"grad_norm": 1.140625,
"learning_rate": 1.455826086956522e-06,
"loss": 0.2449,
"step": 20380
},
{
"epoch": 1.414411445913722,
"grad_norm": 0.984375,
"learning_rate": 1.4540869565217391e-06,
"loss": 0.2189,
"step": 20390
},
{
"epoch": 1.4151051376544548,
"grad_norm": 1.390625,
"learning_rate": 1.4523478260869567e-06,
"loss": 0.2627,
"step": 20400
},
{
"epoch": 1.4157988293951875,
"grad_norm": 1.046875,
"learning_rate": 1.4506086956521739e-06,
"loss": 0.2063,
"step": 20410
},
{
"epoch": 1.4164925211359203,
"grad_norm": 1.0078125,
"learning_rate": 1.4488695652173915e-06,
"loss": 0.213,
"step": 20420
},
{
"epoch": 1.4171862128766528,
"grad_norm": 1.3671875,
"learning_rate": 1.4471304347826086e-06,
"loss": 0.2139,
"step": 20430
},
{
"epoch": 1.4178799046173856,
"grad_norm": 1.2265625,
"learning_rate": 1.4453913043478262e-06,
"loss": 0.2276,
"step": 20440
},
{
"epoch": 1.4185735963581183,
"grad_norm": 1.234375,
"learning_rate": 1.4436521739130436e-06,
"loss": 0.2084,
"step": 20450
},
{
"epoch": 1.419267288098851,
"grad_norm": 0.9765625,
"learning_rate": 1.441913043478261e-06,
"loss": 0.2437,
"step": 20460
},
{
"epoch": 1.4199609798395838,
"grad_norm": 1.15625,
"learning_rate": 1.4401739130434783e-06,
"loss": 0.2394,
"step": 20470
},
{
"epoch": 1.4206546715803166,
"grad_norm": 1.4765625,
"learning_rate": 1.438434782608696e-06,
"loss": 0.251,
"step": 20480
},
{
"epoch": 1.4213483633210493,
"grad_norm": 1.3125,
"learning_rate": 1.436695652173913e-06,
"loss": 0.288,
"step": 20490
},
{
"epoch": 1.422042055061782,
"grad_norm": 1.0390625,
"learning_rate": 1.4349565217391306e-06,
"loss": 0.2525,
"step": 20500
},
{
"epoch": 1.4227357468025146,
"grad_norm": 1.2578125,
"learning_rate": 1.4332173913043478e-06,
"loss": 0.2392,
"step": 20510
},
{
"epoch": 1.4234294385432473,
"grad_norm": 0.96484375,
"learning_rate": 1.4314782608695654e-06,
"loss": 0.2057,
"step": 20520
},
{
"epoch": 1.42412313028398,
"grad_norm": 1.5,
"learning_rate": 1.4297391304347828e-06,
"loss": 0.2368,
"step": 20530
},
{
"epoch": 1.4248168220247128,
"grad_norm": 1.265625,
"learning_rate": 1.4280000000000001e-06,
"loss": 0.2351,
"step": 20540
},
{
"epoch": 1.4255105137654454,
"grad_norm": 1.234375,
"learning_rate": 1.4262608695652175e-06,
"loss": 0.2066,
"step": 20550
},
{
"epoch": 1.426204205506178,
"grad_norm": 1.6328125,
"learning_rate": 1.424521739130435e-06,
"loss": 0.2405,
"step": 20560
},
{
"epoch": 1.4268978972469109,
"grad_norm": 1.328125,
"learning_rate": 1.4227826086956522e-06,
"loss": 0.2749,
"step": 20570
},
{
"epoch": 1.4275915889876436,
"grad_norm": 1.5078125,
"learning_rate": 1.4210434782608698e-06,
"loss": 0.2632,
"step": 20580
},
{
"epoch": 1.4282852807283763,
"grad_norm": 1.2109375,
"learning_rate": 1.419304347826087e-06,
"loss": 0.3155,
"step": 20590
},
{
"epoch": 1.428978972469109,
"grad_norm": 1.40625,
"learning_rate": 1.4175652173913046e-06,
"loss": 0.2016,
"step": 20600
},
{
"epoch": 1.4296726642098418,
"grad_norm": 1.1484375,
"learning_rate": 1.415826086956522e-06,
"loss": 0.2906,
"step": 20610
},
{
"epoch": 1.4303663559505746,
"grad_norm": 1.7890625,
"learning_rate": 1.4140869565217393e-06,
"loss": 0.2672,
"step": 20620
},
{
"epoch": 1.4310600476913071,
"grad_norm": 1.609375,
"learning_rate": 1.4123478260869567e-06,
"loss": 0.2527,
"step": 20630
},
{
"epoch": 1.4317537394320399,
"grad_norm": 1.1796875,
"learning_rate": 1.4106086956521738e-06,
"loss": 0.3494,
"step": 20640
},
{
"epoch": 1.4324474311727726,
"grad_norm": 0.8984375,
"learning_rate": 1.4088695652173914e-06,
"loss": 0.2156,
"step": 20650
},
{
"epoch": 1.4331411229135054,
"grad_norm": 1.2421875,
"learning_rate": 1.4071304347826086e-06,
"loss": 0.2444,
"step": 20660
},
{
"epoch": 1.433834814654238,
"grad_norm": 0.8828125,
"learning_rate": 1.4053913043478262e-06,
"loss": 0.2617,
"step": 20670
},
{
"epoch": 1.4345285063949706,
"grad_norm": 1.15625,
"learning_rate": 1.4036521739130435e-06,
"loss": 0.2533,
"step": 20680
},
{
"epoch": 1.4352221981357034,
"grad_norm": 1.1328125,
"learning_rate": 1.401913043478261e-06,
"loss": 0.2659,
"step": 20690
},
{
"epoch": 1.4359158898764361,
"grad_norm": 1.484375,
"learning_rate": 1.4001739130434783e-06,
"loss": 0.2591,
"step": 20700
},
{
"epoch": 1.436609581617169,
"grad_norm": 1.1953125,
"learning_rate": 1.3984347826086959e-06,
"loss": 0.2581,
"step": 20710
},
{
"epoch": 1.4373032733579016,
"grad_norm": 0.96484375,
"learning_rate": 1.396695652173913e-06,
"loss": 0.273,
"step": 20720
},
{
"epoch": 1.4379969650986344,
"grad_norm": 1.125,
"learning_rate": 1.3949565217391306e-06,
"loss": 0.2176,
"step": 20730
},
{
"epoch": 1.4386906568393671,
"grad_norm": 1.3359375,
"learning_rate": 1.3932173913043478e-06,
"loss": 0.2122,
"step": 20740
},
{
"epoch": 1.4393843485800997,
"grad_norm": 1.4140625,
"learning_rate": 1.3914782608695654e-06,
"loss": 0.2135,
"step": 20750
},
{
"epoch": 1.4400780403208324,
"grad_norm": 0.75,
"learning_rate": 1.3897391304347827e-06,
"loss": 0.2196,
"step": 20760
},
{
"epoch": 1.4407717320615652,
"grad_norm": 0.82421875,
"learning_rate": 1.388e-06,
"loss": 0.2397,
"step": 20770
},
{
"epoch": 1.441465423802298,
"grad_norm": 1.2578125,
"learning_rate": 1.3862608695652175e-06,
"loss": 0.2441,
"step": 20780
},
{
"epoch": 1.4421591155430304,
"grad_norm": 1.1484375,
"learning_rate": 1.384521739130435e-06,
"loss": 0.2584,
"step": 20790
},
{
"epoch": 1.4428528072837632,
"grad_norm": 1.15625,
"learning_rate": 1.3827826086956522e-06,
"loss": 0.2123,
"step": 20800
},
{
"epoch": 1.443546499024496,
"grad_norm": 0.96484375,
"learning_rate": 1.3810434782608698e-06,
"loss": 0.2157,
"step": 20810
},
{
"epoch": 1.4442401907652287,
"grad_norm": 1.4609375,
"learning_rate": 1.379304347826087e-06,
"loss": 0.2407,
"step": 20820
},
{
"epoch": 1.4449338825059614,
"grad_norm": 1.3203125,
"learning_rate": 1.3775652173913045e-06,
"loss": 0.2347,
"step": 20830
},
{
"epoch": 1.4456275742466942,
"grad_norm": 1.234375,
"learning_rate": 1.375826086956522e-06,
"loss": 0.2429,
"step": 20840
},
{
"epoch": 1.446321265987427,
"grad_norm": 1.1328125,
"learning_rate": 1.3740869565217393e-06,
"loss": 0.2441,
"step": 20850
},
{
"epoch": 1.4470149577281595,
"grad_norm": 1.421875,
"learning_rate": 1.3723478260869566e-06,
"loss": 0.2429,
"step": 20860
},
{
"epoch": 1.4477086494688922,
"grad_norm": 1.0859375,
"learning_rate": 1.3706086956521742e-06,
"loss": 0.2133,
"step": 20870
},
{
"epoch": 1.448402341209625,
"grad_norm": 1.0546875,
"learning_rate": 1.3688695652173914e-06,
"loss": 0.232,
"step": 20880
},
{
"epoch": 1.4490960329503577,
"grad_norm": 1.4375,
"learning_rate": 1.367130434782609e-06,
"loss": 0.2202,
"step": 20890
},
{
"epoch": 1.4497897246910905,
"grad_norm": 1.109375,
"learning_rate": 1.3653913043478261e-06,
"loss": 0.2187,
"step": 20900
},
{
"epoch": 1.450483416431823,
"grad_norm": 1.0078125,
"learning_rate": 1.3636521739130437e-06,
"loss": 0.2306,
"step": 20910
},
{
"epoch": 1.4511771081725557,
"grad_norm": 1.40625,
"learning_rate": 1.3619130434782609e-06,
"loss": 0.2472,
"step": 20920
},
{
"epoch": 1.4518707999132885,
"grad_norm": 1.78125,
"learning_rate": 1.3601739130434782e-06,
"loss": 0.2569,
"step": 20930
},
{
"epoch": 1.4525644916540212,
"grad_norm": 0.85546875,
"learning_rate": 1.3584347826086958e-06,
"loss": 0.2277,
"step": 20940
},
{
"epoch": 1.453258183394754,
"grad_norm": 1.140625,
"learning_rate": 1.356695652173913e-06,
"loss": 0.2238,
"step": 20950
},
{
"epoch": 1.4539518751354867,
"grad_norm": 1.375,
"learning_rate": 1.3549565217391306e-06,
"loss": 0.2185,
"step": 20960
},
{
"epoch": 1.4546455668762195,
"grad_norm": 1.2734375,
"learning_rate": 1.3532173913043477e-06,
"loss": 0.2834,
"step": 20970
},
{
"epoch": 1.455339258616952,
"grad_norm": 1.25,
"learning_rate": 1.3514782608695653e-06,
"loss": 0.2911,
"step": 20980
},
{
"epoch": 1.4560329503576848,
"grad_norm": 1.4453125,
"learning_rate": 1.3497391304347827e-06,
"loss": 0.2226,
"step": 20990
},
{
"epoch": 1.4567266420984175,
"grad_norm": 1.0546875,
"learning_rate": 1.348e-06,
"loss": 0.2248,
"step": 21000
},
{
"epoch": 1.4574203338391503,
"grad_norm": 1.1171875,
"learning_rate": 1.3462608695652174e-06,
"loss": 0.2437,
"step": 21010
},
{
"epoch": 1.458114025579883,
"grad_norm": 1.3125,
"learning_rate": 1.344521739130435e-06,
"loss": 0.2421,
"step": 21020
},
{
"epoch": 1.4588077173206155,
"grad_norm": 1.328125,
"learning_rate": 1.3427826086956522e-06,
"loss": 0.2161,
"step": 21030
},
{
"epoch": 1.4595014090613483,
"grad_norm": 1.34375,
"learning_rate": 1.3410434782608697e-06,
"loss": 0.245,
"step": 21040
},
{
"epoch": 1.460195100802081,
"grad_norm": 1.4140625,
"learning_rate": 1.339304347826087e-06,
"loss": 0.2432,
"step": 21050
},
{
"epoch": 1.4608887925428138,
"grad_norm": 1.5625,
"learning_rate": 1.3375652173913045e-06,
"loss": 0.2379,
"step": 21060
},
{
"epoch": 1.4615824842835465,
"grad_norm": 1.5859375,
"learning_rate": 1.3358260869565219e-06,
"loss": 0.2513,
"step": 21070
},
{
"epoch": 1.4622761760242793,
"grad_norm": 1.203125,
"learning_rate": 1.3340869565217392e-06,
"loss": 0.3091,
"step": 21080
},
{
"epoch": 1.462969867765012,
"grad_norm": 1.4296875,
"learning_rate": 1.3323478260869566e-06,
"loss": 0.2542,
"step": 21090
},
{
"epoch": 1.4636635595057446,
"grad_norm": 1.296875,
"learning_rate": 1.3306086956521742e-06,
"loss": 0.2138,
"step": 21100
},
{
"epoch": 1.4643572512464773,
"grad_norm": 1.1484375,
"learning_rate": 1.3288695652173913e-06,
"loss": 0.252,
"step": 21110
},
{
"epoch": 1.46505094298721,
"grad_norm": 1.25,
"learning_rate": 1.327130434782609e-06,
"loss": 0.2159,
"step": 21120
},
{
"epoch": 1.4657446347279428,
"grad_norm": 1.3359375,
"learning_rate": 1.325391304347826e-06,
"loss": 0.2535,
"step": 21130
},
{
"epoch": 1.4664383264686756,
"grad_norm": 1.6328125,
"learning_rate": 1.3236521739130437e-06,
"loss": 0.3247,
"step": 21140
},
{
"epoch": 1.467132018209408,
"grad_norm": 1.1328125,
"learning_rate": 1.3219130434782608e-06,
"loss": 0.2415,
"step": 21150
},
{
"epoch": 1.4678257099501408,
"grad_norm": 1.125,
"learning_rate": 1.3201739130434784e-06,
"loss": 0.2295,
"step": 21160
},
{
"epoch": 1.4685194016908736,
"grad_norm": 1.3828125,
"learning_rate": 1.3184347826086958e-06,
"loss": 0.2348,
"step": 21170
},
{
"epoch": 1.4692130934316063,
"grad_norm": 1.1796875,
"learning_rate": 1.3166956521739134e-06,
"loss": 0.2122,
"step": 21180
},
{
"epoch": 1.469906785172339,
"grad_norm": 1.078125,
"learning_rate": 1.3149565217391305e-06,
"loss": 0.2565,
"step": 21190
},
{
"epoch": 1.4706004769130718,
"grad_norm": 1.390625,
"learning_rate": 1.3132173913043481e-06,
"loss": 0.2724,
"step": 21200
},
{
"epoch": 1.4712941686538046,
"grad_norm": 1.140625,
"learning_rate": 1.3114782608695653e-06,
"loss": 0.2413,
"step": 21210
},
{
"epoch": 1.471987860394537,
"grad_norm": 1.7890625,
"learning_rate": 1.3097391304347829e-06,
"loss": 0.235,
"step": 21220
},
{
"epoch": 1.4726815521352699,
"grad_norm": 0.92578125,
"learning_rate": 1.308e-06,
"loss": 0.2664,
"step": 21230
},
{
"epoch": 1.4733752438760026,
"grad_norm": 1.2578125,
"learning_rate": 1.3062608695652174e-06,
"loss": 0.2151,
"step": 21240
},
{
"epoch": 1.4740689356167354,
"grad_norm": 1.34375,
"learning_rate": 1.304521739130435e-06,
"loss": 0.2598,
"step": 21250
},
{
"epoch": 1.474762627357468,
"grad_norm": 1.7265625,
"learning_rate": 1.3027826086956521e-06,
"loss": 0.2444,
"step": 21260
},
{
"epoch": 1.4754563190982006,
"grad_norm": 1.671875,
"learning_rate": 1.3010434782608697e-06,
"loss": 0.3088,
"step": 21270
},
{
"epoch": 1.4761500108389334,
"grad_norm": 1.1484375,
"learning_rate": 1.2993043478260869e-06,
"loss": 0.2525,
"step": 21280
},
{
"epoch": 1.4768437025796661,
"grad_norm": 1.2109375,
"learning_rate": 1.2975652173913045e-06,
"loss": 0.2769,
"step": 21290
},
{
"epoch": 1.4775373943203989,
"grad_norm": 1.125,
"learning_rate": 1.2958260869565218e-06,
"loss": 0.2298,
"step": 21300
},
{
"epoch": 1.4782310860611316,
"grad_norm": 2.3125,
"learning_rate": 1.2940869565217392e-06,
"loss": 0.3014,
"step": 21310
},
{
"epoch": 1.4789247778018644,
"grad_norm": 2.015625,
"learning_rate": 1.2923478260869566e-06,
"loss": 0.2391,
"step": 21320
},
{
"epoch": 1.4796184695425971,
"grad_norm": 1.1640625,
"learning_rate": 1.2906086956521741e-06,
"loss": 0.2179,
"step": 21330
},
{
"epoch": 1.4803121612833297,
"grad_norm": 1.25,
"learning_rate": 1.2888695652173913e-06,
"loss": 0.2062,
"step": 21340
},
{
"epoch": 1.4810058530240624,
"grad_norm": 1.453125,
"learning_rate": 1.2871304347826089e-06,
"loss": 0.2396,
"step": 21350
},
{
"epoch": 1.4816995447647952,
"grad_norm": 1.09375,
"learning_rate": 1.285391304347826e-06,
"loss": 0.2306,
"step": 21360
},
{
"epoch": 1.482393236505528,
"grad_norm": 1.0390625,
"learning_rate": 1.2836521739130436e-06,
"loss": 0.3046,
"step": 21370
},
{
"epoch": 1.4830869282462606,
"grad_norm": 1.1796875,
"learning_rate": 1.281913043478261e-06,
"loss": 0.2427,
"step": 21380
},
{
"epoch": 1.4837806199869932,
"grad_norm": 1.96875,
"learning_rate": 1.2801739130434784e-06,
"loss": 0.2876,
"step": 21390
},
{
"epoch": 1.484474311727726,
"grad_norm": 1.25,
"learning_rate": 1.2784347826086957e-06,
"loss": 0.2684,
"step": 21400
},
{
"epoch": 1.4851680034684587,
"grad_norm": 1.34375,
"learning_rate": 1.2766956521739133e-06,
"loss": 0.2441,
"step": 21410
},
{
"epoch": 1.4858616952091914,
"grad_norm": 1.15625,
"learning_rate": 1.2749565217391305e-06,
"loss": 0.2629,
"step": 21420
},
{
"epoch": 1.4865553869499242,
"grad_norm": 1.265625,
"learning_rate": 1.273217391304348e-06,
"loss": 0.3055,
"step": 21430
},
{
"epoch": 1.487249078690657,
"grad_norm": 1.7265625,
"learning_rate": 1.2714782608695652e-06,
"loss": 0.2489,
"step": 21440
},
{
"epoch": 1.4879427704313897,
"grad_norm": 1.0703125,
"learning_rate": 1.2697391304347828e-06,
"loss": 0.2639,
"step": 21450
},
{
"epoch": 1.4886364621721222,
"grad_norm": 1.2578125,
"learning_rate": 1.268e-06,
"loss": 0.2326,
"step": 21460
},
{
"epoch": 1.489330153912855,
"grad_norm": 1.2109375,
"learning_rate": 1.2662608695652176e-06,
"loss": 0.2768,
"step": 21470
},
{
"epoch": 1.4900238456535877,
"grad_norm": 1.3671875,
"learning_rate": 1.264521739130435e-06,
"loss": 0.2324,
"step": 21480
},
{
"epoch": 1.4907175373943204,
"grad_norm": 1.1953125,
"learning_rate": 1.2627826086956523e-06,
"loss": 0.2632,
"step": 21490
},
{
"epoch": 1.491411229135053,
"grad_norm": 1.34375,
"learning_rate": 1.2610434782608697e-06,
"loss": 0.1857,
"step": 21500
},
{
"epoch": 1.4921049208757857,
"grad_norm": 1.0078125,
"learning_rate": 1.2593043478260873e-06,
"loss": 0.2186,
"step": 21510
},
{
"epoch": 1.4927986126165185,
"grad_norm": 1.234375,
"learning_rate": 1.2575652173913044e-06,
"loss": 0.2557,
"step": 21520
},
{
"epoch": 1.4934923043572512,
"grad_norm": 1.234375,
"learning_rate": 1.255826086956522e-06,
"loss": 0.2264,
"step": 21530
},
{
"epoch": 1.494185996097984,
"grad_norm": 0.99609375,
"learning_rate": 1.2540869565217392e-06,
"loss": 0.2318,
"step": 21540
},
{
"epoch": 1.4948796878387167,
"grad_norm": 1.21875,
"learning_rate": 1.2523478260869565e-06,
"loss": 0.2262,
"step": 21550
},
{
"epoch": 1.4955733795794495,
"grad_norm": 1.3515625,
"learning_rate": 1.2506086956521741e-06,
"loss": 0.2534,
"step": 21560
},
{
"epoch": 1.4962670713201822,
"grad_norm": 1.328125,
"learning_rate": 1.2488695652173915e-06,
"loss": 0.2823,
"step": 21570
},
{
"epoch": 1.4969607630609147,
"grad_norm": 1.3671875,
"learning_rate": 1.2471304347826088e-06,
"loss": 0.2572,
"step": 21580
},
{
"epoch": 1.4976544548016475,
"grad_norm": 1.109375,
"learning_rate": 1.2453913043478262e-06,
"loss": 0.2499,
"step": 21590
},
{
"epoch": 1.4983481465423802,
"grad_norm": 0.96484375,
"learning_rate": 1.2436521739130436e-06,
"loss": 0.2307,
"step": 21600
},
{
"epoch": 1.499041838283113,
"grad_norm": 1.21875,
"learning_rate": 1.241913043478261e-06,
"loss": 0.2116,
"step": 21610
},
{
"epoch": 1.4997355300238455,
"grad_norm": 1.2109375,
"learning_rate": 1.2401739130434783e-06,
"loss": 0.2305,
"step": 21620
},
{
"epoch": 1.5004292217645783,
"grad_norm": 1.3203125,
"learning_rate": 1.2384347826086957e-06,
"loss": 0.3057,
"step": 21630
},
{
"epoch": 1.501122913505311,
"grad_norm": 1.3203125,
"learning_rate": 1.2366956521739133e-06,
"loss": 0.24,
"step": 21640
},
{
"epoch": 1.5018166052460438,
"grad_norm": 1.40625,
"learning_rate": 1.2349565217391307e-06,
"loss": 0.2924,
"step": 21650
},
{
"epoch": 1.5025102969867765,
"grad_norm": 1.125,
"learning_rate": 1.233217391304348e-06,
"loss": 0.2801,
"step": 21660
},
{
"epoch": 1.5032039887275093,
"grad_norm": 1.203125,
"learning_rate": 1.2314782608695654e-06,
"loss": 0.2273,
"step": 21670
},
{
"epoch": 1.503897680468242,
"grad_norm": 1.1640625,
"learning_rate": 1.2297391304347828e-06,
"loss": 0.2464,
"step": 21680
},
{
"epoch": 1.5045913722089748,
"grad_norm": 1.1953125,
"learning_rate": 1.2280000000000001e-06,
"loss": 0.284,
"step": 21690
},
{
"epoch": 1.5052850639497075,
"grad_norm": 1.140625,
"learning_rate": 1.2262608695652175e-06,
"loss": 0.2401,
"step": 21700
},
{
"epoch": 1.50597875569044,
"grad_norm": 1.1328125,
"learning_rate": 1.2245217391304349e-06,
"loss": 0.2709,
"step": 21710
},
{
"epoch": 1.5066724474311728,
"grad_norm": 1.1328125,
"learning_rate": 1.2227826086956523e-06,
"loss": 0.2462,
"step": 21720
},
{
"epoch": 1.5073661391719055,
"grad_norm": 1.28125,
"learning_rate": 1.2210434782608696e-06,
"loss": 0.2562,
"step": 21730
},
{
"epoch": 1.508059830912638,
"grad_norm": 0.9765625,
"learning_rate": 1.219304347826087e-06,
"loss": 0.2227,
"step": 21740
},
{
"epoch": 1.5087535226533708,
"grad_norm": 1.265625,
"learning_rate": 1.2175652173913044e-06,
"loss": 0.2564,
"step": 21750
},
{
"epoch": 1.5094472143941036,
"grad_norm": 0.921875,
"learning_rate": 1.2158260869565217e-06,
"loss": 0.2372,
"step": 21760
},
{
"epoch": 1.5101409061348363,
"grad_norm": 0.99609375,
"learning_rate": 1.2140869565217391e-06,
"loss": 0.2139,
"step": 21770
},
{
"epoch": 1.510834597875569,
"grad_norm": 1.3359375,
"learning_rate": 1.2123478260869565e-06,
"loss": 0.2311,
"step": 21780
},
{
"epoch": 1.5115282896163018,
"grad_norm": 1.1796875,
"learning_rate": 1.210608695652174e-06,
"loss": 0.2667,
"step": 21790
},
{
"epoch": 1.5122219813570346,
"grad_norm": 1.1171875,
"learning_rate": 1.2088695652173914e-06,
"loss": 0.2117,
"step": 21800
},
{
"epoch": 1.5129156730977673,
"grad_norm": 1.03125,
"learning_rate": 1.2071304347826088e-06,
"loss": 0.2567,
"step": 21810
},
{
"epoch": 1.5136093648384998,
"grad_norm": 1.3671875,
"learning_rate": 1.2053913043478262e-06,
"loss": 0.2249,
"step": 21820
},
{
"epoch": 1.5143030565792326,
"grad_norm": 1.125,
"learning_rate": 1.2036521739130436e-06,
"loss": 0.2845,
"step": 21830
},
{
"epoch": 1.5149967483199653,
"grad_norm": 1.359375,
"learning_rate": 1.201913043478261e-06,
"loss": 0.2194,
"step": 21840
},
{
"epoch": 1.5156904400606979,
"grad_norm": 1.2734375,
"learning_rate": 1.2001739130434783e-06,
"loss": 0.2333,
"step": 21850
},
{
"epoch": 1.5163841318014306,
"grad_norm": 1.234375,
"learning_rate": 1.1984347826086957e-06,
"loss": 0.2,
"step": 21860
},
{
"epoch": 1.5170778235421634,
"grad_norm": 1.1328125,
"learning_rate": 1.1966956521739132e-06,
"loss": 0.2649,
"step": 21870
},
{
"epoch": 1.517771515282896,
"grad_norm": 1.6171875,
"learning_rate": 1.1949565217391306e-06,
"loss": 0.2606,
"step": 21880
},
{
"epoch": 1.5184652070236289,
"grad_norm": 1.109375,
"learning_rate": 1.193217391304348e-06,
"loss": 0.2429,
"step": 21890
},
{
"epoch": 1.5191588987643616,
"grad_norm": 1.359375,
"learning_rate": 1.1914782608695654e-06,
"loss": 0.2327,
"step": 21900
},
{
"epoch": 1.5198525905050944,
"grad_norm": 1.0,
"learning_rate": 1.1897391304347827e-06,
"loss": 0.2417,
"step": 21910
},
{
"epoch": 1.520546282245827,
"grad_norm": 1.046875,
"learning_rate": 1.188e-06,
"loss": 0.2538,
"step": 21920
},
{
"epoch": 1.5212399739865599,
"grad_norm": 1.078125,
"learning_rate": 1.1862608695652175e-06,
"loss": 0.2746,
"step": 21930
},
{
"epoch": 1.5219336657272924,
"grad_norm": 1.3359375,
"learning_rate": 1.1845217391304348e-06,
"loss": 0.2102,
"step": 21940
},
{
"epoch": 1.5226273574680251,
"grad_norm": 1.2421875,
"learning_rate": 1.1827826086956522e-06,
"loss": 0.2367,
"step": 21950
},
{
"epoch": 1.5233210492087579,
"grad_norm": 0.85546875,
"learning_rate": 1.1810434782608698e-06,
"loss": 0.2139,
"step": 21960
},
{
"epoch": 1.5240147409494904,
"grad_norm": 1.15625,
"learning_rate": 1.1793043478260872e-06,
"loss": 0.3149,
"step": 21970
},
{
"epoch": 1.5247084326902232,
"grad_norm": 1.1796875,
"learning_rate": 1.1775652173913045e-06,
"loss": 0.2062,
"step": 21980
},
{
"epoch": 1.525402124430956,
"grad_norm": 1.46875,
"learning_rate": 1.175826086956522e-06,
"loss": 0.2454,
"step": 21990
},
{
"epoch": 1.5260958161716887,
"grad_norm": 1.0078125,
"learning_rate": 1.1740869565217393e-06,
"loss": 0.1977,
"step": 22000
},
{
"epoch": 1.5267895079124214,
"grad_norm": 1.5546875,
"learning_rate": 1.1723478260869567e-06,
"loss": 0.26,
"step": 22010
},
{
"epoch": 1.5274831996531542,
"grad_norm": 1.015625,
"learning_rate": 1.170608695652174e-06,
"loss": 0.2235,
"step": 22020
},
{
"epoch": 1.528176891393887,
"grad_norm": 1.265625,
"learning_rate": 1.1688695652173914e-06,
"loss": 0.2297,
"step": 22030
},
{
"epoch": 1.5288705831346197,
"grad_norm": 1.25,
"learning_rate": 1.1671304347826088e-06,
"loss": 0.2571,
"step": 22040
},
{
"epoch": 1.5295642748753524,
"grad_norm": 1.4453125,
"learning_rate": 1.1653913043478261e-06,
"loss": 0.2845,
"step": 22050
},
{
"epoch": 1.530257966616085,
"grad_norm": 1.453125,
"learning_rate": 1.1636521739130435e-06,
"loss": 0.2432,
"step": 22060
},
{
"epoch": 1.5309516583568177,
"grad_norm": 1.96875,
"learning_rate": 1.1619130434782609e-06,
"loss": 0.2807,
"step": 22070
},
{
"epoch": 1.5316453500975504,
"grad_norm": 1.09375,
"learning_rate": 1.1601739130434783e-06,
"loss": 0.222,
"step": 22080
},
{
"epoch": 1.532339041838283,
"grad_norm": 1.1875,
"learning_rate": 1.1584347826086956e-06,
"loss": 0.2292,
"step": 22090
},
{
"epoch": 1.5330327335790157,
"grad_norm": 1.8203125,
"learning_rate": 1.1566956521739132e-06,
"loss": 0.2663,
"step": 22100
},
{
"epoch": 1.5337264253197485,
"grad_norm": 1.3359375,
"learning_rate": 1.1549565217391306e-06,
"loss": 0.2528,
"step": 22110
},
{
"epoch": 1.5344201170604812,
"grad_norm": 1.484375,
"learning_rate": 1.153217391304348e-06,
"loss": 0.2552,
"step": 22120
},
{
"epoch": 1.535113808801214,
"grad_norm": 1.0859375,
"learning_rate": 1.1514782608695653e-06,
"loss": 0.1995,
"step": 22130
},
{
"epoch": 1.5358075005419467,
"grad_norm": 1.453125,
"learning_rate": 1.1497391304347827e-06,
"loss": 0.2302,
"step": 22140
},
{
"epoch": 1.5365011922826795,
"grad_norm": 1.1640625,
"learning_rate": 1.148e-06,
"loss": 0.2027,
"step": 22150
},
{
"epoch": 1.5371948840234122,
"grad_norm": 1.1875,
"learning_rate": 1.1462608695652174e-06,
"loss": 0.234,
"step": 22160
},
{
"epoch": 1.537888575764145,
"grad_norm": 1.3359375,
"learning_rate": 1.1445217391304348e-06,
"loss": 0.2325,
"step": 22170
},
{
"epoch": 1.5385822675048775,
"grad_norm": 1.703125,
"learning_rate": 1.1427826086956522e-06,
"loss": 0.2567,
"step": 22180
},
{
"epoch": 1.5392759592456102,
"grad_norm": 1.21875,
"learning_rate": 1.1410434782608698e-06,
"loss": 0.2226,
"step": 22190
},
{
"epoch": 1.539969650986343,
"grad_norm": 1.1015625,
"learning_rate": 1.1393043478260871e-06,
"loss": 0.2206,
"step": 22200
},
{
"epoch": 1.5406633427270755,
"grad_norm": 1.0234375,
"learning_rate": 1.1375652173913045e-06,
"loss": 0.2342,
"step": 22210
},
{
"epoch": 1.5413570344678083,
"grad_norm": 1.1875,
"learning_rate": 1.1358260869565219e-06,
"loss": 0.2695,
"step": 22220
},
{
"epoch": 1.542050726208541,
"grad_norm": 1.3671875,
"learning_rate": 1.1340869565217392e-06,
"loss": 0.2812,
"step": 22230
},
{
"epoch": 1.5427444179492737,
"grad_norm": 0.95703125,
"learning_rate": 1.1323478260869566e-06,
"loss": 0.2701,
"step": 22240
},
{
"epoch": 1.5434381096900065,
"grad_norm": 1.3046875,
"learning_rate": 1.130608695652174e-06,
"loss": 0.2258,
"step": 22250
},
{
"epoch": 1.5441318014307392,
"grad_norm": 0.96484375,
"learning_rate": 1.1288695652173914e-06,
"loss": 0.2496,
"step": 22260
},
{
"epoch": 1.544825493171472,
"grad_norm": 1.1796875,
"learning_rate": 1.127130434782609e-06,
"loss": 0.2058,
"step": 22270
},
{
"epoch": 1.5455191849122047,
"grad_norm": 1.265625,
"learning_rate": 1.1253913043478263e-06,
"loss": 0.2076,
"step": 22280
},
{
"epoch": 1.5462128766529375,
"grad_norm": 0.8984375,
"learning_rate": 1.1236521739130437e-06,
"loss": 0.2568,
"step": 22290
},
{
"epoch": 1.54690656839367,
"grad_norm": 1.2578125,
"learning_rate": 1.121913043478261e-06,
"loss": 0.2804,
"step": 22300
},
{
"epoch": 1.5476002601344028,
"grad_norm": 1.109375,
"learning_rate": 1.1201739130434784e-06,
"loss": 0.2401,
"step": 22310
},
{
"epoch": 1.5482939518751355,
"grad_norm": 0.953125,
"learning_rate": 1.1184347826086958e-06,
"loss": 0.2465,
"step": 22320
},
{
"epoch": 1.548987643615868,
"grad_norm": 1.171875,
"learning_rate": 1.1166956521739132e-06,
"loss": 0.3361,
"step": 22330
},
{
"epoch": 1.5496813353566008,
"grad_norm": 1.3359375,
"learning_rate": 1.1149565217391305e-06,
"loss": 0.2217,
"step": 22340
},
{
"epoch": 1.5503750270973335,
"grad_norm": 1.1875,
"learning_rate": 1.113217391304348e-06,
"loss": 0.2402,
"step": 22350
},
{
"epoch": 1.5510687188380663,
"grad_norm": 1.09375,
"learning_rate": 1.1114782608695653e-06,
"loss": 0.2065,
"step": 22360
},
{
"epoch": 1.551762410578799,
"grad_norm": 0.921875,
"learning_rate": 1.1097391304347827e-06,
"loss": 0.237,
"step": 22370
},
{
"epoch": 1.5524561023195318,
"grad_norm": 1.03125,
"learning_rate": 1.108e-06,
"loss": 0.3074,
"step": 22380
},
{
"epoch": 1.5531497940602645,
"grad_norm": 1.1796875,
"learning_rate": 1.1062608695652174e-06,
"loss": 0.2363,
"step": 22390
},
{
"epoch": 1.5538434858009973,
"grad_norm": 1.328125,
"learning_rate": 1.1045217391304348e-06,
"loss": 0.3468,
"step": 22400
},
{
"epoch": 1.55453717754173,
"grad_norm": 1.578125,
"learning_rate": 1.1027826086956521e-06,
"loss": 0.2699,
"step": 22410
},
{
"epoch": 1.5552308692824626,
"grad_norm": 1.375,
"learning_rate": 1.1010434782608697e-06,
"loss": 0.2687,
"step": 22420
},
{
"epoch": 1.5559245610231953,
"grad_norm": 0.9375,
"learning_rate": 1.099304347826087e-06,
"loss": 0.2097,
"step": 22430
},
{
"epoch": 1.556618252763928,
"grad_norm": 1.109375,
"learning_rate": 1.0975652173913045e-06,
"loss": 0.2544,
"step": 22440
},
{
"epoch": 1.5573119445046606,
"grad_norm": 1.1015625,
"learning_rate": 1.0958260869565218e-06,
"loss": 0.229,
"step": 22450
},
{
"epoch": 1.5580056362453933,
"grad_norm": 1.15625,
"learning_rate": 1.0940869565217392e-06,
"loss": 0.3094,
"step": 22460
},
{
"epoch": 1.558699327986126,
"grad_norm": 0.9296875,
"learning_rate": 1.0923478260869566e-06,
"loss": 0.1948,
"step": 22470
},
{
"epoch": 1.5593930197268588,
"grad_norm": 1.4375,
"learning_rate": 1.090608695652174e-06,
"loss": 0.2203,
"step": 22480
},
{
"epoch": 1.5600867114675916,
"grad_norm": 1.1328125,
"learning_rate": 1.0888695652173913e-06,
"loss": 0.2523,
"step": 22490
},
{
"epoch": 1.5607804032083243,
"grad_norm": 1.203125,
"learning_rate": 1.087130434782609e-06,
"loss": 0.2415,
"step": 22500
},
{
"epoch": 1.561474094949057,
"grad_norm": 1.15625,
"learning_rate": 1.0853913043478263e-06,
"loss": 0.268,
"step": 22510
},
{
"epoch": 1.5621677866897898,
"grad_norm": 1.4453125,
"learning_rate": 1.0836521739130436e-06,
"loss": 0.2474,
"step": 22520
},
{
"epoch": 1.5628614784305226,
"grad_norm": 1.15625,
"learning_rate": 1.081913043478261e-06,
"loss": 0.3104,
"step": 22530
},
{
"epoch": 1.5635551701712551,
"grad_norm": 1.0703125,
"learning_rate": 1.0801739130434784e-06,
"loss": 0.2487,
"step": 22540
},
{
"epoch": 1.5642488619119879,
"grad_norm": 1.21875,
"learning_rate": 1.0784347826086958e-06,
"loss": 0.2224,
"step": 22550
},
{
"epoch": 1.5649425536527206,
"grad_norm": 1.046875,
"learning_rate": 1.0766956521739131e-06,
"loss": 0.2211,
"step": 22560
},
{
"epoch": 1.5656362453934531,
"grad_norm": 0.99609375,
"learning_rate": 1.0749565217391305e-06,
"loss": 0.2382,
"step": 22570
},
{
"epoch": 1.566329937134186,
"grad_norm": 1.5078125,
"learning_rate": 1.0732173913043479e-06,
"loss": 0.2285,
"step": 22580
},
{
"epoch": 1.5670236288749186,
"grad_norm": 1.5078125,
"learning_rate": 1.0714782608695655e-06,
"loss": 0.3084,
"step": 22590
},
{
"epoch": 1.5677173206156514,
"grad_norm": 1.140625,
"learning_rate": 1.0697391304347828e-06,
"loss": 0.2316,
"step": 22600
},
{
"epoch": 1.5684110123563841,
"grad_norm": 1.203125,
"learning_rate": 1.0680000000000002e-06,
"loss": 0.2359,
"step": 22610
},
{
"epoch": 1.5691047040971169,
"grad_norm": 1.0390625,
"learning_rate": 1.0662608695652176e-06,
"loss": 0.3312,
"step": 22620
},
{
"epoch": 1.5697983958378496,
"grad_norm": 1.2734375,
"learning_rate": 1.064521739130435e-06,
"loss": 0.2094,
"step": 22630
},
{
"epoch": 1.5704920875785824,
"grad_norm": 1.2265625,
"learning_rate": 1.062782608695652e-06,
"loss": 0.2005,
"step": 22640
},
{
"epoch": 1.5711857793193151,
"grad_norm": 1.3671875,
"learning_rate": 1.0610434782608697e-06,
"loss": 0.2151,
"step": 22650
},
{
"epoch": 1.5718794710600477,
"grad_norm": 1.28125,
"learning_rate": 1.059304347826087e-06,
"loss": 0.226,
"step": 22660
},
{
"epoch": 1.5725731628007804,
"grad_norm": 1.09375,
"learning_rate": 1.0575652173913044e-06,
"loss": 0.2764,
"step": 22670
},
{
"epoch": 1.5732668545415132,
"grad_norm": 1.1484375,
"learning_rate": 1.0558260869565218e-06,
"loss": 0.2149,
"step": 22680
},
{
"epoch": 1.5739605462822457,
"grad_norm": 0.8046875,
"learning_rate": 1.0540869565217392e-06,
"loss": 0.2572,
"step": 22690
},
{
"epoch": 1.5746542380229784,
"grad_norm": 1.359375,
"learning_rate": 1.0523478260869565e-06,
"loss": 0.2572,
"step": 22700
},
{
"epoch": 1.5753479297637112,
"grad_norm": 1.0703125,
"learning_rate": 1.050608695652174e-06,
"loss": 0.241,
"step": 22710
},
{
"epoch": 1.576041621504444,
"grad_norm": 1.078125,
"learning_rate": 1.0488695652173913e-06,
"loss": 0.2464,
"step": 22720
},
{
"epoch": 1.5767353132451767,
"grad_norm": 1.5,
"learning_rate": 1.0471304347826089e-06,
"loss": 0.2494,
"step": 22730
},
{
"epoch": 1.5774290049859094,
"grad_norm": 1.1640625,
"learning_rate": 1.0453913043478262e-06,
"loss": 0.314,
"step": 22740
},
{
"epoch": 1.5781226967266422,
"grad_norm": 1.203125,
"learning_rate": 1.0436521739130436e-06,
"loss": 0.231,
"step": 22750
},
{
"epoch": 1.578816388467375,
"grad_norm": 1.4453125,
"learning_rate": 1.041913043478261e-06,
"loss": 0.3283,
"step": 22760
},
{
"epoch": 1.5795100802081077,
"grad_norm": 1.0859375,
"learning_rate": 1.0401739130434783e-06,
"loss": 0.3071,
"step": 22770
},
{
"epoch": 1.5802037719488402,
"grad_norm": 1.125,
"learning_rate": 1.0384347826086957e-06,
"loss": 0.2743,
"step": 22780
},
{
"epoch": 1.580897463689573,
"grad_norm": 1.2890625,
"learning_rate": 1.036695652173913e-06,
"loss": 0.253,
"step": 22790
},
{
"epoch": 1.5815911554303057,
"grad_norm": 1.046875,
"learning_rate": 1.0349565217391305e-06,
"loss": 0.2143,
"step": 22800
},
{
"epoch": 1.5822848471710382,
"grad_norm": 1.140625,
"learning_rate": 1.0332173913043478e-06,
"loss": 0.2481,
"step": 22810
},
{
"epoch": 1.582978538911771,
"grad_norm": 1.375,
"learning_rate": 1.0314782608695654e-06,
"loss": 0.2572,
"step": 22820
},
{
"epoch": 1.5836722306525037,
"grad_norm": 1.171875,
"learning_rate": 1.0297391304347828e-06,
"loss": 0.2987,
"step": 22830
},
{
"epoch": 1.5843659223932365,
"grad_norm": 1.375,
"learning_rate": 1.0280000000000002e-06,
"loss": 0.2331,
"step": 22840
},
{
"epoch": 1.5850596141339692,
"grad_norm": 0.95703125,
"learning_rate": 1.0262608695652175e-06,
"loss": 0.2076,
"step": 22850
},
{
"epoch": 1.585753305874702,
"grad_norm": 1.125,
"learning_rate": 1.024521739130435e-06,
"loss": 0.2365,
"step": 22860
},
{
"epoch": 1.5864469976154347,
"grad_norm": 1.8828125,
"learning_rate": 1.0227826086956523e-06,
"loss": 0.2808,
"step": 22870
},
{
"epoch": 1.5871406893561675,
"grad_norm": 1.2265625,
"learning_rate": 1.0210434782608696e-06,
"loss": 0.2484,
"step": 22880
},
{
"epoch": 1.5878343810969002,
"grad_norm": 1.625,
"learning_rate": 1.019304347826087e-06,
"loss": 0.2585,
"step": 22890
},
{
"epoch": 1.5885280728376328,
"grad_norm": 1.1015625,
"learning_rate": 1.0175652173913044e-06,
"loss": 0.2316,
"step": 22900
},
{
"epoch": 1.5892217645783655,
"grad_norm": 1.6015625,
"learning_rate": 1.015826086956522e-06,
"loss": 0.2097,
"step": 22910
},
{
"epoch": 1.5899154563190983,
"grad_norm": 1.3203125,
"learning_rate": 1.0140869565217393e-06,
"loss": 0.275,
"step": 22920
},
{
"epoch": 1.5906091480598308,
"grad_norm": 1.359375,
"learning_rate": 1.0123478260869567e-06,
"loss": 0.2504,
"step": 22930
},
{
"epoch": 1.5913028398005635,
"grad_norm": 1.1953125,
"learning_rate": 1.0106086956521739e-06,
"loss": 0.2771,
"step": 22940
},
{
"epoch": 1.5919965315412963,
"grad_norm": 1.375,
"learning_rate": 1.0088695652173912e-06,
"loss": 0.2555,
"step": 22950
},
{
"epoch": 1.592690223282029,
"grad_norm": 1.2421875,
"learning_rate": 1.0071304347826088e-06,
"loss": 0.2447,
"step": 22960
},
{
"epoch": 1.5933839150227618,
"grad_norm": 1.015625,
"learning_rate": 1.0053913043478262e-06,
"loss": 0.2536,
"step": 22970
},
{
"epoch": 1.5940776067634945,
"grad_norm": 1.1328125,
"learning_rate": 1.0036521739130436e-06,
"loss": 0.2336,
"step": 22980
},
{
"epoch": 1.5947712985042273,
"grad_norm": 1.4765625,
"learning_rate": 1.001913043478261e-06,
"loss": 0.2328,
"step": 22990
},
{
"epoch": 1.59546499024496,
"grad_norm": 1.703125,
"learning_rate": 1.0001739130434783e-06,
"loss": 0.2339,
"step": 23000
},
{
"epoch": 1.5961586819856928,
"grad_norm": 1.4609375,
"learning_rate": 9.984347826086957e-07,
"loss": 0.2749,
"step": 23010
},
{
"epoch": 1.5968523737264253,
"grad_norm": 1.390625,
"learning_rate": 9.96695652173913e-07,
"loss": 0.2484,
"step": 23020
},
{
"epoch": 1.597546065467158,
"grad_norm": 1.2734375,
"learning_rate": 9.949565217391304e-07,
"loss": 0.2099,
"step": 23030
},
{
"epoch": 1.5982397572078908,
"grad_norm": 2.0,
"learning_rate": 9.932173913043478e-07,
"loss": 0.2897,
"step": 23040
},
{
"epoch": 1.5989334489486233,
"grad_norm": 1.296875,
"learning_rate": 9.914782608695654e-07,
"loss": 0.2628,
"step": 23050
},
{
"epoch": 1.599627140689356,
"grad_norm": 1.15625,
"learning_rate": 9.897391304347827e-07,
"loss": 0.1823,
"step": 23060
},
{
"epoch": 1.6003208324300888,
"grad_norm": 0.796875,
"learning_rate": 9.880000000000001e-07,
"loss": 0.2547,
"step": 23070
},
{
"epoch": 1.6010145241708216,
"grad_norm": 1.1328125,
"learning_rate": 9.862608695652175e-07,
"loss": 0.2082,
"step": 23080
},
{
"epoch": 1.6017082159115543,
"grad_norm": 0.78515625,
"learning_rate": 9.845217391304349e-07,
"loss": 0.2021,
"step": 23090
},
{
"epoch": 1.602401907652287,
"grad_norm": 1.0859375,
"learning_rate": 9.827826086956522e-07,
"loss": 0.2168,
"step": 23100
},
{
"epoch": 1.6030955993930198,
"grad_norm": 1.3984375,
"learning_rate": 9.810434782608696e-07,
"loss": 0.3124,
"step": 23110
},
{
"epoch": 1.6037892911337526,
"grad_norm": 1.234375,
"learning_rate": 9.79304347826087e-07,
"loss": 0.2166,
"step": 23120
},
{
"epoch": 1.604482982874485,
"grad_norm": 0.9609375,
"learning_rate": 9.775652173913043e-07,
"loss": 0.2531,
"step": 23130
},
{
"epoch": 1.6051766746152178,
"grad_norm": 1.109375,
"learning_rate": 9.75826086956522e-07,
"loss": 0.3279,
"step": 23140
},
{
"epoch": 1.6058703663559506,
"grad_norm": 1.453125,
"learning_rate": 9.740869565217393e-07,
"loss": 0.242,
"step": 23150
},
{
"epoch": 1.6065640580966831,
"grad_norm": 1.078125,
"learning_rate": 9.723478260869567e-07,
"loss": 0.1916,
"step": 23160
},
{
"epoch": 1.6072577498374159,
"grad_norm": 1.53125,
"learning_rate": 9.70608695652174e-07,
"loss": 0.3144,
"step": 23170
},
{
"epoch": 1.6079514415781486,
"grad_norm": 1.15625,
"learning_rate": 9.688695652173914e-07,
"loss": 0.251,
"step": 23180
},
{
"epoch": 1.6086451333188814,
"grad_norm": 1.359375,
"learning_rate": 9.671304347826088e-07,
"loss": 0.2514,
"step": 23190
},
{
"epoch": 1.6093388250596141,
"grad_norm": 1.8984375,
"learning_rate": 9.653913043478261e-07,
"loss": 0.3102,
"step": 23200
},
{
"epoch": 1.6100325168003469,
"grad_norm": 1.2421875,
"learning_rate": 9.636521739130435e-07,
"loss": 0.2325,
"step": 23210
},
{
"epoch": 1.6107262085410796,
"grad_norm": 0.984375,
"learning_rate": 9.61913043478261e-07,
"loss": 0.2195,
"step": 23220
},
{
"epoch": 1.6114199002818124,
"grad_norm": 1.28125,
"learning_rate": 9.601739130434785e-07,
"loss": 0.2705,
"step": 23230
},
{
"epoch": 1.6121135920225451,
"grad_norm": 1.0234375,
"learning_rate": 9.584347826086958e-07,
"loss": 0.2394,
"step": 23240
},
{
"epoch": 1.6128072837632776,
"grad_norm": 1.2578125,
"learning_rate": 9.56695652173913e-07,
"loss": 0.2567,
"step": 23250
},
{
"epoch": 1.6135009755040104,
"grad_norm": 0.9453125,
"learning_rate": 9.549565217391304e-07,
"loss": 0.2116,
"step": 23260
},
{
"epoch": 1.6141946672447431,
"grad_norm": 1.125,
"learning_rate": 9.532173913043479e-07,
"loss": 0.2329,
"step": 23270
},
{
"epoch": 1.6148883589854757,
"grad_norm": 1.4765625,
"learning_rate": 9.514782608695652e-07,
"loss": 0.2868,
"step": 23280
},
{
"epoch": 1.6155820507262084,
"grad_norm": 1.1796875,
"learning_rate": 9.497391304347826e-07,
"loss": 0.289,
"step": 23290
},
{
"epoch": 1.6162757424669412,
"grad_norm": 1.171875,
"learning_rate": 9.480000000000001e-07,
"loss": 0.2032,
"step": 23300
},
{
"epoch": 1.616969434207674,
"grad_norm": 1.2890625,
"learning_rate": 9.462608695652174e-07,
"loss": 0.257,
"step": 23310
},
{
"epoch": 1.6176631259484067,
"grad_norm": 1.3046875,
"learning_rate": 9.445217391304348e-07,
"loss": 0.2119,
"step": 23320
},
{
"epoch": 1.6183568176891394,
"grad_norm": 1.296875,
"learning_rate": 9.427826086956522e-07,
"loss": 0.247,
"step": 23330
},
{
"epoch": 1.6190505094298722,
"grad_norm": 1.296875,
"learning_rate": 9.410434782608697e-07,
"loss": 0.2446,
"step": 23340
},
{
"epoch": 1.619744201170605,
"grad_norm": 1.5625,
"learning_rate": 9.39304347826087e-07,
"loss": 0.2619,
"step": 23350
},
{
"epoch": 1.6204378929113377,
"grad_norm": 1.390625,
"learning_rate": 9.375652173913044e-07,
"loss": 0.257,
"step": 23360
},
{
"epoch": 1.6211315846520702,
"grad_norm": 1.296875,
"learning_rate": 9.358260869565218e-07,
"loss": 0.2905,
"step": 23370
},
{
"epoch": 1.621825276392803,
"grad_norm": 1.0703125,
"learning_rate": 9.340869565217391e-07,
"loss": 0.2398,
"step": 23380
},
{
"epoch": 1.6225189681335357,
"grad_norm": 1.2421875,
"learning_rate": 9.323478260869566e-07,
"loss": 0.2423,
"step": 23390
},
{
"epoch": 1.6232126598742682,
"grad_norm": 1.3046875,
"learning_rate": 9.30608695652174e-07,
"loss": 0.2582,
"step": 23400
},
{
"epoch": 1.623906351615001,
"grad_norm": 1.3359375,
"learning_rate": 9.288695652173914e-07,
"loss": 0.2746,
"step": 23410
},
{
"epoch": 1.6246000433557337,
"grad_norm": 1.125,
"learning_rate": 9.271304347826087e-07,
"loss": 0.2465,
"step": 23420
},
{
"epoch": 1.6252937350964665,
"grad_norm": 1.296875,
"learning_rate": 9.253913043478262e-07,
"loss": 0.223,
"step": 23430
},
{
"epoch": 1.6259874268371992,
"grad_norm": 1.203125,
"learning_rate": 9.236521739130436e-07,
"loss": 0.2014,
"step": 23440
},
{
"epoch": 1.626681118577932,
"grad_norm": 1.21875,
"learning_rate": 9.21913043478261e-07,
"loss": 0.2196,
"step": 23450
},
{
"epoch": 1.6273748103186647,
"grad_norm": 0.9765625,
"learning_rate": 9.201739130434783e-07,
"loss": 0.2466,
"step": 23460
},
{
"epoch": 1.6280685020593975,
"grad_norm": 1.03125,
"learning_rate": 9.184347826086958e-07,
"loss": 0.2713,
"step": 23470
},
{
"epoch": 1.6287621938001302,
"grad_norm": 0.7734375,
"learning_rate": 9.166956521739132e-07,
"loss": 0.2448,
"step": 23480
},
{
"epoch": 1.6294558855408627,
"grad_norm": 1.8203125,
"learning_rate": 9.149565217391305e-07,
"loss": 0.2153,
"step": 23490
},
{
"epoch": 1.6301495772815955,
"grad_norm": 1.1484375,
"learning_rate": 9.132173913043479e-07,
"loss": 0.2345,
"step": 23500
},
{
"epoch": 1.6308432690223282,
"grad_norm": 1.484375,
"learning_rate": 9.114782608695653e-07,
"loss": 0.2398,
"step": 23510
},
{
"epoch": 1.6315369607630608,
"grad_norm": 1.546875,
"learning_rate": 9.097391304347828e-07,
"loss": 0.2641,
"step": 23520
},
{
"epoch": 1.6322306525037935,
"grad_norm": 1.265625,
"learning_rate": 9.080000000000001e-07,
"loss": 0.216,
"step": 23530
},
{
"epoch": 1.6329243442445263,
"grad_norm": 1.109375,
"learning_rate": 9.062608695652175e-07,
"loss": 0.2306,
"step": 23540
},
{
"epoch": 1.633618035985259,
"grad_norm": 1.2109375,
"learning_rate": 9.045217391304349e-07,
"loss": 0.233,
"step": 23550
},
{
"epoch": 1.6343117277259918,
"grad_norm": 1.1953125,
"learning_rate": 9.027826086956521e-07,
"loss": 0.2194,
"step": 23560
},
{
"epoch": 1.6350054194667245,
"grad_norm": 0.96484375,
"learning_rate": 9.010434782608696e-07,
"loss": 0.1982,
"step": 23570
},
{
"epoch": 1.6356991112074573,
"grad_norm": 1.40625,
"learning_rate": 8.99304347826087e-07,
"loss": 0.2591,
"step": 23580
},
{
"epoch": 1.63639280294819,
"grad_norm": 1.3046875,
"learning_rate": 8.975652173913044e-07,
"loss": 0.2702,
"step": 23590
},
{
"epoch": 1.6370864946889228,
"grad_norm": 1.5625,
"learning_rate": 8.958260869565217e-07,
"loss": 0.2622,
"step": 23600
},
{
"epoch": 1.6377801864296553,
"grad_norm": 0.9453125,
"learning_rate": 8.940869565217391e-07,
"loss": 0.2386,
"step": 23610
},
{
"epoch": 1.638473878170388,
"grad_norm": 1.0234375,
"learning_rate": 8.923478260869566e-07,
"loss": 0.2167,
"step": 23620
},
{
"epoch": 1.6391675699111208,
"grad_norm": 1.109375,
"learning_rate": 8.90608695652174e-07,
"loss": 0.2375,
"step": 23630
},
{
"epoch": 1.6398612616518533,
"grad_norm": 1.21875,
"learning_rate": 8.888695652173913e-07,
"loss": 0.2261,
"step": 23640
},
{
"epoch": 1.640554953392586,
"grad_norm": 1.1953125,
"learning_rate": 8.871304347826087e-07,
"loss": 0.2658,
"step": 23650
},
{
"epoch": 1.6412486451333188,
"grad_norm": 1.0625,
"learning_rate": 8.853913043478262e-07,
"loss": 0.2705,
"step": 23660
},
{
"epoch": 1.6419423368740516,
"grad_norm": 1.0078125,
"learning_rate": 8.836521739130435e-07,
"loss": 0.2295,
"step": 23670
},
{
"epoch": 1.6426360286147843,
"grad_norm": 0.99609375,
"learning_rate": 8.819130434782609e-07,
"loss": 0.2608,
"step": 23680
},
{
"epoch": 1.643329720355517,
"grad_norm": 0.984375,
"learning_rate": 8.801739130434783e-07,
"loss": 0.2073,
"step": 23690
},
{
"epoch": 1.6440234120962498,
"grad_norm": 1.25,
"learning_rate": 8.784347826086958e-07,
"loss": 0.2729,
"step": 23700
},
{
"epoch": 1.6447171038369826,
"grad_norm": 1.09375,
"learning_rate": 8.766956521739131e-07,
"loss": 0.2614,
"step": 23710
},
{
"epoch": 1.6454107955777153,
"grad_norm": 1.09375,
"learning_rate": 8.749565217391305e-07,
"loss": 0.2278,
"step": 23720
},
{
"epoch": 1.6461044873184478,
"grad_norm": 1.2109375,
"learning_rate": 8.732173913043479e-07,
"loss": 0.2365,
"step": 23730
},
{
"epoch": 1.6467981790591806,
"grad_norm": 1.3125,
"learning_rate": 8.714782608695654e-07,
"loss": 0.2512,
"step": 23740
},
{
"epoch": 1.6474918707999133,
"grad_norm": 1.46875,
"learning_rate": 8.697391304347827e-07,
"loss": 0.3113,
"step": 23750
},
{
"epoch": 1.6481855625406459,
"grad_norm": 1.375,
"learning_rate": 8.680000000000001e-07,
"loss": 0.2599,
"step": 23760
},
{
"epoch": 1.6488792542813786,
"grad_norm": 0.9921875,
"learning_rate": 8.662608695652175e-07,
"loss": 0.2318,
"step": 23770
},
{
"epoch": 1.6495729460221114,
"grad_norm": 0.96484375,
"learning_rate": 8.645217391304348e-07,
"loss": 0.2484,
"step": 23780
},
{
"epoch": 1.650266637762844,
"grad_norm": 1.609375,
"learning_rate": 8.627826086956523e-07,
"loss": 0.2319,
"step": 23790
},
{
"epoch": 1.6509603295035769,
"grad_norm": 1.1953125,
"learning_rate": 8.610434782608697e-07,
"loss": 0.2098,
"step": 23800
},
{
"epoch": 1.6516540212443096,
"grad_norm": 1.0078125,
"learning_rate": 8.593043478260871e-07,
"loss": 0.2516,
"step": 23810
},
{
"epoch": 1.6523477129850423,
"grad_norm": 0.9765625,
"learning_rate": 8.575652173913044e-07,
"loss": 0.205,
"step": 23820
},
{
"epoch": 1.653041404725775,
"grad_norm": 1.4375,
"learning_rate": 8.558260869565219e-07,
"loss": 0.2468,
"step": 23830
},
{
"epoch": 1.6537350964665078,
"grad_norm": 1.375,
"learning_rate": 8.540869565217393e-07,
"loss": 0.2423,
"step": 23840
},
{
"epoch": 1.6544287882072404,
"grad_norm": 1.171875,
"learning_rate": 8.523478260869566e-07,
"loss": 0.2577,
"step": 23850
},
{
"epoch": 1.6551224799479731,
"grad_norm": 1.296875,
"learning_rate": 8.50608695652174e-07,
"loss": 0.2681,
"step": 23860
},
{
"epoch": 1.6558161716887059,
"grad_norm": 1.234375,
"learning_rate": 8.488695652173913e-07,
"loss": 0.2362,
"step": 23870
},
{
"epoch": 1.6565098634294384,
"grad_norm": 1.3203125,
"learning_rate": 8.471304347826087e-07,
"loss": 0.2296,
"step": 23880
},
{
"epoch": 1.6572035551701711,
"grad_norm": 1.265625,
"learning_rate": 8.453913043478261e-07,
"loss": 0.2451,
"step": 23890
},
{
"epoch": 1.657897246910904,
"grad_norm": 1.140625,
"learning_rate": 8.436521739130435e-07,
"loss": 0.2586,
"step": 23900
},
{
"epoch": 1.6585909386516366,
"grad_norm": 1.3203125,
"learning_rate": 8.419130434782609e-07,
"loss": 0.1875,
"step": 23910
},
{
"epoch": 1.6592846303923694,
"grad_norm": 1.2109375,
"learning_rate": 8.401739130434782e-07,
"loss": 0.2252,
"step": 23920
},
{
"epoch": 1.6599783221331021,
"grad_norm": 1.4375,
"learning_rate": 8.384347826086957e-07,
"loss": 0.2808,
"step": 23930
},
{
"epoch": 1.660672013873835,
"grad_norm": 1.3125,
"learning_rate": 8.366956521739131e-07,
"loss": 0.2576,
"step": 23940
},
{
"epoch": 1.6613657056145676,
"grad_norm": 1.1484375,
"learning_rate": 8.349565217391305e-07,
"loss": 0.2447,
"step": 23950
},
{
"epoch": 1.6620593973553004,
"grad_norm": 1.4921875,
"learning_rate": 8.332173913043478e-07,
"loss": 0.2325,
"step": 23960
},
{
"epoch": 1.662753089096033,
"grad_norm": 1.265625,
"learning_rate": 8.314782608695653e-07,
"loss": 0.1994,
"step": 23970
},
{
"epoch": 1.6634467808367657,
"grad_norm": 1.109375,
"learning_rate": 8.297391304347827e-07,
"loss": 0.2345,
"step": 23980
},
{
"epoch": 1.6641404725774984,
"grad_norm": 1.4453125,
"learning_rate": 8.280000000000001e-07,
"loss": 0.3115,
"step": 23990
},
{
"epoch": 1.664834164318231,
"grad_norm": 1.09375,
"learning_rate": 8.262608695652174e-07,
"loss": 0.2616,
"step": 24000
},
{
"epoch": 1.6655278560589637,
"grad_norm": 1.390625,
"learning_rate": 8.245217391304348e-07,
"loss": 0.2968,
"step": 24010
},
{
"epoch": 1.6662215477996964,
"grad_norm": 1.28125,
"learning_rate": 8.227826086956523e-07,
"loss": 0.2652,
"step": 24020
},
{
"epoch": 1.6669152395404292,
"grad_norm": 1.0625,
"learning_rate": 8.210434782608696e-07,
"loss": 0.275,
"step": 24030
},
{
"epoch": 1.667608931281162,
"grad_norm": 1.1171875,
"learning_rate": 8.19304347826087e-07,
"loss": 0.202,
"step": 24040
},
{
"epoch": 1.6683026230218947,
"grad_norm": 1.1796875,
"learning_rate": 8.175652173913044e-07,
"loss": 0.2373,
"step": 24050
},
{
"epoch": 1.6689963147626274,
"grad_norm": 1.21875,
"learning_rate": 8.158260869565219e-07,
"loss": 0.2368,
"step": 24060
},
{
"epoch": 1.6696900065033602,
"grad_norm": 1.546875,
"learning_rate": 8.140869565217392e-07,
"loss": 0.2361,
"step": 24070
},
{
"epoch": 1.670383698244093,
"grad_norm": 1.5078125,
"learning_rate": 8.123478260869566e-07,
"loss": 0.2362,
"step": 24080
},
{
"epoch": 1.6710773899848255,
"grad_norm": 1.28125,
"learning_rate": 8.10608695652174e-07,
"loss": 0.2434,
"step": 24090
},
{
"epoch": 1.6717710817255582,
"grad_norm": 1.2578125,
"learning_rate": 8.088695652173915e-07,
"loss": 0.2348,
"step": 24100
},
{
"epoch": 1.672464773466291,
"grad_norm": 1.0625,
"learning_rate": 8.071304347826088e-07,
"loss": 0.2239,
"step": 24110
},
{
"epoch": 1.6731584652070235,
"grad_norm": 1.3359375,
"learning_rate": 8.053913043478262e-07,
"loss": 0.2476,
"step": 24120
},
{
"epoch": 1.6738521569477562,
"grad_norm": 1.5234375,
"learning_rate": 8.036521739130436e-07,
"loss": 0.3045,
"step": 24130
},
{
"epoch": 1.674545848688489,
"grad_norm": 1.03125,
"learning_rate": 8.019130434782609e-07,
"loss": 0.2166,
"step": 24140
},
{
"epoch": 1.6752395404292217,
"grad_norm": 1.25,
"learning_rate": 8.001739130434784e-07,
"loss": 0.2401,
"step": 24150
},
{
"epoch": 1.6759332321699545,
"grad_norm": 1.0,
"learning_rate": 7.984347826086958e-07,
"loss": 0.2398,
"step": 24160
},
{
"epoch": 1.6766269239106872,
"grad_norm": 1.21875,
"learning_rate": 7.966956521739132e-07,
"loss": 0.2428,
"step": 24170
},
{
"epoch": 1.67732061565142,
"grad_norm": 1.0859375,
"learning_rate": 7.949565217391304e-07,
"loss": 0.2544,
"step": 24180
},
{
"epoch": 1.6780143073921527,
"grad_norm": 0.9765625,
"learning_rate": 7.932173913043478e-07,
"loss": 0.1938,
"step": 24190
},
{
"epoch": 1.6787079991328855,
"grad_norm": 1.2265625,
"learning_rate": 7.914782608695653e-07,
"loss": 0.2606,
"step": 24200
},
{
"epoch": 1.679401690873618,
"grad_norm": 1.125,
"learning_rate": 7.897391304347826e-07,
"loss": 0.2166,
"step": 24210
},
{
"epoch": 1.6800953826143508,
"grad_norm": 1.3125,
"learning_rate": 7.88e-07,
"loss": 0.2149,
"step": 24220
},
{
"epoch": 1.6807890743550835,
"grad_norm": 0.875,
"learning_rate": 7.862608695652174e-07,
"loss": 0.2509,
"step": 24230
},
{
"epoch": 1.681482766095816,
"grad_norm": 1.2734375,
"learning_rate": 7.845217391304348e-07,
"loss": 0.255,
"step": 24240
},
{
"epoch": 1.6821764578365488,
"grad_norm": 0.84375,
"learning_rate": 7.827826086956522e-07,
"loss": 0.2084,
"step": 24250
},
{
"epoch": 1.6828701495772815,
"grad_norm": 1.3671875,
"learning_rate": 7.810434782608696e-07,
"loss": 0.2378,
"step": 24260
},
{
"epoch": 1.6835638413180143,
"grad_norm": 1.2265625,
"learning_rate": 7.79304347826087e-07,
"loss": 0.2314,
"step": 24270
},
{
"epoch": 1.684257533058747,
"grad_norm": 1.3359375,
"learning_rate": 7.775652173913043e-07,
"loss": 0.2701,
"step": 24280
},
{
"epoch": 1.6849512247994798,
"grad_norm": 1.171875,
"learning_rate": 7.758260869565218e-07,
"loss": 0.2682,
"step": 24290
},
{
"epoch": 1.6856449165402125,
"grad_norm": 1.40625,
"learning_rate": 7.740869565217392e-07,
"loss": 0.235,
"step": 24300
},
{
"epoch": 1.6863386082809453,
"grad_norm": 1.4609375,
"learning_rate": 7.723478260869566e-07,
"loss": 0.2136,
"step": 24310
},
{
"epoch": 1.687032300021678,
"grad_norm": 1.4140625,
"learning_rate": 7.706086956521739e-07,
"loss": 0.2159,
"step": 24320
},
{
"epoch": 1.6877259917624106,
"grad_norm": 1.25,
"learning_rate": 7.688695652173914e-07,
"loss": 0.1848,
"step": 24330
},
{
"epoch": 1.6884196835031433,
"grad_norm": 1.3359375,
"learning_rate": 7.671304347826088e-07,
"loss": 0.2464,
"step": 24340
},
{
"epoch": 1.6891133752438758,
"grad_norm": 1.2109375,
"learning_rate": 7.653913043478262e-07,
"loss": 0.2249,
"step": 24350
},
{
"epoch": 1.6898070669846086,
"grad_norm": 1.3046875,
"learning_rate": 7.636521739130435e-07,
"loss": 0.2389,
"step": 24360
},
{
"epoch": 1.6905007587253413,
"grad_norm": 1.125,
"learning_rate": 7.619130434782609e-07,
"loss": 0.2296,
"step": 24370
},
{
"epoch": 1.691194450466074,
"grad_norm": 1.3984375,
"learning_rate": 7.601739130434784e-07,
"loss": 0.2403,
"step": 24380
},
{
"epoch": 1.6918881422068068,
"grad_norm": 1.515625,
"learning_rate": 7.584347826086957e-07,
"loss": 0.3061,
"step": 24390
},
{
"epoch": 1.6925818339475396,
"grad_norm": 1.4140625,
"learning_rate": 7.566956521739131e-07,
"loss": 0.225,
"step": 24400
},
{
"epoch": 1.6932755256882723,
"grad_norm": 1.21875,
"learning_rate": 7.549565217391305e-07,
"loss": 0.234,
"step": 24410
},
{
"epoch": 1.693969217429005,
"grad_norm": 1.046875,
"learning_rate": 7.53217391304348e-07,
"loss": 0.2418,
"step": 24420
},
{
"epoch": 1.6946629091697378,
"grad_norm": 1.375,
"learning_rate": 7.514782608695653e-07,
"loss": 0.3734,
"step": 24430
},
{
"epoch": 1.6953566009104704,
"grad_norm": 1.234375,
"learning_rate": 7.497391304347827e-07,
"loss": 0.2375,
"step": 24440
},
{
"epoch": 1.696050292651203,
"grad_norm": 1.3046875,
"learning_rate": 7.480000000000001e-07,
"loss": 0.2323,
"step": 24450
},
{
"epoch": 1.6967439843919359,
"grad_norm": 1.125,
"learning_rate": 7.462608695652176e-07,
"loss": 0.3004,
"step": 24460
},
{
"epoch": 1.6974376761326684,
"grad_norm": 1.28125,
"learning_rate": 7.445217391304349e-07,
"loss": 0.2572,
"step": 24470
},
{
"epoch": 1.6981313678734011,
"grad_norm": 1.4375,
"learning_rate": 7.427826086956523e-07,
"loss": 0.2329,
"step": 24480
},
{
"epoch": 1.6988250596141339,
"grad_norm": 1.125,
"learning_rate": 7.410434782608696e-07,
"loss": 0.2483,
"step": 24490
},
{
"epoch": 1.6995187513548666,
"grad_norm": 1.578125,
"learning_rate": 7.393043478260869e-07,
"loss": 0.205,
"step": 24500
},
{
"epoch": 1.7002124430955994,
"grad_norm": 1.25,
"learning_rate": 7.375652173913043e-07,
"loss": 0.2956,
"step": 24510
},
{
"epoch": 1.7009061348363321,
"grad_norm": 1.15625,
"learning_rate": 7.358260869565218e-07,
"loss": 0.2632,
"step": 24520
},
{
"epoch": 1.7015998265770649,
"grad_norm": 1.234375,
"learning_rate": 7.340869565217392e-07,
"loss": 0.244,
"step": 24530
},
{
"epoch": 1.7022935183177976,
"grad_norm": 1.1875,
"learning_rate": 7.323478260869565e-07,
"loss": 0.2039,
"step": 24540
},
{
"epoch": 1.7029872100585304,
"grad_norm": 1.734375,
"learning_rate": 7.306086956521739e-07,
"loss": 0.3392,
"step": 24550
},
{
"epoch": 1.703680901799263,
"grad_norm": 1.4140625,
"learning_rate": 7.288695652173914e-07,
"loss": 0.2325,
"step": 24560
},
{
"epoch": 1.7043745935399957,
"grad_norm": 1.171875,
"learning_rate": 7.271304347826087e-07,
"loss": 0.219,
"step": 24570
},
{
"epoch": 1.7050682852807284,
"grad_norm": 1.1875,
"learning_rate": 7.253913043478261e-07,
"loss": 0.229,
"step": 24580
},
{
"epoch": 1.705761977021461,
"grad_norm": 1.1328125,
"learning_rate": 7.236521739130435e-07,
"loss": 0.232,
"step": 24590
},
{
"epoch": 1.7064556687621937,
"grad_norm": 1.015625,
"learning_rate": 7.219130434782609e-07,
"loss": 0.2213,
"step": 24600
},
{
"epoch": 1.7071493605029264,
"grad_norm": 1.125,
"learning_rate": 7.201739130434783e-07,
"loss": 0.2818,
"step": 24610
},
{
"epoch": 1.7078430522436592,
"grad_norm": 1.375,
"learning_rate": 7.184347826086957e-07,
"loss": 0.2612,
"step": 24620
},
{
"epoch": 1.708536743984392,
"grad_norm": 1.609375,
"learning_rate": 7.166956521739131e-07,
"loss": 0.2474,
"step": 24630
},
{
"epoch": 1.7092304357251247,
"grad_norm": 1.484375,
"learning_rate": 7.149565217391304e-07,
"loss": 0.2656,
"step": 24640
},
{
"epoch": 1.7099241274658574,
"grad_norm": 1.1328125,
"learning_rate": 7.132173913043479e-07,
"loss": 0.2126,
"step": 24650
},
{
"epoch": 1.7106178192065902,
"grad_norm": 1.3203125,
"learning_rate": 7.114782608695653e-07,
"loss": 0.2265,
"step": 24660
},
{
"epoch": 1.711311510947323,
"grad_norm": 1.4609375,
"learning_rate": 7.097391304347827e-07,
"loss": 0.2468,
"step": 24670
},
{
"epoch": 1.7120052026880554,
"grad_norm": 1.1015625,
"learning_rate": 7.08e-07,
"loss": 0.2395,
"step": 24680
},
{
"epoch": 1.7126988944287882,
"grad_norm": 1.21875,
"learning_rate": 7.062608695652175e-07,
"loss": 0.2194,
"step": 24690
},
{
"epoch": 1.713392586169521,
"grad_norm": 1.265625,
"learning_rate": 7.045217391304349e-07,
"loss": 0.2355,
"step": 24700
},
{
"epoch": 1.7140862779102535,
"grad_norm": 1.125,
"learning_rate": 7.027826086956523e-07,
"loss": 0.2146,
"step": 24710
},
{
"epoch": 1.7147799696509862,
"grad_norm": 1.3125,
"learning_rate": 7.010434782608696e-07,
"loss": 0.2325,
"step": 24720
},
{
"epoch": 1.715473661391719,
"grad_norm": 1.046875,
"learning_rate": 6.99304347826087e-07,
"loss": 0.252,
"step": 24730
},
{
"epoch": 1.7161673531324517,
"grad_norm": 0.9921875,
"learning_rate": 6.975652173913045e-07,
"loss": 0.2504,
"step": 24740
},
{
"epoch": 1.7168610448731845,
"grad_norm": 0.97265625,
"learning_rate": 6.958260869565218e-07,
"loss": 0.2483,
"step": 24750
},
{
"epoch": 1.7175547366139172,
"grad_norm": 1.765625,
"learning_rate": 6.940869565217392e-07,
"loss": 0.2785,
"step": 24760
},
{
"epoch": 1.71824842835465,
"grad_norm": 1.828125,
"learning_rate": 6.923478260869566e-07,
"loss": 0.3625,
"step": 24770
},
{
"epoch": 1.7189421200953827,
"grad_norm": 1.21875,
"learning_rate": 6.906086956521741e-07,
"loss": 0.2435,
"step": 24780
},
{
"epoch": 1.7196358118361155,
"grad_norm": 1.5390625,
"learning_rate": 6.888695652173914e-07,
"loss": 0.2389,
"step": 24790
},
{
"epoch": 1.720329503576848,
"grad_norm": 1.25,
"learning_rate": 6.871304347826087e-07,
"loss": 0.2638,
"step": 24800
},
{
"epoch": 1.7210231953175807,
"grad_norm": 1.109375,
"learning_rate": 6.853913043478261e-07,
"loss": 0.2871,
"step": 24810
},
{
"epoch": 1.7217168870583135,
"grad_norm": 1.015625,
"learning_rate": 6.836521739130434e-07,
"loss": 0.2482,
"step": 24820
},
{
"epoch": 1.722410578799046,
"grad_norm": 1.6484375,
"learning_rate": 6.819130434782609e-07,
"loss": 0.2159,
"step": 24830
},
{
"epoch": 1.7231042705397788,
"grad_norm": 1.2421875,
"learning_rate": 6.801739130434783e-07,
"loss": 0.2426,
"step": 24840
},
{
"epoch": 1.7237979622805115,
"grad_norm": 1.6484375,
"learning_rate": 6.784347826086957e-07,
"loss": 0.2381,
"step": 24850
},
{
"epoch": 1.7244916540212443,
"grad_norm": 1.046875,
"learning_rate": 6.76695652173913e-07,
"loss": 0.2482,
"step": 24860
},
{
"epoch": 1.725185345761977,
"grad_norm": 1.09375,
"learning_rate": 6.749565217391304e-07,
"loss": 0.2337,
"step": 24870
},
{
"epoch": 1.7258790375027098,
"grad_norm": 0.7578125,
"learning_rate": 6.732173913043479e-07,
"loss": 0.2348,
"step": 24880
},
{
"epoch": 1.7265727292434425,
"grad_norm": 1.578125,
"learning_rate": 6.714782608695653e-07,
"loss": 0.271,
"step": 24890
},
{
"epoch": 1.7272664209841753,
"grad_norm": 0.96875,
"learning_rate": 6.697391304347826e-07,
"loss": 0.2612,
"step": 24900
},
{
"epoch": 1.727960112724908,
"grad_norm": 1.1796875,
"learning_rate": 6.68e-07,
"loss": 0.2523,
"step": 24910
},
{
"epoch": 1.7286538044656405,
"grad_norm": 1.3828125,
"learning_rate": 6.662608695652175e-07,
"loss": 0.2531,
"step": 24920
},
{
"epoch": 1.7293474962063733,
"grad_norm": 1.171875,
"learning_rate": 6.645217391304348e-07,
"loss": 0.226,
"step": 24930
},
{
"epoch": 1.730041187947106,
"grad_norm": 1.15625,
"learning_rate": 6.627826086956522e-07,
"loss": 0.2511,
"step": 24940
},
{
"epoch": 1.7307348796878386,
"grad_norm": 1.1328125,
"learning_rate": 6.610434782608696e-07,
"loss": 0.2648,
"step": 24950
},
{
"epoch": 1.7314285714285713,
"grad_norm": 1.4296875,
"learning_rate": 6.593043478260871e-07,
"loss": 0.2495,
"step": 24960
},
{
"epoch": 1.732122263169304,
"grad_norm": 1.03125,
"learning_rate": 6.575652173913044e-07,
"loss": 0.2491,
"step": 24970
},
{
"epoch": 1.7328159549100368,
"grad_norm": 1.1875,
"learning_rate": 6.558260869565218e-07,
"loss": 0.1996,
"step": 24980
},
{
"epoch": 1.7335096466507696,
"grad_norm": 1.265625,
"learning_rate": 6.540869565217392e-07,
"loss": 0.2316,
"step": 24990
},
{
"epoch": 1.7342033383915023,
"grad_norm": 1.1484375,
"learning_rate": 6.523478260869566e-07,
"loss": 0.2417,
"step": 25000
},
{
"epoch": 1.734897030132235,
"grad_norm": 1.0078125,
"learning_rate": 6.50608695652174e-07,
"loss": 0.2705,
"step": 25010
},
{
"epoch": 1.7355907218729678,
"grad_norm": 1.265625,
"learning_rate": 6.488695652173914e-07,
"loss": 0.234,
"step": 25020
},
{
"epoch": 1.7362844136137006,
"grad_norm": 1.3515625,
"learning_rate": 6.471304347826088e-07,
"loss": 0.2018,
"step": 25030
},
{
"epoch": 1.736978105354433,
"grad_norm": 1.71875,
"learning_rate": 6.453913043478261e-07,
"loss": 0.26,
"step": 25040
},
{
"epoch": 1.7376717970951658,
"grad_norm": 1.4375,
"learning_rate": 6.436521739130436e-07,
"loss": 0.2964,
"step": 25050
},
{
"epoch": 1.7383654888358986,
"grad_norm": 1.1484375,
"learning_rate": 6.41913043478261e-07,
"loss": 0.2349,
"step": 25060
},
{
"epoch": 1.7390591805766311,
"grad_norm": 1.359375,
"learning_rate": 6.401739130434784e-07,
"loss": 0.249,
"step": 25070
},
{
"epoch": 1.7397528723173639,
"grad_norm": 1.953125,
"learning_rate": 6.384347826086957e-07,
"loss": 0.2878,
"step": 25080
},
{
"epoch": 1.7404465640580966,
"grad_norm": 1.3671875,
"learning_rate": 6.366956521739132e-07,
"loss": 0.2922,
"step": 25090
},
{
"epoch": 1.7411402557988294,
"grad_norm": 1.328125,
"learning_rate": 6.349565217391306e-07,
"loss": 0.2339,
"step": 25100
},
{
"epoch": 1.741833947539562,
"grad_norm": 1.3671875,
"learning_rate": 6.332173913043478e-07,
"loss": 0.2677,
"step": 25110
},
{
"epoch": 1.7425276392802949,
"grad_norm": 1.5546875,
"learning_rate": 6.314782608695652e-07,
"loss": 0.2195,
"step": 25120
},
{
"epoch": 1.7432213310210276,
"grad_norm": 1.234375,
"learning_rate": 6.297391304347826e-07,
"loss": 0.2228,
"step": 25130
},
{
"epoch": 1.7439150227617604,
"grad_norm": 1.15625,
"learning_rate": 6.28e-07,
"loss": 0.1983,
"step": 25140
},
{
"epoch": 1.744608714502493,
"grad_norm": 1.3359375,
"learning_rate": 6.262608695652174e-07,
"loss": 0.2045,
"step": 25150
},
{
"epoch": 1.7453024062432256,
"grad_norm": 1.53125,
"learning_rate": 6.245217391304348e-07,
"loss": 0.2562,
"step": 25160
},
{
"epoch": 1.7459960979839584,
"grad_norm": 1.3984375,
"learning_rate": 6.227826086956523e-07,
"loss": 0.2394,
"step": 25170
},
{
"epoch": 1.7466897897246911,
"grad_norm": 0.98046875,
"learning_rate": 6.210434782608697e-07,
"loss": 0.2807,
"step": 25180
},
{
"epoch": 1.7473834814654237,
"grad_norm": 1.21875,
"learning_rate": 6.19304347826087e-07,
"loss": 0.2084,
"step": 25190
},
{
"epoch": 1.7480771732061564,
"grad_norm": 1.125,
"learning_rate": 6.175652173913044e-07,
"loss": 0.2275,
"step": 25200
},
{
"epoch": 1.7487708649468892,
"grad_norm": 1.9921875,
"learning_rate": 6.158260869565218e-07,
"loss": 0.3696,
"step": 25210
},
{
"epoch": 1.749464556687622,
"grad_norm": 1.6484375,
"learning_rate": 6.140869565217391e-07,
"loss": 0.3133,
"step": 25220
},
{
"epoch": 1.7501582484283547,
"grad_norm": 1.2265625,
"learning_rate": 6.123478260869565e-07,
"loss": 0.2205,
"step": 25230
},
{
"epoch": 1.7508519401690874,
"grad_norm": 0.9765625,
"learning_rate": 6.10608695652174e-07,
"loss": 0.2292,
"step": 25240
},
{
"epoch": 1.7515456319098202,
"grad_norm": 1.1640625,
"learning_rate": 6.088695652173914e-07,
"loss": 0.2705,
"step": 25250
},
{
"epoch": 1.752239323650553,
"grad_norm": 1.0625,
"learning_rate": 6.071304347826087e-07,
"loss": 0.2202,
"step": 25260
},
{
"epoch": 1.7529330153912857,
"grad_norm": 1.546875,
"learning_rate": 6.053913043478261e-07,
"loss": 0.2228,
"step": 25270
},
{
"epoch": 1.7536267071320182,
"grad_norm": 1.3046875,
"learning_rate": 6.036521739130436e-07,
"loss": 0.242,
"step": 25280
},
{
"epoch": 1.754320398872751,
"grad_norm": 0.96875,
"learning_rate": 6.01913043478261e-07,
"loss": 0.2173,
"step": 25290
},
{
"epoch": 1.7550140906134837,
"grad_norm": 1.375,
"learning_rate": 6.001739130434783e-07,
"loss": 0.2418,
"step": 25300
},
{
"epoch": 1.7557077823542162,
"grad_norm": 1.375,
"learning_rate": 5.984347826086957e-07,
"loss": 0.3065,
"step": 25310
},
{
"epoch": 1.756401474094949,
"grad_norm": 1.1875,
"learning_rate": 5.966956521739132e-07,
"loss": 0.2139,
"step": 25320
},
{
"epoch": 1.7570951658356817,
"grad_norm": 1.03125,
"learning_rate": 5.949565217391305e-07,
"loss": 0.2255,
"step": 25330
},
{
"epoch": 1.7577888575764145,
"grad_norm": 0.953125,
"learning_rate": 5.932173913043478e-07,
"loss": 0.2343,
"step": 25340
},
{
"epoch": 1.7584825493171472,
"grad_norm": 1.265625,
"learning_rate": 5.914782608695653e-07,
"loss": 0.2387,
"step": 25350
},
{
"epoch": 1.75917624105788,
"grad_norm": 1.171875,
"learning_rate": 5.897391304347827e-07,
"loss": 0.2096,
"step": 25360
},
{
"epoch": 1.7598699327986127,
"grad_norm": 1.6796875,
"learning_rate": 5.88e-07,
"loss": 0.2429,
"step": 25370
},
{
"epoch": 1.7605636245393455,
"grad_norm": 1.03125,
"learning_rate": 5.862608695652174e-07,
"loss": 0.2416,
"step": 25380
},
{
"epoch": 1.7612573162800782,
"grad_norm": 1.28125,
"learning_rate": 5.845217391304349e-07,
"loss": 0.2747,
"step": 25390
},
{
"epoch": 1.7619510080208107,
"grad_norm": 1.2265625,
"learning_rate": 5.827826086956522e-07,
"loss": 0.2036,
"step": 25400
},
{
"epoch": 1.7626446997615435,
"grad_norm": 1.1328125,
"learning_rate": 5.810434782608696e-07,
"loss": 0.2278,
"step": 25410
},
{
"epoch": 1.7633383915022762,
"grad_norm": 1.2890625,
"learning_rate": 5.79304347826087e-07,
"loss": 0.2085,
"step": 25420
},
{
"epoch": 1.7640320832430088,
"grad_norm": 0.98828125,
"learning_rate": 5.775652173913044e-07,
"loss": 0.2329,
"step": 25430
},
{
"epoch": 1.7647257749837415,
"grad_norm": 1.3203125,
"learning_rate": 5.758260869565218e-07,
"loss": 0.2339,
"step": 25440
},
{
"epoch": 1.7654194667244743,
"grad_norm": 1.140625,
"learning_rate": 5.740869565217392e-07,
"loss": 0.2597,
"step": 25450
},
{
"epoch": 1.766113158465207,
"grad_norm": 1.1796875,
"learning_rate": 5.723478260869566e-07,
"loss": 0.2197,
"step": 25460
},
{
"epoch": 1.7668068502059397,
"grad_norm": 1.0859375,
"learning_rate": 5.70608695652174e-07,
"loss": 0.2542,
"step": 25470
},
{
"epoch": 1.7675005419466725,
"grad_norm": 1.15625,
"learning_rate": 5.688695652173914e-07,
"loss": 0.2738,
"step": 25480
},
{
"epoch": 1.7681942336874052,
"grad_norm": 1.4140625,
"learning_rate": 5.671304347826087e-07,
"loss": 0.2285,
"step": 25490
},
{
"epoch": 1.768887925428138,
"grad_norm": 1.2265625,
"learning_rate": 5.653913043478261e-07,
"loss": 0.2296,
"step": 25500
},
{
"epoch": 1.7695816171688707,
"grad_norm": 1.6015625,
"learning_rate": 5.636521739130435e-07,
"loss": 0.2071,
"step": 25510
},
{
"epoch": 1.7702753089096033,
"grad_norm": 1.1953125,
"learning_rate": 5.619130434782609e-07,
"loss": 0.2482,
"step": 25520
},
{
"epoch": 1.770969000650336,
"grad_norm": 1.109375,
"learning_rate": 5.601739130434783e-07,
"loss": 0.3006,
"step": 25530
},
{
"epoch": 1.7716626923910688,
"grad_norm": 1.21875,
"learning_rate": 5.584347826086957e-07,
"loss": 0.2639,
"step": 25540
},
{
"epoch": 1.7723563841318013,
"grad_norm": 1.5546875,
"learning_rate": 5.566956521739131e-07,
"loss": 0.2591,
"step": 25550
},
{
"epoch": 1.773050075872534,
"grad_norm": 1.1875,
"learning_rate": 5.549565217391305e-07,
"loss": 0.3238,
"step": 25560
},
{
"epoch": 1.7737437676132668,
"grad_norm": 1.8515625,
"learning_rate": 5.532173913043479e-07,
"loss": 0.2856,
"step": 25570
},
{
"epoch": 1.7744374593539995,
"grad_norm": 1.1640625,
"learning_rate": 5.514782608695652e-07,
"loss": 0.3195,
"step": 25580
},
{
"epoch": 1.7751311510947323,
"grad_norm": 1.59375,
"learning_rate": 5.497391304347826e-07,
"loss": 0.2509,
"step": 25590
},
{
"epoch": 1.775824842835465,
"grad_norm": 1.140625,
"learning_rate": 5.480000000000001e-07,
"loss": 0.2397,
"step": 25600
},
{
"epoch": 1.7765185345761978,
"grad_norm": 1.1328125,
"learning_rate": 5.462608695652175e-07,
"loss": 0.2355,
"step": 25610
},
{
"epoch": 1.7772122263169305,
"grad_norm": 1.21875,
"learning_rate": 5.445217391304348e-07,
"loss": 0.1725,
"step": 25620
},
{
"epoch": 1.777905918057663,
"grad_norm": 1.1484375,
"learning_rate": 5.427826086956522e-07,
"loss": 0.3111,
"step": 25630
},
{
"epoch": 1.7785996097983958,
"grad_norm": 1.3046875,
"learning_rate": 5.410434782608697e-07,
"loss": 0.2527,
"step": 25640
},
{
"epoch": 1.7792933015391286,
"grad_norm": 1.28125,
"learning_rate": 5.393043478260869e-07,
"loss": 0.2612,
"step": 25650
},
{
"epoch": 1.779986993279861,
"grad_norm": 1.5390625,
"learning_rate": 5.375652173913043e-07,
"loss": 0.2509,
"step": 25660
},
{
"epoch": 1.7806806850205938,
"grad_norm": 1.390625,
"learning_rate": 5.358260869565218e-07,
"loss": 0.2457,
"step": 25670
},
{
"epoch": 1.7813743767613266,
"grad_norm": 1.15625,
"learning_rate": 5.340869565217392e-07,
"loss": 0.2486,
"step": 25680
},
{
"epoch": 1.7820680685020593,
"grad_norm": 1.3125,
"learning_rate": 5.323478260869565e-07,
"loss": 0.2046,
"step": 25690
},
{
"epoch": 1.782761760242792,
"grad_norm": 1.125,
"learning_rate": 5.306086956521739e-07,
"loss": 0.2119,
"step": 25700
},
{
"epoch": 1.7834554519835248,
"grad_norm": 1.0,
"learning_rate": 5.288695652173914e-07,
"loss": 0.2471,
"step": 25710
},
{
"epoch": 1.7841491437242576,
"grad_norm": 0.93359375,
"learning_rate": 5.271304347826088e-07,
"loss": 0.2273,
"step": 25720
},
{
"epoch": 1.7848428354649903,
"grad_norm": 0.95703125,
"learning_rate": 5.253913043478261e-07,
"loss": 0.2592,
"step": 25730
},
{
"epoch": 1.785536527205723,
"grad_norm": 1.3359375,
"learning_rate": 5.236521739130435e-07,
"loss": 0.2367,
"step": 25740
},
{
"epoch": 1.7862302189464556,
"grad_norm": 1.3046875,
"learning_rate": 5.21913043478261e-07,
"loss": 0.2357,
"step": 25750
},
{
"epoch": 1.7869239106871884,
"grad_norm": 1.2734375,
"learning_rate": 5.201739130434783e-07,
"loss": 0.219,
"step": 25760
},
{
"epoch": 1.7876176024279211,
"grad_norm": 1.171875,
"learning_rate": 5.184347826086957e-07,
"loss": 0.2937,
"step": 25770
},
{
"epoch": 1.7883112941686536,
"grad_norm": 0.984375,
"learning_rate": 5.166956521739131e-07,
"loss": 0.2343,
"step": 25780
},
{
"epoch": 1.7890049859093864,
"grad_norm": 1.3515625,
"learning_rate": 5.149565217391305e-07,
"loss": 0.226,
"step": 25790
},
{
"epoch": 1.7896986776501191,
"grad_norm": 0.921875,
"learning_rate": 5.132173913043478e-07,
"loss": 0.2387,
"step": 25800
},
{
"epoch": 1.790392369390852,
"grad_norm": 1.1015625,
"learning_rate": 5.114782608695652e-07,
"loss": 0.2074,
"step": 25810
},
{
"epoch": 1.7910860611315846,
"grad_norm": 1.1328125,
"learning_rate": 5.097391304347827e-07,
"loss": 0.243,
"step": 25820
},
{
"epoch": 1.7917797528723174,
"grad_norm": 1.21875,
"learning_rate": 5.08e-07,
"loss": 0.2289,
"step": 25830
},
{
"epoch": 1.7924734446130501,
"grad_norm": 1.1875,
"learning_rate": 5.062608695652174e-07,
"loss": 0.2942,
"step": 25840
},
{
"epoch": 1.7931671363537829,
"grad_norm": 1.1953125,
"learning_rate": 5.045217391304348e-07,
"loss": 0.2439,
"step": 25850
},
{
"epoch": 1.7938608280945156,
"grad_norm": 0.98046875,
"learning_rate": 5.027826086956522e-07,
"loss": 0.2651,
"step": 25860
},
{
"epoch": 1.7945545198352482,
"grad_norm": 1.4375,
"learning_rate": 5.010434782608696e-07,
"loss": 0.2474,
"step": 25870
},
{
"epoch": 1.795248211575981,
"grad_norm": 1.578125,
"learning_rate": 4.99304347826087e-07,
"loss": 0.2417,
"step": 25880
},
{
"epoch": 1.7959419033167137,
"grad_norm": 1.234375,
"learning_rate": 4.975652173913044e-07,
"loss": 0.2635,
"step": 25890
},
{
"epoch": 1.7966355950574462,
"grad_norm": 1.84375,
"learning_rate": 4.958260869565218e-07,
"loss": 0.3054,
"step": 25900
},
{
"epoch": 1.797329286798179,
"grad_norm": 1.1953125,
"learning_rate": 4.940869565217392e-07,
"loss": 0.2265,
"step": 25910
},
{
"epoch": 1.7980229785389117,
"grad_norm": 1.2109375,
"learning_rate": 4.923478260869566e-07,
"loss": 0.2727,
"step": 25920
},
{
"epoch": 1.7987166702796444,
"grad_norm": 1.0390625,
"learning_rate": 4.90608695652174e-07,
"loss": 0.2491,
"step": 25930
},
{
"epoch": 1.7994103620203772,
"grad_norm": 1.2578125,
"learning_rate": 4.888695652173913e-07,
"loss": 0.2025,
"step": 25940
},
{
"epoch": 1.80010405376111,
"grad_norm": 0.96484375,
"learning_rate": 4.871304347826088e-07,
"loss": 0.2422,
"step": 25950
},
{
"epoch": 1.8007977455018427,
"grad_norm": 1.4453125,
"learning_rate": 4.853913043478261e-07,
"loss": 0.2329,
"step": 25960
},
{
"epoch": 1.8014914372425754,
"grad_norm": 1.609375,
"learning_rate": 4.836521739130435e-07,
"loss": 0.2516,
"step": 25970
},
{
"epoch": 1.8021851289833082,
"grad_norm": 1.1953125,
"learning_rate": 4.819130434782609e-07,
"loss": 0.1947,
"step": 25980
},
{
"epoch": 1.8028788207240407,
"grad_norm": 1.390625,
"learning_rate": 4.801739130434783e-07,
"loss": 0.2751,
"step": 25990
},
{
"epoch": 1.8035725124647735,
"grad_norm": 1.5234375,
"learning_rate": 4.784347826086957e-07,
"loss": 0.2421,
"step": 26000
},
{
"epoch": 1.8042662042055062,
"grad_norm": 1.4609375,
"learning_rate": 4.7669565217391305e-07,
"loss": 0.2389,
"step": 26010
},
{
"epoch": 1.8049598959462387,
"grad_norm": 1.21875,
"learning_rate": 4.7495652173913047e-07,
"loss": 0.2199,
"step": 26020
},
{
"epoch": 1.8056535876869715,
"grad_norm": 1.71875,
"learning_rate": 4.7321739130434784e-07,
"loss": 0.2385,
"step": 26030
},
{
"epoch": 1.8063472794277042,
"grad_norm": 1.25,
"learning_rate": 4.7147826086956527e-07,
"loss": 0.2346,
"step": 26040
},
{
"epoch": 1.807040971168437,
"grad_norm": 1.3203125,
"learning_rate": 4.6973913043478264e-07,
"loss": 0.2409,
"step": 26050
},
{
"epoch": 1.8077346629091697,
"grad_norm": 1.2890625,
"learning_rate": 4.6800000000000006e-07,
"loss": 0.2329,
"step": 26060
},
{
"epoch": 1.8084283546499025,
"grad_norm": 1.28125,
"learning_rate": 4.6626086956521743e-07,
"loss": 0.2252,
"step": 26070
},
{
"epoch": 1.8091220463906352,
"grad_norm": 1.28125,
"learning_rate": 4.6452173913043486e-07,
"loss": 0.2564,
"step": 26080
},
{
"epoch": 1.809815738131368,
"grad_norm": 1.5078125,
"learning_rate": 4.6278260869565223e-07,
"loss": 0.2508,
"step": 26090
},
{
"epoch": 1.8105094298721007,
"grad_norm": 1.3828125,
"learning_rate": 4.6104347826086965e-07,
"loss": 0.2399,
"step": 26100
},
{
"epoch": 1.8112031216128333,
"grad_norm": 1.5234375,
"learning_rate": 4.5930434782608697e-07,
"loss": 0.2153,
"step": 26110
},
{
"epoch": 1.811896813353566,
"grad_norm": 1.3828125,
"learning_rate": 4.5756521739130434e-07,
"loss": 0.2163,
"step": 26120
},
{
"epoch": 1.8125905050942988,
"grad_norm": 1.3515625,
"learning_rate": 4.5582608695652177e-07,
"loss": 0.2719,
"step": 26130
},
{
"epoch": 1.8132841968350313,
"grad_norm": 0.9609375,
"learning_rate": 4.5408695652173914e-07,
"loss": 0.1888,
"step": 26140
},
{
"epoch": 1.813977888575764,
"grad_norm": 1.2734375,
"learning_rate": 4.5234782608695656e-07,
"loss": 0.3202,
"step": 26150
},
{
"epoch": 1.8146715803164968,
"grad_norm": 1.1171875,
"learning_rate": 4.5060869565217393e-07,
"loss": 0.21,
"step": 26160
},
{
"epoch": 1.8153652720572295,
"grad_norm": 1.1171875,
"learning_rate": 4.4886956521739136e-07,
"loss": 0.194,
"step": 26170
},
{
"epoch": 1.8160589637979623,
"grad_norm": 1.296875,
"learning_rate": 4.4713043478260873e-07,
"loss": 0.2186,
"step": 26180
},
{
"epoch": 1.816752655538695,
"grad_norm": 0.74609375,
"learning_rate": 4.4539130434782615e-07,
"loss": 0.2346,
"step": 26190
},
{
"epoch": 1.8174463472794278,
"grad_norm": 1.375,
"learning_rate": 4.436521739130435e-07,
"loss": 0.2373,
"step": 26200
},
{
"epoch": 1.8181400390201605,
"grad_norm": 1.0,
"learning_rate": 4.419130434782609e-07,
"loss": 0.262,
"step": 26210
},
{
"epoch": 1.8188337307608933,
"grad_norm": 0.97265625,
"learning_rate": 4.401739130434783e-07,
"loss": 0.2297,
"step": 26220
},
{
"epoch": 1.8195274225016258,
"grad_norm": 1.1171875,
"learning_rate": 4.384347826086957e-07,
"loss": 0.2302,
"step": 26230
},
{
"epoch": 1.8202211142423586,
"grad_norm": 0.94921875,
"learning_rate": 4.366956521739131e-07,
"loss": 0.2239,
"step": 26240
},
{
"epoch": 1.8209148059830913,
"grad_norm": 1.1796875,
"learning_rate": 4.349565217391305e-07,
"loss": 0.2234,
"step": 26250
},
{
"epoch": 1.8216084977238238,
"grad_norm": 1.09375,
"learning_rate": 4.332173913043479e-07,
"loss": 0.242,
"step": 26260
},
{
"epoch": 1.8223021894645566,
"grad_norm": 1.9609375,
"learning_rate": 4.314782608695652e-07,
"loss": 0.2617,
"step": 26270
},
{
"epoch": 1.8229958812052893,
"grad_norm": 1.4765625,
"learning_rate": 4.297391304347826e-07,
"loss": 0.2493,
"step": 26280
},
{
"epoch": 1.823689572946022,
"grad_norm": 1.3046875,
"learning_rate": 4.28e-07,
"loss": 0.2419,
"step": 26290
},
{
"epoch": 1.8243832646867548,
"grad_norm": 1.2265625,
"learning_rate": 4.262608695652174e-07,
"loss": 0.2478,
"step": 26300
},
{
"epoch": 1.8250769564274876,
"grad_norm": 1.3046875,
"learning_rate": 4.245217391304348e-07,
"loss": 0.2801,
"step": 26310
},
{
"epoch": 1.8257706481682203,
"grad_norm": 1.03125,
"learning_rate": 4.227826086956522e-07,
"loss": 0.2001,
"step": 26320
},
{
"epoch": 1.826464339908953,
"grad_norm": 0.890625,
"learning_rate": 4.210434782608696e-07,
"loss": 0.2197,
"step": 26330
},
{
"epoch": 1.8271580316496858,
"grad_norm": 1.28125,
"learning_rate": 4.19304347826087e-07,
"loss": 0.2616,
"step": 26340
},
{
"epoch": 1.8278517233904183,
"grad_norm": 1.40625,
"learning_rate": 4.175652173913044e-07,
"loss": 0.2663,
"step": 26350
},
{
"epoch": 1.828545415131151,
"grad_norm": 1.3515625,
"learning_rate": 4.158260869565218e-07,
"loss": 0.2307,
"step": 26360
},
{
"epoch": 1.8292391068718838,
"grad_norm": 1.3671875,
"learning_rate": 4.140869565217392e-07,
"loss": 0.2576,
"step": 26370
},
{
"epoch": 1.8299327986126164,
"grad_norm": 1.109375,
"learning_rate": 4.1234782608695657e-07,
"loss": 0.1986,
"step": 26380
},
{
"epoch": 1.8306264903533491,
"grad_norm": 1.3359375,
"learning_rate": 4.1060869565217394e-07,
"loss": 0.2378,
"step": 26390
},
{
"epoch": 1.8313201820940819,
"grad_norm": 1.78125,
"learning_rate": 4.0886956521739137e-07,
"loss": 0.2416,
"step": 26400
},
{
"epoch": 1.8320138738348146,
"grad_norm": 1.7890625,
"learning_rate": 4.0713043478260874e-07,
"loss": 0.3052,
"step": 26410
},
{
"epoch": 1.8327075655755474,
"grad_norm": 1.15625,
"learning_rate": 4.053913043478261e-07,
"loss": 0.2222,
"step": 26420
},
{
"epoch": 1.8334012573162801,
"grad_norm": 1.2890625,
"learning_rate": 4.036521739130435e-07,
"loss": 0.2901,
"step": 26430
},
{
"epoch": 1.8340949490570129,
"grad_norm": 1.4921875,
"learning_rate": 4.0191304347826085e-07,
"loss": 0.2526,
"step": 26440
},
{
"epoch": 1.8347886407977456,
"grad_norm": 1.234375,
"learning_rate": 4.001739130434783e-07,
"loss": 0.2955,
"step": 26450
},
{
"epoch": 1.8354823325384784,
"grad_norm": 1.8359375,
"learning_rate": 3.9843478260869565e-07,
"loss": 0.301,
"step": 26460
},
{
"epoch": 1.836176024279211,
"grad_norm": 1.28125,
"learning_rate": 3.9669565217391307e-07,
"loss": 0.2689,
"step": 26470
},
{
"epoch": 1.8368697160199436,
"grad_norm": 0.921875,
"learning_rate": 3.9495652173913044e-07,
"loss": 0.2165,
"step": 26480
},
{
"epoch": 1.8375634077606764,
"grad_norm": 1.96875,
"learning_rate": 3.9321739130434787e-07,
"loss": 0.3363,
"step": 26490
},
{
"epoch": 1.838257099501409,
"grad_norm": 1.5625,
"learning_rate": 3.9147826086956524e-07,
"loss": 0.2889,
"step": 26500
},
{
"epoch": 1.8389507912421417,
"grad_norm": 1.140625,
"learning_rate": 3.8973913043478266e-07,
"loss": 0.205,
"step": 26510
},
{
"epoch": 1.8396444829828744,
"grad_norm": 1.265625,
"learning_rate": 3.8800000000000003e-07,
"loss": 0.2951,
"step": 26520
},
{
"epoch": 1.8403381747236072,
"grad_norm": 1.25,
"learning_rate": 3.8626086956521746e-07,
"loss": 0.2299,
"step": 26530
},
{
"epoch": 1.84103186646434,
"grad_norm": 1.40625,
"learning_rate": 3.8452173913043483e-07,
"loss": 0.233,
"step": 26540
},
{
"epoch": 1.8417255582050727,
"grad_norm": 1.234375,
"learning_rate": 3.8278260869565225e-07,
"loss": 0.2723,
"step": 26550
},
{
"epoch": 1.8424192499458054,
"grad_norm": 1.21875,
"learning_rate": 3.810434782608696e-07,
"loss": 0.2245,
"step": 26560
},
{
"epoch": 1.8431129416865382,
"grad_norm": 1.0,
"learning_rate": 3.7930434782608705e-07,
"loss": 0.2378,
"step": 26570
},
{
"epoch": 1.843806633427271,
"grad_norm": 1.2734375,
"learning_rate": 3.7756521739130437e-07,
"loss": 0.2427,
"step": 26580
},
{
"epoch": 1.8445003251680034,
"grad_norm": 1.0703125,
"learning_rate": 3.7582608695652174e-07,
"loss": 0.2357,
"step": 26590
},
{
"epoch": 1.8451940169087362,
"grad_norm": 1.359375,
"learning_rate": 3.7408695652173916e-07,
"loss": 0.3183,
"step": 26600
},
{
"epoch": 1.845887708649469,
"grad_norm": 1.03125,
"learning_rate": 3.7234782608695653e-07,
"loss": 0.3006,
"step": 26610
},
{
"epoch": 1.8465814003902015,
"grad_norm": 1.5546875,
"learning_rate": 3.7060869565217396e-07,
"loss": 0.2224,
"step": 26620
},
{
"epoch": 1.8472750921309342,
"grad_norm": 1.140625,
"learning_rate": 3.6886956521739133e-07,
"loss": 0.221,
"step": 26630
},
{
"epoch": 1.847968783871667,
"grad_norm": 1.109375,
"learning_rate": 3.671304347826087e-07,
"loss": 0.2246,
"step": 26640
},
{
"epoch": 1.8486624756123997,
"grad_norm": 1.53125,
"learning_rate": 3.653913043478261e-07,
"loss": 0.2503,
"step": 26650
},
{
"epoch": 1.8493561673531325,
"grad_norm": 1.34375,
"learning_rate": 3.636521739130435e-07,
"loss": 0.1998,
"step": 26660
},
{
"epoch": 1.8500498590938652,
"grad_norm": 1.171875,
"learning_rate": 3.619130434782609e-07,
"loss": 0.2254,
"step": 26670
},
{
"epoch": 1.850743550834598,
"grad_norm": 1.25,
"learning_rate": 3.601739130434783e-07,
"loss": 0.2482,
"step": 26680
},
{
"epoch": 1.8514372425753307,
"grad_norm": 1.0,
"learning_rate": 3.584347826086957e-07,
"loss": 0.2333,
"step": 26690
},
{
"epoch": 1.8521309343160635,
"grad_norm": 1.515625,
"learning_rate": 3.566956521739131e-07,
"loss": 0.2732,
"step": 26700
},
{
"epoch": 1.852824626056796,
"grad_norm": 1.15625,
"learning_rate": 3.549565217391305e-07,
"loss": 0.2439,
"step": 26710
},
{
"epoch": 1.8535183177975287,
"grad_norm": 1.25,
"learning_rate": 3.532173913043479e-07,
"loss": 0.196,
"step": 26720
},
{
"epoch": 1.8542120095382615,
"grad_norm": 1.625,
"learning_rate": 3.514782608695652e-07,
"loss": 0.2994,
"step": 26730
},
{
"epoch": 1.854905701278994,
"grad_norm": 1.2421875,
"learning_rate": 3.497391304347826e-07,
"loss": 0.2272,
"step": 26740
},
{
"epoch": 1.8555993930197268,
"grad_norm": 1.2734375,
"learning_rate": 3.48e-07,
"loss": 0.2331,
"step": 26750
},
{
"epoch": 1.8562930847604595,
"grad_norm": 1.4453125,
"learning_rate": 3.462608695652174e-07,
"loss": 0.2274,
"step": 26760
},
{
"epoch": 1.8569867765011923,
"grad_norm": 1.53125,
"learning_rate": 3.445217391304348e-07,
"loss": 0.2571,
"step": 26770
},
{
"epoch": 1.857680468241925,
"grad_norm": 1.453125,
"learning_rate": 3.427826086956522e-07,
"loss": 0.2547,
"step": 26780
},
{
"epoch": 1.8583741599826578,
"grad_norm": 1.140625,
"learning_rate": 3.410434782608696e-07,
"loss": 0.2549,
"step": 26790
},
{
"epoch": 1.8590678517233905,
"grad_norm": 1.140625,
"learning_rate": 3.39304347826087e-07,
"loss": 0.278,
"step": 26800
},
{
"epoch": 1.8597615434641233,
"grad_norm": 0.97265625,
"learning_rate": 3.375652173913044e-07,
"loss": 0.2091,
"step": 26810
},
{
"epoch": 1.860455235204856,
"grad_norm": 2.015625,
"learning_rate": 3.3582608695652175e-07,
"loss": 0.2894,
"step": 26820
},
{
"epoch": 1.8611489269455885,
"grad_norm": 0.85546875,
"learning_rate": 3.3408695652173917e-07,
"loss": 0.2035,
"step": 26830
},
{
"epoch": 1.8618426186863213,
"grad_norm": 1.65625,
"learning_rate": 3.3234782608695654e-07,
"loss": 0.2096,
"step": 26840
},
{
"epoch": 1.862536310427054,
"grad_norm": 1.296875,
"learning_rate": 3.3060869565217397e-07,
"loss": 0.2182,
"step": 26850
},
{
"epoch": 1.8632300021677866,
"grad_norm": 1.2109375,
"learning_rate": 3.2886956521739134e-07,
"loss": 0.2323,
"step": 26860
},
{
"epoch": 1.8639236939085193,
"grad_norm": 0.96875,
"learning_rate": 3.2713043478260876e-07,
"loss": 0.307,
"step": 26870
},
{
"epoch": 1.864617385649252,
"grad_norm": 1.125,
"learning_rate": 3.2539130434782614e-07,
"loss": 0.2165,
"step": 26880
},
{
"epoch": 1.8653110773899848,
"grad_norm": 1.1015625,
"learning_rate": 3.2365217391304345e-07,
"loss": 0.2663,
"step": 26890
},
{
"epoch": 1.8660047691307176,
"grad_norm": 0.9765625,
"learning_rate": 3.219130434782609e-07,
"loss": 0.2491,
"step": 26900
},
{
"epoch": 1.8666984608714503,
"grad_norm": 1.609375,
"learning_rate": 3.2017391304347825e-07,
"loss": 0.3144,
"step": 26910
},
{
"epoch": 1.867392152612183,
"grad_norm": 1.578125,
"learning_rate": 3.1843478260869567e-07,
"loss": 0.2495,
"step": 26920
},
{
"epoch": 1.8680858443529158,
"grad_norm": 1.28125,
"learning_rate": 3.1669565217391304e-07,
"loss": 0.3146,
"step": 26930
},
{
"epoch": 1.8687795360936483,
"grad_norm": 1.2734375,
"learning_rate": 3.1495652173913047e-07,
"loss": 0.2254,
"step": 26940
},
{
"epoch": 1.869473227834381,
"grad_norm": 1.2890625,
"learning_rate": 3.1321739130434784e-07,
"loss": 0.2286,
"step": 26950
},
{
"epoch": 1.8701669195751138,
"grad_norm": 1.328125,
"learning_rate": 3.1147826086956526e-07,
"loss": 0.238,
"step": 26960
},
{
"epoch": 1.8708606113158464,
"grad_norm": 1.1015625,
"learning_rate": 3.0973913043478263e-07,
"loss": 0.2336,
"step": 26970
},
{
"epoch": 1.871554303056579,
"grad_norm": 1.15625,
"learning_rate": 3.0800000000000006e-07,
"loss": 0.2426,
"step": 26980
},
{
"epoch": 1.8722479947973119,
"grad_norm": 1.0390625,
"learning_rate": 3.0626086956521743e-07,
"loss": 0.2485,
"step": 26990
},
{
"epoch": 1.8729416865380446,
"grad_norm": 1.0625,
"learning_rate": 3.045217391304348e-07,
"loss": 0.2637,
"step": 27000
},
{
"epoch": 1.8736353782787774,
"grad_norm": 1.0859375,
"learning_rate": 3.0278260869565217e-07,
"loss": 0.2391,
"step": 27010
},
{
"epoch": 1.87432907001951,
"grad_norm": 1.3359375,
"learning_rate": 3.010434782608696e-07,
"loss": 0.2819,
"step": 27020
},
{
"epoch": 1.8750227617602429,
"grad_norm": 1.21875,
"learning_rate": 2.9930434782608697e-07,
"loss": 0.238,
"step": 27030
},
{
"epoch": 1.8757164535009756,
"grad_norm": 1.3125,
"learning_rate": 2.975652173913044e-07,
"loss": 0.2429,
"step": 27040
},
{
"epoch": 1.8764101452417083,
"grad_norm": 1.53125,
"learning_rate": 2.9582608695652176e-07,
"loss": 0.2977,
"step": 27050
},
{
"epoch": 1.8771038369824409,
"grad_norm": 1.375,
"learning_rate": 2.940869565217392e-07,
"loss": 0.2399,
"step": 27060
},
{
"epoch": 1.8777975287231736,
"grad_norm": 1.5234375,
"learning_rate": 2.9234782608695656e-07,
"loss": 0.2749,
"step": 27070
},
{
"epoch": 1.8784912204639064,
"grad_norm": 1.3203125,
"learning_rate": 2.9060869565217393e-07,
"loss": 0.2985,
"step": 27080
},
{
"epoch": 1.879184912204639,
"grad_norm": 1.3046875,
"learning_rate": 2.888695652173913e-07,
"loss": 0.228,
"step": 27090
},
{
"epoch": 1.8798786039453717,
"grad_norm": 1.3828125,
"learning_rate": 2.871304347826087e-07,
"loss": 0.2449,
"step": 27100
},
{
"epoch": 1.8805722956861044,
"grad_norm": 1.171875,
"learning_rate": 2.853913043478261e-07,
"loss": 0.1995,
"step": 27110
},
{
"epoch": 1.8812659874268371,
"grad_norm": 0.9609375,
"learning_rate": 2.836521739130435e-07,
"loss": 0.2751,
"step": 27120
},
{
"epoch": 1.88195967916757,
"grad_norm": 1.234375,
"learning_rate": 2.819130434782609e-07,
"loss": 0.2217,
"step": 27130
},
{
"epoch": 1.8826533709083026,
"grad_norm": 1.359375,
"learning_rate": 2.801739130434783e-07,
"loss": 0.2758,
"step": 27140
},
{
"epoch": 1.8833470626490354,
"grad_norm": 1.296875,
"learning_rate": 2.784347826086957e-07,
"loss": 0.2817,
"step": 27150
},
{
"epoch": 1.8840407543897681,
"grad_norm": 1.703125,
"learning_rate": 2.7669565217391306e-07,
"loss": 0.2296,
"step": 27160
},
{
"epoch": 1.884734446130501,
"grad_norm": 1.3046875,
"learning_rate": 2.7495652173913043e-07,
"loss": 0.219,
"step": 27170
},
{
"epoch": 1.8854281378712334,
"grad_norm": 1.2421875,
"learning_rate": 2.7321739130434785e-07,
"loss": 0.2468,
"step": 27180
},
{
"epoch": 1.8861218296119662,
"grad_norm": 1.6953125,
"learning_rate": 2.714782608695652e-07,
"loss": 0.2361,
"step": 27190
},
{
"epoch": 1.886815521352699,
"grad_norm": 1.296875,
"learning_rate": 2.6973913043478265e-07,
"loss": 0.2341,
"step": 27200
},
{
"epoch": 1.8875092130934314,
"grad_norm": 1.4453125,
"learning_rate": 2.68e-07,
"loss": 0.258,
"step": 27210
},
{
"epoch": 1.8882029048341642,
"grad_norm": 1.390625,
"learning_rate": 2.6626086956521744e-07,
"loss": 0.2434,
"step": 27220
},
{
"epoch": 1.888896596574897,
"grad_norm": 1.1171875,
"learning_rate": 2.645217391304348e-07,
"loss": 0.2194,
"step": 27230
},
{
"epoch": 1.8895902883156297,
"grad_norm": 1.1328125,
"learning_rate": 2.627826086956522e-07,
"loss": 0.2651,
"step": 27240
},
{
"epoch": 1.8902839800563624,
"grad_norm": 1.25,
"learning_rate": 2.6104347826086955e-07,
"loss": 0.2527,
"step": 27250
},
{
"epoch": 1.8909776717970952,
"grad_norm": 1.21875,
"learning_rate": 2.59304347826087e-07,
"loss": 0.2297,
"step": 27260
},
{
"epoch": 1.891671363537828,
"grad_norm": 1.34375,
"learning_rate": 2.5756521739130435e-07,
"loss": 0.2108,
"step": 27270
},
{
"epoch": 1.8923650552785607,
"grad_norm": 1.0234375,
"learning_rate": 2.558260869565218e-07,
"loss": 0.2001,
"step": 27280
},
{
"epoch": 1.8930587470192934,
"grad_norm": 0.9921875,
"learning_rate": 2.5408695652173915e-07,
"loss": 0.2906,
"step": 27290
},
{
"epoch": 1.893752438760026,
"grad_norm": 1.6796875,
"learning_rate": 2.5234782608695657e-07,
"loss": 0.2958,
"step": 27300
},
{
"epoch": 1.8944461305007587,
"grad_norm": 1.140625,
"learning_rate": 2.5060869565217394e-07,
"loss": 0.32,
"step": 27310
},
{
"epoch": 1.8951398222414915,
"grad_norm": 1.25,
"learning_rate": 2.488695652173913e-07,
"loss": 0.2223,
"step": 27320
},
{
"epoch": 1.895833513982224,
"grad_norm": 1.421875,
"learning_rate": 2.4713043478260874e-07,
"loss": 0.323,
"step": 27330
},
{
"epoch": 1.8965272057229567,
"grad_norm": 1.34375,
"learning_rate": 2.453913043478261e-07,
"loss": 0.2378,
"step": 27340
},
{
"epoch": 1.8972208974636895,
"grad_norm": 1.2890625,
"learning_rate": 2.436521739130435e-07,
"loss": 0.26,
"step": 27350
},
{
"epoch": 1.8979145892044222,
"grad_norm": 1.2421875,
"learning_rate": 2.419130434782609e-07,
"loss": 0.2459,
"step": 27360
},
{
"epoch": 1.898608280945155,
"grad_norm": 1.125,
"learning_rate": 2.4017391304347827e-07,
"loss": 0.2385,
"step": 27370
},
{
"epoch": 1.8993019726858877,
"grad_norm": 1.390625,
"learning_rate": 2.384347826086957e-07,
"loss": 0.2089,
"step": 27380
},
{
"epoch": 1.8999956644266205,
"grad_norm": 0.99609375,
"learning_rate": 2.3669565217391304e-07,
"loss": 0.2246,
"step": 27390
},
{
"epoch": 1.9006893561673532,
"grad_norm": 1.3515625,
"learning_rate": 2.3495652173913044e-07,
"loss": 0.2281,
"step": 27400
},
{
"epoch": 1.901383047908086,
"grad_norm": 1.4765625,
"learning_rate": 2.3321739130434784e-07,
"loss": 0.2276,
"step": 27410
},
{
"epoch": 1.9020767396488185,
"grad_norm": 1.0234375,
"learning_rate": 2.3147826086956523e-07,
"loss": 0.3057,
"step": 27420
},
{
"epoch": 1.9027704313895513,
"grad_norm": 1.203125,
"learning_rate": 2.2973913043478263e-07,
"loss": 0.237,
"step": 27430
},
{
"epoch": 1.903464123130284,
"grad_norm": 1.3828125,
"learning_rate": 2.2800000000000003e-07,
"loss": 0.2445,
"step": 27440
},
{
"epoch": 1.9041578148710165,
"grad_norm": 1.171875,
"learning_rate": 2.2626086956521743e-07,
"loss": 0.2948,
"step": 27450
},
{
"epoch": 1.9048515066117493,
"grad_norm": 1.203125,
"learning_rate": 2.2452173913043483e-07,
"loss": 0.2938,
"step": 27460
},
{
"epoch": 1.905545198352482,
"grad_norm": 1.21875,
"learning_rate": 2.2278260869565217e-07,
"loss": 0.3153,
"step": 27470
},
{
"epoch": 1.9062388900932148,
"grad_norm": 1.234375,
"learning_rate": 2.2104347826086957e-07,
"loss": 0.2448,
"step": 27480
},
{
"epoch": 1.9069325818339475,
"grad_norm": 1.046875,
"learning_rate": 2.1930434782608696e-07,
"loss": 0.2489,
"step": 27490
},
{
"epoch": 1.9076262735746803,
"grad_norm": 0.95703125,
"learning_rate": 2.1756521739130436e-07,
"loss": 0.2393,
"step": 27500
},
{
"epoch": 1.908319965315413,
"grad_norm": 0.90234375,
"learning_rate": 2.1582608695652176e-07,
"loss": 0.2245,
"step": 27510
},
{
"epoch": 1.9090136570561458,
"grad_norm": 1.3359375,
"learning_rate": 2.1408695652173916e-07,
"loss": 0.2445,
"step": 27520
},
{
"epoch": 1.9097073487968785,
"grad_norm": 1.078125,
"learning_rate": 2.1234782608695656e-07,
"loss": 0.2321,
"step": 27530
},
{
"epoch": 1.910401040537611,
"grad_norm": 1.03125,
"learning_rate": 2.1060869565217393e-07,
"loss": 0.2311,
"step": 27540
},
{
"epoch": 1.9110947322783438,
"grad_norm": 1.3359375,
"learning_rate": 2.088695652173913e-07,
"loss": 0.2688,
"step": 27550
},
{
"epoch": 1.9117884240190766,
"grad_norm": 1.1875,
"learning_rate": 2.071304347826087e-07,
"loss": 0.2381,
"step": 27560
},
{
"epoch": 1.912482115759809,
"grad_norm": 1.09375,
"learning_rate": 2.053913043478261e-07,
"loss": 0.2505,
"step": 27570
},
{
"epoch": 1.9131758075005418,
"grad_norm": 1.1796875,
"learning_rate": 2.036521739130435e-07,
"loss": 0.2384,
"step": 27580
},
{
"epoch": 1.9138694992412746,
"grad_norm": 1.3359375,
"learning_rate": 2.019130434782609e-07,
"loss": 0.2604,
"step": 27590
},
{
"epoch": 1.9145631909820073,
"grad_norm": 1.2734375,
"learning_rate": 2.0017391304347829e-07,
"loss": 0.2229,
"step": 27600
},
{
"epoch": 1.91525688272274,
"grad_norm": 1.7421875,
"learning_rate": 1.9843478260869568e-07,
"loss": 0.3559,
"step": 27610
},
{
"epoch": 1.9159505744634728,
"grad_norm": 1.5390625,
"learning_rate": 1.9669565217391305e-07,
"loss": 0.2457,
"step": 27620
},
{
"epoch": 1.9166442662042056,
"grad_norm": 1.21875,
"learning_rate": 1.9495652173913045e-07,
"loss": 0.2954,
"step": 27630
},
{
"epoch": 1.9173379579449383,
"grad_norm": 1.1640625,
"learning_rate": 1.9321739130434782e-07,
"loss": 0.3314,
"step": 27640
},
{
"epoch": 1.918031649685671,
"grad_norm": 1.0546875,
"learning_rate": 1.9147826086956522e-07,
"loss": 0.2632,
"step": 27650
},
{
"epoch": 1.9187253414264036,
"grad_norm": 1.3046875,
"learning_rate": 1.8973913043478262e-07,
"loss": 0.2361,
"step": 27660
},
{
"epoch": 1.9194190331671364,
"grad_norm": 1.34375,
"learning_rate": 1.8800000000000002e-07,
"loss": 0.2164,
"step": 27670
},
{
"epoch": 1.920112724907869,
"grad_norm": 1.265625,
"learning_rate": 1.8626086956521741e-07,
"loss": 0.2366,
"step": 27680
},
{
"epoch": 1.9208064166486016,
"grad_norm": 1.421875,
"learning_rate": 1.845217391304348e-07,
"loss": 0.2236,
"step": 27690
},
{
"epoch": 1.9215001083893344,
"grad_norm": 1.0078125,
"learning_rate": 1.8278260869565218e-07,
"loss": 0.2692,
"step": 27700
},
{
"epoch": 1.9221938001300671,
"grad_norm": 1.96875,
"learning_rate": 1.8104347826086958e-07,
"loss": 0.3485,
"step": 27710
},
{
"epoch": 1.9228874918707999,
"grad_norm": 1.171875,
"learning_rate": 1.7930434782608698e-07,
"loss": 0.3303,
"step": 27720
},
{
"epoch": 1.9235811836115326,
"grad_norm": 1.3828125,
"learning_rate": 1.7756521739130437e-07,
"loss": 0.2459,
"step": 27730
},
{
"epoch": 1.9242748753522654,
"grad_norm": 1.609375,
"learning_rate": 1.7582608695652175e-07,
"loss": 0.2606,
"step": 27740
},
{
"epoch": 1.9249685670929981,
"grad_norm": 1.3359375,
"learning_rate": 1.7408695652173914e-07,
"loss": 0.2583,
"step": 27750
},
{
"epoch": 1.9256622588337309,
"grad_norm": 1.015625,
"learning_rate": 1.7234782608695654e-07,
"loss": 0.2222,
"step": 27760
},
{
"epoch": 1.9263559505744636,
"grad_norm": 1.1171875,
"learning_rate": 1.706086956521739e-07,
"loss": 0.2098,
"step": 27770
},
{
"epoch": 1.9270496423151962,
"grad_norm": 1.1796875,
"learning_rate": 1.688695652173913e-07,
"loss": 0.2314,
"step": 27780
},
{
"epoch": 1.927743334055929,
"grad_norm": 1.265625,
"learning_rate": 1.671304347826087e-07,
"loss": 0.2344,
"step": 27790
},
{
"epoch": 1.9284370257966617,
"grad_norm": 1.234375,
"learning_rate": 1.653913043478261e-07,
"loss": 0.2579,
"step": 27800
},
{
"epoch": 1.9291307175373942,
"grad_norm": 1.078125,
"learning_rate": 1.636521739130435e-07,
"loss": 0.2024,
"step": 27810
},
{
"epoch": 1.929824409278127,
"grad_norm": 1.2890625,
"learning_rate": 1.619130434782609e-07,
"loss": 0.2661,
"step": 27820
},
{
"epoch": 1.9305181010188597,
"grad_norm": 1.0859375,
"learning_rate": 1.6017391304347827e-07,
"loss": 0.2161,
"step": 27830
},
{
"epoch": 1.9312117927595924,
"grad_norm": 1.1171875,
"learning_rate": 1.5843478260869567e-07,
"loss": 0.2477,
"step": 27840
},
{
"epoch": 1.9319054845003252,
"grad_norm": 1.140625,
"learning_rate": 1.5669565217391304e-07,
"loss": 0.2482,
"step": 27850
},
{
"epoch": 1.932599176241058,
"grad_norm": 1.7265625,
"learning_rate": 1.5495652173913046e-07,
"loss": 0.2296,
"step": 27860
},
{
"epoch": 1.9332928679817907,
"grad_norm": 1.09375,
"learning_rate": 1.5321739130434784e-07,
"loss": 0.2083,
"step": 27870
},
{
"epoch": 1.9339865597225234,
"grad_norm": 1.2109375,
"learning_rate": 1.5147826086956523e-07,
"loss": 0.2222,
"step": 27880
},
{
"epoch": 1.9346802514632562,
"grad_norm": 0.9453125,
"learning_rate": 1.4973913043478263e-07,
"loss": 0.2657,
"step": 27890
},
{
"epoch": 1.9353739432039887,
"grad_norm": 1.0078125,
"learning_rate": 1.4800000000000003e-07,
"loss": 0.2672,
"step": 27900
},
{
"epoch": 1.9360676349447214,
"grad_norm": 0.9609375,
"learning_rate": 1.462608695652174e-07,
"loss": 0.2201,
"step": 27910
},
{
"epoch": 1.9367613266854542,
"grad_norm": 1.078125,
"learning_rate": 1.445217391304348e-07,
"loss": 0.2807,
"step": 27920
},
{
"epoch": 1.9374550184261867,
"grad_norm": 1.4375,
"learning_rate": 1.427826086956522e-07,
"loss": 0.2759,
"step": 27930
},
{
"epoch": 1.9381487101669195,
"grad_norm": 1.15625,
"learning_rate": 1.410434782608696e-07,
"loss": 0.2086,
"step": 27940
},
{
"epoch": 1.9388424019076522,
"grad_norm": 1.3984375,
"learning_rate": 1.3930434782608696e-07,
"loss": 0.2209,
"step": 27950
},
{
"epoch": 1.939536093648385,
"grad_norm": 2.015625,
"learning_rate": 1.3756521739130436e-07,
"loss": 0.2617,
"step": 27960
},
{
"epoch": 1.9402297853891177,
"grad_norm": 1.3046875,
"learning_rate": 1.3582608695652176e-07,
"loss": 0.25,
"step": 27970
},
{
"epoch": 1.9409234771298505,
"grad_norm": 1.2109375,
"learning_rate": 1.3408695652173916e-07,
"loss": 0.1882,
"step": 27980
},
{
"epoch": 1.9416171688705832,
"grad_norm": 1.078125,
"learning_rate": 1.3234782608695653e-07,
"loss": 0.279,
"step": 27990
},
{
"epoch": 1.942310860611316,
"grad_norm": 1.25,
"learning_rate": 1.3060869565217392e-07,
"loss": 0.2479,
"step": 28000
},
{
"epoch": 1.9430045523520487,
"grad_norm": 1.140625,
"learning_rate": 1.2886956521739132e-07,
"loss": 0.2482,
"step": 28010
},
{
"epoch": 1.9436982440927812,
"grad_norm": 1.1328125,
"learning_rate": 1.2713043478260872e-07,
"loss": 0.2375,
"step": 28020
},
{
"epoch": 1.944391935833514,
"grad_norm": 1.0390625,
"learning_rate": 1.253913043478261e-07,
"loss": 0.215,
"step": 28030
},
{
"epoch": 1.9450856275742467,
"grad_norm": 1.3125,
"learning_rate": 1.236521739130435e-07,
"loss": 0.2363,
"step": 28040
},
{
"epoch": 1.9457793193149793,
"grad_norm": 0.93359375,
"learning_rate": 1.2191304347826089e-07,
"loss": 0.3115,
"step": 28050
},
{
"epoch": 1.946473011055712,
"grad_norm": 1.1328125,
"learning_rate": 1.2017391304347826e-07,
"loss": 0.2421,
"step": 28060
},
{
"epoch": 1.9471667027964448,
"grad_norm": 1.1328125,
"learning_rate": 1.1843478260869566e-07,
"loss": 0.204,
"step": 28070
},
{
"epoch": 1.9478603945371775,
"grad_norm": 1.0859375,
"learning_rate": 1.1669565217391305e-07,
"loss": 0.2407,
"step": 28080
},
{
"epoch": 1.9485540862779103,
"grad_norm": 1.3828125,
"learning_rate": 1.1495652173913045e-07,
"loss": 0.2806,
"step": 28090
},
{
"epoch": 1.949247778018643,
"grad_norm": 1.0703125,
"learning_rate": 1.1321739130434782e-07,
"loss": 0.205,
"step": 28100
},
{
"epoch": 1.9499414697593758,
"grad_norm": 1.4609375,
"learning_rate": 1.1147826086956522e-07,
"loss": 0.29,
"step": 28110
},
{
"epoch": 1.9506351615001085,
"grad_norm": 1.1484375,
"learning_rate": 1.0973913043478262e-07,
"loss": 0.256,
"step": 28120
},
{
"epoch": 1.9513288532408413,
"grad_norm": 1.328125,
"learning_rate": 1.0800000000000001e-07,
"loss": 0.2424,
"step": 28130
},
{
"epoch": 1.9520225449815738,
"grad_norm": 1.15625,
"learning_rate": 1.062608695652174e-07,
"loss": 0.2434,
"step": 28140
},
{
"epoch": 1.9527162367223065,
"grad_norm": 1.34375,
"learning_rate": 1.0452173913043478e-07,
"loss": 0.273,
"step": 28150
},
{
"epoch": 1.9534099284630393,
"grad_norm": 1.265625,
"learning_rate": 1.0278260869565218e-07,
"loss": 0.2397,
"step": 28160
},
{
"epoch": 1.9541036202037718,
"grad_norm": 1.28125,
"learning_rate": 1.0104347826086958e-07,
"loss": 0.2082,
"step": 28170
},
{
"epoch": 1.9547973119445046,
"grad_norm": 1.140625,
"learning_rate": 9.930434782608696e-08,
"loss": 0.2713,
"step": 28180
},
{
"epoch": 1.9554910036852373,
"grad_norm": 1.25,
"learning_rate": 9.756521739130436e-08,
"loss": 0.2822,
"step": 28190
},
{
"epoch": 1.95618469542597,
"grad_norm": 1.296875,
"learning_rate": 9.582608695652174e-08,
"loss": 0.2366,
"step": 28200
},
{
"epoch": 1.9568783871667028,
"grad_norm": 1.734375,
"learning_rate": 9.408695652173914e-08,
"loss": 0.2482,
"step": 28210
},
{
"epoch": 1.9575720789074356,
"grad_norm": 1.1171875,
"learning_rate": 9.234782608695653e-08,
"loss": 0.2706,
"step": 28220
},
{
"epoch": 1.9582657706481683,
"grad_norm": 1.203125,
"learning_rate": 9.060869565217392e-08,
"loss": 0.2504,
"step": 28230
},
{
"epoch": 1.958959462388901,
"grad_norm": 0.8984375,
"learning_rate": 8.886956521739131e-08,
"loss": 0.3229,
"step": 28240
},
{
"epoch": 1.9596531541296336,
"grad_norm": 1.09375,
"learning_rate": 8.71304347826087e-08,
"loss": 0.2716,
"step": 28250
},
{
"epoch": 1.9603468458703663,
"grad_norm": 1.6640625,
"learning_rate": 8.539130434782609e-08,
"loss": 0.2757,
"step": 28260
},
{
"epoch": 1.961040537611099,
"grad_norm": 1.21875,
"learning_rate": 8.365217391304349e-08,
"loss": 0.2666,
"step": 28270
},
{
"epoch": 1.9617342293518316,
"grad_norm": 1.203125,
"learning_rate": 8.191304347826089e-08,
"loss": 0.2189,
"step": 28280
},
{
"epoch": 1.9624279210925644,
"grad_norm": 1.3828125,
"learning_rate": 8.017391304347827e-08,
"loss": 0.2109,
"step": 28290
},
{
"epoch": 1.9631216128332971,
"grad_norm": 1.1640625,
"learning_rate": 7.843478260869565e-08,
"loss": 0.2154,
"step": 28300
},
{
"epoch": 1.9638153045740299,
"grad_norm": 1.046875,
"learning_rate": 7.669565217391305e-08,
"loss": 0.2053,
"step": 28310
},
{
"epoch": 1.9645089963147626,
"grad_norm": 1.171875,
"learning_rate": 7.495652173913045e-08,
"loss": 0.3179,
"step": 28320
},
{
"epoch": 1.9652026880554954,
"grad_norm": 1.234375,
"learning_rate": 7.321739130434783e-08,
"loss": 0.2356,
"step": 28330
},
{
"epoch": 1.965896379796228,
"grad_norm": 1.3203125,
"learning_rate": 7.147826086956522e-08,
"loss": 0.223,
"step": 28340
},
{
"epoch": 1.9665900715369609,
"grad_norm": 1.2109375,
"learning_rate": 6.973913043478262e-08,
"loss": 0.27,
"step": 28350
},
{
"epoch": 1.9672837632776936,
"grad_norm": 1.0859375,
"learning_rate": 6.8e-08,
"loss": 0.2565,
"step": 28360
},
{
"epoch": 1.9679774550184261,
"grad_norm": 1.015625,
"learning_rate": 6.62608695652174e-08,
"loss": 0.2209,
"step": 28370
},
{
"epoch": 1.9686711467591589,
"grad_norm": 1.1484375,
"learning_rate": 6.452173913043478e-08,
"loss": 0.2149,
"step": 28380
},
{
"epoch": 1.9693648384998916,
"grad_norm": 1.3046875,
"learning_rate": 6.278260869565218e-08,
"loss": 0.2276,
"step": 28390
},
{
"epoch": 1.9700585302406242,
"grad_norm": 1.5,
"learning_rate": 6.104347826086956e-08,
"loss": 0.2533,
"step": 28400
},
{
"epoch": 1.970752221981357,
"grad_norm": 1.296875,
"learning_rate": 5.930434782608696e-08,
"loss": 0.2304,
"step": 28410
},
{
"epoch": 1.9714459137220897,
"grad_norm": 0.97265625,
"learning_rate": 5.756521739130435e-08,
"loss": 0.2486,
"step": 28420
},
{
"epoch": 1.9721396054628224,
"grad_norm": 1.0,
"learning_rate": 5.5826086956521744e-08,
"loss": 0.234,
"step": 28430
},
{
"epoch": 1.9728332972035552,
"grad_norm": 1.421875,
"learning_rate": 5.4086956521739135e-08,
"loss": 0.2633,
"step": 28440
},
{
"epoch": 1.973526988944288,
"grad_norm": 1.046875,
"learning_rate": 5.2347826086956526e-08,
"loss": 0.2266,
"step": 28450
},
{
"epoch": 1.9742206806850207,
"grad_norm": 1.3359375,
"learning_rate": 5.0608695652173917e-08,
"loss": 0.2356,
"step": 28460
},
{
"epoch": 1.9749143724257534,
"grad_norm": 1.0234375,
"learning_rate": 4.886956521739131e-08,
"loss": 0.2128,
"step": 28470
},
{
"epoch": 1.9756080641664862,
"grad_norm": 1.09375,
"learning_rate": 4.71304347826087e-08,
"loss": 0.2174,
"step": 28480
},
{
"epoch": 1.9763017559072187,
"grad_norm": 1.3046875,
"learning_rate": 4.5391304347826096e-08,
"loss": 0.2517,
"step": 28490
},
{
"epoch": 1.9769954476479514,
"grad_norm": 1.25,
"learning_rate": 4.365217391304348e-08,
"loss": 0.227,
"step": 28500
}
],
"logging_steps": 10,
"max_steps": 28750,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 6.3842958733451e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}