{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 19351,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.000516769159216578,
"grad_norm": 2.2854151725769043,
"learning_rate": 2.58384579608289e-05,
"loss": 8.3208,
"step": 10
},
{
"epoch": 0.001033538318433156,
"grad_norm": 1.7825466394424438,
"learning_rate": 5.16769159216578e-05,
"loss": 7.8091,
"step": 20
},
{
"epoch": 0.001550307477649734,
"grad_norm": 1.5718376636505127,
"learning_rate": 7.751537388248669e-05,
"loss": 7.23,
"step": 30
},
{
"epoch": 0.002067076636866312,
"grad_norm": 0.9627140164375305,
"learning_rate": 0.0001033538318433156,
"loss": 6.6397,
"step": 40
},
{
"epoch": 0.0025838457960828897,
"grad_norm": 0.7302472591400146,
"learning_rate": 0.0001291922898041445,
"loss": 6.2658,
"step": 50
},
{
"epoch": 0.003100614955299468,
"grad_norm": 0.6241187453269958,
"learning_rate": 0.00015503074776497338,
"loss": 5.9946,
"step": 60
},
{
"epoch": 0.0036173841145160455,
"grad_norm": 1.97952139377594,
"learning_rate": 0.0001808692057258023,
"loss": 5.7746,
"step": 70
},
{
"epoch": 0.004134153273732624,
"grad_norm": 0.8876756429672241,
"learning_rate": 0.0002067076636866312,
"loss": 5.6108,
"step": 80
},
{
"epoch": 0.004650922432949202,
"grad_norm": 0.9505946040153503,
"learning_rate": 0.00023254612164746007,
"loss": 5.4343,
"step": 90
},
{
"epoch": 0.005167691592165779,
"grad_norm": 0.8270168304443359,
"learning_rate": 0.000258384579608289,
"loss": 5.1914,
"step": 100
},
{
"epoch": 0.005684460751382358,
"grad_norm": 1.4167860746383667,
"learning_rate": 0.0002842230375691179,
"loss": 4.9526,
"step": 110
},
{
"epoch": 0.006201229910598936,
"grad_norm": 0.8067450523376465,
"learning_rate": 0.00031006149552994676,
"loss": 4.7455,
"step": 120
},
{
"epoch": 0.006717999069815513,
"grad_norm": 1.0726768970489502,
"learning_rate": 0.0003358999534907757,
"loss": 4.5333,
"step": 130
},
{
"epoch": 0.007234768229032091,
"grad_norm": 0.9126196503639221,
"learning_rate": 0.0003617384114516046,
"loss": 4.3803,
"step": 140
},
{
"epoch": 0.0077515373882486695,
"grad_norm": 1.0933046340942383,
"learning_rate": 0.0003875768694124335,
"loss": 4.2341,
"step": 150
},
{
"epoch": 0.008268306547465248,
"grad_norm": 1.2751045227050781,
"learning_rate": 0.0004134153273732624,
"loss": 4.1177,
"step": 160
},
{
"epoch": 0.008785075706681826,
"grad_norm": 1.0147113800048828,
"learning_rate": 0.0004392537853340913,
"loss": 4.006,
"step": 170
},
{
"epoch": 0.009301844865898403,
"grad_norm": 1.2339316606521606,
"learning_rate": 0.00046509224329492014,
"loss": 3.9218,
"step": 180
},
{
"epoch": 0.009818614025114981,
"grad_norm": 1.2789371013641357,
"learning_rate": 0.0004909307012557491,
"loss": 3.8193,
"step": 190
},
{
"epoch": 0.010335383184331559,
"grad_norm": 1.337181568145752,
"learning_rate": 0.0005,
"loss": 3.729,
"step": 200
},
{
"epoch": 0.010852152343548136,
"grad_norm": 1.29481840133667,
"learning_rate": 0.0005,
"loss": 3.6638,
"step": 210
},
{
"epoch": 0.011368921502764716,
"grad_norm": 1.2725201845169067,
"learning_rate": 0.0005,
"loss": 3.6127,
"step": 220
},
{
"epoch": 0.011885690661981294,
"grad_norm": 1.4627310037612915,
"learning_rate": 0.0005,
"loss": 3.5382,
"step": 230
},
{
"epoch": 0.012402459821197871,
"grad_norm": 1.0869964361190796,
"learning_rate": 0.0005,
"loss": 3.5016,
"step": 240
},
{
"epoch": 0.012919228980414449,
"grad_norm": 1.142592191696167,
"learning_rate": 0.0005,
"loss": 3.4392,
"step": 250
},
{
"epoch": 0.013435998139631027,
"grad_norm": 1.0130606889724731,
"learning_rate": 0.0005,
"loss": 3.3923,
"step": 260
},
{
"epoch": 0.013952767298847604,
"grad_norm": 1.2244436740875244,
"learning_rate": 0.0005,
"loss": 3.3526,
"step": 270
},
{
"epoch": 0.014469536458064182,
"grad_norm": 1.164255976676941,
"learning_rate": 0.0005,
"loss": 3.2819,
"step": 280
},
{
"epoch": 0.014986305617280761,
"grad_norm": 0.9863433241844177,
"learning_rate": 0.0005,
"loss": 3.2612,
"step": 290
},
{
"epoch": 0.015503074776497339,
"grad_norm": 1.1939715147018433,
"learning_rate": 0.0005,
"loss": 3.2136,
"step": 300
},
{
"epoch": 0.016019843935713915,
"grad_norm": 1.0233927965164185,
"learning_rate": 0.0005,
"loss": 3.1914,
"step": 310
},
{
"epoch": 0.016536613094930496,
"grad_norm": 1.3167331218719482,
"learning_rate": 0.0005,
"loss": 3.1614,
"step": 320
},
{
"epoch": 0.017053382254147074,
"grad_norm": 1.10019850730896,
"learning_rate": 0.0005,
"loss": 3.1133,
"step": 330
},
{
"epoch": 0.01757015141336365,
"grad_norm": 1.267260193824768,
"learning_rate": 0.0005,
"loss": 3.0851,
"step": 340
},
{
"epoch": 0.01808692057258023,
"grad_norm": 1.134069800376892,
"learning_rate": 0.0005,
"loss": 3.0513,
"step": 350
},
{
"epoch": 0.018603689731796807,
"grad_norm": 1.3286008834838867,
"learning_rate": 0.0005,
"loss": 3.0269,
"step": 360
},
{
"epoch": 0.019120458891013385,
"grad_norm": 1.3457223176956177,
"learning_rate": 0.0005,
"loss": 2.9893,
"step": 370
},
{
"epoch": 0.019637228050229962,
"grad_norm": 1.254515290260315,
"learning_rate": 0.0005,
"loss": 2.9698,
"step": 380
},
{
"epoch": 0.02015399720944654,
"grad_norm": 1.1265138387680054,
"learning_rate": 0.0005,
"loss": 2.9409,
"step": 390
},
{
"epoch": 0.020670766368663118,
"grad_norm": 1.1716142892837524,
"learning_rate": 0.0005,
"loss": 2.9289,
"step": 400
},
{
"epoch": 0.021187535527879695,
"grad_norm": 1.7391645908355713,
"learning_rate": 0.0005,
"loss": 2.9168,
"step": 410
},
{
"epoch": 0.021704304687096273,
"grad_norm": 1.2796969413757324,
"learning_rate": 0.0005,
"loss": 2.8871,
"step": 420
},
{
"epoch": 0.02222107384631285,
"grad_norm": 1.192845106124878,
"learning_rate": 0.0005,
"loss": 2.85,
"step": 430
},
{
"epoch": 0.02273784300552943,
"grad_norm": 1.2399319410324097,
"learning_rate": 0.0005,
"loss": 2.8469,
"step": 440
},
{
"epoch": 0.02325461216474601,
"grad_norm": 2.0635640621185303,
"learning_rate": 0.0005,
"loss": 2.8174,
"step": 450
},
{
"epoch": 0.023771381323962587,
"grad_norm": 1.2288042306900024,
"learning_rate": 0.0005,
"loss": 2.8065,
"step": 460
},
{
"epoch": 0.024288150483179165,
"grad_norm": 1.1476280689239502,
"learning_rate": 0.0005,
"loss": 2.7702,
"step": 470
},
{
"epoch": 0.024804919642395742,
"grad_norm": 1.0889836549758911,
"learning_rate": 0.0005,
"loss": 2.7561,
"step": 480
},
{
"epoch": 0.02532168880161232,
"grad_norm": 1.2561166286468506,
"learning_rate": 0.0005,
"loss": 2.7407,
"step": 490
},
{
"epoch": 0.025838457960828898,
"grad_norm": 1.105919599533081,
"learning_rate": 0.0005,
"loss": 2.7356,
"step": 500
},
{
"epoch": 0.026355227120045475,
"grad_norm": 1.2789875268936157,
"learning_rate": 0.0005,
"loss": 2.6941,
"step": 510
},
{
"epoch": 0.026871996279262053,
"grad_norm": 1.2486238479614258,
"learning_rate": 0.0005,
"loss": 2.7009,
"step": 520
},
{
"epoch": 0.02738876543847863,
"grad_norm": 1.3023815155029297,
"learning_rate": 0.0005,
"loss": 2.6761,
"step": 530
},
{
"epoch": 0.02790553459769521,
"grad_norm": 1.3703725337982178,
"learning_rate": 0.0005,
"loss": 2.659,
"step": 540
},
{
"epoch": 0.028422303756911786,
"grad_norm": 1.1285632848739624,
"learning_rate": 0.0005,
"loss": 2.6444,
"step": 550
},
{
"epoch": 0.028939072916128364,
"grad_norm": 1.6290286779403687,
"learning_rate": 0.0005,
"loss": 2.607,
"step": 560
},
{
"epoch": 0.029455842075344945,
"grad_norm": 1.2040055990219116,
"learning_rate": 0.0005,
"loss": 2.6061,
"step": 570
},
{
"epoch": 0.029972611234561523,
"grad_norm": 1.13448166847229,
"learning_rate": 0.0005,
"loss": 2.5709,
"step": 580
},
{
"epoch": 0.0304893803937781,
"grad_norm": 1.9924914836883545,
"learning_rate": 0.0005,
"loss": 2.5914,
"step": 590
},
{
"epoch": 0.031006149552994678,
"grad_norm": 1.6680738925933838,
"learning_rate": 0.0005,
"loss": 2.5523,
"step": 600
},
{
"epoch": 0.03152291871221125,
"grad_norm": 1.5603681802749634,
"learning_rate": 0.0005,
"loss": 2.5157,
"step": 610
},
{
"epoch": 0.03203968787142783,
"grad_norm": 1.2942813634872437,
"learning_rate": 0.0005,
"loss": 2.4997,
"step": 620
},
{
"epoch": 0.032556457030644415,
"grad_norm": 1.4313350915908813,
"learning_rate": 0.0005,
"loss": 2.4648,
"step": 630
},
{
"epoch": 0.03307322618986099,
"grad_norm": 1.41900634765625,
"learning_rate": 0.0005,
"loss": 2.4899,
"step": 640
},
{
"epoch": 0.03358999534907757,
"grad_norm": 2.0101678371429443,
"learning_rate": 0.0005,
"loss": 2.4705,
"step": 650
},
{
"epoch": 0.03410676450829415,
"grad_norm": 1.567264437675476,
"learning_rate": 0.0005,
"loss": 2.4655,
"step": 660
},
{
"epoch": 0.034623533667510725,
"grad_norm": 1.504845142364502,
"learning_rate": 0.0005,
"loss": 2.4357,
"step": 670
},
{
"epoch": 0.0351403028267273,
"grad_norm": 1.3784253597259521,
"learning_rate": 0.0005,
"loss": 2.4484,
"step": 680
},
{
"epoch": 0.03565707198594388,
"grad_norm": 1.2612375020980835,
"learning_rate": 0.0005,
"loss": 2.4108,
"step": 690
},
{
"epoch": 0.03617384114516046,
"grad_norm": 1.2809542417526245,
"learning_rate": 0.0005,
"loss": 2.429,
"step": 700
},
{
"epoch": 0.036690610304377036,
"grad_norm": 1.282116174697876,
"learning_rate": 0.0005,
"loss": 2.4136,
"step": 710
},
{
"epoch": 0.037207379463593614,
"grad_norm": 2.6279282569885254,
"learning_rate": 0.0005,
"loss": 2.4043,
"step": 720
},
{
"epoch": 0.03772414862281019,
"grad_norm": 1.158665657043457,
"learning_rate": 0.0005,
"loss": 2.3666,
"step": 730
},
{
"epoch": 0.03824091778202677,
"grad_norm": 1.0581611394882202,
"learning_rate": 0.0005,
"loss": 2.3588,
"step": 740
},
{
"epoch": 0.03875768694124335,
"grad_norm": 1.225664496421814,
"learning_rate": 0.0005,
"loss": 2.3499,
"step": 750
},
{
"epoch": 0.039274456100459924,
"grad_norm": 1.9708060026168823,
"learning_rate": 0.0005,
"loss": 2.3462,
"step": 760
},
{
"epoch": 0.0397912252596765,
"grad_norm": 1.856858730316162,
"learning_rate": 0.0005,
"loss": 2.3443,
"step": 770
},
{
"epoch": 0.04030799441889308,
"grad_norm": 1.4437602758407593,
"learning_rate": 0.0005,
"loss": 2.289,
"step": 780
},
{
"epoch": 0.04082476357810966,
"grad_norm": 1.289876937866211,
"learning_rate": 0.0005,
"loss": 2.2881,
"step": 790
},
{
"epoch": 0.041341532737326235,
"grad_norm": 1.2440109252929688,
"learning_rate": 0.0005,
"loss": 2.2639,
"step": 800
},
{
"epoch": 0.04185830189654281,
"grad_norm": 1.2062422037124634,
"learning_rate": 0.0005,
"loss": 2.263,
"step": 810
},
{
"epoch": 0.04237507105575939,
"grad_norm": 1.171238660812378,
"learning_rate": 0.0005,
"loss": 2.259,
"step": 820
},
{
"epoch": 0.04289184021497597,
"grad_norm": 1.1757316589355469,
"learning_rate": 0.0005,
"loss": 2.2527,
"step": 830
},
{
"epoch": 0.043408609374192546,
"grad_norm": 1.3125736713409424,
"learning_rate": 0.0005,
"loss": 2.2429,
"step": 840
},
{
"epoch": 0.04392537853340912,
"grad_norm": 1.2145166397094727,
"learning_rate": 0.0005,
"loss": 2.2615,
"step": 850
},
{
"epoch": 0.0444421476926257,
"grad_norm": 1.3434226512908936,
"learning_rate": 0.0005,
"loss": 2.2398,
"step": 860
},
{
"epoch": 0.04495891685184228,
"grad_norm": 1.5729234218597412,
"learning_rate": 0.0005,
"loss": 2.2263,
"step": 870
},
{
"epoch": 0.04547568601105886,
"grad_norm": 1.4341133832931519,
"learning_rate": 0.0005,
"loss": 2.193,
"step": 880
},
{
"epoch": 0.04599245517027544,
"grad_norm": 1.1859130859375,
"learning_rate": 0.0005,
"loss": 2.1928,
"step": 890
},
{
"epoch": 0.04650922432949202,
"grad_norm": 1.8495599031448364,
"learning_rate": 0.0005,
"loss": 2.182,
"step": 900
},
{
"epoch": 0.047025993488708596,
"grad_norm": 1.416601538658142,
"learning_rate": 0.0005,
"loss": 2.1748,
"step": 910
},
{
"epoch": 0.047542762647925174,
"grad_norm": 1.2966763973236084,
"learning_rate": 0.0005,
"loss": 2.1481,
"step": 920
},
{
"epoch": 0.04805953180714175,
"grad_norm": 1.499526858329773,
"learning_rate": 0.0005,
"loss": 2.1442,
"step": 930
},
{
"epoch": 0.04857630096635833,
"grad_norm": 1.2409976720809937,
"learning_rate": 0.0005,
"loss": 2.1788,
"step": 940
},
{
"epoch": 0.04909307012557491,
"grad_norm": 1.2645729780197144,
"learning_rate": 0.0005,
"loss": 2.1577,
"step": 950
},
{
"epoch": 0.049609839284791485,
"grad_norm": 1.297904372215271,
"learning_rate": 0.0005,
"loss": 2.1412,
"step": 960
},
{
"epoch": 0.05012660844400806,
"grad_norm": 1.3194257020950317,
"learning_rate": 0.0005,
"loss": 2.1242,
"step": 970
},
{
"epoch": 0.05064337760322464,
"grad_norm": 1.4184504747390747,
"learning_rate": 0.0005,
"loss": 2.1414,
"step": 980
},
{
"epoch": 0.05116014676244122,
"grad_norm": 2.1643896102905273,
"learning_rate": 0.0005,
"loss": 2.12,
"step": 990
},
{
"epoch": 0.051676915921657796,
"grad_norm": 2.3233911991119385,
"learning_rate": 0.0005,
"loss": 2.0972,
"step": 1000
},
{
"epoch": 0.05219368508087437,
"grad_norm": 2.2393977642059326,
"learning_rate": 0.0005,
"loss": 2.0906,
"step": 1010
},
{
"epoch": 0.05271045424009095,
"grad_norm": 1.298572063446045,
"learning_rate": 0.0005,
"loss": 2.0664,
"step": 1020
},
{
"epoch": 0.05322722339930753,
"grad_norm": 1.8283194303512573,
"learning_rate": 0.0005,
"loss": 2.0726,
"step": 1030
},
{
"epoch": 0.053743992558524106,
"grad_norm": 1.4193987846374512,
"learning_rate": 0.0005,
"loss": 2.0643,
"step": 1040
},
{
"epoch": 0.054260761717740684,
"grad_norm": 1.3053640127182007,
"learning_rate": 0.0005,
"loss": 2.0124,
"step": 1050
},
{
"epoch": 0.05477753087695726,
"grad_norm": 1.598849892616272,
"learning_rate": 0.0005,
"loss": 2.0588,
"step": 1060
},
{
"epoch": 0.05529430003617384,
"grad_norm": 1.3975911140441895,
"learning_rate": 0.0005,
"loss": 2.0349,
"step": 1070
},
{
"epoch": 0.05581106919539042,
"grad_norm": 1.1616796255111694,
"learning_rate": 0.0005,
"loss": 2.0226,
"step": 1080
},
{
"epoch": 0.056327838354606995,
"grad_norm": 1.3756109476089478,
"learning_rate": 0.0005,
"loss": 2.0305,
"step": 1090
},
{
"epoch": 0.05684460751382357,
"grad_norm": 1.4646899700164795,
"learning_rate": 0.0005,
"loss": 2.0329,
"step": 1100
},
{
"epoch": 0.05736137667304015,
"grad_norm": 1.215995192527771,
"learning_rate": 0.0005,
"loss": 2.039,
"step": 1110
},
{
"epoch": 0.05787814583225673,
"grad_norm": 1.6965135335922241,
"learning_rate": 0.0005,
"loss": 2.0142,
"step": 1120
},
{
"epoch": 0.05839491499147331,
"grad_norm": 1.466676950454712,
"learning_rate": 0.0005,
"loss": 2.0231,
"step": 1130
},
{
"epoch": 0.05891168415068989,
"grad_norm": 1.5624445676803589,
"learning_rate": 0.0005,
"loss": 1.9893,
"step": 1140
},
{
"epoch": 0.05942845330990647,
"grad_norm": 1.3193562030792236,
"learning_rate": 0.0005,
"loss": 1.9744,
"step": 1150
},
{
"epoch": 0.059945222469123045,
"grad_norm": 1.1906757354736328,
"learning_rate": 0.0005,
"loss": 1.9866,
"step": 1160
},
{
"epoch": 0.06046199162833962,
"grad_norm": 2.6814322471618652,
"learning_rate": 0.0005,
"loss": 1.9743,
"step": 1170
},
{
"epoch": 0.0609787607875562,
"grad_norm": 1.7349072694778442,
"learning_rate": 0.0005,
"loss": 1.9815,
"step": 1180
},
{
"epoch": 0.06149552994677278,
"grad_norm": 2.099928617477417,
"learning_rate": 0.0005,
"loss": 1.9469,
"step": 1190
},
{
"epoch": 0.062012299105989356,
"grad_norm": 1.141414999961853,
"learning_rate": 0.0005,
"loss": 1.9422,
"step": 1200
},
{
"epoch": 0.06252906826520593,
"grad_norm": 1.1726713180541992,
"learning_rate": 0.0005,
"loss": 1.956,
"step": 1210
},
{
"epoch": 0.0630458374244225,
"grad_norm": 1.7521125078201294,
"learning_rate": 0.0005,
"loss": 1.9423,
"step": 1220
},
{
"epoch": 0.06356260658363909,
"grad_norm": 1.1364134550094604,
"learning_rate": 0.0005,
"loss": 1.9614,
"step": 1230
},
{
"epoch": 0.06407937574285566,
"grad_norm": 1.3168714046478271,
"learning_rate": 0.0005,
"loss": 1.9571,
"step": 1240
},
{
"epoch": 0.06459614490207224,
"grad_norm": 1.787176251411438,
"learning_rate": 0.0005,
"loss": 1.9566,
"step": 1250
},
{
"epoch": 0.06511291406128883,
"grad_norm": 1.2864240407943726,
"learning_rate": 0.0005,
"loss": 1.9349,
"step": 1260
},
{
"epoch": 0.0656296832205054,
"grad_norm": 1.5742415189743042,
"learning_rate": 0.0005,
"loss": 1.9006,
"step": 1270
},
{
"epoch": 0.06614645237972198,
"grad_norm": 1.349160075187683,
"learning_rate": 0.0005,
"loss": 1.9083,
"step": 1280
},
{
"epoch": 0.06666322153893856,
"grad_norm": 2.4660980701446533,
"learning_rate": 0.0005,
"loss": 1.9296,
"step": 1290
},
{
"epoch": 0.06717999069815514,
"grad_norm": 2.0999865531921387,
"learning_rate": 0.0005,
"loss": 1.876,
"step": 1300
},
{
"epoch": 0.06769675985737171,
"grad_norm": 1.3416152000427246,
"learning_rate": 0.0005,
"loss": 1.8874,
"step": 1310
},
{
"epoch": 0.0682135290165883,
"grad_norm": 1.2447683811187744,
"learning_rate": 0.0005,
"loss": 1.8892,
"step": 1320
},
{
"epoch": 0.06873029817580487,
"grad_norm": 1.1316670179367065,
"learning_rate": 0.0005,
"loss": 1.8754,
"step": 1330
},
{
"epoch": 0.06924706733502145,
"grad_norm": 1.354366421699524,
"learning_rate": 0.0005,
"loss": 1.8778,
"step": 1340
},
{
"epoch": 0.06976383649423802,
"grad_norm": 1.7485758066177368,
"learning_rate": 0.0005,
"loss": 1.8982,
"step": 1350
},
{
"epoch": 0.0702806056534546,
"grad_norm": 1.5367459058761597,
"learning_rate": 0.0005,
"loss": 1.8862,
"step": 1360
},
{
"epoch": 0.07079737481267118,
"grad_norm": 1.112423062324524,
"learning_rate": 0.0005,
"loss": 1.8748,
"step": 1370
},
{
"epoch": 0.07131414397188776,
"grad_norm": 1.269370436668396,
"learning_rate": 0.0005,
"loss": 1.8389,
"step": 1380
},
{
"epoch": 0.07183091313110433,
"grad_norm": 1.6045186519622803,
"learning_rate": 0.0005,
"loss": 1.8814,
"step": 1390
},
{
"epoch": 0.07234768229032092,
"grad_norm": 2.199096918106079,
"learning_rate": 0.0005,
"loss": 1.8576,
"step": 1400
},
{
"epoch": 0.07286445144953749,
"grad_norm": 1.2949317693710327,
"learning_rate": 0.0005,
"loss": 1.837,
"step": 1410
},
{
"epoch": 0.07338122060875407,
"grad_norm": 1.2082808017730713,
"learning_rate": 0.0005,
"loss": 1.8191,
"step": 1420
},
{
"epoch": 0.07389798976797064,
"grad_norm": 1.632419466972351,
"learning_rate": 0.0005,
"loss": 1.8192,
"step": 1430
},
{
"epoch": 0.07441475892718723,
"grad_norm": 1.1036083698272705,
"learning_rate": 0.0005,
"loss": 1.8046,
"step": 1440
},
{
"epoch": 0.0749315280864038,
"grad_norm": 1.6769006252288818,
"learning_rate": 0.0005,
"loss": 1.8174,
"step": 1450
},
{
"epoch": 0.07544829724562038,
"grad_norm": 1.587368130683899,
"learning_rate": 0.0005,
"loss": 1.8311,
"step": 1460
},
{
"epoch": 0.07596506640483695,
"grad_norm": 1.063362956047058,
"learning_rate": 0.0005,
"loss": 1.7886,
"step": 1470
},
{
"epoch": 0.07648183556405354,
"grad_norm": 1.258238673210144,
"learning_rate": 0.0005,
"loss": 1.8063,
"step": 1480
},
{
"epoch": 0.07699860472327011,
"grad_norm": 1.3020492792129517,
"learning_rate": 0.0005,
"loss": 1.8289,
"step": 1490
},
{
"epoch": 0.0775153738824867,
"grad_norm": 2.0609872341156006,
"learning_rate": 0.0005,
"loss": 1.7893,
"step": 1500
},
{
"epoch": 0.07803214304170328,
"grad_norm": 1.1070424318313599,
"learning_rate": 0.0005,
"loss": 1.768,
"step": 1510
},
{
"epoch": 0.07854891220091985,
"grad_norm": 2.055048704147339,
"learning_rate": 0.0005,
"loss": 1.7597,
"step": 1520
},
{
"epoch": 0.07906568136013643,
"grad_norm": 1.4444563388824463,
"learning_rate": 0.0005,
"loss": 1.7827,
"step": 1530
},
{
"epoch": 0.079582450519353,
"grad_norm": 1.388077735900879,
"learning_rate": 0.0005,
"loss": 1.7753,
"step": 1540
},
{
"epoch": 0.08009921967856959,
"grad_norm": 1.2297486066818237,
"learning_rate": 0.0005,
"loss": 1.7371,
"step": 1550
},
{
"epoch": 0.08061598883778616,
"grad_norm": 1.1055219173431396,
"learning_rate": 0.0005,
"loss": 1.7616,
"step": 1560
},
{
"epoch": 0.08113275799700274,
"grad_norm": 1.330352783203125,
"learning_rate": 0.0005,
"loss": 1.7753,
"step": 1570
},
{
"epoch": 0.08164952715621931,
"grad_norm": 1.0750646591186523,
"learning_rate": 0.0005,
"loss": 1.7551,
"step": 1580
},
{
"epoch": 0.0821662963154359,
"grad_norm": 1.1137466430664062,
"learning_rate": 0.0005,
"loss": 1.7686,
"step": 1590
},
{
"epoch": 0.08268306547465247,
"grad_norm": 1.2276798486709595,
"learning_rate": 0.0005,
"loss": 1.7617,
"step": 1600
},
{
"epoch": 0.08319983463386905,
"grad_norm": 1.0940239429473877,
"learning_rate": 0.0005,
"loss": 1.7269,
"step": 1610
},
{
"epoch": 0.08371660379308563,
"grad_norm": 1.1361453533172607,
"learning_rate": 0.0005,
"loss": 1.7481,
"step": 1620
},
{
"epoch": 0.08423337295230221,
"grad_norm": 1.482571005821228,
"learning_rate": 0.0005,
"loss": 1.7265,
"step": 1630
},
{
"epoch": 0.08475014211151878,
"grad_norm": 1.2309211492538452,
"learning_rate": 0.0005,
"loss": 1.7087,
"step": 1640
},
{
"epoch": 0.08526691127073537,
"grad_norm": 1.162300705909729,
"learning_rate": 0.0005,
"loss": 1.708,
"step": 1650
},
{
"epoch": 0.08578368042995194,
"grad_norm": 1.1956666707992554,
"learning_rate": 0.0005,
"loss": 1.73,
"step": 1660
},
{
"epoch": 0.08630044958916852,
"grad_norm": 1.5038352012634277,
"learning_rate": 0.0005,
"loss": 1.7213,
"step": 1670
},
{
"epoch": 0.08681721874838509,
"grad_norm": 1.2151919603347778,
"learning_rate": 0.0005,
"loss": 1.7224,
"step": 1680
},
{
"epoch": 0.08733398790760168,
"grad_norm": 1.0433135032653809,
"learning_rate": 0.0005,
"loss": 1.7049,
"step": 1690
},
{
"epoch": 0.08785075706681825,
"grad_norm": 1.8113486766815186,
"learning_rate": 0.0005,
"loss": 1.7132,
"step": 1700
},
{
"epoch": 0.08836752622603483,
"grad_norm": 0.9753373861312866,
"learning_rate": 0.0005,
"loss": 1.7109,
"step": 1710
},
{
"epoch": 0.0888842953852514,
"grad_norm": 1.355560064315796,
"learning_rate": 0.0005,
"loss": 1.7041,
"step": 1720
},
{
"epoch": 0.08940106454446799,
"grad_norm": 1.1716082096099854,
"learning_rate": 0.0005,
"loss": 1.7114,
"step": 1730
},
{
"epoch": 0.08991783370368456,
"grad_norm": 1.10747492313385,
"learning_rate": 0.0005,
"loss": 1.7068,
"step": 1740
},
{
"epoch": 0.09043460286290114,
"grad_norm": 1.0477211475372314,
"learning_rate": 0.0005,
"loss": 1.6935,
"step": 1750
},
{
"epoch": 0.09095137202211773,
"grad_norm": 1.1489983797073364,
"learning_rate": 0.0005,
"loss": 1.6976,
"step": 1760
},
{
"epoch": 0.0914681411813343,
"grad_norm": 1.2262177467346191,
"learning_rate": 0.0005,
"loss": 1.689,
"step": 1770
},
{
"epoch": 0.09198491034055088,
"grad_norm": 1.111374020576477,
"learning_rate": 0.0005,
"loss": 1.6811,
"step": 1780
},
{
"epoch": 0.09250167949976745,
"grad_norm": 1.0549476146697998,
"learning_rate": 0.0005,
"loss": 1.6539,
"step": 1790
},
{
"epoch": 0.09301844865898404,
"grad_norm": 1.2341543436050415,
"learning_rate": 0.0005,
"loss": 1.6643,
"step": 1800
},
{
"epoch": 0.09353521781820061,
"grad_norm": 1.6305192708969116,
"learning_rate": 0.0005,
"loss": 1.6553,
"step": 1810
},
{
"epoch": 0.09405198697741719,
"grad_norm": 1.0614426136016846,
"learning_rate": 0.0005,
"loss": 1.6474,
"step": 1820
},
{
"epoch": 0.09456875613663376,
"grad_norm": 1.1092963218688965,
"learning_rate": 0.0005,
"loss": 1.6686,
"step": 1830
},
{
"epoch": 0.09508552529585035,
"grad_norm": 1.521255373954773,
"learning_rate": 0.0005,
"loss": 1.6586,
"step": 1840
},
{
"epoch": 0.09560229445506692,
"grad_norm": 1.353458285331726,
"learning_rate": 0.0005,
"loss": 1.6632,
"step": 1850
},
{
"epoch": 0.0961190636142835,
"grad_norm": 1.0622385740280151,
"learning_rate": 0.0005,
"loss": 1.6417,
"step": 1860
},
{
"epoch": 0.09663583277350007,
"grad_norm": 1.1304274797439575,
"learning_rate": 0.0005,
"loss": 1.6374,
"step": 1870
},
{
"epoch": 0.09715260193271666,
"grad_norm": 1.6776567697525024,
"learning_rate": 0.0005,
"loss": 1.648,
"step": 1880
},
{
"epoch": 0.09766937109193323,
"grad_norm": 1.2316774129867554,
"learning_rate": 0.0005,
"loss": 1.6055,
"step": 1890
},
{
"epoch": 0.09818614025114981,
"grad_norm": 1.1291395425796509,
"learning_rate": 0.0005,
"loss": 1.6199,
"step": 1900
},
{
"epoch": 0.09870290941036639,
"grad_norm": 1.2423152923583984,
"learning_rate": 0.0005,
"loss": 1.6343,
"step": 1910
},
{
"epoch": 0.09921967856958297,
"grad_norm": 1.6953014135360718,
"learning_rate": 0.0005,
"loss": 1.6011,
"step": 1920
},
{
"epoch": 0.09973644772879954,
"grad_norm": 1.078352451324463,
"learning_rate": 0.0005,
"loss": 1.5773,
"step": 1930
},
{
"epoch": 0.10025321688801613,
"grad_norm": 1.1383408308029175,
"learning_rate": 0.0005,
"loss": 1.6175,
"step": 1940
},
{
"epoch": 0.1007699860472327,
"grad_norm": 0.998919665813446,
"learning_rate": 0.0005,
"loss": 1.6388,
"step": 1950
},
{
"epoch": 0.10128675520644928,
"grad_norm": 1.6332008838653564,
"learning_rate": 0.0005,
"loss": 1.5987,
"step": 1960
},
{
"epoch": 0.10180352436566585,
"grad_norm": 1.041397213935852,
"learning_rate": 0.0005,
"loss": 1.6043,
"step": 1970
},
{
"epoch": 0.10232029352488244,
"grad_norm": 1.1090408563613892,
"learning_rate": 0.0005,
"loss": 1.5859,
"step": 1980
},
{
"epoch": 0.102837062684099,
"grad_norm": 1.0914579629898071,
"learning_rate": 0.0005,
"loss": 1.6069,
"step": 1990
},
{
"epoch": 0.10335383184331559,
"grad_norm": 1.5213651657104492,
"learning_rate": 0.0005,
"loss": 1.5897,
"step": 2000
},
{
"epoch": 0.10387060100253218,
"grad_norm": 1.5415380001068115,
"learning_rate": 0.0005,
"loss": 1.6035,
"step": 2010
},
{
"epoch": 0.10438737016174875,
"grad_norm": 1.1095470190048218,
"learning_rate": 0.0005,
"loss": 1.5697,
"step": 2020
},
{
"epoch": 0.10490413932096533,
"grad_norm": 1.3773058652877808,
"learning_rate": 0.0005,
"loss": 1.5827,
"step": 2030
},
{
"epoch": 0.1054209084801819,
"grad_norm": 0.9746466279029846,
"learning_rate": 0.0005,
"loss": 1.5612,
"step": 2040
},
{
"epoch": 0.10593767763939849,
"grad_norm": 1.047061800956726,
"learning_rate": 0.0005,
"loss": 1.5707,
"step": 2050
},
{
"epoch": 0.10645444679861506,
"grad_norm": 0.9137332439422607,
"learning_rate": 0.0005,
"loss": 1.5646,
"step": 2060
},
{
"epoch": 0.10697121595783164,
"grad_norm": 0.9967837929725647,
"learning_rate": 0.0005,
"loss": 1.5659,
"step": 2070
},
{
"epoch": 0.10748798511704821,
"grad_norm": 1.2617110013961792,
"learning_rate": 0.0005,
"loss": 1.5673,
"step": 2080
},
{
"epoch": 0.1080047542762648,
"grad_norm": 0.9831250309944153,
"learning_rate": 0.0005,
"loss": 1.5742,
"step": 2090
},
{
"epoch": 0.10852152343548137,
"grad_norm": 1.1735457181930542,
"learning_rate": 0.0005,
"loss": 1.5811,
"step": 2100
},
{
"epoch": 0.10903829259469795,
"grad_norm": 1.1183675527572632,
"learning_rate": 0.0005,
"loss": 1.5546,
"step": 2110
},
{
"epoch": 0.10955506175391452,
"grad_norm": 1.3536667823791504,
"learning_rate": 0.0005,
"loss": 1.5551,
"step": 2120
},
{
"epoch": 0.11007183091313111,
"grad_norm": 0.9417304396629333,
"learning_rate": 0.0005,
"loss": 1.562,
"step": 2130
},
{
"epoch": 0.11058860007234768,
"grad_norm": 0.9261025786399841,
"learning_rate": 0.0005,
"loss": 1.5736,
"step": 2140
},
{
"epoch": 0.11110536923156426,
"grad_norm": 1.1396183967590332,
"learning_rate": 0.0005,
"loss": 1.5417,
"step": 2150
},
{
"epoch": 0.11162213839078083,
"grad_norm": 0.9720540642738342,
"learning_rate": 0.0005,
"loss": 1.5231,
"step": 2160
},
{
"epoch": 0.11213890754999742,
"grad_norm": 0.9784930348396301,
"learning_rate": 0.0005,
"loss": 1.5428,
"step": 2170
},
{
"epoch": 0.11265567670921399,
"grad_norm": 1.037022590637207,
"learning_rate": 0.0005,
"loss": 1.5562,
"step": 2180
},
{
"epoch": 0.11317244586843057,
"grad_norm": 1.3437378406524658,
"learning_rate": 0.0005,
"loss": 1.5452,
"step": 2190
},
{
"epoch": 0.11368921502764714,
"grad_norm": 1.2525360584259033,
"learning_rate": 0.0005,
"loss": 1.5372,
"step": 2200
},
{
"epoch": 0.11420598418686373,
"grad_norm": 1.0389316082000732,
"learning_rate": 0.0005,
"loss": 1.5273,
"step": 2210
},
{
"epoch": 0.1147227533460803,
"grad_norm": 1.2379904985427856,
"learning_rate": 0.0005,
"loss": 1.5281,
"step": 2220
},
{
"epoch": 0.11523952250529688,
"grad_norm": 1.0728790760040283,
"learning_rate": 0.0005,
"loss": 1.5228,
"step": 2230
},
{
"epoch": 0.11575629166451346,
"grad_norm": 1.54011070728302,
"learning_rate": 0.0005,
"loss": 1.5257,
"step": 2240
},
{
"epoch": 0.11627306082373004,
"grad_norm": 1.4011873006820679,
"learning_rate": 0.0005,
"loss": 1.5258,
"step": 2250
},
{
"epoch": 0.11678982998294662,
"grad_norm": 1.2126344442367554,
"learning_rate": 0.0005,
"loss": 1.5249,
"step": 2260
},
{
"epoch": 0.1173065991421632,
"grad_norm": 1.1125898361206055,
"learning_rate": 0.0005,
"loss": 1.5034,
"step": 2270
},
{
"epoch": 0.11782336830137978,
"grad_norm": 1.0404047966003418,
"learning_rate": 0.0005,
"loss": 1.5243,
"step": 2280
},
{
"epoch": 0.11834013746059635,
"grad_norm": 0.9504315257072449,
"learning_rate": 0.0005,
"loss": 1.501,
"step": 2290
},
{
"epoch": 0.11885690661981294,
"grad_norm": 1.0554097890853882,
"learning_rate": 0.0005,
"loss": 1.5115,
"step": 2300
},
{
"epoch": 0.1193736757790295,
"grad_norm": 0.9352626204490662,
"learning_rate": 0.0005,
"loss": 1.5038,
"step": 2310
},
{
"epoch": 0.11989044493824609,
"grad_norm": 0.9765718579292297,
"learning_rate": 0.0005,
"loss": 1.5019,
"step": 2320
},
{
"epoch": 0.12040721409746266,
"grad_norm": 1.2419780492782593,
"learning_rate": 0.0005,
"loss": 1.4993,
"step": 2330
},
{
"epoch": 0.12092398325667925,
"grad_norm": 1.0337820053100586,
"learning_rate": 0.0005,
"loss": 1.4843,
"step": 2340
},
{
"epoch": 0.12144075241589582,
"grad_norm": 1.0803256034851074,
"learning_rate": 0.0005,
"loss": 1.4902,
"step": 2350
},
{
"epoch": 0.1219575215751124,
"grad_norm": 0.9424406886100769,
"learning_rate": 0.0005,
"loss": 1.5031,
"step": 2360
},
{
"epoch": 0.12247429073432897,
"grad_norm": 0.9924182891845703,
"learning_rate": 0.0005,
"loss": 1.489,
"step": 2370
},
{
"epoch": 0.12299105989354556,
"grad_norm": 1.0602052211761475,
"learning_rate": 0.0005,
"loss": 1.4801,
"step": 2380
},
{
"epoch": 0.12350782905276213,
"grad_norm": 0.9463520646095276,
"learning_rate": 0.0005,
"loss": 1.49,
"step": 2390
},
{
"epoch": 0.12402459821197871,
"grad_norm": 0.9301887154579163,
"learning_rate": 0.0005,
"loss": 1.4923,
"step": 2400
},
{
"epoch": 0.12454136737119528,
"grad_norm": 0.9018756151199341,
"learning_rate": 0.0005,
"loss": 1.457,
"step": 2410
},
{
"epoch": 0.12505813653041187,
"grad_norm": 0.9669187068939209,
"learning_rate": 0.0005,
"loss": 1.4691,
"step": 2420
},
{
"epoch": 0.12557490568962845,
"grad_norm": 0.9768301248550415,
"learning_rate": 0.0005,
"loss": 1.4448,
"step": 2430
},
{
"epoch": 0.126091674848845,
"grad_norm": 0.9736414551734924,
"learning_rate": 0.0005,
"loss": 1.4671,
"step": 2440
},
{
"epoch": 0.1266084440080616,
"grad_norm": 1.3117995262145996,
"learning_rate": 0.0005,
"loss": 1.4577,
"step": 2450
},
{
"epoch": 0.12712521316727818,
"grad_norm": 0.976732075214386,
"learning_rate": 0.0005,
"loss": 1.4624,
"step": 2460
},
{
"epoch": 0.12764198232649476,
"grad_norm": 1.1756422519683838,
"learning_rate": 0.0005,
"loss": 1.4675,
"step": 2470
},
{
"epoch": 0.12815875148571132,
"grad_norm": 0.9411507844924927,
"learning_rate": 0.0005,
"loss": 1.4634,
"step": 2480
},
{
"epoch": 0.1286755206449279,
"grad_norm": 1.6214072704315186,
"learning_rate": 0.0005,
"loss": 1.4685,
"step": 2490
},
{
"epoch": 0.1291922898041445,
"grad_norm": 1.0801911354064941,
"learning_rate": 0.0005,
"loss": 1.4468,
"step": 2500
},
{
"epoch": 0.12970905896336107,
"grad_norm": 0.9756599068641663,
"learning_rate": 0.0005,
"loss": 1.4438,
"step": 2510
},
{
"epoch": 0.13022582812257766,
"grad_norm": 1.1823363304138184,
"learning_rate": 0.0005,
"loss": 1.4522,
"step": 2520
},
{
"epoch": 0.13074259728179422,
"grad_norm": 1.0005122423171997,
"learning_rate": 0.0005,
"loss": 1.436,
"step": 2530
},
{
"epoch": 0.1312593664410108,
"grad_norm": 1.4303867816925049,
"learning_rate": 0.0005,
"loss": 1.4411,
"step": 2540
},
{
"epoch": 0.13177613560022738,
"grad_norm": 0.867132842540741,
"learning_rate": 0.0005,
"loss": 1.4558,
"step": 2550
},
{
"epoch": 0.13229290475944397,
"grad_norm": 0.9243984222412109,
"learning_rate": 0.0005,
"loss": 1.4282,
"step": 2560
},
{
"epoch": 0.13280967391866053,
"grad_norm": 1.1926263570785522,
"learning_rate": 0.0005,
"loss": 1.4187,
"step": 2570
},
{
"epoch": 0.1333264430778771,
"grad_norm": 1.1110721826553345,
"learning_rate": 0.0005,
"loss": 1.4302,
"step": 2580
},
{
"epoch": 0.1338432122370937,
"grad_norm": 0.9598495960235596,
"learning_rate": 0.0005,
"loss": 1.4459,
"step": 2590
},
{
"epoch": 0.13435998139631028,
"grad_norm": 0.9147258996963501,
"learning_rate": 0.0005,
"loss": 1.4174,
"step": 2600
},
{
"epoch": 0.13487675055552684,
"grad_norm": 0.8530228734016418,
"learning_rate": 0.0005,
"loss": 1.4348,
"step": 2610
},
{
"epoch": 0.13539351971474342,
"grad_norm": 1.0487037897109985,
"learning_rate": 0.0005,
"loss": 1.4302,
"step": 2620
},
{
"epoch": 0.13591028887396,
"grad_norm": 1.0711545944213867,
"learning_rate": 0.0005,
"loss": 1.425,
"step": 2630
},
{
"epoch": 0.1364270580331766,
"grad_norm": 1.0053889751434326,
"learning_rate": 0.0005,
"loss": 1.4099,
"step": 2640
},
{
"epoch": 0.13694382719239315,
"grad_norm": 0.8895754814147949,
"learning_rate": 0.0005,
"loss": 1.4101,
"step": 2650
},
{
"epoch": 0.13746059635160973,
"grad_norm": 1.1464654207229614,
"learning_rate": 0.0005,
"loss": 1.409,
"step": 2660
},
{
"epoch": 0.13797736551082632,
"grad_norm": 1.4213604927062988,
"learning_rate": 0.0005,
"loss": 1.4333,
"step": 2670
},
{
"epoch": 0.1384941346700429,
"grad_norm": 0.8963467478752136,
"learning_rate": 0.0005,
"loss": 1.4047,
"step": 2680
},
{
"epoch": 0.13901090382925946,
"grad_norm": 0.9514134526252747,
"learning_rate": 0.0005,
"loss": 1.3923,
"step": 2690
},
{
"epoch": 0.13952767298847604,
"grad_norm": 0.8818897604942322,
"learning_rate": 0.0005,
"loss": 1.4031,
"step": 2700
},
{
"epoch": 0.14004444214769263,
"grad_norm": 0.8554843664169312,
"learning_rate": 0.0005,
"loss": 1.4005,
"step": 2710
},
{
"epoch": 0.1405612113069092,
"grad_norm": 0.9477766752243042,
"learning_rate": 0.0005,
"loss": 1.3871,
"step": 2720
},
{
"epoch": 0.14107798046612577,
"grad_norm": 0.9560056924819946,
"learning_rate": 0.0005,
"loss": 1.388,
"step": 2730
},
{
"epoch": 0.14159474962534235,
"grad_norm": 1.325939655303955,
"learning_rate": 0.0005,
"loss": 1.372,
"step": 2740
},
{
"epoch": 0.14211151878455894,
"grad_norm": 0.9184489846229553,
"learning_rate": 0.0005,
"loss": 1.3901,
"step": 2750
},
{
"epoch": 0.14262828794377552,
"grad_norm": 0.905005693435669,
"learning_rate": 0.0005,
"loss": 1.3652,
"step": 2760
},
{
"epoch": 0.1431450571029921,
"grad_norm": 0.9112023115158081,
"learning_rate": 0.0005,
"loss": 1.3805,
"step": 2770
},
{
"epoch": 0.14366182626220866,
"grad_norm": 0.909542977809906,
"learning_rate": 0.0005,
"loss": 1.3851,
"step": 2780
},
{
"epoch": 0.14417859542142525,
"grad_norm": 0.8679105639457703,
"learning_rate": 0.0005,
"loss": 1.3776,
"step": 2790
},
{
"epoch": 0.14469536458064183,
"grad_norm": 0.884416401386261,
"learning_rate": 0.0005,
"loss": 1.3787,
"step": 2800
},
{
"epoch": 0.14521213373985842,
"grad_norm": 0.8939566612243652,
"learning_rate": 0.0005,
"loss": 1.3695,
"step": 2810
},
{
"epoch": 0.14572890289907497,
"grad_norm": 1.2388486862182617,
"learning_rate": 0.0005,
"loss": 1.3926,
"step": 2820
},
{
"epoch": 0.14624567205829156,
"grad_norm": 1.2662867307662964,
"learning_rate": 0.0005,
"loss": 1.3804,
"step": 2830
},
{
"epoch": 0.14676244121750814,
"grad_norm": 0.8967621326446533,
"learning_rate": 0.0005,
"loss": 1.3513,
"step": 2840
},
{
"epoch": 0.14727921037672473,
"grad_norm": 0.8640676736831665,
"learning_rate": 0.0005,
"loss": 1.3546,
"step": 2850
},
{
"epoch": 0.14779597953594129,
"grad_norm": 1.0147978067398071,
"learning_rate": 0.0005,
"loss": 1.3699,
"step": 2860
},
{
"epoch": 0.14831274869515787,
"grad_norm": 0.8949346542358398,
"learning_rate": 0.0005,
"loss": 1.345,
"step": 2870
},
{
"epoch": 0.14882951785437445,
"grad_norm": 0.8535652756690979,
"learning_rate": 0.0005,
"loss": 1.3724,
"step": 2880
},
{
"epoch": 0.14934628701359104,
"grad_norm": 0.840876042842865,
"learning_rate": 0.0005,
"loss": 1.3692,
"step": 2890
},
{
"epoch": 0.1498630561728076,
"grad_norm": 0.8421388864517212,
"learning_rate": 0.0005,
"loss": 1.3639,
"step": 2900
},
{
"epoch": 0.15037982533202418,
"grad_norm": 0.8401720523834229,
"learning_rate": 0.0005,
"loss": 1.348,
"step": 2910
},
{
"epoch": 0.15089659449124077,
"grad_norm": 0.8139095306396484,
"learning_rate": 0.0005,
"loss": 1.365,
"step": 2920
},
{
"epoch": 0.15141336365045735,
"grad_norm": 0.8704052567481995,
"learning_rate": 0.0005,
"loss": 1.3482,
"step": 2930
},
{
"epoch": 0.1519301328096739,
"grad_norm": 0.8963611125946045,
"learning_rate": 0.0005,
"loss": 1.3336,
"step": 2940
},
{
"epoch": 0.1524469019688905,
"grad_norm": 0.8725153207778931,
"learning_rate": 0.0005,
"loss": 1.3724,
"step": 2950
},
{
"epoch": 0.15296367112810708,
"grad_norm": 0.9125774502754211,
"learning_rate": 0.0005,
"loss": 1.3377,
"step": 2960
},
{
"epoch": 0.15348044028732366,
"grad_norm": 1.1160928010940552,
"learning_rate": 0.0005,
"loss": 1.3582,
"step": 2970
},
{
"epoch": 0.15399720944654022,
"grad_norm": 0.8732350468635559,
"learning_rate": 0.0005,
"loss": 1.3471,
"step": 2980
},
{
"epoch": 0.1545139786057568,
"grad_norm": 0.8881607055664062,
"learning_rate": 0.0005,
"loss": 1.3552,
"step": 2990
},
{
"epoch": 0.1550307477649734,
"grad_norm": 1.0814484357833862,
"learning_rate": 0.0005,
"loss": 1.3628,
"step": 3000
},
{
"epoch": 0.15554751692418997,
"grad_norm": 0.81389319896698,
"learning_rate": 0.0005,
"loss": 1.3249,
"step": 3010
},
{
"epoch": 0.15606428608340656,
"grad_norm": 0.8424196839332581,
"learning_rate": 0.0005,
"loss": 1.323,
"step": 3020
},
{
"epoch": 0.1565810552426231,
"grad_norm": 0.8028131127357483,
"learning_rate": 0.0005,
"loss": 1.3302,
"step": 3030
},
{
"epoch": 0.1570978244018397,
"grad_norm": 0.8348473906517029,
"learning_rate": 0.0005,
"loss": 1.3314,
"step": 3040
},
{
"epoch": 0.15761459356105628,
"grad_norm": 1.2074034214019775,
"learning_rate": 0.0005,
"loss": 1.3355,
"step": 3050
},
{
"epoch": 0.15813136272027287,
"grad_norm": 0.8177675604820251,
"learning_rate": 0.0005,
"loss": 1.3427,
"step": 3060
},
{
"epoch": 0.15864813187948942,
"grad_norm": 0.796273410320282,
"learning_rate": 0.0005,
"loss": 1.3088,
"step": 3070
},
{
"epoch": 0.159164901038706,
"grad_norm": 1.0104438066482544,
"learning_rate": 0.0005,
"loss": 1.3255,
"step": 3080
},
{
"epoch": 0.1596816701979226,
"grad_norm": 0.9192485809326172,
"learning_rate": 0.0005,
"loss": 1.3347,
"step": 3090
},
{
"epoch": 0.16019843935713918,
"grad_norm": 0.912550151348114,
"learning_rate": 0.0005,
"loss": 1.3157,
"step": 3100
},
{
"epoch": 0.16071520851635573,
"grad_norm": 0.9644028544425964,
"learning_rate": 0.0005,
"loss": 1.3242,
"step": 3110
},
{
"epoch": 0.16123197767557232,
"grad_norm": 0.9894726872444153,
"learning_rate": 0.0005,
"loss": 1.2968,
"step": 3120
},
{
"epoch": 0.1617487468347889,
"grad_norm": 0.9292682409286499,
"learning_rate": 0.0005,
"loss": 1.3342,
"step": 3130
},
{
"epoch": 0.1622655159940055,
"grad_norm": 0.9219216704368591,
"learning_rate": 0.0005,
"loss": 1.3242,
"step": 3140
},
{
"epoch": 0.16278228515322204,
"grad_norm": 1.1059894561767578,
"learning_rate": 0.0005,
"loss": 1.3238,
"step": 3150
},
{
"epoch": 0.16329905431243863,
"grad_norm": 0.8726058602333069,
"learning_rate": 0.0005,
"loss": 1.315,
"step": 3160
},
{
"epoch": 0.16381582347165521,
"grad_norm": 0.8204345107078552,
"learning_rate": 0.0005,
"loss": 1.3085,
"step": 3170
},
{
"epoch": 0.1643325926308718,
"grad_norm": 0.9515188932418823,
"learning_rate": 0.0005,
"loss": 1.2986,
"step": 3180
},
{
"epoch": 0.16484936179008836,
"grad_norm": 0.8825114369392395,
"learning_rate": 0.0005,
"loss": 1.2921,
"step": 3190
},
{
"epoch": 0.16536613094930494,
"grad_norm": 0.8144583702087402,
"learning_rate": 0.0005,
"loss": 1.2991,
"step": 3200
},
{
"epoch": 0.16588290010852152,
"grad_norm": 0.8747395873069763,
"learning_rate": 0.0005,
"loss": 1.2936,
"step": 3210
},
{
"epoch": 0.1663996692677381,
"grad_norm": 0.9829278588294983,
"learning_rate": 0.0005,
"loss": 1.2898,
"step": 3220
},
{
"epoch": 0.16691643842695467,
"grad_norm": 0.917072594165802,
"learning_rate": 0.0005,
"loss": 1.3056,
"step": 3230
},
{
"epoch": 0.16743320758617125,
"grad_norm": 0.893224835395813,
"learning_rate": 0.0005,
"loss": 1.2958,
"step": 3240
},
{
"epoch": 0.16794997674538784,
"grad_norm": 0.8513831496238708,
"learning_rate": 0.0005,
"loss": 1.3073,
"step": 3250
},
{
"epoch": 0.16846674590460442,
"grad_norm": 0.7902063727378845,
"learning_rate": 0.0005,
"loss": 1.2962,
"step": 3260
},
{
"epoch": 0.168983515063821,
"grad_norm": 0.8533388376235962,
"learning_rate": 0.0005,
"loss": 1.3034,
"step": 3270
},
{
"epoch": 0.16950028422303756,
"grad_norm": 0.89384526014328,
"learning_rate": 0.0005,
"loss": 1.306,
"step": 3280
},
{
"epoch": 0.17001705338225415,
"grad_norm": 1.1740915775299072,
"learning_rate": 0.0005,
"loss": 1.2861,
"step": 3290
},
{
"epoch": 0.17053382254147073,
"grad_norm": 0.7941210269927979,
"learning_rate": 0.0005,
"loss": 1.29,
"step": 3300
},
{
"epoch": 0.17105059170068732,
"grad_norm": 0.82374107837677,
"learning_rate": 0.0005,
"loss": 1.2715,
"step": 3310
},
{
"epoch": 0.17156736085990387,
"grad_norm": 0.9856778979301453,
"learning_rate": 0.0005,
"loss": 1.2908,
"step": 3320
},
{
"epoch": 0.17208413001912046,
"grad_norm": 0.777244508266449,
"learning_rate": 0.0005,
"loss": 1.2891,
"step": 3330
},
{
"epoch": 0.17260089917833704,
"grad_norm": 0.8938208222389221,
"learning_rate": 0.0005,
"loss": 1.285,
"step": 3340
},
{
"epoch": 0.17311766833755363,
"grad_norm": 0.8124037384986877,
"learning_rate": 0.0005,
"loss": 1.2908,
"step": 3350
},
{
"epoch": 0.17363443749677018,
"grad_norm": 0.9345457553863525,
"learning_rate": 0.0005,
"loss": 1.2964,
"step": 3360
},
{
"epoch": 0.17415120665598677,
"grad_norm": 0.7821003794670105,
"learning_rate": 0.0005,
"loss": 1.2767,
"step": 3370
},
{
"epoch": 0.17466797581520335,
"grad_norm": 0.8330212831497192,
"learning_rate": 0.0005,
"loss": 1.2779,
"step": 3380
},
{
"epoch": 0.17518474497441994,
"grad_norm": 0.764042854309082,
"learning_rate": 0.0005,
"loss": 1.2698,
"step": 3390
},
{
"epoch": 0.1757015141336365,
"grad_norm": 0.9339214563369751,
"learning_rate": 0.0005,
"loss": 1.2777,
"step": 3400
},
{
"epoch": 0.17621828329285308,
"grad_norm": 0.8121135830879211,
"learning_rate": 0.0005,
"loss": 1.2869,
"step": 3410
},
{
"epoch": 0.17673505245206966,
"grad_norm": 0.8460163474082947,
"learning_rate": 0.0005,
"loss": 1.2913,
"step": 3420
},
{
"epoch": 0.17725182161128625,
"grad_norm": 1.3961695432662964,
"learning_rate": 0.0005,
"loss": 1.2971,
"step": 3430
},
{
"epoch": 0.1777685907705028,
"grad_norm": 0.8089907765388489,
"learning_rate": 0.0005,
"loss": 1.2612,
"step": 3440
},
{
"epoch": 0.1782853599297194,
"grad_norm": 0.8770979046821594,
"learning_rate": 0.0005,
"loss": 1.2739,
"step": 3450
},
{
"epoch": 0.17880212908893597,
"grad_norm": 0.8448237776756287,
"learning_rate": 0.0005,
"loss": 1.2735,
"step": 3460
},
{
"epoch": 0.17931889824815256,
"grad_norm": 0.9335261583328247,
"learning_rate": 0.0005,
"loss": 1.2671,
"step": 3470
},
{
"epoch": 0.17983566740736912,
"grad_norm": 0.7510360479354858,
"learning_rate": 0.0005,
"loss": 1.2691,
"step": 3480
},
{
"epoch": 0.1803524365665857,
"grad_norm": 0.7871717810630798,
"learning_rate": 0.0005,
"loss": 1.2642,
"step": 3490
},
{
"epoch": 0.18086920572580228,
"grad_norm": 1.1407464742660522,
"learning_rate": 0.0005,
"loss": 1.248,
"step": 3500
},
{
"epoch": 0.18138597488501887,
"grad_norm": 0.8027787208557129,
"learning_rate": 0.0005,
"loss": 1.2557,
"step": 3510
},
{
"epoch": 0.18190274404423545,
"grad_norm": 0.8517947793006897,
"learning_rate": 0.0005,
"loss": 1.2529,
"step": 3520
},
{
"epoch": 0.182419513203452,
"grad_norm": 0.9083014726638794,
"learning_rate": 0.0005,
"loss": 1.2489,
"step": 3530
},
{
"epoch": 0.1829362823626686,
"grad_norm": 1.0628485679626465,
"learning_rate": 0.0005,
"loss": 1.2669,
"step": 3540
},
{
"epoch": 0.18345305152188518,
"grad_norm": 1.0175726413726807,
"learning_rate": 0.0005,
"loss": 1.2473,
"step": 3550
},
{
"epoch": 0.18396982068110176,
"grad_norm": 0.7979172468185425,
"learning_rate": 0.0005,
"loss": 1.2471,
"step": 3560
},
{
"epoch": 0.18448658984031832,
"grad_norm": 0.7472112774848938,
"learning_rate": 0.0005,
"loss": 1.2413,
"step": 3570
},
{
"epoch": 0.1850033589995349,
"grad_norm": 0.8240432739257812,
"learning_rate": 0.0005,
"loss": 1.2521,
"step": 3580
},
{
"epoch": 0.1855201281587515,
"grad_norm": 0.8023159503936768,
"learning_rate": 0.0005,
"loss": 1.2471,
"step": 3590
},
{
"epoch": 0.18603689731796808,
"grad_norm": 0.7950299978256226,
"learning_rate": 0.0005,
"loss": 1.2327,
"step": 3600
},
{
"epoch": 0.18655366647718463,
"grad_norm": 0.7718859314918518,
"learning_rate": 0.0005,
"loss": 1.2417,
"step": 3610
},
{
"epoch": 0.18707043563640122,
"grad_norm": 0.8416433334350586,
"learning_rate": 0.0005,
"loss": 1.2531,
"step": 3620
},
{
"epoch": 0.1875872047956178,
"grad_norm": 0.7842203974723816,
"learning_rate": 0.0005,
"loss": 1.2435,
"step": 3630
},
{
"epoch": 0.18810397395483439,
"grad_norm": 0.8708809614181519,
"learning_rate": 0.0005,
"loss": 1.245,
"step": 3640
},
{
"epoch": 0.18862074311405094,
"grad_norm": 0.8131195902824402,
"learning_rate": 0.0005,
"loss": 1.244,
"step": 3650
},
{
"epoch": 0.18913751227326753,
"grad_norm": 0.8010774254798889,
"learning_rate": 0.0005,
"loss": 1.245,
"step": 3660
},
{
"epoch": 0.1896542814324841,
"grad_norm": 0.7978084087371826,
"learning_rate": 0.0005,
"loss": 1.2475,
"step": 3670
},
{
"epoch": 0.1901710505917007,
"grad_norm": 0.7844563722610474,
"learning_rate": 0.0005,
"loss": 1.2325,
"step": 3680
},
{
"epoch": 0.19068781975091725,
"grad_norm": 0.8755462765693665,
"learning_rate": 0.0005,
"loss": 1.2243,
"step": 3690
},
{
"epoch": 0.19120458891013384,
"grad_norm": 0.7727536559104919,
"learning_rate": 0.0005,
"loss": 1.2447,
"step": 3700
},
{
"epoch": 0.19172135806935042,
"grad_norm": 0.7509860396385193,
"learning_rate": 0.0005,
"loss": 1.2324,
"step": 3710
},
{
"epoch": 0.192238127228567,
"grad_norm": 0.9001826047897339,
"learning_rate": 0.0005,
"loss": 1.2175,
"step": 3720
},
{
"epoch": 0.19275489638778356,
"grad_norm": 0.7595515847206116,
"learning_rate": 0.0005,
"loss": 1.2536,
"step": 3730
},
{
"epoch": 0.19327166554700015,
"grad_norm": 0.746465802192688,
"learning_rate": 0.0005,
"loss": 1.2439,
"step": 3740
},
{
"epoch": 0.19378843470621673,
"grad_norm": 0.8454607725143433,
"learning_rate": 0.0005,
"loss": 1.2319,
"step": 3750
},
{
"epoch": 0.19430520386543332,
"grad_norm": 0.7905994057655334,
"learning_rate": 0.0005,
"loss": 1.2335,
"step": 3760
},
{
"epoch": 0.1948219730246499,
"grad_norm": 1.1130495071411133,
"learning_rate": 0.0005,
"loss": 1.2444,
"step": 3770
},
{
"epoch": 0.19533874218386646,
"grad_norm": 0.9213355183601379,
"learning_rate": 0.0005,
"loss": 1.2188,
"step": 3780
},
{
"epoch": 0.19585551134308304,
"grad_norm": 0.8003748655319214,
"learning_rate": 0.0005,
"loss": 1.2478,
"step": 3790
},
{
"epoch": 0.19637228050229963,
"grad_norm": 0.7667946815490723,
"learning_rate": 0.0005,
"loss": 1.2286,
"step": 3800
},
{
"epoch": 0.1968890496615162,
"grad_norm": 0.7806205153465271,
"learning_rate": 0.0005,
"loss": 1.2152,
"step": 3810
},
{
"epoch": 0.19740581882073277,
"grad_norm": 1.1093833446502686,
"learning_rate": 0.0005,
"loss": 1.2281,
"step": 3820
},
{
"epoch": 0.19792258797994935,
"grad_norm": 0.8750317692756653,
"learning_rate": 0.0005,
"loss": 1.2418,
"step": 3830
},
{
"epoch": 0.19843935713916594,
"grad_norm": 0.9322946071624756,
"learning_rate": 0.0005,
"loss": 1.2168,
"step": 3840
},
{
"epoch": 0.19895612629838252,
"grad_norm": 0.9042627215385437,
"learning_rate": 0.0005,
"loss": 1.229,
"step": 3850
},
{
"epoch": 0.19947289545759908,
"grad_norm": 0.8162991404533386,
"learning_rate": 0.0005,
"loss": 1.2044,
"step": 3860
},
{
"epoch": 0.19998966461681567,
"grad_norm": 0.7078894972801208,
"learning_rate": 0.0005,
"loss": 1.2077,
"step": 3870
},
{
"epoch": 0.20050643377603225,
"grad_norm": 0.8144243955612183,
"learning_rate": 0.0005,
"loss": 1.1932,
"step": 3880
},
{
"epoch": 0.20102320293524883,
"grad_norm": 0.7456822991371155,
"learning_rate": 0.0005,
"loss": 1.2187,
"step": 3890
},
{
"epoch": 0.2015399720944654,
"grad_norm": 0.7855635285377502,
"learning_rate": 0.0005,
"loss": 1.2096,
"step": 3900
},
{
"epoch": 0.20205674125368198,
"grad_norm": 0.7501581311225891,
"learning_rate": 0.0005,
"loss": 1.2083,
"step": 3910
},
{
"epoch": 0.20257351041289856,
"grad_norm": 0.7569208145141602,
"learning_rate": 0.0005,
"loss": 1.2208,
"step": 3920
},
{
"epoch": 0.20309027957211515,
"grad_norm": 0.7520230412483215,
"learning_rate": 0.0005,
"loss": 1.2031,
"step": 3930
},
{
"epoch": 0.2036070487313317,
"grad_norm": 0.9110859632492065,
"learning_rate": 0.0005,
"loss": 1.2135,
"step": 3940
},
{
"epoch": 0.2041238178905483,
"grad_norm": 0.738043487071991,
"learning_rate": 0.0005,
"loss": 1.2066,
"step": 3950
},
{
"epoch": 0.20464058704976487,
"grad_norm": 0.7910060286521912,
"learning_rate": 0.0005,
"loss": 1.2089,
"step": 3960
},
{
"epoch": 0.20515735620898146,
"grad_norm": 0.7672162652015686,
"learning_rate": 0.0005,
"loss": 1.216,
"step": 3970
},
{
"epoch": 0.205674125368198,
"grad_norm": 0.7567201852798462,
"learning_rate": 0.0005,
"loss": 1.1915,
"step": 3980
},
{
"epoch": 0.2061908945274146,
"grad_norm": 0.759067714214325,
"learning_rate": 0.0005,
"loss": 1.2111,
"step": 3990
},
{
"epoch": 0.20670766368663118,
"grad_norm": 0.7911349534988403,
"learning_rate": 0.0005,
"loss": 1.211,
"step": 4000
},
{
"epoch": 0.20722443284584777,
"grad_norm": 1.0086050033569336,
"learning_rate": 0.0005,
"loss": 1.2122,
"step": 4010
},
{
"epoch": 0.20774120200506435,
"grad_norm": 1.1961076259613037,
"learning_rate": 0.0005,
"loss": 1.1972,
"step": 4020
},
{
"epoch": 0.2082579711642809,
"grad_norm": 0.8429704308509827,
"learning_rate": 0.0005,
"loss": 1.2038,
"step": 4030
},
{
"epoch": 0.2087747403234975,
"grad_norm": 1.0080244541168213,
"learning_rate": 0.0005,
"loss": 1.1981,
"step": 4040
},
{
"epoch": 0.20929150948271408,
"grad_norm": 0.7220394611358643,
"learning_rate": 0.0005,
"loss": 1.2083,
"step": 4050
},
{
"epoch": 0.20980827864193066,
"grad_norm": 0.7594371438026428,
"learning_rate": 0.0005,
"loss": 1.1976,
"step": 4060
},
{
"epoch": 0.21032504780114722,
"grad_norm": 0.7990491986274719,
"learning_rate": 0.0005,
"loss": 1.1938,
"step": 4070
},
{
"epoch": 0.2108418169603638,
"grad_norm": 1.0034983158111572,
"learning_rate": 0.0005,
"loss": 1.1769,
"step": 4080
},
{
"epoch": 0.2113585861195804,
"grad_norm": 0.8476843237876892,
"learning_rate": 0.0005,
"loss": 1.1914,
"step": 4090
},
{
"epoch": 0.21187535527879697,
"grad_norm": 0.7301702499389648,
"learning_rate": 0.0005,
"loss": 1.2054,
"step": 4100
},
{
"epoch": 0.21239212443801353,
"grad_norm": 0.7379107475280762,
"learning_rate": 0.0005,
"loss": 1.1945,
"step": 4110
},
{
"epoch": 0.21290889359723011,
"grad_norm": 0.7332804203033447,
"learning_rate": 0.0005,
"loss": 1.1921,
"step": 4120
},
{
"epoch": 0.2134256627564467,
"grad_norm": 0.7600969672203064,
"learning_rate": 0.0005,
"loss": 1.1957,
"step": 4130
},
{
"epoch": 0.21394243191566328,
"grad_norm": 0.9124670028686523,
"learning_rate": 0.0005,
"loss": 1.199,
"step": 4140
},
{
"epoch": 0.21445920107487984,
"grad_norm": 0.7995319962501526,
"learning_rate": 0.0005,
"loss": 1.1806,
"step": 4150
},
{
"epoch": 0.21497597023409643,
"grad_norm": 0.7137150168418884,
"learning_rate": 0.0005,
"loss": 1.1944,
"step": 4160
},
{
"epoch": 0.215492739393313,
"grad_norm": 0.8427070379257202,
"learning_rate": 0.0005,
"loss": 1.204,
"step": 4170
},
{
"epoch": 0.2160095085525296,
"grad_norm": 0.6893758177757263,
"learning_rate": 0.0005,
"loss": 1.2056,
"step": 4180
},
{
"epoch": 0.21652627771174615,
"grad_norm": 0.777153730392456,
"learning_rate": 0.0005,
"loss": 1.1834,
"step": 4190
},
{
"epoch": 0.21704304687096274,
"grad_norm": 0.7304201126098633,
"learning_rate": 0.0005,
"loss": 1.1918,
"step": 4200
},
{
"epoch": 0.21755981603017932,
"grad_norm": 0.7642196416854858,
"learning_rate": 0.0005,
"loss": 1.2043,
"step": 4210
},
{
"epoch": 0.2180765851893959,
"grad_norm": 0.703868567943573,
"learning_rate": 0.0005,
"loss": 1.1717,
"step": 4220
},
{
"epoch": 0.21859335434861246,
"grad_norm": 0.751356840133667,
"learning_rate": 0.0005,
"loss": 1.1975,
"step": 4230
},
{
"epoch": 0.21911012350782905,
"grad_norm": 0.8302937150001526,
"learning_rate": 0.0005,
"loss": 1.1981,
"step": 4240
},
{
"epoch": 0.21962689266704563,
"grad_norm": 0.8335602879524231,
"learning_rate": 0.0005,
"loss": 1.1863,
"step": 4250
},
{
"epoch": 0.22014366182626222,
"grad_norm": 0.7479858994483948,
"learning_rate": 0.0005,
"loss": 1.1788,
"step": 4260
},
{
"epoch": 0.2206604309854788,
"grad_norm": 0.9171736836433411,
"learning_rate": 0.0005,
"loss": 1.1773,
"step": 4270
},
{
"epoch": 0.22117720014469536,
"grad_norm": 0.7626177668571472,
"learning_rate": 0.0005,
"loss": 1.1869,
"step": 4280
},
{
"epoch": 0.22169396930391194,
"grad_norm": 0.7428616881370544,
"learning_rate": 0.0005,
"loss": 1.1698,
"step": 4290
},
{
"epoch": 0.22221073846312853,
"grad_norm": 0.8029087781906128,
"learning_rate": 0.0005,
"loss": 1.1884,
"step": 4300
},
{
"epoch": 0.2227275076223451,
"grad_norm": 0.7876361608505249,
"learning_rate": 0.0005,
"loss": 1.1843,
"step": 4310
},
{
"epoch": 0.22324427678156167,
"grad_norm": 0.6730009913444519,
"learning_rate": 0.0005,
"loss": 1.1703,
"step": 4320
},
{
"epoch": 0.22376104594077825,
"grad_norm": 0.7202760577201843,
"learning_rate": 0.0005,
"loss": 1.1753,
"step": 4330
},
{
"epoch": 0.22427781509999484,
"grad_norm": 0.7547861337661743,
"learning_rate": 0.0005,
"loss": 1.1755,
"step": 4340
},
{
"epoch": 0.22479458425921142,
"grad_norm": 0.7263453602790833,
"learning_rate": 0.0005,
"loss": 1.1783,
"step": 4350
},
{
"epoch": 0.22531135341842798,
"grad_norm": 0.7226181030273438,
"learning_rate": 0.0005,
"loss": 1.1829,
"step": 4360
},
{
"epoch": 0.22582812257764456,
"grad_norm": 0.7433076500892639,
"learning_rate": 0.0005,
"loss": 1.1821,
"step": 4370
},
{
"epoch": 0.22634489173686115,
"grad_norm": 0.8025347590446472,
"learning_rate": 0.0005,
"loss": 1.1548,
"step": 4380
},
{
"epoch": 0.22686166089607773,
"grad_norm": 0.8330517411231995,
"learning_rate": 0.0005,
"loss": 1.1757,
"step": 4390
},
{
"epoch": 0.2273784300552943,
"grad_norm": 0.7150396704673767,
"learning_rate": 0.0005,
"loss": 1.1592,
"step": 4400
},
{
"epoch": 0.22789519921451087,
"grad_norm": 0.8366827368736267,
"learning_rate": 0.0005,
"loss": 1.1614,
"step": 4410
},
{
"epoch": 0.22841196837372746,
"grad_norm": 0.8655450344085693,
"learning_rate": 0.0005,
"loss": 1.1553,
"step": 4420
},
{
"epoch": 0.22892873753294404,
"grad_norm": 0.6938055753707886,
"learning_rate": 0.0005,
"loss": 1.1657,
"step": 4430
},
{
"epoch": 0.2294455066921606,
"grad_norm": 0.7177290320396423,
"learning_rate": 0.0005,
"loss": 1.1728,
"step": 4440
},
{
"epoch": 0.22996227585137718,
"grad_norm": 0.7082594037055969,
"learning_rate": 0.0005,
"loss": 1.1659,
"step": 4450
},
{
"epoch": 0.23047904501059377,
"grad_norm": 0.7543273568153381,
"learning_rate": 0.0005,
"loss": 1.1517,
"step": 4460
},
{
"epoch": 0.23099581416981035,
"grad_norm": 0.722029983997345,
"learning_rate": 0.0005,
"loss": 1.1593,
"step": 4470
},
{
"epoch": 0.2315125833290269,
"grad_norm": 0.7107385396957397,
"learning_rate": 0.0005,
"loss": 1.1499,
"step": 4480
},
{
"epoch": 0.2320293524882435,
"grad_norm": 0.8118393421173096,
"learning_rate": 0.0005,
"loss": 1.1614,
"step": 4490
},
{
"epoch": 0.23254612164746008,
"grad_norm": 0.7901565432548523,
"learning_rate": 0.0005,
"loss": 1.1627,
"step": 4500
},
{
"epoch": 0.23306289080667666,
"grad_norm": 0.6997384428977966,
"learning_rate": 0.0005,
"loss": 1.1694,
"step": 4510
},
{
"epoch": 0.23357965996589325,
"grad_norm": 0.7574887871742249,
"learning_rate": 0.0005,
"loss": 1.1772,
"step": 4520
},
{
"epoch": 0.2340964291251098,
"grad_norm": 0.709123432636261,
"learning_rate": 0.0005,
"loss": 1.1793,
"step": 4530
},
{
"epoch": 0.2346131982843264,
"grad_norm": 0.7011120915412903,
"learning_rate": 0.0005,
"loss": 1.1569,
"step": 4540
},
{
"epoch": 0.23512996744354298,
"grad_norm": 0.7826752662658691,
"learning_rate": 0.0005,
"loss": 1.1551,
"step": 4550
},
{
"epoch": 0.23564673660275956,
"grad_norm": 0.7468019723892212,
"learning_rate": 0.0005,
"loss": 1.177,
"step": 4560
},
{
"epoch": 0.23616350576197612,
"grad_norm": 0.8336277604103088,
"learning_rate": 0.0005,
"loss": 1.1437,
"step": 4570
},
{
"epoch": 0.2366802749211927,
"grad_norm": 0.7412180304527283,
"learning_rate": 0.0005,
"loss": 1.1371,
"step": 4580
},
{
"epoch": 0.23719704408040929,
"grad_norm": 0.7702532410621643,
"learning_rate": 0.0005,
"loss": 1.1539,
"step": 4590
},
{
"epoch": 0.23771381323962587,
"grad_norm": 0.7170100808143616,
"learning_rate": 0.0005,
"loss": 1.1493,
"step": 4600
},
{
"epoch": 0.23823058239884243,
"grad_norm": 0.6973877549171448,
"learning_rate": 0.0005,
"loss": 1.1686,
"step": 4610
},
{
"epoch": 0.238747351558059,
"grad_norm": 0.7682148218154907,
"learning_rate": 0.0005,
"loss": 1.1374,
"step": 4620
},
{
"epoch": 0.2392641207172756,
"grad_norm": 0.7360324263572693,
"learning_rate": 0.0005,
"loss": 1.1461,
"step": 4630
},
{
"epoch": 0.23978088987649218,
"grad_norm": 0.6636998057365417,
"learning_rate": 0.0005,
"loss": 1.1468,
"step": 4640
},
{
"epoch": 0.24029765903570874,
"grad_norm": 0.9023354053497314,
"learning_rate": 0.0005,
"loss": 1.1523,
"step": 4650
},
{
"epoch": 0.24081442819492532,
"grad_norm": 0.6802653074264526,
"learning_rate": 0.0005,
"loss": 1.1354,
"step": 4660
},
{
"epoch": 0.2413311973541419,
"grad_norm": 0.917087972164154,
"learning_rate": 0.0005,
"loss": 1.1402,
"step": 4670
},
{
"epoch": 0.2418479665133585,
"grad_norm": 0.8304193019866943,
"learning_rate": 0.0005,
"loss": 1.1526,
"step": 4680
},
{
"epoch": 0.24236473567257505,
"grad_norm": 0.833188533782959,
"learning_rate": 0.0005,
"loss": 1.165,
"step": 4690
},
{
"epoch": 0.24288150483179163,
"grad_norm": 0.7147198915481567,
"learning_rate": 0.0005,
"loss": 1.1431,
"step": 4700
},
{
"epoch": 0.24339827399100822,
"grad_norm": 0.6784700155258179,
"learning_rate": 0.0005,
"loss": 1.138,
"step": 4710
},
{
"epoch": 0.2439150431502248,
"grad_norm": 0.6933045983314514,
"learning_rate": 0.0005,
"loss": 1.1173,
"step": 4720
},
{
"epoch": 0.24443181230944136,
"grad_norm": 0.7840824127197266,
"learning_rate": 0.0005,
"loss": 1.1384,
"step": 4730
},
{
"epoch": 0.24494858146865794,
"grad_norm": 0.8129291534423828,
"learning_rate": 0.0005,
"loss": 1.151,
"step": 4740
},
{
"epoch": 0.24546535062787453,
"grad_norm": 0.7420192360877991,
"learning_rate": 0.0005,
"loss": 1.1218,
"step": 4750
},
{
"epoch": 0.2459821197870911,
"grad_norm": 0.6665251851081848,
"learning_rate": 0.0005,
"loss": 1.1278,
"step": 4760
},
{
"epoch": 0.2464988889463077,
"grad_norm": 0.7529242038726807,
"learning_rate": 0.0005,
"loss": 1.1417,
"step": 4770
},
{
"epoch": 0.24701565810552426,
"grad_norm": 0.6908478140830994,
"learning_rate": 0.0005,
"loss": 1.1353,
"step": 4780
},
{
"epoch": 0.24753242726474084,
"grad_norm": 0.6860882043838501,
"learning_rate": 0.0005,
"loss": 1.1278,
"step": 4790
},
{
"epoch": 0.24804919642395742,
"grad_norm": 0.7322950959205627,
"learning_rate": 0.0005,
"loss": 1.1447,
"step": 4800
},
{
"epoch": 0.248565965583174,
"grad_norm": 0.679210364818573,
"learning_rate": 0.0005,
"loss": 1.146,
"step": 4810
},
{
"epoch": 0.24908273474239057,
"grad_norm": 0.7133141756057739,
"learning_rate": 0.0005,
"loss": 1.1389,
"step": 4820
},
{
"epoch": 0.24959950390160715,
"grad_norm": 0.6991278529167175,
"learning_rate": 0.0005,
"loss": 1.1324,
"step": 4830
},
{
"epoch": 0.25011627306082374,
"grad_norm": 0.7213752865791321,
"learning_rate": 0.0005,
"loss": 1.1303,
"step": 4840
},
{
"epoch": 0.2506330422200403,
"grad_norm": 0.6555566191673279,
"learning_rate": 0.0005,
"loss": 1.1277,
"step": 4850
},
{
"epoch": 0.2511498113792569,
"grad_norm": 0.7012516260147095,
"learning_rate": 0.0005,
"loss": 1.1267,
"step": 4860
},
{
"epoch": 0.25166658053847346,
"grad_norm": 0.74920654296875,
"learning_rate": 0.0005,
"loss": 1.1432,
"step": 4870
},
{
"epoch": 0.25218334969769,
"grad_norm": 0.721111536026001,
"learning_rate": 0.0005,
"loss": 1.1393,
"step": 4880
},
{
"epoch": 0.25270011885690663,
"grad_norm": 0.7633620500564575,
"learning_rate": 0.0005,
"loss": 1.135,
"step": 4890
},
{
"epoch": 0.2532168880161232,
"grad_norm": 0.7658079266548157,
"learning_rate": 0.0005,
"loss": 1.1223,
"step": 4900
},
{
"epoch": 0.2537336571753398,
"grad_norm": 0.6615222692489624,
"learning_rate": 0.0005,
"loss": 1.1476,
"step": 4910
},
{
"epoch": 0.25425042633455636,
"grad_norm": 0.6398602724075317,
"learning_rate": 0.0005,
"loss": 1.1044,
"step": 4920
},
{
"epoch": 0.2547671954937729,
"grad_norm": 0.7086970210075378,
"learning_rate": 0.0005,
"loss": 1.1253,
"step": 4930
},
{
"epoch": 0.2552839646529895,
"grad_norm": 0.6913731694221497,
"learning_rate": 0.0005,
"loss": 1.1356,
"step": 4940
},
{
"epoch": 0.2558007338122061,
"grad_norm": 0.7111396789550781,
"learning_rate": 0.0005,
"loss": 1.1219,
"step": 4950
},
{
"epoch": 0.25631750297142264,
"grad_norm": 0.699747622013092,
"learning_rate": 0.0005,
"loss": 1.1198,
"step": 4960
},
{
"epoch": 0.25683427213063925,
"grad_norm": 0.6903569102287292,
"learning_rate": 0.0005,
"loss": 1.1384,
"step": 4970
},
{
"epoch": 0.2573510412898558,
"grad_norm": 0.7051145434379578,
"learning_rate": 0.0005,
"loss": 1.1439,
"step": 4980
},
{
"epoch": 0.2578678104490724,
"grad_norm": 0.7983745336532593,
"learning_rate": 0.0005,
"loss": 1.1171,
"step": 4990
},
{
"epoch": 0.258384579608289,
"grad_norm": 0.7234880924224854,
"learning_rate": 0.0005,
"loss": 1.1083,
"step": 5000
},
{
"epoch": 0.25890134876750553,
"grad_norm": 0.740550696849823,
"learning_rate": 0.0005,
"loss": 1.1211,
"step": 5010
},
{
"epoch": 0.25941811792672215,
"grad_norm": 0.7128597497940063,
"learning_rate": 0.0005,
"loss": 1.1432,
"step": 5020
},
{
"epoch": 0.2599348870859387,
"grad_norm": 0.6916446089744568,
"learning_rate": 0.0005,
"loss": 1.1358,
"step": 5030
},
{
"epoch": 0.2604516562451553,
"grad_norm": 0.776382327079773,
"learning_rate": 0.0005,
"loss": 1.125,
"step": 5040
},
{
"epoch": 0.2609684254043719,
"grad_norm": 0.720817506313324,
"learning_rate": 0.0005,
"loss": 1.11,
"step": 5050
},
{
"epoch": 0.26148519456358843,
"grad_norm": 0.6699787378311157,
"learning_rate": 0.0005,
"loss": 1.1143,
"step": 5060
},
{
"epoch": 0.26200196372280504,
"grad_norm": 0.7283949851989746,
"learning_rate": 0.0005,
"loss": 1.1094,
"step": 5070
},
{
"epoch": 0.2625187328820216,
"grad_norm": 0.6964280009269714,
"learning_rate": 0.0005,
"loss": 1.1332,
"step": 5080
},
{
"epoch": 0.26303550204123816,
"grad_norm": 0.7906248569488525,
"learning_rate": 0.0005,
"loss": 1.1242,
"step": 5090
},
{
"epoch": 0.26355227120045477,
"grad_norm": 0.7149584889411926,
"learning_rate": 0.0005,
"loss": 1.1215,
"step": 5100
},
{
"epoch": 0.2640690403596713,
"grad_norm": 0.6400547027587891,
"learning_rate": 0.0005,
"loss": 1.1319,
"step": 5110
},
{
"epoch": 0.26458580951888794,
"grad_norm": 0.6504139304161072,
"learning_rate": 0.0005,
"loss": 1.1145,
"step": 5120
},
{
"epoch": 0.2651025786781045,
"grad_norm": 0.724251389503479,
"learning_rate": 0.0005,
"loss": 1.1185,
"step": 5130
},
{
"epoch": 0.26561934783732105,
"grad_norm": 0.7142144441604614,
"learning_rate": 0.0005,
"loss": 1.1296,
"step": 5140
},
{
"epoch": 0.26613611699653766,
"grad_norm": 0.7482824325561523,
"learning_rate": 0.0005,
"loss": 1.1035,
"step": 5150
},
{
"epoch": 0.2666528861557542,
"grad_norm": 0.7604995369911194,
"learning_rate": 0.0005,
"loss": 1.113,
"step": 5160
},
{
"epoch": 0.2671696553149708,
"grad_norm": 0.7642651200294495,
"learning_rate": 0.0005,
"loss": 1.0964,
"step": 5170
},
{
"epoch": 0.2676864244741874,
"grad_norm": 0.9142786860466003,
"learning_rate": 0.0005,
"loss": 1.101,
"step": 5180
},
{
"epoch": 0.26820319363340395,
"grad_norm": 0.6688016057014465,
"learning_rate": 0.0005,
"loss": 1.1125,
"step": 5190
},
{
"epoch": 0.26871996279262056,
"grad_norm": 0.7352325916290283,
"learning_rate": 0.0005,
"loss": 1.1081,
"step": 5200
},
{
"epoch": 0.2692367319518371,
"grad_norm": 0.696356475353241,
"learning_rate": 0.0005,
"loss": 1.0972,
"step": 5210
},
{
"epoch": 0.2697535011110537,
"grad_norm": 0.6730584502220154,
"learning_rate": 0.0005,
"loss": 1.1173,
"step": 5220
},
{
"epoch": 0.2702702702702703,
"grad_norm": 0.6800664067268372,
"learning_rate": 0.0005,
"loss": 1.0942,
"step": 5230
},
{
"epoch": 0.27078703942948684,
"grad_norm": 0.6622713208198547,
"learning_rate": 0.0005,
"loss": 1.1297,
"step": 5240
},
{
"epoch": 0.2713038085887034,
"grad_norm": 0.7148898839950562,
"learning_rate": 0.0005,
"loss": 1.0997,
"step": 5250
},
{
"epoch": 0.27182057774792,
"grad_norm": 0.6884311437606812,
"learning_rate": 0.0005,
"loss": 1.1031,
"step": 5260
},
{
"epoch": 0.27233734690713657,
"grad_norm": 0.6427676677703857,
"learning_rate": 0.0005,
"loss": 1.1102,
"step": 5270
},
{
"epoch": 0.2728541160663532,
"grad_norm": 0.6422214508056641,
"learning_rate": 0.0005,
"loss": 1.1116,
"step": 5280
},
{
"epoch": 0.27337088522556974,
"grad_norm": 0.6933507919311523,
"learning_rate": 0.0005,
"loss": 1.1179,
"step": 5290
},
{
"epoch": 0.2738876543847863,
"grad_norm": 0.6655607223510742,
"learning_rate": 0.0005,
"loss": 1.0943,
"step": 5300
},
{
"epoch": 0.2744044235440029,
"grad_norm": 0.7125523686408997,
"learning_rate": 0.0005,
"loss": 1.1065,
"step": 5310
},
{
"epoch": 0.27492119270321946,
"grad_norm": 0.8208178281784058,
"learning_rate": 0.0005,
"loss": 1.1193,
"step": 5320
},
{
"epoch": 0.2754379618624361,
"grad_norm": 0.715416669845581,
"learning_rate": 0.0005,
"loss": 1.1064,
"step": 5330
},
{
"epoch": 0.27595473102165263,
"grad_norm": 0.7992897629737854,
"learning_rate": 0.0005,
"loss": 1.1008,
"step": 5340
},
{
"epoch": 0.2764715001808692,
"grad_norm": 0.6610242128372192,
"learning_rate": 0.0005,
"loss": 1.1132,
"step": 5350
},
{
"epoch": 0.2769882693400858,
"grad_norm": 0.7205715775489807,
"learning_rate": 0.0005,
"loss": 1.0994,
"step": 5360
},
{
"epoch": 0.27750503849930236,
"grad_norm": 0.6824073791503906,
"learning_rate": 0.0005,
"loss": 1.0882,
"step": 5370
},
{
"epoch": 0.2780218076585189,
"grad_norm": 0.7015029191970825,
"learning_rate": 0.0005,
"loss": 1.1182,
"step": 5380
},
{
"epoch": 0.27853857681773553,
"grad_norm": 0.6447197794914246,
"learning_rate": 0.0005,
"loss": 1.105,
"step": 5390
},
{
"epoch": 0.2790553459769521,
"grad_norm": 0.7455316781997681,
"learning_rate": 0.0005,
"loss": 1.1069,
"step": 5400
},
{
"epoch": 0.2795721151361687,
"grad_norm": 0.8284129500389099,
"learning_rate": 0.0005,
"loss": 1.108,
"step": 5410
},
{
"epoch": 0.28008888429538525,
"grad_norm": 0.6697763204574585,
"learning_rate": 0.0005,
"loss": 1.1079,
"step": 5420
},
{
"epoch": 0.2806056534546018,
"grad_norm": 0.6729034781455994,
"learning_rate": 0.0005,
"loss": 1.1004,
"step": 5430
},
{
"epoch": 0.2811224226138184,
"grad_norm": 0.6567364931106567,
"learning_rate": 0.0005,
"loss": 1.0876,
"step": 5440
},
{
"epoch": 0.281639191773035,
"grad_norm": 0.6983076333999634,
"learning_rate": 0.0005,
"loss": 1.0979,
"step": 5450
},
{
"epoch": 0.28215596093225154,
"grad_norm": 0.6503905057907104,
"learning_rate": 0.0005,
"loss": 1.0884,
"step": 5460
},
{
"epoch": 0.28267273009146815,
"grad_norm": 0.6191208362579346,
"learning_rate": 0.0005,
"loss": 1.1057,
"step": 5470
},
{
"epoch": 0.2831894992506847,
"grad_norm": 0.7421597838401794,
"learning_rate": 0.0005,
"loss": 1.0992,
"step": 5480
},
{
"epoch": 0.2837062684099013,
"grad_norm": 0.6919003129005432,
"learning_rate": 0.0005,
"loss": 1.0961,
"step": 5490
},
{
"epoch": 0.2842230375691179,
"grad_norm": 0.6625383496284485,
"learning_rate": 0.0005,
"loss": 1.1108,
"step": 5500
},
{
"epoch": 0.28473980672833443,
"grad_norm": 0.6479719877243042,
"learning_rate": 0.0005,
"loss": 1.0969,
"step": 5510
},
{
"epoch": 0.28525657588755104,
"grad_norm": 0.765210747718811,
"learning_rate": 0.0005,
"loss": 1.0857,
"step": 5520
},
{
"epoch": 0.2857733450467676,
"grad_norm": 0.6934791803359985,
"learning_rate": 0.0005,
"loss": 1.0945,
"step": 5530
},
{
"epoch": 0.2862901142059842,
"grad_norm": 0.6789985299110413,
"learning_rate": 0.0005,
"loss": 1.1165,
"step": 5540
},
{
"epoch": 0.28680688336520077,
"grad_norm": 0.6476292014122009,
"learning_rate": 0.0005,
"loss": 1.0886,
"step": 5550
},
{
"epoch": 0.28732365252441733,
"grad_norm": 0.8015202283859253,
"learning_rate": 0.0005,
"loss": 1.09,
"step": 5560
},
{
"epoch": 0.28784042168363394,
"grad_norm": 0.8759499192237854,
"learning_rate": 0.0005,
"loss": 1.0962,
"step": 5570
},
{
"epoch": 0.2883571908428505,
"grad_norm": 0.6740782856941223,
"learning_rate": 0.0005,
"loss": 1.0803,
"step": 5580
},
{
"epoch": 0.28887396000206705,
"grad_norm": 0.6475633978843689,
"learning_rate": 0.0005,
"loss": 1.0825,
"step": 5590
},
{
"epoch": 0.28939072916128367,
"grad_norm": 0.7087163329124451,
"learning_rate": 0.0005,
"loss": 1.0982,
"step": 5600
},
{
"epoch": 0.2899074983205002,
"grad_norm": 0.6702967882156372,
"learning_rate": 0.0005,
"loss": 1.1146,
"step": 5610
},
{
"epoch": 0.29042426747971684,
"grad_norm": 0.6150313019752502,
"learning_rate": 0.0005,
"loss": 1.0919,
"step": 5620
},
{
"epoch": 0.2909410366389334,
"grad_norm": 0.6218642592430115,
"learning_rate": 0.0005,
"loss": 1.0874,
"step": 5630
},
{
"epoch": 0.29145780579814995,
"grad_norm": 0.670069694519043,
"learning_rate": 0.0005,
"loss": 1.0764,
"step": 5640
},
{
"epoch": 0.29197457495736656,
"grad_norm": 0.7384163737297058,
"learning_rate": 0.0005,
"loss": 1.0888,
"step": 5650
},
{
"epoch": 0.2924913441165831,
"grad_norm": 0.6525676250457764,
"learning_rate": 0.0005,
"loss": 1.0955,
"step": 5660
},
{
"epoch": 0.2930081132757997,
"grad_norm": 0.6424722075462341,
"learning_rate": 0.0005,
"loss": 1.077,
"step": 5670
},
{
"epoch": 0.2935248824350163,
"grad_norm": 0.6522981524467468,
"learning_rate": 0.0005,
"loss": 1.0996,
"step": 5680
},
{
"epoch": 0.29404165159423284,
"grad_norm": 0.686553955078125,
"learning_rate": 0.0005,
"loss": 1.0776,
"step": 5690
},
{
"epoch": 0.29455842075344946,
"grad_norm": 0.6501746773719788,
"learning_rate": 0.0005,
"loss": 1.09,
"step": 5700
},
{
"epoch": 0.295075189912666,
"grad_norm": 0.661805272102356,
"learning_rate": 0.0005,
"loss": 1.0987,
"step": 5710
},
{
"epoch": 0.29559195907188257,
"grad_norm": 0.6171291470527649,
"learning_rate": 0.0005,
"loss": 1.0896,
"step": 5720
},
{
"epoch": 0.2961087282310992,
"grad_norm": 0.6660189032554626,
"learning_rate": 0.0005,
"loss": 1.0795,
"step": 5730
},
{
"epoch": 0.29662549739031574,
"grad_norm": 0.7182852625846863,
"learning_rate": 0.0005,
"loss": 1.0888,
"step": 5740
},
{
"epoch": 0.2971422665495323,
"grad_norm": 0.6748793125152588,
"learning_rate": 0.0005,
"loss": 1.1066,
"step": 5750
},
{
"epoch": 0.2976590357087489,
"grad_norm": 0.9658355712890625,
"learning_rate": 0.0005,
"loss": 1.0788,
"step": 5760
},
{
"epoch": 0.29817580486796547,
"grad_norm": 0.7361212968826294,
"learning_rate": 0.0005,
"loss": 1.0963,
"step": 5770
},
{
"epoch": 0.2986925740271821,
"grad_norm": 0.6640811562538147,
"learning_rate": 0.0005,
"loss": 1.0872,
"step": 5780
},
{
"epoch": 0.29920934318639864,
"grad_norm": 0.6937102675437927,
"learning_rate": 0.0005,
"loss": 1.0777,
"step": 5790
},
{
"epoch": 0.2997261123456152,
"grad_norm": 0.7803467512130737,
"learning_rate": 0.0005,
"loss": 1.0986,
"step": 5800
},
{
"epoch": 0.3002428815048318,
"grad_norm": 0.8593279719352722,
"learning_rate": 0.0005,
"loss": 1.0796,
"step": 5810
},
{
"epoch": 0.30075965066404836,
"grad_norm": 0.6236810088157654,
"learning_rate": 0.0005,
"loss": 1.0932,
"step": 5820
},
{
"epoch": 0.301276419823265,
"grad_norm": 0.6399732828140259,
"learning_rate": 0.0005,
"loss": 1.0614,
"step": 5830
},
{
"epoch": 0.30179318898248153,
"grad_norm": 0.6762784123420715,
"learning_rate": 0.0005,
"loss": 1.0763,
"step": 5840
},
{
"epoch": 0.3023099581416981,
"grad_norm": 0.7428263425827026,
"learning_rate": 0.0005,
"loss": 1.0701,
"step": 5850
},
{
"epoch": 0.3028267273009147,
"grad_norm": 0.6435476541519165,
"learning_rate": 0.0005,
"loss": 1.0782,
"step": 5860
},
{
"epoch": 0.30334349646013126,
"grad_norm": 0.6325916647911072,
"learning_rate": 0.0005,
"loss": 1.0858,
"step": 5870
},
{
"epoch": 0.3038602656193478,
"grad_norm": 0.6759895086288452,
"learning_rate": 0.0005,
"loss": 1.082,
"step": 5880
},
{
"epoch": 0.3043770347785644,
"grad_norm": 0.705319881439209,
"learning_rate": 0.0005,
"loss": 1.0587,
"step": 5890
},
{
"epoch": 0.304893803937781,
"grad_norm": 0.6924307346343994,
"learning_rate": 0.0005,
"loss": 1.0756,
"step": 5900
},
{
"epoch": 0.3054105730969976,
"grad_norm": 0.6262795925140381,
"learning_rate": 0.0005,
"loss": 1.0875,
"step": 5910
},
{
"epoch": 0.30592734225621415,
"grad_norm": 0.6304033398628235,
"learning_rate": 0.0005,
"loss": 1.0889,
"step": 5920
},
{
"epoch": 0.3064441114154307,
"grad_norm": 0.6266285181045532,
"learning_rate": 0.0005,
"loss": 1.0734,
"step": 5930
},
{
"epoch": 0.3069608805746473,
"grad_norm": 0.66020268201828,
"learning_rate": 0.0005,
"loss": 1.0756,
"step": 5940
},
{
"epoch": 0.3074776497338639,
"grad_norm": 0.6455373764038086,
"learning_rate": 0.0005,
"loss": 1.088,
"step": 5950
},
{
"epoch": 0.30799441889308043,
"grad_norm": 0.6743224263191223,
"learning_rate": 0.0005,
"loss": 1.0777,
"step": 5960
},
{
"epoch": 0.30851118805229705,
"grad_norm": 0.6214370131492615,
"learning_rate": 0.0005,
"loss": 1.069,
"step": 5970
},
{
"epoch": 0.3090279572115136,
"grad_norm": 0.6882118582725525,
"learning_rate": 0.0005,
"loss": 1.0713,
"step": 5980
},
{
"epoch": 0.3095447263707302,
"grad_norm": 0.6656840443611145,
"learning_rate": 0.0005,
"loss": 1.0783,
"step": 5990
},
{
"epoch": 0.3100614955299468,
"grad_norm": 0.7134031653404236,
"learning_rate": 0.0005,
"loss": 1.049,
"step": 6000
},
{
"epoch": 0.31057826468916333,
"grad_norm": 0.7211028933525085,
"learning_rate": 0.0005,
"loss": 1.067,
"step": 6010
},
{
"epoch": 0.31109503384837994,
"grad_norm": 0.6382066607475281,
"learning_rate": 0.0005,
"loss": 1.0771,
"step": 6020
},
{
"epoch": 0.3116118030075965,
"grad_norm": 0.7246118187904358,
"learning_rate": 0.0005,
"loss": 1.0877,
"step": 6030
},
{
"epoch": 0.3121285721668131,
"grad_norm": 0.6753916144371033,
"learning_rate": 0.0005,
"loss": 1.0655,
"step": 6040
},
{
"epoch": 0.31264534132602967,
"grad_norm": 0.6585648655891418,
"learning_rate": 0.0005,
"loss": 1.0557,
"step": 6050
},
{
"epoch": 0.3131621104852462,
"grad_norm": 0.6378208994865417,
"learning_rate": 0.0005,
"loss": 1.0657,
"step": 6060
},
{
"epoch": 0.31367887964446284,
"grad_norm": 0.6496950387954712,
"learning_rate": 0.0005,
"loss": 1.0743,
"step": 6070
},
{
"epoch": 0.3141956488036794,
"grad_norm": 0.6112158298492432,
"learning_rate": 0.0005,
"loss": 1.076,
"step": 6080
},
{
"epoch": 0.31471241796289595,
"grad_norm": 0.6267996430397034,
"learning_rate": 0.0005,
"loss": 1.0882,
"step": 6090
},
{
"epoch": 0.31522918712211256,
"grad_norm": 0.6258119940757751,
"learning_rate": 0.0005,
"loss": 1.0747,
"step": 6100
},
{
"epoch": 0.3157459562813291,
"grad_norm": 0.6293036341667175,
"learning_rate": 0.0005,
"loss": 1.0648,
"step": 6110
},
{
"epoch": 0.31626272544054573,
"grad_norm": 0.6443596482276917,
"learning_rate": 0.0005,
"loss": 1.0898,
"step": 6120
},
{
"epoch": 0.3167794945997623,
"grad_norm": 0.6488006711006165,
"learning_rate": 0.0005,
"loss": 1.0533,
"step": 6130
},
{
"epoch": 0.31729626375897885,
"grad_norm": 0.6419286131858826,
"learning_rate": 0.0005,
"loss": 1.0755,
"step": 6140
},
{
"epoch": 0.31781303291819546,
"grad_norm": 0.6659611463546753,
"learning_rate": 0.0005,
"loss": 1.0526,
"step": 6150
},
{
"epoch": 0.318329802077412,
"grad_norm": 0.6645331382751465,
"learning_rate": 0.0005,
"loss": 1.0528,
"step": 6160
},
{
"epoch": 0.3188465712366286,
"grad_norm": 0.7420417070388794,
"learning_rate": 0.0005,
"loss": 1.0637,
"step": 6170
},
{
"epoch": 0.3193633403958452,
"grad_norm": 0.6399688720703125,
"learning_rate": 0.0005,
"loss": 1.0575,
"step": 6180
},
{
"epoch": 0.31988010955506174,
"grad_norm": 0.6128381490707397,
"learning_rate": 0.0005,
"loss": 1.0692,
"step": 6190
},
{
"epoch": 0.32039687871427835,
"grad_norm": 0.6373854279518127,
"learning_rate": 0.0005,
"loss": 1.0543,
"step": 6200
},
{
"epoch": 0.3209136478734949,
"grad_norm": 0.8587968349456787,
"learning_rate": 0.0005,
"loss": 1.0697,
"step": 6210
},
{
"epoch": 0.32143041703271147,
"grad_norm": 0.6043888926506042,
"learning_rate": 0.0005,
"loss": 1.0748,
"step": 6220
},
{
"epoch": 0.3219471861919281,
"grad_norm": 0.6279845237731934,
"learning_rate": 0.0005,
"loss": 1.0746,
"step": 6230
},
{
"epoch": 0.32246395535114464,
"grad_norm": 0.6751164793968201,
"learning_rate": 0.0005,
"loss": 1.0715,
"step": 6240
},
{
"epoch": 0.3229807245103612,
"grad_norm": 0.5915717482566833,
"learning_rate": 0.0005,
"loss": 1.0705,
"step": 6250
},
{
"epoch": 0.3234974936695778,
"grad_norm": 0.6816694140434265,
"learning_rate": 0.0005,
"loss": 1.0666,
"step": 6260
},
{
"epoch": 0.32401426282879436,
"grad_norm": 0.7093113660812378,
"learning_rate": 0.0005,
"loss": 1.0585,
"step": 6270
},
{
"epoch": 0.324531031988011,
"grad_norm": 0.6673592925071716,
"learning_rate": 0.0005,
"loss": 1.0767,
"step": 6280
},
{
"epoch": 0.32504780114722753,
"grad_norm": 0.5884393453598022,
"learning_rate": 0.0005,
"loss": 1.0662,
"step": 6290
},
{
"epoch": 0.3255645703064441,
"grad_norm": 0.6808472871780396,
"learning_rate": 0.0005,
"loss": 1.0442,
"step": 6300
},
{
"epoch": 0.3260813394656607,
"grad_norm": 0.6658387184143066,
"learning_rate": 0.0005,
"loss": 1.0627,
"step": 6310
},
{
"epoch": 0.32659810862487726,
"grad_norm": 0.6469089388847351,
"learning_rate": 0.0005,
"loss": 1.0645,
"step": 6320
},
{
"epoch": 0.32711487778409387,
"grad_norm": 0.6215671896934509,
"learning_rate": 0.0005,
"loss": 1.0544,
"step": 6330
},
{
"epoch": 0.32763164694331043,
"grad_norm": 0.6409225463867188,
"learning_rate": 0.0005,
"loss": 1.0555,
"step": 6340
},
{
"epoch": 0.328148416102527,
"grad_norm": 0.6427381038665771,
"learning_rate": 0.0005,
"loss": 1.0696,
"step": 6350
},
{
"epoch": 0.3286651852617436,
"grad_norm": 0.5856565833091736,
"learning_rate": 0.0005,
"loss": 1.0518,
"step": 6360
},
{
"epoch": 0.32918195442096015,
"grad_norm": 0.6217045187950134,
"learning_rate": 0.0005,
"loss": 1.066,
"step": 6370
},
{
"epoch": 0.3296987235801767,
"grad_norm": 0.7256447672843933,
"learning_rate": 0.0005,
"loss": 1.0514,
"step": 6380
},
{
"epoch": 0.3302154927393933,
"grad_norm": 0.6222741007804871,
"learning_rate": 0.0005,
"loss": 1.0509,
"step": 6390
},
{
"epoch": 0.3307322618986099,
"grad_norm": 0.6448323726654053,
"learning_rate": 0.0005,
"loss": 1.0622,
"step": 6400
},
{
"epoch": 0.3312490310578265,
"grad_norm": 0.6215245723724365,
"learning_rate": 0.0005,
"loss": 1.0637,
"step": 6410
},
{
"epoch": 0.33176580021704305,
"grad_norm": 0.6422061920166016,
"learning_rate": 0.0005,
"loss": 1.0599,
"step": 6420
},
{
"epoch": 0.3322825693762596,
"grad_norm": 0.8208865523338318,
"learning_rate": 0.0005,
"loss": 1.0524,
"step": 6430
},
{
"epoch": 0.3327993385354762,
"grad_norm": 1.1319376230239868,
"learning_rate": 0.0005,
"loss": 1.0577,
"step": 6440
},
{
"epoch": 0.3333161076946928,
"grad_norm": 0.63709956407547,
"learning_rate": 0.0005,
"loss": 1.0515,
"step": 6450
},
{
"epoch": 0.33383287685390933,
"grad_norm": 0.6338751912117004,
"learning_rate": 0.0005,
"loss": 1.0428,
"step": 6460
},
{
"epoch": 0.33434964601312595,
"grad_norm": 0.6264437437057495,
"learning_rate": 0.0005,
"loss": 1.0545,
"step": 6470
},
{
"epoch": 0.3348664151723425,
"grad_norm": 0.6507226228713989,
"learning_rate": 0.0005,
"loss": 1.0478,
"step": 6480
},
{
"epoch": 0.3353831843315591,
"grad_norm": 0.6316462755203247,
"learning_rate": 0.0005,
"loss": 1.05,
"step": 6490
},
{
"epoch": 0.33589995349077567,
"grad_norm": 0.8337516188621521,
"learning_rate": 0.0005,
"loss": 1.0486,
"step": 6500
},
{
"epoch": 0.33641672264999223,
"grad_norm": 0.9597588777542114,
"learning_rate": 0.0005,
"loss": 1.0551,
"step": 6510
},
{
"epoch": 0.33693349180920884,
"grad_norm": 0.6857469081878662,
"learning_rate": 0.0005,
"loss": 1.0651,
"step": 6520
},
{
"epoch": 0.3374502609684254,
"grad_norm": 0.6196707487106323,
"learning_rate": 0.0005,
"loss": 1.0425,
"step": 6530
},
{
"epoch": 0.337967030127642,
"grad_norm": 0.6072001457214355,
"learning_rate": 0.0005,
"loss": 1.0698,
"step": 6540
},
{
"epoch": 0.33848379928685857,
"grad_norm": 0.6677159667015076,
"learning_rate": 0.0005,
"loss": 1.0646,
"step": 6550
},
{
"epoch": 0.3390005684460751,
"grad_norm": 0.6435421109199524,
"learning_rate": 0.0005,
"loss": 1.0342,
"step": 6560
},
{
"epoch": 0.33951733760529174,
"grad_norm": 0.5953618288040161,
"learning_rate": 0.0005,
"loss": 1.0512,
"step": 6570
},
{
"epoch": 0.3400341067645083,
"grad_norm": 0.6292535066604614,
"learning_rate": 0.0005,
"loss": 1.0502,
"step": 6580
},
{
"epoch": 0.34055087592372485,
"grad_norm": 0.7501185536384583,
"learning_rate": 0.0005,
"loss": 1.0556,
"step": 6590
},
{
"epoch": 0.34106764508294146,
"grad_norm": 0.58536696434021,
"learning_rate": 0.0005,
"loss": 1.0534,
"step": 6600
},
{
"epoch": 0.341584414242158,
"grad_norm": 0.6455935835838318,
"learning_rate": 0.0005,
"loss": 1.0366,
"step": 6610
},
{
"epoch": 0.34210118340137463,
"grad_norm": 0.6323394179344177,
"learning_rate": 0.0005,
"loss": 1.052,
"step": 6620
},
{
"epoch": 0.3426179525605912,
"grad_norm": 0.6140257120132446,
"learning_rate": 0.0005,
"loss": 1.0452,
"step": 6630
},
{
"epoch": 0.34313472171980774,
"grad_norm": 0.6486880779266357,
"learning_rate": 0.0005,
"loss": 1.0422,
"step": 6640
},
{
"epoch": 0.34365149087902436,
"grad_norm": 0.6136801838874817,
"learning_rate": 0.0005,
"loss": 1.0529,
"step": 6650
},
{
"epoch": 0.3441682600382409,
"grad_norm": 0.78439861536026,
"learning_rate": 0.0005,
"loss": 1.0616,
"step": 6660
},
{
"epoch": 0.34468502919745747,
"grad_norm": 0.6717984080314636,
"learning_rate": 0.0005,
"loss": 1.0471,
"step": 6670
},
{
"epoch": 0.3452017983566741,
"grad_norm": 0.632985532283783,
"learning_rate": 0.0005,
"loss": 1.0426,
"step": 6680
},
{
"epoch": 0.34571856751589064,
"grad_norm": 0.6086390018463135,
"learning_rate": 0.0005,
"loss": 1.0384,
"step": 6690
},
{
"epoch": 0.34623533667510725,
"grad_norm": 0.7206865549087524,
"learning_rate": 0.0005,
"loss": 1.0441,
"step": 6700
},
{
"epoch": 0.3467521058343238,
"grad_norm": 0.6115614771842957,
"learning_rate": 0.0005,
"loss": 1.0486,
"step": 6710
},
{
"epoch": 0.34726887499354037,
"grad_norm": 0.6737103462219238,
"learning_rate": 0.0005,
"loss": 1.0679,
"step": 6720
},
{
"epoch": 0.347785644152757,
"grad_norm": 0.632331132888794,
"learning_rate": 0.0005,
"loss": 1.0327,
"step": 6730
},
{
"epoch": 0.34830241331197354,
"grad_norm": 0.7133494019508362,
"learning_rate": 0.0005,
"loss": 1.0412,
"step": 6740
},
{
"epoch": 0.3488191824711901,
"grad_norm": 0.5726544260978699,
"learning_rate": 0.0005,
"loss": 1.0503,
"step": 6750
},
{
"epoch": 0.3493359516304067,
"grad_norm": 0.719832181930542,
"learning_rate": 0.0005,
"loss": 1.0477,
"step": 6760
},
{
"epoch": 0.34985272078962326,
"grad_norm": 0.7709729671478271,
"learning_rate": 0.0005,
"loss": 1.0424,
"step": 6770
},
{
"epoch": 0.3503694899488399,
"grad_norm": 0.6043444275856018,
"learning_rate": 0.0005,
"loss": 1.0527,
"step": 6780
},
{
"epoch": 0.35088625910805643,
"grad_norm": 0.5770915746688843,
"learning_rate": 0.0005,
"loss": 1.033,
"step": 6790
},
{
"epoch": 0.351403028267273,
"grad_norm": 0.6332295536994934,
"learning_rate": 0.0005,
"loss": 1.0405,
"step": 6800
},
{
"epoch": 0.3519197974264896,
"grad_norm": 0.6505199670791626,
"learning_rate": 0.0005,
"loss": 1.0389,
"step": 6810
},
{
"epoch": 0.35243656658570616,
"grad_norm": 0.6215615272521973,
"learning_rate": 0.0005,
"loss": 1.0591,
"step": 6820
},
{
"epoch": 0.35295333574492277,
"grad_norm": 0.6917248368263245,
"learning_rate": 0.0005,
"loss": 1.0384,
"step": 6830
},
{
"epoch": 0.3534701049041393,
"grad_norm": 0.6240680813789368,
"learning_rate": 0.0005,
"loss": 1.0491,
"step": 6840
},
{
"epoch": 0.3539868740633559,
"grad_norm": 0.6082044243812561,
"learning_rate": 0.0005,
"loss": 1.0495,
"step": 6850
},
{
"epoch": 0.3545036432225725,
"grad_norm": 0.6314426064491272,
"learning_rate": 0.0005,
"loss": 1.0274,
"step": 6860
},
{
"epoch": 0.35502041238178905,
"grad_norm": 0.6714574694633484,
"learning_rate": 0.0005,
"loss": 1.0275,
"step": 6870
},
{
"epoch": 0.3555371815410056,
"grad_norm": 0.6438120603561401,
"learning_rate": 0.0005,
"loss": 1.0383,
"step": 6880
},
{
"epoch": 0.3560539507002222,
"grad_norm": 0.7354781031608582,
"learning_rate": 0.0005,
"loss": 1.0524,
"step": 6890
},
{
"epoch": 0.3565707198594388,
"grad_norm": 0.6491745114326477,
"learning_rate": 0.0005,
"loss": 1.0386,
"step": 6900
},
{
"epoch": 0.3570874890186554,
"grad_norm": 0.5888579487800598,
"learning_rate": 0.0005,
"loss": 1.0417,
"step": 6910
},
{
"epoch": 0.35760425817787195,
"grad_norm": 0.6474457383155823,
"learning_rate": 0.0005,
"loss": 1.0514,
"step": 6920
},
{
"epoch": 0.3581210273370885,
"grad_norm": 0.6235959529876709,
"learning_rate": 0.0005,
"loss": 1.03,
"step": 6930
},
{
"epoch": 0.3586377964963051,
"grad_norm": 0.6418899297714233,
"learning_rate": 0.0005,
"loss": 1.0436,
"step": 6940
},
{
"epoch": 0.3591545656555217,
"grad_norm": 0.671491801738739,
"learning_rate": 0.0005,
"loss": 1.0494,
"step": 6950
},
{
"epoch": 0.35967133481473823,
"grad_norm": 0.6662471890449524,
"learning_rate": 0.0005,
"loss": 1.0339,
"step": 6960
},
{
"epoch": 0.36018810397395484,
"grad_norm": 0.6041388511657715,
"learning_rate": 0.0005,
"loss": 1.0242,
"step": 6970
},
{
"epoch": 0.3607048731331714,
"grad_norm": 0.6014126539230347,
"learning_rate": 0.0005,
"loss": 1.0327,
"step": 6980
},
{
"epoch": 0.361221642292388,
"grad_norm": 0.611056387424469,
"learning_rate": 0.0005,
"loss": 1.0537,
"step": 6990
},
{
"epoch": 0.36173841145160457,
"grad_norm": 0.605475127696991,
"learning_rate": 0.0005,
"loss": 1.0255,
"step": 7000
},
{
"epoch": 0.3622551806108211,
"grad_norm": 0.5799763798713684,
"learning_rate": 0.0005,
"loss": 1.0396,
"step": 7010
},
{
"epoch": 0.36277194977003774,
"grad_norm": 0.5857988595962524,
"learning_rate": 0.0005,
"loss": 1.0362,
"step": 7020
},
{
"epoch": 0.3632887189292543,
"grad_norm": 0.6305558085441589,
"learning_rate": 0.0005,
"loss": 1.0378,
"step": 7030
},
{
"epoch": 0.3638054880884709,
"grad_norm": 0.5987147688865662,
"learning_rate": 0.0005,
"loss": 1.0407,
"step": 7040
},
{
"epoch": 0.36432225724768746,
"grad_norm": 0.5889327526092529,
"learning_rate": 0.0005,
"loss": 1.044,
"step": 7050
},
{
"epoch": 0.364839026406904,
"grad_norm": 0.5972746014595032,
"learning_rate": 0.0005,
"loss": 1.0333,
"step": 7060
},
{
"epoch": 0.36535579556612063,
"grad_norm": 0.6437240839004517,
"learning_rate": 0.0005,
"loss": 1.0219,
"step": 7070
},
{
"epoch": 0.3658725647253372,
"grad_norm": 0.6240195631980896,
"learning_rate": 0.0005,
"loss": 1.0268,
"step": 7080
},
{
"epoch": 0.36638933388455375,
"grad_norm": 0.6170317530632019,
"learning_rate": 0.0005,
"loss": 1.0402,
"step": 7090
},
{
"epoch": 0.36690610304377036,
"grad_norm": 0.661592423915863,
"learning_rate": 0.0005,
"loss": 1.0255,
"step": 7100
},
{
"epoch": 0.3674228722029869,
"grad_norm": 0.6611010432243347,
"learning_rate": 0.0005,
"loss": 1.0387,
"step": 7110
},
{
"epoch": 0.36793964136220353,
"grad_norm": 0.6037949323654175,
"learning_rate": 0.0005,
"loss": 1.0398,
"step": 7120
},
{
"epoch": 0.3684564105214201,
"grad_norm": 0.6260375380516052,
"learning_rate": 0.0005,
"loss": 1.0223,
"step": 7130
},
{
"epoch": 0.36897317968063664,
"grad_norm": 0.7400781512260437,
"learning_rate": 0.0005,
"loss": 1.0331,
"step": 7140
},
{
"epoch": 0.36948994883985325,
"grad_norm": 0.8144364356994629,
"learning_rate": 0.0005,
"loss": 1.0341,
"step": 7150
},
{
"epoch": 0.3700067179990698,
"grad_norm": 0.6299716830253601,
"learning_rate": 0.0005,
"loss": 1.0428,
"step": 7160
},
{
"epoch": 0.37052348715828637,
"grad_norm": 0.605995774269104,
"learning_rate": 0.0005,
"loss": 1.0628,
"step": 7170
},
{
"epoch": 0.371040256317503,
"grad_norm": 0.5977038145065308,
"learning_rate": 0.0005,
"loss": 1.0227,
"step": 7180
},
{
"epoch": 0.37155702547671954,
"grad_norm": 0.6418441534042358,
"learning_rate": 0.0005,
"loss": 1.0216,
"step": 7190
},
{
"epoch": 0.37207379463593615,
"grad_norm": 0.6550008654594421,
"learning_rate": 0.0005,
"loss": 1.0317,
"step": 7200
},
{
"epoch": 0.3725905637951527,
"grad_norm": 0.6023372411727905,
"learning_rate": 0.0005,
"loss": 1.0291,
"step": 7210
},
{
"epoch": 0.37310733295436926,
"grad_norm": 0.6071696877479553,
"learning_rate": 0.0005,
"loss": 1.041,
"step": 7220
},
{
"epoch": 0.3736241021135859,
"grad_norm": 0.6096029877662659,
"learning_rate": 0.0005,
"loss": 1.0316,
"step": 7230
},
{
"epoch": 0.37414087127280243,
"grad_norm": 0.5897752642631531,
"learning_rate": 0.0005,
"loss": 1.0289,
"step": 7240
},
{
"epoch": 0.374657640432019,
"grad_norm": 0.6093285083770752,
"learning_rate": 0.0005,
"loss": 1.0368,
"step": 7250
},
{
"epoch": 0.3751744095912356,
"grad_norm": 0.6444416046142578,
"learning_rate": 0.0005,
"loss": 1.0116,
"step": 7260
},
{
"epoch": 0.37569117875045216,
"grad_norm": 0.6363521814346313,
"learning_rate": 0.0005,
"loss": 1.0198,
"step": 7270
},
{
"epoch": 0.37620794790966877,
"grad_norm": 0.6633175611495972,
"learning_rate": 0.0005,
"loss": 1.0178,
"step": 7280
},
{
"epoch": 0.37672471706888533,
"grad_norm": 0.5611307621002197,
"learning_rate": 0.0005,
"loss": 1.0319,
"step": 7290
},
{
"epoch": 0.3772414862281019,
"grad_norm": 0.5733465552330017,
"learning_rate": 0.0005,
"loss": 1.0095,
"step": 7300
},
{
"epoch": 0.3777582553873185,
"grad_norm": 0.6538148522377014,
"learning_rate": 0.0005,
"loss": 1.0405,
"step": 7310
},
{
"epoch": 0.37827502454653505,
"grad_norm": 0.6904069781303406,
"learning_rate": 0.0005,
"loss": 1.0322,
"step": 7320
},
{
"epoch": 0.37879179370575167,
"grad_norm": 0.6486346125602722,
"learning_rate": 0.0005,
"loss": 1.0162,
"step": 7330
},
{
"epoch": 0.3793085628649682,
"grad_norm": 0.5600974559783936,
"learning_rate": 0.0005,
"loss": 1.0255,
"step": 7340
},
{
"epoch": 0.3798253320241848,
"grad_norm": 0.5800735354423523,
"learning_rate": 0.0005,
"loss": 1.0228,
"step": 7350
},
{
"epoch": 0.3803421011834014,
"grad_norm": 0.6365842819213867,
"learning_rate": 0.0005,
"loss": 1.0229,
"step": 7360
},
{
"epoch": 0.38085887034261795,
"grad_norm": 0.6074081659317017,
"learning_rate": 0.0005,
"loss": 1.0325,
"step": 7370
},
{
"epoch": 0.3813756395018345,
"grad_norm": 0.5998241901397705,
"learning_rate": 0.0005,
"loss": 1.0164,
"step": 7380
},
{
"epoch": 0.3818924086610511,
"grad_norm": 0.6576969623565674,
"learning_rate": 0.0005,
"loss": 1.0153,
"step": 7390
},
{
"epoch": 0.3824091778202677,
"grad_norm": 0.6602439284324646,
"learning_rate": 0.0005,
"loss": 1.0197,
"step": 7400
},
{
"epoch": 0.3829259469794843,
"grad_norm": 0.6058171987533569,
"learning_rate": 0.0005,
"loss": 1.0289,
"step": 7410
},
{
"epoch": 0.38344271613870085,
"grad_norm": 0.7188865542411804,
"learning_rate": 0.0005,
"loss": 1.0216,
"step": 7420
},
{
"epoch": 0.3839594852979174,
"grad_norm": 0.6025785803794861,
"learning_rate": 0.0005,
"loss": 1.0195,
"step": 7430
},
{
"epoch": 0.384476254457134,
"grad_norm": 0.6643381118774414,
"learning_rate": 0.0005,
"loss": 1.0059,
"step": 7440
},
{
"epoch": 0.38499302361635057,
"grad_norm": 0.6015246510505676,
"learning_rate": 0.0005,
"loss": 1.0181,
"step": 7450
},
{
"epoch": 0.38550979277556713,
"grad_norm": 0.6102477312088013,
"learning_rate": 0.0005,
"loss": 1.0268,
"step": 7460
},
{
"epoch": 0.38602656193478374,
"grad_norm": 0.6054964661598206,
"learning_rate": 0.0005,
"loss": 1.0207,
"step": 7470
},
{
"epoch": 0.3865433310940003,
"grad_norm": 0.5937122106552124,
"learning_rate": 0.0005,
"loss": 1.0214,
"step": 7480
},
{
"epoch": 0.3870601002532169,
"grad_norm": 0.5697932839393616,
"learning_rate": 0.0005,
"loss": 0.9999,
"step": 7490
},
{
"epoch": 0.38757686941243347,
"grad_norm": 0.6040372848510742,
"learning_rate": 0.0005,
"loss": 1.0244,
"step": 7500
},
{
"epoch": 0.38809363857165,
"grad_norm": 0.666986346244812,
"learning_rate": 0.0005,
"loss": 1.0253,
"step": 7510
},
{
"epoch": 0.38861040773086664,
"grad_norm": 0.5957795977592468,
"learning_rate": 0.0005,
"loss": 1.015,
"step": 7520
},
{
"epoch": 0.3891271768900832,
"grad_norm": 0.7224922776222229,
"learning_rate": 0.0005,
"loss": 1.018,
"step": 7530
},
{
"epoch": 0.3896439460492998,
"grad_norm": 0.6356753706932068,
"learning_rate": 0.0005,
"loss": 1.028,
"step": 7540
},
{
"epoch": 0.39016071520851636,
"grad_norm": 0.6179920434951782,
"learning_rate": 0.0005,
"loss": 1.022,
"step": 7550
},
{
"epoch": 0.3906774843677329,
"grad_norm": 0.7617205381393433,
"learning_rate": 0.0005,
"loss": 1.0124,
"step": 7560
},
{
"epoch": 0.39119425352694953,
"grad_norm": 0.6080652475357056,
"learning_rate": 0.0005,
"loss": 1.0189,
"step": 7570
},
{
"epoch": 0.3917110226861661,
"grad_norm": 0.6190568804740906,
"learning_rate": 0.0005,
"loss": 1.02,
"step": 7580
},
{
"epoch": 0.39222779184538265,
"grad_norm": 0.584118127822876,
"learning_rate": 0.0005,
"loss": 1.0134,
"step": 7590
},
{
"epoch": 0.39274456100459926,
"grad_norm": 0.5745325088500977,
"learning_rate": 0.0005,
"loss": 1.0145,
"step": 7600
},
{
"epoch": 0.3932613301638158,
"grad_norm": 0.586669385433197,
"learning_rate": 0.0005,
"loss": 1.0215,
"step": 7610
},
{
"epoch": 0.3937780993230324,
"grad_norm": 0.6320251822471619,
"learning_rate": 0.0005,
"loss": 1.0075,
"step": 7620
},
{
"epoch": 0.394294868482249,
"grad_norm": 0.6066457033157349,
"learning_rate": 0.0005,
"loss": 1.0084,
"step": 7630
},
{
"epoch": 0.39481163764146554,
"grad_norm": 0.5506545305252075,
"learning_rate": 0.0005,
"loss": 1.0187,
"step": 7640
},
{
"epoch": 0.39532840680068215,
"grad_norm": 0.6136749982833862,
"learning_rate": 0.0005,
"loss": 1.0125,
"step": 7650
},
{
"epoch": 0.3958451759598987,
"grad_norm": 0.7134038805961609,
"learning_rate": 0.0005,
"loss": 1.0056,
"step": 7660
},
{
"epoch": 0.39636194511911527,
"grad_norm": 0.6053097248077393,
"learning_rate": 0.0005,
"loss": 1.0099,
"step": 7670
},
{
"epoch": 0.3968787142783319,
"grad_norm": 0.5632675290107727,
"learning_rate": 0.0005,
"loss": 1.0134,
"step": 7680
},
{
"epoch": 0.39739548343754844,
"grad_norm": 0.6165273189544678,
"learning_rate": 0.0005,
"loss": 1.0235,
"step": 7690
},
{
"epoch": 0.39791225259676505,
"grad_norm": 0.6279580593109131,
"learning_rate": 0.0005,
"loss": 1.0103,
"step": 7700
},
{
"epoch": 0.3984290217559816,
"grad_norm": 0.6073136329650879,
"learning_rate": 0.0005,
"loss": 1.0134,
"step": 7710
},
{
"epoch": 0.39894579091519816,
"grad_norm": 0.5953530073165894,
"learning_rate": 0.0005,
"loss": 1.0249,
"step": 7720
},
{
"epoch": 0.3994625600744148,
"grad_norm": 0.5744448900222778,
"learning_rate": 0.0005,
"loss": 1.0138,
"step": 7730
},
{
"epoch": 0.39997932923363133,
"grad_norm": 0.5618404746055603,
"learning_rate": 0.0005,
"loss": 1.0079,
"step": 7740
},
{
"epoch": 0.4004960983928479,
"grad_norm": 0.567597508430481,
"learning_rate": 0.0005,
"loss": 1.0139,
"step": 7750
},
{
"epoch": 0.4010128675520645,
"grad_norm": 0.5764487981796265,
"learning_rate": 0.0005,
"loss": 1.0379,
"step": 7760
},
{
"epoch": 0.40152963671128106,
"grad_norm": 0.6651884913444519,
"learning_rate": 0.0005,
"loss": 1.0082,
"step": 7770
},
{
"epoch": 0.40204640587049767,
"grad_norm": 0.7175072431564331,
"learning_rate": 0.0005,
"loss": 1.0144,
"step": 7780
},
{
"epoch": 0.4025631750297142,
"grad_norm": 0.591261625289917,
"learning_rate": 0.0005,
"loss": 1.0103,
"step": 7790
},
{
"epoch": 0.4030799441889308,
"grad_norm": 0.5823299884796143,
"learning_rate": 0.0005,
"loss": 1.0016,
"step": 7800
},
{
"epoch": 0.4035967133481474,
"grad_norm": 0.5339162945747375,
"learning_rate": 0.0005,
"loss": 1.0124,
"step": 7810
},
{
"epoch": 0.40411348250736395,
"grad_norm": 0.6042317748069763,
"learning_rate": 0.0005,
"loss": 1.0006,
"step": 7820
},
{
"epoch": 0.40463025166658056,
"grad_norm": 0.6178877353668213,
"learning_rate": 0.0005,
"loss": 1.0166,
"step": 7830
},
{
"epoch": 0.4051470208257971,
"grad_norm": 0.6470639705657959,
"learning_rate": 0.0005,
"loss": 0.9899,
"step": 7840
},
{
"epoch": 0.4056637899850137,
"grad_norm": 0.5468031167984009,
"learning_rate": 0.0005,
"loss": 1.0,
"step": 7850
},
{
"epoch": 0.4061805591442303,
"grad_norm": 0.566137433052063,
"learning_rate": 0.0005,
"loss": 1.0187,
"step": 7860
},
{
"epoch": 0.40669732830344685,
"grad_norm": 0.6000310182571411,
"learning_rate": 0.0005,
"loss": 1.0221,
"step": 7870
},
{
"epoch": 0.4072140974626634,
"grad_norm": 0.5763528943061829,
"learning_rate": 0.0005,
"loss": 1.0318,
"step": 7880
},
{
"epoch": 0.40773086662188,
"grad_norm": 0.5767903327941895,
"learning_rate": 0.0005,
"loss": 1.014,
"step": 7890
},
{
"epoch": 0.4082476357810966,
"grad_norm": 0.6295961737632751,
"learning_rate": 0.0005,
"loss": 0.9885,
"step": 7900
},
{
"epoch": 0.4087644049403132,
"grad_norm": 0.6416009068489075,
"learning_rate": 0.0005,
"loss": 1.0013,
"step": 7910
},
{
"epoch": 0.40928117409952974,
"grad_norm": 0.6039779186248779,
"learning_rate": 0.0005,
"loss": 0.9986,
"step": 7920
},
{
"epoch": 0.4097979432587463,
"grad_norm": 0.6459826827049255,
"learning_rate": 0.0005,
"loss": 1.0051,
"step": 7930
},
{
"epoch": 0.4103147124179629,
"grad_norm": 0.597352147102356,
"learning_rate": 0.0005,
"loss": 1.002,
"step": 7940
},
{
"epoch": 0.41083148157717947,
"grad_norm": 0.5876639485359192,
"learning_rate": 0.0005,
"loss": 1.0076,
"step": 7950
},
{
"epoch": 0.411348250736396,
"grad_norm": 0.5862469673156738,
"learning_rate": 0.0005,
"loss": 0.9972,
"step": 7960
},
{
"epoch": 0.41186501989561264,
"grad_norm": 0.5829436779022217,
"learning_rate": 0.0005,
"loss": 1.0233,
"step": 7970
},
{
"epoch": 0.4123817890548292,
"grad_norm": 0.5912736058235168,
"learning_rate": 0.0005,
"loss": 1.0038,
"step": 7980
},
{
"epoch": 0.4128985582140458,
"grad_norm": 0.5810758471488953,
"learning_rate": 0.0005,
"loss": 1.0077,
"step": 7990
},
{
"epoch": 0.41341532737326236,
"grad_norm": 0.5771864056587219,
"learning_rate": 0.0005,
"loss": 0.9976,
"step": 8000
},
{
"epoch": 0.4139320965324789,
"grad_norm": 0.5928204655647278,
"learning_rate": 0.0005,
"loss": 1.0186,
"step": 8010
},
{
"epoch": 0.41444886569169553,
"grad_norm": 0.603636622428894,
"learning_rate": 0.0005,
"loss": 1.0009,
"step": 8020
},
{
"epoch": 0.4149656348509121,
"grad_norm": 0.5715627670288086,
"learning_rate": 0.0005,
"loss": 1.014,
"step": 8030
},
{
"epoch": 0.4154824040101287,
"grad_norm": 0.5580553412437439,
"learning_rate": 0.0005,
"loss": 0.9957,
"step": 8040
},
{
"epoch": 0.41599917316934526,
"grad_norm": 0.5680859088897705,
"learning_rate": 0.0005,
"loss": 1.0027,
"step": 8050
},
{
"epoch": 0.4165159423285618,
"grad_norm": 0.5446572303771973,
"learning_rate": 0.0005,
"loss": 1.0412,
"step": 8060
},
{
"epoch": 0.41703271148777843,
"grad_norm": 0.5877604484558105,
"learning_rate": 0.0005,
"loss": 1.0087,
"step": 8070
},
{
"epoch": 0.417549480646995,
"grad_norm": 0.5905182361602783,
"learning_rate": 0.0005,
"loss": 0.9889,
"step": 8080
},
{
"epoch": 0.41806624980621154,
"grad_norm": 0.6025214195251465,
"learning_rate": 0.0005,
"loss": 1.0171,
"step": 8090
},
{
"epoch": 0.41858301896542816,
"grad_norm": 0.5762201547622681,
"learning_rate": 0.0005,
"loss": 1.0199,
"step": 8100
},
{
"epoch": 0.4190997881246447,
"grad_norm": 0.5564827919006348,
"learning_rate": 0.0005,
"loss": 1.0049,
"step": 8110
},
{
"epoch": 0.4196165572838613,
"grad_norm": 0.5517228841781616,
"learning_rate": 0.0005,
"loss": 1.0036,
"step": 8120
},
{
"epoch": 0.4201333264430779,
"grad_norm": 0.6581810712814331,
"learning_rate": 0.0005,
"loss": 1.0042,
"step": 8130
},
{
"epoch": 0.42065009560229444,
"grad_norm": 0.5902772545814514,
"learning_rate": 0.0005,
"loss": 0.9956,
"step": 8140
},
{
"epoch": 0.42116686476151105,
"grad_norm": 0.5903311967849731,
"learning_rate": 0.0005,
"loss": 0.9994,
"step": 8150
},
{
"epoch": 0.4216836339207276,
"grad_norm": 0.5883710980415344,
"learning_rate": 0.0005,
"loss": 1.001,
"step": 8160
},
{
"epoch": 0.42220040307994416,
"grad_norm": 0.5694506764411926,
"learning_rate": 0.0005,
"loss": 0.997,
"step": 8170
},
{
"epoch": 0.4227171722391608,
"grad_norm": 0.5448591113090515,
"learning_rate": 0.0005,
"loss": 0.9987,
"step": 8180
},
{
"epoch": 0.42323394139837733,
"grad_norm": 0.5763291120529175,
"learning_rate": 0.0005,
"loss": 0.9957,
"step": 8190
},
{
"epoch": 0.42375071055759395,
"grad_norm": 0.5763616561889648,
"learning_rate": 0.0005,
"loss": 1.009,
"step": 8200
},
{
"epoch": 0.4242674797168105,
"grad_norm": 0.5575286149978638,
"learning_rate": 0.0005,
"loss": 1.01,
"step": 8210
},
{
"epoch": 0.42478424887602706,
"grad_norm": 0.5435507297515869,
"learning_rate": 0.0005,
"loss": 0.9947,
"step": 8220
},
{
"epoch": 0.42530101803524367,
"grad_norm": 0.6307750344276428,
"learning_rate": 0.0005,
"loss": 1.0098,
"step": 8230
},
{
"epoch": 0.42581778719446023,
"grad_norm": 0.5419248342514038,
"learning_rate": 0.0005,
"loss": 1.013,
"step": 8240
},
{
"epoch": 0.4263345563536768,
"grad_norm": 0.5558311343193054,
"learning_rate": 0.0005,
"loss": 0.9956,
"step": 8250
},
{
"epoch": 0.4268513255128934,
"grad_norm": 0.5593147277832031,
"learning_rate": 0.0005,
"loss": 1.0142,
"step": 8260
},
{
"epoch": 0.42736809467210995,
"grad_norm": 0.5839881896972656,
"learning_rate": 0.0005,
"loss": 1.001,
"step": 8270
},
{
"epoch": 0.42788486383132657,
"grad_norm": 0.5981064438819885,
"learning_rate": 0.0005,
"loss": 0.9952,
"step": 8280
},
{
"epoch": 0.4284016329905431,
"grad_norm": 0.6945583820343018,
"learning_rate": 0.0005,
"loss": 0.9971,
"step": 8290
},
{
"epoch": 0.4289184021497597,
"grad_norm": 0.5536506772041321,
"learning_rate": 0.0005,
"loss": 0.9929,
"step": 8300
},
{
"epoch": 0.4294351713089763,
"grad_norm": 0.557338297367096,
"learning_rate": 0.0005,
"loss": 0.9833,
"step": 8310
},
{
"epoch": 0.42995194046819285,
"grad_norm": 0.5480133295059204,
"learning_rate": 0.0005,
"loss": 1.0008,
"step": 8320
},
{
"epoch": 0.43046870962740946,
"grad_norm": 0.5495566129684448,
"learning_rate": 0.0005,
"loss": 1.0126,
"step": 8330
},
{
"epoch": 0.430985478786626,
"grad_norm": 0.5759509801864624,
"learning_rate": 0.0005,
"loss": 0.9866,
"step": 8340
},
{
"epoch": 0.4315022479458426,
"grad_norm": 0.5602892637252808,
"learning_rate": 0.0005,
"loss": 0.9893,
"step": 8350
},
{
"epoch": 0.4320190171050592,
"grad_norm": 0.560892641544342,
"learning_rate": 0.0005,
"loss": 1.0125,
"step": 8360
},
{
"epoch": 0.43253578626427575,
"grad_norm": 0.582815408706665,
"learning_rate": 0.0005,
"loss": 1.0,
"step": 8370
},
{
"epoch": 0.4330525554234923,
"grad_norm": 0.6133496165275574,
"learning_rate": 0.0005,
"loss": 0.9928,
"step": 8380
},
{
"epoch": 0.4335693245827089,
"grad_norm": 0.5611013174057007,
"learning_rate": 0.0005,
"loss": 0.998,
"step": 8390
},
{
"epoch": 0.43408609374192547,
"grad_norm": 0.5589267611503601,
"learning_rate": 0.0005,
"loss": 0.999,
"step": 8400
},
{
"epoch": 0.4346028629011421,
"grad_norm": 0.5508078932762146,
"learning_rate": 0.0005,
"loss": 0.9954,
"step": 8410
},
{
"epoch": 0.43511963206035864,
"grad_norm": 0.5803013443946838,
"learning_rate": 0.0005,
"loss": 0.9891,
"step": 8420
},
{
"epoch": 0.4356364012195752,
"grad_norm": 0.532085120677948,
"learning_rate": 0.0005,
"loss": 0.9935,
"step": 8430
},
{
"epoch": 0.4361531703787918,
"grad_norm": 0.6158758401870728,
"learning_rate": 0.0005,
"loss": 0.9927,
"step": 8440
},
{
"epoch": 0.43666993953800837,
"grad_norm": 0.5444722771644592,
"learning_rate": 0.0005,
"loss": 0.9754,
"step": 8450
},
{
"epoch": 0.4371867086972249,
"grad_norm": 0.5872038006782532,
"learning_rate": 0.0005,
"loss": 0.9803,
"step": 8460
},
{
"epoch": 0.43770347785644154,
"grad_norm": 0.5382379293441772,
"learning_rate": 0.0005,
"loss": 1.0025,
"step": 8470
},
{
"epoch": 0.4382202470156581,
"grad_norm": 0.5538324117660522,
"learning_rate": 0.0005,
"loss": 0.9764,
"step": 8480
},
{
"epoch": 0.4387370161748747,
"grad_norm": 0.5917341709136963,
"learning_rate": 0.0005,
"loss": 0.9732,
"step": 8490
},
{
"epoch": 0.43925378533409126,
"grad_norm": 0.5395458340644836,
"learning_rate": 0.0005,
"loss": 0.9948,
"step": 8500
},
{
"epoch": 0.4397705544933078,
"grad_norm": 0.5973149538040161,
"learning_rate": 0.0005,
"loss": 0.9971,
"step": 8510
},
{
"epoch": 0.44028732365252443,
"grad_norm": 0.579712450504303,
"learning_rate": 0.0005,
"loss": 0.9836,
"step": 8520
},
{
"epoch": 0.440804092811741,
"grad_norm": 0.5590643882751465,
"learning_rate": 0.0005,
"loss": 0.9896,
"step": 8530
},
{
"epoch": 0.4413208619709576,
"grad_norm": 0.5443204045295715,
"learning_rate": 0.0005,
"loss": 0.9868,
"step": 8540
},
{
"epoch": 0.44183763113017416,
"grad_norm": 0.5973614454269409,
"learning_rate": 0.0005,
"loss": 0.9881,
"step": 8550
},
{
"epoch": 0.4423544002893907,
"grad_norm": 0.6157576441764832,
"learning_rate": 0.0005,
"loss": 1.0007,
"step": 8560
},
{
"epoch": 0.4428711694486073,
"grad_norm": 0.5678598880767822,
"learning_rate": 0.0005,
"loss": 0.9878,
"step": 8570
},
{
"epoch": 0.4433879386078239,
"grad_norm": 0.5606565475463867,
"learning_rate": 0.0005,
"loss": 0.9899,
"step": 8580
},
{
"epoch": 0.44390470776704044,
"grad_norm": 0.651261031627655,
"learning_rate": 0.0005,
"loss": 1.0053,
"step": 8590
},
{
"epoch": 0.44442147692625705,
"grad_norm": 0.6717237830162048,
"learning_rate": 0.0005,
"loss": 0.9783,
"step": 8600
},
{
"epoch": 0.4449382460854736,
"grad_norm": 0.5981956720352173,
"learning_rate": 0.0005,
"loss": 0.9846,
"step": 8610
},
{
"epoch": 0.4454550152446902,
"grad_norm": 0.6338360905647278,
"learning_rate": 0.0005,
"loss": 0.9903,
"step": 8620
},
{
"epoch": 0.4459717844039068,
"grad_norm": 0.6431187987327576,
"learning_rate": 0.0005,
"loss": 0.9967,
"step": 8630
},
{
"epoch": 0.44648855356312334,
"grad_norm": 0.6032900810241699,
"learning_rate": 0.0005,
"loss": 0.9814,
"step": 8640
},
{
"epoch": 0.44700532272233995,
"grad_norm": 0.5607067942619324,
"learning_rate": 0.0005,
"loss": 1.0021,
"step": 8650
},
{
"epoch": 0.4475220918815565,
"grad_norm": 0.5442407727241516,
"learning_rate": 0.0005,
"loss": 0.9911,
"step": 8660
},
{
"epoch": 0.44803886104077306,
"grad_norm": 0.5274026989936829,
"learning_rate": 0.0005,
"loss": 0.9798,
"step": 8670
},
{
"epoch": 0.4485556301999897,
"grad_norm": 0.5678251385688782,
"learning_rate": 0.0005,
"loss": 0.9869,
"step": 8680
},
{
"epoch": 0.44907239935920623,
"grad_norm": 0.5528420805931091,
"learning_rate": 0.0005,
"loss": 0.9963,
"step": 8690
},
{
"epoch": 0.44958916851842284,
"grad_norm": 0.5485315918922424,
"learning_rate": 0.0005,
"loss": 0.974,
"step": 8700
},
{
"epoch": 0.4501059376776394,
"grad_norm": 0.566852331161499,
"learning_rate": 0.0005,
"loss": 0.9891,
"step": 8710
},
{
"epoch": 0.45062270683685596,
"grad_norm": 0.5270015597343445,
"learning_rate": 0.0005,
"loss": 0.979,
"step": 8720
},
{
"epoch": 0.45113947599607257,
"grad_norm": 0.5595947504043579,
"learning_rate": 0.0005,
"loss": 0.9632,
"step": 8730
},
{
"epoch": 0.4516562451552891,
"grad_norm": 0.5901986360549927,
"learning_rate": 0.0005,
"loss": 0.9952,
"step": 8740
},
{
"epoch": 0.4521730143145057,
"grad_norm": 0.5500153303146362,
"learning_rate": 0.0005,
"loss": 0.9826,
"step": 8750
},
{
"epoch": 0.4526897834737223,
"grad_norm": 0.6439850926399231,
"learning_rate": 0.0005,
"loss": 0.9783,
"step": 8760
},
{
"epoch": 0.45320655263293885,
"grad_norm": 0.6534972190856934,
"learning_rate": 0.0005,
"loss": 0.9905,
"step": 8770
},
{
"epoch": 0.45372332179215547,
"grad_norm": 0.6489924192428589,
"learning_rate": 0.0005,
"loss": 1.0066,
"step": 8780
},
{
"epoch": 0.454240090951372,
"grad_norm": 0.5541792511940002,
"learning_rate": 0.0005,
"loss": 0.989,
"step": 8790
},
{
"epoch": 0.4547568601105886,
"grad_norm": 0.5128721594810486,
"learning_rate": 0.0005,
"loss": 0.9943,
"step": 8800
},
{
"epoch": 0.4552736292698052,
"grad_norm": 0.5839647054672241,
"learning_rate": 0.0005,
"loss": 0.9824,
"step": 8810
},
{
"epoch": 0.45579039842902175,
"grad_norm": 0.6303303241729736,
"learning_rate": 0.0005,
"loss": 0.9975,
"step": 8820
},
{
"epoch": 0.45630716758823836,
"grad_norm": 0.5413320064544678,
"learning_rate": 0.0005,
"loss": 0.973,
"step": 8830
},
{
"epoch": 0.4568239367474549,
"grad_norm": 0.5503526926040649,
"learning_rate": 0.0005,
"loss": 0.997,
"step": 8840
},
{
"epoch": 0.4573407059066715,
"grad_norm": 0.5337091684341431,
"learning_rate": 0.0005,
"loss": 0.9785,
"step": 8850
},
{
"epoch": 0.4578574750658881,
"grad_norm": 0.5215671062469482,
"learning_rate": 0.0005,
"loss": 0.988,
"step": 8860
},
{
"epoch": 0.45837424422510464,
"grad_norm": 0.5596259236335754,
"learning_rate": 0.0005,
"loss": 0.9619,
"step": 8870
},
{
"epoch": 0.4588910133843212,
"grad_norm": 0.644656777381897,
"learning_rate": 0.0005,
"loss": 1.0018,
"step": 8880
},
{
"epoch": 0.4594077825435378,
"grad_norm": 0.546576976776123,
"learning_rate": 0.0005,
"loss": 0.9895,
"step": 8890
},
{
"epoch": 0.45992455170275437,
"grad_norm": 0.5912691354751587,
"learning_rate": 0.0005,
"loss": 0.9769,
"step": 8900
},
{
"epoch": 0.460441320861971,
"grad_norm": 0.5670520663261414,
"learning_rate": 0.0005,
"loss": 0.9841,
"step": 8910
},
{
"epoch": 0.46095809002118754,
"grad_norm": 0.5410053730010986,
"learning_rate": 0.0005,
"loss": 0.9842,
"step": 8920
},
{
"epoch": 0.4614748591804041,
"grad_norm": 0.5501711964607239,
"learning_rate": 0.0005,
"loss": 0.9833,
"step": 8930
},
{
"epoch": 0.4619916283396207,
"grad_norm": 0.5702757835388184,
"learning_rate": 0.0005,
"loss": 0.996,
"step": 8940
},
{
"epoch": 0.46250839749883726,
"grad_norm": 0.5536521077156067,
"learning_rate": 0.0005,
"loss": 0.9808,
"step": 8950
},
{
"epoch": 0.4630251666580538,
"grad_norm": 0.5470142364501953,
"learning_rate": 0.0005,
"loss": 0.9701,
"step": 8960
},
{
"epoch": 0.46354193581727043,
"grad_norm": 0.5773063898086548,
"learning_rate": 0.0005,
"loss": 0.9648,
"step": 8970
},
{
"epoch": 0.464058704976487,
"grad_norm": 0.5552759170532227,
"learning_rate": 0.0005,
"loss": 0.9801,
"step": 8980
},
{
"epoch": 0.4645754741357036,
"grad_norm": 0.5589256882667542,
"learning_rate": 0.0005,
"loss": 0.9762,
"step": 8990
},
{
"epoch": 0.46509224329492016,
"grad_norm": 0.5548306703567505,
"learning_rate": 0.0005,
"loss": 0.9536,
"step": 9000
},
{
"epoch": 0.4656090124541367,
"grad_norm": 0.5578811168670654,
"learning_rate": 0.0005,
"loss": 0.9758,
"step": 9010
},
{
"epoch": 0.46612578161335333,
"grad_norm": 0.542353630065918,
"learning_rate": 0.0005,
"loss": 0.9754,
"step": 9020
},
{
"epoch": 0.4666425507725699,
"grad_norm": 0.5240308046340942,
"learning_rate": 0.0005,
"loss": 0.9527,
"step": 9030
},
{
"epoch": 0.4671593199317865,
"grad_norm": 0.5662107467651367,
"learning_rate": 0.0005,
"loss": 0.9812,
"step": 9040
},
{
"epoch": 0.46767608909100306,
"grad_norm": 0.5549916625022888,
"learning_rate": 0.0005,
"loss": 0.9881,
"step": 9050
},
{
"epoch": 0.4681928582502196,
"grad_norm": 0.5178738832473755,
"learning_rate": 0.0005,
"loss": 0.9641,
"step": 9060
},
{
"epoch": 0.4687096274094362,
"grad_norm": 0.52500981092453,
"learning_rate": 0.0005,
"loss": 0.969,
"step": 9070
},
{
"epoch": 0.4692263965686528,
"grad_norm": 0.5403527617454529,
"learning_rate": 0.0005,
"loss": 0.9853,
"step": 9080
},
{
"epoch": 0.46974316572786934,
"grad_norm": 0.6338274478912354,
"learning_rate": 0.0005,
"loss": 0.9762,
"step": 9090
},
{
"epoch": 0.47025993488708595,
"grad_norm": 0.5694402456283569,
"learning_rate": 0.0005,
"loss": 0.9947,
"step": 9100
},
{
"epoch": 0.4707767040463025,
"grad_norm": 0.5308618545532227,
"learning_rate": 0.0005,
"loss": 1.0035,
"step": 9110
},
{
"epoch": 0.4712934732055191,
"grad_norm": 0.5705435872077942,
"learning_rate": 0.0005,
"loss": 0.979,
"step": 9120
},
{
"epoch": 0.4718102423647357,
"grad_norm": 0.5150364637374878,
"learning_rate": 0.0005,
"loss": 0.9907,
"step": 9130
},
{
"epoch": 0.47232701152395223,
"grad_norm": 0.6099853515625,
"learning_rate": 0.0005,
"loss": 0.9834,
"step": 9140
},
{
"epoch": 0.47284378068316885,
"grad_norm": 0.5578297972679138,
"learning_rate": 0.0005,
"loss": 0.9758,
"step": 9150
},
{
"epoch": 0.4733605498423854,
"grad_norm": 0.5842065811157227,
"learning_rate": 0.0005,
"loss": 0.9831,
"step": 9160
},
{
"epoch": 0.47387731900160196,
"grad_norm": 0.54753577709198,
"learning_rate": 0.0005,
"loss": 0.9767,
"step": 9170
},
{
"epoch": 0.47439408816081857,
"grad_norm": 0.5472375750541687,
"learning_rate": 0.0005,
"loss": 0.9844,
"step": 9180
},
{
"epoch": 0.47491085732003513,
"grad_norm": 0.6289487481117249,
"learning_rate": 0.0005,
"loss": 0.9806,
"step": 9190
},
{
"epoch": 0.47542762647925174,
"grad_norm": 0.5702399015426636,
"learning_rate": 0.0005,
"loss": 0.973,
"step": 9200
},
{
"epoch": 0.4759443956384683,
"grad_norm": 0.5393164753913879,
"learning_rate": 0.0005,
"loss": 0.9862,
"step": 9210
},
{
"epoch": 0.47646116479768486,
"grad_norm": 0.5307340621948242,
"learning_rate": 0.0005,
"loss": 0.9949,
"step": 9220
},
{
"epoch": 0.47697793395690147,
"grad_norm": 0.6061729788780212,
"learning_rate": 0.0005,
"loss": 0.9869,
"step": 9230
},
{
"epoch": 0.477494703116118,
"grad_norm": 0.5458270311355591,
"learning_rate": 0.0005,
"loss": 0.9782,
"step": 9240
},
{
"epoch": 0.4780114722753346,
"grad_norm": 0.5837684869766235,
"learning_rate": 0.0005,
"loss": 0.9794,
"step": 9250
},
{
"epoch": 0.4785282414345512,
"grad_norm": 0.557824432849884,
"learning_rate": 0.0005,
"loss": 0.9723,
"step": 9260
},
{
"epoch": 0.47904501059376775,
"grad_norm": 0.57038414478302,
"learning_rate": 0.0005,
"loss": 0.9782,
"step": 9270
},
{
"epoch": 0.47956177975298436,
"grad_norm": 0.5163660645484924,
"learning_rate": 0.0005,
"loss": 0.9615,
"step": 9280
},
{
"epoch": 0.4800785489122009,
"grad_norm": 0.5604984760284424,
"learning_rate": 0.0005,
"loss": 0.9806,
"step": 9290
},
{
"epoch": 0.4805953180714175,
"grad_norm": 0.5169503092765808,
"learning_rate": 0.0005,
"loss": 0.9594,
"step": 9300
},
{
"epoch": 0.4811120872306341,
"grad_norm": 0.547803521156311,
"learning_rate": 0.0005,
"loss": 0.9795,
"step": 9310
},
{
"epoch": 0.48162885638985065,
"grad_norm": 0.5462937951087952,
"learning_rate": 0.0005,
"loss": 0.9756,
"step": 9320
},
{
"epoch": 0.48214562554906726,
"grad_norm": 0.5670326352119446,
"learning_rate": 0.0005,
"loss": 0.9726,
"step": 9330
},
{
"epoch": 0.4826623947082838,
"grad_norm": 0.5633768439292908,
"learning_rate": 0.0005,
"loss": 0.958,
"step": 9340
},
{
"epoch": 0.48317916386750037,
"grad_norm": 0.5781881213188171,
"learning_rate": 0.0005,
"loss": 0.9531,
"step": 9350
},
{
"epoch": 0.483695933026717,
"grad_norm": 0.6162354350090027,
"learning_rate": 0.0005,
"loss": 0.9584,
"step": 9360
},
{
"epoch": 0.48421270218593354,
"grad_norm": 0.5659033060073853,
"learning_rate": 0.0005,
"loss": 0.9691,
"step": 9370
},
{
"epoch": 0.4847294713451501,
"grad_norm": 0.5409724116325378,
"learning_rate": 0.0005,
"loss": 0.9654,
"step": 9380
},
{
"epoch": 0.4852462405043667,
"grad_norm": 0.5185449719429016,
"learning_rate": 0.0005,
"loss": 0.9767,
"step": 9390
},
{
"epoch": 0.48576300966358327,
"grad_norm": 0.5317234992980957,
"learning_rate": 0.0005,
"loss": 0.9797,
"step": 9400
},
{
"epoch": 0.4862797788227999,
"grad_norm": 0.5362582802772522,
"learning_rate": 0.0005,
"loss": 0.9691,
"step": 9410
},
{
"epoch": 0.48679654798201644,
"grad_norm": 0.5296323895454407,
"learning_rate": 0.0005,
"loss": 0.9714,
"step": 9420
},
{
"epoch": 0.487313317141233,
"grad_norm": 0.5387376546859741,
"learning_rate": 0.0005,
"loss": 0.9857,
"step": 9430
},
{
"epoch": 0.4878300863004496,
"grad_norm": 0.5592471957206726,
"learning_rate": 0.0005,
"loss": 0.9687,
"step": 9440
},
{
"epoch": 0.48834685545966616,
"grad_norm": 0.5368979573249817,
"learning_rate": 0.0005,
"loss": 0.9624,
"step": 9450
},
{
"epoch": 0.4888636246188827,
"grad_norm": 0.559069037437439,
"learning_rate": 0.0005,
"loss": 0.9713,
"step": 9460
},
{
"epoch": 0.48938039377809933,
"grad_norm": 0.5417030453681946,
"learning_rate": 0.0005,
"loss": 0.9749,
"step": 9470
},
{
"epoch": 0.4898971629373159,
"grad_norm": 0.6302499771118164,
"learning_rate": 0.0005,
"loss": 0.976,
"step": 9480
},
{
"epoch": 0.4904139320965325,
"grad_norm": 0.5580116510391235,
"learning_rate": 0.0005,
"loss": 0.9696,
"step": 9490
},
{
"epoch": 0.49093070125574906,
"grad_norm": 0.5281049013137817,
"learning_rate": 0.0005,
"loss": 0.9626,
"step": 9500
},
{
"epoch": 0.4914474704149656,
"grad_norm": 0.6579439043998718,
"learning_rate": 0.0005,
"loss": 0.9915,
"step": 9510
},
{
"epoch": 0.4919642395741822,
"grad_norm": 0.6327407956123352,
"learning_rate": 0.0005,
"loss": 0.976,
"step": 9520
},
{
"epoch": 0.4924810087333988,
"grad_norm": 0.5917522311210632,
"learning_rate": 0.0005,
"loss": 0.9698,
"step": 9530
},
{
"epoch": 0.4929977778926154,
"grad_norm": 0.5556752681732178,
"learning_rate": 0.0005,
"loss": 0.974,
"step": 9540
},
{
"epoch": 0.49351454705183195,
"grad_norm": 0.6051674485206604,
"learning_rate": 0.0005,
"loss": 0.9673,
"step": 9550
},
{
"epoch": 0.4940313162110485,
"grad_norm": 0.6255143880844116,
"learning_rate": 0.0005,
"loss": 0.9741,
"step": 9560
},
{
"epoch": 0.4945480853702651,
"grad_norm": 0.5358819961547852,
"learning_rate": 0.0005,
"loss": 0.965,
"step": 9570
},
{
"epoch": 0.4950648545294817,
"grad_norm": 0.5503594279289246,
"learning_rate": 0.0005,
"loss": 0.9668,
"step": 9580
},
{
"epoch": 0.49558162368869824,
"grad_norm": 0.510237455368042,
"learning_rate": 0.0005,
"loss": 0.9685,
"step": 9590
},
{
"epoch": 0.49609839284791485,
"grad_norm": 0.5995839238166809,
"learning_rate": 0.0005,
"loss": 0.9709,
"step": 9600
},
{
"epoch": 0.4966151620071314,
"grad_norm": 0.5354804992675781,
"learning_rate": 0.0005,
"loss": 0.9618,
"step": 9610
},
{
"epoch": 0.497131931166348,
"grad_norm": 0.5301372408866882,
"learning_rate": 0.0005,
"loss": 0.9644,
"step": 9620
},
{
"epoch": 0.4976487003255646,
"grad_norm": 0.6010123491287231,
"learning_rate": 0.0005,
"loss": 0.9834,
"step": 9630
},
{
"epoch": 0.49816546948478113,
"grad_norm": 0.5131679177284241,
"learning_rate": 0.0005,
"loss": 0.9695,
"step": 9640
},
{
"epoch": 0.49868223864399774,
"grad_norm": 0.5364587903022766,
"learning_rate": 0.0005,
"loss": 0.9572,
"step": 9650
},
{
"epoch": 0.4991990078032143,
"grad_norm": 0.5561274290084839,
"learning_rate": 0.0005,
"loss": 0.9739,
"step": 9660
},
{
"epoch": 0.49971577696243086,
"grad_norm": 0.5267083048820496,
"learning_rate": 0.0005,
"loss": 0.9659,
"step": 9670
},
{
"epoch": 0.5002325461216475,
"grad_norm": 0.5306525230407715,
"learning_rate": 0.0005,
"loss": 0.9698,
"step": 9680
},
{
"epoch": 0.5007493152808641,
"grad_norm": 0.6048880219459534,
"learning_rate": 0.0005,
"loss": 0.9702,
"step": 9690
},
{
"epoch": 0.5012660844400806,
"grad_norm": 0.5528176426887512,
"learning_rate": 0.0005,
"loss": 0.981,
"step": 9700
},
{
"epoch": 0.5017828535992972,
"grad_norm": 0.5247277021408081,
"learning_rate": 0.0005,
"loss": 0.9587,
"step": 9710
},
{
"epoch": 0.5022996227585138,
"grad_norm": 0.5636876225471497,
"learning_rate": 0.0005,
"loss": 0.9627,
"step": 9720
},
{
"epoch": 0.5028163919177303,
"grad_norm": 0.5214900970458984,
"learning_rate": 0.0005,
"loss": 0.9644,
"step": 9730
},
{
"epoch": 0.5033331610769469,
"grad_norm": 0.5302378535270691,
"learning_rate": 0.0005,
"loss": 0.9612,
"step": 9740
},
{
"epoch": 0.5038499302361635,
"grad_norm": 0.5830851197242737,
"learning_rate": 0.0005,
"loss": 0.9563,
"step": 9750
},
{
"epoch": 0.50436669939538,
"grad_norm": 0.5303472876548767,
"learning_rate": 0.0005,
"loss": 0.9542,
"step": 9760
},
{
"epoch": 0.5048834685545966,
"grad_norm": 0.5632893443107605,
"learning_rate": 0.0005,
"loss": 0.9828,
"step": 9770
},
{
"epoch": 0.5054002377138133,
"grad_norm": 0.5968844890594482,
"learning_rate": 0.0005,
"loss": 0.9855,
"step": 9780
},
{
"epoch": 0.5059170068730299,
"grad_norm": 0.580721378326416,
"learning_rate": 0.0005,
"loss": 0.9903,
"step": 9790
},
{
"epoch": 0.5064337760322464,
"grad_norm": 0.5187913179397583,
"learning_rate": 0.0005,
"loss": 0.9512,
"step": 9800
},
{
"epoch": 0.506950545191463,
"grad_norm": 0.5946047902107239,
"learning_rate": 0.0005,
"loss": 0.9661,
"step": 9810
},
{
"epoch": 0.5074673143506796,
"grad_norm": 0.5428043603897095,
"learning_rate": 0.0005,
"loss": 0.9669,
"step": 9820
},
{
"epoch": 0.5079840835098961,
"grad_norm": 0.562601625919342,
"learning_rate": 0.0005,
"loss": 0.958,
"step": 9830
},
{
"epoch": 0.5085008526691127,
"grad_norm": 0.5812455415725708,
"learning_rate": 0.0005,
"loss": 0.969,
"step": 9840
},
{
"epoch": 0.5090176218283293,
"grad_norm": 0.6318747997283936,
"learning_rate": 0.0005,
"loss": 0.9512,
"step": 9850
},
{
"epoch": 0.5095343909875458,
"grad_norm": 0.6214849352836609,
"learning_rate": 0.0005,
"loss": 0.9727,
"step": 9860
},
{
"epoch": 0.5100511601467624,
"grad_norm": 0.5631205439567566,
"learning_rate": 0.0005,
"loss": 0.9564,
"step": 9870
},
{
"epoch": 0.510567929305979,
"grad_norm": 0.626625657081604,
"learning_rate": 0.0005,
"loss": 0.9597,
"step": 9880
},
{
"epoch": 0.5110846984651956,
"grad_norm": 0.4959418475627899,
"learning_rate": 0.0005,
"loss": 0.9591,
"step": 9890
},
{
"epoch": 0.5116014676244122,
"grad_norm": 0.5196536779403687,
"learning_rate": 0.0005,
"loss": 0.9771,
"step": 9900
},
{
"epoch": 0.5121182367836288,
"grad_norm": 0.6234534382820129,
"learning_rate": 0.0005,
"loss": 0.9609,
"step": 9910
},
{
"epoch": 0.5126350059428453,
"grad_norm": 0.5823763012886047,
"learning_rate": 0.0005,
"loss": 0.9757,
"step": 9920
},
{
"epoch": 0.5131517751020619,
"grad_norm": 0.5576559901237488,
"learning_rate": 0.0005,
"loss": 0.9502,
"step": 9930
},
{
"epoch": 0.5136685442612785,
"grad_norm": 0.5374221801757812,
"learning_rate": 0.0005,
"loss": 0.9513,
"step": 9940
},
{
"epoch": 0.5141853134204951,
"grad_norm": 0.5272248387336731,
"learning_rate": 0.0005,
"loss": 0.9586,
"step": 9950
},
{
"epoch": 0.5147020825797116,
"grad_norm": 0.5568712949752808,
"learning_rate": 0.0005,
"loss": 0.957,
"step": 9960
},
{
"epoch": 0.5152188517389282,
"grad_norm": 0.5274987816810608,
"learning_rate": 0.0005,
"loss": 0.9432,
"step": 9970
},
{
"epoch": 0.5157356208981448,
"grad_norm": 0.5364307165145874,
"learning_rate": 0.0005,
"loss": 0.9548,
"step": 9980
},
{
"epoch": 0.5162523900573613,
"grad_norm": 0.5436477065086365,
"learning_rate": 0.0005,
"loss": 0.9572,
"step": 9990
},
{
"epoch": 0.516769159216578,
"grad_norm": 0.5213954448699951,
"learning_rate": 0.0005,
"loss": 0.9589,
"step": 10000
},
{
"epoch": 0.5172859283757946,
"grad_norm": 0.5076503157615662,
"learning_rate": 0.0005,
"loss": 0.9498,
"step": 10010
},
{
"epoch": 0.5178026975350111,
"grad_norm": 0.5266632437705994,
"learning_rate": 0.0005,
"loss": 0.9641,
"step": 10020
},
{
"epoch": 0.5183194666942277,
"grad_norm": 0.5237132906913757,
"learning_rate": 0.0005,
"loss": 0.9717,
"step": 10030
},
{
"epoch": 0.5188362358534443,
"grad_norm": 0.5496323704719543,
"learning_rate": 0.0005,
"loss": 0.952,
"step": 10040
},
{
"epoch": 0.5193530050126608,
"grad_norm": 0.5751678347587585,
"learning_rate": 0.0005,
"loss": 0.9508,
"step": 10050
},
{
"epoch": 0.5198697741718774,
"grad_norm": 0.5333780646324158,
"learning_rate": 0.0005,
"loss": 0.9442,
"step": 10060
},
{
"epoch": 0.520386543331094,
"grad_norm": 0.5529361367225647,
"learning_rate": 0.0005,
"loss": 0.9662,
"step": 10070
},
{
"epoch": 0.5209033124903106,
"grad_norm": 0.5695346593856812,
"learning_rate": 0.0005,
"loss": 0.9648,
"step": 10080
},
{
"epoch": 0.5214200816495271,
"grad_norm": 0.528101921081543,
"learning_rate": 0.0005,
"loss": 0.9581,
"step": 10090
},
{
"epoch": 0.5219368508087437,
"grad_norm": 0.5323454141616821,
"learning_rate": 0.0005,
"loss": 0.9842,
"step": 10100
},
{
"epoch": 0.5224536199679604,
"grad_norm": 0.5791360139846802,
"learning_rate": 0.0005,
"loss": 0.9755,
"step": 10110
},
{
"epoch": 0.5229703891271769,
"grad_norm": 0.5297543406486511,
"learning_rate": 0.0005,
"loss": 0.9706,
"step": 10120
},
{
"epoch": 0.5234871582863935,
"grad_norm": 0.5344191789627075,
"learning_rate": 0.0005,
"loss": 0.9557,
"step": 10130
},
{
"epoch": 0.5240039274456101,
"grad_norm": 0.5307314395904541,
"learning_rate": 0.0005,
"loss": 0.9561,
"step": 10140
},
{
"epoch": 0.5245206966048266,
"grad_norm": 0.5625677108764648,
"learning_rate": 0.0005,
"loss": 0.9639,
"step": 10150
},
{
"epoch": 0.5250374657640432,
"grad_norm": 0.5287933945655823,
"learning_rate": 0.0005,
"loss": 0.9458,
"step": 10160
},
{
"epoch": 0.5255542349232598,
"grad_norm": 0.4987037777900696,
"learning_rate": 0.0005,
"loss": 0.9542,
"step": 10170
},
{
"epoch": 0.5260710040824763,
"grad_norm": 0.5192455053329468,
"learning_rate": 0.0005,
"loss": 0.9534,
"step": 10180
},
{
"epoch": 0.5265877732416929,
"grad_norm": 0.5038531422615051,
"learning_rate": 0.0005,
"loss": 0.9534,
"step": 10190
},
{
"epoch": 0.5271045424009095,
"grad_norm": 0.5356433391571045,
"learning_rate": 0.0005,
"loss": 0.9657,
"step": 10200
},
{
"epoch": 0.527621311560126,
"grad_norm": 0.5290383696556091,
"learning_rate": 0.0005,
"loss": 0.9405,
"step": 10210
},
{
"epoch": 0.5281380807193427,
"grad_norm": 0.5376208424568176,
"learning_rate": 0.0005,
"loss": 0.9581,
"step": 10220
},
{
"epoch": 0.5286548498785593,
"grad_norm": 0.5011909604072571,
"learning_rate": 0.0005,
"loss": 0.9383,
"step": 10230
},
{
"epoch": 0.5291716190377759,
"grad_norm": 0.503073513507843,
"learning_rate": 0.0005,
"loss": 0.9525,
"step": 10240
},
{
"epoch": 0.5296883881969924,
"grad_norm": 0.5255160927772522,
"learning_rate": 0.0005,
"loss": 0.9525,
"step": 10250
},
{
"epoch": 0.530205157356209,
"grad_norm": 0.5147885084152222,
"learning_rate": 0.0005,
"loss": 0.9465,
"step": 10260
},
{
"epoch": 0.5307219265154256,
"grad_norm": 0.5343205332756042,
"learning_rate": 0.0005,
"loss": 0.9441,
"step": 10270
},
{
"epoch": 0.5312386956746421,
"grad_norm": 0.5480389595031738,
"learning_rate": 0.0005,
"loss": 0.9551,
"step": 10280
},
{
"epoch": 0.5317554648338587,
"grad_norm": 0.5425328612327576,
"learning_rate": 0.0005,
"loss": 0.951,
"step": 10290
},
{
"epoch": 0.5322722339930753,
"grad_norm": 0.6197424530982971,
"learning_rate": 0.0005,
"loss": 0.9467,
"step": 10300
},
{
"epoch": 0.5327890031522918,
"grad_norm": 0.5289689898490906,
"learning_rate": 0.0005,
"loss": 0.9615,
"step": 10310
},
{
"epoch": 0.5333057723115084,
"grad_norm": 0.5715579986572266,
"learning_rate": 0.0005,
"loss": 0.9572,
"step": 10320
},
{
"epoch": 0.533822541470725,
"grad_norm": 0.5315567851066589,
"learning_rate": 0.0005,
"loss": 0.961,
"step": 10330
},
{
"epoch": 0.5343393106299416,
"grad_norm": 0.5441263318061829,
"learning_rate": 0.0005,
"loss": 0.9581,
"step": 10340
},
{
"epoch": 0.5348560797891582,
"grad_norm": 0.5785178542137146,
"learning_rate": 0.0005,
"loss": 0.9479,
"step": 10350
},
{
"epoch": 0.5353728489483748,
"grad_norm": 0.5260955691337585,
"learning_rate": 0.0005,
"loss": 0.9729,
"step": 10360
},
{
"epoch": 0.5358896181075914,
"grad_norm": 0.5125389099121094,
"learning_rate": 0.0005,
"loss": 0.9568,
"step": 10370
},
{
"epoch": 0.5364063872668079,
"grad_norm": 0.5203437209129333,
"learning_rate": 0.0005,
"loss": 0.9603,
"step": 10380
},
{
"epoch": 0.5369231564260245,
"grad_norm": 0.5585212707519531,
"learning_rate": 0.0005,
"loss": 0.9599,
"step": 10390
},
{
"epoch": 0.5374399255852411,
"grad_norm": 0.48404642939567566,
"learning_rate": 0.0005,
"loss": 0.9494,
"step": 10400
},
{
"epoch": 0.5379566947444576,
"grad_norm": 0.65147465467453,
"learning_rate": 0.0005,
"loss": 0.9469,
"step": 10410
},
{
"epoch": 0.5384734639036742,
"grad_norm": 0.5233981013298035,
"learning_rate": 0.0005,
"loss": 0.9564,
"step": 10420
},
{
"epoch": 0.5389902330628908,
"grad_norm": 0.5470656156539917,
"learning_rate": 0.0005,
"loss": 0.9377,
"step": 10430
},
{
"epoch": 0.5395070022221073,
"grad_norm": 0.522283673286438,
"learning_rate": 0.0005,
"loss": 0.9431,
"step": 10440
},
{
"epoch": 0.540023771381324,
"grad_norm": 0.5491459965705872,
"learning_rate": 0.0005,
"loss": 0.9565,
"step": 10450
},
{
"epoch": 0.5405405405405406,
"grad_norm": 0.5251693725585938,
"learning_rate": 0.0005,
"loss": 0.9485,
"step": 10460
},
{
"epoch": 0.5410573096997571,
"grad_norm": 0.5080156922340393,
"learning_rate": 0.0005,
"loss": 0.9577,
"step": 10470
},
{
"epoch": 0.5415740788589737,
"grad_norm": 0.5703207850456238,
"learning_rate": 0.0005,
"loss": 0.969,
"step": 10480
},
{
"epoch": 0.5420908480181903,
"grad_norm": 0.5768096446990967,
"learning_rate": 0.0005,
"loss": 0.9481,
"step": 10490
},
{
"epoch": 0.5426076171774068,
"grad_norm": 0.5119413733482361,
"learning_rate": 0.0005,
"loss": 0.9491,
"step": 10500
},
{
"epoch": 0.5431243863366234,
"grad_norm": 0.5329270958900452,
"learning_rate": 0.0005,
"loss": 0.9625,
"step": 10510
},
{
"epoch": 0.54364115549584,
"grad_norm": 0.528266966342926,
"learning_rate": 0.0005,
"loss": 0.9477,
"step": 10520
},
{
"epoch": 0.5441579246550566,
"grad_norm": 0.5584282279014587,
"learning_rate": 0.0005,
"loss": 0.9555,
"step": 10530
},
{
"epoch": 0.5446746938142731,
"grad_norm": 0.5280376672744751,
"learning_rate": 0.0005,
"loss": 0.9404,
"step": 10540
},
{
"epoch": 0.5451914629734897,
"grad_norm": 0.512711763381958,
"learning_rate": 0.0005,
"loss": 0.9551,
"step": 10550
},
{
"epoch": 0.5457082321327064,
"grad_norm": 0.5412839651107788,
"learning_rate": 0.0005,
"loss": 0.946,
"step": 10560
},
{
"epoch": 0.5462250012919229,
"grad_norm": 0.5105991363525391,
"learning_rate": 0.0005,
"loss": 0.9507,
"step": 10570
},
{
"epoch": 0.5467417704511395,
"grad_norm": 0.5690359473228455,
"learning_rate": 0.0005,
"loss": 0.9532,
"step": 10580
},
{
"epoch": 0.5472585396103561,
"grad_norm": 0.5333488583564758,
"learning_rate": 0.0005,
"loss": 0.9634,
"step": 10590
},
{
"epoch": 0.5477753087695726,
"grad_norm": 0.5984283089637756,
"learning_rate": 0.0005,
"loss": 0.9624,
"step": 10600
},
{
"epoch": 0.5482920779287892,
"grad_norm": 0.5076044201850891,
"learning_rate": 0.0005,
"loss": 0.9426,
"step": 10610
},
{
"epoch": 0.5488088470880058,
"grad_norm": 0.5287521481513977,
"learning_rate": 0.0005,
"loss": 0.9571,
"step": 10620
},
{
"epoch": 0.5493256162472223,
"grad_norm": 0.5479470491409302,
"learning_rate": 0.0005,
"loss": 0.9424,
"step": 10630
},
{
"epoch": 0.5498423854064389,
"grad_norm": 0.5120390057563782,
"learning_rate": 0.0005,
"loss": 0.9471,
"step": 10640
},
{
"epoch": 0.5503591545656555,
"grad_norm": 0.5130133032798767,
"learning_rate": 0.0005,
"loss": 0.9605,
"step": 10650
},
{
"epoch": 0.5508759237248722,
"grad_norm": 0.5507628917694092,
"learning_rate": 0.0005,
"loss": 0.9545,
"step": 10660
},
{
"epoch": 0.5513926928840887,
"grad_norm": 0.4929947555065155,
"learning_rate": 0.0005,
"loss": 0.9415,
"step": 10670
},
{
"epoch": 0.5519094620433053,
"grad_norm": 0.5119226574897766,
"learning_rate": 0.0005,
"loss": 0.9564,
"step": 10680
},
{
"epoch": 0.5524262312025219,
"grad_norm": 0.5126231908798218,
"learning_rate": 0.0005,
"loss": 0.9467,
"step": 10690
},
{
"epoch": 0.5529430003617384,
"grad_norm": 0.5123251676559448,
"learning_rate": 0.0005,
"loss": 0.9412,
"step": 10700
},
{
"epoch": 0.553459769520955,
"grad_norm": 0.5106756687164307,
"learning_rate": 0.0005,
"loss": 0.9583,
"step": 10710
},
{
"epoch": 0.5539765386801716,
"grad_norm": 0.520325243473053,
"learning_rate": 0.0005,
"loss": 0.9593,
"step": 10720
},
{
"epoch": 0.5544933078393881,
"grad_norm": 0.6005384922027588,
"learning_rate": 0.0005,
"loss": 0.9617,
"step": 10730
},
{
"epoch": 0.5550100769986047,
"grad_norm": 0.49362891912460327,
"learning_rate": 0.0005,
"loss": 0.9476,
"step": 10740
},
{
"epoch": 0.5555268461578213,
"grad_norm": 0.5586000084877014,
"learning_rate": 0.0005,
"loss": 0.9594,
"step": 10750
},
{
"epoch": 0.5560436153170378,
"grad_norm": 0.5586140155792236,
"learning_rate": 0.0005,
"loss": 0.9343,
"step": 10760
},
{
"epoch": 0.5565603844762544,
"grad_norm": 0.5251288414001465,
"learning_rate": 0.0005,
"loss": 0.945,
"step": 10770
},
{
"epoch": 0.5570771536354711,
"grad_norm": 0.5328302383422852,
"learning_rate": 0.0005,
"loss": 0.9479,
"step": 10780
},
{
"epoch": 0.5575939227946876,
"grad_norm": 0.49472010135650635,
"learning_rate": 0.0005,
"loss": 0.9668,
"step": 10790
},
{
"epoch": 0.5581106919539042,
"grad_norm": 0.5159969925880432,
"learning_rate": 0.0005,
"loss": 0.9559,
"step": 10800
},
{
"epoch": 0.5586274611131208,
"grad_norm": 0.5159046649932861,
"learning_rate": 0.0005,
"loss": 0.9397,
"step": 10810
},
{
"epoch": 0.5591442302723374,
"grad_norm": 0.5191036462783813,
"learning_rate": 0.0005,
"loss": 0.9466,
"step": 10820
},
{
"epoch": 0.5596609994315539,
"grad_norm": 0.5178474187850952,
"learning_rate": 0.0005,
"loss": 0.9439,
"step": 10830
},
{
"epoch": 0.5601777685907705,
"grad_norm": 0.5447880625724792,
"learning_rate": 0.0005,
"loss": 0.9576,
"step": 10840
},
{
"epoch": 0.5606945377499871,
"grad_norm": 0.5056577920913696,
"learning_rate": 0.0005,
"loss": 0.9514,
"step": 10850
},
{
"epoch": 0.5612113069092036,
"grad_norm": 0.5639669299125671,
"learning_rate": 0.0005,
"loss": 0.9482,
"step": 10860
},
{
"epoch": 0.5617280760684202,
"grad_norm": 0.570584774017334,
"learning_rate": 0.0005,
"loss": 0.9602,
"step": 10870
},
{
"epoch": 0.5622448452276368,
"grad_norm": 0.5161934494972229,
"learning_rate": 0.0005,
"loss": 0.9366,
"step": 10880
},
{
"epoch": 0.5627616143868533,
"grad_norm": 0.5521616339683533,
"learning_rate": 0.0005,
"loss": 0.9535,
"step": 10890
},
{
"epoch": 0.56327838354607,
"grad_norm": 0.5411272644996643,
"learning_rate": 0.0005,
"loss": 0.9324,
"step": 10900
},
{
"epoch": 0.5637951527052866,
"grad_norm": 0.5098778605461121,
"learning_rate": 0.0005,
"loss": 0.9408,
"step": 10910
},
{
"epoch": 0.5643119218645031,
"grad_norm": 0.4730329215526581,
"learning_rate": 0.0005,
"loss": 0.946,
"step": 10920
},
{
"epoch": 0.5648286910237197,
"grad_norm": 0.5085341334342957,
"learning_rate": 0.0005,
"loss": 0.9469,
"step": 10930
},
{
"epoch": 0.5653454601829363,
"grad_norm": 0.5201531052589417,
"learning_rate": 0.0005,
"loss": 0.9583,
"step": 10940
},
{
"epoch": 0.5658622293421529,
"grad_norm": 0.4958653748035431,
"learning_rate": 0.0005,
"loss": 0.9542,
"step": 10950
},
{
"epoch": 0.5663789985013694,
"grad_norm": 0.5279732942581177,
"learning_rate": 0.0005,
"loss": 0.9503,
"step": 10960
},
{
"epoch": 0.566895767660586,
"grad_norm": 0.5014291405677795,
"learning_rate": 0.0005,
"loss": 0.9562,
"step": 10970
},
{
"epoch": 0.5674125368198026,
"grad_norm": 0.5004532337188721,
"learning_rate": 0.0005,
"loss": 0.937,
"step": 10980
},
{
"epoch": 0.5679293059790191,
"grad_norm": 0.5091339349746704,
"learning_rate": 0.0005,
"loss": 0.9442,
"step": 10990
},
{
"epoch": 0.5684460751382358,
"grad_norm": 0.5625014901161194,
"learning_rate": 0.0005,
"loss": 0.9426,
"step": 11000
},
{
"epoch": 0.5689628442974524,
"grad_norm": 0.5026536583900452,
"learning_rate": 0.0005,
"loss": 0.952,
"step": 11010
},
{
"epoch": 0.5694796134566689,
"grad_norm": 0.4980801045894623,
"learning_rate": 0.0005,
"loss": 0.9347,
"step": 11020
},
{
"epoch": 0.5699963826158855,
"grad_norm": 0.4974989593029022,
"learning_rate": 0.0005,
"loss": 0.9442,
"step": 11030
},
{
"epoch": 0.5705131517751021,
"grad_norm": 0.5242035388946533,
"learning_rate": 0.0005,
"loss": 0.9464,
"step": 11040
},
{
"epoch": 0.5710299209343186,
"grad_norm": 0.5066283941268921,
"learning_rate": 0.0005,
"loss": 0.9276,
"step": 11050
},
{
"epoch": 0.5715466900935352,
"grad_norm": 0.508834958076477,
"learning_rate": 0.0005,
"loss": 0.9402,
"step": 11060
},
{
"epoch": 0.5720634592527518,
"grad_norm": 0.5046612024307251,
"learning_rate": 0.0005,
"loss": 0.9487,
"step": 11070
},
{
"epoch": 0.5725802284119684,
"grad_norm": 0.5268915891647339,
"learning_rate": 0.0005,
"loss": 0.9415,
"step": 11080
},
{
"epoch": 0.5730969975711849,
"grad_norm": 0.5040035247802734,
"learning_rate": 0.0005,
"loss": 0.9326,
"step": 11090
},
{
"epoch": 0.5736137667304015,
"grad_norm": 0.500636100769043,
"learning_rate": 0.0005,
"loss": 0.9422,
"step": 11100
},
{
"epoch": 0.5741305358896182,
"grad_norm": 0.5215865969657898,
"learning_rate": 0.0005,
"loss": 0.9414,
"step": 11110
},
{
"epoch": 0.5746473050488347,
"grad_norm": 0.5058110356330872,
"learning_rate": 0.0005,
"loss": 0.9522,
"step": 11120
},
{
"epoch": 0.5751640742080513,
"grad_norm": 0.5117678046226501,
"learning_rate": 0.0005,
"loss": 0.9518,
"step": 11130
},
{
"epoch": 0.5756808433672679,
"grad_norm": 0.5039757490158081,
"learning_rate": 0.0005,
"loss": 0.9418,
"step": 11140
},
{
"epoch": 0.5761976125264844,
"grad_norm": 0.5518759489059448,
"learning_rate": 0.0005,
"loss": 0.9407,
"step": 11150
},
{
"epoch": 0.576714381685701,
"grad_norm": 0.5106251239776611,
"learning_rate": 0.0005,
"loss": 0.9367,
"step": 11160
},
{
"epoch": 0.5772311508449176,
"grad_norm": 0.5682827830314636,
"learning_rate": 0.0005,
"loss": 0.945,
"step": 11170
},
{
"epoch": 0.5777479200041341,
"grad_norm": 0.521513044834137,
"learning_rate": 0.0005,
"loss": 0.9453,
"step": 11180
},
{
"epoch": 0.5782646891633507,
"grad_norm": 0.5230028629302979,
"learning_rate": 0.0005,
"loss": 0.9544,
"step": 11190
},
{
"epoch": 0.5787814583225673,
"grad_norm": 0.5285042524337769,
"learning_rate": 0.0005,
"loss": 0.9459,
"step": 11200
},
{
"epoch": 0.5792982274817838,
"grad_norm": 0.5230273604393005,
"learning_rate": 0.0005,
"loss": 0.9354,
"step": 11210
},
{
"epoch": 0.5798149966410004,
"grad_norm": 0.5298386216163635,
"learning_rate": 0.0005,
"loss": 0.9578,
"step": 11220
},
{
"epoch": 0.5803317658002171,
"grad_norm": 0.5199642181396484,
"learning_rate": 0.0005,
"loss": 0.9559,
"step": 11230
},
{
"epoch": 0.5808485349594337,
"grad_norm": 0.5283148884773254,
"learning_rate": 0.0005,
"loss": 0.9315,
"step": 11240
},
{
"epoch": 0.5813653041186502,
"grad_norm": 0.5081456303596497,
"learning_rate": 0.0005,
"loss": 0.936,
"step": 11250
},
{
"epoch": 0.5818820732778668,
"grad_norm": 0.4844646751880646,
"learning_rate": 0.0005,
"loss": 0.9478,
"step": 11260
},
{
"epoch": 0.5823988424370834,
"grad_norm": 0.5176190733909607,
"learning_rate": 0.0005,
"loss": 0.918,
"step": 11270
},
{
"epoch": 0.5829156115962999,
"grad_norm": 0.5267295241355896,
"learning_rate": 0.0005,
"loss": 0.9462,
"step": 11280
},
{
"epoch": 0.5834323807555165,
"grad_norm": 0.5780160427093506,
"learning_rate": 0.0005,
"loss": 0.9302,
"step": 11290
},
{
"epoch": 0.5839491499147331,
"grad_norm": 0.47616294026374817,
"learning_rate": 0.0005,
"loss": 0.945,
"step": 11300
},
{
"epoch": 0.5844659190739496,
"grad_norm": 0.556125283241272,
"learning_rate": 0.0005,
"loss": 0.9306,
"step": 11310
},
{
"epoch": 0.5849826882331662,
"grad_norm": 0.5071564316749573,
"learning_rate": 0.0005,
"loss": 0.9611,
"step": 11320
},
{
"epoch": 0.5854994573923828,
"grad_norm": 0.5186158418655396,
"learning_rate": 0.0005,
"loss": 0.9311,
"step": 11330
},
{
"epoch": 0.5860162265515994,
"grad_norm": 0.48720046877861023,
"learning_rate": 0.0005,
"loss": 0.9609,
"step": 11340
},
{
"epoch": 0.586532995710816,
"grad_norm": 0.49717170000076294,
"learning_rate": 0.0005,
"loss": 0.957,
"step": 11350
},
{
"epoch": 0.5870497648700326,
"grad_norm": 0.534752368927002,
"learning_rate": 0.0005,
"loss": 0.94,
"step": 11360
},
{
"epoch": 0.5875665340292492,
"grad_norm": 0.523997962474823,
"learning_rate": 0.0005,
"loss": 0.9373,
"step": 11370
},
{
"epoch": 0.5880833031884657,
"grad_norm": 0.49437177181243896,
"learning_rate": 0.0005,
"loss": 0.9327,
"step": 11380
},
{
"epoch": 0.5886000723476823,
"grad_norm": 0.4986345171928406,
"learning_rate": 0.0005,
"loss": 0.9353,
"step": 11390
},
{
"epoch": 0.5891168415068989,
"grad_norm": 0.49254122376441956,
"learning_rate": 0.0005,
"loss": 0.9451,
"step": 11400
},
{
"epoch": 0.5896336106661154,
"grad_norm": 0.5066004991531372,
"learning_rate": 0.0005,
"loss": 0.9307,
"step": 11410
},
{
"epoch": 0.590150379825332,
"grad_norm": 0.4954734444618225,
"learning_rate": 0.0005,
"loss": 0.9345,
"step": 11420
},
{
"epoch": 0.5906671489845486,
"grad_norm": 0.4814952313899994,
"learning_rate": 0.0005,
"loss": 0.9383,
"step": 11430
},
{
"epoch": 0.5911839181437651,
"grad_norm": 0.48946642875671387,
"learning_rate": 0.0005,
"loss": 0.9314,
"step": 11440
},
{
"epoch": 0.5917006873029818,
"grad_norm": 0.5009201765060425,
"learning_rate": 0.0005,
"loss": 0.9532,
"step": 11450
},
{
"epoch": 0.5922174564621984,
"grad_norm": 0.5228848457336426,
"learning_rate": 0.0005,
"loss": 0.9346,
"step": 11460
},
{
"epoch": 0.5927342256214149,
"grad_norm": 0.5121431350708008,
"learning_rate": 0.0005,
"loss": 0.9367,
"step": 11470
},
{
"epoch": 0.5932509947806315,
"grad_norm": 0.49431100487709045,
"learning_rate": 0.0005,
"loss": 0.9261,
"step": 11480
},
{
"epoch": 0.5937677639398481,
"grad_norm": 0.516291081905365,
"learning_rate": 0.0005,
"loss": 0.9452,
"step": 11490
},
{
"epoch": 0.5942845330990646,
"grad_norm": 0.5128830671310425,
"learning_rate": 0.0005,
"loss": 0.9446,
"step": 11500
},
{
"epoch": 0.5948013022582812,
"grad_norm": 0.5089874267578125,
"learning_rate": 0.0005,
"loss": 0.9321,
"step": 11510
},
{
"epoch": 0.5953180714174978,
"grad_norm": 0.5457943677902222,
"learning_rate": 0.0005,
"loss": 0.9349,
"step": 11520
},
{
"epoch": 0.5958348405767144,
"grad_norm": 0.5342771410942078,
"learning_rate": 0.0005,
"loss": 0.9326,
"step": 11530
},
{
"epoch": 0.5963516097359309,
"grad_norm": 0.511667788028717,
"learning_rate": 0.0005,
"loss": 0.933,
"step": 11540
},
{
"epoch": 0.5968683788951475,
"grad_norm": 0.5304045677185059,
"learning_rate": 0.0005,
"loss": 0.9278,
"step": 11550
},
{
"epoch": 0.5973851480543642,
"grad_norm": 0.5285548567771912,
"learning_rate": 0.0005,
"loss": 0.9451,
"step": 11560
},
{
"epoch": 0.5979019172135807,
"grad_norm": 0.5200523734092712,
"learning_rate": 0.0005,
"loss": 0.9256,
"step": 11570
},
{
"epoch": 0.5984186863727973,
"grad_norm": 0.49133771657943726,
"learning_rate": 0.0005,
"loss": 0.9107,
"step": 11580
},
{
"epoch": 0.5989354555320139,
"grad_norm": 0.5477631092071533,
"learning_rate": 0.0005,
"loss": 0.9273,
"step": 11590
},
{
"epoch": 0.5994522246912304,
"grad_norm": 0.5735862255096436,
"learning_rate": 0.0005,
"loss": 0.9358,
"step": 11600
},
{
"epoch": 0.599968993850447,
"grad_norm": 0.48721542954444885,
"learning_rate": 0.0005,
"loss": 0.9273,
"step": 11610
},
{
"epoch": 0.6004857630096636,
"grad_norm": 0.5106229186058044,
"learning_rate": 0.0005,
"loss": 0.9283,
"step": 11620
},
{
"epoch": 0.6010025321688801,
"grad_norm": 0.4914691746234894,
"learning_rate": 0.0005,
"loss": 0.9303,
"step": 11630
},
{
"epoch": 0.6015193013280967,
"grad_norm": 0.5924090147018433,
"learning_rate": 0.0005,
"loss": 0.9199,
"step": 11640
},
{
"epoch": 0.6020360704873133,
"grad_norm": 0.4983723759651184,
"learning_rate": 0.0005,
"loss": 0.9384,
"step": 11650
},
{
"epoch": 0.60255283964653,
"grad_norm": 0.52519690990448,
"learning_rate": 0.0005,
"loss": 0.934,
"step": 11660
},
{
"epoch": 0.6030696088057464,
"grad_norm": 0.5365654826164246,
"learning_rate": 0.0005,
"loss": 0.9342,
"step": 11670
},
{
"epoch": 0.6035863779649631,
"grad_norm": 0.4914066195487976,
"learning_rate": 0.0005,
"loss": 0.9453,
"step": 11680
},
{
"epoch": 0.6041031471241797,
"grad_norm": 0.4888913929462433,
"learning_rate": 0.0005,
"loss": 0.9322,
"step": 11690
},
{
"epoch": 0.6046199162833962,
"grad_norm": 0.4911440908908844,
"learning_rate": 0.0005,
"loss": 0.9327,
"step": 11700
},
{
"epoch": 0.6051366854426128,
"grad_norm": 0.5005333423614502,
"learning_rate": 0.0005,
"loss": 0.9467,
"step": 11710
},
{
"epoch": 0.6056534546018294,
"grad_norm": 0.5367693901062012,
"learning_rate": 0.0005,
"loss": 0.9384,
"step": 11720
},
{
"epoch": 0.6061702237610459,
"grad_norm": 0.48554107546806335,
"learning_rate": 0.0005,
"loss": 0.9446,
"step": 11730
},
{
"epoch": 0.6066869929202625,
"grad_norm": 0.514530599117279,
"learning_rate": 0.0005,
"loss": 0.914,
"step": 11740
},
{
"epoch": 0.6072037620794791,
"grad_norm": 0.5004679560661316,
"learning_rate": 0.0005,
"loss": 0.9342,
"step": 11750
},
{
"epoch": 0.6077205312386956,
"grad_norm": 0.516576886177063,
"learning_rate": 0.0005,
"loss": 0.9325,
"step": 11760
},
{
"epoch": 0.6082373003979122,
"grad_norm": 0.5298195481300354,
"learning_rate": 0.0005,
"loss": 0.9324,
"step": 11770
},
{
"epoch": 0.6087540695571289,
"grad_norm": 0.4899151921272278,
"learning_rate": 0.0005,
"loss": 0.9161,
"step": 11780
},
{
"epoch": 0.6092708387163454,
"grad_norm": 0.5261816382408142,
"learning_rate": 0.0005,
"loss": 0.9393,
"step": 11790
},
{
"epoch": 0.609787607875562,
"grad_norm": 0.5143525004386902,
"learning_rate": 0.0005,
"loss": 0.9393,
"step": 11800
},
{
"epoch": 0.6103043770347786,
"grad_norm": 0.521551251411438,
"learning_rate": 0.0005,
"loss": 0.9291,
"step": 11810
},
{
"epoch": 0.6108211461939952,
"grad_norm": 0.4708675444126129,
"learning_rate": 0.0005,
"loss": 0.9462,
"step": 11820
},
{
"epoch": 0.6113379153532117,
"grad_norm": 0.47985512018203735,
"learning_rate": 0.0005,
"loss": 0.9355,
"step": 11830
},
{
"epoch": 0.6118546845124283,
"grad_norm": 0.5093055367469788,
"learning_rate": 0.0005,
"loss": 0.9301,
"step": 11840
},
{
"epoch": 0.6123714536716449,
"grad_norm": 0.5011575222015381,
"learning_rate": 0.0005,
"loss": 0.9382,
"step": 11850
},
{
"epoch": 0.6128882228308614,
"grad_norm": 0.5071706771850586,
"learning_rate": 0.0005,
"loss": 0.9425,
"step": 11860
},
{
"epoch": 0.613404991990078,
"grad_norm": 0.49520188570022583,
"learning_rate": 0.0005,
"loss": 0.9402,
"step": 11870
},
{
"epoch": 0.6139217611492946,
"grad_norm": 0.46812620759010315,
"learning_rate": 0.0005,
"loss": 0.9325,
"step": 11880
},
{
"epoch": 0.6144385303085111,
"grad_norm": 0.524341344833374,
"learning_rate": 0.0005,
"loss": 0.9267,
"step": 11890
},
{
"epoch": 0.6149552994677278,
"grad_norm": 0.48518240451812744,
"learning_rate": 0.0005,
"loss": 0.938,
"step": 11900
},
{
"epoch": 0.6154720686269444,
"grad_norm": 0.5080456137657166,
"learning_rate": 0.0005,
"loss": 0.9341,
"step": 11910
},
{
"epoch": 0.6159888377861609,
"grad_norm": 0.5626226663589478,
"learning_rate": 0.0005,
"loss": 0.9258,
"step": 11920
},
{
"epoch": 0.6165056069453775,
"grad_norm": 0.47337082028388977,
"learning_rate": 0.0005,
"loss": 0.9421,
"step": 11930
},
{
"epoch": 0.6170223761045941,
"grad_norm": 0.4747110903263092,
"learning_rate": 0.0005,
"loss": 0.9339,
"step": 11940
},
{
"epoch": 0.6175391452638107,
"grad_norm": 0.5242559909820557,
"learning_rate": 0.0005,
"loss": 0.942,
"step": 11950
},
{
"epoch": 0.6180559144230272,
"grad_norm": 0.5247402191162109,
"learning_rate": 0.0005,
"loss": 0.9269,
"step": 11960
},
{
"epoch": 0.6185726835822438,
"grad_norm": 0.5551696419715881,
"learning_rate": 0.0005,
"loss": 0.9268,
"step": 11970
},
{
"epoch": 0.6190894527414604,
"grad_norm": 0.5222793817520142,
"learning_rate": 0.0005,
"loss": 0.9331,
"step": 11980
},
{
"epoch": 0.6196062219006769,
"grad_norm": 0.49412423372268677,
"learning_rate": 0.0005,
"loss": 0.9292,
"step": 11990
},
{
"epoch": 0.6201229910598935,
"grad_norm": 0.49935638904571533,
"learning_rate": 0.0005,
"loss": 0.9168,
"step": 12000
},
{
"epoch": 0.6206397602191102,
"grad_norm": 0.5514285564422607,
"learning_rate": 0.0005,
"loss": 0.9289,
"step": 12010
},
{
"epoch": 0.6211565293783267,
"grad_norm": 0.5182361602783203,
"learning_rate": 0.0005,
"loss": 0.9359,
"step": 12020
},
{
"epoch": 0.6216732985375433,
"grad_norm": 0.5162422060966492,
"learning_rate": 0.0005,
"loss": 0.9257,
"step": 12030
},
{
"epoch": 0.6221900676967599,
"grad_norm": 0.4926648437976837,
"learning_rate": 0.0005,
"loss": 0.935,
"step": 12040
},
{
"epoch": 0.6227068368559764,
"grad_norm": 0.5213857293128967,
"learning_rate": 0.0005,
"loss": 0.9353,
"step": 12050
},
{
"epoch": 0.623223606015193,
"grad_norm": 0.5043472051620483,
"learning_rate": 0.0005,
"loss": 0.9499,
"step": 12060
},
{
"epoch": 0.6237403751744096,
"grad_norm": 0.48353925347328186,
"learning_rate": 0.0005,
"loss": 0.9319,
"step": 12070
},
{
"epoch": 0.6242571443336262,
"grad_norm": 0.5488812923431396,
"learning_rate": 0.0005,
"loss": 0.9262,
"step": 12080
},
{
"epoch": 0.6247739134928427,
"grad_norm": 0.5349071621894836,
"learning_rate": 0.0005,
"loss": 0.9317,
"step": 12090
},
{
"epoch": 0.6252906826520593,
"grad_norm": 0.5111981630325317,
"learning_rate": 0.0005,
"loss": 0.9128,
"step": 12100
},
{
"epoch": 0.625807451811276,
"grad_norm": 0.525330126285553,
"learning_rate": 0.0005,
"loss": 0.9212,
"step": 12110
},
{
"epoch": 0.6263242209704925,
"grad_norm": 0.5191537141799927,
"learning_rate": 0.0005,
"loss": 0.9313,
"step": 12120
},
{
"epoch": 0.6268409901297091,
"grad_norm": 0.49418073892593384,
"learning_rate": 0.0005,
"loss": 0.9408,
"step": 12130
},
{
"epoch": 0.6273577592889257,
"grad_norm": 0.49373695254325867,
"learning_rate": 0.0005,
"loss": 0.9226,
"step": 12140
},
{
"epoch": 0.6278745284481422,
"grad_norm": 0.488068550825119,
"learning_rate": 0.0005,
"loss": 0.9407,
"step": 12150
},
{
"epoch": 0.6283912976073588,
"grad_norm": 0.5186513662338257,
"learning_rate": 0.0005,
"loss": 0.9351,
"step": 12160
},
{
"epoch": 0.6289080667665754,
"grad_norm": 0.532514750957489,
"learning_rate": 0.0005,
"loss": 0.9323,
"step": 12170
},
{
"epoch": 0.6294248359257919,
"grad_norm": 0.4832149147987366,
"learning_rate": 0.0005,
"loss": 0.9303,
"step": 12180
},
{
"epoch": 0.6299416050850085,
"grad_norm": 0.5020478963851929,
"learning_rate": 0.0005,
"loss": 0.9278,
"step": 12190
},
{
"epoch": 0.6304583742442251,
"grad_norm": 0.45874807238578796,
"learning_rate": 0.0005,
"loss": 0.9205,
"step": 12200
},
{
"epoch": 0.6309751434034416,
"grad_norm": 0.5273077487945557,
"learning_rate": 0.0005,
"loss": 0.9133,
"step": 12210
},
{
"epoch": 0.6314919125626582,
"grad_norm": 0.49270930886268616,
"learning_rate": 0.0005,
"loss": 0.9228,
"step": 12220
},
{
"epoch": 0.6320086817218749,
"grad_norm": 0.47435376048088074,
"learning_rate": 0.0005,
"loss": 0.937,
"step": 12230
},
{
"epoch": 0.6325254508810915,
"grad_norm": 0.49013498425483704,
"learning_rate": 0.0005,
"loss": 0.925,
"step": 12240
},
{
"epoch": 0.633042220040308,
"grad_norm": 0.481581449508667,
"learning_rate": 0.0005,
"loss": 0.9209,
"step": 12250
},
{
"epoch": 0.6335589891995246,
"grad_norm": 0.5189198851585388,
"learning_rate": 0.0005,
"loss": 0.9206,
"step": 12260
},
{
"epoch": 0.6340757583587412,
"grad_norm": 0.47871729731559753,
"learning_rate": 0.0005,
"loss": 0.9279,
"step": 12270
},
{
"epoch": 0.6345925275179577,
"grad_norm": 0.4953111410140991,
"learning_rate": 0.0005,
"loss": 0.93,
"step": 12280
},
{
"epoch": 0.6351092966771743,
"grad_norm": 0.5199342370033264,
"learning_rate": 0.0005,
"loss": 0.9246,
"step": 12290
},
{
"epoch": 0.6356260658363909,
"grad_norm": 0.48852893710136414,
"learning_rate": 0.0005,
"loss": 0.9222,
"step": 12300
},
{
"epoch": 0.6361428349956074,
"grad_norm": 0.5054774284362793,
"learning_rate": 0.0005,
"loss": 0.9346,
"step": 12310
},
{
"epoch": 0.636659604154824,
"grad_norm": 0.5030813813209534,
"learning_rate": 0.0005,
"loss": 0.9238,
"step": 12320
},
{
"epoch": 0.6371763733140406,
"grad_norm": 0.47299617528915405,
"learning_rate": 0.0005,
"loss": 0.9317,
"step": 12330
},
{
"epoch": 0.6376931424732571,
"grad_norm": 0.5473576784133911,
"learning_rate": 0.0005,
"loss": 0.9206,
"step": 12340
},
{
"epoch": 0.6382099116324738,
"grad_norm": 0.4999616742134094,
"learning_rate": 0.0005,
"loss": 0.9449,
"step": 12350
},
{
"epoch": 0.6387266807916904,
"grad_norm": 0.5508975982666016,
"learning_rate": 0.0005,
"loss": 0.921,
"step": 12360
},
{
"epoch": 0.639243449950907,
"grad_norm": 0.5574737191200256,
"learning_rate": 0.0005,
"loss": 0.935,
"step": 12370
},
{
"epoch": 0.6397602191101235,
"grad_norm": 0.5615907907485962,
"learning_rate": 0.0005,
"loss": 0.9263,
"step": 12380
},
{
"epoch": 0.6402769882693401,
"grad_norm": 0.5180084109306335,
"learning_rate": 0.0005,
"loss": 0.9235,
"step": 12390
},
{
"epoch": 0.6407937574285567,
"grad_norm": 0.46675363183021545,
"learning_rate": 0.0005,
"loss": 0.9237,
"step": 12400
},
{
"epoch": 0.6413105265877732,
"grad_norm": 0.4773077070713043,
"learning_rate": 0.0005,
"loss": 0.9098,
"step": 12410
},
{
"epoch": 0.6418272957469898,
"grad_norm": 0.5147991180419922,
"learning_rate": 0.0005,
"loss": 0.9215,
"step": 12420
},
{
"epoch": 0.6423440649062064,
"grad_norm": 0.47254249453544617,
"learning_rate": 0.0005,
"loss": 0.925,
"step": 12430
},
{
"epoch": 0.6428608340654229,
"grad_norm": 0.48444342613220215,
"learning_rate": 0.0005,
"loss": 0.9138,
"step": 12440
},
{
"epoch": 0.6433776032246395,
"grad_norm": 0.4626687169075012,
"learning_rate": 0.0005,
"loss": 0.9239,
"step": 12450
},
{
"epoch": 0.6438943723838562,
"grad_norm": 0.48663684725761414,
"learning_rate": 0.0005,
"loss": 0.9365,
"step": 12460
},
{
"epoch": 0.6444111415430727,
"grad_norm": 0.5721457600593567,
"learning_rate": 0.0005,
"loss": 0.9228,
"step": 12470
},
{
"epoch": 0.6449279107022893,
"grad_norm": 0.4997864067554474,
"learning_rate": 0.0005,
"loss": 0.9203,
"step": 12480
},
{
"epoch": 0.6454446798615059,
"grad_norm": 0.4961699843406677,
"learning_rate": 0.0005,
"loss": 0.909,
"step": 12490
},
{
"epoch": 0.6459614490207224,
"grad_norm": 0.49018388986587524,
"learning_rate": 0.0005,
"loss": 0.9242,
"step": 12500
},
{
"epoch": 0.646478218179939,
"grad_norm": 0.5205206871032715,
"learning_rate": 0.0005,
"loss": 0.923,
"step": 12510
},
{
"epoch": 0.6469949873391556,
"grad_norm": 0.527740478515625,
"learning_rate": 0.0005,
"loss": 0.9267,
"step": 12520
},
{
"epoch": 0.6475117564983722,
"grad_norm": 0.4962241053581238,
"learning_rate": 0.0005,
"loss": 0.9206,
"step": 12530
},
{
"epoch": 0.6480285256575887,
"grad_norm": 0.47836676239967346,
"learning_rate": 0.0005,
"loss": 0.9134,
"step": 12540
},
{
"epoch": 0.6485452948168053,
"grad_norm": 0.48245546221733093,
"learning_rate": 0.0005,
"loss": 0.9326,
"step": 12550
},
{
"epoch": 0.649062063976022,
"grad_norm": 0.503021240234375,
"learning_rate": 0.0005,
"loss": 0.9361,
"step": 12560
},
{
"epoch": 0.6495788331352385,
"grad_norm": 0.5059377551078796,
"learning_rate": 0.0005,
"loss": 0.8998,
"step": 12570
},
{
"epoch": 0.6500956022944551,
"grad_norm": 0.49928557872772217,
"learning_rate": 0.0004994267553729553,
"loss": 0.929,
"step": 12580
},
{
"epoch": 0.6506123714536717,
"grad_norm": 0.4804401099681854,
"learning_rate": 0.0004963394943411699,
"loss": 0.9173,
"step": 12590
},
{
"epoch": 0.6511291406128882,
"grad_norm": 0.4649386405944824,
"learning_rate": 0.0004932713175506187,
"loss": 0.9256,
"step": 12600
},
{
"epoch": 0.6516459097721048,
"grad_norm": 0.47866883873939514,
"learning_rate": 0.0004902221070299804,
"loss": 0.9185,
"step": 12610
},
{
"epoch": 0.6521626789313214,
"grad_norm": 0.4801424443721771,
"learning_rate": 0.00048719174553718596,
"loss": 0.9276,
"step": 12620
},
{
"epoch": 0.6526794480905379,
"grad_norm": 0.4797857105731964,
"learning_rate": 0.0004841801165549115,
"loss": 0.9262,
"step": 12630
},
{
"epoch": 0.6531962172497545,
"grad_norm": 0.4703647494316101,
"learning_rate": 0.0004811871042860973,
"loss": 0.9113,
"step": 12640
},
{
"epoch": 0.6537129864089711,
"grad_norm": 0.4952949583530426,
"learning_rate": 0.00047821259364949593,
"loss": 0.9372,
"step": 12650
},
{
"epoch": 0.6542297555681877,
"grad_norm": 0.48347562551498413,
"learning_rate": 0.0004752564702752473,
"loss": 0.9224,
"step": 12660
},
{
"epoch": 0.6547465247274042,
"grad_norm": 0.4917808473110199,
"learning_rate": 0.0004723186205004811,
"loss": 0.91,
"step": 12670
},
{
"epoch": 0.6552632938866209,
"grad_norm": 0.5070691704750061,
"learning_rate": 0.00046939893136494626,
"loss": 0.9147,
"step": 12680
},
{
"epoch": 0.6557800630458375,
"grad_norm": 0.49811315536499023,
"learning_rate": 0.0004664972906066682,
"loss": 0.903,
"step": 12690
},
{
"epoch": 0.656296832205054,
"grad_norm": 0.5315011739730835,
"learning_rate": 0.0004636135866576317,
"loss": 0.9087,
"step": 12700
},
{
"epoch": 0.6568136013642706,
"grad_norm": 0.4951007068157196,
"learning_rate": 0.00046074770863949155,
"loss": 0.9282,
"step": 12710
},
{
"epoch": 0.6573303705234872,
"grad_norm": 0.49288272857666016,
"learning_rate": 0.00045789954635930914,
"loss": 0.9279,
"step": 12720
},
{
"epoch": 0.6578471396827037,
"grad_norm": 0.4682476222515106,
"learning_rate": 0.00045506899030531544,
"loss": 0.9122,
"step": 12730
},
{
"epoch": 0.6583639088419203,
"grad_norm": 0.5064340233802795,
"learning_rate": 0.0004522559316427005,
"loss": 0.9114,
"step": 12740
},
{
"epoch": 0.6588806780011369,
"grad_norm": 0.4566449224948883,
"learning_rate": 0.00044946026220942865,
"loss": 0.9133,
"step": 12750
},
{
"epoch": 0.6593974471603534,
"grad_norm": 0.4679611623287201,
"learning_rate": 0.00044668187451207944,
"loss": 0.8991,
"step": 12760
},
{
"epoch": 0.65991421631957,
"grad_norm": 0.48330655694007874,
"learning_rate": 0.00044392066172171496,
"loss": 0.9103,
"step": 12770
},
{
"epoch": 0.6604309854787866,
"grad_norm": 0.5204933285713196,
"learning_rate": 0.00044117651766977195,
"loss": 0.9149,
"step": 12780
},
{
"epoch": 0.6609477546380031,
"grad_norm": 0.48776623606681824,
"learning_rate": 0.00043844933684397984,
"loss": 0.9185,
"step": 12790
},
{
"epoch": 0.6614645237972198,
"grad_norm": 0.4869120419025421,
"learning_rate": 0.0004357390143843035,
"loss": 0.9096,
"step": 12800
},
{
"epoch": 0.6619812929564364,
"grad_norm": 0.4783307611942291,
"learning_rate": 0.0004330454460789117,
"loss": 0.8977,
"step": 12810
},
{
"epoch": 0.662498062115653,
"grad_norm": 0.4555026888847351,
"learning_rate": 0.00043036852836016994,
"loss": 0.9039,
"step": 12820
},
{
"epoch": 0.6630148312748695,
"grad_norm": 0.47510290145874023,
"learning_rate": 0.00042770815830065834,
"loss": 0.9051,
"step": 12830
},
{
"epoch": 0.6635316004340861,
"grad_norm": 0.4920065999031067,
"learning_rate": 0.0004250642336092143,
"loss": 0.9138,
"step": 12840
},
{
"epoch": 0.6640483695933027,
"grad_norm": 0.47680869698524475,
"learning_rate": 0.000422436652626999,
"loss": 0.9131,
"step": 12850
},
{
"epoch": 0.6645651387525192,
"grad_norm": 0.5098276138305664,
"learning_rate": 0.00041982531432358883,
"loss": 0.9158,
"step": 12860
},
{
"epoch": 0.6650819079117358,
"grad_norm": 0.4745832085609436,
"learning_rate": 0.000417230118293091,
"loss": 0.9019,
"step": 12870
},
{
"epoch": 0.6655986770709524,
"grad_norm": 0.456750750541687,
"learning_rate": 0.00041465096475028256,
"loss": 0.8881,
"step": 12880
},
{
"epoch": 0.6661154462301689,
"grad_norm": 0.49757450819015503,
"learning_rate": 0.00041208775452677374,
"loss": 0.8971,
"step": 12890
},
{
"epoch": 0.6666322153893856,
"grad_norm": 0.4721812605857849,
"learning_rate": 0.0004095403890671951,
"loss": 0.8896,
"step": 12900
},
{
"epoch": 0.6671489845486022,
"grad_norm": 0.4674829840660095,
"learning_rate": 0.00040700877042540803,
"loss": 0.8978,
"step": 12910
},
{
"epoch": 0.6676657537078187,
"grad_norm": 0.45353659987449646,
"learning_rate": 0.0004044928012607386,
"loss": 0.9012,
"step": 12920
},
{
"epoch": 0.6681825228670353,
"grad_norm": 0.44594326615333557,
"learning_rate": 0.0004019923848342348,
"loss": 0.8864,
"step": 12930
},
{
"epoch": 0.6686992920262519,
"grad_norm": 0.4606136083602905,
"learning_rate": 0.0003995074250049472,
"loss": 0.9042,
"step": 12940
},
{
"epoch": 0.6692160611854685,
"grad_norm": 0.4778830111026764,
"learning_rate": 0.000397037826226232,
"loss": 0.8883,
"step": 12950
},
{
"epoch": 0.669732830344685,
"grad_norm": 0.4795719385147095,
"learning_rate": 0.00039458349354207754,
"loss": 0.8943,
"step": 12960
},
{
"epoch": 0.6702495995039016,
"grad_norm": 0.46150490641593933,
"learning_rate": 0.000392144332583453,
"loss": 0.8986,
"step": 12970
},
{
"epoch": 0.6707663686631182,
"grad_norm": 0.4591388404369354,
"learning_rate": 0.00038972024956468015,
"loss": 0.8973,
"step": 12980
},
{
"epoch": 0.6712831378223347,
"grad_norm": 0.447889506816864,
"learning_rate": 0.00038731115127982704,
"loss": 0.8982,
"step": 12990
},
{
"epoch": 0.6717999069815513,
"grad_norm": 0.4567711651325226,
"learning_rate": 0.00038491694509912446,
"loss": 0.8946,
"step": 13000
},
{
"epoch": 0.672316676140768,
"grad_norm": 0.4653710424900055,
"learning_rate": 0.00038253753896540417,
"loss": 0.8805,
"step": 13010
},
{
"epoch": 0.6728334452999845,
"grad_norm": 0.47622108459472656,
"learning_rate": 0.00038017284139055935,
"loss": 0.8971,
"step": 13020
},
{
"epoch": 0.6733502144592011,
"grad_norm": 0.46596968173980713,
"learning_rate": 0.0003778227614520272,
"loss": 0.8872,
"step": 13030
},
{
"epoch": 0.6738669836184177,
"grad_norm": 0.47842490673065186,
"learning_rate": 0.0003754872087892921,
"loss": 0.8844,
"step": 13040
},
{
"epoch": 0.6743837527776342,
"grad_norm": 0.5763306617736816,
"learning_rate": 0.00037316609360041244,
"loss": 0.884,
"step": 13050
},
{
"epoch": 0.6749005219368508,
"grad_norm": 0.4681786298751831,
"learning_rate": 0.00037085932663856664,
"loss": 0.8957,
"step": 13060
},
{
"epoch": 0.6754172910960674,
"grad_norm": 0.4536014199256897,
"learning_rate": 0.0003685668192086224,
"loss": 0.8962,
"step": 13070
},
{
"epoch": 0.675934060255284,
"grad_norm": 0.4593828320503235,
"learning_rate": 0.0003662884831637259,
"loss": 0.8792,
"step": 13080
},
{
"epoch": 0.6764508294145005,
"grad_norm": 0.4837941527366638,
"learning_rate": 0.00036402423090191283,
"loss": 0.8928,
"step": 13090
},
{
"epoch": 0.6769675985737171,
"grad_norm": 0.47275635600090027,
"learning_rate": 0.0003617739753627399,
"loss": 0.8885,
"step": 13100
},
{
"epoch": 0.6774843677329337,
"grad_norm": 0.465971976518631,
"learning_rate": 0.00035953763002393753,
"loss": 0.8859,
"step": 13110
},
{
"epoch": 0.6780011368921502,
"grad_norm": 0.46785497665405273,
"learning_rate": 0.00035731510889808296,
"loss": 0.8829,
"step": 13120
},
{
"epoch": 0.6785179060513669,
"grad_norm": 0.44653069972991943,
"learning_rate": 0.0003551063265292941,
"loss": 0.8694,
"step": 13130
},
{
"epoch": 0.6790346752105835,
"grad_norm": 0.46585527062416077,
"learning_rate": 0.0003529111979899436,
"loss": 0.8871,
"step": 13140
},
{
"epoch": 0.6795514443698,
"grad_norm": 0.5283601880073547,
"learning_rate": 0.00035072963887739373,
"loss": 0.8863,
"step": 13150
},
{
"epoch": 0.6800682135290166,
"grad_norm": 0.4678700864315033,
"learning_rate": 0.0003485615653107508,
"loss": 0.8859,
"step": 13160
},
{
"epoch": 0.6805849826882332,
"grad_norm": 0.4804142713546753,
"learning_rate": 0.0003464068939276399,
"loss": 0.8994,
"step": 13170
},
{
"epoch": 0.6811017518474497,
"grad_norm": 0.450847864151001,
"learning_rate": 0.0003442655418809999,
"loss": 0.8894,
"step": 13180
},
{
"epoch": 0.6816185210066663,
"grad_norm": 0.46586012840270996,
"learning_rate": 0.00034213742683589774,
"loss": 0.8768,
"step": 13190
},
{
"epoch": 0.6821352901658829,
"grad_norm": 0.439656525850296,
"learning_rate": 0.0003400224669663629,
"loss": 0.8855,
"step": 13200
},
{
"epoch": 0.6826520593250994,
"grad_norm": 0.4356318712234497,
"learning_rate": 0.00033792058095224076,
"loss": 0.8772,
"step": 13210
},
{
"epoch": 0.683168828484316,
"grad_norm": 0.460469514131546,
"learning_rate": 0.0003358316879760663,
"loss": 0.8681,
"step": 13220
},
{
"epoch": 0.6836855976435326,
"grad_norm": 0.43120890855789185,
"learning_rate": 0.0003337557077199565,
"loss": 0.8611,
"step": 13230
},
{
"epoch": 0.6842023668027493,
"grad_norm": 0.45166271924972534,
"learning_rate": 0.000331692560362522,
"loss": 0.8771,
"step": 13240
},
{
"epoch": 0.6847191359619658,
"grad_norm": 0.44746896624565125,
"learning_rate": 0.0003296421665757981,
"loss": 0.8781,
"step": 13250
},
{
"epoch": 0.6852359051211824,
"grad_norm": 0.4466201663017273,
"learning_rate": 0.0003276044475221947,
"loss": 0.8647,
"step": 13260
},
{
"epoch": 0.685752674280399,
"grad_norm": 0.48084691166877747,
"learning_rate": 0.00032557932485146473,
"loss": 0.9078,
"step": 13270
},
{
"epoch": 0.6862694434396155,
"grad_norm": 0.46723824739456177,
"learning_rate": 0.0003235667206976918,
"loss": 0.8802,
"step": 13280
},
{
"epoch": 0.6867862125988321,
"grad_norm": 0.4841623902320862,
"learning_rate": 0.00032156655767629616,
"loss": 0.8721,
"step": 13290
},
{
"epoch": 0.6873029817580487,
"grad_norm": 0.4535221755504608,
"learning_rate": 0.0003195787588810593,
"loss": 0.8609,
"step": 13300
},
{
"epoch": 0.6878197509172652,
"grad_norm": 0.47944900393486023,
"learning_rate": 0.00031760324788116683,
"loss": 0.8803,
"step": 13310
},
{
"epoch": 0.6883365200764818,
"grad_norm": 0.4466581344604492,
"learning_rate": 0.00031563994871826995,
"loss": 0.867,
"step": 13320
},
{
"epoch": 0.6888532892356984,
"grad_norm": 0.4529067277908325,
"learning_rate": 0.00031368878590356457,
"loss": 0.8861,
"step": 13330
},
{
"epoch": 0.6893700583949149,
"grad_norm": 0.45706498622894287,
"learning_rate": 0.00031174968441488886,
"loss": 0.8754,
"step": 13340
},
{
"epoch": 0.6898868275541316,
"grad_norm": 0.46450352668762207,
"learning_rate": 0.00030982256969383883,
"loss": 0.8669,
"step": 13350
},
{
"epoch": 0.6904035967133482,
"grad_norm": 0.45960313081741333,
"learning_rate": 0.0003079073676429011,
"loss": 0.8669,
"step": 13360
},
{
"epoch": 0.6909203658725648,
"grad_norm": 0.4698009192943573,
"learning_rate": 0.00030600400462260457,
"loss": 0.8697,
"step": 13370
},
{
"epoch": 0.6914371350317813,
"grad_norm": 0.4546875059604645,
"learning_rate": 0.0003041124074486883,
"loss": 0.863,
"step": 13380
},
{
"epoch": 0.6919539041909979,
"grad_norm": 0.4646720588207245,
"learning_rate": 0.00030223250338928787,
"loss": 0.8664,
"step": 13390
},
{
"epoch": 0.6924706733502145,
"grad_norm": 0.6140843629837036,
"learning_rate": 0.0003003642201621389,
"loss": 0.8636,
"step": 13400
},
{
"epoch": 0.692987442509431,
"grad_norm": 0.46629661321640015,
"learning_rate": 0.0002985074859317977,
"loss": 0.8776,
"step": 13410
},
{
"epoch": 0.6935042116686476,
"grad_norm": 0.4489153027534485,
"learning_rate": 0.00029666222930687926,
"loss": 0.8663,
"step": 13420
},
{
"epoch": 0.6940209808278642,
"grad_norm": 0.45471352338790894,
"learning_rate": 0.00029482837933731207,
"loss": 0.8514,
"step": 13430
},
{
"epoch": 0.6945377499870807,
"grad_norm": 0.4706459045410156,
"learning_rate": 0.00029300586551161034,
"loss": 0.866,
"step": 13440
},
{
"epoch": 0.6950545191462973,
"grad_norm": 0.44388100504875183,
"learning_rate": 0.00029119461775416286,
"loss": 0.862,
"step": 13450
},
{
"epoch": 0.695571288305514,
"grad_norm": 0.5106334090232849,
"learning_rate": 0.0002893945664225381,
"loss": 0.8563,
"step": 13460
},
{
"epoch": 0.6960880574647305,
"grad_norm": 0.4586535096168518,
"learning_rate": 0.00028760564230480724,
"loss": 0.8564,
"step": 13470
},
{
"epoch": 0.6966048266239471,
"grad_norm": 0.5277544856071472,
"learning_rate": 0.0002858277766168823,
"loss": 0.8685,
"step": 13480
},
{
"epoch": 0.6971215957831637,
"grad_norm": 0.48058634996414185,
"learning_rate": 0.0002840609009998717,
"loss": 0.8645,
"step": 13490
},
{
"epoch": 0.6976383649423802,
"grad_norm": 0.4804344177246094,
"learning_rate": 0.0002823049475174519,
"loss": 0.8754,
"step": 13500
},
{
"epoch": 0.6981551341015968,
"grad_norm": 0.4439767003059387,
"learning_rate": 0.00028055984865325503,
"loss": 0.8514,
"step": 13510
},
{
"epoch": 0.6986719032608134,
"grad_norm": 0.4501279294490814,
"learning_rate": 0.0002788255373082731,
"loss": 0.856,
"step": 13520
},
{
"epoch": 0.69918867242003,
"grad_norm": 0.5022059679031372,
"learning_rate": 0.000277101946798278,
"loss": 0.8647,
"step": 13530
},
{
"epoch": 0.6997054415792465,
"grad_norm": 0.45433667302131653,
"learning_rate": 0.00027538901085125735,
"loss": 0.8719,
"step": 13540
},
{
"epoch": 0.7002222107384631,
"grad_norm": 0.46493837237358093,
"learning_rate": 0.0002736866636048666,
"loss": 0.8599,
"step": 13550
},
{
"epoch": 0.7007389798976797,
"grad_norm": 0.45873501896858215,
"learning_rate": 0.0002719948396038963,
"loss": 0.8648,
"step": 13560
},
{
"epoch": 0.7012557490568962,
"grad_norm": 0.4426117539405823,
"learning_rate": 0.0002703134737977557,
"loss": 0.8574,
"step": 13570
},
{
"epoch": 0.7017725182161129,
"grad_norm": 0.44519364833831787,
"learning_rate": 0.0002686425015379712,
"loss": 0.854,
"step": 13580
},
{
"epoch": 0.7022892873753295,
"grad_norm": 0.47185274958610535,
"learning_rate": 0.00026698185857570094,
"loss": 0.8565,
"step": 13590
},
{
"epoch": 0.702806056534546,
"grad_norm": 0.43223652243614197,
"learning_rate": 0.00026533148105926436,
"loss": 0.8721,
"step": 13600
},
{
"epoch": 0.7033228256937626,
"grad_norm": 0.4602532386779785,
"learning_rate": 0.0002636913055316868,
"loss": 0.8518,
"step": 13610
},
{
"epoch": 0.7038395948529792,
"grad_norm": 0.45018014311790466,
"learning_rate": 0.00026206126892826,
"loss": 0.8685,
"step": 13620
},
{
"epoch": 0.7043563640121957,
"grad_norm": 0.49739015102386475,
"learning_rate": 0.000260441308574117,
"loss": 0.8483,
"step": 13630
},
{
"epoch": 0.7048731331714123,
"grad_norm": 0.4658418595790863,
"learning_rate": 0.00025883136218182235,
"loss": 0.8545,
"step": 13640
},
{
"epoch": 0.7053899023306289,
"grad_norm": 0.4808160066604614,
"learning_rate": 0.0002572313678489773,
"loss": 0.8622,
"step": 13650
},
{
"epoch": 0.7059066714898455,
"grad_norm": 0.4521915316581726,
"learning_rate": 0.0002556412640558396,
"loss": 0.8632,
"step": 13660
},
{
"epoch": 0.706423440649062,
"grad_norm": 0.456153005361557,
"learning_rate": 0.0002540609896629577,
"loss": 0.861,
"step": 13670
},
{
"epoch": 0.7069402098082787,
"grad_norm": 0.43279728293418884,
"learning_rate": 0.00025249048390882053,
"loss": 0.8593,
"step": 13680
},
{
"epoch": 0.7074569789674953,
"grad_norm": 0.4601012170314789,
"learning_rate": 0.0002509296864075207,
"loss": 0.8629,
"step": 13690
},
{
"epoch": 0.7079737481267118,
"grad_norm": 0.47351303696632385,
"learning_rate": 0.0002493785371464332,
"loss": 0.8622,
"step": 13700
},
{
"epoch": 0.7084905172859284,
"grad_norm": 0.4869425594806671,
"learning_rate": 0.0002478369764839074,
"loss": 0.8546,
"step": 13710
},
{
"epoch": 0.709007286445145,
"grad_norm": 0.4412122964859009,
"learning_rate": 0.0002463049451469741,
"loss": 0.8444,
"step": 13720
},
{
"epoch": 0.7095240556043615,
"grad_norm": 0.4480939209461212,
"learning_rate": 0.0002447823842290664,
"loss": 0.848,
"step": 13730
},
{
"epoch": 0.7100408247635781,
"grad_norm": 0.4651864767074585,
"learning_rate": 0.00024326923518775486,
"loss": 0.8455,
"step": 13740
},
{
"epoch": 0.7105575939227947,
"grad_norm": 0.4487757384777069,
"learning_rate": 0.0002417654398424963,
"loss": 0.841,
"step": 13750
},
{
"epoch": 0.7110743630820112,
"grad_norm": 0.44667768478393555,
"learning_rate": 0.00024027094037239717,
"loss": 0.8454,
"step": 13760
},
{
"epoch": 0.7115911322412278,
"grad_norm": 0.44757676124572754,
"learning_rate": 0.0002387856793139899,
"loss": 0.8438,
"step": 13770
},
{
"epoch": 0.7121079014004444,
"grad_norm": 0.47068849205970764,
"learning_rate": 0.00023730959955902366,
"loss": 0.8434,
"step": 13780
},
{
"epoch": 0.7126246705596609,
"grad_norm": 0.4390396773815155,
"learning_rate": 0.00023584264435226848,
"loss": 0.8461,
"step": 13790
},
{
"epoch": 0.7131414397188776,
"grad_norm": 0.4566657543182373,
"learning_rate": 0.00023438475728933318,
"loss": 0.8473,
"step": 13800
},
{
"epoch": 0.7136582088780942,
"grad_norm": 0.49407103657722473,
"learning_rate": 0.0002329358823144963,
"loss": 0.8431,
"step": 13810
},
{
"epoch": 0.7141749780373108,
"grad_norm": 0.47513094544410706,
"learning_rate": 0.00023149596371855103,
"loss": 0.8425,
"step": 13820
},
{
"epoch": 0.7146917471965273,
"grad_norm": 0.4418255686759949,
"learning_rate": 0.00023006494613666317,
"loss": 0.8394,
"step": 13830
},
{
"epoch": 0.7152085163557439,
"grad_norm": 0.45882540941238403,
"learning_rate": 0.0002286427745462422,
"loss": 0.844,
"step": 13840
},
{
"epoch": 0.7157252855149605,
"grad_norm": 0.44126296043395996,
"learning_rate": 0.00022722939426482577,
"loss": 0.8438,
"step": 13850
},
{
"epoch": 0.716242054674177,
"grad_norm": 0.44302189350128174,
"learning_rate": 0.00022582475094797713,
"loss": 0.8597,
"step": 13860
},
{
"epoch": 0.7167588238333936,
"grad_norm": 0.46645456552505493,
"learning_rate": 0.00022442879058719568,
"loss": 0.8218,
"step": 13870
},
{
"epoch": 0.7172755929926102,
"grad_norm": 0.4451071619987488,
"learning_rate": 0.00022304145950784017,
"loss": 0.852,
"step": 13880
},
{
"epoch": 0.7177923621518267,
"grad_norm": 0.47982582449913025,
"learning_rate": 0.00022166270436706502,
"loss": 0.8408,
"step": 13890
},
{
"epoch": 0.7183091313110433,
"grad_norm": 0.4596095085144043,
"learning_rate": 0.00022029247215176934,
"loss": 0.8333,
"step": 13900
},
{
"epoch": 0.71882590047026,
"grad_norm": 0.4595165550708771,
"learning_rate": 0.00021893071017655845,
"loss": 0.8426,
"step": 13910
},
{
"epoch": 0.7193426696294765,
"grad_norm": 0.4321739375591278,
"learning_rate": 0.00021757736608171818,
"loss": 0.8419,
"step": 13920
},
{
"epoch": 0.7198594387886931,
"grad_norm": 0.4603961706161499,
"learning_rate": 0.00021623238783120176,
"loss": 0.8471,
"step": 13930
},
{
"epoch": 0.7203762079479097,
"grad_norm": 0.47230657935142517,
"learning_rate": 0.00021489572371062883,
"loss": 0.8326,
"step": 13940
},
{
"epoch": 0.7208929771071263,
"grad_norm": 0.45762136578559875,
"learning_rate": 0.0002135673223252971,
"loss": 0.8425,
"step": 13950
},
{
"epoch": 0.7214097462663428,
"grad_norm": 0.4551469385623932,
"learning_rate": 0.00021224713259820633,
"loss": 0.8335,
"step": 13960
},
{
"epoch": 0.7219265154255594,
"grad_norm": 0.4409978985786438,
"learning_rate": 0.00021093510376809428,
"loss": 0.8388,
"step": 13970
},
{
"epoch": 0.722443284584776,
"grad_norm": 0.444934219121933,
"learning_rate": 0.00020963118538748493,
"loss": 0.8313,
"step": 13980
},
{
"epoch": 0.7229600537439925,
"grad_norm": 0.4529027044773102,
"learning_rate": 0.00020833532732074907,
"loss": 0.8298,
"step": 13990
},
{
"epoch": 0.7234768229032091,
"grad_norm": 0.44308820366859436,
"learning_rate": 0.00020704747974217608,
"loss": 0.8132,
"step": 14000
},
{
"epoch": 0.7239935920624258,
"grad_norm": 0.451187402009964,
"learning_rate": 0.0002057675931340586,
"loss": 0.8465,
"step": 14010
},
{
"epoch": 0.7245103612216423,
"grad_norm": 0.4436304569244385,
"learning_rate": 0.00020449561828478832,
"loss": 0.8502,
"step": 14020
},
{
"epoch": 0.7250271303808589,
"grad_norm": 0.4516158401966095,
"learning_rate": 0.00020323150628696383,
"loss": 0.8323,
"step": 14030
},
{
"epoch": 0.7255438995400755,
"grad_norm": 0.4490114450454712,
"learning_rate": 0.00020197520853551025,
"loss": 0.8366,
"step": 14040
},
{
"epoch": 0.726060668699292,
"grad_norm": 0.4692043364048004,
"learning_rate": 0.00020072667672581016,
"loss": 0.8537,
"step": 14050
},
{
"epoch": 0.7265774378585086,
"grad_norm": 0.47233638167381287,
"learning_rate": 0.00019948586285184656,
"loss": 0.8387,
"step": 14060
},
{
"epoch": 0.7270942070177252,
"grad_norm": 0.43632131814956665,
"learning_rate": 0.00019825271920435674,
"loss": 0.836,
"step": 14070
},
{
"epoch": 0.7276109761769418,
"grad_norm": 0.4420956075191498,
"learning_rate": 0.00019702719836899813,
"loss": 0.8381,
"step": 14080
},
{
"epoch": 0.7281277453361583,
"grad_norm": 0.4486638009548187,
"learning_rate": 0.00019580925322452495,
"loss": 0.8382,
"step": 14090
},
{
"epoch": 0.7286445144953749,
"grad_norm": 0.45652589201927185,
"learning_rate": 0.0001945988369409767,
"loss": 0.8538,
"step": 14100
},
{
"epoch": 0.7291612836545915,
"grad_norm": 0.4422604739665985,
"learning_rate": 0.00019339590297787735,
"loss": 0.8321,
"step": 14110
},
{
"epoch": 0.729678052813808,
"grad_norm": 0.4418606758117676,
"learning_rate": 0.00019220040508244581,
"loss": 0.8362,
"step": 14120
},
{
"epoch": 0.7301948219730247,
"grad_norm": 0.43576526641845703,
"learning_rate": 0.00019101229728781774,
"loss": 0.8131,
"step": 14130
},
{
"epoch": 0.7307115911322413,
"grad_norm": 0.4448246657848358,
"learning_rate": 0.0001898315339112779,
"loss": 0.8425,
"step": 14140
},
{
"epoch": 0.7312283602914578,
"grad_norm": 0.43587714433670044,
"learning_rate": 0.0001886580695525038,
"loss": 0.8283,
"step": 14150
},
{
"epoch": 0.7317451294506744,
"grad_norm": 0.4598979353904724,
"learning_rate": 0.00018749185909182,
"loss": 0.8441,
"step": 14160
},
{
"epoch": 0.732261898609891,
"grad_norm": 0.5122143626213074,
"learning_rate": 0.0001863328576884632,
"loss": 0.8497,
"step": 14170
},
{
"epoch": 0.7327786677691075,
"grad_norm": 0.45913758873939514,
"learning_rate": 0.00018518102077885824,
"loss": 0.8324,
"step": 14180
},
{
"epoch": 0.7332954369283241,
"grad_norm": 0.46700534224510193,
"learning_rate": 0.00018403630407490455,
"loss": 0.8165,
"step": 14190
},
{
"epoch": 0.7338122060875407,
"grad_norm": 0.4529505670070648,
"learning_rate": 0.0001828986635622732,
"loss": 0.8345,
"step": 14200
},
{
"epoch": 0.7343289752467572,
"grad_norm": 0.4726906716823578,
"learning_rate": 0.0001817680554987149,
"loss": 0.8283,
"step": 14210
},
{
"epoch": 0.7348457444059738,
"grad_norm": 0.4485037326812744,
"learning_rate": 0.00018064443641237752,
"loss": 0.8403,
"step": 14220
},
{
"epoch": 0.7353625135651904,
"grad_norm": 0.46243423223495483,
"learning_rate": 0.00017952776310013513,
"loss": 0.8292,
"step": 14230
},
{
"epoch": 0.7358792827244071,
"grad_norm": 0.45175400376319885,
"learning_rate": 0.00017841799262592663,
"loss": 0.837,
"step": 14240
},
{
"epoch": 0.7363960518836236,
"grad_norm": 0.4575372040271759,
"learning_rate": 0.0001773150823191048,
"loss": 0.8224,
"step": 14250
},
{
"epoch": 0.7369128210428402,
"grad_norm": 0.4672216773033142,
"learning_rate": 0.00017621898977279577,
"loss": 0.8351,
"step": 14260
},
{
"epoch": 0.7374295902020568,
"grad_norm": 0.45373353362083435,
"learning_rate": 0.0001751296728422683,
"loss": 0.8334,
"step": 14270
},
{
"epoch": 0.7379463593612733,
"grad_norm": 0.472469687461853,
"learning_rate": 0.0001740470896433135,
"loss": 0.8346,
"step": 14280
},
{
"epoch": 0.7384631285204899,
"grad_norm": 0.4568733274936676,
"learning_rate": 0.00017297119855063422,
"loss": 0.8223,
"step": 14290
},
{
"epoch": 0.7389798976797065,
"grad_norm": 0.4490255117416382,
"learning_rate": 0.00017190195819624467,
"loss": 0.8298,
"step": 14300
},
{
"epoch": 0.739496666838923,
"grad_norm": 0.4388444125652313,
"learning_rate": 0.0001708393274678798,
"loss": 0.8301,
"step": 14310
},
{
"epoch": 0.7400134359981396,
"grad_norm": 0.4393922686576843,
"learning_rate": 0.00016978326550741443,
"loss": 0.8379,
"step": 14320
},
{
"epoch": 0.7405302051573562,
"grad_norm": 0.44879150390625,
"learning_rate": 0.00016873373170929243,
"loss": 0.8205,
"step": 14330
},
{
"epoch": 0.7410469743165727,
"grad_norm": 0.4404836595058441,
"learning_rate": 0.00016769068571896532,
"loss": 0.8197,
"step": 14340
},
{
"epoch": 0.7415637434757893,
"grad_norm": 0.47884973883628845,
"learning_rate": 0.00016665408743134062,
"loss": 0.8433,
"step": 14350
},
{
"epoch": 0.742080512635006,
"grad_norm": 0.4363346993923187,
"learning_rate": 0.00016562389698924,
"loss": 0.8255,
"step": 14360
},
{
"epoch": 0.7425972817942226,
"grad_norm": 0.4692130982875824,
"learning_rate": 0.00016460007478186648,
"loss": 0.8146,
"step": 14370
},
{
"epoch": 0.7431140509534391,
"grad_norm": 0.45265311002731323,
"learning_rate": 0.00016358258144328163,
"loss": 0.8166,
"step": 14380
},
{
"epoch": 0.7436308201126557,
"grad_norm": 0.46352484822273254,
"learning_rate": 0.00016257137785089182,
"loss": 0.8262,
"step": 14390
},
{
"epoch": 0.7441475892718723,
"grad_norm": 0.4382546842098236,
"learning_rate": 0.00016156642512394405,
"loss": 0.8118,
"step": 14400
},
{
"epoch": 0.7446643584310888,
"grad_norm": 0.4326501190662384,
"learning_rate": 0.0001605676846220309,
"loss": 0.832,
"step": 14410
},
{
"epoch": 0.7451811275903054,
"grad_norm": 0.4683341979980469,
"learning_rate": 0.0001595751179436049,
"loss": 0.8202,
"step": 14420
},
{
"epoch": 0.745697896749522,
"grad_norm": 0.4519064426422119,
"learning_rate": 0.0001585886869245019,
"loss": 0.8055,
"step": 14430
},
{
"epoch": 0.7462146659087385,
"grad_norm": 0.45761948823928833,
"learning_rate": 0.00015760835363647367,
"loss": 0.8128,
"step": 14440
},
{
"epoch": 0.7467314350679551,
"grad_norm": 0.4355948269367218,
"learning_rate": 0.00015663408038572963,
"loss": 0.816,
"step": 14450
},
{
"epoch": 0.7472482042271718,
"grad_norm": 0.4464154839515686,
"learning_rate": 0.00015566582971148748,
"loss": 0.8211,
"step": 14460
},
{
"epoch": 0.7477649733863883,
"grad_norm": 0.4529094696044922,
"learning_rate": 0.0001547035643845329,
"loss": 0.8124,
"step": 14470
},
{
"epoch": 0.7482817425456049,
"grad_norm": 0.48181021213531494,
"learning_rate": 0.00015374724740578792,
"loss": 0.8092,
"step": 14480
},
{
"epoch": 0.7487985117048215,
"grad_norm": 0.46071046590805054,
"learning_rate": 0.0001527968420048884,
"loss": 0.7989,
"step": 14490
},
{
"epoch": 0.749315280864038,
"grad_norm": 0.4348960220813751,
"learning_rate": 0.00015185231163877035,
"loss": 0.834,
"step": 14500
},
{
"epoch": 0.7498320500232546,
"grad_norm": 0.42849427461624146,
"learning_rate": 0.00015091361999026458,
"loss": 0.7947,
"step": 14510
},
{
"epoch": 0.7503488191824712,
"grad_norm": 0.42904916405677795,
"learning_rate": 0.00014998073096670058,
"loss": 0.8235,
"step": 14520
},
{
"epoch": 0.7508655883416878,
"grad_norm": 0.4777064919471741,
"learning_rate": 0.0001490536086985185,
"loss": 0.8273,
"step": 14530
},
{
"epoch": 0.7513823575009043,
"grad_norm": 0.44165903329849243,
"learning_rate": 0.00014813221753789016,
"loss": 0.825,
"step": 14540
},
{
"epoch": 0.7518991266601209,
"grad_norm": 0.4439583122730255,
"learning_rate": 0.00014721652205734831,
"loss": 0.827,
"step": 14550
},
{
"epoch": 0.7524158958193375,
"grad_norm": 0.455435186624527,
"learning_rate": 0.00014630648704842445,
"loss": 0.8198,
"step": 14560
},
{
"epoch": 0.752932664978554,
"grad_norm": 0.4566732347011566,
"learning_rate": 0.00014540207752029508,
"loss": 0.8284,
"step": 14570
},
{
"epoch": 0.7534494341377707,
"grad_norm": 0.44228848814964294,
"learning_rate": 0.00014450325869843633,
"loss": 0.8191,
"step": 14580
},
{
"epoch": 0.7539662032969873,
"grad_norm": 0.445332795381546,
"learning_rate": 0.0001436099960232868,
"loss": 0.8131,
"step": 14590
},
{
"epoch": 0.7544829724562038,
"grad_norm": 0.4628824293613434,
"learning_rate": 0.0001427222551489188,
"loss": 0.8257,
"step": 14600
},
{
"epoch": 0.7549997416154204,
"grad_norm": 0.46374180912971497,
"learning_rate": 0.00014184000194171777,
"loss": 0.8334,
"step": 14610
},
{
"epoch": 0.755516510774637,
"grad_norm": 0.4505828320980072,
"learning_rate": 0.00014096320247906978,
"loss": 0.8203,
"step": 14620
},
{
"epoch": 0.7560332799338535,
"grad_norm": 0.4418148100376129,
"learning_rate": 0.00014009182304805726,
"loss": 0.8071,
"step": 14630
},
{
"epoch": 0.7565500490930701,
"grad_norm": 0.43000486493110657,
"learning_rate": 0.0001392258301441627,
"loss": 0.8223,
"step": 14640
},
{
"epoch": 0.7570668182522867,
"grad_norm": 0.4482291340827942,
"learning_rate": 0.0001383651904699805,
"loss": 0.8106,
"step": 14650
},
{
"epoch": 0.7575835874115033,
"grad_norm": 0.4472900629043579,
"learning_rate": 0.00013750987093393656,
"loss": 0.8196,
"step": 14660
},
{
"epoch": 0.7581003565707198,
"grad_norm": 0.45943567156791687,
"learning_rate": 0.00013665983864901587,
"loss": 0.8197,
"step": 14670
},
{
"epoch": 0.7586171257299364,
"grad_norm": 0.43818199634552,
"learning_rate": 0.00013581506093149825,
"loss": 0.8003,
"step": 14680
},
{
"epoch": 0.7591338948891531,
"grad_norm": 0.43463850021362305,
"learning_rate": 0.0001349755052997014,
"loss": 0.8086,
"step": 14690
},
{
"epoch": 0.7596506640483696,
"grad_norm": 0.4578488767147064,
"learning_rate": 0.00013414113947273217,
"loss": 0.8011,
"step": 14700
},
{
"epoch": 0.7601674332075862,
"grad_norm": 0.44629108905792236,
"learning_rate": 0.00013331193136924515,
"loss": 0.8086,
"step": 14710
},
{
"epoch": 0.7606842023668028,
"grad_norm": 0.4482209384441376,
"learning_rate": 0.00013248784910620945,
"loss": 0.7996,
"step": 14720
},
{
"epoch": 0.7612009715260193,
"grad_norm": 0.4447433650493622,
"learning_rate": 0.00013166886099768245,
"loss": 0.8162,
"step": 14730
},
{
"epoch": 0.7617177406852359,
"grad_norm": 0.44065767526626587,
"learning_rate": 0.00013085493555359173,
"loss": 0.826,
"step": 14740
},
{
"epoch": 0.7622345098444525,
"grad_norm": 0.47181805968284607,
"learning_rate": 0.00013004604147852416,
"loss": 0.8074,
"step": 14750
},
{
"epoch": 0.762751279003669,
"grad_norm": 0.44598037004470825,
"learning_rate": 0.00012924214767052268,
"loss": 0.8047,
"step": 14760
},
{
"epoch": 0.7632680481628856,
"grad_norm": 0.4688059091567993,
"learning_rate": 0.00012844322321989025,
"loss": 0.8076,
"step": 14770
},
{
"epoch": 0.7637848173221022,
"grad_norm": 0.47695672512054443,
"learning_rate": 0.00012764923740800162,
"loss": 0.7913,
"step": 14780
},
{
"epoch": 0.7643015864813187,
"grad_norm": 0.4601481556892395,
"learning_rate": 0.00012686015970612207,
"loss": 0.8122,
"step": 14790
},
{
"epoch": 0.7648183556405354,
"grad_norm": 0.46827730536460876,
"learning_rate": 0.0001260759597742335,
"loss": 0.8136,
"step": 14800
},
{
"epoch": 0.765335124799752,
"grad_norm": 0.43789979815483093,
"learning_rate": 0.00012529660745986808,
"loss": 0.8131,
"step": 14810
},
{
"epoch": 0.7658518939589686,
"grad_norm": 0.44412630796432495,
"learning_rate": 0.00012452207279694858,
"loss": 0.7994,
"step": 14820
},
{
"epoch": 0.7663686631181851,
"grad_norm": 0.44957849383354187,
"learning_rate": 0.00012375232600463646,
"loss": 0.801,
"step": 14830
},
{
"epoch": 0.7668854322774017,
"grad_norm": 0.4659784436225891,
"learning_rate": 0.0001229873374861867,
"loss": 0.8011,
"step": 14840
},
{
"epoch": 0.7674022014366183,
"grad_norm": 0.4447031617164612,
"learning_rate": 0.00012222707782780977,
"loss": 0.8132,
"step": 14850
},
{
"epoch": 0.7679189705958348,
"grad_norm": 0.45082828402519226,
"learning_rate": 0.00012147151779754062,
"loss": 0.8067,
"step": 14860
},
{
"epoch": 0.7684357397550514,
"grad_norm": 0.42726126313209534,
"learning_rate": 0.00012072062834411491,
"loss": 0.81,
"step": 14870
},
{
"epoch": 0.768952508914268,
"grad_norm": 0.46154364943504333,
"learning_rate": 0.00011997438059585174,
"loss": 0.8063,
"step": 14880
},
{
"epoch": 0.7694692780734845,
"grad_norm": 0.45202165842056274,
"learning_rate": 0.00011923274585954376,
"loss": 0.8066,
"step": 14890
},
{
"epoch": 0.7699860472327011,
"grad_norm": 0.43574896454811096,
"learning_rate": 0.00011849569561935377,
"loss": 0.8024,
"step": 14900
},
{
"epoch": 0.7705028163919178,
"grad_norm": 0.4647500514984131,
"learning_rate": 0.00011776320153571831,
"loss": 0.8047,
"step": 14910
},
{
"epoch": 0.7710195855511343,
"grad_norm": 0.4715510308742523,
"learning_rate": 0.00011703523544425804,
"loss": 0.8242,
"step": 14920
},
{
"epoch": 0.7715363547103509,
"grad_norm": 0.48043355345726013,
"learning_rate": 0.00011631176935469487,
"loss": 0.8014,
"step": 14930
},
{
"epoch": 0.7720531238695675,
"grad_norm": 0.45127764344215393,
"learning_rate": 0.00011559277544977559,
"loss": 0.8143,
"step": 14940
},
{
"epoch": 0.7725698930287841,
"grad_norm": 0.447942852973938,
"learning_rate": 0.0001148782260842024,
"loss": 0.815,
"step": 14950
},
{
"epoch": 0.7730866621880006,
"grad_norm": 0.4494159519672394,
"learning_rate": 0.00011416809378356995,
"loss": 0.8193,
"step": 14960
},
{
"epoch": 0.7736034313472172,
"grad_norm": 0.4411426782608032,
"learning_rate": 0.00011346235124330891,
"loss": 0.7971,
"step": 14970
},
{
"epoch": 0.7741202005064338,
"grad_norm": 0.4652232229709625,
"learning_rate": 0.0001127609713276361,
"loss": 0.8108,
"step": 14980
},
{
"epoch": 0.7746369696656503,
"grad_norm": 0.48985597491264343,
"learning_rate": 0.00011206392706851122,
"loss": 0.8061,
"step": 14990
},
{
"epoch": 0.7751537388248669,
"grad_norm": 0.4511886239051819,
"learning_rate": 0.00011137119166459977,
"loss": 0.8046,
"step": 15000
},
{
"epoch": 0.7756705079840835,
"grad_norm": 0.4621480405330658,
"learning_rate": 0.00011068273848024272,
"loss": 0.8116,
"step": 15010
},
{
"epoch": 0.7761872771433,
"grad_norm": 0.45318228006362915,
"learning_rate": 0.00010999854104443217,
"loss": 0.7992,
"step": 15020
},
{
"epoch": 0.7767040463025167,
"grad_norm": 0.46225494146347046,
"learning_rate": 0.00010931857304979372,
"loss": 0.8055,
"step": 15030
},
{
"epoch": 0.7772208154617333,
"grad_norm": 0.4576970934867859,
"learning_rate": 0.00010864280835157488,
"loss": 0.7918,
"step": 15040
},
{
"epoch": 0.7777375846209498,
"grad_norm": 0.43827998638153076,
"learning_rate": 0.00010797122096663975,
"loss": 0.8124,
"step": 15050
},
{
"epoch": 0.7782543537801664,
"grad_norm": 0.4270840883255005,
"learning_rate": 0.00010730378507247009,
"loss": 0.8027,
"step": 15060
},
{
"epoch": 0.778771122939383,
"grad_norm": 0.4645536243915558,
"learning_rate": 0.00010664047500617232,
"loss": 0.8103,
"step": 15070
},
{
"epoch": 0.7792878920985996,
"grad_norm": 0.4405182898044586,
"learning_rate": 0.00010598126526349083,
"loss": 0.7886,
"step": 15080
},
{
"epoch": 0.7798046612578161,
"grad_norm": 0.4572370648384094,
"learning_rate": 0.00010532613049782744,
"loss": 0.8021,
"step": 15090
},
{
"epoch": 0.7803214304170327,
"grad_norm": 0.4464896321296692,
"learning_rate": 0.00010467504551926664,
"loss": 0.7897,
"step": 15100
},
{
"epoch": 0.7808381995762493,
"grad_norm": 0.470245897769928,
"learning_rate": 0.00010402798529360717,
"loss": 0.8053,
"step": 15110
},
{
"epoch": 0.7813549687354658,
"grad_norm": 0.4271971583366394,
"learning_rate": 0.00010338492494139942,
"loss": 0.8144,
"step": 15120
},
{
"epoch": 0.7818717378946825,
"grad_norm": 0.45670023560523987,
"learning_rate": 0.00010274583973698883,
"loss": 0.8012,
"step": 15130
},
{
"epoch": 0.7823885070538991,
"grad_norm": 0.4224714934825897,
"learning_rate": 0.0001021107051075651,
"loss": 0.785,
"step": 15140
},
{
"epoch": 0.7829052762131156,
"grad_norm": 0.43493083119392395,
"learning_rate": 0.00010147949663221759,
"loss": 0.8028,
"step": 15150
},
{
"epoch": 0.7834220453723322,
"grad_norm": 0.4562802016735077,
"learning_rate": 0.00010085219004099603,
"loss": 0.8052,
"step": 15160
},
{
"epoch": 0.7839388145315488,
"grad_norm": 0.44530564546585083,
"learning_rate": 0.00010022876121397758,
"loss": 0.8073,
"step": 15170
},
{
"epoch": 0.7844555836907653,
"grad_norm": 0.5228975415229797,
"learning_rate": 9.960918618033934e-05,
"loss": 0.8089,
"step": 15180
},
{
"epoch": 0.7849723528499819,
"grad_norm": 0.44067102670669556,
"learning_rate": 9.899344111743661e-05,
"loss": 0.7955,
"step": 15190
},
{
"epoch": 0.7854891220091985,
"grad_norm": 0.474118173122406,
"learning_rate": 9.838150234988704e-05,
"loss": 0.7932,
"step": 15200
},
{
"epoch": 0.786005891168415,
"grad_norm": 0.4493066668510437,
"learning_rate": 9.777334634866019e-05,
"loss": 0.7938,
"step": 15210
},
{
"epoch": 0.7865226603276316,
"grad_norm": 0.44325533509254456,
"learning_rate": 9.716894973017291e-05,
"loss": 0.8098,
"step": 15220
},
{
"epoch": 0.7870394294868482,
"grad_norm": 0.44017842411994934,
"learning_rate": 9.656828925539026e-05,
"loss": 0.7872,
"step": 15230
},
{
"epoch": 0.7875561986460649,
"grad_norm": 0.4537578225135803,
"learning_rate": 9.597134182893185e-05,
"loss": 0.8046,
"step": 15240
},
{
"epoch": 0.7880729678052814,
"grad_norm": 0.43279150128364563,
"learning_rate": 9.5378084498184e-05,
"loss": 0.8155,
"step": 15250
},
{
"epoch": 0.788589736964498,
"grad_norm": 0.45793530344963074,
"learning_rate": 9.478849445241703e-05,
"loss": 0.8033,
"step": 15260
},
{
"epoch": 0.7891065061237146,
"grad_norm": 0.45037081837654114,
"learning_rate": 9.420254902190833e-05,
"loss": 0.7985,
"step": 15270
},
{
"epoch": 0.7896232752829311,
"grad_norm": 0.4623776972293854,
"learning_rate": 9.362022567707067e-05,
"loss": 0.8197,
"step": 15280
},
{
"epoch": 0.7901400444421477,
"grad_norm": 0.4537854790687561,
"learning_rate": 9.30415020275859e-05,
"loss": 0.7926,
"step": 15290
},
{
"epoch": 0.7906568136013643,
"grad_norm": 0.4492059648036957,
"learning_rate": 9.246635582154403e-05,
"loss": 0.7938,
"step": 15300
},
{
"epoch": 0.7911735827605808,
"grad_norm": 0.4396090805530548,
"learning_rate": 9.189476494458775e-05,
"loss": 0.7999,
"step": 15310
},
{
"epoch": 0.7916903519197974,
"grad_norm": 0.43469393253326416,
"learning_rate": 9.132670741906201e-05,
"loss": 0.7994,
"step": 15320
},
{
"epoch": 0.792207121079014,
"grad_norm": 0.44428810477256775,
"learning_rate": 9.076216140316906e-05,
"loss": 0.8043,
"step": 15330
},
{
"epoch": 0.7927238902382305,
"grad_norm": 0.4329991638660431,
"learning_rate": 9.02011051901286e-05,
"loss": 0.7877,
"step": 15340
},
{
"epoch": 0.7932406593974471,
"grad_norm": 0.4495084583759308,
"learning_rate": 8.964351720734322e-05,
"loss": 0.7969,
"step": 15350
},
{
"epoch": 0.7937574285566638,
"grad_norm": 0.4632558822631836,
"learning_rate": 8.908937601556875e-05,
"loss": 0.7895,
"step": 15360
},
{
"epoch": 0.7942741977158804,
"grad_norm": 0.44832077622413635,
"learning_rate": 8.853866030809016e-05,
"loss": 0.7928,
"step": 15370
},
{
"epoch": 0.7947909668750969,
"grad_norm": 0.4608152210712433,
"learning_rate": 8.799134890990218e-05,
"loss": 0.8033,
"step": 15380
},
{
"epoch": 0.7953077360343135,
"grad_norm": 0.45813852548599243,
"learning_rate": 8.744742077689513e-05,
"loss": 0.8127,
"step": 15390
},
{
"epoch": 0.7958245051935301,
"grad_norm": 0.4426814317703247,
"learning_rate": 8.69068549950458e-05,
"loss": 0.7939,
"step": 15400
},
{
"epoch": 0.7963412743527466,
"grad_norm": 0.4528840482234955,
"learning_rate": 8.636963077961332e-05,
"loss": 0.7889,
"step": 15410
},
{
"epoch": 0.7968580435119632,
"grad_norm": 0.4318794310092926,
"learning_rate": 8.583572747433989e-05,
"loss": 0.79,
"step": 15420
},
{
"epoch": 0.7973748126711798,
"grad_norm": 0.4563692808151245,
"learning_rate": 8.530512455065673e-05,
"loss": 0.7922,
"step": 15430
},
{
"epoch": 0.7978915818303963,
"grad_norm": 0.44473403692245483,
"learning_rate": 8.477780160689458e-05,
"loss": 0.7999,
"step": 15440
},
{
"epoch": 0.7984083509896129,
"grad_norm": 0.45080122351646423,
"learning_rate": 8.425373836749934e-05,
"loss": 0.7854,
"step": 15450
},
{
"epoch": 0.7989251201488295,
"grad_norm": 0.4660671055316925,
"learning_rate": 8.373291468225247e-05,
"loss": 0.8033,
"step": 15460
},
{
"epoch": 0.799441889308046,
"grad_norm": 0.43612638115882874,
"learning_rate": 8.321531052549621e-05,
"loss": 0.7975,
"step": 15470
},
{
"epoch": 0.7999586584672627,
"grad_norm": 0.44829973578453064,
"learning_rate": 8.270090599536357e-05,
"loss": 0.7865,
"step": 15480
},
{
"epoch": 0.8004754276264793,
"grad_norm": 0.4527774751186371,
"learning_rate": 8.218968131301314e-05,
"loss": 0.7994,
"step": 15490
},
{
"epoch": 0.8009921967856958,
"grad_norm": 0.46482163667678833,
"learning_rate": 8.16816168218686e-05,
"loss": 0.7949,
"step": 15500
},
{
"epoch": 0.8015089659449124,
"grad_norm": 0.4425605535507202,
"learning_rate": 8.117669298686285e-05,
"loss": 0.7708,
"step": 15510
},
{
"epoch": 0.802025735104129,
"grad_norm": 0.4287862777709961,
"learning_rate": 8.0674890393687e-05,
"loss": 0.801,
"step": 15520
},
{
"epoch": 0.8025425042633456,
"grad_norm": 0.4485211670398712,
"learning_rate": 8.017618974804377e-05,
"loss": 0.7876,
"step": 15530
},
{
"epoch": 0.8030592734225621,
"grad_norm": 0.43715623021125793,
"learning_rate": 7.968057187490574e-05,
"loss": 0.7984,
"step": 15540
},
{
"epoch": 0.8035760425817787,
"grad_norm": 0.4431898891925812,
"learning_rate": 7.918801771777797e-05,
"loss": 0.787,
"step": 15550
},
{
"epoch": 0.8040928117409953,
"grad_norm": 0.4634036421775818,
"learning_rate": 7.869850833796537e-05,
"loss": 0.8002,
"step": 15560
},
{
"epoch": 0.8046095809002118,
"grad_norm": 0.4434111416339874,
"learning_rate": 7.821202491384445e-05,
"loss": 0.7827,
"step": 15570
},
{
"epoch": 0.8051263500594285,
"grad_norm": 0.4345285892486572,
"learning_rate": 7.77285487401396e-05,
"loss": 0.7983,
"step": 15580
},
{
"epoch": 0.8056431192186451,
"grad_norm": 0.4299919605255127,
"learning_rate": 7.724806122720396e-05,
"loss": 0.7777,
"step": 15590
},
{
"epoch": 0.8061598883778616,
"grad_norm": 0.44167646765708923,
"learning_rate": 7.677054390030455e-05,
"loss": 0.7967,
"step": 15600
},
{
"epoch": 0.8066766575370782,
"grad_norm": 0.4805566370487213,
"learning_rate": 7.629597839891209e-05,
"loss": 0.809,
"step": 15610
},
{
"epoch": 0.8071934266962948,
"grad_norm": 0.4554888606071472,
"learning_rate": 7.582434647599476e-05,
"loss": 0.792,
"step": 15620
},
{
"epoch": 0.8077101958555113,
"grad_norm": 0.4604235887527466,
"learning_rate": 7.535562999731686e-05,
"loss": 0.7825,
"step": 15630
},
{
"epoch": 0.8082269650147279,
"grad_norm": 0.47276201844215393,
"learning_rate": 7.488981094074143e-05,
"loss": 0.7981,
"step": 15640
},
{
"epoch": 0.8087437341739445,
"grad_norm": 0.46937987208366394,
"learning_rate": 7.442687139553729e-05,
"loss": 0.7825,
"step": 15650
},
{
"epoch": 0.8092605033331611,
"grad_norm": 0.44667670130729675,
"learning_rate": 7.396679356169044e-05,
"loss": 0.7788,
"step": 15660
},
{
"epoch": 0.8097772724923776,
"grad_norm": 0.4452296197414398,
"learning_rate": 7.35095597492196e-05,
"loss": 0.7962,
"step": 15670
},
{
"epoch": 0.8102940416515942,
"grad_norm": 0.47155633568763733,
"learning_rate": 7.3055152377496e-05,
"loss": 0.7937,
"step": 15680
},
{
"epoch": 0.8108108108108109,
"grad_norm": 0.4572817087173462,
"learning_rate": 7.260355397456748e-05,
"loss": 0.7911,
"step": 15690
},
{
"epoch": 0.8113275799700274,
"grad_norm": 0.4582803547382355,
"learning_rate": 7.21547471764867e-05,
"loss": 0.7832,
"step": 15700
},
{
"epoch": 0.811844349129244,
"grad_norm": 0.45184165239334106,
"learning_rate": 7.170871472664335e-05,
"loss": 0.7896,
"step": 15710
},
{
"epoch": 0.8123611182884606,
"grad_norm": 0.462866872549057,
"learning_rate": 7.126543947510089e-05,
"loss": 0.8053,
"step": 15720
},
{
"epoch": 0.8128778874476771,
"grad_norm": 0.4350687265396118,
"learning_rate": 7.082490437793685e-05,
"loss": 0.7901,
"step": 15730
},
{
"epoch": 0.8133946566068937,
"grad_norm": 0.48868757486343384,
"learning_rate": 7.03870924965877e-05,
"loss": 0.7932,
"step": 15740
},
{
"epoch": 0.8139114257661103,
"grad_norm": 0.4378123879432678,
"learning_rate": 6.995198699719745e-05,
"loss": 0.8041,
"step": 15750
},
{
"epoch": 0.8144281949253268,
"grad_norm": 0.43519341945648193,
"learning_rate": 6.95195711499705e-05,
"loss": 0.7868,
"step": 15760
},
{
"epoch": 0.8149449640845434,
"grad_norm": 0.434491366147995,
"learning_rate": 6.908982832852821e-05,
"loss": 0.7872,
"step": 15770
},
{
"epoch": 0.81546173324376,
"grad_norm": 0.44694221019744873,
"learning_rate": 6.86627420092698e-05,
"loss": 0.7804,
"step": 15780
},
{
"epoch": 0.8159785024029765,
"grad_norm": 0.4496343731880188,
"learning_rate": 6.823829577073686e-05,
"loss": 0.7805,
"step": 15790
},
{
"epoch": 0.8164952715621931,
"grad_norm": 0.4403352737426758,
"learning_rate": 6.781647329298209e-05,
"loss": 0.7783,
"step": 15800
},
{
"epoch": 0.8170120407214098,
"grad_norm": 0.43307387828826904,
"learning_rate": 6.739725835694167e-05,
"loss": 0.7883,
"step": 15810
},
{
"epoch": 0.8175288098806264,
"grad_norm": 0.4405989646911621,
"learning_rate": 6.698063484381174e-05,
"loss": 0.7945,
"step": 15820
},
{
"epoch": 0.8180455790398429,
"grad_norm": 0.46816104650497437,
"learning_rate": 6.656658673442854e-05,
"loss": 0.7719,
"step": 15830
},
{
"epoch": 0.8185623481990595,
"grad_norm": 0.4712413251399994,
"learning_rate": 6.615509810865257e-05,
"loss": 0.8033,
"step": 15840
},
{
"epoch": 0.8190791173582761,
"grad_norm": 0.45156368613243103,
"learning_rate": 6.574615314475637e-05,
"loss": 0.7981,
"step": 15850
},
{
"epoch": 0.8195958865174926,
"grad_norm": 0.44122111797332764,
"learning_rate": 6.533973611881624e-05,
"loss": 0.7945,
"step": 15860
},
{
"epoch": 0.8201126556767092,
"grad_norm": 0.4496499001979828,
"learning_rate": 6.493583140410763e-05,
"loss": 0.7858,
"step": 15870
},
{
"epoch": 0.8206294248359258,
"grad_norm": 0.4501078128814697,
"learning_rate": 6.453442347050426e-05,
"loss": 0.7928,
"step": 15880
},
{
"epoch": 0.8211461939951423,
"grad_norm": 0.4360281825065613,
"learning_rate": 6.413549688388107e-05,
"loss": 0.787,
"step": 15890
},
{
"epoch": 0.8216629631543589,
"grad_norm": 0.4398462176322937,
"learning_rate": 6.37390363055207e-05,
"loss": 0.7736,
"step": 15900
},
{
"epoch": 0.8221797323135756,
"grad_norm": 0.44592639803886414,
"learning_rate": 6.334502649152376e-05,
"loss": 0.7869,
"step": 15910
},
{
"epoch": 0.822696501472792,
"grad_norm": 0.44563406705856323,
"learning_rate": 6.295345229222268e-05,
"loss": 0.7859,
"step": 15920
},
{
"epoch": 0.8232132706320087,
"grad_norm": 0.46638575196266174,
"learning_rate": 6.256429865159924e-05,
"loss": 0.7921,
"step": 15930
},
{
"epoch": 0.8237300397912253,
"grad_norm": 0.458056777715683,
"learning_rate": 6.217755060670557e-05,
"loss": 0.7799,
"step": 15940
},
{
"epoch": 0.8242468089504419,
"grad_norm": 0.4988017976284027,
"learning_rate": 6.1793193287089e-05,
"loss": 0.7771,
"step": 15950
},
{
"epoch": 0.8247635781096584,
"grad_norm": 0.44715121388435364,
"learning_rate": 6.141121191422011e-05,
"loss": 0.7974,
"step": 15960
},
{
"epoch": 0.825280347268875,
"grad_norm": 0.45090383291244507,
"learning_rate": 6.1031591800924596e-05,
"loss": 0.7683,
"step": 15970
},
{
"epoch": 0.8257971164280916,
"grad_norm": 0.43011826276779175,
"learning_rate": 6.0654318350818545e-05,
"loss": 0.7791,
"step": 15980
},
{
"epoch": 0.8263138855873081,
"grad_norm": 0.4606122672557831,
"learning_rate": 6.027937705774713e-05,
"loss": 0.7998,
"step": 15990
},
{
"epoch": 0.8268306547465247,
"grad_norm": 0.4207383096218109,
"learning_rate": 5.9906753505226956e-05,
"loss": 0.7785,
"step": 16000
},
{
"epoch": 0.8273474239057413,
"grad_norm": 0.4336974620819092,
"learning_rate": 5.953643336589173e-05,
"loss": 0.7834,
"step": 16010
},
{
"epoch": 0.8278641930649578,
"grad_norm": 0.4548156261444092,
"learning_rate": 5.916840240094121e-05,
"loss": 0.7922,
"step": 16020
},
{
"epoch": 0.8283809622241745,
"grad_norm": 0.43436485528945923,
"learning_rate": 5.880264645959399e-05,
"loss": 0.7804,
"step": 16030
},
{
"epoch": 0.8288977313833911,
"grad_norm": 0.4377012252807617,
"learning_rate": 5.843915147854316e-05,
"loss": 0.7718,
"step": 16040
},
{
"epoch": 0.8294145005426076,
"grad_norm": 0.46145206689834595,
"learning_rate": 5.807790348141579e-05,
"loss": 0.7888,
"step": 16050
},
{
"epoch": 0.8299312697018242,
"grad_norm": 0.444749116897583,
"learning_rate": 5.771888857823527e-05,
"loss": 0.7978,
"step": 16060
},
{
"epoch": 0.8304480388610408,
"grad_norm": 0.4541518986225128,
"learning_rate": 5.736209296488757e-05,
"loss": 0.7849,
"step": 16070
},
{
"epoch": 0.8309648080202574,
"grad_norm": 0.43136441707611084,
"learning_rate": 5.7007502922590154e-05,
"loss": 0.7924,
"step": 16080
},
{
"epoch": 0.8314815771794739,
"grad_norm": 0.4634501338005066,
"learning_rate": 5.665510481736475e-05,
"loss": 0.7966,
"step": 16090
},
{
"epoch": 0.8319983463386905,
"grad_norm": 0.45138517022132874,
"learning_rate": 5.63048850995129e-05,
"loss": 0.783,
"step": 16100
},
{
"epoch": 0.8325151154979071,
"grad_norm": 0.45926496386528015,
"learning_rate": 5.59568303030952e-05,
"loss": 0.7903,
"step": 16110
},
{
"epoch": 0.8330318846571236,
"grad_norm": 0.4217846691608429,
"learning_rate": 5.561092704541337e-05,
"loss": 0.765,
"step": 16120
},
{
"epoch": 0.8335486538163402,
"grad_norm": 0.46820348501205444,
"learning_rate": 5.526716202649569e-05,
"loss": 0.7917,
"step": 16130
},
{
"epoch": 0.8340654229755569,
"grad_norm": 0.45810696482658386,
"learning_rate": 5.492552202858579e-05,
"loss": 0.7771,
"step": 16140
},
{
"epoch": 0.8345821921347734,
"grad_norm": 0.45739495754241943,
"learning_rate": 5.458599391563416e-05,
"loss": 0.7949,
"step": 16150
},
{
"epoch": 0.83509896129399,
"grad_norm": 0.45775654911994934,
"learning_rate": 5.4248564632793354e-05,
"loss": 0.7748,
"step": 16160
},
{
"epoch": 0.8356157304532066,
"grad_norm": 0.471780925989151,
"learning_rate": 5.3913221205915764e-05,
"loss": 0.7908,
"step": 16170
},
{
"epoch": 0.8361324996124231,
"grad_norm": 0.4380318522453308,
"learning_rate": 5.3579950741055e-05,
"loss": 0.7871,
"step": 16180
},
{
"epoch": 0.8366492687716397,
"grad_norm": 0.45614588260650635,
"learning_rate": 5.324874042396992e-05,
"loss": 0.7717,
"step": 16190
},
{
"epoch": 0.8371660379308563,
"grad_norm": 0.42838895320892334,
"learning_rate": 5.29195775196321e-05,
"loss": 0.7816,
"step": 16200
},
{
"epoch": 0.8376828070900728,
"grad_norm": 0.47133561968803406,
"learning_rate": 5.259244937173599e-05,
"loss": 0.7732,
"step": 16210
},
{
"epoch": 0.8381995762492894,
"grad_norm": 0.42173993587493896,
"learning_rate": 5.226734340221249e-05,
"loss": 0.7687,
"step": 16220
},
{
"epoch": 0.838716345408506,
"grad_norm": 0.42915183305740356,
"learning_rate": 5.194424711074507e-05,
"loss": 0.7866,
"step": 16230
},
{
"epoch": 0.8392331145677226,
"grad_norm": 0.4370039999485016,
"learning_rate": 5.1623148074289386e-05,
"loss": 0.7855,
"step": 16240
},
{
"epoch": 0.8397498837269392,
"grad_norm": 0.4343273937702179,
"learning_rate": 5.130403394659548e-05,
"loss": 0.7871,
"step": 16250
},
{
"epoch": 0.8402666528861558,
"grad_norm": 0.4628264009952545,
"learning_rate": 5.0986892457733016e-05,
"loss": 0.7929,
"step": 16260
},
{
"epoch": 0.8407834220453724,
"grad_norm": 0.4544295072555542,
"learning_rate": 5.067171141361967e-05,
"loss": 0.7823,
"step": 16270
},
{
"epoch": 0.8413001912045889,
"grad_norm": 0.46135464310646057,
"learning_rate": 5.035847869555207e-05,
"loss": 0.7747,
"step": 16280
},
{
"epoch": 0.8418169603638055,
"grad_norm": 0.44259122014045715,
"learning_rate": 5.004718225974004e-05,
"loss": 0.7836,
"step": 16290
},
{
"epoch": 0.8423337295230221,
"grad_norm": 0.44478118419647217,
"learning_rate": 4.9737810136843286e-05,
"loss": 0.7664,
"step": 16300
},
{
"epoch": 0.8428504986822386,
"grad_norm": 0.44629231095314026,
"learning_rate": 4.943035043151143e-05,
"loss": 0.7906,
"step": 16310
},
{
"epoch": 0.8433672678414552,
"grad_norm": 0.4398927092552185,
"learning_rate": 4.912479132192638e-05,
"loss": 0.7835,
"step": 16320
},
{
"epoch": 0.8438840370006718,
"grad_norm": 0.4557620882987976,
"learning_rate": 4.882112105934801e-05,
"loss": 0.7727,
"step": 16330
},
{
"epoch": 0.8444008061598883,
"grad_norm": 0.45272544026374817,
"learning_rate": 4.851932796766221e-05,
"loss": 0.781,
"step": 16340
},
{
"epoch": 0.8449175753191049,
"grad_norm": 0.44196563959121704,
"learning_rate": 4.821940044293212e-05,
"loss": 0.7867,
"step": 16350
},
{
"epoch": 0.8454343444783216,
"grad_norm": 0.44495323300361633,
"learning_rate": 4.79213269529519e-05,
"loss": 0.7791,
"step": 16360
},
{
"epoch": 0.8459511136375382,
"grad_norm": 0.4298705756664276,
"learning_rate": 4.76250960368032e-05,
"loss": 0.7924,
"step": 16370
},
{
"epoch": 0.8464678827967547,
"grad_norm": 0.4538145065307617,
"learning_rate": 4.7330696304414696e-05,
"loss": 0.801,
"step": 16380
},
{
"epoch": 0.8469846519559713,
"grad_norm": 0.437732458114624,
"learning_rate": 4.703811643612394e-05,
"loss": 0.7953,
"step": 16390
},
{
"epoch": 0.8475014211151879,
"grad_norm": 0.441617876291275,
"learning_rate": 4.674734518224231e-05,
"loss": 0.772,
"step": 16400
},
{
"epoch": 0.8480181902744044,
"grad_norm": 0.42918652296066284,
"learning_rate": 4.645837136262228e-05,
"loss": 0.7839,
"step": 16410
},
{
"epoch": 0.848534959433621,
"grad_norm": 0.44365042448043823,
"learning_rate": 4.617118386622768e-05,
"loss": 0.7774,
"step": 16420
},
{
"epoch": 0.8490517285928376,
"grad_norm": 0.43790024518966675,
"learning_rate": 4.588577165070638e-05,
"loss": 0.7821,
"step": 16430
},
{
"epoch": 0.8495684977520541,
"grad_norm": 0.4523584246635437,
"learning_rate": 4.5602123741965806e-05,
"loss": 0.7689,
"step": 16440
},
{
"epoch": 0.8500852669112707,
"grad_norm": 0.438987135887146,
"learning_rate": 4.5320229233750884e-05,
"loss": 0.7774,
"step": 16450
},
{
"epoch": 0.8506020360704873,
"grad_norm": 0.4385901987552643,
"learning_rate": 4.504007728722478e-05,
"loss": 0.7767,
"step": 16460
},
{
"epoch": 0.8511188052297038,
"grad_norm": 0.44286254048347473,
"learning_rate": 4.4761657130552136e-05,
"loss": 0.7893,
"step": 16470
},
{
"epoch": 0.8516355743889205,
"grad_norm": 0.43227192759513855,
"learning_rate": 4.448495805848479e-05,
"loss": 0.7632,
"step": 16480
},
{
"epoch": 0.8521523435481371,
"grad_norm": 0.4544907510280609,
"learning_rate": 4.420996943195034e-05,
"loss": 0.7812,
"step": 16490
},
{
"epoch": 0.8526691127073536,
"grad_norm": 0.46841660141944885,
"learning_rate": 4.393668067764288e-05,
"loss": 0.7712,
"step": 16500
},
{
"epoch": 0.8531858818665702,
"grad_norm": 0.45919257402420044,
"learning_rate": 4.3665081287616635e-05,
"loss": 0.7757,
"step": 16510
},
{
"epoch": 0.8537026510257868,
"grad_norm": 0.44672319293022156,
"learning_rate": 4.339516081888175e-05,
"loss": 0.7787,
"step": 16520
},
{
"epoch": 0.8542194201850034,
"grad_norm": 0.445287823677063,
"learning_rate": 4.312690889300296e-05,
"loss": 0.7787,
"step": 16530
},
{
"epoch": 0.8547361893442199,
"grad_norm": 0.46268194913864136,
"learning_rate": 4.286031519570033e-05,
"loss": 0.7757,
"step": 16540
},
{
"epoch": 0.8552529585034365,
"grad_norm": 0.434190034866333,
"learning_rate": 4.2595369476452845e-05,
"loss": 0.7733,
"step": 16550
},
{
"epoch": 0.8557697276626531,
"grad_norm": 0.4440845549106598,
"learning_rate": 4.233206154810416e-05,
"loss": 0.7667,
"step": 16560
},
{
"epoch": 0.8562864968218696,
"grad_norm": 0.43531450629234314,
"learning_rate": 4.2070381286470965e-05,
"loss": 0.7712,
"step": 16570
},
{
"epoch": 0.8568032659810862,
"grad_norm": 0.46447721123695374,
"learning_rate": 4.181031862995373e-05,
"loss": 0.7679,
"step": 16580
},
{
"epoch": 0.8573200351403029,
"grad_norm": 0.43442919850349426,
"learning_rate": 4.155186357914973e-05,
"loss": 0.7959,
"step": 16590
},
{
"epoch": 0.8578368042995194,
"grad_norm": 0.4343065917491913,
"learning_rate": 4.129500619646871e-05,
"loss": 0.7829,
"step": 16600
},
{
"epoch": 0.858353573458736,
"grad_norm": 0.4585905969142914,
"learning_rate": 4.103973660575065e-05,
"loss": 0.7786,
"step": 16610
},
{
"epoch": 0.8588703426179526,
"grad_norm": 0.43392133712768555,
"learning_rate": 4.078604499188617e-05,
"loss": 0.7773,
"step": 16620
},
{
"epoch": 0.8593871117771691,
"grad_norm": 0.43312516808509827,
"learning_rate": 4.053392160043896e-05,
"loss": 0.7678,
"step": 16630
},
{
"epoch": 0.8599038809363857,
"grad_norm": 0.4381249248981476,
"learning_rate": 4.028335673727093e-05,
"loss": 0.7724,
"step": 16640
},
{
"epoch": 0.8604206500956023,
"grad_norm": 0.4337814152240753,
"learning_rate": 4.0034340768169274e-05,
"loss": 0.7823,
"step": 16650
},
{
"epoch": 0.8609374192548189,
"grad_norm": 0.4437348246574402,
"learning_rate": 3.978686411847619e-05,
"loss": 0.7926,
"step": 16660
},
{
"epoch": 0.8614541884140354,
"grad_norm": 0.4341773986816406,
"learning_rate": 3.954091727272062e-05,
"loss": 0.7826,
"step": 16670
},
{
"epoch": 0.861970957573252,
"grad_norm": 0.44881367683410645,
"learning_rate": 3.929649077425246e-05,
"loss": 0.7704,
"step": 16680
},
{
"epoch": 0.8624877267324687,
"grad_norm": 0.4502032697200775,
"learning_rate": 3.9053575224878926e-05,
"loss": 0.7816,
"step": 16690
},
{
"epoch": 0.8630044958916852,
"grad_norm": 0.47224000096321106,
"learning_rate": 3.881216128450315e-05,
"loss": 0.7736,
"step": 16700
},
{
"epoch": 0.8635212650509018,
"grad_norm": 0.4375690221786499,
"learning_rate": 3.857223967076515e-05,
"loss": 0.7812,
"step": 16710
},
{
"epoch": 0.8640380342101184,
"grad_norm": 0.4506520926952362,
"learning_rate": 3.833380115868479e-05,
"loss": 0.7993,
"step": 16720
},
{
"epoch": 0.8645548033693349,
"grad_norm": 0.45109614729881287,
"learning_rate": 3.809683658030725e-05,
"loss": 0.7912,
"step": 16730
},
{
"epoch": 0.8650715725285515,
"grad_norm": 0.4557834267616272,
"learning_rate": 3.7861336824350335e-05,
"loss": 0.7775,
"step": 16740
},
{
"epoch": 0.8655883416877681,
"grad_norm": 0.43183183670043945,
"learning_rate": 3.7627292835854304e-05,
"loss": 0.7656,
"step": 16750
},
{
"epoch": 0.8661051108469846,
"grad_norm": 0.43516460061073303,
"learning_rate": 3.7394695615833586e-05,
"loss": 0.7817,
"step": 16760
},
{
"epoch": 0.8666218800062012,
"grad_norm": 0.45719340443611145,
"learning_rate": 3.7163536220930875e-05,
"loss": 0.7886,
"step": 16770
},
{
"epoch": 0.8671386491654178,
"grad_norm": 0.45268991589546204,
"learning_rate": 3.693380576307314e-05,
"loss": 0.7874,
"step": 16780
},
{
"epoch": 0.8676554183246343,
"grad_norm": 0.43802937865257263,
"learning_rate": 3.6705495409130015e-05,
"loss": 0.7802,
"step": 16790
},
{
"epoch": 0.8681721874838509,
"grad_norm": 0.44268324971199036,
"learning_rate": 3.647859638057403e-05,
"loss": 0.7695,
"step": 16800
},
{
"epoch": 0.8686889566430676,
"grad_norm": 0.4444487988948822,
"learning_rate": 3.625309995314319e-05,
"loss": 0.7831,
"step": 16810
},
{
"epoch": 0.8692057258022842,
"grad_norm": 0.4433843493461609,
"learning_rate": 3.602899745650546e-05,
"loss": 0.7795,
"step": 16820
},
{
"epoch": 0.8697224949615007,
"grad_norm": 0.45644548535346985,
"learning_rate": 3.580628027392539e-05,
"loss": 0.7705,
"step": 16830
},
{
"epoch": 0.8702392641207173,
"grad_norm": 0.4484211802482605,
"learning_rate": 3.558493984193286e-05,
"loss": 0.7708,
"step": 16840
},
{
"epoch": 0.8707560332799339,
"grad_norm": 0.46782976388931274,
"learning_rate": 3.536496764999374e-05,
"loss": 0.7723,
"step": 16850
},
{
"epoch": 0.8712728024391504,
"grad_norm": 0.4290997087955475,
"learning_rate": 3.5146355240182734e-05,
"loss": 0.7832,
"step": 16860
},
{
"epoch": 0.871789571598367,
"grad_norm": 0.449011892080307,
"learning_rate": 3.492909420685807e-05,
"loss": 0.786,
"step": 16870
},
{
"epoch": 0.8723063407575836,
"grad_norm": 0.4471029043197632,
"learning_rate": 3.471317619633846e-05,
"loss": 0.7797,
"step": 16880
},
{
"epoch": 0.8728231099168001,
"grad_norm": 0.41699501872062683,
"learning_rate": 3.449859290658173e-05,
"loss": 0.7732,
"step": 16890
},
{
"epoch": 0.8733398790760167,
"grad_norm": 0.42831024527549744,
"learning_rate": 3.428533608686573e-05,
"loss": 0.7711,
"step": 16900
},
{
"epoch": 0.8738566482352333,
"grad_norm": 0.44072601199150085,
"learning_rate": 3.407339753747102e-05,
"loss": 0.7796,
"step": 16910
},
{
"epoch": 0.8743734173944498,
"grad_norm": 0.43595975637435913,
"learning_rate": 3.386276910936564e-05,
"loss": 0.7583,
"step": 16920
},
{
"epoch": 0.8748901865536665,
"grad_norm": 0.47178915143013,
"learning_rate": 3.365344270389179e-05,
"loss": 0.7815,
"step": 16930
},
{
"epoch": 0.8754069557128831,
"grad_norm": 0.4295157790184021,
"learning_rate": 3.344541027245434e-05,
"loss": 0.7664,
"step": 16940
},
{
"epoch": 0.8759237248720997,
"grad_norm": 0.43913745880126953,
"learning_rate": 3.323866381621149e-05,
"loss": 0.767,
"step": 16950
},
{
"epoch": 0.8764404940313162,
"grad_norm": 0.4710383415222168,
"learning_rate": 3.3033195385767116e-05,
"loss": 0.7841,
"step": 16960
},
{
"epoch": 0.8769572631905328,
"grad_norm": 0.4420885443687439,
"learning_rate": 3.282899708086518e-05,
"loss": 0.7809,
"step": 16970
},
{
"epoch": 0.8774740323497494,
"grad_norm": 0.4158540666103363,
"learning_rate": 3.262606105008591e-05,
"loss": 0.7677,
"step": 16980
},
{
"epoch": 0.8779908015089659,
"grad_norm": 0.4570242464542389,
"learning_rate": 3.242437949054398e-05,
"loss": 0.7651,
"step": 16990
},
{
"epoch": 0.8785075706681825,
"grad_norm": 0.4389027953147888,
"learning_rate": 3.2223944647588423e-05,
"loss": 0.7688,
"step": 17000
},
{
"epoch": 0.8790243398273991,
"grad_norm": 0.4603040814399719,
"learning_rate": 3.202474881450452e-05,
"loss": 0.7836,
"step": 17010
},
{
"epoch": 0.8795411089866156,
"grad_norm": 0.43595853447914124,
"learning_rate": 3.18267843322174e-05,
"loss": 0.7632,
"step": 17020
},
{
"epoch": 0.8800578781458323,
"grad_norm": 0.45017024874687195,
"learning_rate": 3.163004358899766e-05,
"loss": 0.7783,
"step": 17030
},
{
"epoch": 0.8805746473050489,
"grad_norm": 0.4486757516860962,
"learning_rate": 3.143451902016862e-05,
"loss": 0.7764,
"step": 17040
},
{
"epoch": 0.8810914164642654,
"grad_norm": 0.44407910108566284,
"learning_rate": 3.124020310781543e-05,
"loss": 0.768,
"step": 17050
},
{
"epoch": 0.881608185623482,
"grad_norm": 0.43660351634025574,
"learning_rate": 3.1047088380496114e-05,
"loss": 0.7758,
"step": 17060
},
{
"epoch": 0.8821249547826986,
"grad_norm": 0.4449329674243927,
"learning_rate": 3.0855167412954175e-05,
"loss": 0.7875,
"step": 17070
},
{
"epoch": 0.8826417239419152,
"grad_norm": 0.43863120675086975,
"learning_rate": 3.066443282583321e-05,
"loss": 0.7723,
"step": 17080
},
{
"epoch": 0.8831584931011317,
"grad_norm": 0.4402186870574951,
"learning_rate": 3.0474877285393036e-05,
"loss": 0.7713,
"step": 17090
},
{
"epoch": 0.8836752622603483,
"grad_norm": 0.47123128175735474,
"learning_rate": 3.028649350322787e-05,
"loss": 0.7822,
"step": 17100
},
{
"epoch": 0.8841920314195649,
"grad_norm": 0.44672438502311707,
"learning_rate": 3.0099274235985934e-05,
"loss": 0.7716,
"step": 17110
},
{
"epoch": 0.8847088005787814,
"grad_norm": 0.4311140179634094,
"learning_rate": 2.9913212285091083e-05,
"loss": 0.7735,
"step": 17120
},
{
"epoch": 0.885225569737998,
"grad_norm": 0.42859673500061035,
"learning_rate": 2.9728300496465886e-05,
"loss": 0.768,
"step": 17130
},
{
"epoch": 0.8857423388972147,
"grad_norm": 0.4675106406211853,
"learning_rate": 2.954453176025668e-05,
"loss": 0.7915,
"step": 17140
},
{
"epoch": 0.8862591080564312,
"grad_norm": 0.44611257314682007,
"learning_rate": 2.936189901056014e-05,
"loss": 0.7661,
"step": 17150
},
{
"epoch": 0.8867758772156478,
"grad_norm": 0.4537068009376526,
"learning_rate": 2.918039522515154e-05,
"loss": 0.7732,
"step": 17160
},
{
"epoch": 0.8872926463748644,
"grad_norm": 0.451235830783844,
"learning_rate": 2.900001342521487e-05,
"loss": 0.7765,
"step": 17170
},
{
"epoch": 0.8878094155340809,
"grad_norm": 0.42030608654022217,
"learning_rate": 2.882074667507437e-05,
"loss": 0.764,
"step": 17180
},
{
"epoch": 0.8883261846932975,
"grad_norm": 0.4544169306755066,
"learning_rate": 2.8642588081927974e-05,
"loss": 0.7751,
"step": 17190
},
{
"epoch": 0.8888429538525141,
"grad_norm": 0.4388182759284973,
"learning_rate": 2.8465530795582176e-05,
"loss": 0.7677,
"step": 17200
},
{
"epoch": 0.8893597230117306,
"grad_norm": 0.4463309645652771,
"learning_rate": 2.8289568008188735e-05,
"loss": 0.7847,
"step": 17210
},
{
"epoch": 0.8898764921709472,
"grad_norm": 0.42829135060310364,
"learning_rate": 2.8114692953982826e-05,
"loss": 0.7622,
"step": 17220
},
{
"epoch": 0.8903932613301638,
"grad_norm": 0.4384378492832184,
"learning_rate": 2.7940898909022972e-05,
"loss": 0.7695,
"step": 17230
},
{
"epoch": 0.8909100304893804,
"grad_norm": 0.4420071542263031,
"learning_rate": 2.7768179190932436e-05,
"loss": 0.7716,
"step": 17240
},
{
"epoch": 0.891426799648597,
"grad_norm": 0.4406958818435669,
"learning_rate": 2.7596527158642362e-05,
"loss": 0.772,
"step": 17250
},
{
"epoch": 0.8919435688078136,
"grad_norm": 0.46476542949676514,
"learning_rate": 2.7425936212136382e-05,
"loss": 0.7747,
"step": 17260
},
{
"epoch": 0.8924603379670302,
"grad_norm": 0.44601190090179443,
"learning_rate": 2.7256399792196816e-05,
"loss": 0.7739,
"step": 17270
},
{
"epoch": 0.8929771071262467,
"grad_norm": 0.4409795105457306,
"learning_rate": 2.7087911380152546e-05,
"loss": 0.7703,
"step": 17280
},
{
"epoch": 0.8934938762854633,
"grad_norm": 0.4447353780269623,
"learning_rate": 2.6920464497628288e-05,
"loss": 0.7713,
"step": 17290
},
{
"epoch": 0.8940106454446799,
"grad_norm": 0.42424049973487854,
"learning_rate": 2.6754052706295595e-05,
"loss": 0.7662,
"step": 17300
},
{
"epoch": 0.8945274146038964,
"grad_norm": 0.4320373237133026,
"learning_rate": 2.6588669607625194e-05,
"loss": 0.764,
"step": 17310
},
{
"epoch": 0.895044183763113,
"grad_norm": 0.4584170877933502,
"learning_rate": 2.6424308842641074e-05,
"loss": 0.7697,
"step": 17320
},
{
"epoch": 0.8955609529223296,
"grad_norm": 0.4255240261554718,
"learning_rate": 2.6260964091675873e-05,
"loss": 0.7638,
"step": 17330
},
{
"epoch": 0.8960777220815461,
"grad_norm": 0.4410153329372406,
"learning_rate": 2.6098629074128e-05,
"loss": 0.7722,
"step": 17340
},
{
"epoch": 0.8965944912407627,
"grad_norm": 0.4603617787361145,
"learning_rate": 2.593729754822004e-05,
"loss": 0.7764,
"step": 17350
},
{
"epoch": 0.8971112603999793,
"grad_norm": 0.4616399109363556,
"learning_rate": 2.5776963310758847e-05,
"loss": 0.7828,
"step": 17360
},
{
"epoch": 0.897628029559196,
"grad_norm": 0.4478990435600281,
"learning_rate": 2.5617620196896944e-05,
"loss": 0.7677,
"step": 17370
},
{
"epoch": 0.8981447987184125,
"grad_norm": 0.4245089292526245,
"learning_rate": 2.545926207989558e-05,
"loss": 0.7751,
"step": 17380
},
{
"epoch": 0.8986615678776291,
"grad_norm": 0.4588530957698822,
"learning_rate": 2.530188287088909e-05,
"loss": 0.7735,
"step": 17390
},
{
"epoch": 0.8991783370368457,
"grad_norm": 0.4587204158306122,
"learning_rate": 2.5145476518650782e-05,
"loss": 0.7804,
"step": 17400
},
{
"epoch": 0.8996951061960622,
"grad_norm": 0.4349258244037628,
"learning_rate": 2.499003700936031e-05,
"loss": 0.78,
"step": 17410
},
{
"epoch": 0.9002118753552788,
"grad_norm": 0.46240687370300293,
"learning_rate": 2.4835558366372383e-05,
"loss": 0.7741,
"step": 17420
},
{
"epoch": 0.9007286445144954,
"grad_norm": 0.43434906005859375,
"learning_rate": 2.4682034649987037e-05,
"loss": 0.7757,
"step": 17430
},
{
"epoch": 0.9012454136737119,
"grad_norm": 0.45485690236091614,
"learning_rate": 2.4529459957221164e-05,
"loss": 0.7614,
"step": 17440
},
{
"epoch": 0.9017621828329285,
"grad_norm": 0.451511025428772,
"learning_rate": 2.4377828421581636e-05,
"loss": 0.775,
"step": 17450
},
{
"epoch": 0.9022789519921451,
"grad_norm": 0.44211798906326294,
"learning_rate": 2.422713421283965e-05,
"loss": 0.7715,
"step": 17460
},
{
"epoch": 0.9027957211513616,
"grad_norm": 0.43941619992256165,
"learning_rate": 2.4077371536806647e-05,
"loss": 0.7762,
"step": 17470
},
{
"epoch": 0.9033124903105783,
"grad_norm": 0.4729272723197937,
"learning_rate": 2.392853463511143e-05,
"loss": 0.7889,
"step": 17480
},
{
"epoch": 0.9038292594697949,
"grad_norm": 0.45001113414764404,
"learning_rate": 2.3780617784978833e-05,
"loss": 0.7644,
"step": 17490
},
{
"epoch": 0.9043460286290114,
"grad_norm": 0.44931286573410034,
"learning_rate": 2.3633615299009652e-05,
"loss": 0.7628,
"step": 17500
},
{
"epoch": 0.904862797788228,
"grad_norm": 0.43167644739151,
"learning_rate": 2.348752152496193e-05,
"loss": 0.7707,
"step": 17510
},
{
"epoch": 0.9053795669474446,
"grad_norm": 0.4542749226093292,
"learning_rate": 2.33423308455337e-05,
"loss": 0.7687,
"step": 17520
},
{
"epoch": 0.9058963361066612,
"grad_norm": 0.4356542229652405,
"learning_rate": 2.319803767814693e-05,
"loss": 0.7656,
"step": 17530
},
{
"epoch": 0.9064131052658777,
"grad_norm": 0.4345816373825073,
"learning_rate": 2.305463647473293e-05,
"loss": 0.7564,
"step": 17540
},
{
"epoch": 0.9069298744250943,
"grad_norm": 0.4554193317890167,
"learning_rate": 2.291212172151897e-05,
"loss": 0.7659,
"step": 17550
},
{
"epoch": 0.9074466435843109,
"grad_norm": 0.4463479518890381,
"learning_rate": 2.2770487938816346e-05,
"loss": 0.7608,
"step": 17560
},
{
"epoch": 0.9079634127435274,
"grad_norm": 0.4824206829071045,
"learning_rate": 2.262972968080962e-05,
"loss": 0.7768,
"step": 17570
},
{
"epoch": 0.908480181902744,
"grad_norm": 0.4427326023578644,
"learning_rate": 2.248984153534727e-05,
"loss": 0.7791,
"step": 17580
},
{
"epoch": 0.9089969510619607,
"grad_norm": 0.4576285779476166,
"learning_rate": 2.2350818123733565e-05,
"loss": 0.7788,
"step": 17590
},
{
"epoch": 0.9095137202211772,
"grad_norm": 0.40807288885116577,
"learning_rate": 2.2212654100521793e-05,
"loss": 0.7733,
"step": 17600
},
{
"epoch": 0.9100304893803938,
"grad_norm": 0.4429195821285248,
"learning_rate": 2.20753441533087e-05,
"loss": 0.796,
"step": 17610
},
{
"epoch": 0.9105472585396104,
"grad_norm": 0.4344060719013214,
"learning_rate": 2.19388830025302e-05,
"loss": 0.7661,
"step": 17620
},
{
"epoch": 0.9110640276988269,
"grad_norm": 0.4657835364341736,
"learning_rate": 2.180326540125846e-05,
"loss": 0.7738,
"step": 17630
},
{
"epoch": 0.9115807968580435,
"grad_norm": 0.4533781111240387,
"learning_rate": 2.166848613500005e-05,
"loss": 0.7719,
"step": 17640
},
{
"epoch": 0.9120975660172601,
"grad_norm": 0.43933114409446716,
"learning_rate": 2.1534540021495556e-05,
"loss": 0.769,
"step": 17650
},
{
"epoch": 0.9126143351764767,
"grad_norm": 0.439761221408844,
"learning_rate": 2.140142191052022e-05,
"loss": 0.7698,
"step": 17660
},
{
"epoch": 0.9131311043356932,
"grad_norm": 0.471292644739151,
"learning_rate": 2.1269126683685998e-05,
"loss": 0.7586,
"step": 17670
},
{
"epoch": 0.9136478734949098,
"grad_norm": 0.45629554986953735,
"learning_rate": 2.1137649254244677e-05,
"loss": 0.794,
"step": 17680
},
{
"epoch": 0.9141646426541264,
"grad_norm": 0.4637652039527893,
"learning_rate": 2.1006984566892386e-05,
"loss": 0.7757,
"step": 17690
},
{
"epoch": 0.914681411813343,
"grad_norm": 0.4626142382621765,
"learning_rate": 2.087712759757512e-05,
"loss": 0.7778,
"step": 17700
},
{
"epoch": 0.9151981809725596,
"grad_norm": 0.4568713903427124,
"learning_rate": 2.074807335329564e-05,
"loss": 0.7972,
"step": 17710
},
{
"epoch": 0.9157149501317762,
"grad_norm": 0.43964695930480957,
"learning_rate": 2.061981687192147e-05,
"loss": 0.7651,
"step": 17720
},
{
"epoch": 0.9162317192909927,
"grad_norm": 0.45957452058792114,
"learning_rate": 2.0492353221994066e-05,
"loss": 0.7744,
"step": 17730
},
{
"epoch": 0.9167484884502093,
"grad_norm": 0.42849215865135193,
"learning_rate": 2.0365677502539268e-05,
"loss": 0.7602,
"step": 17740
},
{
"epoch": 0.9172652576094259,
"grad_norm": 0.4392319619655609,
"learning_rate": 2.0239784842878798e-05,
"loss": 0.7822,
"step": 17750
},
{
"epoch": 0.9177820267686424,
"grad_norm": 0.43897444009780884,
"learning_rate": 2.011467040244303e-05,
"loss": 0.7793,
"step": 17760
},
{
"epoch": 0.918298795927859,
"grad_norm": 0.4271240532398224,
"learning_rate": 1.9990329370584816e-05,
"loss": 0.7727,
"step": 17770
},
{
"epoch": 0.9188155650870756,
"grad_norm": 0.43358883261680603,
"learning_rate": 1.9866756966394584e-05,
"loss": 0.7884,
"step": 17780
},
{
"epoch": 0.9193323342462921,
"grad_norm": 0.4576852023601532,
"learning_rate": 1.9743948438516452e-05,
"loss": 0.7845,
"step": 17790
},
{
"epoch": 0.9198491034055087,
"grad_norm": 0.4521750211715698,
"learning_rate": 1.962189906496559e-05,
"loss": 0.7652,
"step": 17800
},
{
"epoch": 0.9203658725647254,
"grad_norm": 0.4462205469608307,
"learning_rate": 1.9500604152946586e-05,
"loss": 0.7748,
"step": 17810
},
{
"epoch": 0.920882641723942,
"grad_norm": 0.4531271457672119,
"learning_rate": 1.9380059038673104e-05,
"loss": 0.7843,
"step": 17820
},
{
"epoch": 0.9213994108831585,
"grad_norm": 0.4446341097354889,
"learning_rate": 1.9260259087188497e-05,
"loss": 0.7529,
"step": 17830
},
{
"epoch": 0.9219161800423751,
"grad_norm": 0.4507541060447693,
"learning_rate": 1.9141199692187586e-05,
"loss": 0.7641,
"step": 17840
},
{
"epoch": 0.9224329492015917,
"grad_norm": 0.4495556056499481,
"learning_rate": 1.9022876275839615e-05,
"loss": 0.7679,
"step": 17850
},
{
"epoch": 0.9229497183608082,
"grad_norm": 0.448811799287796,
"learning_rate": 1.890528428861213e-05,
"loss": 0.7744,
"step": 17860
},
{
"epoch": 0.9234664875200248,
"grad_norm": 0.45697128772735596,
"learning_rate": 1.8788419209096178e-05,
"loss": 0.7723,
"step": 17870
},
{
"epoch": 0.9239832566792414,
"grad_norm": 0.43319204449653625,
"learning_rate": 1.8672276543832325e-05,
"loss": 0.7901,
"step": 17880
},
{
"epoch": 0.9245000258384579,
"grad_norm": 0.4573897123336792,
"learning_rate": 1.855685182713799e-05,
"loss": 0.7739,
"step": 17890
},
{
"epoch": 0.9250167949976745,
"grad_norm": 0.4467730224132538,
"learning_rate": 1.8442140620935673e-05,
"loss": 0.7709,
"step": 17900
},
{
"epoch": 0.9255335641568911,
"grad_norm": 0.4632819592952728,
"learning_rate": 1.8328138514582353e-05,
"loss": 0.7597,
"step": 17910
},
{
"epoch": 0.9260503333161076,
"grad_norm": 0.45948299765586853,
"learning_rate": 1.821484112469986e-05,
"loss": 0.7795,
"step": 17920
},
{
"epoch": 0.9265671024753243,
"grad_norm": 0.464005708694458,
"learning_rate": 1.810224409500637e-05,
"loss": 0.7693,
"step": 17930
},
{
"epoch": 0.9270838716345409,
"grad_norm": 0.4494501054286957,
"learning_rate": 1.79903430961489e-05,
"loss": 0.7754,
"step": 17940
},
{
"epoch": 0.9276006407937575,
"grad_norm": 0.4453310966491699,
"learning_rate": 1.7879133825536803e-05,
"loss": 0.7703,
"step": 17950
},
{
"epoch": 0.928117409952974,
"grad_norm": 0.4534304141998291,
"learning_rate": 1.7768612007176403e-05,
"loss": 0.7694,
"step": 17960
},
{
"epoch": 0.9286341791121906,
"grad_norm": 0.42768940329551697,
"learning_rate": 1.7658773391506503e-05,
"loss": 0.7753,
"step": 17970
},
{
"epoch": 0.9291509482714072,
"grad_norm": 0.4579961597919464,
"learning_rate": 1.754961375523509e-05,
"loss": 0.7756,
"step": 17980
},
{
"epoch": 0.9296677174306237,
"grad_norm": 0.43378955125808716,
"learning_rate": 1.744112890117683e-05,
"loss": 0.7584,
"step": 17990
},
{
"epoch": 0.9301844865898403,
"grad_norm": 0.4437185823917389,
"learning_rate": 1.7333314658091796e-05,
"loss": 0.7636,
"step": 18000
},
{
"epoch": 0.9307012557490569,
"grad_norm": 0.4335078299045563,
"learning_rate": 1.7226166880525008e-05,
"loss": 0.7676,
"step": 18010
},
{
"epoch": 0.9312180249082734,
"grad_norm": 0.4542897343635559,
"learning_rate": 1.711968144864709e-05,
"loss": 0.7743,
"step": 18020
},
{
"epoch": 0.93173479406749,
"grad_norm": 0.46580132842063904,
"learning_rate": 1.7013854268095815e-05,
"loss": 0.7722,
"step": 18030
},
{
"epoch": 0.9322515632267067,
"grad_norm": 0.4515324532985687,
"learning_rate": 1.6908681269818735e-05,
"loss": 0.7711,
"step": 18040
},
{
"epoch": 0.9327683323859232,
"grad_norm": 0.4366278350353241,
"learning_rate": 1.6804158409916664e-05,
"loss": 0.7707,
"step": 18050
},
{
"epoch": 0.9332851015451398,
"grad_norm": 0.45202723145484924,
"learning_rate": 1.6700281669488236e-05,
"loss": 0.7733,
"step": 18060
},
{
"epoch": 0.9338018707043564,
"grad_norm": 0.4829843044281006,
"learning_rate": 1.6597047054475375e-05,
"loss": 0.7772,
"step": 18070
},
{
"epoch": 0.934318639863573,
"grad_norm": 0.45102638006210327,
"learning_rate": 1.6494450595509677e-05,
"loss": 0.7736,
"step": 18080
},
{
"epoch": 0.9348354090227895,
"grad_norm": 0.43405377864837646,
"learning_rate": 1.639248834775986e-05,
"loss": 0.7655,
"step": 18090
},
{
"epoch": 0.9353521781820061,
"grad_norm": 0.44487160444259644,
"learning_rate": 1.6291156390780006e-05,
"loss": 0.7617,
"step": 18100
},
{
"epoch": 0.9358689473412227,
"grad_norm": 0.4330504238605499,
"learning_rate": 1.6190450828358913e-05,
"loss": 0.7771,
"step": 18110
},
{
"epoch": 0.9363857165004392,
"grad_norm": 0.44895511865615845,
"learning_rate": 1.6090367788370184e-05,
"loss": 0.7787,
"step": 18120
},
{
"epoch": 0.9369024856596558,
"grad_norm": 0.4521077275276184,
"learning_rate": 1.599090342262343e-05,
"loss": 0.7599,
"step": 18130
},
{
"epoch": 0.9374192548188724,
"grad_norm": 0.4501364529132843,
"learning_rate": 1.589205390671625e-05,
"loss": 0.7611,
"step": 18140
},
{
"epoch": 0.937936023978089,
"grad_norm": 0.45777976512908936,
"learning_rate": 1.5793815439887217e-05,
"loss": 0.7609,
"step": 18150
},
{
"epoch": 0.9384527931373056,
"grad_norm": 0.4469406306743622,
"learning_rate": 1.569618424486971e-05,
"loss": 0.7669,
"step": 18160
},
{
"epoch": 0.9389695622965222,
"grad_norm": 0.44795021414756775,
"learning_rate": 1.5599156567746714e-05,
"loss": 0.7748,
"step": 18170
},
{
"epoch": 0.9394863314557387,
"grad_norm": 0.46077170968055725,
"learning_rate": 1.5502728677806457e-05,
"loss": 0.7829,
"step": 18180
},
{
"epoch": 0.9400031006149553,
"grad_norm": 0.4519754946231842,
"learning_rate": 1.5406896867398952e-05,
"loss": 0.7608,
"step": 18190
},
{
"epoch": 0.9405198697741719,
"grad_norm": 0.43412908911705017,
"learning_rate": 1.5311657451793483e-05,
"loss": 0.7739,
"step": 18200
},
{
"epoch": 0.9410366389333884,
"grad_norm": 0.44264018535614014,
"learning_rate": 1.5217006769036868e-05,
"loss": 0.7754,
"step": 18210
},
{
"epoch": 0.941553408092605,
"grad_norm": 0.42187464237213135,
"learning_rate": 1.5122941179812719e-05,
"loss": 0.7649,
"step": 18220
},
{
"epoch": 0.9420701772518216,
"grad_norm": 0.44390153884887695,
"learning_rate": 1.5029457067301455e-05,
"loss": 0.759,
"step": 18230
},
{
"epoch": 0.9425869464110382,
"grad_norm": 0.43942004442214966,
"learning_rate": 1.4936550837041282e-05,
"loss": 0.7693,
"step": 18240
},
{
"epoch": 0.9431037155702547,
"grad_norm": 0.44910815358161926,
"learning_rate": 1.4844218916789941e-05,
"loss": 0.7672,
"step": 18250
},
{
"epoch": 0.9436204847294714,
"grad_norm": 0.4458234906196594,
"learning_rate": 1.4752457756387405e-05,
"loss": 0.7841,
"step": 18260
},
{
"epoch": 0.944137253888688,
"grad_norm": 0.42799797654151917,
"learning_rate": 1.4661263827619318e-05,
"loss": 0.7717,
"step": 18270
},
{
"epoch": 0.9446540230479045,
"grad_norm": 0.4394701421260834,
"learning_rate": 1.4570633624081393e-05,
"loss": 0.7702,
"step": 18280
},
{
"epoch": 0.9451707922071211,
"grad_norm": 0.44984373450279236,
"learning_rate": 1.4480563661044558e-05,
"loss": 0.7719,
"step": 18290
},
{
"epoch": 0.9456875613663377,
"grad_norm": 0.446482390165329,
"learning_rate": 1.4391050475320961e-05,
"loss": 0.7572,
"step": 18300
},
{
"epoch": 0.9462043305255542,
"grad_norm": 0.4424509108066559,
"learning_rate": 1.4302090625130843e-05,
"loss": 0.7773,
"step": 18310
},
{
"epoch": 0.9467210996847708,
"grad_norm": 0.4587627649307251,
"learning_rate": 1.4213680689970162e-05,
"loss": 0.7723,
"step": 18320
},
{
"epoch": 0.9472378688439874,
"grad_norm": 0.4332590699195862,
"learning_rate": 1.4125817270479119e-05,
"loss": 0.7649,
"step": 18330
},
{
"epoch": 0.9477546380032039,
"grad_norm": 0.4457739591598511,
"learning_rate": 1.4038496988311402e-05,
"loss": 0.7722,
"step": 18340
},
{
"epoch": 0.9482714071624205,
"grad_norm": 0.4352693557739258,
"learning_rate": 1.3951716486004345e-05,
"loss": 0.7592,
"step": 18350
},
{
"epoch": 0.9487881763216371,
"grad_norm": 0.44573667645454407,
"learning_rate": 1.3865472426849772e-05,
"loss": 0.7637,
"step": 18360
},
{
"epoch": 0.9493049454808538,
"grad_norm": 0.4508999288082123,
"learning_rate": 1.3779761494765763e-05,
"loss": 0.7627,
"step": 18370
},
{
"epoch": 0.9498217146400703,
"grad_norm": 0.46261972188949585,
"learning_rate": 1.3694580394169099e-05,
"loss": 0.7798,
"step": 18380
},
{
"epoch": 0.9503384837992869,
"grad_norm": 0.446575790643692,
"learning_rate": 1.360992584984858e-05,
"loss": 0.7636,
"step": 18390
},
{
"epoch": 0.9508552529585035,
"grad_norm": 0.4478476941585541,
"learning_rate": 1.3525794606839085e-05,
"loss": 0.7757,
"step": 18400
},
{
"epoch": 0.95137202211772,
"grad_norm": 0.4484612047672272,
"learning_rate": 1.3442183430296398e-05,
"loss": 0.7695,
"step": 18410
},
{
"epoch": 0.9518887912769366,
"grad_norm": 0.45452138781547546,
"learning_rate": 1.3359089105372866e-05,
"loss": 0.7659,
"step": 18420
},
{
"epoch": 0.9524055604361532,
"grad_norm": 0.4534998834133148,
"learning_rate": 1.3276508437093752e-05,
"loss": 0.763,
"step": 18430
},
{
"epoch": 0.9529223295953697,
"grad_norm": 0.43683722615242004,
"learning_rate": 1.3194438250234418e-05,
"loss": 0.7744,
"step": 18440
},
{
"epoch": 0.9534390987545863,
"grad_norm": 0.4494810998439789,
"learning_rate": 1.3112875389198208e-05,
"loss": 0.7645,
"step": 18450
},
{
"epoch": 0.9539558679138029,
"grad_norm": 0.449897825717926,
"learning_rate": 1.3031816717895151e-05,
"loss": 0.7641,
"step": 18460
},
{
"epoch": 0.9544726370730194,
"grad_norm": 0.4382020831108093,
"learning_rate": 1.2951259119621336e-05,
"loss": 0.7748,
"step": 18470
},
{
"epoch": 0.954989406232236,
"grad_norm": 0.46431413292884827,
"learning_rate": 1.2871199496939121e-05,
"loss": 0.7683,
"step": 18480
},
{
"epoch": 0.9555061753914527,
"grad_norm": 0.4337891936302185,
"learning_rate": 1.2791634771557991e-05,
"loss": 0.7561,
"step": 18490
},
{
"epoch": 0.9560229445506692,
"grad_norm": 0.46482157707214355,
"learning_rate": 1.2712561884216234e-05,
"loss": 0.7601,
"step": 18500
},
{
"epoch": 0.9565397137098858,
"grad_norm": 0.4410005211830139,
"learning_rate": 1.2633977794563303e-05,
"loss": 0.773,
"step": 18510
},
{
"epoch": 0.9570564828691024,
"grad_norm": 0.46581384539604187,
"learning_rate": 1.2555879481042893e-05,
"loss": 0.7753,
"step": 18520
},
{
"epoch": 0.957573252028319,
"grad_norm": 0.45101165771484375,
"learning_rate": 1.2478263940776792e-05,
"loss": 0.7647,
"step": 18530
},
{
"epoch": 0.9580900211875355,
"grad_norm": 0.44979819655418396,
"learning_rate": 1.2401128189449399e-05,
"loss": 0.775,
"step": 18540
},
{
"epoch": 0.9586067903467521,
"grad_norm": 0.4470668435096741,
"learning_rate": 1.2324469261193e-05,
"loss": 0.7579,
"step": 18550
},
{
"epoch": 0.9591235595059687,
"grad_norm": 0.4402695596218109,
"learning_rate": 1.2248284208473693e-05,
"loss": 0.7793,
"step": 18560
},
{
"epoch": 0.9596403286651852,
"grad_norm": 0.4400414526462555,
"learning_rate": 1.2172570101978107e-05,
"loss": 0.7725,
"step": 18570
},
{
"epoch": 0.9601570978244018,
"grad_norm": 0.43797457218170166,
"learning_rate": 1.2097324030500717e-05,
"loss": 0.7474,
"step": 18580
},
{
"epoch": 0.9606738669836185,
"grad_norm": 0.47379326820373535,
"learning_rate": 1.2022543100831949e-05,
"loss": 0.7644,
"step": 18590
},
{
"epoch": 0.961190636142835,
"grad_norm": 0.4277331829071045,
"learning_rate": 1.1948224437646907e-05,
"loss": 0.7698,
"step": 18600
},
{
"epoch": 0.9617074053020516,
"grad_norm": 0.46481338143348694,
"learning_rate": 1.1874365183394848e-05,
"loss": 0.7575,
"step": 18610
},
{
"epoch": 0.9622241744612682,
"grad_norm": 0.4436621367931366,
"learning_rate": 1.1800962498189266e-05,
"loss": 0.7714,
"step": 18620
},
{
"epoch": 0.9627409436204847,
"grad_norm": 0.44922277331352234,
"learning_rate": 1.1728013559698744e-05,
"loss": 0.7711,
"step": 18630
},
{
"epoch": 0.9632577127797013,
"grad_norm": 0.4406448006629944,
"learning_rate": 1.1655515563038412e-05,
"loss": 0.7645,
"step": 18640
},
{
"epoch": 0.9637744819389179,
"grad_norm": 0.4575316607952118,
"learning_rate": 1.1583465720662092e-05,
"loss": 0.7774,
"step": 18650
},
{
"epoch": 0.9642912510981345,
"grad_norm": 0.44259268045425415,
"learning_rate": 1.1511861262255142e-05,
"loss": 0.7791,
"step": 18660
},
{
"epoch": 0.964808020257351,
"grad_norm": 0.43396565318107605,
"learning_rate": 1.14406994346279e-05,
"loss": 0.7552,
"step": 18670
},
{
"epoch": 0.9653247894165676,
"grad_norm": 0.4611850082874298,
"learning_rate": 1.1369977501609877e-05,
"loss": 0.7747,
"step": 18680
},
{
"epoch": 0.9658415585757842,
"grad_norm": 0.4555375277996063,
"learning_rate": 1.129969274394449e-05,
"loss": 0.7726,
"step": 18690
},
{
"epoch": 0.9663583277350007,
"grad_norm": 0.4663475453853607,
"learning_rate": 1.1229842459184562e-05,
"loss": 0.7596,
"step": 18700
},
{
"epoch": 0.9668750968942174,
"grad_norm": 0.45513424277305603,
"learning_rate": 1.1160423961588368e-05,
"loss": 0.7813,
"step": 18710
},
{
"epoch": 0.967391866053434,
"grad_norm": 0.4629857838153839,
"learning_rate": 1.1091434582016413e-05,
"loss": 0.7668,
"step": 18720
},
{
"epoch": 0.9679086352126505,
"grad_norm": 0.45282307267189026,
"learning_rate": 1.1022871667828753e-05,
"loss": 0.7543,
"step": 18730
},
{
"epoch": 0.9684254043718671,
"grad_norm": 0.4608106315135956,
"learning_rate": 1.0954732582783043e-05,
"loss": 0.7588,
"step": 18740
},
{
"epoch": 0.9689421735310837,
"grad_norm": 0.44871219992637634,
"learning_rate": 1.088701470693316e-05,
"loss": 0.7681,
"step": 18750
},
{
"epoch": 0.9694589426903002,
"grad_norm": 0.4576722979545593,
"learning_rate": 1.081971543652845e-05,
"loss": 0.7618,
"step": 18760
},
{
"epoch": 0.9699757118495168,
"grad_norm": 0.4332127571105957,
"learning_rate": 1.0752832183913647e-05,
"loss": 0.7586,
"step": 18770
},
{
"epoch": 0.9704924810087334,
"grad_norm": 0.44485628604888916,
"learning_rate": 1.0686362377429339e-05,
"loss": 0.7737,
"step": 18780
},
{
"epoch": 0.9710092501679499,
"grad_norm": 0.45990100502967834,
"learning_rate": 1.0620303461313126e-05,
"loss": 0.7679,
"step": 18790
},
{
"epoch": 0.9715260193271665,
"grad_norm": 0.4547218084335327,
"learning_rate": 1.0554652895601313e-05,
"loss": 0.7559,
"step": 18800
},
{
"epoch": 0.9720427884863831,
"grad_norm": 0.43457552790641785,
"learning_rate": 1.0489408156031289e-05,
"loss": 0.7512,
"step": 18810
},
{
"epoch": 0.9725595576455998,
"grad_norm": 0.44039562344551086,
"learning_rate": 1.0424566733944429e-05,
"loss": 0.7791,
"step": 18820
},
{
"epoch": 0.9730763268048163,
"grad_norm": 0.4435688257217407,
"learning_rate": 1.0360126136189671e-05,
"loss": 0.7738,
"step": 18830
},
{
"epoch": 0.9735930959640329,
"grad_norm": 0.4358065128326416,
"learning_rate": 1.0296083885027623e-05,
"loss": 0.7595,
"step": 18840
},
{
"epoch": 0.9741098651232495,
"grad_norm": 0.4542253613471985,
"learning_rate": 1.0232437518035322e-05,
"loss": 0.7802,
"step": 18850
},
{
"epoch": 0.974626634282466,
"grad_norm": 0.4499568045139313,
"learning_rate": 1.0169184588011541e-05,
"loss": 0.7556,
"step": 18860
},
{
"epoch": 0.9751434034416826,
"grad_norm": 0.42469751834869385,
"learning_rate": 1.0106322662882686e-05,
"loss": 0.7747,
"step": 18870
},
{
"epoch": 0.9756601726008992,
"grad_norm": 0.45162233710289,
"learning_rate": 1.00438493256093e-05,
"loss": 0.7716,
"step": 18880
},
{
"epoch": 0.9761769417601157,
"grad_norm": 0.45597076416015625,
"learning_rate": 9.981762174093112e-06,
"loss": 0.7779,
"step": 18890
},
{
"epoch": 0.9766937109193323,
"grad_norm": 0.4463193714618683,
"learning_rate": 9.920058821084695e-06,
"loss": 0.7686,
"step": 18900
},
{
"epoch": 0.9772104800785489,
"grad_norm": 0.4148988425731659,
"learning_rate": 9.858736894091644e-06,
"loss": 0.753,
"step": 18910
},
{
"epoch": 0.9777272492377654,
"grad_norm": 0.4257926940917969,
"learning_rate": 9.797794035287406e-06,
"loss": 0.7675,
"step": 18920
},
{
"epoch": 0.978244018396982,
"grad_norm": 0.4566889703273773,
"learning_rate": 9.737227901420558e-06,
"loss": 0.7674,
"step": 18930
},
{
"epoch": 0.9787607875561987,
"grad_norm": 0.46036675572395325,
"learning_rate": 9.677036163724766e-06,
"loss": 0.7701,
"step": 18940
},
{
"epoch": 0.9792775567154153,
"grad_norm": 0.4719618260860443,
"learning_rate": 9.617216507829204e-06,
"loss": 0.7577,
"step": 18950
},
{
"epoch": 0.9797943258746318,
"grad_norm": 0.45223793387413025,
"learning_rate": 9.557766633669592e-06,
"loss": 0.7618,
"step": 18960
},
{
"epoch": 0.9803110950338484,
"grad_norm": 0.44620633125305176,
"learning_rate": 9.498684255399747e-06,
"loss": 0.7623,
"step": 18970
},
{
"epoch": 0.980827864193065,
"grad_norm": 0.4350356459617615,
"learning_rate": 9.439967101303683e-06,
"loss": 0.7659,
"step": 18980
},
{
"epoch": 0.9813446333522815,
"grad_norm": 0.434857040643692,
"learning_rate": 9.381612913708292e-06,
"loss": 0.7637,
"step": 18990
},
{
"epoch": 0.9818614025114981,
"grad_norm": 0.44825971126556396,
"learning_rate": 9.323619448896502e-06,
"loss": 0.766,
"step": 19000
},
{
"epoch": 0.9823781716707147,
"grad_norm": 0.4420020282268524,
"learning_rate": 9.26598447702104e-06,
"loss": 0.7644,
"step": 19010
},
{
"epoch": 0.9828949408299312,
"grad_norm": 0.44582831859588623,
"learning_rate": 9.208705782018656e-06,
"loss": 0.7606,
"step": 19020
},
{
"epoch": 0.9834117099891478,
"grad_norm": 0.4383075535297394,
"learning_rate": 9.151781161524964e-06,
"loss": 0.7662,
"step": 19030
},
{
"epoch": 0.9839284791483645,
"grad_norm": 0.4672369062900543,
"learning_rate": 9.095208426789703e-06,
"loss": 0.7623,
"step": 19040
},
{
"epoch": 0.984445248307581,
"grad_norm": 0.4448625445365906,
"learning_rate": 9.03898540259264e-06,
"loss": 0.7767,
"step": 19050
},
{
"epoch": 0.9849620174667976,
"grad_norm": 0.45743006467819214,
"learning_rate": 8.983109927159886e-06,
"loss": 0.7655,
"step": 19060
},
{
"epoch": 0.9854787866260142,
"grad_norm": 0.4571949243545532,
"learning_rate": 8.927579852080794e-06,
"loss": 0.7569,
"step": 19070
},
{
"epoch": 0.9859955557852308,
"grad_norm": 0.4542441666126251,
"learning_rate": 8.872393042225366e-06,
"loss": 0.7726,
"step": 19080
},
{
"epoch": 0.9865123249444473,
"grad_norm": 0.4544001817703247,
"learning_rate": 8.817547375662121e-06,
"loss": 0.7624,
"step": 19090
},
{
"epoch": 0.9870290941036639,
"grad_norm": 0.44613394141197205,
"learning_rate": 8.763040743576555e-06,
"loss": 0.7729,
"step": 19100
},
{
"epoch": 0.9875458632628805,
"grad_norm": 0.4503871202468872,
"learning_rate": 8.708871050190002e-06,
"loss": 0.7619,
"step": 19110
},
{
"epoch": 0.988062632422097,
"grad_norm": 0.45252034068107605,
"learning_rate": 8.65503621267911e-06,
"loss": 0.7617,
"step": 19120
},
{
"epoch": 0.9885794015813136,
"grad_norm": 0.4656429886817932,
"learning_rate": 8.601534161095704e-06,
"loss": 0.7733,
"step": 19130
},
{
"epoch": 0.9890961707405302,
"grad_norm": 0.44941556453704834,
"learning_rate": 8.548362838287236e-06,
"loss": 0.765,
"step": 19140
},
{
"epoch": 0.9896129398997467,
"grad_norm": 0.4554784893989563,
"learning_rate": 8.495520199817657e-06,
"loss": 0.7708,
"step": 19150
},
{
"epoch": 0.9901297090589634,
"grad_norm": 0.44851189851760864,
"learning_rate": 8.443004213888836e-06,
"loss": 0.7548,
"step": 19160
},
{
"epoch": 0.99064647821818,
"grad_norm": 0.43213942646980286,
"learning_rate": 8.390812861262414e-06,
"loss": 0.7583,
"step": 19170
},
{
"epoch": 0.9911632473773965,
"grad_norm": 0.4359610676765442,
"learning_rate": 8.33894413518218e-06,
"loss": 0.7451,
"step": 19180
},
{
"epoch": 0.9916800165366131,
"grad_norm": 0.4492233693599701,
"learning_rate": 8.287396041296902e-06,
"loss": 0.7648,
"step": 19190
},
{
"epoch": 0.9921967856958297,
"grad_norm": 0.45256808400154114,
"learning_rate": 8.236166597583653e-06,
"loss": 0.781,
"step": 19200
},
{
"epoch": 0.9927135548550462,
"grad_norm": 0.45061782002449036,
"learning_rate": 8.185253834271597e-06,
"loss": 0.7828,
"step": 19210
},
{
"epoch": 0.9932303240142628,
"grad_norm": 0.43763041496276855,
"learning_rate": 8.134655793766237e-06,
"loss": 0.7523,
"step": 19220
},
{
"epoch": 0.9937470931734794,
"grad_norm": 0.4337799847126007,
"learning_rate": 8.084370530574186e-06,
"loss": 0.7738,
"step": 19230
},
{
"epoch": 0.994263862332696,
"grad_norm": 0.45650362968444824,
"learning_rate": 8.034396111228312e-06,
"loss": 0.7676,
"step": 19240
},
{
"epoch": 0.9947806314919125,
"grad_norm": 0.458556205034256,
"learning_rate": 7.98473061421344e-06,
"loss": 0.7812,
"step": 19250
},
{
"epoch": 0.9952974006511291,
"grad_norm": 0.4379122853279114,
"learning_rate": 7.935372129892435e-06,
"loss": 0.7653,
"step": 19260
},
{
"epoch": 0.9958141698103458,
"grad_norm": 0.453417032957077,
"learning_rate": 7.886318760432809e-06,
"loss": 0.7701,
"step": 19270
},
{
"epoch": 0.9963309389695623,
"grad_norm": 0.4366815388202667,
"learning_rate": 7.837568619733714e-06,
"loss": 0.7665,
"step": 19280
},
{
"epoch": 0.9968477081287789,
"grad_norm": 0.4635095000267029,
"learning_rate": 7.78911983335346e-06,
"loss": 0.7694,
"step": 19290
},
{
"epoch": 0.9973644772879955,
"grad_norm": 0.4435023069381714,
"learning_rate": 7.740970538437405e-06,
"loss": 0.7689,
"step": 19300
},
{
"epoch": 0.997881246447212,
"grad_norm": 0.432817667722702,
"learning_rate": 7.693118883646362e-06,
"loss": 0.7592,
"step": 19310
},
{
"epoch": 0.9983980156064286,
"grad_norm": 0.45705628395080566,
"learning_rate": 7.64556302908539e-06,
"loss": 0.77,
"step": 19320
},
{
"epoch": 0.9989147847656452,
"grad_norm": 0.45206621289253235,
"learning_rate": 7.598301146233062e-06,
"loss": 0.7665,
"step": 19330
},
{
"epoch": 0.9994315539248617,
"grad_norm": 0.42955172061920166,
"learning_rate": 7.551331417871156e-06,
"loss": 0.7619,
"step": 19340
},
{
"epoch": 0.9999483230840783,
"grad_norm": 0.436574250459671,
"learning_rate": 7.50465203801478e-06,
"loss": 0.7581,
"step": 19350
}
],
"logging_steps": 10,
"max_steps": 19351,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.2816606598299008e+17,
"train_batch_size": 512,
"trial_name": null,
"trial_params": null
}