{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 1680,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0035714285714285713,
"grad_norm": 0.3871030807495117,
"learning_rate": 1.1904761904761906e-07,
"loss": 1.9294867515563965,
"step": 2
},
{
"epoch": 0.007142857142857143,
"grad_norm": 0.34607651829719543,
"learning_rate": 3.5714285714285716e-07,
"loss": 1.931689739227295,
"step": 4
},
{
"epoch": 0.010714285714285714,
"grad_norm": 0.3077217638492584,
"learning_rate": 5.952380952380953e-07,
"loss": 1.859986662864685,
"step": 6
},
{
"epoch": 0.014285714285714285,
"grad_norm": 0.26033300161361694,
"learning_rate": 8.333333333333333e-07,
"loss": 1.8296231031417847,
"step": 8
},
{
"epoch": 0.017857142857142856,
"grad_norm": 0.3577536344528198,
"learning_rate": 1.0714285714285714e-06,
"loss": 1.840135097503662,
"step": 10
},
{
"epoch": 0.02142857142857143,
"grad_norm": 0.35149258375167847,
"learning_rate": 1.3095238095238096e-06,
"loss": 1.718151330947876,
"step": 12
},
{
"epoch": 0.025,
"grad_norm": 0.3105311691761017,
"learning_rate": 1.5476190476190479e-06,
"loss": 1.8123761415481567,
"step": 14
},
{
"epoch": 0.02857142857142857,
"grad_norm": 0.3541400134563446,
"learning_rate": 1.7857142857142859e-06,
"loss": 1.801349401473999,
"step": 16
},
{
"epoch": 0.03214285714285714,
"grad_norm": 0.32876938581466675,
"learning_rate": 2.023809523809524e-06,
"loss": 1.8854210376739502,
"step": 18
},
{
"epoch": 0.03571428571428571,
"grad_norm": 0.9392958283424377,
"learning_rate": 2.261904761904762e-06,
"loss": 1.7024314403533936,
"step": 20
},
{
"epoch": 0.039285714285714285,
"grad_norm": 0.6484195590019226,
"learning_rate": 2.5e-06,
"loss": 1.9459373950958252,
"step": 22
},
{
"epoch": 0.04285714285714286,
"grad_norm": 0.36433079838752747,
"learning_rate": 2.7380952380952387e-06,
"loss": 1.9512709379196167,
"step": 24
},
{
"epoch": 0.04642857142857143,
"grad_norm": 0.4358835220336914,
"learning_rate": 2.9761904761904763e-06,
"loss": 1.7940953969955444,
"step": 26
},
{
"epoch": 0.05,
"grad_norm": 0.506097137928009,
"learning_rate": 3.2142857142857147e-06,
"loss": 1.7999926805496216,
"step": 28
},
{
"epoch": 0.05357142857142857,
"grad_norm": 0.5315778255462646,
"learning_rate": 3.4523809523809528e-06,
"loss": 1.7870306968688965,
"step": 30
},
{
"epoch": 0.05714285714285714,
"grad_norm": 0.30482104420661926,
"learning_rate": 3.690476190476191e-06,
"loss": 1.8913555145263672,
"step": 32
},
{
"epoch": 0.060714285714285714,
"grad_norm": 0.8241702318191528,
"learning_rate": 3.928571428571429e-06,
"loss": 1.9280858039855957,
"step": 34
},
{
"epoch": 0.06428571428571428,
"grad_norm": 0.39840635657310486,
"learning_rate": 4.166666666666667e-06,
"loss": 1.9256908893585205,
"step": 36
},
{
"epoch": 0.06785714285714285,
"grad_norm": 0.33251017332077026,
"learning_rate": 4.404761904761905e-06,
"loss": 1.8829214572906494,
"step": 38
},
{
"epoch": 0.07142857142857142,
"grad_norm": 0.49388226866722107,
"learning_rate": 4.642857142857144e-06,
"loss": 1.8666248321533203,
"step": 40
},
{
"epoch": 0.075,
"grad_norm": 0.28926795721054077,
"learning_rate": 4.880952380952381e-06,
"loss": 1.8469940423965454,
"step": 42
},
{
"epoch": 0.07857142857142857,
"grad_norm": 0.317127525806427,
"learning_rate": 5.119047619047619e-06,
"loss": 1.892695426940918,
"step": 44
},
{
"epoch": 0.08214285714285714,
"grad_norm": 0.8169130682945251,
"learning_rate": 5.357142857142857e-06,
"loss": 1.893534541130066,
"step": 46
},
{
"epoch": 0.08571428571428572,
"grad_norm": 0.27684587240219116,
"learning_rate": 5.595238095238096e-06,
"loss": 1.5699528455734253,
"step": 48
},
{
"epoch": 0.08928571428571429,
"grad_norm": 0.5231921076774597,
"learning_rate": 5.833333333333334e-06,
"loss": 1.6496429443359375,
"step": 50
},
{
"epoch": 0.09285714285714286,
"grad_norm": 0.5755372643470764,
"learning_rate": 6.071428571428571e-06,
"loss": 1.6312464475631714,
"step": 52
},
{
"epoch": 0.09642857142857143,
"grad_norm": 0.40994322299957275,
"learning_rate": 6.30952380952381e-06,
"loss": 1.8703556060791016,
"step": 54
},
{
"epoch": 0.1,
"grad_norm": 0.6402392983436584,
"learning_rate": 6.547619047619048e-06,
"loss": 1.7122882604599,
"step": 56
},
{
"epoch": 0.10357142857142858,
"grad_norm": 0.4092760980129242,
"learning_rate": 6.785714285714287e-06,
"loss": 1.7604010105133057,
"step": 58
},
{
"epoch": 0.10714285714285714,
"grad_norm": 0.41417962312698364,
"learning_rate": 7.023809523809524e-06,
"loss": 2.1781420707702637,
"step": 60
},
{
"epoch": 0.11071428571428571,
"grad_norm": 0.6246824264526367,
"learning_rate": 7.261904761904762e-06,
"loss": 1.7982336282730103,
"step": 62
},
{
"epoch": 0.11428571428571428,
"grad_norm": 0.26309502124786377,
"learning_rate": 7.500000000000001e-06,
"loss": 1.696463704109192,
"step": 64
},
{
"epoch": 0.11785714285714285,
"grad_norm": 0.9458585381507874,
"learning_rate": 7.738095238095238e-06,
"loss": 1.7728084325790405,
"step": 66
},
{
"epoch": 0.12142857142857143,
"grad_norm": 0.22862379252910614,
"learning_rate": 7.976190476190477e-06,
"loss": 1.6821340322494507,
"step": 68
},
{
"epoch": 0.125,
"grad_norm": 0.236324280500412,
"learning_rate": 8.214285714285714e-06,
"loss": 1.7681533098220825,
"step": 70
},
{
"epoch": 0.12857142857142856,
"grad_norm": 0.2597522735595703,
"learning_rate": 8.452380952380953e-06,
"loss": 1.8034054040908813,
"step": 72
},
{
"epoch": 0.13214285714285715,
"grad_norm": 0.24487343430519104,
"learning_rate": 8.690476190476192e-06,
"loss": 1.7554086446762085,
"step": 74
},
{
"epoch": 0.1357142857142857,
"grad_norm": 0.22543826699256897,
"learning_rate": 8.92857142857143e-06,
"loss": 1.7456854581832886,
"step": 76
},
{
"epoch": 0.1392857142857143,
"grad_norm": 0.2380058914422989,
"learning_rate": 9.166666666666666e-06,
"loss": 1.7143663167953491,
"step": 78
},
{
"epoch": 0.14285714285714285,
"grad_norm": 0.26500657200813293,
"learning_rate": 9.404761904761905e-06,
"loss": 1.7059998512268066,
"step": 80
},
{
"epoch": 0.14642857142857144,
"grad_norm": 0.2978551387786865,
"learning_rate": 9.642857142857144e-06,
"loss": 1.7792344093322754,
"step": 82
},
{
"epoch": 0.15,
"grad_norm": 0.2930593490600586,
"learning_rate": 9.880952380952381e-06,
"loss": 1.6987429857254028,
"step": 84
},
{
"epoch": 0.15357142857142858,
"grad_norm": 0.4046596884727478,
"learning_rate": 9.999991282010348e-06,
"loss": 1.7894960641860962,
"step": 86
},
{
"epoch": 0.15714285714285714,
"grad_norm": 0.23502953350543976,
"learning_rate": 9.999921538295799e-06,
"loss": 1.749454379081726,
"step": 88
},
{
"epoch": 0.16071428571428573,
"grad_norm": 0.22283266484737396,
"learning_rate": 9.999782051947632e-06,
"loss": 1.686018943786621,
"step": 90
},
{
"epoch": 0.16428571428571428,
"grad_norm": 0.24027639627456665,
"learning_rate": 9.999572825127696e-06,
"loss": 1.480033040046692,
"step": 92
},
{
"epoch": 0.16785714285714284,
"grad_norm": 0.5684676766395569,
"learning_rate": 9.99929386107872e-06,
"loss": 1.675416350364685,
"step": 94
},
{
"epoch": 0.17142857142857143,
"grad_norm": 1.138840675354004,
"learning_rate": 9.998945164124268e-06,
"loss": 1.7155344486236572,
"step": 96
},
{
"epoch": 0.175,
"grad_norm": 0.2664114534854889,
"learning_rate": 9.998526739668664e-06,
"loss": 1.6043933629989624,
"step": 98
},
{
"epoch": 0.17857142857142858,
"grad_norm": 0.28691864013671875,
"learning_rate": 9.998038594196913e-06,
"loss": 1.6187028884887695,
"step": 100
},
{
"epoch": 0.18214285714285713,
"grad_norm": 0.31850922107696533,
"learning_rate": 9.997480735274608e-06,
"loss": 1.5820776224136353,
"step": 102
},
{
"epoch": 0.18571428571428572,
"grad_norm": 0.23401758074760437,
"learning_rate": 9.996853171547794e-06,
"loss": 1.5967426300048828,
"step": 104
},
{
"epoch": 0.18928571428571428,
"grad_norm": 0.23440219461917877,
"learning_rate": 9.996155912742856e-06,
"loss": 1.6334154605865479,
"step": 106
},
{
"epoch": 0.19285714285714287,
"grad_norm": 0.7341821193695068,
"learning_rate": 9.995388969666348e-06,
"loss": 1.598835825920105,
"step": 108
},
{
"epoch": 0.19642857142857142,
"grad_norm": 0.6320663094520569,
"learning_rate": 9.994552354204844e-06,
"loss": 1.6243830919265747,
"step": 110
},
{
"epoch": 0.2,
"grad_norm": 0.5404586791992188,
"learning_rate": 9.993646079324738e-06,
"loss": 1.566571831703186,
"step": 112
},
{
"epoch": 0.20357142857142857,
"grad_norm": 0.5022917985916138,
"learning_rate": 9.992670159072052e-06,
"loss": 1.6408634185791016,
"step": 114
},
{
"epoch": 0.20714285714285716,
"grad_norm": 1.1218639612197876,
"learning_rate": 9.991624608572215e-06,
"loss": 1.7009669542312622,
"step": 116
},
{
"epoch": 0.21071428571428572,
"grad_norm": 0.5181306004524231,
"learning_rate": 9.990509444029833e-06,
"loss": 1.7996366024017334,
"step": 118
},
{
"epoch": 0.21428571428571427,
"grad_norm": 0.43997907638549805,
"learning_rate": 9.98932468272843e-06,
"loss": 1.7554632425308228,
"step": 120
},
{
"epoch": 0.21785714285714286,
"grad_norm": 0.3227292597293854,
"learning_rate": 9.98807034303019e-06,
"loss": 1.473575472831726,
"step": 122
},
{
"epoch": 0.22142857142857142,
"grad_norm": 0.3611178398132324,
"learning_rate": 9.98674644437566e-06,
"loss": 1.594710350036621,
"step": 124
},
{
"epoch": 0.225,
"grad_norm": 0.9151387214660645,
"learning_rate": 9.985353007283464e-06,
"loss": 1.6291745901107788,
"step": 126
},
{
"epoch": 0.22857142857142856,
"grad_norm": 0.2581241726875305,
"learning_rate": 9.983890053349969e-06,
"loss": 1.2893997430801392,
"step": 128
},
{
"epoch": 0.23214285714285715,
"grad_norm": 0.5861591100692749,
"learning_rate": 9.982357605248963e-06,
"loss": 1.0495647192001343,
"step": 130
},
{
"epoch": 0.2357142857142857,
"grad_norm": 0.46270960569381714,
"learning_rate": 9.980755686731296e-06,
"loss": 1.3306972980499268,
"step": 132
},
{
"epoch": 0.2392857142857143,
"grad_norm": 0.36067521572113037,
"learning_rate": 9.979084322624518e-06,
"loss": 1.5336247682571411,
"step": 134
},
{
"epoch": 0.24285714285714285,
"grad_norm": 0.9336586594581604,
"learning_rate": 9.977343538832486e-06,
"loss": 1.7042999267578125,
"step": 136
},
{
"epoch": 0.24642857142857144,
"grad_norm": 0.387260377407074,
"learning_rate": 9.97553336233497e-06,
"loss": 1.396690011024475,
"step": 138
},
{
"epoch": 0.25,
"grad_norm": 0.4640398919582367,
"learning_rate": 9.973653821187233e-06,
"loss": 1.3623416423797607,
"step": 140
},
{
"epoch": 0.25357142857142856,
"grad_norm": 0.33568593859672546,
"learning_rate": 9.971704944519593e-06,
"loss": 1.3992865085601807,
"step": 142
},
{
"epoch": 0.2571428571428571,
"grad_norm": 0.24492622911930084,
"learning_rate": 9.969686762536973e-06,
"loss": 1.444324016571045,
"step": 144
},
{
"epoch": 0.26071428571428573,
"grad_norm": 0.8304792642593384,
"learning_rate": 9.967599306518438e-06,
"loss": 1.3990877866744995,
"step": 146
},
{
"epoch": 0.2642857142857143,
"grad_norm": 0.623303234577179,
"learning_rate": 9.965442608816704e-06,
"loss": 1.4563076496124268,
"step": 148
},
{
"epoch": 0.26785714285714285,
"grad_norm": 0.31338322162628174,
"learning_rate": 9.963216702857635e-06,
"loss": 1.6392706632614136,
"step": 150
},
{
"epoch": 0.2714285714285714,
"grad_norm": 0.3007861375808716,
"learning_rate": 9.96092162313973e-06,
"loss": 1.5057697296142578,
"step": 152
},
{
"epoch": 0.275,
"grad_norm": 0.15701599419116974,
"learning_rate": 9.958557405233593e-06,
"loss": 1.4597502946853638,
"step": 154
},
{
"epoch": 0.2785714285714286,
"grad_norm": 0.2882039248943329,
"learning_rate": 9.956124085781366e-06,
"loss": 1.3839119672775269,
"step": 156
},
{
"epoch": 0.28214285714285714,
"grad_norm": 0.3199823498725891,
"learning_rate": 9.953621702496178e-06,
"loss": 1.6068451404571533,
"step": 158
},
{
"epoch": 0.2857142857142857,
"grad_norm": 0.24365948140621185,
"learning_rate": 9.951050294161548e-06,
"loss": 1.7299036979675293,
"step": 160
},
{
"epoch": 0.2892857142857143,
"grad_norm": 0.46145617961883545,
"learning_rate": 9.948409900630787e-06,
"loss": 1.3489717245101929,
"step": 162
},
{
"epoch": 0.29285714285714287,
"grad_norm": 0.15912453830242157,
"learning_rate": 9.945700562826394e-06,
"loss": 1.5043880939483643,
"step": 164
},
{
"epoch": 0.29642857142857143,
"grad_norm": 0.1797444075345993,
"learning_rate": 9.942922322739395e-06,
"loss": 1.1060163974761963,
"step": 166
},
{
"epoch": 0.3,
"grad_norm": 0.2711468040943146,
"learning_rate": 9.940075223428718e-06,
"loss": 1.8668510913848877,
"step": 168
},
{
"epoch": 0.30357142857142855,
"grad_norm": 0.1544935554265976,
"learning_rate": 9.93715930902051e-06,
"loss": 1.2808830738067627,
"step": 170
},
{
"epoch": 0.30714285714285716,
"grad_norm": 0.37536972761154175,
"learning_rate": 9.934174624707459e-06,
"loss": 1.4175796508789062,
"step": 172
},
{
"epoch": 0.3107142857142857,
"grad_norm": 0.22495543956756592,
"learning_rate": 9.931121216748092e-06,
"loss": 1.6282312870025635,
"step": 174
},
{
"epoch": 0.3142857142857143,
"grad_norm": 0.3511154055595398,
"learning_rate": 9.927999132466059e-06,
"loss": 1.635170340538025,
"step": 176
},
{
"epoch": 0.31785714285714284,
"grad_norm": 0.47558754682540894,
"learning_rate": 9.924808420249404e-06,
"loss": 1.563542127609253,
"step": 178
},
{
"epoch": 0.32142857142857145,
"grad_norm": 0.6490837335586548,
"learning_rate": 9.921549129549799e-06,
"loss": 2.066225290298462,
"step": 180
},
{
"epoch": 0.325,
"grad_norm": 0.37414857745170593,
"learning_rate": 9.918221310881797e-06,
"loss": 1.2984635829925537,
"step": 182
},
{
"epoch": 0.32857142857142857,
"grad_norm": 0.24293118715286255,
"learning_rate": 9.91482501582204e-06,
"loss": 1.560595989227295,
"step": 184
},
{
"epoch": 0.33214285714285713,
"grad_norm": 1.0313069820404053,
"learning_rate": 9.91136029700846e-06,
"loss": 1.6615456342697144,
"step": 186
},
{
"epoch": 0.3357142857142857,
"grad_norm": 0.2365736961364746,
"learning_rate": 9.907827208139462e-06,
"loss": 1.4550660848617554,
"step": 188
},
{
"epoch": 0.3392857142857143,
"grad_norm": 0.48783159255981445,
"learning_rate": 9.904225803973095e-06,
"loss": 1.1695599555969238,
"step": 190
},
{
"epoch": 0.34285714285714286,
"grad_norm": 0.18840378522872925,
"learning_rate": 9.900556140326203e-06,
"loss": 1.175657033920288,
"step": 192
},
{
"epoch": 0.3464285714285714,
"grad_norm": 0.33193379640579224,
"learning_rate": 9.896818274073555e-06,
"loss": 1.5140769481658936,
"step": 194
},
{
"epoch": 0.35,
"grad_norm": 0.23811382055282593,
"learning_rate": 9.893012263146971e-06,
"loss": 1.5519834756851196,
"step": 196
},
{
"epoch": 0.3535714285714286,
"grad_norm": 0.3118128478527069,
"learning_rate": 9.889138166534416e-06,
"loss": 1.6215221881866455,
"step": 198
},
{
"epoch": 0.35714285714285715,
"grad_norm": 0.20358364284038544,
"learning_rate": 9.885196044279095e-06,
"loss": 1.4951940774917603,
"step": 200
},
{
"epoch": 0.3607142857142857,
"grad_norm": 0.33988499641418457,
"learning_rate": 9.881185957478514e-06,
"loss": 1.5101016759872437,
"step": 202
},
{
"epoch": 0.36428571428571427,
"grad_norm": 0.2685701251029968,
"learning_rate": 9.877107968283538e-06,
"loss": 1.3352025747299194,
"step": 204
},
{
"epoch": 0.3678571428571429,
"grad_norm": 0.24243606626987457,
"learning_rate": 9.872962139897426e-06,
"loss": 1.6032525300979614,
"step": 206
},
{
"epoch": 0.37142857142857144,
"grad_norm": 0.2574315369129181,
"learning_rate": 9.86874853657485e-06,
"loss": 1.4210426807403564,
"step": 208
},
{
"epoch": 0.375,
"grad_norm": 0.30428075790405273,
"learning_rate": 9.864467223620908e-06,
"loss": 1.4541680812835693,
"step": 210
},
{
"epoch": 0.37857142857142856,
"grad_norm": 0.27129194140434265,
"learning_rate": 9.860118267390092e-06,
"loss": 1.5869474411010742,
"step": 212
},
{
"epoch": 0.3821428571428571,
"grad_norm": 0.2757408022880554,
"learning_rate": 9.855701735285285e-06,
"loss": 1.4132391214370728,
"step": 214
},
{
"epoch": 0.38571428571428573,
"grad_norm": 0.8043122291564941,
"learning_rate": 9.851217695756694e-06,
"loss": 1.3423351049423218,
"step": 216
},
{
"epoch": 0.3892857142857143,
"grad_norm": 0.3778972327709198,
"learning_rate": 9.846666218300808e-06,
"loss": 1.4252076148986816,
"step": 218
},
{
"epoch": 0.39285714285714285,
"grad_norm": 0.30076470971107483,
"learning_rate": 9.842047373459305e-06,
"loss": 1.477191686630249,
"step": 220
},
{
"epoch": 0.3964285714285714,
"grad_norm": 0.2551064193248749,
"learning_rate": 9.837361232817964e-06,
"loss": 1.4160501956939697,
"step": 222
},
{
"epoch": 0.4,
"grad_norm": 0.3507143557071686,
"learning_rate": 9.832607869005565e-06,
"loss": 1.3853830099105835,
"step": 224
},
{
"epoch": 0.4035714285714286,
"grad_norm": 0.2576788663864136,
"learning_rate": 9.827787355692749e-06,
"loss": 1.5642895698547363,
"step": 226
},
{
"epoch": 0.40714285714285714,
"grad_norm": 0.6864569187164307,
"learning_rate": 9.822899767590884e-06,
"loss": 1.5939396619796753,
"step": 228
},
{
"epoch": 0.4107142857142857,
"grad_norm": 0.290575236082077,
"learning_rate": 9.817945180450902e-06,
"loss": 1.692050814628601,
"step": 230
},
{
"epoch": 0.4142857142857143,
"grad_norm": 0.2678094506263733,
"learning_rate": 9.812923671062139e-06,
"loss": 1.5078585147857666,
"step": 232
},
{
"epoch": 0.41785714285714287,
"grad_norm": 0.2580035924911499,
"learning_rate": 9.80783531725112e-06,
"loss": 1.5001014471054077,
"step": 234
},
{
"epoch": 0.42142857142857143,
"grad_norm": 0.48962509632110596,
"learning_rate": 9.80268019788038e-06,
"loss": 1.30159592628479,
"step": 236
},
{
"epoch": 0.425,
"grad_norm": 0.3317374587059021,
"learning_rate": 9.79745839284722e-06,
"loss": 1.3308159112930298,
"step": 238
},
{
"epoch": 0.42857142857142855,
"grad_norm": 0.40648195147514343,
"learning_rate": 9.792169983082484e-06,
"loss": 1.2273372411727905,
"step": 240
},
{
"epoch": 0.43214285714285716,
"grad_norm": 0.23908843100070953,
"learning_rate": 9.786815050549295e-06,
"loss": 1.3610113859176636,
"step": 242
},
{
"epoch": 0.4357142857142857,
"grad_norm": 0.30702945590019226,
"learning_rate": 9.781393678241787e-06,
"loss": 1.526265025138855,
"step": 244
},
{
"epoch": 0.4392857142857143,
"grad_norm": 0.3442508578300476,
"learning_rate": 9.775905950183821e-06,
"loss": 1.4831691980361938,
"step": 246
},
{
"epoch": 0.44285714285714284,
"grad_norm": 0.2286010980606079,
"learning_rate": 9.770351951427684e-06,
"loss": 1.5686728954315186,
"step": 248
},
{
"epoch": 0.44642857142857145,
"grad_norm": 0.42278486490249634,
"learning_rate": 9.764731768052762e-06,
"loss": 1.583655834197998,
"step": 250
},
{
"epoch": 0.45,
"grad_norm": 0.2032240778207779,
"learning_rate": 9.75904548716422e-06,
"loss": 1.7357392311096191,
"step": 252
},
{
"epoch": 0.45357142857142857,
"grad_norm": 0.6682279706001282,
"learning_rate": 9.753293196891639e-06,
"loss": 1.4943958520889282,
"step": 254
},
{
"epoch": 0.45714285714285713,
"grad_norm": 0.3326359689235687,
"learning_rate": 9.747474986387655e-06,
"loss": 1.4537054300308228,
"step": 256
},
{
"epoch": 0.4607142857142857,
"grad_norm": 0.31363770365715027,
"learning_rate": 9.74159094582658e-06,
"loss": 1.4956998825073242,
"step": 258
},
{
"epoch": 0.4642857142857143,
"grad_norm": 0.6148827075958252,
"learning_rate": 9.735641166402998e-06,
"loss": 1.3277488946914673,
"step": 260
},
{
"epoch": 0.46785714285714286,
"grad_norm": 0.31903398036956787,
"learning_rate": 9.729625740330363e-06,
"loss": 1.49782133102417,
"step": 262
},
{
"epoch": 0.4714285714285714,
"grad_norm": 0.8235952258110046,
"learning_rate": 9.723544760839555e-06,
"loss": 1.460282325744629,
"step": 264
},
{
"epoch": 0.475,
"grad_norm": 0.3165692090988159,
"learning_rate": 9.717398322177442e-06,
"loss": 1.5261378288269043,
"step": 266
},
{
"epoch": 0.4785714285714286,
"grad_norm": 1.6001724004745483,
"learning_rate": 9.71118651960543e-06,
"loss": 1.7769297361373901,
"step": 268
},
{
"epoch": 0.48214285714285715,
"grad_norm": 0.26702550053596497,
"learning_rate": 9.704909449397962e-06,
"loss": 1.3878670930862427,
"step": 270
},
{
"epoch": 0.4857142857142857,
"grad_norm": 0.18522176146507263,
"learning_rate": 9.69856720884105e-06,
"loss": 1.2690881490707397,
"step": 272
},
{
"epoch": 0.48928571428571427,
"grad_norm": 0.40137895941734314,
"learning_rate": 9.692159896230757e-06,
"loss": 1.3622859716415405,
"step": 274
},
{
"epoch": 0.4928571428571429,
"grad_norm": 0.34499719738960266,
"learning_rate": 9.685687610871666e-06,
"loss": 1.6427959203720093,
"step": 276
},
{
"epoch": 0.49642857142857144,
"grad_norm": 0.3400484621524811,
"learning_rate": 9.679150453075357e-06,
"loss": 1.3161296844482422,
"step": 278
},
{
"epoch": 0.5,
"grad_norm": 0.3539294898509979,
"learning_rate": 9.67254852415884e-06,
"loss": 1.2691534757614136,
"step": 280
},
{
"epoch": 0.5035714285714286,
"grad_norm": 0.2683607339859009,
"learning_rate": 9.665881926442994e-06,
"loss": 1.5461015701293945,
"step": 282
},
{
"epoch": 0.5071428571428571,
"grad_norm": 0.3605668246746063,
"learning_rate": 9.659150763250966e-06,
"loss": 1.6314688920974731,
"step": 284
},
{
"epoch": 0.5107142857142857,
"grad_norm": 0.3184402585029602,
"learning_rate": 9.652355138906591e-06,
"loss": 1.518629789352417,
"step": 286
},
{
"epoch": 0.5142857142857142,
"grad_norm": 0.870186984539032,
"learning_rate": 9.645495158732755e-06,
"loss": 1.143850564956665,
"step": 288
},
{
"epoch": 0.5178571428571429,
"grad_norm": 0.33421170711517334,
"learning_rate": 9.638570929049776e-06,
"loss": 1.0234707593917847,
"step": 290
},
{
"epoch": 0.5214285714285715,
"grad_norm": 0.1717844307422638,
"learning_rate": 9.631582557173751e-06,
"loss": 1.4672911167144775,
"step": 292
},
{
"epoch": 0.525,
"grad_norm": 0.9301527738571167,
"learning_rate": 9.624530151414894e-06,
"loss": 1.2730239629745483,
"step": 294
},
{
"epoch": 0.5285714285714286,
"grad_norm": 0.38581225275993347,
"learning_rate": 9.617413821075852e-06,
"loss": 1.2601397037506104,
"step": 296
},
{
"epoch": 0.5321428571428571,
"grad_norm": 0.23976172506809235,
"learning_rate": 9.61023367645002e-06,
"loss": 1.2101945877075195,
"step": 298
},
{
"epoch": 0.5357142857142857,
"grad_norm": 0.22152353823184967,
"learning_rate": 9.602989828819829e-06,
"loss": 1.542162537574768,
"step": 300
},
{
"epoch": 0.5392857142857143,
"grad_norm": 0.4408532381057739,
"learning_rate": 9.595682390455015e-06,
"loss": 1.3136895895004272,
"step": 302
},
{
"epoch": 0.5428571428571428,
"grad_norm": 0.4480395019054413,
"learning_rate": 9.588311474610888e-06,
"loss": 1.1242649555206299,
"step": 304
},
{
"epoch": 0.5464285714285714,
"grad_norm": 0.7074999213218689,
"learning_rate": 9.580877195526564e-06,
"loss": 1.6407079696655273,
"step": 306
},
{
"epoch": 0.55,
"grad_norm": 0.3410518765449524,
"learning_rate": 9.573379668423209e-06,
"loss": 1.3072420358657837,
"step": 308
},
{
"epoch": 0.5535714285714286,
"grad_norm": 0.47393250465393066,
"learning_rate": 9.56581900950225e-06,
"loss": 1.4017832279205322,
"step": 310
},
{
"epoch": 0.5571428571428572,
"grad_norm": 0.6089979410171509,
"learning_rate": 9.558195335943566e-06,
"loss": 1.5297354459762573,
"step": 312
},
{
"epoch": 0.5607142857142857,
"grad_norm": 0.2799089848995209,
"learning_rate": 9.550508765903672e-06,
"loss": 1.043546199798584,
"step": 314
},
{
"epoch": 0.5642857142857143,
"grad_norm": 0.2620464563369751,
"learning_rate": 9.542759418513906e-06,
"loss": 1.6538763046264648,
"step": 316
},
{
"epoch": 0.5678571428571428,
"grad_norm": 0.5144315958023071,
"learning_rate": 9.534947413878556e-06,
"loss": 1.5541188716888428,
"step": 318
},
{
"epoch": 0.5714285714285714,
"grad_norm": 0.29509297013282776,
"learning_rate": 9.52707287307302e-06,
"loss": 1.1971598863601685,
"step": 320
},
{
"epoch": 0.575,
"grad_norm": 0.1929909884929657,
"learning_rate": 9.519135918141913e-06,
"loss": 1.1823662519454956,
"step": 322
},
{
"epoch": 0.5785714285714286,
"grad_norm": 0.48544377088546753,
"learning_rate": 9.511136672097194e-06,
"loss": 1.3313523530960083,
"step": 324
},
{
"epoch": 0.5821428571428572,
"grad_norm": 0.3510501980781555,
"learning_rate": 9.503075258916241e-06,
"loss": 1.3195650577545166,
"step": 326
},
{
"epoch": 0.5857142857142857,
"grad_norm": 0.2727429270744324,
"learning_rate": 9.494951803539942e-06,
"loss": 1.2425987720489502,
"step": 328
},
{
"epoch": 0.5892857142857143,
"grad_norm": 15.424638748168945,
"learning_rate": 9.486766431870752e-06,
"loss": 1.2101187705993652,
"step": 330
},
{
"epoch": 0.5928571428571429,
"grad_norm": 0.2866066098213196,
"learning_rate": 9.478519270770746e-06,
"loss": 1.2784419059753418,
"step": 332
},
{
"epoch": 0.5964285714285714,
"grad_norm": 0.4156343638896942,
"learning_rate": 9.470210448059645e-06,
"loss": 1.583785057067871,
"step": 334
},
{
"epoch": 0.6,
"grad_norm": 0.45487159490585327,
"learning_rate": 9.46184009251285e-06,
"loss": 1.3652830123901367,
"step": 336
},
{
"epoch": 0.6035714285714285,
"grad_norm": 0.32525262236595154,
"learning_rate": 9.453408333859427e-06,
"loss": 1.4697949886322021,
"step": 338
},
{
"epoch": 0.6071428571428571,
"grad_norm": 0.5784197449684143,
"learning_rate": 9.444915302780117e-06,
"loss": 1.3824127912521362,
"step": 340
},
{
"epoch": 0.6107142857142858,
"grad_norm": 0.26421067118644714,
"learning_rate": 9.436361130905288e-06,
"loss": 1.42073655128479,
"step": 342
},
{
"epoch": 0.6142857142857143,
"grad_norm": 0.2380143105983734,
"learning_rate": 9.427745950812917e-06,
"loss": 1.3658424615859985,
"step": 344
},
{
"epoch": 0.6178571428571429,
"grad_norm": 0.43499693274497986,
"learning_rate": 9.41906989602652e-06,
"loss": 1.461742639541626,
"step": 346
},
{
"epoch": 0.6214285714285714,
"grad_norm": 0.400419145822525,
"learning_rate": 9.410333101013086e-06,
"loss": 1.3119421005249023,
"step": 348
},
{
"epoch": 0.625,
"grad_norm": 0.4901754856109619,
"learning_rate": 9.401535701180998e-06,
"loss": 1.2844195365905762,
"step": 350
},
{
"epoch": 0.6285714285714286,
"grad_norm": 0.48951858282089233,
"learning_rate": 9.392677832877932e-06,
"loss": 1.568238615989685,
"step": 352
},
{
"epoch": 0.6321428571428571,
"grad_norm": 0.2112666368484497,
"learning_rate": 9.383759633388737e-06,
"loss": 1.5015143156051636,
"step": 354
},
{
"epoch": 0.6357142857142857,
"grad_norm": 0.2615770101547241,
"learning_rate": 9.374781240933316e-06,
"loss": 1.5211448669433594,
"step": 356
},
{
"epoch": 0.6392857142857142,
"grad_norm": 0.2031329870223999,
"learning_rate": 9.365742794664484e-06,
"loss": 1.3461060523986816,
"step": 358
},
{
"epoch": 0.6428571428571429,
"grad_norm": 0.36589643359184265,
"learning_rate": 9.356644434665804e-06,
"loss": 1.3849568367004395,
"step": 360
},
{
"epoch": 0.6464285714285715,
"grad_norm": 0.3387724757194519,
"learning_rate": 9.347486301949417e-06,
"loss": 1.558565616607666,
"step": 362
},
{
"epoch": 0.65,
"grad_norm": 0.29686594009399414,
"learning_rate": 9.33826853845387e-06,
"loss": 1.3272876739501953,
"step": 364
},
{
"epoch": 0.6535714285714286,
"grad_norm": 0.8538780808448792,
"learning_rate": 9.328991287041892e-06,
"loss": 1.3049917221069336,
"step": 366
},
{
"epoch": 0.6571428571428571,
"grad_norm": 0.2655990719795227,
"learning_rate": 9.319654691498205e-06,
"loss": 1.2668689489364624,
"step": 368
},
{
"epoch": 0.6607142857142857,
"grad_norm": 0.28246378898620605,
"learning_rate": 9.31025889652728e-06,
"loss": 1.4307278394699097,
"step": 370
},
{
"epoch": 0.6642857142857143,
"grad_norm": 0.33245643973350525,
"learning_rate": 9.300804047751093e-06,
"loss": 1.3824753761291504,
"step": 372
},
{
"epoch": 0.6678571428571428,
"grad_norm": 0.21639856696128845,
"learning_rate": 9.291290291706881e-06,
"loss": 1.3373095989227295,
"step": 374
},
{
"epoch": 0.6714285714285714,
"grad_norm": 0.42471569776535034,
"learning_rate": 9.281717775844857e-06,
"loss": 1.2794650793075562,
"step": 376
},
{
"epoch": 0.675,
"grad_norm": 0.5068492293357849,
"learning_rate": 9.272086648525937e-06,
"loss": 1.3947125673294067,
"step": 378
},
{
"epoch": 0.6785714285714286,
"grad_norm": 0.42456403374671936,
"learning_rate": 9.26239705901943e-06,
"loss": 1.4322527647018433,
"step": 380
},
{
"epoch": 0.6821428571428572,
"grad_norm": 0.7322901487350464,
"learning_rate": 9.25264915750073e-06,
"loss": 1.427004337310791,
"step": 382
},
{
"epoch": 0.6857142857142857,
"grad_norm": 0.2247505635023117,
"learning_rate": 9.242843095048987e-06,
"loss": 1.2980873584747314,
"step": 384
},
{
"epoch": 0.6892857142857143,
"grad_norm": 0.28208163380622864,
"learning_rate": 9.232979023644768e-06,
"loss": 1.466817855834961,
"step": 386
},
{
"epoch": 0.6928571428571428,
"grad_norm": 0.4910048544406891,
"learning_rate": 9.223057096167696e-06,
"loss": 1.4608205556869507,
"step": 388
},
{
"epoch": 0.6964285714285714,
"grad_norm": 0.5288735032081604,
"learning_rate": 9.213077466394088e-06,
"loss": 1.3513166904449463,
"step": 390
},
{
"epoch": 0.7,
"grad_norm": 0.304855078458786,
"learning_rate": 9.203040288994566e-06,
"loss": 1.464281678199768,
"step": 392
},
{
"epoch": 0.7035714285714286,
"grad_norm": 0.28660398721694946,
"learning_rate": 9.192945719531662e-06,
"loss": 1.3084968328475952,
"step": 394
},
{
"epoch": 0.7071428571428572,
"grad_norm": 0.5636733770370483,
"learning_rate": 9.182793914457402e-06,
"loss": 1.2844712734222412,
"step": 396
},
{
"epoch": 0.7107142857142857,
"grad_norm": 0.5751602649688721,
"learning_rate": 9.172585031110895e-06,
"loss": 1.5046448707580566,
"step": 398
},
{
"epoch": 0.7142857142857143,
"grad_norm": 0.34700506925582886,
"learning_rate": 9.162319227715877e-06,
"loss": 1.3449612855911255,
"step": 400
},
{
"epoch": 0.7178571428571429,
"grad_norm": 0.34046903252601624,
"learning_rate": 9.151996663378271e-06,
"loss": 1.3594465255737305,
"step": 402
},
{
"epoch": 0.7214285714285714,
"grad_norm": 0.48511913418769836,
"learning_rate": 9.141617498083717e-06,
"loss": 1.5169265270233154,
"step": 404
},
{
"epoch": 0.725,
"grad_norm": 0.26317858695983887,
"learning_rate": 9.131181892695089e-06,
"loss": 1.4639661312103271,
"step": 406
},
{
"epoch": 0.7285714285714285,
"grad_norm": 0.4234665632247925,
"learning_rate": 9.120690008950008e-06,
"loss": 1.4238711595535278,
"step": 408
},
{
"epoch": 0.7321428571428571,
"grad_norm": 0.683773934841156,
"learning_rate": 9.110142009458333e-06,
"loss": 1.2991688251495361,
"step": 410
},
{
"epoch": 0.7357142857142858,
"grad_norm": 0.9090404510498047,
"learning_rate": 9.099538057699643e-06,
"loss": 1.4411964416503906,
"step": 412
},
{
"epoch": 0.7392857142857143,
"grad_norm": 0.42502301931381226,
"learning_rate": 9.08887831802069e-06,
"loss": 1.3963665962219238,
"step": 414
},
{
"epoch": 0.7428571428571429,
"grad_norm": 0.25458428263664246,
"learning_rate": 9.078162955632878e-06,
"loss": 1.3666608333587646,
"step": 416
},
{
"epoch": 0.7464285714285714,
"grad_norm": 0.2778110206127167,
"learning_rate": 9.067392136609672e-06,
"loss": 1.4295861721038818,
"step": 418
},
{
"epoch": 0.75,
"grad_norm": 0.3574320673942566,
"learning_rate": 9.056566027884051e-06,
"loss": 1.4124993085861206,
"step": 420
},
{
"epoch": 0.7535714285714286,
"grad_norm": 0.2570479214191437,
"learning_rate": 9.045684797245902e-06,
"loss": 1.3560070991516113,
"step": 422
},
{
"epoch": 0.7571428571428571,
"grad_norm": 0.3374227285385132,
"learning_rate": 9.034748613339427e-06,
"loss": 1.360439658164978,
"step": 424
},
{
"epoch": 0.7607142857142857,
"grad_norm": 0.25365766882896423,
"learning_rate": 9.023757645660531e-06,
"loss": 1.3708235025405884,
"step": 426
},
{
"epoch": 0.7642857142857142,
"grad_norm": 0.2227737158536911,
"learning_rate": 9.01271206455419e-06,
"loss": 1.3818211555480957,
"step": 428
},
{
"epoch": 0.7678571428571429,
"grad_norm": 0.21550701558589935,
"learning_rate": 9.001612041211817e-06,
"loss": 1.3254315853118896,
"step": 430
},
{
"epoch": 0.7714285714285715,
"grad_norm": 0.5434844493865967,
"learning_rate": 8.9904577476686e-06,
"loss": 1.3340120315551758,
"step": 432
},
{
"epoch": 0.775,
"grad_norm": 0.2289412021636963,
"learning_rate": 8.979249356800846e-06,
"loss": 1.2770015001296997,
"step": 434
},
{
"epoch": 0.7785714285714286,
"grad_norm": 0.25175049901008606,
"learning_rate": 8.967987042323293e-06,
"loss": 1.3385746479034424,
"step": 436
},
{
"epoch": 0.7821428571428571,
"grad_norm": 0.27297094464302063,
"learning_rate": 8.956670978786423e-06,
"loss": 1.2522022724151611,
"step": 438
},
{
"epoch": 0.7857142857142857,
"grad_norm": 0.2057066559791565,
"learning_rate": 8.945301341573757e-06,
"loss": 1.3175703287124634,
"step": 440
},
{
"epoch": 0.7892857142857143,
"grad_norm": 0.16934043169021606,
"learning_rate": 8.93387830689913e-06,
"loss": 1.2785143852233887,
"step": 442
},
{
"epoch": 0.7928571428571428,
"grad_norm": 0.17673851549625397,
"learning_rate": 8.922402051803968e-06,
"loss": 1.311404824256897,
"step": 444
},
{
"epoch": 0.7964285714285714,
"grad_norm": 0.36772605776786804,
"learning_rate": 8.91087275415454e-06,
"loss": 1.27708101272583,
"step": 446
},
{
"epoch": 0.8,
"grad_norm": 0.1414009928703308,
"learning_rate": 8.8992905926392e-06,
"loss": 1.247365117073059,
"step": 448
},
{
"epoch": 0.8035714285714286,
"grad_norm": 0.16844603419303894,
"learning_rate": 8.887655746765625e-06,
"loss": 1.3339194059371948,
"step": 450
},
{
"epoch": 0.8071428571428572,
"grad_norm": 0.4043944180011749,
"learning_rate": 8.875968396858023e-06,
"loss": 1.3012686967849731,
"step": 452
},
{
"epoch": 0.8107142857142857,
"grad_norm": 0.19886070489883423,
"learning_rate": 8.864228724054342e-06,
"loss": 1.2051547765731812,
"step": 454
},
{
"epoch": 0.8142857142857143,
"grad_norm": 0.18143871426582336,
"learning_rate": 8.852436910303466e-06,
"loss": 1.264425277709961,
"step": 456
},
{
"epoch": 0.8178571428571428,
"grad_norm": 0.30469146370887756,
"learning_rate": 8.840593138362395e-06,
"loss": 1.2156575918197632,
"step": 458
},
{
"epoch": 0.8214285714285714,
"grad_norm": 0.19490455090999603,
"learning_rate": 8.828697591793405e-06,
"loss": 1.2579315900802612,
"step": 460
},
{
"epoch": 0.825,
"grad_norm": 0.22966210544109344,
"learning_rate": 8.816750454961206e-06,
"loss": 1.2265636920928955,
"step": 462
},
{
"epoch": 0.8285714285714286,
"grad_norm": 0.4836777448654175,
"learning_rate": 8.804751913030095e-06,
"loss": 1.2515498399734497,
"step": 464
},
{
"epoch": 0.8321428571428572,
"grad_norm": 0.22509177029132843,
"learning_rate": 8.792702151961074e-06,
"loss": 1.2572628259658813,
"step": 466
},
{
"epoch": 0.8357142857142857,
"grad_norm": 0.4269544184207916,
"learning_rate": 8.780601358508966e-06,
"loss": 1.2433445453643799,
"step": 468
},
{
"epoch": 0.8392857142857143,
"grad_norm": 0.19438913464546204,
"learning_rate": 8.768449720219533e-06,
"loss": 1.2479232549667358,
"step": 470
},
{
"epoch": 0.8428571428571429,
"grad_norm": 0.695250391960144,
"learning_rate": 8.75624742542656e-06,
"loss": 1.300042748451233,
"step": 472
},
{
"epoch": 0.8464285714285714,
"grad_norm": 0.35800135135650635,
"learning_rate": 8.743994663248939e-06,
"loss": 1.2871143817901611,
"step": 474
},
{
"epoch": 0.85,
"grad_norm": 0.20253418385982513,
"learning_rate": 8.73169162358774e-06,
"loss": 1.2776912450790405,
"step": 476
},
{
"epoch": 0.8535714285714285,
"grad_norm": 0.203902930021286,
"learning_rate": 8.719338497123258e-06,
"loss": 1.3039164543151855,
"step": 478
},
{
"epoch": 0.8571428571428571,
"grad_norm": 0.24306446313858032,
"learning_rate": 8.706935475312073e-06,
"loss": 1.30210542678833,
"step": 480
},
{
"epoch": 0.8607142857142858,
"grad_norm": 0.2822311520576477,
"learning_rate": 8.694482750384069e-06,
"loss": 1.2630928754806519,
"step": 482
},
{
"epoch": 0.8642857142857143,
"grad_norm": 0.2177450954914093,
"learning_rate": 8.681980515339464e-06,
"loss": 1.2841533422470093,
"step": 484
},
{
"epoch": 0.8678571428571429,
"grad_norm": 0.19454443454742432,
"learning_rate": 8.669428963945815e-06,
"loss": 1.2446175813674927,
"step": 486
},
{
"epoch": 0.8714285714285714,
"grad_norm": 0.161905437707901,
"learning_rate": 8.656828290735013e-06,
"loss": 1.2695343494415283,
"step": 488
},
{
"epoch": 0.875,
"grad_norm": 0.19021154940128326,
"learning_rate": 8.644178691000272e-06,
"loss": 1.2780508995056152,
"step": 490
},
{
"epoch": 0.8785714285714286,
"grad_norm": 0.3725239038467407,
"learning_rate": 8.631480360793095e-06,
"loss": 1.2979791164398193,
"step": 492
},
{
"epoch": 0.8821428571428571,
"grad_norm": 0.5264632701873779,
"learning_rate": 8.61873349692025e-06,
"loss": 1.2810431718826294,
"step": 494
},
{
"epoch": 0.8857142857142857,
"grad_norm": 0.26536062359809875,
"learning_rate": 8.605938296940702e-06,
"loss": 1.2166625261306763,
"step": 496
},
{
"epoch": 0.8892857142857142,
"grad_norm": 0.4096132516860962,
"learning_rate": 8.593094959162565e-06,
"loss": 1.2420190572738647,
"step": 498
},
{
"epoch": 0.8928571428571429,
"grad_norm": 0.4396449327468872,
"learning_rate": 8.58020368264002e-06,
"loss": 1.2754027843475342,
"step": 500
},
{
"epoch": 0.8964285714285715,
"grad_norm": 0.15545235574245453,
"learning_rate": 8.567264667170232e-06,
"loss": 1.3059731721878052,
"step": 502
},
{
"epoch": 0.9,
"grad_norm": 0.25121352076530457,
"learning_rate": 8.554278113290262e-06,
"loss": 1.2766114473342896,
"step": 504
},
{
"epoch": 0.9035714285714286,
"grad_norm": 0.21137557923793793,
"learning_rate": 8.541244222273942e-06,
"loss": 1.258975863456726,
"step": 506
},
{
"epoch": 0.9071428571428571,
"grad_norm": 0.16647249460220337,
"learning_rate": 8.528163196128767e-06,
"loss": 1.2222638130187988,
"step": 508
},
{
"epoch": 0.9107142857142857,
"grad_norm": 0.3039259910583496,
"learning_rate": 8.51503523759277e-06,
"loss": 1.257559895515442,
"step": 510
},
{
"epoch": 0.9142857142857143,
"grad_norm": 0.32180115580558777,
"learning_rate": 8.501860550131361e-06,
"loss": 1.280539631843567,
"step": 512
},
{
"epoch": 0.9178571428571428,
"grad_norm": 0.2822877764701843,
"learning_rate": 8.488639337934188e-06,
"loss": 1.225077509880066,
"step": 514
},
{
"epoch": 0.9214285714285714,
"grad_norm": 0.22444438934326172,
"learning_rate": 8.475371805911975e-06,
"loss": 1.259244441986084,
"step": 516
},
{
"epoch": 0.925,
"grad_norm": 0.17102967202663422,
"learning_rate": 8.462058159693332e-06,
"loss": 1.2512003183364868,
"step": 518
},
{
"epoch": 0.9285714285714286,
"grad_norm": 0.9442085027694702,
"learning_rate": 8.44869860562158e-06,
"loss": 1.2956591844558716,
"step": 520
},
{
"epoch": 0.9321428571428572,
"grad_norm": 0.31264039874076843,
"learning_rate": 8.435293350751545e-06,
"loss": 1.3134222030639648,
"step": 522
},
{
"epoch": 0.9357142857142857,
"grad_norm": 0.20593850314617157,
"learning_rate": 8.421842602846362e-06,
"loss": 1.269896149635315,
"step": 524
},
{
"epoch": 0.9392857142857143,
"grad_norm": 0.24257254600524902,
"learning_rate": 8.408346570374234e-06,
"loss": 1.2887259721755981,
"step": 526
},
{
"epoch": 0.9428571428571428,
"grad_norm": 0.18374580144882202,
"learning_rate": 8.394805462505224e-06,
"loss": 1.2653754949569702,
"step": 528
},
{
"epoch": 0.9464285714285714,
"grad_norm": 0.7440497875213623,
"learning_rate": 8.381219489107992e-06,
"loss": 1.2136163711547852,
"step": 530
},
{
"epoch": 0.95,
"grad_norm": 0.3250195384025574,
"learning_rate": 8.36758886074656e-06,
"loss": 1.233951449394226,
"step": 532
},
{
"epoch": 0.9535714285714286,
"grad_norm": 0.2864832878112793,
"learning_rate": 8.353913788677036e-06,
"loss": 1.2546851634979248,
"step": 534
},
{
"epoch": 0.9571428571428572,
"grad_norm": 0.22155587375164032,
"learning_rate": 8.34019448484435e-06,
"loss": 1.2355575561523438,
"step": 536
},
{
"epoch": 0.9607142857142857,
"grad_norm": 0.19411461055278778,
"learning_rate": 8.326431161878957e-06,
"loss": 1.2437915802001953,
"step": 538
},
{
"epoch": 0.9642857142857143,
"grad_norm": 0.26431798934936523,
"learning_rate": 8.312624033093555e-06,
"loss": 1.2899754047393799,
"step": 540
},
{
"epoch": 0.9678571428571429,
"grad_norm": 0.3181489109992981,
"learning_rate": 8.298773312479767e-06,
"loss": 1.2769360542297363,
"step": 542
},
{
"epoch": 0.9714285714285714,
"grad_norm": 0.2669861912727356,
"learning_rate": 8.284879214704834e-06,
"loss": 1.2913857698440552,
"step": 544
},
{
"epoch": 0.975,
"grad_norm": 0.2932322919368744,
"learning_rate": 8.270941955108281e-06,
"loss": 1.2430675029754639,
"step": 546
},
{
"epoch": 0.9785714285714285,
"grad_norm": 0.3006272614002228,
"learning_rate": 8.256961749698583e-06,
"loss": 1.2453312873840332,
"step": 548
},
{
"epoch": 0.9821428571428571,
"grad_norm": 0.2196272611618042,
"learning_rate": 8.242938815149817e-06,
"loss": 1.2648967504501343,
"step": 550
},
{
"epoch": 0.9857142857142858,
"grad_norm": 0.2562142014503479,
"learning_rate": 8.228873368798304e-06,
"loss": 1.3159946203231812,
"step": 552
},
{
"epoch": 0.9892857142857143,
"grad_norm": 0.26237812638282776,
"learning_rate": 8.214765628639235e-06,
"loss": 1.3476945161819458,
"step": 554
},
{
"epoch": 0.9928571428571429,
"grad_norm": 0.38732582330703735,
"learning_rate": 8.200615813323306e-06,
"loss": 1.9057130813598633,
"step": 556
},
{
"epoch": 0.9964285714285714,
"grad_norm": 0.33351263403892517,
"learning_rate": 8.18642414215331e-06,
"loss": 1.8800382614135742,
"step": 558
},
{
"epoch": 1.0,
"grad_norm": 0.6058505773544312,
"learning_rate": 8.172190835080757e-06,
"loss": 1.8019236326217651,
"step": 560
},
{
"epoch": 1.0035714285714286,
"grad_norm": 0.31470683217048645,
"learning_rate": 8.157916112702452e-06,
"loss": 1.384263277053833,
"step": 562
},
{
"epoch": 1.0071428571428571,
"grad_norm": 0.310624897480011,
"learning_rate": 8.143600196257086e-06,
"loss": 1.3995013236999512,
"step": 564
},
{
"epoch": 1.0107142857142857,
"grad_norm": 0.20878104865550995,
"learning_rate": 8.129243307621791e-06,
"loss": 1.3525418043136597,
"step": 566
},
{
"epoch": 1.0142857142857142,
"grad_norm": 0.2683800160884857,
"learning_rate": 8.114845669308723e-06,
"loss": 1.3207361698150635,
"step": 568
},
{
"epoch": 1.0178571428571428,
"grad_norm": 0.27859288454055786,
"learning_rate": 8.100407504461595e-06,
"loss": 1.3501830101013184,
"step": 570
},
{
"epoch": 1.0214285714285714,
"grad_norm": 0.32225877046585083,
"learning_rate": 8.085929036852236e-06,
"loss": 1.1840941905975342,
"step": 572
},
{
"epoch": 1.025,
"grad_norm": 0.23283155262470245,
"learning_rate": 8.071410490877097e-06,
"loss": 1.2650562524795532,
"step": 574
},
{
"epoch": 1.0285714285714285,
"grad_norm": 0.1705978810787201,
"learning_rate": 8.0568520915538e-06,
"loss": 1.2940489053726196,
"step": 576
},
{
"epoch": 1.032142857142857,
"grad_norm": 0.23754863440990448,
"learning_rate": 8.042254064517642e-06,
"loss": 1.3267643451690674,
"step": 578
},
{
"epoch": 1.0357142857142858,
"grad_norm": 0.46769577264785767,
"learning_rate": 8.027616636018085e-06,
"loss": 1.2288154363632202,
"step": 580
},
{
"epoch": 1.0392857142857144,
"grad_norm": 0.233358234167099,
"learning_rate": 8.012940032915263e-06,
"loss": 1.3615669012069702,
"step": 582
},
{
"epoch": 1.042857142857143,
"grad_norm": 0.2691819369792938,
"learning_rate": 7.998224482676473e-06,
"loss": 1.3021140098571777,
"step": 584
},
{
"epoch": 1.0464285714285715,
"grad_norm": 0.24730414152145386,
"learning_rate": 7.983470213372624e-06,
"loss": 1.2602746486663818,
"step": 586
},
{
"epoch": 1.05,
"grad_norm": 0.2731882929801941,
"learning_rate": 7.96867745367473e-06,
"loss": 1.2430776357650757,
"step": 588
},
{
"epoch": 1.0535714285714286,
"grad_norm": 0.22160141170024872,
"learning_rate": 7.953846432850346e-06,
"loss": 1.2589969635009766,
"step": 590
},
{
"epoch": 1.0571428571428572,
"grad_norm": 0.2917991280555725,
"learning_rate": 7.938977380760024e-06,
"loss": 1.408372402191162,
"step": 592
},
{
"epoch": 1.0607142857142857,
"grad_norm": 0.23420438170433044,
"learning_rate": 7.92407052785375e-06,
"loss": 1.3381731510162354,
"step": 594
},
{
"epoch": 1.0642857142857143,
"grad_norm": 0.19835133850574493,
"learning_rate": 7.909126105167373e-06,
"loss": 1.3641246557235718,
"step": 596
},
{
"epoch": 1.0678571428571428,
"grad_norm": 0.21805885434150696,
"learning_rate": 7.894144344319015e-06,
"loss": 1.2766021490097046,
"step": 598
},
{
"epoch": 1.0714285714285714,
"grad_norm": 0.3379668593406677,
"learning_rate": 7.879125477505495e-06,
"loss": 1.2909208536148071,
"step": 600
},
{
"epoch": 1.075,
"grad_norm": 0.3864686191082001,
"learning_rate": 7.864069737498722e-06,
"loss": 1.259904146194458,
"step": 602
},
{
"epoch": 1.0785714285714285,
"grad_norm": 0.3104611933231354,
"learning_rate": 7.848977357642089e-06,
"loss": 1.3227314949035645,
"step": 604
},
{
"epoch": 1.082142857142857,
"grad_norm": 0.244283065199852,
"learning_rate": 7.833848571846855e-06,
"loss": 1.3027191162109375,
"step": 606
},
{
"epoch": 1.0857142857142856,
"grad_norm": 0.19385835528373718,
"learning_rate": 7.818683614588523e-06,
"loss": 1.0396664142608643,
"step": 608
},
{
"epoch": 1.0892857142857142,
"grad_norm": 0.2750968933105469,
"learning_rate": 7.803482720903206e-06,
"loss": 1.1102863550186157,
"step": 610
},
{
"epoch": 1.092857142857143,
"grad_norm": 0.3333893418312073,
"learning_rate": 7.788246126383977e-06,
"loss": 1.1634554862976074,
"step": 612
},
{
"epoch": 1.0964285714285715,
"grad_norm": 0.28989356756210327,
"learning_rate": 7.77297406717723e-06,
"loss": 1.3986788988113403,
"step": 614
},
{
"epoch": 1.1,
"grad_norm": 0.27835774421691895,
"learning_rate": 7.757666779979008e-06,
"loss": 1.2263062000274658,
"step": 616
},
{
"epoch": 1.1035714285714286,
"grad_norm": 0.2572242021560669,
"learning_rate": 7.74232450203134e-06,
"loss": 1.2180155515670776,
"step": 618
},
{
"epoch": 1.1071428571428572,
"grad_norm": 0.3894072473049164,
"learning_rate": 7.72694747111857e-06,
"loss": 1.478975534439087,
"step": 620
},
{
"epoch": 1.1107142857142858,
"grad_norm": 0.4212060868740082,
"learning_rate": 7.711535925563655e-06,
"loss": 1.3129830360412598,
"step": 622
},
{
"epoch": 1.1142857142857143,
"grad_norm": 0.23659296333789825,
"learning_rate": 7.696090104224492e-06,
"loss": 1.229081392288208,
"step": 624
},
{
"epoch": 1.1178571428571429,
"grad_norm": 0.254404217004776,
"learning_rate": 7.680610246490199e-06,
"loss": 1.2878901958465576,
"step": 626
},
{
"epoch": 1.1214285714285714,
"grad_norm": 0.3570263981819153,
"learning_rate": 7.665096592277415e-06,
"loss": 1.218833088874817,
"step": 628
},
{
"epoch": 1.125,
"grad_norm": 0.27803489565849304,
"learning_rate": 7.649549382026575e-06,
"loss": 1.274793028831482,
"step": 630
},
{
"epoch": 1.1285714285714286,
"grad_norm": 0.2562004327774048,
"learning_rate": 7.633968856698192e-06,
"loss": 1.3318731784820557,
"step": 632
},
{
"epoch": 1.1321428571428571,
"grad_norm": 0.19307534396648407,
"learning_rate": 7.618355257769111e-06,
"loss": 1.2363682985305786,
"step": 634
},
{
"epoch": 1.1357142857142857,
"grad_norm": 0.5484210848808289,
"learning_rate": 7.602708827228779e-06,
"loss": 1.259455680847168,
"step": 636
},
{
"epoch": 1.1392857142857142,
"grad_norm": 0.2351217418909073,
"learning_rate": 7.587029807575482e-06,
"loss": 1.2625541687011719,
"step": 638
},
{
"epoch": 1.1428571428571428,
"grad_norm": 0.5461699962615967,
"learning_rate": 7.571318441812599e-06,
"loss": 1.1984379291534424,
"step": 640
},
{
"epoch": 1.1464285714285714,
"grad_norm": 0.30940407514572144,
"learning_rate": 7.55557497344482e-06,
"loss": 1.3161015510559082,
"step": 642
},
{
"epoch": 1.15,
"grad_norm": 0.32747605443000793,
"learning_rate": 7.539799646474393e-06,
"loss": 1.234968900680542,
"step": 644
},
{
"epoch": 1.1535714285714285,
"grad_norm": 0.2250605821609497,
"learning_rate": 7.523992705397321e-06,
"loss": 1.3490346670150757,
"step": 646
},
{
"epoch": 1.157142857142857,
"grad_norm": 0.3528631925582886,
"learning_rate": 7.508154395199592e-06,
"loss": 1.350324034690857,
"step": 648
},
{
"epoch": 1.1607142857142858,
"grad_norm": 0.247028186917305,
"learning_rate": 7.492284961353361e-06,
"loss": 1.285825252532959,
"step": 650
},
{
"epoch": 1.1642857142857144,
"grad_norm": 0.26968345046043396,
"learning_rate": 7.4763846498131675e-06,
"loss": 1.123679518699646,
"step": 652
},
{
"epoch": 1.167857142857143,
"grad_norm": 0.23967714607715607,
"learning_rate": 7.460453707012107e-06,
"loss": 1.2702839374542236,
"step": 654
},
{
"epoch": 1.1714285714285715,
"grad_norm": 0.9412787556648254,
"learning_rate": 7.444492379858021e-06,
"loss": 1.3307619094848633,
"step": 656
},
{
"epoch": 1.175,
"grad_norm": 0.60057133436203,
"learning_rate": 7.428500915729663e-06,
"loss": 1.218625783920288,
"step": 658
},
{
"epoch": 1.1785714285714286,
"grad_norm": 0.2611408829689026,
"learning_rate": 7.412479562472873e-06,
"loss": 1.1818389892578125,
"step": 660
},
{
"epoch": 1.1821428571428572,
"grad_norm": 0.21901297569274902,
"learning_rate": 7.3964285683967285e-06,
"loss": 1.2105083465576172,
"step": 662
},
{
"epoch": 1.1857142857142857,
"grad_norm": 0.9242513179779053,
"learning_rate": 7.380348182269701e-06,
"loss": 1.2359505891799927,
"step": 664
},
{
"epoch": 1.1892857142857143,
"grad_norm": 0.24152880907058716,
"learning_rate": 7.364238653315795e-06,
"loss": 1.268753170967102,
"step": 666
},
{
"epoch": 1.1928571428571428,
"grad_norm": 2.834768533706665,
"learning_rate": 7.348100231210697e-06,
"loss": 1.2450233697891235,
"step": 668
},
{
"epoch": 1.1964285714285714,
"grad_norm": 0.7332023978233337,
"learning_rate": 7.331933166077886e-06,
"loss": 1.2236673831939697,
"step": 670
},
{
"epoch": 1.2,
"grad_norm": 0.3339300751686096,
"learning_rate": 7.31573770848478e-06,
"loss": 1.1605288982391357,
"step": 672
},
{
"epoch": 1.2035714285714285,
"grad_norm": 0.7548586130142212,
"learning_rate": 7.299514109438835e-06,
"loss": 1.276812195777893,
"step": 674
},
{
"epoch": 1.207142857142857,
"grad_norm": 0.32066163420677185,
"learning_rate": 7.283262620383664e-06,
"loss": 1.2277733087539673,
"step": 676
},
{
"epoch": 1.2107142857142856,
"grad_norm": 0.3439161777496338,
"learning_rate": 7.266983493195133e-06,
"loss": 1.443245768547058,
"step": 678
},
{
"epoch": 1.2142857142857142,
"grad_norm": 0.28881630301475525,
"learning_rate": 7.250676980177468e-06,
"loss": 1.3642569780349731,
"step": 680
},
{
"epoch": 1.217857142857143,
"grad_norm": 0.3376900553703308,
"learning_rate": 7.2343433340593315e-06,
"loss": 1.1232848167419434,
"step": 682
},
{
"epoch": 1.2214285714285715,
"grad_norm": 0.5144054293632507,
"learning_rate": 7.217982807989915e-06,
"loss": 1.2558438777923584,
"step": 684
},
{
"epoch": 1.225,
"grad_norm": 0.25952062010765076,
"learning_rate": 7.201595655535011e-06,
"loss": 1.3395494222640991,
"step": 686
},
{
"epoch": 1.2285714285714286,
"grad_norm": 0.3723627030849457,
"learning_rate": 7.1851821306730876e-06,
"loss": 0.9402600526809692,
"step": 688
},
{
"epoch": 1.2321428571428572,
"grad_norm": 0.3420025110244751,
"learning_rate": 7.168742487791345e-06,
"loss": 0.7468339204788208,
"step": 690
},
{
"epoch": 1.2357142857142858,
"grad_norm": 0.34970328211784363,
"learning_rate": 7.152276981681781e-06,
"loss": 1.0327891111373901,
"step": 692
},
{
"epoch": 1.2392857142857143,
"grad_norm": 0.3740408420562744,
"learning_rate": 7.135785867537235e-06,
"loss": 1.267980694770813,
"step": 694
},
{
"epoch": 1.2428571428571429,
"grad_norm": 0.5309215188026428,
"learning_rate": 7.119269400947437e-06,
"loss": 1.4097453355789185,
"step": 696
},
{
"epoch": 1.2464285714285714,
"grad_norm": 0.3004949390888214,
"learning_rate": 7.1027278378950486e-06,
"loss": 1.2045501470565796,
"step": 698
},
{
"epoch": 1.25,
"grad_norm": 0.6161743402481079,
"learning_rate": 7.086161434751684e-06,
"loss": 1.0838185548782349,
"step": 700
},
{
"epoch": 1.2535714285714286,
"grad_norm": 0.22701780498027802,
"learning_rate": 7.069570448273951e-06,
"loss": 1.1616631746292114,
"step": 702
},
{
"epoch": 1.2571428571428571,
"grad_norm": 0.3208640515804291,
"learning_rate": 7.0529551355994686e-06,
"loss": 1.2447824478149414,
"step": 704
},
{
"epoch": 1.2607142857142857,
"grad_norm": 0.7384056448936462,
"learning_rate": 7.03631575424287e-06,
"loss": 1.1175577640533447,
"step": 706
},
{
"epoch": 1.2642857142857142,
"grad_norm": 0.5497505068778992,
"learning_rate": 7.019652562091826e-06,
"loss": 1.141535758972168,
"step": 708
},
{
"epoch": 1.2678571428571428,
"grad_norm": 0.3330208361148834,
"learning_rate": 7.0029658174030425e-06,
"loss": 1.3164706230163574,
"step": 710
},
{
"epoch": 1.2714285714285714,
"grad_norm": 0.4105195701122284,
"learning_rate": 6.986255778798253e-06,
"loss": 1.234831690788269,
"step": 712
},
{
"epoch": 1.275,
"grad_norm": 0.28338423371315,
"learning_rate": 6.9695227052602174e-06,
"loss": 1.1415457725524902,
"step": 714
},
{
"epoch": 1.2785714285714285,
"grad_norm": 0.3706303536891937,
"learning_rate": 6.952766856128709e-06,
"loss": 1.199047565460205,
"step": 716
},
{
"epoch": 1.282142857142857,
"grad_norm": 0.3346574902534485,
"learning_rate": 6.9359884910964856e-06,
"loss": 1.4197050333023071,
"step": 718
},
{
"epoch": 1.2857142857142856,
"grad_norm": 0.3120553195476532,
"learning_rate": 6.919187870205275e-06,
"loss": 1.5487772226333618,
"step": 720
},
{
"epoch": 1.2892857142857144,
"grad_norm": 0.2753259837627411,
"learning_rate": 6.902365253841737e-06,
"loss": 1.177211880683899,
"step": 722
},
{
"epoch": 1.292857142857143,
"grad_norm": 0.2185521274805069,
"learning_rate": 6.885520902733435e-06,
"loss": 1.2806293964385986,
"step": 724
},
{
"epoch": 1.2964285714285715,
"grad_norm": 0.14865590631961823,
"learning_rate": 6.868655077944788e-06,
"loss": 0.9303812980651855,
"step": 726
},
{
"epoch": 1.3,
"grad_norm": 0.39503300189971924,
"learning_rate": 6.85176804087303e-06,
"loss": 1.5363171100616455,
"step": 728
},
{
"epoch": 1.3035714285714286,
"grad_norm": 0.510991632938385,
"learning_rate": 6.834860053244154e-06,
"loss": 1.1531927585601807,
"step": 730
},
{
"epoch": 1.3071428571428572,
"grad_norm": 0.28777721524238586,
"learning_rate": 6.8179313771088626e-06,
"loss": 1.2121974229812622,
"step": 732
},
{
"epoch": 1.3107142857142857,
"grad_norm": 0.30707836151123047,
"learning_rate": 6.800982274838495e-06,
"loss": 1.4065004587173462,
"step": 734
},
{
"epoch": 1.3142857142857143,
"grad_norm": 0.23764309287071228,
"learning_rate": 6.784013009120975e-06,
"loss": 1.4308959245681763,
"step": 736
},
{
"epoch": 1.3178571428571428,
"grad_norm": 0.6906368136405945,
"learning_rate": 6.767023842956725e-06,
"loss": 1.1925731897354126,
"step": 738
},
{
"epoch": 1.3214285714285714,
"grad_norm": 0.4775388538837433,
"learning_rate": 6.750015039654603e-06,
"loss": 1.6403999328613281,
"step": 740
},
{
"epoch": 1.325,
"grad_norm": 0.2565818727016449,
"learning_rate": 6.732986862827813e-06,
"loss": 1.0603913068771362,
"step": 742
},
{
"epoch": 1.3285714285714285,
"grad_norm": 0.47122514247894287,
"learning_rate": 6.7159395763898214e-06,
"loss": 1.3830267190933228,
"step": 744
},
{
"epoch": 1.332142857142857,
"grad_norm": 0.5306914448738098,
"learning_rate": 6.698873444550271e-06,
"loss": 1.2981680631637573,
"step": 746
},
{
"epoch": 1.3357142857142856,
"grad_norm": 0.408100426197052,
"learning_rate": 6.68178873181088e-06,
"loss": 1.2487084865570068,
"step": 748
},
{
"epoch": 1.3392857142857144,
"grad_norm": 0.33308205008506775,
"learning_rate": 6.664685702961344e-06,
"loss": 0.9980481266975403,
"step": 750
},
{
"epoch": 1.342857142857143,
"grad_norm": 0.20474325120449066,
"learning_rate": 6.647564623075236e-06,
"loss": 0.9687408804893494,
"step": 752
},
{
"epoch": 1.3464285714285715,
"grad_norm": 0.8245405554771423,
"learning_rate": 6.630425757505894e-06,
"loss": 1.33769953250885,
"step": 754
},
{
"epoch": 1.35,
"grad_norm": 0.2982644736766815,
"learning_rate": 6.613269371882308e-06,
"loss": 1.3833491802215576,
"step": 756
},
{
"epoch": 1.3535714285714286,
"grad_norm": 0.45085495710372925,
"learning_rate": 6.596095732105011e-06,
"loss": 1.2755907773971558,
"step": 758
},
{
"epoch": 1.3571428571428572,
"grad_norm": 0.29945558309555054,
"learning_rate": 6.5789051043419435e-06,
"loss": 1.2956531047821045,
"step": 760
},
{
"epoch": 1.3607142857142858,
"grad_norm": 0.5544592142105103,
"learning_rate": 6.5616977550243435e-06,
"loss": 1.2718784809112549,
"step": 762
},
{
"epoch": 1.3642857142857143,
"grad_norm": 0.7638172507286072,
"learning_rate": 6.544473950842606e-06,
"loss": 1.126919150352478,
"step": 764
},
{
"epoch": 1.3678571428571429,
"grad_norm": 0.4192071557044983,
"learning_rate": 6.527233958742154e-06,
"loss": 1.4331161975860596,
"step": 766
},
{
"epoch": 1.3714285714285714,
"grad_norm": 0.2737813889980316,
"learning_rate": 6.509978045919307e-06,
"loss": 1.2379997968673706,
"step": 768
},
{
"epoch": 1.375,
"grad_norm": 0.7987821102142334,
"learning_rate": 6.492706479817125e-06,
"loss": 1.278856873512268,
"step": 770
},
{
"epoch": 1.3785714285714286,
"grad_norm": 0.30944374203681946,
"learning_rate": 6.475419528121279e-06,
"loss": 1.3922899961471558,
"step": 772
},
{
"epoch": 1.3821428571428571,
"grad_norm": 0.29533934593200684,
"learning_rate": 6.45811745875589e-06,
"loss": 1.235024094581604,
"step": 774
},
{
"epoch": 1.3857142857142857,
"grad_norm": 0.788487434387207,
"learning_rate": 6.440800539879392e-06,
"loss": 1.1024410724639893,
"step": 776
},
{
"epoch": 1.3892857142857142,
"grad_norm": 0.3519847095012665,
"learning_rate": 6.423469039880355e-06,
"loss": 1.233741283416748,
"step": 778
},
{
"epoch": 1.3928571428571428,
"grad_norm": 0.18675316870212555,
"learning_rate": 6.406123227373343e-06,
"loss": 1.3022193908691406,
"step": 780
},
{
"epoch": 1.3964285714285714,
"grad_norm": 0.263254314661026,
"learning_rate": 6.388763371194741e-06,
"loss": 1.2517147064208984,
"step": 782
},
{
"epoch": 1.4,
"grad_norm": 0.35091346502304077,
"learning_rate": 6.371389740398597e-06,
"loss": 1.1601366996765137,
"step": 784
},
{
"epoch": 1.4035714285714285,
"grad_norm": 0.34103208780288696,
"learning_rate": 6.35400260425244e-06,
"loss": 1.3991872072219849,
"step": 786
},
{
"epoch": 1.407142857142857,
"grad_norm": 1.0600661039352417,
"learning_rate": 6.336602232233116e-06,
"loss": 1.4128477573394775,
"step": 788
},
{
"epoch": 1.4107142857142856,
"grad_norm": 0.6274294257164001,
"learning_rate": 6.319188894022612e-06,
"loss": 1.5149511098861694,
"step": 790
},
{
"epoch": 1.4142857142857144,
"grad_norm": 0.25083670020103455,
"learning_rate": 6.301762859503869e-06,
"loss": 1.3468106985092163,
"step": 792
},
{
"epoch": 1.417857142857143,
"grad_norm": 0.4435229003429413,
"learning_rate": 6.284324398756606e-06,
"loss": 1.3005448579788208,
"step": 794
},
{
"epoch": 1.4214285714285715,
"grad_norm": 0.5059611201286316,
"learning_rate": 6.266873782053131e-06,
"loss": 1.0667213201522827,
"step": 796
},
{
"epoch": 1.425,
"grad_norm": 0.2751584053039551,
"learning_rate": 6.249411279854152e-06,
"loss": 1.1674690246582031,
"step": 798
},
{
"epoch": 1.4285714285714286,
"grad_norm": 0.2168678641319275,
"learning_rate": 6.231937162804584e-06,
"loss": 1.0654405355453491,
"step": 800
},
{
"epoch": 1.4321428571428572,
"grad_norm": 0.6201224327087402,
"learning_rate": 6.214451701729363e-06,
"loss": 1.1552761793136597,
"step": 802
},
{
"epoch": 1.4357142857142857,
"grad_norm": 0.4682956635951996,
"learning_rate": 6.196955167629236e-06,
"loss": 1.3353182077407837,
"step": 804
},
{
"epoch": 1.4392857142857143,
"grad_norm": 0.3534834384918213,
"learning_rate": 6.179447831676566e-06,
"loss": 1.3080209493637085,
"step": 806
},
{
"epoch": 1.4428571428571428,
"grad_norm": 0.4813729226589203,
"learning_rate": 6.161929965211135e-06,
"loss": 1.3717149496078491,
"step": 808
},
{
"epoch": 1.4464285714285714,
"grad_norm": 0.26942121982574463,
"learning_rate": 6.144401839735931e-06,
"loss": 1.4133044481277466,
"step": 810
},
{
"epoch": 1.45,
"grad_norm": 0.30204319953918457,
"learning_rate": 6.12686372691294e-06,
"loss": 1.581753134727478,
"step": 812
},
{
"epoch": 1.4535714285714285,
"grad_norm": 1.1933614015579224,
"learning_rate": 6.109315898558943e-06,
"loss": 1.1946600675582886,
"step": 814
},
{
"epoch": 1.457142857142857,
"grad_norm": 0.651054322719574,
"learning_rate": 6.091758626641296e-06,
"loss": 1.2849314212799072,
"step": 816
},
{
"epoch": 1.4607142857142856,
"grad_norm": 0.41265299916267395,
"learning_rate": 6.074192183273714e-06,
"loss": 1.2870151996612549,
"step": 818
},
{
"epoch": 1.4642857142857144,
"grad_norm": 0.2880115807056427,
"learning_rate": 6.056616840712065e-06,
"loss": 1.156186580657959,
"step": 820
},
{
"epoch": 1.467857142857143,
"grad_norm": 0.31380829215049744,
"learning_rate": 6.039032871350136e-06,
"loss": 1.3075363636016846,
"step": 822
},
{
"epoch": 1.4714285714285715,
"grad_norm": 0.735464334487915,
"learning_rate": 6.021440547715418e-06,
"loss": 1.2372568845748901,
"step": 824
},
{
"epoch": 1.475,
"grad_norm": 0.3404405117034912,
"learning_rate": 6.0038401424648866e-06,
"loss": 1.3656535148620605,
"step": 826
},
{
"epoch": 1.4785714285714286,
"grad_norm": 1.0161242485046387,
"learning_rate": 5.986231928380764e-06,
"loss": 1.4575047492980957,
"step": 828
},
{
"epoch": 1.4821428571428572,
"grad_norm": 0.32120001316070557,
"learning_rate": 5.968616178366304e-06,
"loss": 1.1328424215316772,
"step": 830
},
{
"epoch": 1.4857142857142858,
"grad_norm": 0.24318258464336395,
"learning_rate": 5.95099316544156e-06,
"loss": 1.1171592473983765,
"step": 832
},
{
"epoch": 1.4892857142857143,
"grad_norm": 0.2471759170293808,
"learning_rate": 5.9333631627391385e-06,
"loss": 1.1361713409423828,
"step": 834
},
{
"epoch": 1.4928571428571429,
"grad_norm": 0.31643709540367126,
"learning_rate": 5.915726443499992e-06,
"loss": 1.4550275802612305,
"step": 836
},
{
"epoch": 1.4964285714285714,
"grad_norm": 0.2178327739238739,
"learning_rate": 5.89808328106916e-06,
"loss": 1.1423126459121704,
"step": 838
},
{
"epoch": 1.5,
"grad_norm": 0.38720911741256714,
"learning_rate": 5.880433948891548e-06,
"loss": 1.0535848140716553,
"step": 840
},
{
"epoch": 1.5035714285714286,
"grad_norm": 0.4030672013759613,
"learning_rate": 5.862778720507684e-06,
"loss": 1.3946490287780762,
"step": 842
},
{
"epoch": 1.5071428571428571,
"grad_norm": 0.35578665137290955,
"learning_rate": 5.845117869549477e-06,
"loss": 1.5173096656799316,
"step": 844
},
{
"epoch": 1.5107142857142857,
"grad_norm": 0.3867500126361847,
"learning_rate": 5.827451669735977e-06,
"loss": 1.352368712425232,
"step": 846
},
{
"epoch": 1.5142857142857142,
"grad_norm": 0.9219626188278198,
"learning_rate": 5.80978039486914e-06,
"loss": 0.9382961988449097,
"step": 848
},
{
"epoch": 1.5178571428571428,
"grad_norm": 0.21979399025440216,
"learning_rate": 5.79210431882957e-06,
"loss": 0.8432712554931641,
"step": 850
},
{
"epoch": 1.5214285714285714,
"grad_norm": 0.28859761357307434,
"learning_rate": 5.774423715572289e-06,
"loss": 1.296618938446045,
"step": 852
},
{
"epoch": 1.525,
"grad_norm": 0.4942507743835449,
"learning_rate": 5.756738859122483e-06,
"loss": 1.0648285150527954,
"step": 854
},
{
"epoch": 1.5285714285714285,
"grad_norm": 0.5750854015350342,
"learning_rate": 5.739050023571258e-06,
"loss": 1.0088112354278564,
"step": 856
},
{
"epoch": 1.532142857142857,
"grad_norm": 0.20957696437835693,
"learning_rate": 5.721357483071386e-06,
"loss": 1.0590897798538208,
"step": 858
},
{
"epoch": 1.5357142857142856,
"grad_norm": 0.8381152153015137,
"learning_rate": 5.703661511833064e-06,
"loss": 1.3163901567459106,
"step": 860
},
{
"epoch": 1.5392857142857141,
"grad_norm": 0.4364100992679596,
"learning_rate": 5.68596238411966e-06,
"loss": 1.1863445043563843,
"step": 862
},
{
"epoch": 1.5428571428571427,
"grad_norm": 0.3657117784023285,
"learning_rate": 5.668260374243467e-06,
"loss": 0.98140949010849,
"step": 864
},
{
"epoch": 1.5464285714285713,
"grad_norm": 0.6113946437835693,
"learning_rate": 5.650555756561439e-06,
"loss": 1.3584340810775757,
"step": 866
},
{
"epoch": 1.55,
"grad_norm": 0.7465829849243164,
"learning_rate": 5.6328488054709575e-06,
"loss": 1.149134874343872,
"step": 868
},
{
"epoch": 1.5535714285714286,
"grad_norm": 0.9023903608322144,
"learning_rate": 5.615139795405559e-06,
"loss": 1.2276476621627808,
"step": 870
},
{
"epoch": 1.5571428571428572,
"grad_norm": 0.5961250066757202,
"learning_rate": 5.5974290008307e-06,
"loss": 1.3803772926330566,
"step": 872
},
{
"epoch": 1.5607142857142857,
"grad_norm": 0.31303706765174866,
"learning_rate": 5.579716696239486e-06,
"loss": 0.8974480628967285,
"step": 874
},
{
"epoch": 1.5642857142857143,
"grad_norm": 0.49465271830558777,
"learning_rate": 5.562003156148434e-06,
"loss": 1.500373125076294,
"step": 876
},
{
"epoch": 1.5678571428571428,
"grad_norm": 0.4547047019004822,
"learning_rate": 5.544288655093203e-06,
"loss": 1.3437693119049072,
"step": 878
},
{
"epoch": 1.5714285714285714,
"grad_norm": 0.2680365741252899,
"learning_rate": 5.526573467624351e-06,
"loss": 1.0480762720108032,
"step": 880
},
{
"epoch": 1.575,
"grad_norm": 0.2553335130214691,
"learning_rate": 5.508857868303068e-06,
"loss": 1.078729271888733,
"step": 882
},
{
"epoch": 1.5785714285714287,
"grad_norm": 0.2632956802845001,
"learning_rate": 5.491142131696934e-06,
"loss": 1.16781485080719,
"step": 884
},
{
"epoch": 1.5821428571428573,
"grad_norm": 0.42439237236976624,
"learning_rate": 5.473426532375651e-06,
"loss": 1.0907145738601685,
"step": 886
},
{
"epoch": 1.5857142857142859,
"grad_norm": 0.4016067087650299,
"learning_rate": 5.455711344906797e-06,
"loss": 1.0479315519332886,
"step": 888
},
{
"epoch": 1.5892857142857144,
"grad_norm": 0.787295401096344,
"learning_rate": 5.437996843851567e-06,
"loss": 1.1056879758834839,
"step": 890
},
{
"epoch": 1.592857142857143,
"grad_norm": 0.24893441796302795,
"learning_rate": 5.420283303760515e-06,
"loss": 1.086808443069458,
"step": 892
},
{
"epoch": 1.5964285714285715,
"grad_norm": 1.0016993284225464,
"learning_rate": 5.402570999169303e-06,
"loss": 1.4259756803512573,
"step": 894
},
{
"epoch": 1.6,
"grad_norm": 0.5658416748046875,
"learning_rate": 5.384860204594442e-06,
"loss": 1.175308346748352,
"step": 896
},
{
"epoch": 1.6035714285714286,
"grad_norm": 0.32960644364356995,
"learning_rate": 5.367151194529045e-06,
"loss": 1.3044936656951904,
"step": 898
},
{
"epoch": 1.6071428571428572,
"grad_norm": 1.566615343093872,
"learning_rate": 5.349444243438563e-06,
"loss": 1.1787108182907104,
"step": 900
},
{
"epoch": 1.6107142857142858,
"grad_norm": 0.3008659780025482,
"learning_rate": 5.331739625756535e-06,
"loss": 1.2578707933425903,
"step": 902
},
{
"epoch": 1.6142857142857143,
"grad_norm": 0.3048568367958069,
"learning_rate": 5.314037615880341e-06,
"loss": 1.214415192604065,
"step": 904
},
{
"epoch": 1.6178571428571429,
"grad_norm": 0.30796509981155396,
"learning_rate": 5.296338488166939e-06,
"loss": 1.2612226009368896,
"step": 906
},
{
"epoch": 1.6214285714285714,
"grad_norm": 0.3856910467147827,
"learning_rate": 5.278642516928617e-06,
"loss": 1.1769757270812988,
"step": 908
},
{
"epoch": 1.625,
"grad_norm": 0.4512476921081543,
"learning_rate": 5.260949976428745e-06,
"loss": 1.058244228363037,
"step": 910
},
{
"epoch": 1.6285714285714286,
"grad_norm": 0.5113015174865723,
"learning_rate": 5.243261140877517e-06,
"loss": 1.3994414806365967,
"step": 912
},
{
"epoch": 1.6321428571428571,
"grad_norm": 0.24723981320858002,
"learning_rate": 5.225576284427712e-06,
"loss": 1.29803466796875,
"step": 914
},
{
"epoch": 1.6357142857142857,
"grad_norm": 0.2900439202785492,
"learning_rate": 5.207895681170432e-06,
"loss": 1.341897964477539,
"step": 916
},
{
"epoch": 1.6392857142857142,
"grad_norm": 0.2555374205112457,
"learning_rate": 5.190219605130863e-06,
"loss": 1.1864595413208008,
"step": 918
},
{
"epoch": 1.6428571428571428,
"grad_norm": 0.31760746240615845,
"learning_rate": 5.172548330264023e-06,
"loss": 1.2025091648101807,
"step": 920
},
{
"epoch": 1.6464285714285714,
"grad_norm": 0.28426891565322876,
"learning_rate": 5.154882130450525e-06,
"loss": 1.3937333822250366,
"step": 922
},
{
"epoch": 1.65,
"grad_norm": 0.26754945516586304,
"learning_rate": 5.137221279492317e-06,
"loss": 1.1592669486999512,
"step": 924
},
{
"epoch": 1.6535714285714285,
"grad_norm": 0.398725301027298,
"learning_rate": 5.119566051108453e-06,
"loss": 1.041808009147644,
"step": 926
},
{
"epoch": 1.657142857142857,
"grad_norm": 0.24082130193710327,
"learning_rate": 5.10191671893084e-06,
"loss": 1.1113499402999878,
"step": 928
},
{
"epoch": 1.6607142857142856,
"grad_norm": 0.32985880970954895,
"learning_rate": 5.08427355650001e-06,
"loss": 1.243566632270813,
"step": 930
},
{
"epoch": 1.6642857142857141,
"grad_norm": 0.22729991376399994,
"learning_rate": 5.066636837260863e-06,
"loss": 1.218003511428833,
"step": 932
},
{
"epoch": 1.6678571428571427,
"grad_norm": 0.20701321959495544,
"learning_rate": 5.049006834558443e-06,
"loss": 1.1665146350860596,
"step": 934
},
{
"epoch": 1.6714285714285713,
"grad_norm": 0.7482126355171204,
"learning_rate": 5.031383821633695e-06,
"loss": 1.0261443853378296,
"step": 936
},
{
"epoch": 1.675,
"grad_norm": 0.6510646939277649,
"learning_rate": 5.013768071619237e-06,
"loss": 1.1913405656814575,
"step": 938
},
{
"epoch": 1.6785714285714286,
"grad_norm": 0.3893536329269409,
"learning_rate": 4.996159857535116e-06,
"loss": 1.2498658895492554,
"step": 940
},
{
"epoch": 1.6821428571428572,
"grad_norm": 0.33820265531539917,
"learning_rate": 4.9785594522845835e-06,
"loss": 1.2645461559295654,
"step": 942
},
{
"epoch": 1.6857142857142857,
"grad_norm": 0.2857305407524109,
"learning_rate": 4.9609671286498655e-06,
"loss": 1.1648997068405151,
"step": 944
},
{
"epoch": 1.6892857142857143,
"grad_norm": 0.35911425948143005,
"learning_rate": 4.943383159287936e-06,
"loss": 1.3200312852859497,
"step": 946
},
{
"epoch": 1.6928571428571428,
"grad_norm": 0.28003281354904175,
"learning_rate": 4.925807816726288e-06,
"loss": 1.2886927127838135,
"step": 948
},
{
"epoch": 1.6964285714285714,
"grad_norm": 0.3707423210144043,
"learning_rate": 4.908241373358707e-06,
"loss": 1.2256838083267212,
"step": 950
},
{
"epoch": 1.7,
"grad_norm": 0.37781476974487305,
"learning_rate": 4.890684101441059e-06,
"loss": 1.261880874633789,
"step": 952
},
{
"epoch": 1.7035714285714287,
"grad_norm": 0.455138623714447,
"learning_rate": 4.873136273087061e-06,
"loss": 1.1675777435302734,
"step": 954
},
{
"epoch": 1.7071428571428573,
"grad_norm": 0.3084830641746521,
"learning_rate": 4.855598160264071e-06,
"loss": 1.0751243829727173,
"step": 956
},
{
"epoch": 1.7107142857142859,
"grad_norm": 0.33484798669815063,
"learning_rate": 4.838070034788865e-06,
"loss": 1.2969300746917725,
"step": 958
},
{
"epoch": 1.7142857142857144,
"grad_norm": 0.45519745349884033,
"learning_rate": 4.820552168323434e-06,
"loss": 1.1682568788528442,
"step": 960
},
{
"epoch": 1.717857142857143,
"grad_norm": 0.3936917185783386,
"learning_rate": 4.803044832370765e-06,
"loss": 1.2029849290847778,
"step": 962
},
{
"epoch": 1.7214285714285715,
"grad_norm": 0.2847800850868225,
"learning_rate": 4.7855482982706396e-06,
"loss": 1.308813452720642,
"step": 964
},
{
"epoch": 1.725,
"grad_norm": 0.2914465069770813,
"learning_rate": 4.768062837195417e-06,
"loss": 1.2900055646896362,
"step": 966
},
{
"epoch": 1.7285714285714286,
"grad_norm": 0.5518858432769775,
"learning_rate": 4.7505887201458485e-06,
"loss": 1.2404606342315674,
"step": 968
},
{
"epoch": 1.7321428571428572,
"grad_norm": 0.34736767411231995,
"learning_rate": 4.73312621794687e-06,
"loss": 1.1192835569381714,
"step": 970
},
{
"epoch": 1.7357142857142858,
"grad_norm": 0.27989068627357483,
"learning_rate": 4.715675601243396e-06,
"loss": 1.2646175622940063,
"step": 972
},
{
"epoch": 1.7392857142857143,
"grad_norm": 0.2832848131656647,
"learning_rate": 4.698237140496132e-06,
"loss": 1.2004600763320923,
"step": 974
},
{
"epoch": 1.7428571428571429,
"grad_norm": 1.7877376079559326,
"learning_rate": 4.68081110597739e-06,
"loss": 1.2224751710891724,
"step": 976
},
{
"epoch": 1.7464285714285714,
"grad_norm": 0.2644546627998352,
"learning_rate": 4.663397767766885e-06,
"loss": 1.2846026420593262,
"step": 978
},
{
"epoch": 1.75,
"grad_norm": 0.23440435528755188,
"learning_rate": 4.6459973957475625e-06,
"loss": 1.2761108875274658,
"step": 980
},
{
"epoch": 1.7535714285714286,
"grad_norm": 0.29541414976119995,
"learning_rate": 4.628610259601406e-06,
"loss": 1.2253004312515259,
"step": 982
},
{
"epoch": 1.7571428571428571,
"grad_norm": 0.3721539378166199,
"learning_rate": 4.611236628805259e-06,
"loss": 1.217316746711731,
"step": 984
},
{
"epoch": 1.7607142857142857,
"grad_norm": 0.23486927151679993,
"learning_rate": 4.593876772626659e-06,
"loss": 1.238864779472351,
"step": 986
},
{
"epoch": 1.7642857142857142,
"grad_norm": 0.35403114557266235,
"learning_rate": 4.576530960119646e-06,
"loss": 1.2506440877914429,
"step": 988
},
{
"epoch": 1.7678571428571428,
"grad_norm": 0.24216312170028687,
"learning_rate": 4.55919946012061e-06,
"loss": 1.203848123550415,
"step": 990
},
{
"epoch": 1.7714285714285714,
"grad_norm": 0.5742025971412659,
"learning_rate": 4.54188254124411e-06,
"loss": 1.2036422491073608,
"step": 992
},
{
"epoch": 1.775,
"grad_norm": 0.4332943260669708,
"learning_rate": 4.524580471878724e-06,
"loss": 1.1484333276748657,
"step": 994
},
{
"epoch": 1.7785714285714285,
"grad_norm": 0.2262076586484909,
"learning_rate": 4.507293520182877e-06,
"loss": 1.2005127668380737,
"step": 996
},
{
"epoch": 1.782142857142857,
"grad_norm": 0.18153786659240723,
"learning_rate": 4.490021954080695e-06,
"loss": 1.1209759712219238,
"step": 998
},
{
"epoch": 1.7857142857142856,
"grad_norm": 0.2752821147441864,
"learning_rate": 4.472766041257846e-06,
"loss": 1.1912975311279297,
"step": 1000
},
{
"epoch": 1.7892857142857141,
"grad_norm": 0.37398797273635864,
"learning_rate": 4.4555260491573956e-06,
"loss": 1.1634106636047363,
"step": 1002
},
{
"epoch": 1.7928571428571427,
"grad_norm": 0.4885188341140747,
"learning_rate": 4.438302244975659e-06,
"loss": 1.19752037525177,
"step": 1004
},
{
"epoch": 1.7964285714285713,
"grad_norm": 0.20963414013385773,
"learning_rate": 4.421094895658058e-06,
"loss": 1.1573578119277954,
"step": 1006
},
{
"epoch": 1.8,
"grad_norm": 0.19768458604812622,
"learning_rate": 4.403904267894991e-06,
"loss": 1.1309683322906494,
"step": 1008
},
{
"epoch": 1.8035714285714286,
"grad_norm": 0.3265831470489502,
"learning_rate": 4.386730628117692e-06,
"loss": 1.210740566253662,
"step": 1010
},
{
"epoch": 1.8071428571428572,
"grad_norm": 0.24831008911132812,
"learning_rate": 4.369574242494108e-06,
"loss": 1.1857199668884277,
"step": 1012
},
{
"epoch": 1.8107142857142857,
"grad_norm": 0.24806837737560272,
"learning_rate": 4.3524353769247665e-06,
"loss": 1.0957400798797607,
"step": 1014
},
{
"epoch": 1.8142857142857143,
"grad_norm": 0.21978451311588287,
"learning_rate": 4.335314297038656e-06,
"loss": 1.1512374877929688,
"step": 1016
},
{
"epoch": 1.8178571428571428,
"grad_norm": 0.18174096941947937,
"learning_rate": 4.318211268189121e-06,
"loss": 1.1074084043502808,
"step": 1018
},
{
"epoch": 1.8214285714285714,
"grad_norm": 0.1807389110326767,
"learning_rate": 4.3011265554497305e-06,
"loss": 1.1385325193405151,
"step": 1020
},
{
"epoch": 1.825,
"grad_norm": 0.3229348063468933,
"learning_rate": 4.28406042361018e-06,
"loss": 1.119950771331787,
"step": 1022
},
{
"epoch": 1.8285714285714287,
"grad_norm": 0.21613694727420807,
"learning_rate": 4.267013137172189e-06,
"loss": 1.1364243030548096,
"step": 1024
},
{
"epoch": 1.8321428571428573,
"grad_norm": 0.5674333572387695,
"learning_rate": 4.249984960345399e-06,
"loss": 1.1446290016174316,
"step": 1026
},
{
"epoch": 1.8357142857142859,
"grad_norm": 0.19522684812545776,
"learning_rate": 4.232976157043277e-06,
"loss": 1.1350977420806885,
"step": 1028
},
{
"epoch": 1.8392857142857144,
"grad_norm": 0.22652848064899445,
"learning_rate": 4.2159869908790275e-06,
"loss": 1.1374115943908691,
"step": 1030
},
{
"epoch": 1.842857142857143,
"grad_norm": 0.20917841792106628,
"learning_rate": 4.199017725161505e-06,
"loss": 1.1824545860290527,
"step": 1032
},
{
"epoch": 1.8464285714285715,
"grad_norm": 0.2631721496582031,
"learning_rate": 4.182068622891139e-06,
"loss": 1.1770212650299072,
"step": 1034
},
{
"epoch": 1.85,
"grad_norm": 0.24983558058738708,
"learning_rate": 4.165139946755847e-06,
"loss": 1.161262035369873,
"step": 1036
},
{
"epoch": 1.8535714285714286,
"grad_norm": 0.31537604331970215,
"learning_rate": 4.148231959126973e-06,
"loss": 1.1958869695663452,
"step": 1038
},
{
"epoch": 1.8571428571428572,
"grad_norm": 0.3142789900302887,
"learning_rate": 4.131344922055213e-06,
"loss": 1.1789402961730957,
"step": 1040
},
{
"epoch": 1.8607142857142858,
"grad_norm": 0.42967483401298523,
"learning_rate": 4.114479097266567e-06,
"loss": 1.1411830186843872,
"step": 1042
},
{
"epoch": 1.8642857142857143,
"grad_norm": 0.21074344217777252,
"learning_rate": 4.0976347461582656e-06,
"loss": 1.17338228225708,
"step": 1044
},
{
"epoch": 1.8678571428571429,
"grad_norm": 0.33415719866752625,
"learning_rate": 4.080812129794728e-06,
"loss": 1.1420398950576782,
"step": 1046
},
{
"epoch": 1.8714285714285714,
"grad_norm": 0.16336952149868011,
"learning_rate": 4.064011508903516e-06,
"loss": 1.1628490686416626,
"step": 1048
},
{
"epoch": 1.875,
"grad_norm": 0.2252008020877838,
"learning_rate": 4.047233143871292e-06,
"loss": 1.173589825630188,
"step": 1050
},
{
"epoch": 1.8785714285714286,
"grad_norm": 0.33176442980766296,
"learning_rate": 4.030477294739783e-06,
"loss": 1.194374918937683,
"step": 1052
},
{
"epoch": 1.8821428571428571,
"grad_norm": 0.29097726941108704,
"learning_rate": 4.013744221201749e-06,
"loss": 1.1737301349639893,
"step": 1054
},
{
"epoch": 1.8857142857142857,
"grad_norm": 0.1832679808139801,
"learning_rate": 3.997034182596958e-06,
"loss": 1.110135793685913,
"step": 1056
},
{
"epoch": 1.8892857142857142,
"grad_norm": 0.2953426241874695,
"learning_rate": 3.980347437908175e-06,
"loss": 1.1428486108779907,
"step": 1058
},
{
"epoch": 1.8928571428571428,
"grad_norm": 0.20754416286945343,
"learning_rate": 3.963684245757132e-06,
"loss": 1.17241632938385,
"step": 1060
},
{
"epoch": 1.8964285714285714,
"grad_norm": 0.29985517263412476,
"learning_rate": 3.9470448644005345e-06,
"loss": 1.2037956714630127,
"step": 1062
},
{
"epoch": 1.9,
"grad_norm": 0.24180017411708832,
"learning_rate": 3.930429551726049e-06,
"loss": 1.1744909286499023,
"step": 1064
},
{
"epoch": 1.9035714285714285,
"grad_norm": 0.1725412905216217,
"learning_rate": 3.913838565248318e-06,
"loss": 1.1504842042922974,
"step": 1066
},
{
"epoch": 1.907142857142857,
"grad_norm": 0.19483552873134613,
"learning_rate": 3.8972721621049545e-06,
"loss": 1.1242973804473877,
"step": 1068
},
{
"epoch": 1.9107142857142856,
"grad_norm": 0.2150045484304428,
"learning_rate": 3.880730599052565e-06,
"loss": 1.1571553945541382,
"step": 1070
},
{
"epoch": 1.9142857142857141,
"grad_norm": 0.26055601239204407,
"learning_rate": 3.864214132462766e-06,
"loss": 1.1744543313980103,
"step": 1072
},
{
"epoch": 1.9178571428571427,
"grad_norm": 0.20224107801914215,
"learning_rate": 3.84772301831822e-06,
"loss": 1.129955768585205,
"step": 1074
},
{
"epoch": 1.9214285714285713,
"grad_norm": 0.21899673342704773,
"learning_rate": 3.831257512208657e-06,
"loss": 1.1564751863479614,
"step": 1076
},
{
"epoch": 1.925,
"grad_norm": 0.24604743719100952,
"learning_rate": 3.814817869326915e-06,
"loss": 1.1490484476089478,
"step": 1078
},
{
"epoch": 1.9285714285714286,
"grad_norm": 0.1920636147260666,
"learning_rate": 3.7984043444649898e-06,
"loss": 1.1944819688796997,
"step": 1080
},
{
"epoch": 1.9321428571428572,
"grad_norm": 0.2951393723487854,
"learning_rate": 3.782017192010087e-06,
"loss": 1.2130813598632812,
"step": 1082
},
{
"epoch": 1.9357142857142857,
"grad_norm": 0.38370734453201294,
"learning_rate": 3.76565666594067e-06,
"loss": 1.1711630821228027,
"step": 1084
},
{
"epoch": 1.9392857142857143,
"grad_norm": 0.7297260165214539,
"learning_rate": 3.749323019822534e-06,
"loss": 1.1901503801345825,
"step": 1086
},
{
"epoch": 1.9428571428571428,
"grad_norm": 0.22041039168834686,
"learning_rate": 3.7330165068048673e-06,
"loss": 1.1663475036621094,
"step": 1088
},
{
"epoch": 1.9464285714285714,
"grad_norm": 0.2529982626438141,
"learning_rate": 3.7167373796163377e-06,
"loss": 1.1222208738327026,
"step": 1090
},
{
"epoch": 1.95,
"grad_norm": 0.22839988768100739,
"learning_rate": 3.700485890561167e-06,
"loss": 1.1396700143814087,
"step": 1092
},
{
"epoch": 1.9535714285714287,
"grad_norm": 0.32207345962524414,
"learning_rate": 3.6842622915152228e-06,
"loss": 1.1646703481674194,
"step": 1094
},
{
"epoch": 1.9571428571428573,
"grad_norm": 0.2876273989677429,
"learning_rate": 3.668066833922116e-06,
"loss": 1.148516058921814,
"step": 1096
},
{
"epoch": 1.9607142857142859,
"grad_norm": 0.2196146845817566,
"learning_rate": 3.6518997687893053e-06,
"loss": 1.1533443927764893,
"step": 1098
},
{
"epoch": 1.9642857142857144,
"grad_norm": 0.46365395188331604,
"learning_rate": 3.635761346684206e-06,
"loss": 1.1947966814041138,
"step": 1100
},
{
"epoch": 1.967857142857143,
"grad_norm": 0.2954294681549072,
"learning_rate": 3.619651817730302e-06,
"loss": 1.1832884550094604,
"step": 1102
},
{
"epoch": 1.9714285714285715,
"grad_norm": 0.2565920650959015,
"learning_rate": 3.603571431603272e-06,
"loss": 1.1965795755386353,
"step": 1104
},
{
"epoch": 1.975,
"grad_norm": 0.2640427350997925,
"learning_rate": 3.587520437527128e-06,
"loss": 1.140123963356018,
"step": 1106
},
{
"epoch": 1.9785714285714286,
"grad_norm": 0.26683422923088074,
"learning_rate": 3.571499084270338e-06,
"loss": 1.1581156253814697,
"step": 1108
},
{
"epoch": 1.9821428571428572,
"grad_norm": 0.2290692776441574,
"learning_rate": 3.5555076201419816e-06,
"loss": 1.174959421157837,
"step": 1110
},
{
"epoch": 1.9857142857142858,
"grad_norm": 0.2061983048915863,
"learning_rate": 3.5395462929878945e-06,
"loss": 1.220007061958313,
"step": 1112
},
{
"epoch": 1.9892857142857143,
"grad_norm": 0.20125523209571838,
"learning_rate": 3.5236153501868343e-06,
"loss": 1.2462403774261475,
"step": 1114
},
{
"epoch": 1.9928571428571429,
"grad_norm": 0.29600805044174194,
"learning_rate": 3.5077150386466406e-06,
"loss": 1.2024950981140137,
"step": 1116
},
{
"epoch": 1.9964285714285714,
"grad_norm": 0.2931258976459503,
"learning_rate": 3.4918456048004106e-06,
"loss": 1.1237006187438965,
"step": 1118
},
{
"epoch": 2.0,
"grad_norm": 0.4734819829463959,
"learning_rate": 3.4760072946026786e-06,
"loss": 1.1085011959075928,
"step": 1120
},
{
"epoch": 2.0035714285714286,
"grad_norm": 0.20331430435180664,
"learning_rate": 3.46020035352561e-06,
"loss": 1.2824596166610718,
"step": 1122
},
{
"epoch": 2.007142857142857,
"grad_norm": 0.46622058749198914,
"learning_rate": 3.444425026555182e-06,
"loss": 1.2747101783752441,
"step": 1124
},
{
"epoch": 2.0107142857142857,
"grad_norm": 0.19980192184448242,
"learning_rate": 3.4286815581874045e-06,
"loss": 1.2517393827438354,
"step": 1126
},
{
"epoch": 2.0142857142857142,
"grad_norm": 0.32897406816482544,
"learning_rate": 3.4129701924245173e-06,
"loss": 1.2301400899887085,
"step": 1128
},
{
"epoch": 2.017857142857143,
"grad_norm": 0.17299680411815643,
"learning_rate": 3.397291172771221e-06,
"loss": 1.2544574737548828,
"step": 1130
},
{
"epoch": 2.0214285714285714,
"grad_norm": 0.2090325653553009,
"learning_rate": 3.3816447422308883e-06,
"loss": 1.0791321992874146,
"step": 1132
},
{
"epoch": 2.025,
"grad_norm": 0.2806832790374756,
"learning_rate": 3.366031143301811e-06,
"loss": 1.1756961345672607,
"step": 1134
},
{
"epoch": 2.0285714285714285,
"grad_norm": 0.4019312858581543,
"learning_rate": 3.3504506179734254e-06,
"loss": 1.1622370481491089,
"step": 1136
},
{
"epoch": 2.032142857142857,
"grad_norm": 0.22266216576099396,
"learning_rate": 3.334903407722587e-06,
"loss": 1.234253168106079,
"step": 1138
},
{
"epoch": 2.0357142857142856,
"grad_norm": 0.29923903942108154,
"learning_rate": 3.319389753509803e-06,
"loss": 1.1241004467010498,
"step": 1140
},
{
"epoch": 2.039285714285714,
"grad_norm": 0.3284701704978943,
"learning_rate": 3.30390989577551e-06,
"loss": 1.260522723197937,
"step": 1142
},
{
"epoch": 2.0428571428571427,
"grad_norm": 0.4323379099369049,
"learning_rate": 3.288464074436346e-06,
"loss": 1.1753382682800293,
"step": 1144
},
{
"epoch": 2.0464285714285713,
"grad_norm": 0.3188895285129547,
"learning_rate": 3.273052528881433e-06,
"loss": 1.1759196519851685,
"step": 1146
},
{
"epoch": 2.05,
"grad_norm": 0.754629373550415,
"learning_rate": 3.257675497968661e-06,
"loss": 1.0839532613754272,
"step": 1148
},
{
"epoch": 2.0535714285714284,
"grad_norm": 0.261398047208786,
"learning_rate": 3.2423332200209946e-06,
"loss": 1.1668034791946411,
"step": 1150
},
{
"epoch": 2.057142857142857,
"grad_norm": 0.3192571699619293,
"learning_rate": 3.2270259328227703e-06,
"loss": 1.312312364578247,
"step": 1152
},
{
"epoch": 2.0607142857142855,
"grad_norm": 0.3842572271823883,
"learning_rate": 3.2117538736160235e-06,
"loss": 1.241450548171997,
"step": 1154
},
{
"epoch": 2.064285714285714,
"grad_norm": 0.3109821677207947,
"learning_rate": 3.1965172790967967e-06,
"loss": 1.2660008668899536,
"step": 1156
},
{
"epoch": 2.067857142857143,
"grad_norm": 0.30365416407585144,
"learning_rate": 3.1813163854114793e-06,
"loss": 1.1892515420913696,
"step": 1158
},
{
"epoch": 2.0714285714285716,
"grad_norm": 0.26805219054222107,
"learning_rate": 3.1661514281531464e-06,
"loss": 1.2073129415512085,
"step": 1160
},
{
"epoch": 2.075,
"grad_norm": 0.26900723576545715,
"learning_rate": 3.1510226423579127e-06,
"loss": 1.1416363716125488,
"step": 1162
},
{
"epoch": 2.0785714285714287,
"grad_norm": 0.3996395468711853,
"learning_rate": 3.135930262501279e-06,
"loss": 1.2287384271621704,
"step": 1164
},
{
"epoch": 2.0821428571428573,
"grad_norm": 0.3018134832382202,
"learning_rate": 3.120874522494506e-06,
"loss": 1.2006416320800781,
"step": 1166
},
{
"epoch": 2.085714285714286,
"grad_norm": 0.16339807212352753,
"learning_rate": 3.105855655680986e-06,
"loss": 0.9185248017311096,
"step": 1168
},
{
"epoch": 2.0892857142857144,
"grad_norm": 0.3090437352657318,
"learning_rate": 3.090873894832628e-06,
"loss": 0.9894356727600098,
"step": 1170
},
{
"epoch": 2.092857142857143,
"grad_norm": 0.30770227313041687,
"learning_rate": 3.07592947214625e-06,
"loss": 1.0587633848190308,
"step": 1172
},
{
"epoch": 2.0964285714285715,
"grad_norm": 0.32658347487449646,
"learning_rate": 3.0610226192399767e-06,
"loss": 1.2783530950546265,
"step": 1174
},
{
"epoch": 2.1,
"grad_norm": 0.3846922218799591,
"learning_rate": 3.0461535671496537e-06,
"loss": 1.0930966138839722,
"step": 1176
},
{
"epoch": 2.1035714285714286,
"grad_norm": 0.44550713896751404,
"learning_rate": 3.0313225463252716e-06,
"loss": 1.0916811227798462,
"step": 1178
},
{
"epoch": 2.107142857142857,
"grad_norm": 0.9442609548568726,
"learning_rate": 3.0165297866273766e-06,
"loss": 1.2753980159759521,
"step": 1180
},
{
"epoch": 2.1107142857142858,
"grad_norm": 0.2832079529762268,
"learning_rate": 3.0017755173235295e-06,
"loss": 1.195408821105957,
"step": 1182
},
{
"epoch": 2.1142857142857143,
"grad_norm": 0.27624693512916565,
"learning_rate": 2.9870599670847366e-06,
"loss": 1.137044072151184,
"step": 1184
},
{
"epoch": 2.117857142857143,
"grad_norm": 0.5313391089439392,
"learning_rate": 2.972383363981917e-06,
"loss": 1.1940035820007324,
"step": 1186
},
{
"epoch": 2.1214285714285714,
"grad_norm": 1.0065633058547974,
"learning_rate": 2.9577459354823602e-06,
"loss": 1.1326301097869873,
"step": 1188
},
{
"epoch": 2.125,
"grad_norm": 0.19776014983654022,
"learning_rate": 2.9431479084462013e-06,
"loss": 1.18599534034729,
"step": 1190
},
{
"epoch": 2.1285714285714286,
"grad_norm": 0.2414723038673401,
"learning_rate": 2.9285895091229042e-06,
"loss": 1.2466977834701538,
"step": 1192
},
{
"epoch": 2.132142857142857,
"grad_norm": 0.2931707799434662,
"learning_rate": 2.9140709631477666e-06,
"loss": 1.155306339263916,
"step": 1194
},
{
"epoch": 2.1357142857142857,
"grad_norm": 0.26033467054367065,
"learning_rate": 2.8995924955384048e-06,
"loss": 1.1785553693771362,
"step": 1196
},
{
"epoch": 2.1392857142857142,
"grad_norm": 0.24594391882419586,
"learning_rate": 2.885154330691278e-06,
"loss": 1.1734336614608765,
"step": 1198
},
{
"epoch": 2.142857142857143,
"grad_norm": 0.5041958093643188,
"learning_rate": 2.8707566923782105e-06,
"loss": 1.0410226583480835,
"step": 1200
},
{
"epoch": 2.1464285714285714,
"grad_norm": 0.25237134099006653,
"learning_rate": 2.856399803742916e-06,
"loss": 1.2042694091796875,
"step": 1202
},
{
"epoch": 2.15,
"grad_norm": 0.4853833019733429,
"learning_rate": 2.8420838872975482e-06,
"loss": 1.150026559829712,
"step": 1204
},
{
"epoch": 2.1535714285714285,
"grad_norm": 0.3172329366207123,
"learning_rate": 2.8278091649192443e-06,
"loss": 1.2379705905914307,
"step": 1206
},
{
"epoch": 2.157142857142857,
"grad_norm": 0.24962536990642548,
"learning_rate": 2.81357585784669e-06,
"loss": 1.2625255584716797,
"step": 1208
},
{
"epoch": 2.1607142857142856,
"grad_norm": 0.5905876755714417,
"learning_rate": 2.799384186676696e-06,
"loss": 1.1990773677825928,
"step": 1210
},
{
"epoch": 2.164285714285714,
"grad_norm": 0.2595714032649994,
"learning_rate": 2.785234371360766e-06,
"loss": 1.0102604627609253,
"step": 1212
},
{
"epoch": 2.1678571428571427,
"grad_norm": 0.2449759989976883,
"learning_rate": 2.7711266312016986e-06,
"loss": 1.1595333814620972,
"step": 1214
},
{
"epoch": 2.1714285714285713,
"grad_norm": 0.38237428665161133,
"learning_rate": 2.757061184850183e-06,
"loss": 1.2344083786010742,
"step": 1216
},
{
"epoch": 2.175,
"grad_norm": 0.24876584112644196,
"learning_rate": 2.743038250301418e-06,
"loss": 1.124006748199463,
"step": 1218
},
{
"epoch": 2.1785714285714284,
"grad_norm": 0.34139466285705566,
"learning_rate": 2.7290580448917204e-06,
"loss": 1.090733528137207,
"step": 1220
},
{
"epoch": 2.182142857142857,
"grad_norm": 0.22050592303276062,
"learning_rate": 2.7151207852951677e-06,
"loss": 1.1178282499313354,
"step": 1222
},
{
"epoch": 2.185714285714286,
"grad_norm": 0.26262110471725464,
"learning_rate": 2.701226687520235e-06,
"loss": 1.1468334197998047,
"step": 1224
},
{
"epoch": 2.189285714285714,
"grad_norm": 0.2389093041419983,
"learning_rate": 2.6873759669064474e-06,
"loss": 1.1655080318450928,
"step": 1226
},
{
"epoch": 2.192857142857143,
"grad_norm": 0.22899575531482697,
"learning_rate": 2.673568838121045e-06,
"loss": 1.169728398323059,
"step": 1228
},
{
"epoch": 2.1964285714285716,
"grad_norm": 0.7747792601585388,
"learning_rate": 2.659805515155653e-06,
"loss": 1.0896999835968018,
"step": 1230
},
{
"epoch": 2.2,
"grad_norm": 0.35865241289138794,
"learning_rate": 2.6460862113229656e-06,
"loss": 1.0157350301742554,
"step": 1232
},
{
"epoch": 2.2035714285714287,
"grad_norm": 0.9577608108520508,
"learning_rate": 2.6324111392534423e-06,
"loss": 1.1235113143920898,
"step": 1234
},
{
"epoch": 2.2071428571428573,
"grad_norm": 0.3065534234046936,
"learning_rate": 2.6187805108920104e-06,
"loss": 1.071955680847168,
"step": 1236
},
{
"epoch": 2.210714285714286,
"grad_norm": 0.33233603835105896,
"learning_rate": 2.605194537494779e-06,
"loss": 1.3001371622085571,
"step": 1238
},
{
"epoch": 2.2142857142857144,
"grad_norm": 0.8232606649398804,
"learning_rate": 2.5916534296257655e-06,
"loss": 1.2073559761047363,
"step": 1240
},
{
"epoch": 2.217857142857143,
"grad_norm": 0.3004189431667328,
"learning_rate": 2.5781573971536387e-06,
"loss": 0.9778292179107666,
"step": 1242
},
{
"epoch": 2.2214285714285715,
"grad_norm": 0.5353025794029236,
"learning_rate": 2.5647066492484564e-06,
"loss": 1.106062889099121,
"step": 1244
},
{
"epoch": 2.225,
"grad_norm": 0.2562118172645569,
"learning_rate": 2.5513013943784236e-06,
"loss": 1.187153935432434,
"step": 1246
},
{
"epoch": 2.2285714285714286,
"grad_norm": 0.3913024067878723,
"learning_rate": 2.537941840306669e-06,
"loss": 0.8193651437759399,
"step": 1248
},
{
"epoch": 2.232142857142857,
"grad_norm": 0.29852673411369324,
"learning_rate": 2.524628194088027e-06,
"loss": 0.5965661406517029,
"step": 1250
},
{
"epoch": 2.2357142857142858,
"grad_norm": 0.2190428078174591,
"learning_rate": 2.511360662065813e-06,
"loss": 0.9129496812820435,
"step": 1252
},
{
"epoch": 2.2392857142857143,
"grad_norm": 0.3540997803211212,
"learning_rate": 2.4981394498686413e-06,
"loss": 1.138474702835083,
"step": 1254
},
{
"epoch": 2.242857142857143,
"grad_norm": 0.9036802053451538,
"learning_rate": 2.484964762407232e-06,
"loss": 1.2528407573699951,
"step": 1256
},
{
"epoch": 2.2464285714285714,
"grad_norm": 0.4152211844921112,
"learning_rate": 2.471836803871233e-06,
"loss": 1.105533480644226,
"step": 1258
},
{
"epoch": 2.25,
"grad_norm": 0.48458918929100037,
"learning_rate": 2.45875577772606e-06,
"loss": 0.9600842595100403,
"step": 1260
},
{
"epoch": 2.2535714285714286,
"grad_norm": 0.3086172044277191,
"learning_rate": 2.4457218867097396e-06,
"loss": 1.0594391822814941,
"step": 1262
},
{
"epoch": 2.257142857142857,
"grad_norm": 0.24558311700820923,
"learning_rate": 2.4327353328297673e-06,
"loss": 1.1570055484771729,
"step": 1264
},
{
"epoch": 2.2607142857142857,
"grad_norm": 1.6706045866012573,
"learning_rate": 2.419796317359983e-06,
"loss": 0.9727555513381958,
"step": 1266
},
{
"epoch": 2.2642857142857142,
"grad_norm": 0.37175774574279785,
"learning_rate": 2.4069050408374376e-06,
"loss": 1.0557781457901,
"step": 1268
},
{
"epoch": 2.267857142857143,
"grad_norm": 0.2886607050895691,
"learning_rate": 2.3940617030593e-06,
"loss": 1.1356130838394165,
"step": 1270
},
{
"epoch": 2.2714285714285714,
"grad_norm": 0.2709295451641083,
"learning_rate": 2.3812665030797512e-06,
"loss": 1.0775344371795654,
"step": 1272
},
{
"epoch": 2.275,
"grad_norm": 0.17263904213905334,
"learning_rate": 2.368519639206905e-06,
"loss": 0.9881319999694824,
"step": 1274
},
{
"epoch": 2.2785714285714285,
"grad_norm": 0.3276418149471283,
"learning_rate": 2.3558213089997303e-06,
"loss": 1.1184488534927368,
"step": 1276
},
{
"epoch": 2.282142857142857,
"grad_norm": 0.9172634482383728,
"learning_rate": 2.3431717092649892e-06,
"loss": 1.3341600894927979,
"step": 1278
},
{
"epoch": 2.2857142857142856,
"grad_norm": 0.5521453022956848,
"learning_rate": 2.3305710360541857e-06,
"loss": 1.4648536443710327,
"step": 1280
},
{
"epoch": 2.289285714285714,
"grad_norm": 0.27971673011779785,
"learning_rate": 2.3180194846605367e-06,
"loss": 1.0912892818450928,
"step": 1282
},
{
"epoch": 2.2928571428571427,
"grad_norm": 0.38737377524375916,
"learning_rate": 2.3055172496159327e-06,
"loss": 1.1721148490905762,
"step": 1284
},
{
"epoch": 2.2964285714285713,
"grad_norm": 0.32838499546051025,
"learning_rate": 2.2930645246879286e-06,
"loss": 0.8287088871002197,
"step": 1286
},
{
"epoch": 2.3,
"grad_norm": 0.9804138541221619,
"learning_rate": 2.2806615028767447e-06,
"loss": 1.3678312301635742,
"step": 1288
},
{
"epoch": 2.3035714285714284,
"grad_norm": 0.22584359347820282,
"learning_rate": 2.2683083764122626e-06,
"loss": 1.076238989830017,
"step": 1290
},
{
"epoch": 2.307142857142857,
"grad_norm": 0.3474865257740021,
"learning_rate": 2.2560053367510624e-06,
"loss": 1.0969926118850708,
"step": 1292
},
{
"epoch": 2.310714285714286,
"grad_norm": 0.5705395340919495,
"learning_rate": 2.24375257457344e-06,
"loss": 1.2809841632843018,
"step": 1294
},
{
"epoch": 2.314285714285714,
"grad_norm": 0.4103868007659912,
"learning_rate": 2.2315502797804677e-06,
"loss": 1.329990029335022,
"step": 1296
},
{
"epoch": 2.317857142857143,
"grad_norm": 0.4318333566188812,
"learning_rate": 2.2193986414910347e-06,
"loss": 0.9738024473190308,
"step": 1298
},
{
"epoch": 2.3214285714285716,
"grad_norm": 0.6811454892158508,
"learning_rate": 2.2072978480389286e-06,
"loss": 1.3944941759109497,
"step": 1300
},
{
"epoch": 2.325,
"grad_norm": 0.2388792783021927,
"learning_rate": 2.195248086969904e-06,
"loss": 0.9329886436462402,
"step": 1302
},
{
"epoch": 2.3285714285714287,
"grad_norm": 0.316201388835907,
"learning_rate": 2.1832495450387934e-06,
"loss": 1.300463080406189,
"step": 1304
},
{
"epoch": 2.3321428571428573,
"grad_norm": 1.2377450466156006,
"learning_rate": 2.1713024082065965e-06,
"loss": 1.1311689615249634,
"step": 1306
},
{
"epoch": 2.335714285714286,
"grad_norm": 0.2693905234336853,
"learning_rate": 2.1594068616376056e-06,
"loss": 1.1664714813232422,
"step": 1308
},
{
"epoch": 2.3392857142857144,
"grad_norm": 0.2321355789899826,
"learning_rate": 2.1475630896965336e-06,
"loss": 0.9228266477584839,
"step": 1310
},
{
"epoch": 2.342857142857143,
"grad_norm": 0.2707984149456024,
"learning_rate": 2.1357712759456594e-06,
"loss": 0.8861098289489746,
"step": 1312
},
{
"epoch": 2.3464285714285715,
"grad_norm": 0.41806021332740784,
"learning_rate": 2.1240316031419795e-06,
"loss": 1.2544275522232056,
"step": 1314
},
{
"epoch": 2.35,
"grad_norm": 0.232350692152977,
"learning_rate": 2.112344253234377e-06,
"loss": 1.2989314794540405,
"step": 1316
},
{
"epoch": 2.3535714285714286,
"grad_norm": 0.32811442017555237,
"learning_rate": 2.1007094073607996e-06,
"loss": 1.0640029907226562,
"step": 1318
},
{
"epoch": 2.357142857142857,
"grad_norm": 0.4184323847293854,
"learning_rate": 2.0891272458454614e-06,
"loss": 1.1912882328033447,
"step": 1320
},
{
"epoch": 2.3607142857142858,
"grad_norm": 0.4829447865486145,
"learning_rate": 2.0775979481960343e-06,
"loss": 1.1298654079437256,
"step": 1322
},
{
"epoch": 2.3642857142857143,
"grad_norm": 0.24751894176006317,
"learning_rate": 2.0661216931008717e-06,
"loss": 1.0057674646377563,
"step": 1324
},
{
"epoch": 2.367857142857143,
"grad_norm": 0.3193625807762146,
"learning_rate": 2.054698658426244e-06,
"loss": 1.3430179357528687,
"step": 1326
},
{
"epoch": 2.3714285714285714,
"grad_norm": 1.9639981985092163,
"learning_rate": 2.043329021213577e-06,
"loss": 1.1388099193572998,
"step": 1328
},
{
"epoch": 2.375,
"grad_norm": 0.4315277636051178,
"learning_rate": 2.0320129576767083e-06,
"loss": 1.1900275945663452,
"step": 1330
},
{
"epoch": 2.3785714285714286,
"grad_norm": 0.34250909090042114,
"learning_rate": 2.0207506431991556e-06,
"loss": 1.29435396194458,
"step": 1332
},
{
"epoch": 2.382142857142857,
"grad_norm": 0.6402963399887085,
"learning_rate": 2.0095422523314016e-06,
"loss": 1.1449788808822632,
"step": 1334
},
{
"epoch": 2.3857142857142857,
"grad_norm": 0.24777400493621826,
"learning_rate": 1.998387958788185e-06,
"loss": 0.9889009594917297,
"step": 1336
},
{
"epoch": 2.3892857142857142,
"grad_norm": 0.27742165327072144,
"learning_rate": 1.987287935445811e-06,
"loss": 1.13013756275177,
"step": 1338
},
{
"epoch": 2.392857142857143,
"grad_norm": 0.6482072472572327,
"learning_rate": 1.976242354339471e-06,
"loss": 1.2214878797531128,
"step": 1340
},
{
"epoch": 2.3964285714285714,
"grad_norm": 0.41687601804733276,
"learning_rate": 1.965251386660575e-06,
"loss": 1.180694818496704,
"step": 1342
},
{
"epoch": 2.4,
"grad_norm": 0.4969882071018219,
"learning_rate": 1.9543152027541003e-06,
"loss": 1.0276660919189453,
"step": 1344
},
{
"epoch": 2.4035714285714285,
"grad_norm": 0.25297048687934875,
"learning_rate": 1.9434339721159506e-06,
"loss": 1.3130789995193481,
"step": 1346
},
{
"epoch": 2.407142857142857,
"grad_norm": 0.3224523961544037,
"learning_rate": 1.932607863390329e-06,
"loss": 1.323912262916565,
"step": 1348
},
{
"epoch": 2.4107142857142856,
"grad_norm": 0.3630305528640747,
"learning_rate": 1.9218370443671232e-06,
"loss": 1.420185923576355,
"step": 1350
},
{
"epoch": 2.414285714285714,
"grad_norm": 0.33518993854522705,
"learning_rate": 1.91112168197931e-06,
"loss": 1.2631648778915405,
"step": 1352
},
{
"epoch": 2.4178571428571427,
"grad_norm": 0.2684813439846039,
"learning_rate": 1.900461942300359e-06,
"loss": 1.2116239070892334,
"step": 1354
},
{
"epoch": 2.4214285714285713,
"grad_norm": 1.0438412427902222,
"learning_rate": 1.8898579905416678e-06,
"loss": 0.9407988786697388,
"step": 1356
},
{
"epoch": 2.425,
"grad_norm": 0.2759835124015808,
"learning_rate": 1.8793099910499926e-06,
"loss": 1.089248776435852,
"step": 1358
},
{
"epoch": 2.4285714285714284,
"grad_norm": 0.2118200808763504,
"learning_rate": 1.8688181073049125e-06,
"loss": 0.9922888278961182,
"step": 1360
},
{
"epoch": 2.432142857142857,
"grad_norm": 0.29328909516334534,
"learning_rate": 1.8583825019162843e-06,
"loss": 1.0572453737258911,
"step": 1362
},
{
"epoch": 2.435714285714286,
"grad_norm": 0.34180185198783875,
"learning_rate": 1.848003336621729e-06,
"loss": 1.2007834911346436,
"step": 1364
},
{
"epoch": 2.439285714285714,
"grad_norm": 2.949885368347168,
"learning_rate": 1.8376807722841231e-06,
"loss": 1.2154308557510376,
"step": 1366
},
{
"epoch": 2.442857142857143,
"grad_norm": 0.29990777373313904,
"learning_rate": 1.8274149688891057e-06,
"loss": 1.2820924520492554,
"step": 1368
},
{
"epoch": 2.4464285714285716,
"grad_norm": 0.2850666046142578,
"learning_rate": 1.8172060855425986e-06,
"loss": 1.3318397998809814,
"step": 1370
},
{
"epoch": 2.45,
"grad_norm": 0.4406229555606842,
"learning_rate": 1.8070542804683406e-06,
"loss": 1.490922212600708,
"step": 1372
},
{
"epoch": 2.4535714285714287,
"grad_norm": 0.5301911234855652,
"learning_rate": 1.7969597110054343e-06,
"loss": 1.04641854763031,
"step": 1374
},
{
"epoch": 2.4571428571428573,
"grad_norm": 0.4790363013744354,
"learning_rate": 1.7869225336059133e-06,
"loss": 1.2003765106201172,
"step": 1376
},
{
"epoch": 2.460714285714286,
"grad_norm": 0.3002559542655945,
"learning_rate": 1.7769429038323058e-06,
"loss": 1.1743593215942383,
"step": 1378
},
{
"epoch": 2.4642857142857144,
"grad_norm": 0.47378110885620117,
"learning_rate": 1.7670209763552342e-06,
"loss": 1.0753716230392456,
"step": 1380
},
{
"epoch": 2.467857142857143,
"grad_norm": 0.4303780198097229,
"learning_rate": 1.757156904951014e-06,
"loss": 1.195298194885254,
"step": 1382
},
{
"epoch": 2.4714285714285715,
"grad_norm": 0.40849828720092773,
"learning_rate": 1.747350842499271e-06,
"loss": 1.0725401639938354,
"step": 1384
},
{
"epoch": 2.475,
"grad_norm": 0.4191647469997406,
"learning_rate": 1.7376029409805708e-06,
"loss": 1.2902517318725586,
"step": 1386
},
{
"epoch": 2.4785714285714286,
"grad_norm": 0.5962879657745361,
"learning_rate": 1.7279133514740645e-06,
"loss": 1.2889909744262695,
"step": 1388
},
{
"epoch": 2.482142857142857,
"grad_norm": 0.2635829448699951,
"learning_rate": 1.7182822241551434e-06,
"loss": 0.9972074627876282,
"step": 1390
},
{
"epoch": 2.4857142857142858,
"grad_norm": 0.27476590871810913,
"learning_rate": 1.708709708293121e-06,
"loss": 1.0351589918136597,
"step": 1392
},
{
"epoch": 2.4892857142857143,
"grad_norm": 0.3098399341106415,
"learning_rate": 1.6991959522489082e-06,
"loss": 1.030190110206604,
"step": 1394
},
{
"epoch": 2.492857142857143,
"grad_norm": 0.37093329429626465,
"learning_rate": 1.6897411034727217e-06,
"loss": 1.3557082414627075,
"step": 1396
},
{
"epoch": 2.4964285714285714,
"grad_norm": 0.4083240032196045,
"learning_rate": 1.680345308501795e-06,
"loss": 1.0274466276168823,
"step": 1398
},
{
"epoch": 2.5,
"grad_norm": 0.34320634603500366,
"learning_rate": 1.6710087129581086e-06,
"loss": 0.9457365274429321,
"step": 1400
},
{
"epoch": 2.5035714285714286,
"grad_norm": 0.5619872808456421,
"learning_rate": 1.6617314615461325e-06,
"loss": 1.3013941049575806,
"step": 1402
},
{
"epoch": 2.507142857142857,
"grad_norm": 0.9764664769172668,
"learning_rate": 1.6525136980505835e-06,
"loss": 1.4310553073883057,
"step": 1404
},
{
"epoch": 2.5107142857142857,
"grad_norm": 0.548743724822998,
"learning_rate": 1.6433555653341976e-06,
"loss": 1.255396842956543,
"step": 1406
},
{
"epoch": 2.5142857142857142,
"grad_norm": 0.8014435172080994,
"learning_rate": 1.6342572053355166e-06,
"loss": 0.830237865447998,
"step": 1408
},
{
"epoch": 2.517857142857143,
"grad_norm": 0.21949461102485657,
"learning_rate": 1.625218759066685e-06,
"loss": 0.7343713641166687,
"step": 1410
},
{
"epoch": 2.5214285714285714,
"grad_norm": 0.6966763734817505,
"learning_rate": 1.6162403666112653e-06,
"loss": 1.1919779777526855,
"step": 1412
},
{
"epoch": 2.525,
"grad_norm": 0.30908581614494324,
"learning_rate": 1.6073221671220692e-06,
"loss": 0.9375178813934326,
"step": 1414
},
{
"epoch": 2.5285714285714285,
"grad_norm": 0.34836652874946594,
"learning_rate": 1.5984642988190022e-06,
"loss": 0.8665962219238281,
"step": 1416
},
{
"epoch": 2.532142857142857,
"grad_norm": 0.18187429010868073,
"learning_rate": 1.5896668989869151e-06,
"loss": 0.9749317765235901,
"step": 1418
},
{
"epoch": 2.5357142857142856,
"grad_norm": 0.2711097002029419,
"learning_rate": 1.5809301039734814e-06,
"loss": 1.1920053958892822,
"step": 1420
},
{
"epoch": 2.539285714285714,
"grad_norm": 0.35151663422584534,
"learning_rate": 1.5722540491870838e-06,
"loss": 1.1063796281814575,
"step": 1422
},
{
"epoch": 2.5428571428571427,
"grad_norm": 0.46157142519950867,
"learning_rate": 1.5636388690947125e-06,
"loss": 0.9042350649833679,
"step": 1424
},
{
"epoch": 2.5464285714285713,
"grad_norm": 0.44619572162628174,
"learning_rate": 1.5550846972198851e-06,
"loss": 1.1896483898162842,
"step": 1426
},
{
"epoch": 2.55,
"grad_norm": 0.5084243416786194,
"learning_rate": 1.5465916661405734e-06,
"loss": 1.0787028074264526,
"step": 1428
},
{
"epoch": 2.553571428571429,
"grad_norm": 0.2909405529499054,
"learning_rate": 1.5381599074871512e-06,
"loss": 1.1317380666732788,
"step": 1430
},
{
"epoch": 2.557142857142857,
"grad_norm": 0.7613154053688049,
"learning_rate": 1.5297895519403563e-06,
"loss": 1.3027656078338623,
"step": 1432
},
{
"epoch": 2.560714285714286,
"grad_norm": 0.38280853629112244,
"learning_rate": 1.5214807292292567e-06,
"loss": 0.8128288984298706,
"step": 1434
},
{
"epoch": 2.564285714285714,
"grad_norm": 0.33587777614593506,
"learning_rate": 1.5132335681292492e-06,
"loss": 1.4057202339172363,
"step": 1436
},
{
"epoch": 2.567857142857143,
"grad_norm": 0.4974580407142639,
"learning_rate": 1.5050481964600582e-06,
"loss": 1.2144535779953003,
"step": 1438
},
{
"epoch": 2.571428571428571,
"grad_norm": 0.21717508137226105,
"learning_rate": 1.496924741083759e-06,
"loss": 0.9632461667060852,
"step": 1440
},
{
"epoch": 2.575,
"grad_norm": 0.18900008499622345,
"learning_rate": 1.4888633279028068e-06,
"loss": 1.021627426147461,
"step": 1442
},
{
"epoch": 2.5785714285714287,
"grad_norm": 0.41346102952957153,
"learning_rate": 1.4808640818580885e-06,
"loss": 1.0733561515808105,
"step": 1444
},
{
"epoch": 2.5821428571428573,
"grad_norm": 0.3450411558151245,
"learning_rate": 1.4729271269269823e-06,
"loss": 1.0130958557128906,
"step": 1446
},
{
"epoch": 2.585714285714286,
"grad_norm": 0.4527641832828522,
"learning_rate": 1.4650525861214454e-06,
"loss": 0.9112399220466614,
"step": 1448
},
{
"epoch": 2.5892857142857144,
"grad_norm": 0.43975669145584106,
"learning_rate": 1.4572405814860954e-06,
"loss": 1.0099694728851318,
"step": 1450
},
{
"epoch": 2.592857142857143,
"grad_norm": 2.6724021434783936,
"learning_rate": 1.4494912340963286e-06,
"loss": 0.9879626035690308,
"step": 1452
},
{
"epoch": 2.5964285714285715,
"grad_norm": 0.33726853132247925,
"learning_rate": 1.441804664056437e-06,
"loss": 1.3339985609054565,
"step": 1454
},
{
"epoch": 2.6,
"grad_norm": 0.5543254017829895,
"learning_rate": 1.4341809904977511e-06,
"loss": 1.0636701583862305,
"step": 1456
},
{
"epoch": 2.6035714285714286,
"grad_norm": 0.35016801953315735,
"learning_rate": 1.4266203315767917e-06,
"loss": 1.2073761224746704,
"step": 1458
},
{
"epoch": 2.607142857142857,
"grad_norm": 0.37314754724502563,
"learning_rate": 1.4191228044734387e-06,
"loss": 1.067349910736084,
"step": 1460
},
{
"epoch": 2.6107142857142858,
"grad_norm": 0.27696406841278076,
"learning_rate": 1.4116885253891142e-06,
"loss": 1.1596084833145142,
"step": 1462
},
{
"epoch": 2.6142857142857143,
"grad_norm": 0.23734059929847717,
"learning_rate": 1.4043176095449843e-06,
"loss": 1.130849003791809,
"step": 1464
},
{
"epoch": 2.617857142857143,
"grad_norm": 0.451869934797287,
"learning_rate": 1.3970101711801712e-06,
"loss": 1.1519298553466797,
"step": 1466
},
{
"epoch": 2.6214285714285714,
"grad_norm": 0.367313027381897,
"learning_rate": 1.3897663235499797e-06,
"loss": 1.081532597541809,
"step": 1468
},
{
"epoch": 2.625,
"grad_norm": 1.2766571044921875,
"learning_rate": 1.382586178924149e-06,
"loss": 0.9227726459503174,
"step": 1470
},
{
"epoch": 2.6285714285714286,
"grad_norm": 1.6380170583724976,
"learning_rate": 1.3754698485851074e-06,
"loss": 1.3057407140731812,
"step": 1472
},
{
"epoch": 2.632142857142857,
"grad_norm": 0.3816126585006714,
"learning_rate": 1.368417442826249e-06,
"loss": 1.1892451047897339,
"step": 1474
},
{
"epoch": 2.6357142857142857,
"grad_norm": 0.3007228672504425,
"learning_rate": 1.3614290709502242e-06,
"loss": 1.2595423460006714,
"step": 1476
},
{
"epoch": 2.6392857142857142,
"grad_norm": 0.2307678908109665,
"learning_rate": 1.3545048412672459e-06,
"loss": 1.10439932346344,
"step": 1478
},
{
"epoch": 2.642857142857143,
"grad_norm": 0.34183934330940247,
"learning_rate": 1.3476448610934104e-06,
"loss": 1.1247930526733398,
"step": 1480
},
{
"epoch": 2.6464285714285714,
"grad_norm": 0.50603187084198,
"learning_rate": 1.3408492367490344e-06,
"loss": 1.308542013168335,
"step": 1482
},
{
"epoch": 2.65,
"grad_norm": 0.5772185921669006,
"learning_rate": 1.3341180735570081e-06,
"loss": 1.086531639099121,
"step": 1484
},
{
"epoch": 2.6535714285714285,
"grad_norm": 0.2957296073436737,
"learning_rate": 1.3274514758411595e-06,
"loss": 0.9083548784255981,
"step": 1486
},
{
"epoch": 2.657142857142857,
"grad_norm": 0.2126568704843521,
"learning_rate": 1.3208495469246445e-06,
"loss": 1.0338191986083984,
"step": 1488
},
{
"epoch": 2.6607142857142856,
"grad_norm": 0.23187443614006042,
"learning_rate": 1.3143123891283354e-06,
"loss": 1.1434146165847778,
"step": 1490
},
{
"epoch": 2.664285714285714,
"grad_norm": 0.2083001434803009,
"learning_rate": 1.3078401037692451e-06,
"loss": 1.148645281791687,
"step": 1492
},
{
"epoch": 2.6678571428571427,
"grad_norm": 0.24332857131958008,
"learning_rate": 1.3014327911589495e-06,
"loss": 1.0858982801437378,
"step": 1494
},
{
"epoch": 2.6714285714285713,
"grad_norm": 0.44840723276138306,
"learning_rate": 1.2950905506020383e-06,
"loss": 0.8910313844680786,
"step": 1496
},
{
"epoch": 2.675,
"grad_norm": 0.6759834885597229,
"learning_rate": 1.2888134803945713e-06,
"loss": 1.0723787546157837,
"step": 1498
},
{
"epoch": 2.678571428571429,
"grad_norm": 0.3571532964706421,
"learning_rate": 1.2826016778225578e-06,
"loss": 1.1453263759613037,
"step": 1500
},
{
"epoch": 2.682142857142857,
"grad_norm": 0.3260257840156555,
"learning_rate": 1.2764552391604468e-06,
"loss": 1.1897282600402832,
"step": 1502
},
{
"epoch": 2.685714285714286,
"grad_norm": 0.21461273729801178,
"learning_rate": 1.2703742596696383e-06,
"loss": 1.114097237586975,
"step": 1504
},
{
"epoch": 2.689285714285714,
"grad_norm": 0.39265140891075134,
"learning_rate": 1.2643588335970021e-06,
"loss": 1.2430890798568726,
"step": 1506
},
{
"epoch": 2.692857142857143,
"grad_norm": 0.26661592721939087,
"learning_rate": 1.2584090541734216e-06,
"loss": 1.2044790983200073,
"step": 1508
},
{
"epoch": 2.696428571428571,
"grad_norm": 0.4279651641845703,
"learning_rate": 1.252525013612346e-06,
"loss": 1.1148457527160645,
"step": 1510
},
{
"epoch": 2.7,
"grad_norm": 0.26563382148742676,
"learning_rate": 1.2467068031083623e-06,
"loss": 1.151499629020691,
"step": 1512
},
{
"epoch": 2.7035714285714287,
"grad_norm": 0.28036361932754517,
"learning_rate": 1.2409545128357806e-06,
"loss": 1.112971544265747,
"step": 1514
},
{
"epoch": 2.7071428571428573,
"grad_norm": 0.3321837782859802,
"learning_rate": 1.235268231947238e-06,
"loss": 0.9679718613624573,
"step": 1516
},
{
"epoch": 2.710714285714286,
"grad_norm": 0.7659473419189453,
"learning_rate": 1.229648048572317e-06,
"loss": 1.18712317943573,
"step": 1518
},
{
"epoch": 2.7142857142857144,
"grad_norm": 0.5400887131690979,
"learning_rate": 1.2240940498161797e-06,
"loss": 1.0840147733688354,
"step": 1520
},
{
"epoch": 2.717857142857143,
"grad_norm": 0.3426344096660614,
"learning_rate": 1.2186063217582144e-06,
"loss": 1.1307204961776733,
"step": 1522
},
{
"epoch": 2.7214285714285715,
"grad_norm": 0.39970487356185913,
"learning_rate": 1.213184949450706e-06,
"loss": 1.1921186447143555,
"step": 1524
},
{
"epoch": 2.725,
"grad_norm": 0.31394848227500916,
"learning_rate": 1.2078300169175158e-06,
"loss": 1.1872678995132446,
"step": 1526
},
{
"epoch": 2.7285714285714286,
"grad_norm": 0.7688894271850586,
"learning_rate": 1.20254160715278e-06,
"loss": 1.1403369903564453,
"step": 1528
},
{
"epoch": 2.732142857142857,
"grad_norm": 0.3478771448135376,
"learning_rate": 1.1973198021196207e-06,
"loss": 1.0353933572769165,
"step": 1530
},
{
"epoch": 2.7357142857142858,
"grad_norm": 1.663916826248169,
"learning_rate": 1.1921646827488807e-06,
"loss": 1.1801190376281738,
"step": 1532
},
{
"epoch": 2.7392857142857143,
"grad_norm": 0.9486533999443054,
"learning_rate": 1.187076328937863e-06,
"loss": 1.118172287940979,
"step": 1534
},
{
"epoch": 2.742857142857143,
"grad_norm": 0.3661729693412781,
"learning_rate": 1.182054819549098e-06,
"loss": 1.166612982749939,
"step": 1536
},
{
"epoch": 2.7464285714285714,
"grad_norm": 0.273942768573761,
"learning_rate": 1.1771002324091183e-06,
"loss": 1.219356656074524,
"step": 1538
},
{
"epoch": 2.75,
"grad_norm": 0.2943507730960846,
"learning_rate": 1.172212644307252e-06,
"loss": 1.2092581987380981,
"step": 1540
},
{
"epoch": 2.7535714285714286,
"grad_norm": 0.22103095054626465,
"learning_rate": 1.1673921309944356e-06,
"loss": 1.1635977029800415,
"step": 1542
},
{
"epoch": 2.757142857142857,
"grad_norm": 0.27992480993270874,
"learning_rate": 1.1626387671820363e-06,
"loss": 1.1578980684280396,
"step": 1544
},
{
"epoch": 2.7607142857142857,
"grad_norm": 0.1873656064271927,
"learning_rate": 1.1579526265406972e-06,
"loss": 1.1813486814498901,
"step": 1546
},
{
"epoch": 2.7642857142857142,
"grad_norm": 0.3528795838356018,
"learning_rate": 1.1533337816991932e-06,
"loss": 1.1933683156967163,
"step": 1548
},
{
"epoch": 2.767857142857143,
"grad_norm": 0.31167811155319214,
"learning_rate": 1.1487823042433063e-06,
"loss": 1.1475173234939575,
"step": 1550
},
{
"epoch": 2.7714285714285714,
"grad_norm": 1.7408783435821533,
"learning_rate": 1.1442982647147167e-06,
"loss": 1.148131251335144,
"step": 1552
},
{
"epoch": 2.775,
"grad_norm": 0.3031138777732849,
"learning_rate": 1.1398817326099094e-06,
"loss": 1.0997506380081177,
"step": 1554
},
{
"epoch": 2.7785714285714285,
"grad_norm": 0.21349631249904633,
"learning_rate": 1.1355327763790943e-06,
"loss": 1.1433438062667847,
"step": 1556
},
{
"epoch": 2.782142857142857,
"grad_norm": 0.16756878793239594,
"learning_rate": 1.1312514634251492e-06,
"loss": 1.0694825649261475,
"step": 1558
},
{
"epoch": 2.7857142857142856,
"grad_norm": 0.19285623729228973,
"learning_rate": 1.127037860102575e-06,
"loss": 1.1415499448776245,
"step": 1560
},
{
"epoch": 2.789285714285714,
"grad_norm": 0.3282257616519928,
"learning_rate": 1.1228920317164625e-06,
"loss": 1.1128462553024292,
"step": 1562
},
{
"epoch": 2.7928571428571427,
"grad_norm": 0.20754434168338776,
"learning_rate": 1.118814042521486e-06,
"loss": 1.1504778861999512,
"step": 1564
},
{
"epoch": 2.7964285714285713,
"grad_norm": 0.22546795010566711,
"learning_rate": 1.1148039557209057e-06,
"loss": 1.1107934713363647,
"step": 1566
},
{
"epoch": 2.8,
"grad_norm": 0.16394157707691193,
"learning_rate": 1.1108618334655843e-06,
"loss": 1.0830016136169434,
"step": 1568
},
{
"epoch": 2.803571428571429,
"grad_norm": 0.1953999102115631,
"learning_rate": 1.1069877368530303e-06,
"loss": 1.16024649143219,
"step": 1570
},
{
"epoch": 2.807142857142857,
"grad_norm": 0.211993008852005,
"learning_rate": 1.1031817259264454e-06,
"loss": 1.1383813619613647,
"step": 1572
},
{
"epoch": 2.810714285714286,
"grad_norm": 0.1844896823167801,
"learning_rate": 1.0994438596737971e-06,
"loss": 1.0519864559173584,
"step": 1574
},
{
"epoch": 2.814285714285714,
"grad_norm": 0.4553788900375366,
"learning_rate": 1.0957741960269049e-06,
"loss": 1.1024482250213623,
"step": 1576
},
{
"epoch": 2.817857142857143,
"grad_norm": 0.2758769989013672,
"learning_rate": 1.092172791860539e-06,
"loss": 1.0607486963272095,
"step": 1578
},
{
"epoch": 2.821428571428571,
"grad_norm": 0.28464648127555847,
"learning_rate": 1.0886397029915415e-06,
"loss": 1.0878740549087524,
"step": 1580
},
{
"epoch": 2.825,
"grad_norm": 0.2519758641719818,
"learning_rate": 1.0851749841779609e-06,
"loss": 1.0692694187164307,
"step": 1582
},
{
"epoch": 2.8285714285714287,
"grad_norm": 0.20021863281726837,
"learning_rate": 1.0817786891182041e-06,
"loss": 1.0892566442489624,
"step": 1584
},
{
"epoch": 2.8321428571428573,
"grad_norm": 0.21085211634635925,
"learning_rate": 1.0784508704502029e-06,
"loss": 1.0911756753921509,
"step": 1586
},
{
"epoch": 2.835714285714286,
"grad_norm": 0.2599065899848938,
"learning_rate": 1.0751915797505986e-06,
"loss": 1.0842504501342773,
"step": 1588
},
{
"epoch": 2.8392857142857144,
"grad_norm": 0.23683688044548035,
"learning_rate": 1.0720008675339403e-06,
"loss": 1.0852082967758179,
"step": 1590
},
{
"epoch": 2.842857142857143,
"grad_norm": 0.19538818299770355,
"learning_rate": 1.0688787832519085e-06,
"loss": 1.1298590898513794,
"step": 1592
},
{
"epoch": 2.8464285714285715,
"grad_norm": 0.5865882039070129,
"learning_rate": 1.0658253752925417e-06,
"loss": 1.122971773147583,
"step": 1594
},
{
"epoch": 2.85,
"grad_norm": 0.3269581198692322,
"learning_rate": 1.062840690979491e-06,
"loss": 1.109829068183899,
"step": 1596
},
{
"epoch": 2.8535714285714286,
"grad_norm": 0.5810469388961792,
"learning_rate": 1.0599247765712832e-06,
"loss": 1.1492294073104858,
"step": 1598
},
{
"epoch": 2.857142857142857,
"grad_norm": 0.2330639660358429,
"learning_rate": 1.0570776772606056e-06,
"loss": 1.123344898223877,
"step": 1600
},
{
"epoch": 2.8607142857142858,
"grad_norm": 0.2107606828212738,
"learning_rate": 1.0542994371736076e-06,
"loss": 1.0889390707015991,
"step": 1602
},
{
"epoch": 2.8642857142857143,
"grad_norm": 0.2753591239452362,
"learning_rate": 1.0515900993692128e-06,
"loss": 1.1300913095474243,
"step": 1604
},
{
"epoch": 2.867857142857143,
"grad_norm": 0.27015575766563416,
"learning_rate": 1.048949705838454e-06,
"loss": 1.0982666015625,
"step": 1606
},
{
"epoch": 2.8714285714285714,
"grad_norm": 0.1620846688747406,
"learning_rate": 1.0463782975038226e-06,
"loss": 1.1166629791259766,
"step": 1608
},
{
"epoch": 2.875,
"grad_norm": 0.21408753097057343,
"learning_rate": 1.0438759142186336e-06,
"loss": 1.127457857131958,
"step": 1610
},
{
"epoch": 2.8785714285714286,
"grad_norm": 0.26070085167884827,
"learning_rate": 1.0414425947664075e-06,
"loss": 1.1438779830932617,
"step": 1612
},
{
"epoch": 2.882142857142857,
"grad_norm": 0.1973988115787506,
"learning_rate": 1.0390783768602694e-06,
"loss": 1.1256788969039917,
"step": 1614
},
{
"epoch": 2.8857142857142857,
"grad_norm": 0.1865663081407547,
"learning_rate": 1.0367832971423664e-06,
"loss": 1.0647690296173096,
"step": 1616
},
{
"epoch": 2.8892857142857142,
"grad_norm": 0.40141281485557556,
"learning_rate": 1.0345573911832976e-06,
"loss": 1.0978182554244995,
"step": 1618
},
{
"epoch": 2.892857142857143,
"grad_norm": 0.19470001757144928,
"learning_rate": 1.0324006934815623e-06,
"loss": 1.1264913082122803,
"step": 1620
},
{
"epoch": 2.8964285714285714,
"grad_norm": 0.1923714429140091,
"learning_rate": 1.0303132374630276e-06,
"loss": 1.1599576473236084,
"step": 1622
},
{
"epoch": 2.9,
"grad_norm": 0.2873956561088562,
"learning_rate": 1.0282950554804084e-06,
"loss": 1.1344720125198364,
"step": 1624
},
{
"epoch": 2.9035714285714285,
"grad_norm": 0.2792896330356598,
"learning_rate": 1.0263461788127682e-06,
"loss": 1.1077191829681396,
"step": 1626
},
{
"epoch": 2.907142857142857,
"grad_norm": 0.17874673008918762,
"learning_rate": 1.0244666376650307e-06,
"loss": 1.0769405364990234,
"step": 1628
},
{
"epoch": 2.9107142857142856,
"grad_norm": 0.23230457305908203,
"learning_rate": 1.0226564611675146e-06,
"loss": 1.1149848699569702,
"step": 1630
},
{
"epoch": 2.914285714285714,
"grad_norm": 0.2538415789604187,
"learning_rate": 1.020915677375483e-06,
"loss": 1.1285921335220337,
"step": 1632
},
{
"epoch": 2.9178571428571427,
"grad_norm": 0.18281330168247223,
"learning_rate": 1.0192443132687039e-06,
"loss": 1.0885471105575562,
"step": 1634
},
{
"epoch": 2.9214285714285713,
"grad_norm": 0.27069422602653503,
"learning_rate": 1.0176423947510377e-06,
"loss": 1.1098750829696655,
"step": 1636
},
{
"epoch": 2.925,
"grad_norm": 0.24785873293876648,
"learning_rate": 1.016109946650032e-06,
"loss": 1.1053394079208374,
"step": 1638
},
{
"epoch": 2.928571428571429,
"grad_norm": 0.2786495089530945,
"learning_rate": 1.014646992716537e-06,
"loss": 1.1500390768051147,
"step": 1640
},
{
"epoch": 2.932142857142857,
"grad_norm": 0.3538748621940613,
"learning_rate": 1.01325355562434e-06,
"loss": 1.1664944887161255,
"step": 1642
},
{
"epoch": 2.935714285714286,
"grad_norm": 0.3729296326637268,
"learning_rate": 1.0119296569698112e-06,
"loss": 1.1281384229660034,
"step": 1644
},
{
"epoch": 2.939285714285714,
"grad_norm": 0.21035878360271454,
"learning_rate": 1.01067531727157e-06,
"loss": 1.1451420783996582,
"step": 1646
},
{
"epoch": 2.942857142857143,
"grad_norm": 0.3253045380115509,
"learning_rate": 1.0094905559701678e-06,
"loss": 1.1268796920776367,
"step": 1648
},
{
"epoch": 2.946428571428571,
"grad_norm": 0.20938168466091156,
"learning_rate": 1.0083753914277859e-06,
"loss": 1.0814552307128906,
"step": 1650
},
{
"epoch": 2.95,
"grad_norm": 0.24861246347427368,
"learning_rate": 1.007329840927949e-06,
"loss": 1.1016547679901123,
"step": 1652
},
{
"epoch": 2.9535714285714287,
"grad_norm": 0.26715606451034546,
"learning_rate": 1.006353920675263e-06,
"loss": 1.1287412643432617,
"step": 1654
},
{
"epoch": 2.9571428571428573,
"grad_norm": 0.20948819816112518,
"learning_rate": 1.0054476457951567e-06,
"loss": 1.11174476146698,
"step": 1656
},
{
"epoch": 2.960714285714286,
"grad_norm": 0.5076990127563477,
"learning_rate": 1.0046110303336519e-06,
"loss": 1.112143874168396,
"step": 1658
},
{
"epoch": 2.9642857142857144,
"grad_norm": 0.5603309273719788,
"learning_rate": 1.0038440872571456e-06,
"loss": 1.1545910835266113,
"step": 1660
},
{
"epoch": 2.967857142857143,
"grad_norm": 0.23968827724456787,
"learning_rate": 1.0031468284522063e-06,
"loss": 1.1435242891311646,
"step": 1662
},
{
"epoch": 2.9714285714285715,
"grad_norm": 0.26473504304885864,
"learning_rate": 1.0025192647253939e-06,
"loss": 1.1580908298492432,
"step": 1664
},
{
"epoch": 2.975,
"grad_norm": 0.6800065636634827,
"learning_rate": 1.0019614058030874e-06,
"loss": 1.1012563705444336,
"step": 1666
},
{
"epoch": 2.9785714285714286,
"grad_norm": 0.23044763505458832,
"learning_rate": 1.0014732603313375e-06,
"loss": 1.1186460256576538,
"step": 1668
},
{
"epoch": 2.982142857142857,
"grad_norm": 0.21679583191871643,
"learning_rate": 1.0010548358757327e-06,
"loss": 1.1382079124450684,
"step": 1670
},
{
"epoch": 2.9857142857142858,
"grad_norm": 0.4521788954734802,
"learning_rate": 1.0007061389212794e-06,
"loss": 1.182320475578308,
"step": 1672
},
{
"epoch": 2.9892857142857143,
"grad_norm": 0.24779334664344788,
"learning_rate": 1.0004271748723043e-06,
"loss": 1.2086482048034668,
"step": 1674
},
{
"epoch": 2.992857142857143,
"grad_norm": 0.5126925706863403,
"learning_rate": 1.0002179480523687e-06,
"loss": 0.834091067314148,
"step": 1676
},
{
"epoch": 2.9964285714285714,
"grad_norm": 0.3477499783039093,
"learning_rate": 1.0000784617042023e-06,
"loss": 0.722780168056488,
"step": 1678
},
{
"epoch": 3.0,
"grad_norm": 0.47854718565940857,
"learning_rate": 1.0000087179896533e-06,
"loss": 0.7972838282585144,
"step": 1680
},
{
"epoch": 3.0,
"step": 1680,
"total_flos": 2.510120369642275e+18,
"train_loss": 1.2744095386493774,
"train_runtime": 14979.881,
"train_samples_per_second": 1.794,
"train_steps_per_second": 0.112
}
],
"logging_steps": 2,
"max_steps": 1680,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 9999999,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.510120369642275e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}