{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 1680,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0035714285714285713,
"grad_norm": 0.35824885964393616,
"learning_rate": 1.1904761904761904e-12,
"loss": 1.8791723251342773,
"step": 2
},
{
"epoch": 0.007142857142857143,
"grad_norm": 1.133953332901001,
"learning_rate": 3.5714285714285712e-12,
"loss": 1.8177907466888428,
"step": 4
},
{
"epoch": 0.010714285714285714,
"grad_norm": 0.31963008642196655,
"learning_rate": 5.952380952380952e-12,
"loss": 1.8180923461914062,
"step": 6
},
{
"epoch": 0.014285714285714285,
"grad_norm": 0.38317519426345825,
"learning_rate": 8.333333333333334e-12,
"loss": 1.890156626701355,
"step": 8
},
{
"epoch": 0.017857142857142856,
"grad_norm": 0.22351811826229095,
"learning_rate": 1.0714285714285714e-11,
"loss": 1.8039294481277466,
"step": 10
},
{
"epoch": 0.02142857142857143,
"grad_norm": 0.2431482970714569,
"learning_rate": 1.3095238095238097e-11,
"loss": 1.7611536979675293,
"step": 12
},
{
"epoch": 0.025,
"grad_norm": 0.6136751770973206,
"learning_rate": 1.5476190476190478e-11,
"loss": 1.803197979927063,
"step": 14
},
{
"epoch": 0.02857142857142857,
"grad_norm": 0.2537895441055298,
"learning_rate": 1.7857142857142857e-11,
"loss": 1.7924619913101196,
"step": 16
},
{
"epoch": 0.03214285714285714,
"grad_norm": 0.7044904232025146,
"learning_rate": 2.023809523809524e-11,
"loss": 1.8655799627304077,
"step": 18
},
{
"epoch": 0.03571428571428571,
"grad_norm": 0.25141608715057373,
"learning_rate": 2.261904761904762e-11,
"loss": 1.7573883533477783,
"step": 20
},
{
"epoch": 0.039285714285714285,
"grad_norm": 0.9878725409507751,
"learning_rate": 2.5e-11,
"loss": 1.7613331079483032,
"step": 22
},
{
"epoch": 0.04285714285714286,
"grad_norm": 0.6218051910400391,
"learning_rate": 2.7380952380952383e-11,
"loss": 2.009953498840332,
"step": 24
},
{
"epoch": 0.04642857142857143,
"grad_norm": 0.3609226644039154,
"learning_rate": 2.976190476190476e-11,
"loss": 1.8607038259506226,
"step": 26
},
{
"epoch": 0.05,
"grad_norm": 0.24035774171352386,
"learning_rate": 3.214285714285715e-11,
"loss": 1.5517598390579224,
"step": 28
},
{
"epoch": 0.05357142857142857,
"grad_norm": 0.2459728866815567,
"learning_rate": 3.452380952380953e-11,
"loss": 1.8139374256134033,
"step": 30
},
{
"epoch": 0.05714285714285714,
"grad_norm": 0.25730156898498535,
"learning_rate": 3.6904761904761907e-11,
"loss": 1.8025331497192383,
"step": 32
},
{
"epoch": 0.060714285714285714,
"grad_norm": 0.353179395198822,
"learning_rate": 3.9285714285714286e-11,
"loss": 1.8871498107910156,
"step": 34
},
{
"epoch": 0.06428571428571428,
"grad_norm": 0.3843620717525482,
"learning_rate": 4.166666666666667e-11,
"loss": 1.9136126041412354,
"step": 36
},
{
"epoch": 0.06785714285714285,
"grad_norm": 0.62681645154953,
"learning_rate": 4.404761904761905e-11,
"loss": 2.016162157058716,
"step": 38
},
{
"epoch": 0.07142857142857142,
"grad_norm": 1.0013960599899292,
"learning_rate": 4.642857142857143e-11,
"loss": 1.8622362613677979,
"step": 40
},
{
"epoch": 0.075,
"grad_norm": 0.3581591248512268,
"learning_rate": 4.880952380952381e-11,
"loss": 1.8666965961456299,
"step": 42
},
{
"epoch": 0.07857142857142857,
"grad_norm": 0.5053503513336182,
"learning_rate": 5.119047619047619e-11,
"loss": 1.8547173738479614,
"step": 44
},
{
"epoch": 0.08214285714285714,
"grad_norm": 0.3650856614112854,
"learning_rate": 5.3571428571428574e-11,
"loss": 1.8626824617385864,
"step": 46
},
{
"epoch": 0.08571428571428572,
"grad_norm": 0.26654282212257385,
"learning_rate": 5.595238095238095e-11,
"loss": 1.6219651699066162,
"step": 48
},
{
"epoch": 0.08928571428571429,
"grad_norm": 0.3208557963371277,
"learning_rate": 5.833333333333334e-11,
"loss": 1.762930154800415,
"step": 50
},
{
"epoch": 0.09285714285714286,
"grad_norm": 0.2823070287704468,
"learning_rate": 6.071428571428571e-11,
"loss": 1.720660924911499,
"step": 52
},
{
"epoch": 0.09642857142857143,
"grad_norm": 0.3506131172180176,
"learning_rate": 6.30952380952381e-11,
"loss": 1.836098074913025,
"step": 54
},
{
"epoch": 0.1,
"grad_norm": 0.41053706407546997,
"learning_rate": 6.547619047619048e-11,
"loss": 1.6985701322555542,
"step": 56
},
{
"epoch": 0.10357142857142858,
"grad_norm": 0.45289528369903564,
"learning_rate": 6.785714285714287e-11,
"loss": 1.855586051940918,
"step": 58
},
{
"epoch": 0.10714285714285714,
"grad_norm": 2.517709732055664,
"learning_rate": 7.023809523809524e-11,
"loss": 1.9189352989196777,
"step": 60
},
{
"epoch": 0.11071428571428571,
"grad_norm": 0.3041253387928009,
"learning_rate": 7.261904761904761e-11,
"loss": 1.7433679103851318,
"step": 62
},
{
"epoch": 0.11428571428571428,
"grad_norm": 0.36612775921821594,
"learning_rate": 7.5e-11,
"loss": 1.9802634716033936,
"step": 64
},
{
"epoch": 0.11785714285714285,
"grad_norm": 0.5854454040527344,
"learning_rate": 7.738095238095239e-11,
"loss": 1.9604766368865967,
"step": 66
},
{
"epoch": 0.12142857142857143,
"grad_norm": 0.43273574113845825,
"learning_rate": 7.976190476190477e-11,
"loss": 1.8710302114486694,
"step": 68
},
{
"epoch": 0.125,
"grad_norm": 0.7926504015922546,
"learning_rate": 8.214285714285714e-11,
"loss": 1.9847257137298584,
"step": 70
},
{
"epoch": 0.12857142857142856,
"grad_norm": 1.5940577983856201,
"learning_rate": 8.452380952380953e-11,
"loss": 2.3266658782958984,
"step": 72
},
{
"epoch": 0.13214285714285715,
"grad_norm": 0.16762828826904297,
"learning_rate": 8.690476190476191e-11,
"loss": 1.4378584623336792,
"step": 74
},
{
"epoch": 0.1357142857142857,
"grad_norm": 0.29286450147628784,
"learning_rate": 8.928571428571429e-11,
"loss": 1.778782606124878,
"step": 76
},
{
"epoch": 0.1392857142857143,
"grad_norm": 0.5462100505828857,
"learning_rate": 9.166666666666666e-11,
"loss": 1.8584684133529663,
"step": 78
},
{
"epoch": 0.14285714285714285,
"grad_norm": 0.24206940829753876,
"learning_rate": 9.404761904761905e-11,
"loss": 1.8260565996170044,
"step": 80
},
{
"epoch": 0.14642857142857144,
"grad_norm": 0.4532042145729065,
"learning_rate": 9.642857142857143e-11,
"loss": 1.7921696901321411,
"step": 82
},
{
"epoch": 0.15,
"grad_norm": 0.3084694743156433,
"learning_rate": 9.880952380952382e-11,
"loss": 1.8815988302230835,
"step": 84
},
{
"epoch": 0.15357142857142858,
"grad_norm": 0.31026917695999146,
"learning_rate": 9.999991282010347e-11,
"loss": 1.934004783630371,
"step": 86
},
{
"epoch": 0.15714285714285714,
"grad_norm": 0.22002796828746796,
"learning_rate": 9.999921538295798e-11,
"loss": 1.8987516164779663,
"step": 88
},
{
"epoch": 0.16071428571428573,
"grad_norm": 0.2701459228992462,
"learning_rate": 9.999782051947631e-11,
"loss": 1.768871545791626,
"step": 90
},
{
"epoch": 0.16428571428571428,
"grad_norm": 0.28832632303237915,
"learning_rate": 9.999572825127696e-11,
"loss": 1.8338327407836914,
"step": 92
},
{
"epoch": 0.16785714285714284,
"grad_norm": 0.42094674706459045,
"learning_rate": 9.99929386107872e-11,
"loss": 1.8449537754058838,
"step": 94
},
{
"epoch": 0.17142857142857143,
"grad_norm": 0.31477025151252747,
"learning_rate": 9.998945164124268e-11,
"loss": 1.8257495164871216,
"step": 96
},
{
"epoch": 0.175,
"grad_norm": 0.2591283321380615,
"learning_rate": 9.998526739668663e-11,
"loss": 1.876816987991333,
"step": 98
},
{
"epoch": 0.17857142857142858,
"grad_norm": 0.32023900747299194,
"learning_rate": 9.998038594196913e-11,
"loss": 1.8425483703613281,
"step": 100
},
{
"epoch": 0.18214285714285713,
"grad_norm": 0.3183627128601074,
"learning_rate": 9.997480735274607e-11,
"loss": 1.7012989521026611,
"step": 102
},
{
"epoch": 0.18571428571428572,
"grad_norm": 1.6413437128067017,
"learning_rate": 9.996853171547794e-11,
"loss": 1.7975858449935913,
"step": 104
},
{
"epoch": 0.18928571428571428,
"grad_norm": 0.48182412981987,
"learning_rate": 9.996155912742855e-11,
"loss": 1.849790334701538,
"step": 106
},
{
"epoch": 0.19285714285714287,
"grad_norm": 0.2553955614566803,
"learning_rate": 9.995388969666348e-11,
"loss": 1.730877161026001,
"step": 108
},
{
"epoch": 0.19642857142857142,
"grad_norm": 0.22714966535568237,
"learning_rate": 9.994552354204844e-11,
"loss": 1.5956761837005615,
"step": 110
},
{
"epoch": 0.2,
"grad_norm": 0.41010916233062744,
"learning_rate": 9.993646079324738e-11,
"loss": 1.7001782655715942,
"step": 112
},
{
"epoch": 0.20357142857142857,
"grad_norm": 0.43712326884269714,
"learning_rate": 9.992670159072051e-11,
"loss": 1.7033194303512573,
"step": 114
},
{
"epoch": 0.20714285714285716,
"grad_norm": 0.30069440603256226,
"learning_rate": 9.991624608572216e-11,
"loss": 1.6037108898162842,
"step": 116
},
{
"epoch": 0.21071428571428572,
"grad_norm": 0.359576016664505,
"learning_rate": 9.990509444029832e-11,
"loss": 1.6560925245285034,
"step": 118
},
{
"epoch": 0.21428571428571427,
"grad_norm": 0.22528955340385437,
"learning_rate": 9.98932468272843e-11,
"loss": 1.660873532295227,
"step": 120
},
{
"epoch": 0.21785714285714286,
"grad_norm": 0.26792994141578674,
"learning_rate": 9.98807034303019e-11,
"loss": 1.685825228691101,
"step": 122
},
{
"epoch": 0.22142857142857142,
"grad_norm": 0.5145900249481201,
"learning_rate": 9.986746444375661e-11,
"loss": 1.6325483322143555,
"step": 124
},
{
"epoch": 0.225,
"grad_norm": 0.29799070954322815,
"learning_rate": 9.985353007283463e-11,
"loss": 1.7098278999328613,
"step": 126
},
{
"epoch": 0.22857142857142856,
"grad_norm": 0.2814246118068695,
"learning_rate": 9.983890053349969e-11,
"loss": 1.6903581619262695,
"step": 128
},
{
"epoch": 0.23214285714285715,
"grad_norm": 0.2916901707649231,
"learning_rate": 9.982357605248963e-11,
"loss": 1.7061477899551392,
"step": 130
},
{
"epoch": 0.2357142857142857,
"grad_norm": 0.43531540036201477,
"learning_rate": 9.980755686731296e-11,
"loss": 1.751139760017395,
"step": 132
},
{
"epoch": 0.2392857142857143,
"grad_norm": 0.35889768600463867,
"learning_rate": 9.979084322624518e-11,
"loss": 1.6222317218780518,
"step": 134
},
{
"epoch": 0.24285714285714285,
"grad_norm": 0.604837954044342,
"learning_rate": 9.977343538832486e-11,
"loss": 1.6161425113677979,
"step": 136
},
{
"epoch": 0.24642857142857144,
"grad_norm": 0.2969377338886261,
"learning_rate": 9.97553336233497e-11,
"loss": 1.5216706991195679,
"step": 138
},
{
"epoch": 0.25,
"grad_norm": 0.5487335920333862,
"learning_rate": 9.973653821187232e-11,
"loss": 1.6033167839050293,
"step": 140
},
{
"epoch": 0.25357142857142856,
"grad_norm": 0.615515947341919,
"learning_rate": 9.971704944519594e-11,
"loss": 1.6344680786132812,
"step": 142
},
{
"epoch": 0.2571428571428571,
"grad_norm": 0.8488220572471619,
"learning_rate": 9.969686762536972e-11,
"loss": 1.8572226762771606,
"step": 144
},
{
"epoch": 0.26071428571428573,
"grad_norm": 2.822991371154785,
"learning_rate": 9.967599306518438e-11,
"loss": 1.5748167037963867,
"step": 146
},
{
"epoch": 0.2642857142857143,
"grad_norm": 0.5391015410423279,
"learning_rate": 9.965442608816703e-11,
"loss": 1.7799961566925049,
"step": 148
},
{
"epoch": 0.26785714285714285,
"grad_norm": 0.421622633934021,
"learning_rate": 9.963216702857634e-11,
"loss": 1.6995267868041992,
"step": 150
},
{
"epoch": 0.2714285714285714,
"grad_norm": 0.46860265731811523,
"learning_rate": 9.96092162313973e-11,
"loss": 1.7670485973358154,
"step": 152
},
{
"epoch": 0.275,
"grad_norm": 0.47773948311805725,
"learning_rate": 9.958557405233593e-11,
"loss": 1.7523893117904663,
"step": 154
},
{
"epoch": 0.2785714285714286,
"grad_norm": 0.6034820675849915,
"learning_rate": 9.956124085781366e-11,
"loss": 1.5787875652313232,
"step": 156
},
{
"epoch": 0.28214285714285714,
"grad_norm": 0.8053703308105469,
"learning_rate": 9.953621702496178e-11,
"loss": 1.1423616409301758,
"step": 158
},
{
"epoch": 0.2857142857142857,
"grad_norm": 0.34408634901046753,
"learning_rate": 9.951050294161547e-11,
"loss": 1.5774961709976196,
"step": 160
},
{
"epoch": 0.2892857142857143,
"grad_norm": 0.4361685812473297,
"learning_rate": 9.948409900630787e-11,
"loss": 1.5200037956237793,
"step": 162
},
{
"epoch": 0.29285714285714287,
"grad_norm": 0.34992775321006775,
"learning_rate": 9.945700562826394e-11,
"loss": 1.8257197141647339,
"step": 164
},
{
"epoch": 0.29642857142857143,
"grad_norm": 0.5045400857925415,
"learning_rate": 9.942922322739395e-11,
"loss": 1.4601728916168213,
"step": 166
},
{
"epoch": 0.3,
"grad_norm": 0.2626727223396301,
"learning_rate": 9.940075223428717e-11,
"loss": 1.3292102813720703,
"step": 168
},
{
"epoch": 0.30357142857142855,
"grad_norm": 0.40398019552230835,
"learning_rate": 9.937159309020509e-11,
"loss": 1.44813072681427,
"step": 170
},
{
"epoch": 0.30714285714285716,
"grad_norm": 0.3662099242210388,
"learning_rate": 9.934174624707459e-11,
"loss": 1.727007508277893,
"step": 172
},
{
"epoch": 0.3107142857142857,
"grad_norm": 0.24835538864135742,
"learning_rate": 9.931121216748092e-11,
"loss": 1.059862494468689,
"step": 174
},
{
"epoch": 0.3142857142857143,
"grad_norm": 0.3025449514389038,
"learning_rate": 9.927999132466059e-11,
"loss": 1.7108771800994873,
"step": 176
},
{
"epoch": 0.31785714285714284,
"grad_norm": 0.3157467842102051,
"learning_rate": 9.924808420249403e-11,
"loss": 1.5672202110290527,
"step": 178
},
{
"epoch": 0.32142857142857145,
"grad_norm": 0.699787437915802,
"learning_rate": 9.921549129549799e-11,
"loss": 1.339919924736023,
"step": 180
},
{
"epoch": 0.325,
"grad_norm": 0.48888230323791504,
"learning_rate": 9.918221310881795e-11,
"loss": 2.124028205871582,
"step": 182
},
{
"epoch": 0.32857142857142857,
"grad_norm": 0.314627081155777,
"learning_rate": 9.91482501582204e-11,
"loss": 1.5376380681991577,
"step": 184
},
{
"epoch": 0.33214285714285713,
"grad_norm": 0.21677131950855255,
"learning_rate": 9.91136029700846e-11,
"loss": 1.4906896352767944,
"step": 186
},
{
"epoch": 0.3357142857142857,
"grad_norm": 0.8940775394439697,
"learning_rate": 9.907827208139462e-11,
"loss": 1.7292166948318481,
"step": 188
},
{
"epoch": 0.3392857142857143,
"grad_norm": 0.32917287945747375,
"learning_rate": 9.904225803973094e-11,
"loss": 1.7098058462142944,
"step": 190
},
{
"epoch": 0.34285714285714286,
"grad_norm": 0.32240667939186096,
"learning_rate": 9.900556140326203e-11,
"loss": 1.2109023332595825,
"step": 192
},
{
"epoch": 0.3464285714285714,
"grad_norm": 0.3542839288711548,
"learning_rate": 9.896818274073555e-11,
"loss": 1.0306252241134644,
"step": 194
},
{
"epoch": 0.35,
"grad_norm": 0.5823745727539062,
"learning_rate": 9.893012263146971e-11,
"loss": 1.6056597232818604,
"step": 196
},
{
"epoch": 0.3535714285714286,
"grad_norm": 0.8560162782669067,
"learning_rate": 9.889138166534416e-11,
"loss": 1.6744499206542969,
"step": 198
},
{
"epoch": 0.35714285714285715,
"grad_norm": 0.348728746175766,
"learning_rate": 9.885196044279095e-11,
"loss": 1.5353935956954956,
"step": 200
},
{
"epoch": 0.3607142857142857,
"grad_norm": 5.2847466468811035,
"learning_rate": 9.881185957478514e-11,
"loss": 1.2694430351257324,
"step": 202
},
{
"epoch": 0.36428571428571427,
"grad_norm": 0.2536151111125946,
"learning_rate": 9.877107968283538e-11,
"loss": 1.559706687927246,
"step": 204
},
{
"epoch": 0.3678571428571429,
"grad_norm": 0.23312772810459137,
"learning_rate": 9.872962139897426e-11,
"loss": 1.6206153631210327,
"step": 206
},
{
"epoch": 0.37142857142857144,
"grad_norm": 0.2879284620285034,
"learning_rate": 9.86874853657485e-11,
"loss": 1.5968157052993774,
"step": 208
},
{
"epoch": 0.375,
"grad_norm": 0.279491126537323,
"learning_rate": 9.864467223620907e-11,
"loss": 1.3809610605239868,
"step": 210
},
{
"epoch": 0.37857142857142856,
"grad_norm": 0.31574228405952454,
"learning_rate": 9.860118267390092e-11,
"loss": 1.8226162195205688,
"step": 212
},
{
"epoch": 0.3821428571428571,
"grad_norm": 0.2886030972003937,
"learning_rate": 9.855701735285285e-11,
"loss": 1.6120859384536743,
"step": 214
},
{
"epoch": 0.38571428571428573,
"grad_norm": 0.22635193169116974,
"learning_rate": 9.851217695756695e-11,
"loss": 1.4717481136322021,
"step": 216
},
{
"epoch": 0.3892857142857143,
"grad_norm": 0.19679969549179077,
"learning_rate": 9.846666218300807e-11,
"loss": 1.4480830430984497,
"step": 218
},
{
"epoch": 0.39285714285714285,
"grad_norm": 0.42078128457069397,
"learning_rate": 9.842047373459305e-11,
"loss": 1.6225476264953613,
"step": 220
},
{
"epoch": 0.3964285714285714,
"grad_norm": 0.2914668023586273,
"learning_rate": 9.837361232817964e-11,
"loss": 1.455915093421936,
"step": 222
},
{
"epoch": 0.4,
"grad_norm": 0.292388379573822,
"learning_rate": 9.832607869005566e-11,
"loss": 1.6276307106018066,
"step": 224
},
{
"epoch": 0.4035714285714286,
"grad_norm": 0.5007878541946411,
"learning_rate": 9.827787355692747e-11,
"loss": 1.6865593194961548,
"step": 226
},
{
"epoch": 0.40714285714285714,
"grad_norm": 0.6924875974655151,
"learning_rate": 9.822899767590882e-11,
"loss": 1.6561429500579834,
"step": 228
},
{
"epoch": 0.4107142857142857,
"grad_norm": 0.3577944040298462,
"learning_rate": 9.817945180450903e-11,
"loss": 1.7861510515213013,
"step": 230
},
{
"epoch": 0.4142857142857143,
"grad_norm": 0.245406836271286,
"learning_rate": 9.812923671062139e-11,
"loss": 1.6835694313049316,
"step": 232
},
{
"epoch": 0.41785714285714287,
"grad_norm": 0.32185444235801697,
"learning_rate": 9.807835317251119e-11,
"loss": 1.4940822124481201,
"step": 234
},
{
"epoch": 0.42142857142857143,
"grad_norm": 0.3216511011123657,
"learning_rate": 9.802680197880379e-11,
"loss": 1.3896503448486328,
"step": 236
},
{
"epoch": 0.425,
"grad_norm": 0.29104116559028625,
"learning_rate": 9.79745839284722e-11,
"loss": 1.386911392211914,
"step": 238
},
{
"epoch": 0.42857142857142855,
"grad_norm": 0.1894490122795105,
"learning_rate": 9.792169983082484e-11,
"loss": 1.5068607330322266,
"step": 240
},
{
"epoch": 0.43214285714285716,
"grad_norm": 0.554080069065094,
"learning_rate": 9.786815050549295e-11,
"loss": 1.5528550148010254,
"step": 242
},
{
"epoch": 0.4357142857142857,
"grad_norm": 0.41308215260505676,
"learning_rate": 9.781393678241786e-11,
"loss": 1.6799581050872803,
"step": 244
},
{
"epoch": 0.4392857142857143,
"grad_norm": 0.5376290082931519,
"learning_rate": 9.775905950183821e-11,
"loss": 1.7018500566482544,
"step": 246
},
{
"epoch": 0.44285714285714284,
"grad_norm": 0.567138135433197,
"learning_rate": 9.770351951427684e-11,
"loss": 1.5787659883499146,
"step": 248
},
{
"epoch": 0.44642857142857145,
"grad_norm": 0.4916873574256897,
"learning_rate": 9.764731768052763e-11,
"loss": 1.8073500394821167,
"step": 250
},
{
"epoch": 0.45,
"grad_norm": 1.941686987876892,
"learning_rate": 9.75904548716422e-11,
"loss": 1.7315703630447388,
"step": 252
},
{
"epoch": 0.45357142857142857,
"grad_norm": 0.3704272210597992,
"learning_rate": 9.753293196891639e-11,
"loss": 1.3973535299301147,
"step": 254
},
{
"epoch": 0.45714285714285713,
"grad_norm": 0.5139915943145752,
"learning_rate": 9.747474986387655e-11,
"loss": 1.6836737394332886,
"step": 256
},
{
"epoch": 0.4607142857142857,
"grad_norm": 0.28661784529685974,
"learning_rate": 9.74159094582658e-11,
"loss": 1.4698333740234375,
"step": 258
},
{
"epoch": 0.4642857142857143,
"grad_norm": 0.32768112421035767,
"learning_rate": 9.735641166402998e-11,
"loss": 1.6010611057281494,
"step": 260
},
{
"epoch": 0.46785714285714286,
"grad_norm": 1.0357842445373535,
"learning_rate": 9.729625740330362e-11,
"loss": 1.5772435665130615,
"step": 262
},
{
"epoch": 0.4714285714285714,
"grad_norm": 0.727933943271637,
"learning_rate": 9.723544760839555e-11,
"loss": 1.7117342948913574,
"step": 264
},
{
"epoch": 0.475,
"grad_norm": 0.426328182220459,
"learning_rate": 9.717398322177442e-11,
"loss": 1.5788122415542603,
"step": 266
},
{
"epoch": 0.4785714285714286,
"grad_norm": 0.7015219926834106,
"learning_rate": 9.71118651960543e-11,
"loss": 1.7658413648605347,
"step": 268
},
{
"epoch": 0.48214285714285715,
"grad_norm": 0.28371384739875793,
"learning_rate": 9.704909449397962e-11,
"loss": 1.4088166952133179,
"step": 270
},
{
"epoch": 0.4857142857142857,
"grad_norm": 0.3040277063846588,
"learning_rate": 9.69856720884105e-11,
"loss": 1.338411808013916,
"step": 272
},
{
"epoch": 0.48928571428571427,
"grad_norm": 0.30010318756103516,
"learning_rate": 9.692159896230756e-11,
"loss": 1.7016575336456299,
"step": 274
},
{
"epoch": 0.4928571428571429,
"grad_norm": 0.4967346489429474,
"learning_rate": 9.685687610871665e-11,
"loss": 1.7265392541885376,
"step": 276
},
{
"epoch": 0.49642857142857144,
"grad_norm": 0.32157382369041443,
"learning_rate": 9.679150453075356e-11,
"loss": 1.5933375358581543,
"step": 278
},
{
"epoch": 0.5,
"grad_norm": 0.26096051931381226,
"learning_rate": 9.67254852415884e-11,
"loss": 1.591860294342041,
"step": 280
},
{
"epoch": 0.5035714285714286,
"grad_norm": 0.23185808956623077,
"learning_rate": 9.665881926442993e-11,
"loss": 1.589665174484253,
"step": 282
},
{
"epoch": 0.5071428571428571,
"grad_norm": 0.32329416275024414,
"learning_rate": 9.659150763250966e-11,
"loss": 1.6434839963912964,
"step": 284
},
{
"epoch": 0.5107142857142857,
"grad_norm": 0.29206129908561707,
"learning_rate": 9.65235513890659e-11,
"loss": 1.605237364768982,
"step": 286
},
{
"epoch": 0.5142857142857142,
"grad_norm": 0.4287250339984894,
"learning_rate": 9.645495158732754e-11,
"loss": 1.464892864227295,
"step": 288
},
{
"epoch": 0.5178571428571429,
"grad_norm": 0.3669995963573456,
"learning_rate": 9.638570929049775e-11,
"loss": 1.1133761405944824,
"step": 290
},
{
"epoch": 0.5214285714285715,
"grad_norm": 1.1597135066986084,
"learning_rate": 9.631582557173752e-11,
"loss": 1.183565616607666,
"step": 292
},
{
"epoch": 0.525,
"grad_norm": 0.29547828435897827,
"learning_rate": 9.624530151414893e-11,
"loss": 1.507879614830017,
"step": 294
},
{
"epoch": 0.5285714285714286,
"grad_norm": 0.6196130514144897,
"learning_rate": 9.617413821075851e-11,
"loss": 1.291815161705017,
"step": 296
},
{
"epoch": 0.5321428571428571,
"grad_norm": 0.23788149654865265,
"learning_rate": 9.610233676450021e-11,
"loss": 1.4130339622497559,
"step": 298
},
{
"epoch": 0.5357142857142857,
"grad_norm": 0.4616411328315735,
"learning_rate": 9.602989828819829e-11,
"loss": 1.3440712690353394,
"step": 300
},
{
"epoch": 0.5392857142857143,
"grad_norm": 0.2647687494754791,
"learning_rate": 9.595682390455016e-11,
"loss": 1.6042909622192383,
"step": 302
},
{
"epoch": 0.5428571428571428,
"grad_norm": 0.16753101348876953,
"learning_rate": 9.588311474610887e-11,
"loss": 1.2125191688537598,
"step": 304
},
{
"epoch": 0.5464285714285714,
"grad_norm": 0.5108986496925354,
"learning_rate": 9.580877195526563e-11,
"loss": 1.5605721473693848,
"step": 306
},
{
"epoch": 0.55,
"grad_norm": 0.15435348451137543,
"learning_rate": 9.573379668423209e-11,
"loss": 1.5854820013046265,
"step": 308
},
{
"epoch": 0.5535714285714286,
"grad_norm": 0.30770283937454224,
"learning_rate": 9.56581900950225e-11,
"loss": 1.3195915222167969,
"step": 310
},
{
"epoch": 0.5571428571428572,
"grad_norm": 0.3878917694091797,
"learning_rate": 9.558195335943565e-11,
"loss": 1.6782605648040771,
"step": 312
},
{
"epoch": 0.5607142857142857,
"grad_norm": 0.43914857506752014,
"learning_rate": 9.550508765903672e-11,
"loss": 1.3400123119354248,
"step": 314
},
{
"epoch": 0.5642857142857143,
"grad_norm": 0.7770475745201111,
"learning_rate": 9.542759418513905e-11,
"loss": 1.4410650730133057,
"step": 316
},
{
"epoch": 0.5678571428571428,
"grad_norm": 0.4235874116420746,
"learning_rate": 9.534947413878556e-11,
"loss": 1.5005742311477661,
"step": 318
},
{
"epoch": 0.5714285714285714,
"grad_norm": 0.4761328101158142,
"learning_rate": 9.527072873073018e-11,
"loss": 1.5790057182312012,
"step": 320
},
{
"epoch": 0.575,
"grad_norm": 0.30607500672340393,
"learning_rate": 9.519135918141913e-11,
"loss": 1.4029256105422974,
"step": 322
},
{
"epoch": 0.5785714285714286,
"grad_norm": 0.199839249253273,
"learning_rate": 9.511136672097194e-11,
"loss": 1.2247833013534546,
"step": 324
},
{
"epoch": 0.5821428571428572,
"grad_norm": 0.4178214371204376,
"learning_rate": 9.50307525891624e-11,
"loss": 1.5843498706817627,
"step": 326
},
{
"epoch": 0.5857142857142857,
"grad_norm": 0.5691080093383789,
"learning_rate": 9.494951803539943e-11,
"loss": 1.432054042816162,
"step": 328
},
{
"epoch": 0.5892857142857143,
"grad_norm": 0.2235386073589325,
"learning_rate": 9.486766431870752e-11,
"loss": 1.3727772235870361,
"step": 330
},
{
"epoch": 0.5928571428571429,
"grad_norm": 0.294951468706131,
"learning_rate": 9.478519270770745e-11,
"loss": 1.2510879039764404,
"step": 332
},
{
"epoch": 0.5964285714285714,
"grad_norm": 0.5128263831138611,
"learning_rate": 9.470210448059644e-11,
"loss": 1.4979231357574463,
"step": 334
},
{
"epoch": 0.6,
"grad_norm": 0.4786055088043213,
"learning_rate": 9.461840092512849e-11,
"loss": 1.4333927631378174,
"step": 336
},
{
"epoch": 0.6035714285714285,
"grad_norm": 0.34201353788375854,
"learning_rate": 9.453408333859427e-11,
"loss": 1.490931510925293,
"step": 338
},
{
"epoch": 0.6071428571428571,
"grad_norm": 0.5112800002098083,
"learning_rate": 9.444915302780116e-11,
"loss": 1.5842055082321167,
"step": 340
},
{
"epoch": 0.6107142857142858,
"grad_norm": 0.25259843468666077,
"learning_rate": 9.436361130905288e-11,
"loss": 1.5695821046829224,
"step": 342
},
{
"epoch": 0.6142857142857143,
"grad_norm": 0.6922969222068787,
"learning_rate": 9.427745950812918e-11,
"loss": 1.4329246282577515,
"step": 344
},
{
"epoch": 0.6178571428571429,
"grad_norm": 0.5728318691253662,
"learning_rate": 9.41906989602652e-11,
"loss": 1.4237987995147705,
"step": 346
},
{
"epoch": 0.6214285714285714,
"grad_norm": 0.20611967146396637,
"learning_rate": 9.410333101013086e-11,
"loss": 1.5165703296661377,
"step": 348
},
{
"epoch": 0.625,
"grad_norm": 0.2743736207485199,
"learning_rate": 9.401535701180998e-11,
"loss": 1.3337576389312744,
"step": 350
},
{
"epoch": 0.6285714285714286,
"grad_norm": 0.5341992974281311,
"learning_rate": 9.39267783287793e-11,
"loss": 1.6692336797714233,
"step": 352
},
{
"epoch": 0.6321428571428571,
"grad_norm": 0.6746388673782349,
"learning_rate": 9.383759633388736e-11,
"loss": 1.6261032819747925,
"step": 354
},
{
"epoch": 0.6357142857142857,
"grad_norm": 0.4282362163066864,
"learning_rate": 9.374781240933316e-11,
"loss": 1.385348916053772,
"step": 356
},
{
"epoch": 0.6392857142857142,
"grad_norm": 0.17694340646266937,
"learning_rate": 9.365742794664484e-11,
"loss": 1.1620020866394043,
"step": 358
},
{
"epoch": 0.6428571428571429,
"grad_norm": 0.31339991092681885,
"learning_rate": 9.356644434665803e-11,
"loss": 1.6486270427703857,
"step": 360
},
{
"epoch": 0.6464285714285715,
"grad_norm": 0.23338527977466583,
"learning_rate": 9.347486301949418e-11,
"loss": 1.5743000507354736,
"step": 362
},
{
"epoch": 0.65,
"grad_norm": 0.35687318444252014,
"learning_rate": 9.338268538453869e-11,
"loss": 1.6195820569992065,
"step": 364
},
{
"epoch": 0.6535714285714286,
"grad_norm": 0.22189630568027496,
"learning_rate": 9.328991287041892e-11,
"loss": 1.555490255355835,
"step": 366
},
{
"epoch": 0.6571428571428571,
"grad_norm": 0.24453404545783997,
"learning_rate": 9.319654691498206e-11,
"loss": 1.606858730316162,
"step": 368
},
{
"epoch": 0.6607142857142857,
"grad_norm": 0.29564130306243896,
"learning_rate": 9.310258896527278e-11,
"loss": 1.5415475368499756,
"step": 370
},
{
"epoch": 0.6642857142857143,
"grad_norm": 0.24590720236301422,
"learning_rate": 9.300804047751092e-11,
"loss": 1.5487055778503418,
"step": 372
},
{
"epoch": 0.6678571428571428,
"grad_norm": 0.21976569294929504,
"learning_rate": 9.29129029170688e-11,
"loss": 1.5091149806976318,
"step": 374
},
{
"epoch": 0.6714285714285714,
"grad_norm": 0.36947011947631836,
"learning_rate": 9.281717775844857e-11,
"loss": 1.5175336599349976,
"step": 376
},
{
"epoch": 0.675,
"grad_norm": 0.31734710931777954,
"learning_rate": 9.272086648525937e-11,
"loss": 1.5560580492019653,
"step": 378
},
{
"epoch": 0.6785714285714286,
"grad_norm": 0.3260459005832672,
"learning_rate": 9.26239705901943e-11,
"loss": 1.5495206117630005,
"step": 380
},
{
"epoch": 0.6821428571428572,
"grad_norm": 0.2940950393676758,
"learning_rate": 9.25264915750073e-11,
"loss": 1.7193139791488647,
"step": 382
},
{
"epoch": 0.6857142857142857,
"grad_norm": 0.5894531607627869,
"learning_rate": 9.242843095048987e-11,
"loss": 1.5194193124771118,
"step": 384
},
{
"epoch": 0.6892857142857143,
"grad_norm": 0.24562247097492218,
"learning_rate": 9.232979023644767e-11,
"loss": 1.3620585203170776,
"step": 386
},
{
"epoch": 0.6928571428571428,
"grad_norm": 0.25872641801834106,
"learning_rate": 9.223057096167696e-11,
"loss": 1.5675086975097656,
"step": 388
},
{
"epoch": 0.6964285714285714,
"grad_norm": 0.34952595829963684,
"learning_rate": 9.213077466394088e-11,
"loss": 1.556618094444275,
"step": 390
},
{
"epoch": 0.7,
"grad_norm": 0.3161361813545227,
"learning_rate": 9.203040288994566e-11,
"loss": 1.566756248474121,
"step": 392
},
{
"epoch": 0.7035714285714286,
"grad_norm": 0.2212635725736618,
"learning_rate": 9.192945719531662e-11,
"loss": 1.4415736198425293,
"step": 394
},
{
"epoch": 0.7071428571428572,
"grad_norm": 0.24640850722789764,
"learning_rate": 9.182793914457402e-11,
"loss": 1.5302636623382568,
"step": 396
},
{
"epoch": 0.7107142857142857,
"grad_norm": 0.32642388343811035,
"learning_rate": 9.172585031110895e-11,
"loss": 1.5108647346496582,
"step": 398
},
{
"epoch": 0.7142857142857143,
"grad_norm": 0.44278931617736816,
"learning_rate": 9.162319227715878e-11,
"loss": 1.4763598442077637,
"step": 400
},
{
"epoch": 0.7178571428571429,
"grad_norm": 0.6782664060592651,
"learning_rate": 9.151996663378271e-11,
"loss": 1.5651448965072632,
"step": 402
},
{
"epoch": 0.7214285714285714,
"grad_norm": 0.3046627640724182,
"learning_rate": 9.141617498083716e-11,
"loss": 1.54989755153656,
"step": 404
},
{
"epoch": 0.725,
"grad_norm": 0.2901434302330017,
"learning_rate": 9.131181892695088e-11,
"loss": 1.5768945217132568,
"step": 406
},
{
"epoch": 0.7285714285714285,
"grad_norm": 0.33921149373054504,
"learning_rate": 9.120690008950008e-11,
"loss": 1.5979000329971313,
"step": 408
},
{
"epoch": 0.7321428571428571,
"grad_norm": 0.3361220359802246,
"learning_rate": 9.110142009458333e-11,
"loss": 1.497594952583313,
"step": 410
},
{
"epoch": 0.7357142857142858,
"grad_norm": 0.5470672249794006,
"learning_rate": 9.099538057699643e-11,
"loss": 1.574213981628418,
"step": 412
},
{
"epoch": 0.7392857142857143,
"grad_norm": 0.2838931381702423,
"learning_rate": 9.08887831802069e-11,
"loss": 1.5098413228988647,
"step": 414
},
{
"epoch": 0.7428571428571429,
"grad_norm": 0.20152640342712402,
"learning_rate": 9.078162955632877e-11,
"loss": 1.4644570350646973,
"step": 416
},
{
"epoch": 0.7464285714285714,
"grad_norm": 0.4462726414203644,
"learning_rate": 9.067392136609671e-11,
"loss": 1.4528954029083252,
"step": 418
},
{
"epoch": 0.75,
"grad_norm": 1.3298100233078003,
"learning_rate": 9.05656602788405e-11,
"loss": 1.7332755327224731,
"step": 420
},
{
"epoch": 0.7535714285714286,
"grad_norm": 0.4603538513183594,
"learning_rate": 9.045684797245901e-11,
"loss": 1.278205156326294,
"step": 422
},
{
"epoch": 0.7571428571428571,
"grad_norm": 0.24970531463623047,
"learning_rate": 9.034748613339426e-11,
"loss": 1.462319254875183,
"step": 424
},
{
"epoch": 0.7607142857142857,
"grad_norm": 0.26904845237731934,
"learning_rate": 9.02375764566053e-11,
"loss": 1.5326104164123535,
"step": 426
},
{
"epoch": 0.7642857142857142,
"grad_norm": 0.2414090633392334,
"learning_rate": 9.012712064554189e-11,
"loss": 1.429811716079712,
"step": 428
},
{
"epoch": 0.7678571428571429,
"grad_norm": 0.1693522036075592,
"learning_rate": 9.001612041211817e-11,
"loss": 1.4766952991485596,
"step": 430
},
{
"epoch": 0.7714285714285715,
"grad_norm": 0.1426696926355362,
"learning_rate": 8.9904577476686e-11,
"loss": 1.4853742122650146,
"step": 432
},
{
"epoch": 0.775,
"grad_norm": 0.2107386738061905,
"learning_rate": 8.979249356800845e-11,
"loss": 1.3115483522415161,
"step": 434
},
{
"epoch": 0.7785714285714286,
"grad_norm": 0.18600672483444214,
"learning_rate": 8.967987042323293e-11,
"loss": 1.4503666162490845,
"step": 436
},
{
"epoch": 0.7821428571428571,
"grad_norm": 0.13341867923736572,
"learning_rate": 8.956670978786423e-11,
"loss": 1.3890115022659302,
"step": 438
},
{
"epoch": 0.7857142857142857,
"grad_norm": 0.16304397583007812,
"learning_rate": 8.945301341573758e-11,
"loss": 1.427004098892212,
"step": 440
},
{
"epoch": 0.7892857142857143,
"grad_norm": 0.15247249603271484,
"learning_rate": 8.93387830689913e-11,
"loss": 1.3906886577606201,
"step": 442
},
{
"epoch": 0.7928571428571428,
"grad_norm": 0.23002289235591888,
"learning_rate": 8.922402051803968e-11,
"loss": 1.4104899168014526,
"step": 444
},
{
"epoch": 0.7964285714285714,
"grad_norm": 0.17135556042194366,
"learning_rate": 8.910872754154539e-11,
"loss": 1.4079930782318115,
"step": 446
},
{
"epoch": 0.8,
"grad_norm": 0.16783295571804047,
"learning_rate": 8.8992905926392e-11,
"loss": 1.3688652515411377,
"step": 448
},
{
"epoch": 0.8035714285714286,
"grad_norm": 0.13924425840377808,
"learning_rate": 8.887655746765626e-11,
"loss": 1.4514307975769043,
"step": 450
},
{
"epoch": 0.8071428571428572,
"grad_norm": 0.19156193733215332,
"learning_rate": 8.875968396858021e-11,
"loss": 1.43871009349823,
"step": 452
},
{
"epoch": 0.8107142857142857,
"grad_norm": 0.13427551090717316,
"learning_rate": 8.864228724054341e-11,
"loss": 1.3157857656478882,
"step": 454
},
{
"epoch": 0.8142857142857143,
"grad_norm": 0.3618568778038025,
"learning_rate": 8.852436910303467e-11,
"loss": 1.3874411582946777,
"step": 456
},
{
"epoch": 0.8178571428571428,
"grad_norm": 0.15196223556995392,
"learning_rate": 8.840593138362395e-11,
"loss": 1.3326449394226074,
"step": 458
},
{
"epoch": 0.8214285714285714,
"grad_norm": 0.1430208683013916,
"learning_rate": 8.828697591793404e-11,
"loss": 1.3794419765472412,
"step": 460
},
{
"epoch": 0.825,
"grad_norm": 0.15054234862327576,
"learning_rate": 8.816750454961207e-11,
"loss": 1.3381770849227905,
"step": 462
},
{
"epoch": 0.8285714285714286,
"grad_norm": 0.1533413976430893,
"learning_rate": 8.804751913030096e-11,
"loss": 1.3633875846862793,
"step": 464
},
{
"epoch": 0.8321428571428572,
"grad_norm": 0.23823502659797668,
"learning_rate": 8.792702151961074e-11,
"loss": 1.3637359142303467,
"step": 466
},
{
"epoch": 0.8357142857142857,
"grad_norm": 0.3262959122657776,
"learning_rate": 8.780601358508965e-11,
"loss": 1.358881950378418,
"step": 468
},
{
"epoch": 0.8392857142857143,
"grad_norm": 0.21195098757743835,
"learning_rate": 8.768449720219532e-11,
"loss": 1.3551591634750366,
"step": 470
},
{
"epoch": 0.8428571428571429,
"grad_norm": 0.15915225446224213,
"learning_rate": 8.756247425426559e-11,
"loss": 1.40420401096344,
"step": 472
},
{
"epoch": 0.8464285714285714,
"grad_norm": 0.18268409371376038,
"learning_rate": 8.743994663248939e-11,
"loss": 1.3795422315597534,
"step": 474
},
{
"epoch": 0.85,
"grad_norm": 0.3486330211162567,
"learning_rate": 8.731691623587738e-11,
"loss": 1.3883861303329468,
"step": 476
},
{
"epoch": 0.8535714285714285,
"grad_norm": 0.20148774981498718,
"learning_rate": 8.719338497123257e-11,
"loss": 1.4160270690917969,
"step": 478
},
{
"epoch": 0.8571428571428571,
"grad_norm": 0.17149412631988525,
"learning_rate": 8.706935475312072e-11,
"loss": 1.4068703651428223,
"step": 480
},
{
"epoch": 0.8607142857142858,
"grad_norm": 0.20048604905605316,
"learning_rate": 8.694482750384069e-11,
"loss": 1.3851019144058228,
"step": 482
},
{
"epoch": 0.8642857142857143,
"grad_norm": 0.18711215257644653,
"learning_rate": 8.681980515339464e-11,
"loss": 1.4040751457214355,
"step": 484
},
{
"epoch": 0.8678571428571429,
"grad_norm": 0.1565803438425064,
"learning_rate": 8.669428963945815e-11,
"loss": 1.3519985675811768,
"step": 486
},
{
"epoch": 0.8714285714285714,
"grad_norm": 0.3789925277233124,
"learning_rate": 8.656828290735013e-11,
"loss": 1.3698389530181885,
"step": 488
},
{
"epoch": 0.875,
"grad_norm": 0.4136276841163635,
"learning_rate": 8.64417869100027e-11,
"loss": 1.375820279121399,
"step": 490
},
{
"epoch": 0.8785714285714286,
"grad_norm": 0.32030630111694336,
"learning_rate": 8.631480360793095e-11,
"loss": 1.4090447425842285,
"step": 492
},
{
"epoch": 0.8821428571428571,
"grad_norm": 0.19467246532440186,
"learning_rate": 8.61873349692025e-11,
"loss": 1.3862667083740234,
"step": 494
},
{
"epoch": 0.8857142857142857,
"grad_norm": 0.18389186263084412,
"learning_rate": 8.605938296940702e-11,
"loss": 1.3221886157989502,
"step": 496
},
{
"epoch": 0.8892857142857142,
"grad_norm": 0.1759600192308426,
"learning_rate": 8.593094959162564e-11,
"loss": 1.360346794128418,
"step": 498
},
{
"epoch": 0.8928571428571429,
"grad_norm": 0.18795090913772583,
"learning_rate": 8.580203682640018e-11,
"loss": 1.378947377204895,
"step": 500
},
{
"epoch": 0.8964285714285715,
"grad_norm": 0.2354908436536789,
"learning_rate": 8.567264667170233e-11,
"loss": 1.3998974561691284,
"step": 502
},
{
"epoch": 0.9,
"grad_norm": 0.16188935935497284,
"learning_rate": 8.554278113290261e-11,
"loss": 1.3859062194824219,
"step": 504
},
{
"epoch": 0.9035714285714286,
"grad_norm": 0.1890728920698166,
"learning_rate": 8.54124422227394e-11,
"loss": 1.3670257329940796,
"step": 506
},
{
"epoch": 0.9071428571428571,
"grad_norm": 0.18385370075702667,
"learning_rate": 8.528163196128768e-11,
"loss": 1.3132009506225586,
"step": 508
},
{
"epoch": 0.9107142857142857,
"grad_norm": 0.1908484250307083,
"learning_rate": 8.51503523759277e-11,
"loss": 1.369741678237915,
"step": 510
},
{
"epoch": 0.9142857142857143,
"grad_norm": 0.1750551164150238,
"learning_rate": 8.501860550131361e-11,
"loss": 1.390952706336975,
"step": 512
},
{
"epoch": 0.9178571428571428,
"grad_norm": 0.17115701735019684,
"learning_rate": 8.488639337934187e-11,
"loss": 1.3319488763809204,
"step": 514
},
{
"epoch": 0.9214285714285714,
"grad_norm": 0.17177698016166687,
"learning_rate": 8.475371805911974e-11,
"loss": 1.3774604797363281,
"step": 516
},
{
"epoch": 0.925,
"grad_norm": 0.26216426491737366,
"learning_rate": 8.46205815969333e-11,
"loss": 1.355608582496643,
"step": 518
},
{
"epoch": 0.9285714285714286,
"grad_norm": 0.1776549220085144,
"learning_rate": 8.448698605621579e-11,
"loss": 1.4020413160324097,
"step": 520
},
{
"epoch": 0.9321428571428572,
"grad_norm": 0.1494104564189911,
"learning_rate": 8.435293350751545e-11,
"loss": 1.4186415672302246,
"step": 522
},
{
"epoch": 0.9357142857142857,
"grad_norm": 0.15738944709300995,
"learning_rate": 8.421842602846361e-11,
"loss": 1.3921843767166138,
"step": 524
},
{
"epoch": 0.9392857142857143,
"grad_norm": 0.19063201546669006,
"learning_rate": 8.408346570374234e-11,
"loss": 1.3952494859695435,
"step": 526
},
{
"epoch": 0.9428571428571428,
"grad_norm": 0.17228396236896515,
"learning_rate": 8.394805462505222e-11,
"loss": 1.3549234867095947,
"step": 528
},
{
"epoch": 0.9464285714285714,
"grad_norm": 0.18750858306884766,
"learning_rate": 8.381219489107991e-11,
"loss": 1.3113043308258057,
"step": 530
},
{
"epoch": 0.95,
"grad_norm": 0.3387869894504547,
"learning_rate": 8.367588860746559e-11,
"loss": 1.3371450901031494,
"step": 532
},
{
"epoch": 0.9535714285714286,
"grad_norm": 0.19042649865150452,
"learning_rate": 8.353913788677036e-11,
"loss": 1.3512016534805298,
"step": 534
},
{
"epoch": 0.9571428571428572,
"grad_norm": 0.22516337037086487,
"learning_rate": 8.340194484844349e-11,
"loss": 1.3233355283737183,
"step": 536
},
{
"epoch": 0.9607142857142857,
"grad_norm": 0.17853480577468872,
"learning_rate": 8.326431161878958e-11,
"loss": 1.3481040000915527,
"step": 538
},
{
"epoch": 0.9642857142857143,
"grad_norm": 0.1799231916666031,
"learning_rate": 8.312624033093554e-11,
"loss": 1.3993315696716309,
"step": 540
},
{
"epoch": 0.9678571428571429,
"grad_norm": 0.2989877164363861,
"learning_rate": 8.298773312479766e-11,
"loss": 1.3812036514282227,
"step": 542
},
{
"epoch": 0.9714285714285714,
"grad_norm": 0.17884613573551178,
"learning_rate": 8.284879214704834e-11,
"loss": 1.3906068801879883,
"step": 544
},
{
"epoch": 0.975,
"grad_norm": 0.28383585810661316,
"learning_rate": 8.27094195510828e-11,
"loss": 1.3412469625473022,
"step": 546
},
{
"epoch": 0.9785714285714285,
"grad_norm": 0.28323087096214294,
"learning_rate": 8.256961749698581e-11,
"loss": 1.33486807346344,
"step": 548
},
{
"epoch": 0.9821428571428571,
"grad_norm": 0.21711941063404083,
"learning_rate": 8.242938815149817e-11,
"loss": 1.370800256729126,
"step": 550
},
{
"epoch": 0.9857142857142858,
"grad_norm": 0.20825232565402985,
"learning_rate": 8.228873368798304e-11,
"loss": 1.3969084024429321,
"step": 552
},
{
"epoch": 0.9892857142857143,
"grad_norm": 0.19650134444236755,
"learning_rate": 8.214765628639235e-11,
"loss": 1.4430627822875977,
"step": 554
},
{
"epoch": 0.9928571428571429,
"grad_norm": 0.30671611428260803,
"learning_rate": 8.200615813323305e-11,
"loss": 2.1070995330810547,
"step": 556
},
{
"epoch": 0.9964285714285714,
"grad_norm": 0.3667857348918915,
"learning_rate": 8.18642414215331e-11,
"loss": 2.032963752746582,
"step": 558
},
{
"epoch": 1.0,
"grad_norm": 0.47882604598999023,
"learning_rate": 8.172190835080758e-11,
"loss": 1.951048731803894,
"step": 560
},
{
"epoch": 1.0035714285714286,
"grad_norm": 0.17595674097537994,
"learning_rate": 8.157916112702452e-11,
"loss": 1.538697361946106,
"step": 562
},
{
"epoch": 1.0071428571428571,
"grad_norm": 0.15145337581634521,
"learning_rate": 8.143600196257085e-11,
"loss": 1.4741806983947754,
"step": 564
},
{
"epoch": 1.0107142857142857,
"grad_norm": 0.16578371822834015,
"learning_rate": 8.129243307621791e-11,
"loss": 1.4760806560516357,
"step": 566
},
{
"epoch": 1.0142857142857142,
"grad_norm": 0.208124577999115,
"learning_rate": 8.114845669308723e-11,
"loss": 1.5404026508331299,
"step": 568
},
{
"epoch": 1.0178571428571428,
"grad_norm": 0.15642480552196503,
"learning_rate": 8.100407504461596e-11,
"loss": 1.4572526216506958,
"step": 570
},
{
"epoch": 1.0214285714285714,
"grad_norm": 0.17752893269062042,
"learning_rate": 8.085929036852235e-11,
"loss": 1.4172760248184204,
"step": 572
},
{
"epoch": 1.025,
"grad_norm": 0.16976918280124664,
"learning_rate": 8.071410490877097e-11,
"loss": 1.3708614110946655,
"step": 574
},
{
"epoch": 1.0285714285714285,
"grad_norm": 0.21366411447525024,
"learning_rate": 8.0568520915538e-11,
"loss": 1.426504373550415,
"step": 576
},
{
"epoch": 1.032142857142857,
"grad_norm": 0.2278887778520584,
"learning_rate": 8.042254064517641e-11,
"loss": 1.4279263019561768,
"step": 578
},
{
"epoch": 1.0357142857142858,
"grad_norm": 0.15804490447044373,
"learning_rate": 8.027616636018083e-11,
"loss": 1.3734034299850464,
"step": 580
},
{
"epoch": 1.0392857142857144,
"grad_norm": 0.18044191598892212,
"learning_rate": 8.012940032915264e-11,
"loss": 1.3280595541000366,
"step": 582
},
{
"epoch": 1.042857142857143,
"grad_norm": 0.2565247416496277,
"learning_rate": 7.998224482676473e-11,
"loss": 1.5239272117614746,
"step": 584
},
{
"epoch": 1.0464285714285715,
"grad_norm": 0.20189274847507477,
"learning_rate": 7.983470213372624e-11,
"loss": 1.4172801971435547,
"step": 586
},
{
"epoch": 1.05,
"grad_norm": 0.1864236444234848,
"learning_rate": 7.96867745367473e-11,
"loss": 1.2826603651046753,
"step": 588
},
{
"epoch": 1.0535714285714286,
"grad_norm": 0.1944390833377838,
"learning_rate": 7.953846432850344e-11,
"loss": 1.4027470350265503,
"step": 590
},
{
"epoch": 1.0571428571428572,
"grad_norm": 0.19034692645072937,
"learning_rate": 7.938977380760024e-11,
"loss": 1.35616934299469,
"step": 592
},
{
"epoch": 1.0607142857142857,
"grad_norm": 0.15960095822811127,
"learning_rate": 7.924070527853751e-11,
"loss": 1.438362717628479,
"step": 594
},
{
"epoch": 1.0642857142857143,
"grad_norm": 0.1991662234067917,
"learning_rate": 7.909126105167373e-11,
"loss": 1.5469117164611816,
"step": 596
},
{
"epoch": 1.0678571428571428,
"grad_norm": 0.3842826187610626,
"learning_rate": 7.894144344319014e-11,
"loss": 1.5314031839370728,
"step": 598
},
{
"epoch": 1.0714285714285714,
"grad_norm": 0.19364933669567108,
"learning_rate": 7.879125477505494e-11,
"loss": 1.2979897260665894,
"step": 600
},
{
"epoch": 1.075,
"grad_norm": 0.22292639315128326,
"learning_rate": 7.864069737498722e-11,
"loss": 1.4224971532821655,
"step": 602
},
{
"epoch": 1.0785714285714285,
"grad_norm": 0.22089318931102753,
"learning_rate": 7.848977357642088e-11,
"loss": 1.4035828113555908,
"step": 604
},
{
"epoch": 1.082142857142857,
"grad_norm": 0.17179912328720093,
"learning_rate": 7.833848571846855e-11,
"loss": 1.4242126941680908,
"step": 606
},
{
"epoch": 1.0857142857142856,
"grad_norm": 0.14348094165325165,
"learning_rate": 7.818683614588523e-11,
"loss": 1.2999255657196045,
"step": 608
},
{
"epoch": 1.0892857142857142,
"grad_norm": 0.17789550125598907,
"learning_rate": 7.803482720903205e-11,
"loss": 1.3410260677337646,
"step": 610
},
{
"epoch": 1.092857142857143,
"grad_norm": 0.1673593670129776,
"learning_rate": 7.788246126383976e-11,
"loss": 1.3250867128372192,
"step": 612
},
{
"epoch": 1.0964285714285715,
"grad_norm": 0.17654156684875488,
"learning_rate": 7.77297406717723e-11,
"loss": 1.4403133392333984,
"step": 614
},
{
"epoch": 1.1,
"grad_norm": 0.14737814664840698,
"learning_rate": 7.757666779979006e-11,
"loss": 1.4014517068862915,
"step": 616
},
{
"epoch": 1.1035714285714286,
"grad_norm": 0.27064049243927,
"learning_rate": 7.742324502031339e-11,
"loss": 1.4908621311187744,
"step": 618
},
{
"epoch": 1.1071428571428572,
"grad_norm": 0.19962742924690247,
"learning_rate": 7.72694747111857e-11,
"loss": 1.557312250137329,
"step": 620
},
{
"epoch": 1.1107142857142858,
"grad_norm": 0.4431476294994354,
"learning_rate": 7.711535925563654e-11,
"loss": 1.3716031312942505,
"step": 622
},
{
"epoch": 1.1142857142857143,
"grad_norm": 0.26187270879745483,
"learning_rate": 7.696090104224491e-11,
"loss": 1.5084398984909058,
"step": 624
},
{
"epoch": 1.1178571428571429,
"grad_norm": 0.4299829602241516,
"learning_rate": 7.680610246490199e-11,
"loss": 1.483479380607605,
"step": 626
},
{
"epoch": 1.1214285714285714,
"grad_norm": 0.25136733055114746,
"learning_rate": 7.665096592277414e-11,
"loss": 1.3639954328536987,
"step": 628
},
{
"epoch": 1.125,
"grad_norm": 0.3161426782608032,
"learning_rate": 7.649549382026574e-11,
"loss": 1.4137345552444458,
"step": 630
},
{
"epoch": 1.1285714285714286,
"grad_norm": 0.3137775659561157,
"learning_rate": 7.633968856698192e-11,
"loss": 1.1740094423294067,
"step": 632
},
{
"epoch": 1.1321428571428571,
"grad_norm": 0.13070949912071228,
"learning_rate": 7.618355257769111e-11,
"loss": 1.2834131717681885,
"step": 634
},
{
"epoch": 1.1357142857142857,
"grad_norm": 0.2657448947429657,
"learning_rate": 7.602708827228779e-11,
"loss": 1.3757896423339844,
"step": 636
},
{
"epoch": 1.1392857142857142,
"grad_norm": 0.20995846390724182,
"learning_rate": 7.587029807575483e-11,
"loss": 1.4640997648239136,
"step": 638
},
{
"epoch": 1.1428571428571428,
"grad_norm": 0.14955592155456543,
"learning_rate": 7.571318441812598e-11,
"loss": 1.3956358432769775,
"step": 640
},
{
"epoch": 1.1464285714285714,
"grad_norm": 0.2415839433670044,
"learning_rate": 7.55557497344482e-11,
"loss": 1.3727810382843018,
"step": 642
},
{
"epoch": 1.15,
"grad_norm": 0.18018396198749542,
"learning_rate": 7.539799646474393e-11,
"loss": 1.3874338865280151,
"step": 644
},
{
"epoch": 1.1535714285714285,
"grad_norm": 0.5012511014938354,
"learning_rate": 7.52399270539732e-11,
"loss": 1.5338712930679321,
"step": 646
},
{
"epoch": 1.157142857142857,
"grad_norm": 0.19371801614761353,
"learning_rate": 7.508154395199592e-11,
"loss": 1.4757752418518066,
"step": 648
},
{
"epoch": 1.1607142857142858,
"grad_norm": 0.1719474196434021,
"learning_rate": 7.492284961353361e-11,
"loss": 1.3998007774353027,
"step": 650
},
{
"epoch": 1.1642857142857144,
"grad_norm": 0.2309015393257141,
"learning_rate": 7.476384649813167e-11,
"loss": 1.4197440147399902,
"step": 652
},
{
"epoch": 1.167857142857143,
"grad_norm": 0.26694613695144653,
"learning_rate": 7.460453707012108e-11,
"loss": 1.4728981256484985,
"step": 654
},
{
"epoch": 1.1714285714285715,
"grad_norm": 0.24615098536014557,
"learning_rate": 7.44449237985802e-11,
"loss": 1.36574387550354,
"step": 656
},
{
"epoch": 1.175,
"grad_norm": 0.17588579654693604,
"learning_rate": 7.428500915729663e-11,
"loss": 1.478152871131897,
"step": 658
},
{
"epoch": 1.1785714285714286,
"grad_norm": 0.21529220044612885,
"learning_rate": 7.412479562472872e-11,
"loss": 1.4191372394561768,
"step": 660
},
{
"epoch": 1.1821428571428572,
"grad_norm": 0.24417810142040253,
"learning_rate": 7.396428568396729e-11,
"loss": 1.262759804725647,
"step": 662
},
{
"epoch": 1.1857142857142857,
"grad_norm": 0.22581692039966583,
"learning_rate": 7.3803481822697e-11,
"loss": 1.380365014076233,
"step": 664
},
{
"epoch": 1.1892857142857143,
"grad_norm": 0.2688646912574768,
"learning_rate": 7.364238653315794e-11,
"loss": 1.4285253286361694,
"step": 666
},
{
"epoch": 1.1928571428571428,
"grad_norm": 0.19933386147022247,
"learning_rate": 7.348100231210697e-11,
"loss": 1.3763307332992554,
"step": 668
},
{
"epoch": 1.1964285714285714,
"grad_norm": 0.2015334814786911,
"learning_rate": 7.331933166077885e-11,
"loss": 1.281455159187317,
"step": 670
},
{
"epoch": 1.2,
"grad_norm": 0.15446756780147552,
"learning_rate": 7.315737708484779e-11,
"loss": 1.3722114562988281,
"step": 672
},
{
"epoch": 1.2035714285714285,
"grad_norm": 0.24289092421531677,
"learning_rate": 7.299514109438834e-11,
"loss": 1.385769248008728,
"step": 674
},
{
"epoch": 1.207142857142857,
"grad_norm": 0.18633770942687988,
"learning_rate": 7.283262620383664e-11,
"loss": 1.3191158771514893,
"step": 676
},
{
"epoch": 1.2107142857142856,
"grad_norm": 0.3428845703601837,
"learning_rate": 7.266983493195134e-11,
"loss": 1.3687481880187988,
"step": 678
},
{
"epoch": 1.2142857142857142,
"grad_norm": 0.1786336749792099,
"learning_rate": 7.250676980177467e-11,
"loss": 1.386405110359192,
"step": 680
},
{
"epoch": 1.217857142857143,
"grad_norm": 0.21881183981895447,
"learning_rate": 7.234343334059331e-11,
"loss": 1.4689749479293823,
"step": 682
},
{
"epoch": 1.2214285714285715,
"grad_norm": 0.2003452330827713,
"learning_rate": 7.217982807989915e-11,
"loss": 1.3969757556915283,
"step": 684
},
{
"epoch": 1.225,
"grad_norm": 0.23969583213329315,
"learning_rate": 7.201595655535011e-11,
"loss": 1.5124036073684692,
"step": 686
},
{
"epoch": 1.2285714285714286,
"grad_norm": 0.2236098349094391,
"learning_rate": 7.185182130673088e-11,
"loss": 1.4658269882202148,
"step": 688
},
{
"epoch": 1.2321428571428572,
"grad_norm": 0.2575554847717285,
"learning_rate": 7.168742487791345e-11,
"loss": 1.4607081413269043,
"step": 690
},
{
"epoch": 1.2357142857142858,
"grad_norm": 0.22493329644203186,
"learning_rate": 7.152276981681781e-11,
"loss": 1.4938011169433594,
"step": 692
},
{
"epoch": 1.2392857142857143,
"grad_norm": 0.19840767979621887,
"learning_rate": 7.135785867537235e-11,
"loss": 1.3276569843292236,
"step": 694
},
{
"epoch": 1.2428571428571429,
"grad_norm": 0.26204267144203186,
"learning_rate": 7.119269400947436e-11,
"loss": 1.3468204736709595,
"step": 696
},
{
"epoch": 1.2464285714285714,
"grad_norm": 0.20326542854309082,
"learning_rate": 7.102727837895048e-11,
"loss": 1.2875033617019653,
"step": 698
},
{
"epoch": 1.25,
"grad_norm": 0.8089625835418701,
"learning_rate": 7.086161434751684e-11,
"loss": 1.326261043548584,
"step": 700
},
{
"epoch": 1.2535714285714286,
"grad_norm": 0.6610464453697205,
"learning_rate": 7.06957044827395e-11,
"loss": 1.4190644025802612,
"step": 702
},
{
"epoch": 1.2571428571428571,
"grad_norm": 0.3214384615421295,
"learning_rate": 7.052955135599468e-11,
"loss": 1.5597217082977295,
"step": 704
},
{
"epoch": 1.2607142857142857,
"grad_norm": 0.3793260157108307,
"learning_rate": 7.03631575424287e-11,
"loss": 1.3309506177902222,
"step": 706
},
{
"epoch": 1.2642857142857142,
"grad_norm": 0.3192151188850403,
"learning_rate": 7.019652562091826e-11,
"loss": 1.5660918951034546,
"step": 708
},
{
"epoch": 1.2678571428571428,
"grad_norm": 0.2909698188304901,
"learning_rate": 7.002965817403042e-11,
"loss": 1.4371814727783203,
"step": 710
},
{
"epoch": 1.2714285714285714,
"grad_norm": 0.511368453502655,
"learning_rate": 6.986255778798253e-11,
"loss": 1.5524672269821167,
"step": 712
},
{
"epoch": 1.275,
"grad_norm": 0.25665637850761414,
"learning_rate": 6.969522705260218e-11,
"loss": 1.5821752548217773,
"step": 714
},
{
"epoch": 1.2785714285714285,
"grad_norm": 0.4477701187133789,
"learning_rate": 6.952766856128708e-11,
"loss": 1.3979148864746094,
"step": 716
},
{
"epoch": 1.282142857142857,
"grad_norm": 0.44492456316947937,
"learning_rate": 6.935988491096485e-11,
"loss": 1.013071894645691,
"step": 718
},
{
"epoch": 1.2857142857142856,
"grad_norm": 0.21502305567264557,
"learning_rate": 6.919187870205275e-11,
"loss": 1.3727712631225586,
"step": 720
},
{
"epoch": 1.2892857142857144,
"grad_norm": 0.3092592656612396,
"learning_rate": 6.902365253841737e-11,
"loss": 1.3816754817962646,
"step": 722
},
{
"epoch": 1.292857142857143,
"grad_norm": 0.1694367378950119,
"learning_rate": 6.885520902733434e-11,
"loss": 1.69234299659729,
"step": 724
},
{
"epoch": 1.2964285714285715,
"grad_norm": 0.44906988739967346,
"learning_rate": 6.868655077944788e-11,
"loss": 1.3512274026870728,
"step": 726
},
{
"epoch": 1.3,
"grad_norm": 0.21286985278129578,
"learning_rate": 6.85176804087303e-11,
"loss": 1.219728946685791,
"step": 728
},
{
"epoch": 1.3035714285714286,
"grad_norm": 0.38225606083869934,
"learning_rate": 6.834860053244154e-11,
"loss": 1.3647881746292114,
"step": 730
},
{
"epoch": 1.3071428571428572,
"grad_norm": 0.19242212176322937,
"learning_rate": 6.817931377108863e-11,
"loss": 1.635160207748413,
"step": 732
},
{
"epoch": 1.3107142857142857,
"grad_norm": 0.24187368154525757,
"learning_rate": 6.800982274838496e-11,
"loss": 0.9980994462966919,
"step": 734
},
{
"epoch": 1.3142857142857143,
"grad_norm": 0.22730109095573425,
"learning_rate": 6.784013009120974e-11,
"loss": 1.61106276512146,
"step": 736
},
{
"epoch": 1.3178571428571428,
"grad_norm": 0.34292080998420715,
"learning_rate": 6.767023842956724e-11,
"loss": 1.4898104667663574,
"step": 738
},
{
"epoch": 1.3214285714285714,
"grad_norm": 0.6306799650192261,
"learning_rate": 6.750015039654603e-11,
"loss": 1.2501347064971924,
"step": 740
},
{
"epoch": 1.325,
"grad_norm": 0.5413057208061218,
"learning_rate": 6.732986862827812e-11,
"loss": 1.9483036994934082,
"step": 742
},
{
"epoch": 1.3285714285714285,
"grad_norm": 0.34432464838027954,
"learning_rate": 6.715939576389822e-11,
"loss": 1.4203524589538574,
"step": 744
},
{
"epoch": 1.332142857142857,
"grad_norm": 0.2747512459754944,
"learning_rate": 6.698873444550271e-11,
"loss": 1.3994556665420532,
"step": 746
},
{
"epoch": 1.3357142857142856,
"grad_norm": 0.4280205965042114,
"learning_rate": 6.681788731810879e-11,
"loss": 1.5723004341125488,
"step": 748
},
{
"epoch": 1.3392857142857144,
"grad_norm": 0.33289003372192383,
"learning_rate": 6.664685702961344e-11,
"loss": 1.5144555568695068,
"step": 750
},
{
"epoch": 1.342857142857143,
"grad_norm": 0.17391818761825562,
"learning_rate": 6.647564623075235e-11,
"loss": 1.1204313039779663,
"step": 752
},
{
"epoch": 1.3464285714285715,
"grad_norm": 0.16711686551570892,
"learning_rate": 6.630425757505894e-11,
"loss": 0.9277492165565491,
"step": 754
},
{
"epoch": 1.35,
"grad_norm": 0.20722971856594086,
"learning_rate": 6.613269371882309e-11,
"loss": 1.4686331748962402,
"step": 756
},
{
"epoch": 1.3535714285714286,
"grad_norm": 0.31218937039375305,
"learning_rate": 6.596095732105011e-11,
"loss": 1.5793182849884033,
"step": 758
},
{
"epoch": 1.3571428571428572,
"grad_norm": 0.9370469450950623,
"learning_rate": 6.578905104341944e-11,
"loss": 1.4430747032165527,
"step": 760
},
{
"epoch": 1.3607142857142858,
"grad_norm": 0.15756045281887054,
"learning_rate": 6.561697755024343e-11,
"loss": 1.2268548011779785,
"step": 762
},
{
"epoch": 1.3642857142857143,
"grad_norm": 0.18516050279140472,
"learning_rate": 6.544473950842606e-11,
"loss": 1.4649068117141724,
"step": 764
},
{
"epoch": 1.3678571428571429,
"grad_norm": 0.19654791057109833,
"learning_rate": 6.527233958742153e-11,
"loss": 1.533630609512329,
"step": 766
},
{
"epoch": 1.3714285714285714,
"grad_norm": 0.2150849997997284,
"learning_rate": 6.509978045919307e-11,
"loss": 1.478027105331421,
"step": 768
},
{
"epoch": 1.375,
"grad_norm": 0.2530464828014374,
"learning_rate": 6.492706479817125e-11,
"loss": 1.3111753463745117,
"step": 770
},
{
"epoch": 1.3785714285714286,
"grad_norm": 0.3632781207561493,
"learning_rate": 6.475419528121279e-11,
"loss": 1.6716187000274658,
"step": 772
},
{
"epoch": 1.3821428571428571,
"grad_norm": 0.25495582818984985,
"learning_rate": 6.45811745875589e-11,
"loss": 1.4510148763656616,
"step": 774
},
{
"epoch": 1.3857142857142857,
"grad_norm": 0.17029410600662231,
"learning_rate": 6.440800539879391e-11,
"loss": 1.3360023498535156,
"step": 776
},
{
"epoch": 1.3892857142857142,
"grad_norm": 0.17277792096138,
"learning_rate": 6.423469039880355e-11,
"loss": 1.3302971124649048,
"step": 778
},
{
"epoch": 1.3928571428571428,
"grad_norm": 0.2622109353542328,
"learning_rate": 6.406123227373342e-11,
"loss": 1.4888077974319458,
"step": 780
},
{
"epoch": 1.3964285714285714,
"grad_norm": 0.3945457935333252,
"learning_rate": 6.388763371194741e-11,
"loss": 1.3184008598327637,
"step": 782
},
{
"epoch": 1.4,
"grad_norm": 0.28038397431373596,
"learning_rate": 6.371389740398596e-11,
"loss": 1.4953203201293945,
"step": 784
},
{
"epoch": 1.4035714285714285,
"grad_norm": 0.28005000948905945,
"learning_rate": 6.35400260425244e-11,
"loss": 1.5821740627288818,
"step": 786
},
{
"epoch": 1.407142857142857,
"grad_norm": 0.2094164341688156,
"learning_rate": 6.336602232233117e-11,
"loss": 1.5140842199325562,
"step": 788
},
{
"epoch": 1.4107142857142856,
"grad_norm": 0.22712309658527374,
"learning_rate": 6.319188894022612e-11,
"loss": 1.6447871923446655,
"step": 790
},
{
"epoch": 1.4142857142857144,
"grad_norm": 0.13142111897468567,
"learning_rate": 6.301762859503869e-11,
"loss": 1.5251423120498657,
"step": 792
},
{
"epoch": 1.417857142857143,
"grad_norm": 0.26605498790740967,
"learning_rate": 6.284324398756605e-11,
"loss": 1.3444472551345825,
"step": 794
},
{
"epoch": 1.4214285714285715,
"grad_norm": 0.23393483459949493,
"learning_rate": 6.266873782053131e-11,
"loss": 1.2516331672668457,
"step": 796
},
{
"epoch": 1.425,
"grad_norm": 0.30043426156044006,
"learning_rate": 6.249411279854152e-11,
"loss": 1.2557588815689087,
"step": 798
},
{
"epoch": 1.4285714285714286,
"grad_norm": 0.5380626916885376,
"learning_rate": 6.231937162804584e-11,
"loss": 1.3160637617111206,
"step": 800
},
{
"epoch": 1.4321428571428572,
"grad_norm": 0.24224776029586792,
"learning_rate": 6.214451701729363e-11,
"loss": 1.4175952672958374,
"step": 802
},
{
"epoch": 1.4357142857142857,
"grad_norm": 0.18135520815849304,
"learning_rate": 6.196955167629236e-11,
"loss": 1.5174418687820435,
"step": 804
},
{
"epoch": 1.4392857142857143,
"grad_norm": 0.3037535846233368,
"learning_rate": 6.179447831676566e-11,
"loss": 1.581838846206665,
"step": 806
},
{
"epoch": 1.4428571428571428,
"grad_norm": 0.29367074370384216,
"learning_rate": 6.161929965211134e-11,
"loss": 1.4578280448913574,
"step": 808
},
{
"epoch": 1.4464285714285714,
"grad_norm": 0.3817584216594696,
"learning_rate": 6.14440183973593e-11,
"loss": 1.678054690361023,
"step": 810
},
{
"epoch": 1.45,
"grad_norm": 0.46394509077072144,
"learning_rate": 6.12686372691294e-11,
"loss": 1.5505553483963013,
"step": 812
},
{
"epoch": 1.4535714285714285,
"grad_norm": 0.1447513848543167,
"learning_rate": 6.109315898558943e-11,
"loss": 1.2679914236068726,
"step": 814
},
{
"epoch": 1.457142857142857,
"grad_norm": 0.24315816164016724,
"learning_rate": 6.091758626641295e-11,
"loss": 1.566739797592163,
"step": 816
},
{
"epoch": 1.4607142857142856,
"grad_norm": 0.5375627875328064,
"learning_rate": 6.074192183273714e-11,
"loss": 1.3268802165985107,
"step": 818
},
{
"epoch": 1.4642857142857144,
"grad_norm": 0.2522839605808258,
"learning_rate": 6.056616840712064e-11,
"loss": 1.4653449058532715,
"step": 820
},
{
"epoch": 1.467857142857143,
"grad_norm": 0.40299344062805176,
"learning_rate": 6.039032871350136e-11,
"loss": 1.4160243272781372,
"step": 822
},
{
"epoch": 1.4714285714285715,
"grad_norm": 0.6600869297981262,
"learning_rate": 6.021440547715418e-11,
"loss": 1.5625566244125366,
"step": 824
},
{
"epoch": 1.475,
"grad_norm": 0.21130329370498657,
"learning_rate": 6.003840142464887e-11,
"loss": 1.4704797267913818,
"step": 826
},
{
"epoch": 1.4785714285714286,
"grad_norm": 0.2992205321788788,
"learning_rate": 5.986231928380765e-11,
"loss": 1.5831502676010132,
"step": 828
},
{
"epoch": 1.4821428571428572,
"grad_norm": 0.18447980284690857,
"learning_rate": 5.968616178366304e-11,
"loss": 1.2691162824630737,
"step": 830
},
{
"epoch": 1.4857142857142858,
"grad_norm": 0.23359040915966034,
"learning_rate": 5.95099316544156e-11,
"loss": 1.2079955339431763,
"step": 832
},
{
"epoch": 1.4892857142857143,
"grad_norm": 0.21547919511795044,
"learning_rate": 5.933363162739138e-11,
"loss": 1.5598586797714233,
"step": 834
},
{
"epoch": 1.4928571428571429,
"grad_norm": 0.41725102066993713,
"learning_rate": 5.915726443499991e-11,
"loss": 1.6024370193481445,
"step": 836
},
{
"epoch": 1.4964285714285714,
"grad_norm": 0.23062889277935028,
"learning_rate": 5.89808328106916e-11,
"loss": 1.4700309038162231,
"step": 838
},
{
"epoch": 1.5,
"grad_norm": 0.17646436393260956,
"learning_rate": 5.880433948891548e-11,
"loss": 1.4606246948242188,
"step": 840
},
{
"epoch": 1.5035714285714286,
"grad_norm": 0.1672886461019516,
"learning_rate": 5.862778720507684e-11,
"loss": 1.473752498626709,
"step": 842
},
{
"epoch": 1.5071428571428571,
"grad_norm": 0.18604016304016113,
"learning_rate": 5.845117869549476e-11,
"loss": 1.570174217224121,
"step": 844
},
{
"epoch": 1.5107142857142857,
"grad_norm": 0.2805999517440796,
"learning_rate": 5.827451669735976e-11,
"loss": 1.525863528251648,
"step": 846
},
{
"epoch": 1.5142857142857142,
"grad_norm": 0.2228318601846695,
"learning_rate": 5.80978039486914e-11,
"loss": 1.356152892112732,
"step": 848
},
{
"epoch": 1.5178571428571428,
"grad_norm": 0.213277667760849,
"learning_rate": 5.79210431882957e-11,
"loss": 1.0404657125473022,
"step": 850
},
{
"epoch": 1.5214285714285714,
"grad_norm": 0.23507511615753174,
"learning_rate": 5.774423715572289e-11,
"loss": 1.117300271987915,
"step": 852
},
{
"epoch": 1.525,
"grad_norm": 0.2990909516811371,
"learning_rate": 5.7567388591224835e-11,
"loss": 1.3961025476455688,
"step": 854
},
{
"epoch": 1.5285714285714285,
"grad_norm": 0.37220829725265503,
"learning_rate": 5.739050023571257e-11,
"loss": 1.18937087059021,
"step": 856
},
{
"epoch": 1.532142857142857,
"grad_norm": 0.14452393352985382,
"learning_rate": 5.7213574830713854e-11,
"loss": 1.3321001529693604,
"step": 858
},
{
"epoch": 1.5357142857142856,
"grad_norm": 0.23967216908931732,
"learning_rate": 5.703661511833064e-11,
"loss": 1.2635600566864014,
"step": 860
},
{
"epoch": 1.5392857142857141,
"grad_norm": 0.1488959789276123,
"learning_rate": 5.6859623841196594e-11,
"loss": 1.4870667457580566,
"step": 862
},
{
"epoch": 1.5428571428571427,
"grad_norm": 0.10835379362106323,
"learning_rate": 5.668260374243467e-11,
"loss": 1.1637285947799683,
"step": 864
},
{
"epoch": 1.5464285714285713,
"grad_norm": 0.26973676681518555,
"learning_rate": 5.650555756561439e-11,
"loss": 1.4411020278930664,
"step": 866
},
{
"epoch": 1.55,
"grad_norm": 0.11648156493902206,
"learning_rate": 5.632848805470957e-11,
"loss": 1.4853591918945312,
"step": 868
},
{
"epoch": 1.5535714285714286,
"grad_norm": 0.24108168482780457,
"learning_rate": 5.6151397954055585e-11,
"loss": 1.235764741897583,
"step": 870
},
{
"epoch": 1.5571428571428572,
"grad_norm": 0.18987397849559784,
"learning_rate": 5.5974290008306997e-11,
"loss": 1.589194655418396,
"step": 872
},
{
"epoch": 1.5607142857142857,
"grad_norm": 0.28706496953964233,
"learning_rate": 5.579716696239487e-11,
"loss": 1.270675539970398,
"step": 874
},
{
"epoch": 1.5642857142857143,
"grad_norm": 0.38785573840141296,
"learning_rate": 5.562003156148434e-11,
"loss": 1.3905096054077148,
"step": 876
},
{
"epoch": 1.5678571428571428,
"grad_norm": 0.3089293837547302,
"learning_rate": 5.5442886550932024e-11,
"loss": 1.4197461605072021,
"step": 878
},
{
"epoch": 1.5714285714285714,
"grad_norm": 0.5964844822883606,
"learning_rate": 5.5265734676243505e-11,
"loss": 1.466257095336914,
"step": 880
},
{
"epoch": 1.575,
"grad_norm": 0.1559826284646988,
"learning_rate": 5.508857868303068e-11,
"loss": 1.310103178024292,
"step": 882
},
{
"epoch": 1.5785714285714287,
"grad_norm": 0.1740395873785019,
"learning_rate": 5.4911421316969336e-11,
"loss": 1.1727169752120972,
"step": 884
},
{
"epoch": 1.5821428571428573,
"grad_norm": 0.2936098873615265,
"learning_rate": 5.47342653237565e-11,
"loss": 1.482723593711853,
"step": 886
},
{
"epoch": 1.5857142857142859,
"grad_norm": 0.3703981041908264,
"learning_rate": 5.4557113449067964e-11,
"loss": 1.3175097703933716,
"step": 888
},
{
"epoch": 1.5892857142857144,
"grad_norm": 0.18052178621292114,
"learning_rate": 5.4379968438515673e-11,
"loss": 1.2715235948562622,
"step": 890
},
{
"epoch": 1.592857142857143,
"grad_norm": 0.4644646644592285,
"learning_rate": 5.420283303760515e-11,
"loss": 1.1736079454421997,
"step": 892
},
{
"epoch": 1.5964285714285715,
"grad_norm": 0.31031447649002075,
"learning_rate": 5.402570999169303e-11,
"loss": 1.447801947593689,
"step": 894
},
{
"epoch": 1.6,
"grad_norm": 0.26687949895858765,
"learning_rate": 5.384860204594442e-11,
"loss": 1.3616970777511597,
"step": 896
},
{
"epoch": 1.6035714285714286,
"grad_norm": 0.3266461193561554,
"learning_rate": 5.3671511945290443e-11,
"loss": 1.4090511798858643,
"step": 898
},
{
"epoch": 1.6071428571428572,
"grad_norm": 0.270843505859375,
"learning_rate": 5.3494442434385626e-11,
"loss": 1.4962491989135742,
"step": 900
},
{
"epoch": 1.6107142857142858,
"grad_norm": 0.2539942264556885,
"learning_rate": 5.331739625756535e-11,
"loss": 1.484360933303833,
"step": 902
},
{
"epoch": 1.6142857142857143,
"grad_norm": 0.2731671929359436,
"learning_rate": 5.314037615880341e-11,
"loss": 1.333021640777588,
"step": 904
},
{
"epoch": 1.6178571428571429,
"grad_norm": 0.3004004955291748,
"learning_rate": 5.2963384881669383e-11,
"loss": 1.3362126350402832,
"step": 906
},
{
"epoch": 1.6214285714285714,
"grad_norm": 0.18505540490150452,
"learning_rate": 5.278642516928617e-11,
"loss": 1.4492074251174927,
"step": 908
},
{
"epoch": 1.625,
"grad_norm": 0.2023400068283081,
"learning_rate": 5.260949976428745e-11,
"loss": 1.232993721961975,
"step": 910
},
{
"epoch": 1.6285714285714286,
"grad_norm": 0.3517407178878784,
"learning_rate": 5.243261140877517e-11,
"loss": 1.5753288269042969,
"step": 912
},
{
"epoch": 1.6321428571428571,
"grad_norm": 0.5919647216796875,
"learning_rate": 5.225576284427712e-11,
"loss": 1.4577170610427856,
"step": 914
},
{
"epoch": 1.6357142857142857,
"grad_norm": 0.37958261370658875,
"learning_rate": 5.2078956811704316e-11,
"loss": 1.1840314865112305,
"step": 916
},
{
"epoch": 1.6392857142857142,
"grad_norm": 0.48498472571372986,
"learning_rate": 5.190219605130863e-11,
"loss": 1.0771994590759277,
"step": 918
},
{
"epoch": 1.6428571428571428,
"grad_norm": 0.22175057232379913,
"learning_rate": 5.172548330264023e-11,
"loss": 1.485005259513855,
"step": 920
},
{
"epoch": 1.6464285714285714,
"grad_norm": 0.15276828408241272,
"learning_rate": 5.1548821304505246e-11,
"loss": 1.4146548509597778,
"step": 922
},
{
"epoch": 1.65,
"grad_norm": 0.338878333568573,
"learning_rate": 5.137221279492317e-11,
"loss": 1.4840327501296997,
"step": 924
},
{
"epoch": 1.6535714285714285,
"grad_norm": 0.17189446091651917,
"learning_rate": 5.119566051108453e-11,
"loss": 1.4107526540756226,
"step": 926
},
{
"epoch": 1.657142857142857,
"grad_norm": 0.16768991947174072,
"learning_rate": 5.1019167189308406e-11,
"loss": 1.46079421043396,
"step": 928
},
{
"epoch": 1.6607142857142856,
"grad_norm": 0.2172195166349411,
"learning_rate": 5.08427355650001e-11,
"loss": 1.4025652408599854,
"step": 930
},
{
"epoch": 1.6642857142857141,
"grad_norm": 0.20361092686653137,
"learning_rate": 5.0666368372608627e-11,
"loss": 1.393689513206482,
"step": 932
},
{
"epoch": 1.6678571428571427,
"grad_norm": 0.19188135862350464,
"learning_rate": 5.0490068345584426e-11,
"loss": 1.3864973783493042,
"step": 934
},
{
"epoch": 1.6714285714285713,
"grad_norm": 0.3993217945098877,
"learning_rate": 5.031383821633695e-11,
"loss": 1.3697587251663208,
"step": 936
},
{
"epoch": 1.675,
"grad_norm": 0.25149768590927124,
"learning_rate": 5.013768071619237e-11,
"loss": 1.4470927715301514,
"step": 938
},
{
"epoch": 1.6785714285714286,
"grad_norm": 0.20718373358249664,
"learning_rate": 4.9961598575351155e-11,
"loss": 1.454539179801941,
"step": 940
},
{
"epoch": 1.6821428571428572,
"grad_norm": 0.5436975955963135,
"learning_rate": 4.9785594522845833e-11,
"loss": 1.5738086700439453,
"step": 942
},
{
"epoch": 1.6857142857142857,
"grad_norm": 0.3914676904678345,
"learning_rate": 4.9609671286498646e-11,
"loss": 1.32332181930542,
"step": 944
},
{
"epoch": 1.6892857142857143,
"grad_norm": 0.22571302950382233,
"learning_rate": 4.943383159287935e-11,
"loss": 1.2254847288131714,
"step": 946
},
{
"epoch": 1.6928571428571428,
"grad_norm": 0.255727618932724,
"learning_rate": 4.9258078167262875e-11,
"loss": 1.4280102252960205,
"step": 948
},
{
"epoch": 1.6964285714285714,
"grad_norm": 0.17480432987213135,
"learning_rate": 4.9082413733587075e-11,
"loss": 1.4205875396728516,
"step": 950
},
{
"epoch": 1.7,
"grad_norm": 0.3139236569404602,
"learning_rate": 4.8906841014410586e-11,
"loss": 1.4274033308029175,
"step": 952
},
{
"epoch": 1.7035714285714287,
"grad_norm": 0.1673169583082199,
"learning_rate": 4.873136273087061e-11,
"loss": 1.2917604446411133,
"step": 954
},
{
"epoch": 1.7071428571428573,
"grad_norm": 0.13775251805782318,
"learning_rate": 4.855598160264071e-11,
"loss": 1.39280104637146,
"step": 956
},
{
"epoch": 1.7107142857142859,
"grad_norm": 0.29080042243003845,
"learning_rate": 4.838070034788865e-11,
"loss": 1.3744114637374878,
"step": 958
},
{
"epoch": 1.7142857142857144,
"grad_norm": 0.1755424588918686,
"learning_rate": 4.820552168323433e-11,
"loss": 1.3679919242858887,
"step": 960
},
{
"epoch": 1.717857142857143,
"grad_norm": 0.26247265934944153,
"learning_rate": 4.8030448323707654e-11,
"loss": 1.4235137701034546,
"step": 962
},
{
"epoch": 1.7214285714285715,
"grad_norm": 0.191309854388237,
"learning_rate": 4.785548298270639e-11,
"loss": 1.430955171585083,
"step": 964
},
{
"epoch": 1.725,
"grad_norm": 0.25478798151016235,
"learning_rate": 4.7680628371954174e-11,
"loss": 1.4475382566452026,
"step": 966
},
{
"epoch": 1.7285714285714286,
"grad_norm": 0.3051491677761078,
"learning_rate": 4.7505887201458484e-11,
"loss": 1.4724607467651367,
"step": 968
},
{
"epoch": 1.7321428571428572,
"grad_norm": 0.2450665533542633,
"learning_rate": 4.73312621794687e-11,
"loss": 1.342381477355957,
"step": 970
},
{
"epoch": 1.7357142857142858,
"grad_norm": 0.20480383932590485,
"learning_rate": 4.7156756012433956e-11,
"loss": 1.4223990440368652,
"step": 972
},
{
"epoch": 1.7392857142857143,
"grad_norm": 0.2236645370721817,
"learning_rate": 4.698237140496132e-11,
"loss": 1.3705493211746216,
"step": 974
},
{
"epoch": 1.7428571428571429,
"grad_norm": 0.32822078466415405,
"learning_rate": 4.680811105977389e-11,
"loss": 1.34307062625885,
"step": 976
},
{
"epoch": 1.7464285714285714,
"grad_norm": 0.2703489065170288,
"learning_rate": 4.663397767766885e-11,
"loss": 1.3283851146697998,
"step": 978
},
{
"epoch": 1.75,
"grad_norm": 0.46878188848495483,
"learning_rate": 4.645997395747562e-11,
"loss": 1.5840815305709839,
"step": 980
},
{
"epoch": 1.7535714285714286,
"grad_norm": 0.1596301943063736,
"learning_rate": 4.628610259601406e-11,
"loss": 1.1211912631988525,
"step": 982
},
{
"epoch": 1.7571428571428571,
"grad_norm": 0.1534484177827835,
"learning_rate": 4.6112366288052587e-11,
"loss": 1.3600302934646606,
"step": 984
},
{
"epoch": 1.7607142857142857,
"grad_norm": 0.1662014126777649,
"learning_rate": 4.5938767726266585e-11,
"loss": 1.4457441568374634,
"step": 986
},
{
"epoch": 1.7642857142857142,
"grad_norm": 0.14455421268939972,
"learning_rate": 4.576530960119646e-11,
"loss": 1.3325226306915283,
"step": 988
},
{
"epoch": 1.7678571428571428,
"grad_norm": 0.18354535102844238,
"learning_rate": 4.55919946012061e-11,
"loss": 1.3722156286239624,
"step": 990
},
{
"epoch": 1.7714285714285714,
"grad_norm": 0.20837841928005219,
"learning_rate": 4.5418825412441104e-11,
"loss": 1.3799716234207153,
"step": 992
},
{
"epoch": 1.775,
"grad_norm": 0.14612962305545807,
"learning_rate": 4.5245804718787233e-11,
"loss": 1.208733081817627,
"step": 994
},
{
"epoch": 1.7785714285714285,
"grad_norm": 0.2214983105659485,
"learning_rate": 4.507293520182877e-11,
"loss": 1.3159799575805664,
"step": 996
},
{
"epoch": 1.782142857142857,
"grad_norm": 0.19904594123363495,
"learning_rate": 4.4900219540806954e-11,
"loss": 1.276192545890808,
"step": 998
},
{
"epoch": 1.7857142857142856,
"grad_norm": 0.13550381362438202,
"learning_rate": 4.4727660412578454e-11,
"loss": 1.32045316696167,
"step": 1000
},
{
"epoch": 1.7892857142857141,
"grad_norm": 0.16557137668132782,
"learning_rate": 4.455526049157396e-11,
"loss": 1.293030023574829,
"step": 1002
},
{
"epoch": 1.7928571428571427,
"grad_norm": 0.31324100494384766,
"learning_rate": 4.438302244975658e-11,
"loss": 1.3284529447555542,
"step": 1004
},
{
"epoch": 1.7964285714285713,
"grad_norm": 0.16482140123844147,
"learning_rate": 4.421094895658058e-11,
"loss": 1.298207402229309,
"step": 1006
},
{
"epoch": 1.8,
"grad_norm": 0.13380751013755798,
"learning_rate": 4.4039042678949905e-11,
"loss": 1.2636936902999878,
"step": 1008
},
{
"epoch": 1.8035714285714286,
"grad_norm": 0.27826249599456787,
"learning_rate": 4.386730628117692e-11,
"loss": 1.3365410566329956,
"step": 1010
},
{
"epoch": 1.8071428571428572,
"grad_norm": 0.1916886866092682,
"learning_rate": 4.369574242494108e-11,
"loss": 1.3265316486358643,
"step": 1012
},
{
"epoch": 1.8107142857142857,
"grad_norm": 0.1272166520357132,
"learning_rate": 4.352435376924766e-11,
"loss": 1.215617299079895,
"step": 1014
},
{
"epoch": 1.8142857142857143,
"grad_norm": 0.13315260410308838,
"learning_rate": 4.335314297038656e-11,
"loss": 1.2893967628479004,
"step": 1016
},
{
"epoch": 1.8178571428571428,
"grad_norm": 0.4151960611343384,
"learning_rate": 4.318211268189121e-11,
"loss": 1.2345842123031616,
"step": 1018
},
{
"epoch": 1.8214285714285714,
"grad_norm": 0.14745569229125977,
"learning_rate": 4.30112655544973e-11,
"loss": 1.2733076810836792,
"step": 1020
},
{
"epoch": 1.825,
"grad_norm": 0.4383365511894226,
"learning_rate": 4.28406042361018e-11,
"loss": 1.2510349750518799,
"step": 1022
},
{
"epoch": 1.8285714285714287,
"grad_norm": 0.11978907883167267,
"learning_rate": 4.2670131371721885e-11,
"loss": 1.2697960138320923,
"step": 1024
},
{
"epoch": 1.8321428571428573,
"grad_norm": 0.16413728892803192,
"learning_rate": 4.249984960345399e-11,
"loss": 1.2707985639572144,
"step": 1026
},
{
"epoch": 1.8357142857142859,
"grad_norm": 0.14620473980903625,
"learning_rate": 4.232976157043277e-11,
"loss": 1.2601186037063599,
"step": 1028
},
{
"epoch": 1.8392857142857144,
"grad_norm": 0.15290690958499908,
"learning_rate": 4.215986990879027e-11,
"loss": 1.2626264095306396,
"step": 1030
},
{
"epoch": 1.842857142857143,
"grad_norm": 0.1867092102766037,
"learning_rate": 4.199017725161505e-11,
"loss": 1.3110102415084839,
"step": 1032
},
{
"epoch": 1.8464285714285715,
"grad_norm": 0.1754860281944275,
"learning_rate": 4.182068622891139e-11,
"loss": 1.29032301902771,
"step": 1034
},
{
"epoch": 1.85,
"grad_norm": 0.1369704008102417,
"learning_rate": 4.165139946755847e-11,
"loss": 1.2947062253952026,
"step": 1036
},
{
"epoch": 1.8535714285714286,
"grad_norm": 0.1493859589099884,
"learning_rate": 4.1482319591269726e-11,
"loss": 1.3114452362060547,
"step": 1038
},
{
"epoch": 1.8571428571428572,
"grad_norm": 0.19140051305294037,
"learning_rate": 4.131344922055213e-11,
"loss": 1.315718173980713,
"step": 1040
},
{
"epoch": 1.8607142857142858,
"grad_norm": 0.21373005211353302,
"learning_rate": 4.114479097266567e-11,
"loss": 1.2820724248886108,
"step": 1042
},
{
"epoch": 1.8642857142857143,
"grad_norm": 0.13293863832950592,
"learning_rate": 4.0976347461582655e-11,
"loss": 1.3095886707305908,
"step": 1044
},
{
"epoch": 1.8678571428571429,
"grad_norm": 0.142401784658432,
"learning_rate": 4.080812129794728e-11,
"loss": 1.2567329406738281,
"step": 1046
},
{
"epoch": 1.8714285714285714,
"grad_norm": 0.1333981454372406,
"learning_rate": 4.064011508903516e-11,
"loss": 1.2891546487808228,
"step": 1048
},
{
"epoch": 1.875,
"grad_norm": 0.2096826136112213,
"learning_rate": 4.047233143871292e-11,
"loss": 1.2824205160140991,
"step": 1050
},
{
"epoch": 1.8785714285714286,
"grad_norm": 0.17919301986694336,
"learning_rate": 4.030477294739783e-11,
"loss": 1.3100366592407227,
"step": 1052
},
{
"epoch": 1.8821428571428571,
"grad_norm": 0.16150566935539246,
"learning_rate": 4.013744221201749e-11,
"loss": 1.3002382516860962,
"step": 1054
},
{
"epoch": 1.8857142857142857,
"grad_norm": 0.1178724467754364,
"learning_rate": 3.997034182596958e-11,
"loss": 1.2348383665084839,
"step": 1056
},
{
"epoch": 1.8892857142857142,
"grad_norm": 0.22984254360198975,
"learning_rate": 3.980347437908175e-11,
"loss": 1.2670952081680298,
"step": 1058
},
{
"epoch": 1.8928571428571428,
"grad_norm": 0.13385139405727386,
"learning_rate": 3.963684245757131e-11,
"loss": 1.2981677055358887,
"step": 1060
},
{
"epoch": 1.8964285714285714,
"grad_norm": 0.4410691559314728,
"learning_rate": 3.9470448644005344e-11,
"loss": 1.3137519359588623,
"step": 1062
},
{
"epoch": 1.9,
"grad_norm": 0.3602881133556366,
"learning_rate": 3.930429551726049e-11,
"loss": 1.2973172664642334,
"step": 1064
},
{
"epoch": 1.9035714285714285,
"grad_norm": 0.13422204554080963,
"learning_rate": 3.913838565248318e-11,
"loss": 1.2817872762680054,
"step": 1066
},
{
"epoch": 1.907142857142857,
"grad_norm": 0.1284937709569931,
"learning_rate": 3.8972721621049546e-11,
"loss": 1.2446856498718262,
"step": 1068
},
{
"epoch": 1.9107142857142856,
"grad_norm": 0.14526879787445068,
"learning_rate": 3.880730599052564e-11,
"loss": 1.2884457111358643,
"step": 1070
},
{
"epoch": 1.9142857142857141,
"grad_norm": 0.13001468777656555,
"learning_rate": 3.8642141324627655e-11,
"loss": 1.3033688068389893,
"step": 1072
},
{
"epoch": 1.9178571428571427,
"grad_norm": 0.1109691932797432,
"learning_rate": 3.84772301831822e-11,
"loss": 1.2417011260986328,
"step": 1074
},
{
"epoch": 1.9214285714285713,
"grad_norm": 0.12410853058099747,
"learning_rate": 3.831257512208657e-11,
"loss": 1.2828723192214966,
"step": 1076
},
{
"epoch": 1.925,
"grad_norm": 0.15531232953071594,
"learning_rate": 3.8148178693269145e-11,
"loss": 1.269210934638977,
"step": 1078
},
{
"epoch": 1.9285714285714286,
"grad_norm": 0.16727633774280548,
"learning_rate": 3.79840434446499e-11,
"loss": 1.327274203300476,
"step": 1080
},
{
"epoch": 1.9321428571428572,
"grad_norm": 0.13850118219852448,
"learning_rate": 3.782017192010087e-11,
"loss": 1.329694390296936,
"step": 1082
},
{
"epoch": 1.9357142857142857,
"grad_norm": 0.14651495218276978,
"learning_rate": 3.76565666594067e-11,
"loss": 1.2990163564682007,
"step": 1084
},
{
"epoch": 1.9392857142857143,
"grad_norm": 0.14077956974506378,
"learning_rate": 3.749323019822534e-11,
"loss": 1.3179551362991333,
"step": 1086
},
{
"epoch": 1.9428571428571428,
"grad_norm": 0.14183518290519714,
"learning_rate": 3.733016506804867e-11,
"loss": 1.2862507104873657,
"step": 1088
},
{
"epoch": 1.9464285714285714,
"grad_norm": 0.19720809161663055,
"learning_rate": 3.716737379616337e-11,
"loss": 1.2341052293777466,
"step": 1090
},
{
"epoch": 1.95,
"grad_norm": 0.1752225160598755,
"learning_rate": 3.700485890561167e-11,
"loss": 1.2532352209091187,
"step": 1092
},
{
"epoch": 1.9535714285714287,
"grad_norm": 0.15048037469387054,
"learning_rate": 3.684262291515223e-11,
"loss": 1.2703880071640015,
"step": 1094
},
{
"epoch": 1.9571428571428573,
"grad_norm": 0.1569390445947647,
"learning_rate": 3.668066833922116e-11,
"loss": 1.2509340047836304,
"step": 1096
},
{
"epoch": 1.9607142857142859,
"grad_norm": 0.25200825929641724,
"learning_rate": 3.6518997687893055e-11,
"loss": 1.2597652673721313,
"step": 1098
},
{
"epoch": 1.9642857142857144,
"grad_norm": 0.14276815950870514,
"learning_rate": 3.635761346684206e-11,
"loss": 1.3016998767852783,
"step": 1100
},
{
"epoch": 1.967857142857143,
"grad_norm": 0.16960203647613525,
"learning_rate": 3.619651817730302e-11,
"loss": 1.2928533554077148,
"step": 1102
},
{
"epoch": 1.9714285714285715,
"grad_norm": 0.1542951613664627,
"learning_rate": 3.603571431603272e-11,
"loss": 1.3007863759994507,
"step": 1104
},
{
"epoch": 1.975,
"grad_norm": 0.17226935923099518,
"learning_rate": 3.5875204375271275e-11,
"loss": 1.2495512962341309,
"step": 1106
},
{
"epoch": 1.9785714285714286,
"grad_norm": 0.13276343047618866,
"learning_rate": 3.571499084270338e-11,
"loss": 1.2602217197418213,
"step": 1108
},
{
"epoch": 1.9821428571428572,
"grad_norm": 0.18922999501228333,
"learning_rate": 3.555507620141981e-11,
"loss": 1.2807466983795166,
"step": 1110
},
{
"epoch": 1.9857142857142858,
"grad_norm": 0.21290341019630432,
"learning_rate": 3.539546292987894e-11,
"loss": 1.313105583190918,
"step": 1112
},
{
"epoch": 1.9892857142857143,
"grad_norm": 0.1874661147594452,
"learning_rate": 3.523615350186834e-11,
"loss": 1.3670251369476318,
"step": 1114
},
{
"epoch": 1.9928571428571429,
"grad_norm": 0.24585479497909546,
"learning_rate": 3.507715038646641e-11,
"loss": 1.625638484954834,
"step": 1116
},
{
"epoch": 1.9964285714285714,
"grad_norm": 0.26914891600608826,
"learning_rate": 3.49184560480041e-11,
"loss": 1.5536011457443237,
"step": 1118
},
{
"epoch": 2.0,
"grad_norm": 0.5115702152252197,
"learning_rate": 3.476007294602678e-11,
"loss": 1.4410558938980103,
"step": 1120
},
{
"epoch": 2.0035714285714286,
"grad_norm": 0.19693709909915924,
"learning_rate": 3.46020035352561e-11,
"loss": 1.42737877368927,
"step": 1122
},
{
"epoch": 2.007142857142857,
"grad_norm": 0.14978626370429993,
"learning_rate": 3.444425026555182e-11,
"loss": 1.3573956489562988,
"step": 1124
},
{
"epoch": 2.0107142857142857,
"grad_norm": 0.13327878713607788,
"learning_rate": 3.4286815581874045e-11,
"loss": 1.3719631433486938,
"step": 1126
},
{
"epoch": 2.0142857142857142,
"grad_norm": 0.17517639696598053,
"learning_rate": 3.412970192424517e-11,
"loss": 1.4211511611938477,
"step": 1128
},
{
"epoch": 2.017857142857143,
"grad_norm": 0.1492166668176651,
"learning_rate": 3.397291172771221e-11,
"loss": 1.362390160560608,
"step": 1130
},
{
"epoch": 2.0214285714285714,
"grad_norm": 0.1388062685728073,
"learning_rate": 3.3816447422308884e-11,
"loss": 1.3313382863998413,
"step": 1132
},
{
"epoch": 2.025,
"grad_norm": 0.13775278627872467,
"learning_rate": 3.366031143301811e-11,
"loss": 1.2752572298049927,
"step": 1134
},
{
"epoch": 2.0285714285714285,
"grad_norm": 0.15108071267604828,
"learning_rate": 3.3504506179734254e-11,
"loss": 1.344627022743225,
"step": 1136
},
{
"epoch": 2.032142857142857,
"grad_norm": 0.1777033656835556,
"learning_rate": 3.334903407722587e-11,
"loss": 1.3156089782714844,
"step": 1138
},
{
"epoch": 2.0357142857142856,
"grad_norm": 0.1836261749267578,
"learning_rate": 3.319389753509803e-11,
"loss": 1.2969412803649902,
"step": 1140
},
{
"epoch": 2.039285714285714,
"grad_norm": 0.1210937574505806,
"learning_rate": 3.30390989577551e-11,
"loss": 1.2561373710632324,
"step": 1142
},
{
"epoch": 2.0428571428571427,
"grad_norm": 0.22169147431850433,
"learning_rate": 3.288464074436346e-11,
"loss": 1.418563961982727,
"step": 1144
},
{
"epoch": 2.0464285714285713,
"grad_norm": 0.1952674388885498,
"learning_rate": 3.273052528881433e-11,
"loss": 1.336297631263733,
"step": 1146
},
{
"epoch": 2.05,
"grad_norm": 0.15283362567424774,
"learning_rate": 3.257675497968661e-11,
"loss": 1.2213199138641357,
"step": 1148
},
{
"epoch": 2.0535714285714284,
"grad_norm": 0.20565065741539001,
"learning_rate": 3.242333220020994e-11,
"loss": 1.3224503993988037,
"step": 1150
},
{
"epoch": 2.057142857142857,
"grad_norm": 0.15253019332885742,
"learning_rate": 3.22702593282277e-11,
"loss": 1.2786815166473389,
"step": 1152
},
{
"epoch": 2.0607142857142855,
"grad_norm": 0.20985379815101624,
"learning_rate": 3.211753873616024e-11,
"loss": 1.367148756980896,
"step": 1154
},
{
"epoch": 2.064285714285714,
"grad_norm": 0.1880485862493515,
"learning_rate": 3.196517279096797e-11,
"loss": 1.4771831035614014,
"step": 1156
},
{
"epoch": 2.067857142857143,
"grad_norm": 0.21174761652946472,
"learning_rate": 3.181316385411479e-11,
"loss": 1.44190514087677,
"step": 1158
},
{
"epoch": 2.0714285714285716,
"grad_norm": 0.23373731970787048,
"learning_rate": 3.166151428153146e-11,
"loss": 1.2126356363296509,
"step": 1160
},
{
"epoch": 2.075,
"grad_norm": 0.16597457230091095,
"learning_rate": 3.1510226423579124e-11,
"loss": 1.3093327283859253,
"step": 1162
},
{
"epoch": 2.0785714285714287,
"grad_norm": 0.17415602505207062,
"learning_rate": 3.1359302625012785e-11,
"loss": 1.3309526443481445,
"step": 1164
},
{
"epoch": 2.0821428571428573,
"grad_norm": 0.2406979203224182,
"learning_rate": 3.1208745224945054e-11,
"loss": 1.3279399871826172,
"step": 1166
},
{
"epoch": 2.085714285714286,
"grad_norm": 0.15369997918605804,
"learning_rate": 3.105855655680986e-11,
"loss": 1.2411911487579346,
"step": 1168
},
{
"epoch": 2.0892857142857144,
"grad_norm": 0.1417544037103653,
"learning_rate": 3.090873894832628e-11,
"loss": 1.2670767307281494,
"step": 1170
},
{
"epoch": 2.092857142857143,
"grad_norm": 0.39732953906059265,
"learning_rate": 3.0759294721462496e-11,
"loss": 1.2473151683807373,
"step": 1172
},
{
"epoch": 2.0964285714285715,
"grad_norm": 0.19093656539916992,
"learning_rate": 3.061022619239977e-11,
"loss": 1.3719512224197388,
"step": 1174
},
{
"epoch": 2.1,
"grad_norm": 0.13845385611057281,
"learning_rate": 3.046153567149654e-11,
"loss": 1.3432111740112305,
"step": 1176
},
{
"epoch": 2.1035714285714286,
"grad_norm": 0.15389707684516907,
"learning_rate": 3.0313225463252715e-11,
"loss": 1.421315312385559,
"step": 1178
},
{
"epoch": 2.107142857142857,
"grad_norm": 0.1846085637807846,
"learning_rate": 3.0165297866273766e-11,
"loss": 1.4859906435012817,
"step": 1180
},
{
"epoch": 2.1107142857142858,
"grad_norm": 0.1564885377883911,
"learning_rate": 3.00177551732353e-11,
"loss": 1.3033545017242432,
"step": 1182
},
{
"epoch": 2.1142857142857143,
"grad_norm": 0.24117198586463928,
"learning_rate": 2.9870599670847367e-11,
"loss": 1.4248461723327637,
"step": 1184
},
{
"epoch": 2.117857142857143,
"grad_norm": 0.145203098654747,
"learning_rate": 2.972383363981917e-11,
"loss": 1.3876914978027344,
"step": 1186
},
{
"epoch": 2.1214285714285714,
"grad_norm": 0.20995774865150452,
"learning_rate": 2.95774593548236e-11,
"loss": 1.2380093336105347,
"step": 1188
},
{
"epoch": 2.125,
"grad_norm": 0.17368246614933014,
"learning_rate": 2.943147908446201e-11,
"loss": 1.297877311706543,
"step": 1190
},
{
"epoch": 2.1285714285714286,
"grad_norm": 0.3502728044986725,
"learning_rate": 2.9285895091229044e-11,
"loss": 1.0307722091674805,
"step": 1192
},
{
"epoch": 2.132142857142857,
"grad_norm": 0.19696146249771118,
"learning_rate": 2.9140709631477665e-11,
"loss": 1.2322001457214355,
"step": 1194
},
{
"epoch": 2.1357142857142857,
"grad_norm": 0.1824025809764862,
"learning_rate": 2.8995924955384046e-11,
"loss": 1.3014825582504272,
"step": 1196
},
{
"epoch": 2.1392857142857142,
"grad_norm": 0.17128019034862518,
"learning_rate": 2.8851543306912782e-11,
"loss": 1.3863450288772583,
"step": 1198
},
{
"epoch": 2.142857142857143,
"grad_norm": 0.1545657217502594,
"learning_rate": 2.8707566923782103e-11,
"loss": 1.2946215867996216,
"step": 1200
},
{
"epoch": 2.1464285714285714,
"grad_norm": 0.16589367389678955,
"learning_rate": 2.8563998037429164e-11,
"loss": 1.2779427766799927,
"step": 1202
},
{
"epoch": 2.15,
"grad_norm": 0.30030524730682373,
"learning_rate": 2.842083887297548e-11,
"loss": 1.2760953903198242,
"step": 1204
},
{
"epoch": 2.1535714285714285,
"grad_norm": 0.18602436780929565,
"learning_rate": 2.827809164919244e-11,
"loss": 1.4392411708831787,
"step": 1206
},
{
"epoch": 2.157142857142857,
"grad_norm": 0.15597227215766907,
"learning_rate": 2.8135758578466896e-11,
"loss": 1.3980666399002075,
"step": 1208
},
{
"epoch": 2.1607142857142856,
"grad_norm": 0.1635390818119049,
"learning_rate": 2.7993841866766962e-11,
"loss": 1.3193120956420898,
"step": 1210
},
{
"epoch": 2.164285714285714,
"grad_norm": 0.22947640717029572,
"learning_rate": 2.785234371360766e-11,
"loss": 1.342287540435791,
"step": 1212
},
{
"epoch": 2.1678571428571427,
"grad_norm": 0.20655785501003265,
"learning_rate": 2.7711266312016987e-11,
"loss": 1.373154640197754,
"step": 1214
},
{
"epoch": 2.1714285714285713,
"grad_norm": 0.1706349104642868,
"learning_rate": 2.757061184850183e-11,
"loss": 1.2490075826644897,
"step": 1216
},
{
"epoch": 2.175,
"grad_norm": 0.22908586263656616,
"learning_rate": 2.743038250301418e-11,
"loss": 1.379451036453247,
"step": 1218
},
{
"epoch": 2.1785714285714284,
"grad_norm": 0.17054013907909393,
"learning_rate": 2.7290580448917202e-11,
"loss": 1.3195239305496216,
"step": 1220
},
{
"epoch": 2.182142857142857,
"grad_norm": 0.2558165490627289,
"learning_rate": 2.7151207852951677e-11,
"loss": 1.1808172464370728,
"step": 1222
},
{
"epoch": 2.185714285714286,
"grad_norm": 0.1999257206916809,
"learning_rate": 2.7012266875202346e-11,
"loss": 1.2860252857208252,
"step": 1224
},
{
"epoch": 2.189285714285714,
"grad_norm": 0.23496678471565247,
"learning_rate": 2.6873759669064475e-11,
"loss": 1.3138054609298706,
"step": 1226
},
{
"epoch": 2.192857142857143,
"grad_norm": 0.2032238245010376,
"learning_rate": 2.673568838121045e-11,
"loss": 1.291284203529358,
"step": 1228
},
{
"epoch": 2.1964285714285716,
"grad_norm": 0.16216710209846497,
"learning_rate": 2.659805515155653e-11,
"loss": 1.212812900543213,
"step": 1230
},
{
"epoch": 2.2,
"grad_norm": 0.21912680566310883,
"learning_rate": 2.6460862113229657e-11,
"loss": 1.2930355072021484,
"step": 1232
},
{
"epoch": 2.2035714285714287,
"grad_norm": 0.16590477526187897,
"learning_rate": 2.632411139253442e-11,
"loss": 1.3133563995361328,
"step": 1234
},
{
"epoch": 2.2071428571428573,
"grad_norm": 0.19360366463661194,
"learning_rate": 2.6187805108920104e-11,
"loss": 1.2435790300369263,
"step": 1236
},
{
"epoch": 2.210714285714286,
"grad_norm": 0.1824900209903717,
"learning_rate": 2.605194537494779e-11,
"loss": 1.295372486114502,
"step": 1238
},
{
"epoch": 2.2142857142857144,
"grad_norm": 0.17591625452041626,
"learning_rate": 2.5916534296257656e-11,
"loss": 1.3211277723312378,
"step": 1240
},
{
"epoch": 2.217857142857143,
"grad_norm": 0.18499885499477386,
"learning_rate": 2.5781573971536387e-11,
"loss": 1.4101108312606812,
"step": 1242
},
{
"epoch": 2.2214285714285715,
"grad_norm": 0.20316849648952484,
"learning_rate": 2.564706649248456e-11,
"loss": 1.3189499378204346,
"step": 1244
},
{
"epoch": 2.225,
"grad_norm": 0.18995395302772522,
"learning_rate": 2.5513013943784236e-11,
"loss": 1.4385656118392944,
"step": 1246
},
{
"epoch": 2.2285714285714286,
"grad_norm": 0.1785760372877121,
"learning_rate": 2.537941840306669e-11,
"loss": 1.375566005706787,
"step": 1248
},
{
"epoch": 2.232142857142857,
"grad_norm": 0.2664630711078644,
"learning_rate": 2.5246281940880267e-11,
"loss": 1.4056965112686157,
"step": 1250
},
{
"epoch": 2.2357142857142858,
"grad_norm": 0.24670979380607605,
"learning_rate": 2.5113606620658124e-11,
"loss": 1.398148536682129,
"step": 1252
},
{
"epoch": 2.2392857142857143,
"grad_norm": 0.32755303382873535,
"learning_rate": 2.4981394498686412e-11,
"loss": 1.2407431602478027,
"step": 1254
},
{
"epoch": 2.242857142857143,
"grad_norm": 0.25386714935302734,
"learning_rate": 2.484964762407232e-11,
"loss": 1.2231903076171875,
"step": 1256
},
{
"epoch": 2.2464285714285714,
"grad_norm": 0.31434598565101624,
"learning_rate": 2.4718368038712332e-11,
"loss": 1.2211343050003052,
"step": 1258
},
{
"epoch": 2.25,
"grad_norm": 0.39853435754776,
"learning_rate": 2.45875577772606e-11,
"loss": 1.2230669260025024,
"step": 1260
},
{
"epoch": 2.2535714285714286,
"grad_norm": 0.4850930869579315,
"learning_rate": 2.4457218867097394e-11,
"loss": 1.3305819034576416,
"step": 1262
},
{
"epoch": 2.257142857142857,
"grad_norm": 0.3038150370121002,
"learning_rate": 2.4327353328297673e-11,
"loss": 1.4300968647003174,
"step": 1264
},
{
"epoch": 2.2607142857142857,
"grad_norm": 0.42150551080703735,
"learning_rate": 2.4197963173599828e-11,
"loss": 1.210048794746399,
"step": 1266
},
{
"epoch": 2.2642857142857142,
"grad_norm": 0.4590936303138733,
"learning_rate": 2.4069050408374375e-11,
"loss": 1.4437575340270996,
"step": 1268
},
{
"epoch": 2.267857142857143,
"grad_norm": 0.2934648394584656,
"learning_rate": 2.3940617030592998e-11,
"loss": 1.2751147747039795,
"step": 1270
},
{
"epoch": 2.2714285714285714,
"grad_norm": 0.2862495481967926,
"learning_rate": 2.3812665030797508e-11,
"loss": 1.4070979356765747,
"step": 1272
},
{
"epoch": 2.275,
"grad_norm": 0.2849954068660736,
"learning_rate": 2.3685196392069054e-11,
"loss": 1.4459384679794312,
"step": 1274
},
{
"epoch": 2.2785714285714285,
"grad_norm": 0.34812119603157043,
"learning_rate": 2.3558213089997302e-11,
"loss": 1.2700358629226685,
"step": 1276
},
{
"epoch": 2.282142857142857,
"grad_norm": 0.2995881140232086,
"learning_rate": 2.343171709264989e-11,
"loss": 0.9016294479370117,
"step": 1278
},
{
"epoch": 2.2857142857142856,
"grad_norm": 0.4387514591217041,
"learning_rate": 2.3305710360541853e-11,
"loss": 1.2820184230804443,
"step": 1280
},
{
"epoch": 2.289285714285714,
"grad_norm": 0.20915277302265167,
"learning_rate": 2.3180194846605366e-11,
"loss": 1.296189308166504,
"step": 1282
},
{
"epoch": 2.2928571428571427,
"grad_norm": 0.32164767384529114,
"learning_rate": 2.3055172496159327e-11,
"loss": 1.6072280406951904,
"step": 1284
},
{
"epoch": 2.2964285714285713,
"grad_norm": 0.878715455532074,
"learning_rate": 2.2930645246879285e-11,
"loss": 1.2524617910385132,
"step": 1286
},
{
"epoch": 2.3,
"grad_norm": 0.2663787007331848,
"learning_rate": 2.2806615028767446e-11,
"loss": 1.1642640829086304,
"step": 1288
},
{
"epoch": 2.3035714285714284,
"grad_norm": 0.30167171359062195,
"learning_rate": 2.2683083764122627e-11,
"loss": 1.2560240030288696,
"step": 1290
},
{
"epoch": 2.307142857142857,
"grad_norm": 0.19819043576717377,
"learning_rate": 2.2560053367510623e-11,
"loss": 1.562985897064209,
"step": 1292
},
{
"epoch": 2.310714285714286,
"grad_norm": 0.3227558135986328,
"learning_rate": 2.24375257457344e-11,
"loss": 0.941666841506958,
"step": 1294
},
{
"epoch": 2.314285714285714,
"grad_norm": 0.30193987488746643,
"learning_rate": 2.2315502797804678e-11,
"loss": 1.497104525566101,
"step": 1296
},
{
"epoch": 2.317857142857143,
"grad_norm": 0.22983092069625854,
"learning_rate": 2.2193986414910348e-11,
"loss": 1.3992904424667358,
"step": 1298
},
{
"epoch": 2.3214285714285716,
"grad_norm": 0.3726719617843628,
"learning_rate": 2.2072978480389284e-11,
"loss": 1.1520694494247437,
"step": 1300
},
{
"epoch": 2.325,
"grad_norm": 0.43815112113952637,
"learning_rate": 2.1952480869699036e-11,
"loss": 1.7719744443893433,
"step": 1302
},
{
"epoch": 2.3285714285714287,
"grad_norm": 0.3036911189556122,
"learning_rate": 2.1832495450387936e-11,
"loss": 1.3131693601608276,
"step": 1304
},
{
"epoch": 2.3321428571428573,
"grad_norm": 0.4145776331424713,
"learning_rate": 2.1713024082065964e-11,
"loss": 1.334944486618042,
"step": 1306
},
{
"epoch": 2.335714285714286,
"grad_norm": 0.44712337851524353,
"learning_rate": 2.1594068616376053e-11,
"loss": 1.4629075527191162,
"step": 1308
},
{
"epoch": 2.3392857142857144,
"grad_norm": 0.29300954937934875,
"learning_rate": 2.1475630896965337e-11,
"loss": 1.3952922821044922,
"step": 1310
},
{
"epoch": 2.342857142857143,
"grad_norm": 0.2007542997598648,
"learning_rate": 2.1357712759456594e-11,
"loss": 1.0657954216003418,
"step": 1312
},
{
"epoch": 2.3464285714285715,
"grad_norm": 0.24710141122341156,
"learning_rate": 2.1240316031419794e-11,
"loss": 0.8775244355201721,
"step": 1314
},
{
"epoch": 2.35,
"grad_norm": 0.2989410161972046,
"learning_rate": 2.1123442532343767e-11,
"loss": 1.389277458190918,
"step": 1316
},
{
"epoch": 2.3535714285714286,
"grad_norm": 0.3632841408252716,
"learning_rate": 2.1007094073607998e-11,
"loss": 1.512230634689331,
"step": 1318
},
{
"epoch": 2.357142857142857,
"grad_norm": 0.20877733826637268,
"learning_rate": 2.089127245845461e-11,
"loss": 1.338801383972168,
"step": 1320
},
{
"epoch": 2.3607142857142858,
"grad_norm": 0.14346203207969666,
"learning_rate": 2.0775979481960342e-11,
"loss": 1.1795090436935425,
"step": 1322
},
{
"epoch": 2.3642857142857143,
"grad_norm": 0.32592296600341797,
"learning_rate": 2.0661216931008714e-11,
"loss": 1.364555835723877,
"step": 1324
},
{
"epoch": 2.367857142857143,
"grad_norm": 2.3708178997039795,
"learning_rate": 2.0546986584262438e-11,
"loss": 1.4721930027008057,
"step": 1326
},
{
"epoch": 2.3714285714285714,
"grad_norm": 0.19912676513195038,
"learning_rate": 2.043329021213577e-11,
"loss": 1.4108600616455078,
"step": 1328
},
{
"epoch": 2.375,
"grad_norm": 0.20013919472694397,
"learning_rate": 2.032012957676708e-11,
"loss": 1.2515854835510254,
"step": 1330
},
{
"epoch": 2.3785714285714286,
"grad_norm": 0.29759207367897034,
"learning_rate": 2.0207506431991558e-11,
"loss": 1.568236231803894,
"step": 1332
},
{
"epoch": 2.382142857142857,
"grad_norm": 0.18850746750831604,
"learning_rate": 2.0095422523314016e-11,
"loss": 1.3757435083389282,
"step": 1334
},
{
"epoch": 2.3857142857142857,
"grad_norm": 0.16454045474529266,
"learning_rate": 1.9983879587881848e-11,
"loss": 1.2330865859985352,
"step": 1336
},
{
"epoch": 2.3892857142857142,
"grad_norm": 0.13228176534175873,
"learning_rate": 1.9872879354458114e-11,
"loss": 1.2531322240829468,
"step": 1338
},
{
"epoch": 2.392857142857143,
"grad_norm": 0.17047590017318726,
"learning_rate": 1.976242354339471e-11,
"loss": 1.392430305480957,
"step": 1340
},
{
"epoch": 2.3964285714285714,
"grad_norm": 0.964780330657959,
"learning_rate": 1.9652513866605748e-11,
"loss": 1.2138177156448364,
"step": 1342
},
{
"epoch": 2.4,
"grad_norm": 0.20008714497089386,
"learning_rate": 1.9543152027541e-11,
"loss": 1.3786259889602661,
"step": 1344
},
{
"epoch": 2.4035714285714285,
"grad_norm": 0.2520330250263214,
"learning_rate": 1.9434339721159505e-11,
"loss": 1.5099730491638184,
"step": 1346
},
{
"epoch": 2.407142857142857,
"grad_norm": 0.26674994826316833,
"learning_rate": 1.932607863390329e-11,
"loss": 1.4184703826904297,
"step": 1348
},
{
"epoch": 2.4107142857142856,
"grad_norm": 0.3661814332008362,
"learning_rate": 1.9218370443671232e-11,
"loss": 1.5577000379562378,
"step": 1350
},
{
"epoch": 2.414285714285714,
"grad_norm": 0.1575201451778412,
"learning_rate": 1.9111216819793098e-11,
"loss": 1.4460270404815674,
"step": 1352
},
{
"epoch": 2.4178571428571427,
"grad_norm": 0.2647629678249359,
"learning_rate": 1.900461942300359e-11,
"loss": 1.242174506187439,
"step": 1354
},
{
"epoch": 2.4214285714285713,
"grad_norm": 0.22450336813926697,
"learning_rate": 1.8898579905416678e-11,
"loss": 1.143039345741272,
"step": 1356
},
{
"epoch": 2.425,
"grad_norm": 0.14635813236236572,
"learning_rate": 1.8793099910499927e-11,
"loss": 1.1687003374099731,
"step": 1358
},
{
"epoch": 2.4285714285714284,
"grad_norm": 0.14772412180900574,
"learning_rate": 1.8688181073049123e-11,
"loss": 1.2316815853118896,
"step": 1360
},
{
"epoch": 2.432142857142857,
"grad_norm": 0.1730790138244629,
"learning_rate": 1.8583825019162844e-11,
"loss": 1.316425085067749,
"step": 1362
},
{
"epoch": 2.435714285714286,
"grad_norm": 0.1888405829668045,
"learning_rate": 1.8480033366217288e-11,
"loss": 1.437265396118164,
"step": 1364
},
{
"epoch": 2.439285714285714,
"grad_norm": 0.22746612131595612,
"learning_rate": 1.837680772284123e-11,
"loss": 1.5048937797546387,
"step": 1366
},
{
"epoch": 2.442857142857143,
"grad_norm": 0.24730168282985687,
"learning_rate": 1.8274149688891058e-11,
"loss": 1.3409751653671265,
"step": 1368
},
{
"epoch": 2.4464285714285716,
"grad_norm": 0.2903384268283844,
"learning_rate": 1.8172060855425984e-11,
"loss": 1.6011804342269897,
"step": 1370
},
{
"epoch": 2.45,
"grad_norm": 0.4629653990268707,
"learning_rate": 1.8070542804683405e-11,
"loss": 1.39284086227417,
"step": 1372
},
{
"epoch": 2.4535714285714287,
"grad_norm": 0.1703772395849228,
"learning_rate": 1.7969597110054342e-11,
"loss": 1.1957430839538574,
"step": 1374
},
{
"epoch": 2.4571428571428573,
"grad_norm": 0.46089184284210205,
"learning_rate": 1.786922533605913e-11,
"loss": 1.4656089544296265,
"step": 1376
},
{
"epoch": 2.460714285714286,
"grad_norm": 0.18819937109947205,
"learning_rate": 1.776942903832306e-11,
"loss": 1.223941445350647,
"step": 1378
},
{
"epoch": 2.4642857142857144,
"grad_norm": 0.23815391957759857,
"learning_rate": 1.7670209763552342e-11,
"loss": 1.3691385984420776,
"step": 1380
},
{
"epoch": 2.467857142857143,
"grad_norm": 0.17686067521572113,
"learning_rate": 1.7571569049510138e-11,
"loss": 1.317873239517212,
"step": 1382
},
{
"epoch": 2.4714285714285715,
"grad_norm": 0.38989320397377014,
"learning_rate": 1.747350842499271e-11,
"loss": 1.466494083404541,
"step": 1384
},
{
"epoch": 2.475,
"grad_norm": 0.22029191255569458,
"learning_rate": 1.7376029409805707e-11,
"loss": 1.3847404718399048,
"step": 1386
},
{
"epoch": 2.4785714285714286,
"grad_norm": 0.27662548422813416,
"learning_rate": 1.7279133514740644e-11,
"loss": 1.37192702293396,
"step": 1388
},
{
"epoch": 2.482142857142857,
"grad_norm": 0.23457181453704834,
"learning_rate": 1.7182822241551434e-11,
"loss": 1.1607106924057007,
"step": 1390
},
{
"epoch": 2.4857142857142858,
"grad_norm": 0.1711721569299698,
"learning_rate": 1.708709708293121e-11,
"loss": 1.1327881813049316,
"step": 1392
},
{
"epoch": 2.4892857142857143,
"grad_norm": 0.24287395179271698,
"learning_rate": 1.699195952248908e-11,
"loss": 1.4552991390228271,
"step": 1394
},
{
"epoch": 2.492857142857143,
"grad_norm": 0.3649357557296753,
"learning_rate": 1.6897411034727216e-11,
"loss": 1.497635006904602,
"step": 1396
},
{
"epoch": 2.4964285714285714,
"grad_norm": 0.35778528451919556,
"learning_rate": 1.680345308501795e-11,
"loss": 1.3948183059692383,
"step": 1398
},
{
"epoch": 2.5,
"grad_norm": 0.1573958396911621,
"learning_rate": 1.6710087129581085e-11,
"loss": 1.3863242864608765,
"step": 1400
},
{
"epoch": 2.5035714285714286,
"grad_norm": 0.20089280605316162,
"learning_rate": 1.6617314615461325e-11,
"loss": 1.3896986246109009,
"step": 1402
},
{
"epoch": 2.507142857142857,
"grad_norm": 0.23711322247982025,
"learning_rate": 1.6525136980505835e-11,
"loss": 1.4924380779266357,
"step": 1404
},
{
"epoch": 2.5107142857142857,
"grad_norm": 0.20246915519237518,
"learning_rate": 1.6433555653341975e-11,
"loss": 1.4420058727264404,
"step": 1406
},
{
"epoch": 2.5142857142857142,
"grad_norm": 0.20075786113739014,
"learning_rate": 1.6342572053355166e-11,
"loss": 1.2571265697479248,
"step": 1408
},
{
"epoch": 2.517857142857143,
"grad_norm": 0.2846459150314331,
"learning_rate": 1.625218759066685e-11,
"loss": 0.9605814218521118,
"step": 1410
},
{
"epoch": 2.5214285714285714,
"grad_norm": 0.2503245770931244,
"learning_rate": 1.6162403666112653e-11,
"loss": 1.0498977899551392,
"step": 1412
},
{
"epoch": 2.525,
"grad_norm": 0.2854737639427185,
"learning_rate": 1.607322167122069e-11,
"loss": 1.3054733276367188,
"step": 1414
},
{
"epoch": 2.5285714285714285,
"grad_norm": 0.4330965280532837,
"learning_rate": 1.598464298819002e-11,
"loss": 1.088234305381775,
"step": 1416
},
{
"epoch": 2.532142857142857,
"grad_norm": 0.23491685092449188,
"learning_rate": 1.5896668989869152e-11,
"loss": 1.2604817152023315,
"step": 1418
},
{
"epoch": 2.5357142857142856,
"grad_norm": 0.20269037783145905,
"learning_rate": 1.5809301039734815e-11,
"loss": 1.2018966674804688,
"step": 1420
},
{
"epoch": 2.539285714285714,
"grad_norm": 0.15246133506298065,
"learning_rate": 1.572254049187084e-11,
"loss": 1.3999316692352295,
"step": 1422
},
{
"epoch": 2.5428571428571427,
"grad_norm": 0.13255059719085693,
"learning_rate": 1.5636388690947122e-11,
"loss": 1.1051727533340454,
"step": 1424
},
{
"epoch": 2.5464285714285713,
"grad_norm": 0.4702497720718384,
"learning_rate": 1.5550846972198852e-11,
"loss": 1.3160711526870728,
"step": 1426
},
{
"epoch": 2.55,
"grad_norm": 0.12075657397508621,
"learning_rate": 1.546591666140573e-11,
"loss": 1.3880804777145386,
"step": 1428
},
{
"epoch": 2.553571428571429,
"grad_norm": 0.2639578580856323,
"learning_rate": 1.5381599074871512e-11,
"loss": 1.1285425424575806,
"step": 1430
},
{
"epoch": 2.557142857142857,
"grad_norm": 0.30509477853775024,
"learning_rate": 1.5297895519403562e-11,
"loss": 1.5178343057632446,
"step": 1432
},
{
"epoch": 2.560714285714286,
"grad_norm": 0.5000651478767395,
"learning_rate": 1.5214807292292565e-11,
"loss": 1.17734694480896,
"step": 1434
},
{
"epoch": 2.564285714285714,
"grad_norm": 0.2977011799812317,
"learning_rate": 1.513233568129249e-11,
"loss": 1.3034253120422363,
"step": 1436
},
{
"epoch": 2.567857142857143,
"grad_norm": 0.2919791638851166,
"learning_rate": 1.5050481964600582e-11,
"loss": 1.330073595046997,
"step": 1438
},
{
"epoch": 2.571428571428571,
"grad_norm": 0.281448096036911,
"learning_rate": 1.4969247410837587e-11,
"loss": 1.3406702280044556,
"step": 1440
},
{
"epoch": 2.575,
"grad_norm": 0.20965971052646637,
"learning_rate": 1.4888633279028066e-11,
"loss": 1.233846664428711,
"step": 1442
},
{
"epoch": 2.5785714285714287,
"grad_norm": 0.13012030720710754,
"learning_rate": 1.4808640818580886e-11,
"loss": 1.1111416816711426,
"step": 1444
},
{
"epoch": 2.5821428571428573,
"grad_norm": 0.2702961266040802,
"learning_rate": 1.4729271269269823e-11,
"loss": 1.3957263231277466,
"step": 1446
},
{
"epoch": 2.585714285714286,
"grad_norm": 0.5005801320075989,
"learning_rate": 1.4650525861214453e-11,
"loss": 1.228448748588562,
"step": 1448
},
{
"epoch": 2.5892857142857144,
"grad_norm": 0.2977920472621918,
"learning_rate": 1.4572405814860953e-11,
"loss": 1.1848669052124023,
"step": 1450
},
{
"epoch": 2.592857142857143,
"grad_norm": 0.19575802981853485,
"learning_rate": 1.4494912340963285e-11,
"loss": 1.0956735610961914,
"step": 1452
},
{
"epoch": 2.5964285714285715,
"grad_norm": 0.285162091255188,
"learning_rate": 1.441804664056437e-11,
"loss": 1.3711469173431396,
"step": 1454
},
{
"epoch": 2.6,
"grad_norm": 0.3140964210033417,
"learning_rate": 1.434180990497751e-11,
"loss": 1.2784916162490845,
"step": 1456
},
{
"epoch": 2.6035714285714286,
"grad_norm": 0.2630551755428314,
"learning_rate": 1.4266203315767917e-11,
"loss": 1.3073674440383911,
"step": 1458
},
{
"epoch": 2.607142857142857,
"grad_norm": 0.276987224817276,
"learning_rate": 1.4191228044734386e-11,
"loss": 1.3831404447555542,
"step": 1460
},
{
"epoch": 2.6107142857142858,
"grad_norm": 0.21835049986839294,
"learning_rate": 1.411688525389114e-11,
"loss": 1.4132722616195679,
"step": 1462
},
{
"epoch": 2.6142857142857143,
"grad_norm": 0.36612504720687866,
"learning_rate": 1.4043176095449842e-11,
"loss": 1.2454001903533936,
"step": 1464
},
{
"epoch": 2.617857142857143,
"grad_norm": 0.9990705847740173,
"learning_rate": 1.397010171180171e-11,
"loss": 1.2597897052764893,
"step": 1466
},
{
"epoch": 2.6214285714285714,
"grad_norm": 0.28593096137046814,
"learning_rate": 1.3897663235499797e-11,
"loss": 1.3988193273544312,
"step": 1468
},
{
"epoch": 2.625,
"grad_norm": 0.26052477955818176,
"learning_rate": 1.382586178924149e-11,
"loss": 1.1380650997161865,
"step": 1470
},
{
"epoch": 2.6285714285714286,
"grad_norm": 1.1111445426940918,
"learning_rate": 1.3754698485851073e-11,
"loss": 1.457688808441162,
"step": 1472
},
{
"epoch": 2.632142857142857,
"grad_norm": 0.7538399696350098,
"learning_rate": 1.3684174428262489e-11,
"loss": 1.3109138011932373,
"step": 1474
},
{
"epoch": 2.6357142857142857,
"grad_norm": 0.23928874731063843,
"learning_rate": 1.3614290709502243e-11,
"loss": 1.0036344528198242,
"step": 1476
},
{
"epoch": 2.6392857142857142,
"grad_norm": 0.30908122658729553,
"learning_rate": 1.354504841267246e-11,
"loss": 0.977489709854126,
"step": 1478
},
{
"epoch": 2.642857142857143,
"grad_norm": 0.24736160039901733,
"learning_rate": 1.3476448610934103e-11,
"loss": 1.356593132019043,
"step": 1480
},
{
"epoch": 2.6464285714285714,
"grad_norm": 0.25912997126579285,
"learning_rate": 1.3408492367490344e-11,
"loss": 1.3192540407180786,
"step": 1482
},
{
"epoch": 2.65,
"grad_norm": 0.24118413031101227,
"learning_rate": 1.334118073557008e-11,
"loss": 1.3637826442718506,
"step": 1484
},
{
"epoch": 2.6535714285714285,
"grad_norm": 0.1752414107322693,
"learning_rate": 1.3274514758411593e-11,
"loss": 1.3229981660842896,
"step": 1486
},
{
"epoch": 2.657142857142857,
"grad_norm": 0.19834518432617188,
"learning_rate": 1.3208495469246445e-11,
"loss": 1.365134596824646,
"step": 1488
},
{
"epoch": 2.6607142857142856,
"grad_norm": 0.19379328191280365,
"learning_rate": 1.3143123891283355e-11,
"loss": 1.3100693225860596,
"step": 1490
},
{
"epoch": 2.664285714285714,
"grad_norm": 0.17539915442466736,
"learning_rate": 1.307840103769245e-11,
"loss": 1.3072569370269775,
"step": 1492
},
{
"epoch": 2.6678571428571427,
"grad_norm": 0.1835700273513794,
"learning_rate": 1.3014327911589493e-11,
"loss": 1.2993477582931519,
"step": 1494
},
{
"epoch": 2.6714285714285713,
"grad_norm": 0.16093674302101135,
"learning_rate": 1.2950905506020382e-11,
"loss": 1.2758489847183228,
"step": 1496
},
{
"epoch": 2.675,
"grad_norm": 0.24639339745044708,
"learning_rate": 1.288813480394571e-11,
"loss": 1.3632615804672241,
"step": 1498
},
{
"epoch": 2.678571428571429,
"grad_norm": 0.2552201747894287,
"learning_rate": 1.2826016778225578e-11,
"loss": 1.3620305061340332,
"step": 1500
},
{
"epoch": 2.682142857142857,
"grad_norm": 0.24661146104335785,
"learning_rate": 1.2764552391604466e-11,
"loss": 1.4900641441345215,
"step": 1502
},
{
"epoch": 2.685714285714286,
"grad_norm": 0.3591724634170532,
"learning_rate": 1.2703742596696384e-11,
"loss": 1.1812725067138672,
"step": 1504
},
{
"epoch": 2.689285714285714,
"grad_norm": 0.221350759267807,
"learning_rate": 1.2643588335970021e-11,
"loss": 1.1195027828216553,
"step": 1506
},
{
"epoch": 2.692857142857143,
"grad_norm": 0.21987511217594147,
"learning_rate": 1.2584090541734216e-11,
"loss": 1.333940863609314,
"step": 1508
},
{
"epoch": 2.696428571428571,
"grad_norm": 0.16496390104293823,
"learning_rate": 1.252525013612346e-11,
"loss": 1.335278034210205,
"step": 1510
},
{
"epoch": 2.7,
"grad_norm": 0.2967815399169922,
"learning_rate": 1.2467068031083622e-11,
"loss": 1.312089204788208,
"step": 1512
},
{
"epoch": 2.7035714285714287,
"grad_norm": 0.16498564183712006,
"learning_rate": 1.2409545128357805e-11,
"loss": 1.184598445892334,
"step": 1514
},
{
"epoch": 2.7071428571428573,
"grad_norm": 0.16838309168815613,
"learning_rate": 1.235268231947238e-11,
"loss": 1.2930303812026978,
"step": 1516
},
{
"epoch": 2.710714285714286,
"grad_norm": 0.27942174673080444,
"learning_rate": 1.229648048572317e-11,
"loss": 1.2529990673065186,
"step": 1518
},
{
"epoch": 2.7142857142857144,
"grad_norm": 0.16519573330879211,
"learning_rate": 1.2240940498161798e-11,
"loss": 1.2533071041107178,
"step": 1520
},
{
"epoch": 2.717857142857143,
"grad_norm": 0.358005553483963,
"learning_rate": 1.2186063217582143e-11,
"loss": 1.2952263355255127,
"step": 1522
},
{
"epoch": 2.7214285714285715,
"grad_norm": 0.3745562732219696,
"learning_rate": 1.2131849494507061e-11,
"loss": 1.3314462900161743,
"step": 1524
},
{
"epoch": 2.725,
"grad_norm": 0.20380382239818573,
"learning_rate": 1.2078300169175156e-11,
"loss": 1.3251063823699951,
"step": 1526
},
{
"epoch": 2.7285714285714286,
"grad_norm": 0.3119201064109802,
"learning_rate": 1.20254160715278e-11,
"loss": 1.3549195528030396,
"step": 1528
},
{
"epoch": 2.732142857142857,
"grad_norm": 0.22798198461532593,
"learning_rate": 1.1973198021196207e-11,
"loss": 1.2130204439163208,
"step": 1530
},
{
"epoch": 2.7357142857142858,
"grad_norm": 0.2302948534488678,
"learning_rate": 1.1921646827488806e-11,
"loss": 1.32042396068573,
"step": 1532
},
{
"epoch": 2.7392857142857143,
"grad_norm": 0.22557149827480316,
"learning_rate": 1.1870763289378629e-11,
"loss": 1.2422351837158203,
"step": 1534
},
{
"epoch": 2.742857142857143,
"grad_norm": 0.15642249584197998,
"learning_rate": 1.182054819549098e-11,
"loss": 1.2506171464920044,
"step": 1536
},
{
"epoch": 2.7464285714285714,
"grad_norm": 0.32084932923316956,
"learning_rate": 1.1771002324091182e-11,
"loss": 1.2349700927734375,
"step": 1538
},
{
"epoch": 2.75,
"grad_norm": 0.4902629554271698,
"learning_rate": 1.1722126443072518e-11,
"loss": 1.4166244268417358,
"step": 1540
},
{
"epoch": 2.7535714285714286,
"grad_norm": 0.17237702012062073,
"learning_rate": 1.1673921309944354e-11,
"loss": 1.020308017730713,
"step": 1542
},
{
"epoch": 2.757142857142857,
"grad_norm": 0.1602608561515808,
"learning_rate": 1.1626387671820362e-11,
"loss": 1.2884690761566162,
"step": 1544
},
{
"epoch": 2.7607142857142857,
"grad_norm": 0.16682331264019012,
"learning_rate": 1.1579526265406972e-11,
"loss": 1.3690019845962524,
"step": 1546
},
{
"epoch": 2.7642857142857142,
"grad_norm": 0.1700551062822342,
"learning_rate": 1.1533337816991932e-11,
"loss": 1.2616186141967773,
"step": 1548
},
{
"epoch": 2.767857142857143,
"grad_norm": 0.16919812560081482,
"learning_rate": 1.1487823042433062e-11,
"loss": 1.307798981666565,
"step": 1550
},
{
"epoch": 2.7714285714285714,
"grad_norm": 0.11971700936555862,
"learning_rate": 1.1442982647147166e-11,
"loss": 1.3186330795288086,
"step": 1552
},
{
"epoch": 2.775,
"grad_norm": 0.15739700198173523,
"learning_rate": 1.1398817326099093e-11,
"loss": 1.1444510221481323,
"step": 1554
},
{
"epoch": 2.7785714285714285,
"grad_norm": 0.15496599674224854,
"learning_rate": 1.1355327763790944e-11,
"loss": 1.236445665359497,
"step": 1556
},
{
"epoch": 2.782142857142857,
"grad_norm": 0.2167055755853653,
"learning_rate": 1.1312514634251493e-11,
"loss": 1.1993296146392822,
"step": 1558
},
{
"epoch": 2.7857142857142856,
"grad_norm": 0.14245696365833282,
"learning_rate": 1.1270378601025748e-11,
"loss": 1.2451859712600708,
"step": 1560
},
{
"epoch": 2.789285714285714,
"grad_norm": 0.15736693143844604,
"learning_rate": 1.1228920317164623e-11,
"loss": 1.2228260040283203,
"step": 1562
},
{
"epoch": 2.7928571428571427,
"grad_norm": 0.13533014059066772,
"learning_rate": 1.1188140425214861e-11,
"loss": 1.2670307159423828,
"step": 1564
},
{
"epoch": 2.7964285714285713,
"grad_norm": 0.1510917991399765,
"learning_rate": 1.1148039557209056e-11,
"loss": 1.227577805519104,
"step": 1566
},
{
"epoch": 2.8,
"grad_norm": 0.12420517206192017,
"learning_rate": 1.1108618334655843e-11,
"loss": 1.2005560398101807,
"step": 1568
},
{
"epoch": 2.803571428571429,
"grad_norm": 0.18340545892715454,
"learning_rate": 1.1069877368530302e-11,
"loss": 1.2663429975509644,
"step": 1570
},
{
"epoch": 2.807142857142857,
"grad_norm": 0.15353085100650787,
"learning_rate": 1.1031817259264454e-11,
"loss": 1.251989722251892,
"step": 1572
},
{
"epoch": 2.810714285714286,
"grad_norm": 0.12760750949382782,
"learning_rate": 1.0994438596737972e-11,
"loss": 1.1478347778320312,
"step": 1574
},
{
"epoch": 2.814285714285714,
"grad_norm": 1.7780303955078125,
"learning_rate": 1.0957741960269049e-11,
"loss": 1.2149173021316528,
"step": 1576
},
{
"epoch": 2.817857142857143,
"grad_norm": 0.13187356293201447,
"learning_rate": 1.092172791860539e-11,
"loss": 1.1681101322174072,
"step": 1578
},
{
"epoch": 2.821428571428571,
"grad_norm": 0.18063776195049286,
"learning_rate": 1.0886397029915413e-11,
"loss": 1.199445366859436,
"step": 1580
},
{
"epoch": 2.825,
"grad_norm": 0.1410759687423706,
"learning_rate": 1.0851749841779609e-11,
"loss": 1.1747792959213257,
"step": 1582
},
{
"epoch": 2.8285714285714287,
"grad_norm": 0.19593161344528198,
"learning_rate": 1.0817786891182041e-11,
"loss": 1.1981396675109863,
"step": 1584
},
{
"epoch": 2.8321428571428573,
"grad_norm": 0.14246626198291779,
"learning_rate": 1.078450870450203e-11,
"loss": 1.2036207914352417,
"step": 1586
},
{
"epoch": 2.835714285714286,
"grad_norm": 0.25931403040885925,
"learning_rate": 1.0751915797505985e-11,
"loss": 1.1870161294937134,
"step": 1588
},
{
"epoch": 2.8392857142857144,
"grad_norm": 0.18567614257335663,
"learning_rate": 1.0720008675339403e-11,
"loss": 1.1964879035949707,
"step": 1590
},
{
"epoch": 2.842857142857143,
"grad_norm": 0.1569967120885849,
"learning_rate": 1.0688787832519084e-11,
"loss": 1.234360933303833,
"step": 1592
},
{
"epoch": 2.8464285714285715,
"grad_norm": 0.17869971692562103,
"learning_rate": 1.0658253752925418e-11,
"loss": 1.208680510520935,
"step": 1594
},
{
"epoch": 2.85,
"grad_norm": 1.144254207611084,
"learning_rate": 1.0628406909794908e-11,
"loss": 1.2222168445587158,
"step": 1596
},
{
"epoch": 2.8535714285714286,
"grad_norm": 0.13791275024414062,
"learning_rate": 1.059924776571283e-11,
"loss": 1.2378734350204468,
"step": 1598
},
{
"epoch": 2.857142857142857,
"grad_norm": 0.1584549993276596,
"learning_rate": 1.0570776772606056e-11,
"loss": 1.2322474718093872,
"step": 1600
},
{
"epoch": 2.8607142857142858,
"grad_norm": 0.1755964756011963,
"learning_rate": 1.0542994371736075e-11,
"loss": 1.1988189220428467,
"step": 1602
},
{
"epoch": 2.8642857142857143,
"grad_norm": 0.21343602240085602,
"learning_rate": 1.0515900993692126e-11,
"loss": 1.228104829788208,
"step": 1604
},
{
"epoch": 2.867857142857143,
"grad_norm": 0.15894442796707153,
"learning_rate": 1.048949705838454e-11,
"loss": 1.1797680854797363,
"step": 1606
},
{
"epoch": 2.8714285714285714,
"grad_norm": 0.1659688502550125,
"learning_rate": 1.0463782975038227e-11,
"loss": 1.2225085496902466,
"step": 1608
},
{
"epoch": 2.875,
"grad_norm": 0.2029978483915329,
"learning_rate": 1.0438759142186335e-11,
"loss": 1.2150154113769531,
"step": 1610
},
{
"epoch": 2.8785714285714286,
"grad_norm": 0.17542648315429688,
"learning_rate": 1.0414425947664074e-11,
"loss": 1.2423878908157349,
"step": 1612
},
{
"epoch": 2.882142857142857,
"grad_norm": 0.18975086510181427,
"learning_rate": 1.0390783768602692e-11,
"loss": 1.227378010749817,
"step": 1614
},
{
"epoch": 2.8857142857142857,
"grad_norm": 0.24369192123413086,
"learning_rate": 1.0367832971423663e-11,
"loss": 1.1613997220993042,
"step": 1616
},
{
"epoch": 2.8892857142857142,
"grad_norm": 0.15854114294052124,
"learning_rate": 1.0345573911832976e-11,
"loss": 1.1997004747390747,
"step": 1618
},
{
"epoch": 2.892857142857143,
"grad_norm": 0.15320441126823425,
"learning_rate": 1.0324006934815622e-11,
"loss": 1.2283430099487305,
"step": 1620
},
{
"epoch": 2.8964285714285714,
"grad_norm": 0.24903114140033722,
"learning_rate": 1.0303132374630275e-11,
"loss": 1.2436856031417847,
"step": 1622
},
{
"epoch": 2.9,
"grad_norm": 0.13410496711730957,
"learning_rate": 1.0282950554804083e-11,
"loss": 1.2366242408752441,
"step": 1624
},
{
"epoch": 2.9035714285714285,
"grad_norm": 0.17165103554725647,
"learning_rate": 1.0263461788127682e-11,
"loss": 1.2125755548477173,
"step": 1626
},
{
"epoch": 2.907142857142857,
"grad_norm": 0.1607826203107834,
"learning_rate": 1.0244666376650306e-11,
"loss": 1.1719647645950317,
"step": 1628
},
{
"epoch": 2.9107142857142856,
"grad_norm": 0.2778455913066864,
"learning_rate": 1.0226564611675146e-11,
"loss": 1.2128190994262695,
"step": 1630
},
{
"epoch": 2.914285714285714,
"grad_norm": 0.16275779902935028,
"learning_rate": 1.020915677375483e-11,
"loss": 1.229934811592102,
"step": 1632
},
{
"epoch": 2.9178571428571427,
"grad_norm": 0.1258879005908966,
"learning_rate": 1.019244313268704e-11,
"loss": 1.1735552549362183,
"step": 1634
},
{
"epoch": 2.9214285714285713,
"grad_norm": 0.2216557413339615,
"learning_rate": 1.0176423947510376e-11,
"loss": 1.2107999324798584,
"step": 1636
},
{
"epoch": 2.925,
"grad_norm": 0.19989758729934692,
"learning_rate": 1.0161099466500318e-11,
"loss": 1.1985503435134888,
"step": 1638
},
{
"epoch": 2.928571428571429,
"grad_norm": 0.21431688964366913,
"learning_rate": 1.014646992716537e-11,
"loss": 1.25062894821167,
"step": 1640
},
{
"epoch": 2.932142857142857,
"grad_norm": 0.15615607798099518,
"learning_rate": 1.0132535556243399e-11,
"loss": 1.2564295530319214,
"step": 1642
},
{
"epoch": 2.935714285714286,
"grad_norm": 0.16791290044784546,
"learning_rate": 1.0119296569698111e-11,
"loss": 1.2255940437316895,
"step": 1644
},
{
"epoch": 2.939285714285714,
"grad_norm": 0.16198527812957764,
"learning_rate": 1.01067531727157e-11,
"loss": 1.2428375482559204,
"step": 1646
},
{
"epoch": 2.942857142857143,
"grad_norm": 0.16298729181289673,
"learning_rate": 1.0094905559701677e-11,
"loss": 1.2208831310272217,
"step": 1648
},
{
"epoch": 2.946428571428571,
"grad_norm": 0.2169935405254364,
"learning_rate": 1.0083753914277857e-11,
"loss": 1.1699635982513428,
"step": 1650
},
{
"epoch": 2.95,
"grad_norm": 0.1867915242910385,
"learning_rate": 1.0073298409279491e-11,
"loss": 1.1837371587753296,
"step": 1652
},
{
"epoch": 2.9535714285714287,
"grad_norm": 0.19683191180229187,
"learning_rate": 1.006353920675263e-11,
"loss": 1.2122198343276978,
"step": 1654
},
{
"epoch": 2.9571428571428573,
"grad_norm": 0.16676557064056396,
"learning_rate": 1.0054476457951567e-11,
"loss": 1.1825740337371826,
"step": 1656
},
{
"epoch": 2.960714285714286,
"grad_norm": 0.651061475276947,
"learning_rate": 1.0046110303336517e-11,
"loss": 1.1873282194137573,
"step": 1658
},
{
"epoch": 2.9642857142857144,
"grad_norm": 0.16477417945861816,
"learning_rate": 1.0038440872571455e-11,
"loss": 1.236222743988037,
"step": 1660
},
{
"epoch": 2.967857142857143,
"grad_norm": 0.1982717663049698,
"learning_rate": 1.0031468284522064e-11,
"loss": 1.2251464128494263,
"step": 1662
},
{
"epoch": 2.9714285714285715,
"grad_norm": 0.1778063327074051,
"learning_rate": 1.0025192647253938e-11,
"loss": 1.2394579648971558,
"step": 1664
},
{
"epoch": 2.975,
"grad_norm": 0.7245147824287415,
"learning_rate": 1.0019614058030873e-11,
"loss": 1.1742377281188965,
"step": 1666
},
{
"epoch": 2.9785714285714286,
"grad_norm": 0.16432130336761475,
"learning_rate": 1.0014732603313377e-11,
"loss": 1.194645643234253,
"step": 1668
},
{
"epoch": 2.982142857142857,
"grad_norm": 0.16144277155399323,
"learning_rate": 1.0010548358757328e-11,
"loss": 1.2105306386947632,
"step": 1670
},
{
"epoch": 2.9857142857142858,
"grad_norm": 0.18343304097652435,
"learning_rate": 1.0007061389212795e-11,
"loss": 1.2389037609100342,
"step": 1672
},
{
"epoch": 2.9892857142857143,
"grad_norm": 0.4671591520309448,
"learning_rate": 1.0004271748723041e-11,
"loss": 1.2875266075134277,
"step": 1674
},
{
"epoch": 2.992857142857143,
"grad_norm": 0.557140588760376,
"learning_rate": 1.0002179480523687e-11,
"loss": 1.168911337852478,
"step": 1676
},
{
"epoch": 2.9964285714285714,
"grad_norm": 0.35813069343566895,
"learning_rate": 1.0000784617042024e-11,
"loss": 1.0809228420257568,
"step": 1678
},
{
"epoch": 3.0,
"grad_norm": 0.509574294090271,
"learning_rate": 1.0000087179896533e-11,
"loss": 0.9652191400527954,
"step": 1680
},
{
"epoch": 3.0,
"step": 1680,
"total_flos": 2.513699577954042e+18,
"train_loss": 1.4121140867471695,
"train_runtime": 17187.153,
"train_samples_per_second": 1.564,
"train_steps_per_second": 0.098
}
],
"logging_steps": 2,
"max_steps": 1680,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 9999999,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.513699577954042e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}