CodeV-All-DSC / trainer_state.json
yang-z's picture
Upload trainer_state.json with huggingface_hub
323badd verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.997428736090792,
"eval_steps": 500,
"global_step": 2112,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 0.0,
"loss": 2.8745,
"step": 1
},
{
"epoch": 0.0,
"learning_rate": 1.2797901240490773e-05,
"loss": 2.1085,
"step": 2
},
{
"epoch": 0.0,
"learning_rate": 2.0284193554110643e-05,
"loss": 2.416,
"step": 3
},
{
"epoch": 0.01,
"learning_rate": 2.5595802480981545e-05,
"loss": 1.7089,
"step": 4
},
{
"epoch": 0.01,
"learning_rate": 2.9715806445889356e-05,
"loss": 1.4257,
"step": 5
},
{
"epoch": 0.01,
"learning_rate": 3.308209479460142e-05,
"loss": 1.1841,
"step": 6
},
{
"epoch": 0.01,
"learning_rate": 3.592825103949889e-05,
"loss": 1.1048,
"step": 7
},
{
"epoch": 0.01,
"learning_rate": 3.839370372147232e-05,
"loss": 0.8983,
"step": 8
},
{
"epoch": 0.01,
"learning_rate": 4.056838710822129e-05,
"loss": 0.7222,
"step": 9
},
{
"epoch": 0.01,
"learning_rate": 4.251370768638013e-05,
"loss": 0.7423,
"step": 10
},
{
"epoch": 0.02,
"learning_rate": 4.427346420355127e-05,
"loss": 0.6476,
"step": 11
},
{
"epoch": 0.02,
"learning_rate": 4.587999603509219e-05,
"loss": 0.649,
"step": 12
},
{
"epoch": 0.02,
"learning_rate": 4.735786205915921e-05,
"loss": 0.61,
"step": 13
},
{
"epoch": 0.02,
"learning_rate": 4.8726152279989664e-05,
"loss": 0.5849,
"step": 14
},
{
"epoch": 0.02,
"learning_rate": 4.9999999999999996e-05,
"loss": 0.5866,
"step": 15
},
{
"epoch": 0.02,
"learning_rate": 5e-05,
"loss": 0.5659,
"step": 16
},
{
"epoch": 0.02,
"learning_rate": 4.9976156413924656e-05,
"loss": 0.5556,
"step": 17
},
{
"epoch": 0.03,
"learning_rate": 4.995231282784931e-05,
"loss": 0.5623,
"step": 18
},
{
"epoch": 0.03,
"learning_rate": 4.992846924177397e-05,
"loss": 0.5657,
"step": 19
},
{
"epoch": 0.03,
"learning_rate": 4.990462565569862e-05,
"loss": 0.5256,
"step": 20
},
{
"epoch": 0.03,
"learning_rate": 4.988078206962327e-05,
"loss": 0.525,
"step": 21
},
{
"epoch": 0.03,
"learning_rate": 4.9856938483547926e-05,
"loss": 0.5175,
"step": 22
},
{
"epoch": 0.03,
"learning_rate": 4.983309489747258e-05,
"loss": 0.5272,
"step": 23
},
{
"epoch": 0.03,
"learning_rate": 4.980925131139724e-05,
"loss": 0.5532,
"step": 24
},
{
"epoch": 0.04,
"learning_rate": 4.9785407725321894e-05,
"loss": 0.5515,
"step": 25
},
{
"epoch": 0.04,
"learning_rate": 4.976156413924655e-05,
"loss": 0.5725,
"step": 26
},
{
"epoch": 0.04,
"learning_rate": 4.97377205531712e-05,
"loss": 0.5295,
"step": 27
},
{
"epoch": 0.04,
"learning_rate": 4.971387696709585e-05,
"loss": 0.5265,
"step": 28
},
{
"epoch": 0.04,
"learning_rate": 4.969003338102051e-05,
"loss": 0.4977,
"step": 29
},
{
"epoch": 0.04,
"learning_rate": 4.966618979494516e-05,
"loss": 0.4907,
"step": 30
},
{
"epoch": 0.04,
"learning_rate": 4.964234620886982e-05,
"loss": 0.5202,
"step": 31
},
{
"epoch": 0.05,
"learning_rate": 4.961850262279447e-05,
"loss": 0.5301,
"step": 32
},
{
"epoch": 0.05,
"learning_rate": 4.9594659036719125e-05,
"loss": 0.5282,
"step": 33
},
{
"epoch": 0.05,
"learning_rate": 4.957081545064378e-05,
"loss": 0.5206,
"step": 34
},
{
"epoch": 0.05,
"learning_rate": 4.954697186456843e-05,
"loss": 0.507,
"step": 35
},
{
"epoch": 0.05,
"learning_rate": 4.9523128278493086e-05,
"loss": 0.5107,
"step": 36
},
{
"epoch": 0.05,
"learning_rate": 4.949928469241774e-05,
"loss": 0.5249,
"step": 37
},
{
"epoch": 0.05,
"learning_rate": 4.9475441106342394e-05,
"loss": 0.4808,
"step": 38
},
{
"epoch": 0.06,
"learning_rate": 4.945159752026705e-05,
"loss": 0.5163,
"step": 39
},
{
"epoch": 0.06,
"learning_rate": 4.942775393419171e-05,
"loss": 0.502,
"step": 40
},
{
"epoch": 0.06,
"learning_rate": 4.940391034811636e-05,
"loss": 0.5087,
"step": 41
},
{
"epoch": 0.06,
"learning_rate": 4.938006676204101e-05,
"loss": 0.4914,
"step": 42
},
{
"epoch": 0.06,
"learning_rate": 4.935622317596566e-05,
"loss": 0.4981,
"step": 43
},
{
"epoch": 0.06,
"learning_rate": 4.9332379589890324e-05,
"loss": 0.5097,
"step": 44
},
{
"epoch": 0.06,
"learning_rate": 4.930853600381498e-05,
"loss": 0.5182,
"step": 45
},
{
"epoch": 0.07,
"learning_rate": 4.928469241773963e-05,
"loss": 0.5,
"step": 46
},
{
"epoch": 0.07,
"learning_rate": 4.9260848831664286e-05,
"loss": 0.4855,
"step": 47
},
{
"epoch": 0.07,
"learning_rate": 4.923700524558894e-05,
"loss": 0.5189,
"step": 48
},
{
"epoch": 0.07,
"learning_rate": 4.921316165951359e-05,
"loss": 0.4852,
"step": 49
},
{
"epoch": 0.07,
"learning_rate": 4.918931807343825e-05,
"loss": 0.4951,
"step": 50
},
{
"epoch": 0.07,
"learning_rate": 4.91654744873629e-05,
"loss": 0.5062,
"step": 51
},
{
"epoch": 0.07,
"learning_rate": 4.9141630901287555e-05,
"loss": 0.4724,
"step": 52
},
{
"epoch": 0.08,
"learning_rate": 4.911778731521221e-05,
"loss": 0.457,
"step": 53
},
{
"epoch": 0.08,
"learning_rate": 4.909394372913686e-05,
"loss": 0.5022,
"step": 54
},
{
"epoch": 0.08,
"learning_rate": 4.907010014306152e-05,
"loss": 0.4488,
"step": 55
},
{
"epoch": 0.08,
"learning_rate": 4.904625655698618e-05,
"loss": 0.4751,
"step": 56
},
{
"epoch": 0.08,
"learning_rate": 4.9022412970910824e-05,
"loss": 0.481,
"step": 57
},
{
"epoch": 0.08,
"learning_rate": 4.899856938483548e-05,
"loss": 0.477,
"step": 58
},
{
"epoch": 0.08,
"learning_rate": 4.897472579876013e-05,
"loss": 0.4562,
"step": 59
},
{
"epoch": 0.09,
"learning_rate": 4.895088221268479e-05,
"loss": 0.5104,
"step": 60
},
{
"epoch": 0.09,
"learning_rate": 4.8927038626609446e-05,
"loss": 0.5089,
"step": 61
},
{
"epoch": 0.09,
"learning_rate": 4.89031950405341e-05,
"loss": 0.4879,
"step": 62
},
{
"epoch": 0.09,
"learning_rate": 4.8879351454458754e-05,
"loss": 0.467,
"step": 63
},
{
"epoch": 0.09,
"learning_rate": 4.88555078683834e-05,
"loss": 0.4967,
"step": 64
},
{
"epoch": 0.09,
"learning_rate": 4.883166428230806e-05,
"loss": 0.4766,
"step": 65
},
{
"epoch": 0.09,
"learning_rate": 4.8807820696232716e-05,
"loss": 0.4929,
"step": 66
},
{
"epoch": 0.1,
"learning_rate": 4.878397711015737e-05,
"loss": 0.4772,
"step": 67
},
{
"epoch": 0.1,
"learning_rate": 4.8760133524082023e-05,
"loss": 0.4695,
"step": 68
},
{
"epoch": 0.1,
"learning_rate": 4.873628993800668e-05,
"loss": 0.4834,
"step": 69
},
{
"epoch": 0.1,
"learning_rate": 4.871244635193134e-05,
"loss": 0.4771,
"step": 70
},
{
"epoch": 0.1,
"learning_rate": 4.868860276585599e-05,
"loss": 0.4607,
"step": 71
},
{
"epoch": 0.1,
"learning_rate": 4.866475917978064e-05,
"loss": 0.4634,
"step": 72
},
{
"epoch": 0.1,
"learning_rate": 4.864091559370529e-05,
"loss": 0.4895,
"step": 73
},
{
"epoch": 0.1,
"learning_rate": 4.8617072007629947e-05,
"loss": 0.4763,
"step": 74
},
{
"epoch": 0.11,
"learning_rate": 4.859322842155461e-05,
"loss": 0.4706,
"step": 75
},
{
"epoch": 0.11,
"learning_rate": 4.856938483547926e-05,
"loss": 0.4996,
"step": 76
},
{
"epoch": 0.11,
"learning_rate": 4.8545541249403915e-05,
"loss": 0.4614,
"step": 77
},
{
"epoch": 0.11,
"learning_rate": 4.852169766332857e-05,
"loss": 0.4573,
"step": 78
},
{
"epoch": 0.11,
"learning_rate": 4.8497854077253216e-05,
"loss": 0.4839,
"step": 79
},
{
"epoch": 0.11,
"learning_rate": 4.8474010491177877e-05,
"loss": 0.467,
"step": 80
},
{
"epoch": 0.11,
"learning_rate": 4.845016690510253e-05,
"loss": 0.449,
"step": 81
},
{
"epoch": 0.12,
"learning_rate": 4.8426323319027184e-05,
"loss": 0.456,
"step": 82
},
{
"epoch": 0.12,
"learning_rate": 4.840247973295184e-05,
"loss": 0.4887,
"step": 83
},
{
"epoch": 0.12,
"learning_rate": 4.837863614687649e-05,
"loss": 0.4534,
"step": 84
},
{
"epoch": 0.12,
"learning_rate": 4.8354792560801146e-05,
"loss": 0.4732,
"step": 85
},
{
"epoch": 0.12,
"learning_rate": 4.8330948974725806e-05,
"loss": 0.4731,
"step": 86
},
{
"epoch": 0.12,
"learning_rate": 4.8307105388650454e-05,
"loss": 0.487,
"step": 87
},
{
"epoch": 0.12,
"learning_rate": 4.828326180257511e-05,
"loss": 0.4588,
"step": 88
},
{
"epoch": 0.13,
"learning_rate": 4.825941821649976e-05,
"loss": 0.4863,
"step": 89
},
{
"epoch": 0.13,
"learning_rate": 4.8235574630424415e-05,
"loss": 0.4597,
"step": 90
},
{
"epoch": 0.13,
"learning_rate": 4.8211731044349076e-05,
"loss": 0.5038,
"step": 91
},
{
"epoch": 0.13,
"learning_rate": 4.818788745827373e-05,
"loss": 0.441,
"step": 92
},
{
"epoch": 0.13,
"learning_rate": 4.8164043872198383e-05,
"loss": 0.4693,
"step": 93
},
{
"epoch": 0.13,
"learning_rate": 4.814020028612303e-05,
"loss": 0.492,
"step": 94
},
{
"epoch": 0.13,
"learning_rate": 4.811635670004769e-05,
"loss": 0.4671,
"step": 95
},
{
"epoch": 0.14,
"learning_rate": 4.8092513113972345e-05,
"loss": 0.4612,
"step": 96
},
{
"epoch": 0.14,
"learning_rate": 4.8068669527897e-05,
"loss": 0.4639,
"step": 97
},
{
"epoch": 0.14,
"learning_rate": 4.804482594182165e-05,
"loss": 0.5006,
"step": 98
},
{
"epoch": 0.14,
"learning_rate": 4.802098235574631e-05,
"loss": 0.4533,
"step": 99
},
{
"epoch": 0.14,
"learning_rate": 4.799713876967096e-05,
"loss": 0.4535,
"step": 100
},
{
"epoch": 0.14,
"learning_rate": 4.7973295183595614e-05,
"loss": 0.434,
"step": 101
},
{
"epoch": 0.14,
"learning_rate": 4.794945159752027e-05,
"loss": 0.4471,
"step": 102
},
{
"epoch": 0.15,
"learning_rate": 4.792560801144492e-05,
"loss": 0.4429,
"step": 103
},
{
"epoch": 0.15,
"learning_rate": 4.7901764425369576e-05,
"loss": 0.4682,
"step": 104
},
{
"epoch": 0.15,
"learning_rate": 4.787792083929423e-05,
"loss": 0.4393,
"step": 105
},
{
"epoch": 0.15,
"learning_rate": 4.785407725321889e-05,
"loss": 0.4617,
"step": 106
},
{
"epoch": 0.15,
"learning_rate": 4.7830233667143544e-05,
"loss": 0.4381,
"step": 107
},
{
"epoch": 0.15,
"learning_rate": 4.78063900810682e-05,
"loss": 0.4357,
"step": 108
},
{
"epoch": 0.15,
"learning_rate": 4.7782546494992845e-05,
"loss": 0.4728,
"step": 109
},
{
"epoch": 0.16,
"learning_rate": 4.77587029089175e-05,
"loss": 0.4412,
"step": 110
},
{
"epoch": 0.16,
"learning_rate": 4.773485932284216e-05,
"loss": 0.4507,
"step": 111
},
{
"epoch": 0.16,
"learning_rate": 4.7711015736766814e-05,
"loss": 0.4455,
"step": 112
},
{
"epoch": 0.16,
"learning_rate": 4.768717215069147e-05,
"loss": 0.4596,
"step": 113
},
{
"epoch": 0.16,
"learning_rate": 4.766332856461612e-05,
"loss": 0.458,
"step": 114
},
{
"epoch": 0.16,
"learning_rate": 4.7639484978540775e-05,
"loss": 0.4607,
"step": 115
},
{
"epoch": 0.16,
"learning_rate": 4.761564139246543e-05,
"loss": 0.4816,
"step": 116
},
{
"epoch": 0.17,
"learning_rate": 4.759179780639008e-05,
"loss": 0.4469,
"step": 117
},
{
"epoch": 0.17,
"learning_rate": 4.756795422031474e-05,
"loss": 0.4515,
"step": 118
},
{
"epoch": 0.17,
"learning_rate": 4.754411063423939e-05,
"loss": 0.4474,
"step": 119
},
{
"epoch": 0.17,
"learning_rate": 4.7520267048164044e-05,
"loss": 0.4355,
"step": 120
},
{
"epoch": 0.17,
"learning_rate": 4.74964234620887e-05,
"loss": 0.429,
"step": 121
},
{
"epoch": 0.17,
"learning_rate": 4.747257987601336e-05,
"loss": 0.4234,
"step": 122
},
{
"epoch": 0.17,
"learning_rate": 4.744873628993801e-05,
"loss": 0.4589,
"step": 123
},
{
"epoch": 0.18,
"learning_rate": 4.742489270386266e-05,
"loss": 0.4644,
"step": 124
},
{
"epoch": 0.18,
"learning_rate": 4.7401049117787314e-05,
"loss": 0.4266,
"step": 125
},
{
"epoch": 0.18,
"learning_rate": 4.7377205531711974e-05,
"loss": 0.4692,
"step": 126
},
{
"epoch": 0.18,
"learning_rate": 4.735336194563663e-05,
"loss": 0.4277,
"step": 127
},
{
"epoch": 0.18,
"learning_rate": 4.732951835956128e-05,
"loss": 0.427,
"step": 128
},
{
"epoch": 0.18,
"learning_rate": 4.7305674773485936e-05,
"loss": 0.455,
"step": 129
},
{
"epoch": 0.18,
"learning_rate": 4.728183118741059e-05,
"loss": 0.455,
"step": 130
},
{
"epoch": 0.19,
"learning_rate": 4.7257987601335244e-05,
"loss": 0.4493,
"step": 131
},
{
"epoch": 0.19,
"learning_rate": 4.72341440152599e-05,
"loss": 0.4597,
"step": 132
},
{
"epoch": 0.19,
"learning_rate": 4.721030042918455e-05,
"loss": 0.4407,
"step": 133
},
{
"epoch": 0.19,
"learning_rate": 4.7186456843109205e-05,
"loss": 0.4464,
"step": 134
},
{
"epoch": 0.19,
"learning_rate": 4.716261325703386e-05,
"loss": 0.4767,
"step": 135
},
{
"epoch": 0.19,
"learning_rate": 4.713876967095851e-05,
"loss": 0.4599,
"step": 136
},
{
"epoch": 0.19,
"learning_rate": 4.7114926084883174e-05,
"loss": 0.4269,
"step": 137
},
{
"epoch": 0.2,
"learning_rate": 4.709108249880782e-05,
"loss": 0.4309,
"step": 138
},
{
"epoch": 0.2,
"learning_rate": 4.7067238912732475e-05,
"loss": 0.4395,
"step": 139
},
{
"epoch": 0.2,
"learning_rate": 4.704339532665713e-05,
"loss": 0.4455,
"step": 140
},
{
"epoch": 0.2,
"learning_rate": 4.701955174058178e-05,
"loss": 0.4713,
"step": 141
},
{
"epoch": 0.2,
"learning_rate": 4.699570815450644e-05,
"loss": 0.4576,
"step": 142
},
{
"epoch": 0.2,
"learning_rate": 4.69718645684311e-05,
"loss": 0.4353,
"step": 143
},
{
"epoch": 0.2,
"learning_rate": 4.694802098235575e-05,
"loss": 0.467,
"step": 144
},
{
"epoch": 0.21,
"learning_rate": 4.6924177396280405e-05,
"loss": 0.4624,
"step": 145
},
{
"epoch": 0.21,
"learning_rate": 4.690033381020505e-05,
"loss": 0.4568,
"step": 146
},
{
"epoch": 0.21,
"learning_rate": 4.687649022412971e-05,
"loss": 0.44,
"step": 147
},
{
"epoch": 0.21,
"learning_rate": 4.6852646638054366e-05,
"loss": 0.4442,
"step": 148
},
{
"epoch": 0.21,
"learning_rate": 4.682880305197902e-05,
"loss": 0.4309,
"step": 149
},
{
"epoch": 0.21,
"learning_rate": 4.6804959465903674e-05,
"loss": 0.4321,
"step": 150
},
{
"epoch": 0.21,
"learning_rate": 4.678111587982833e-05,
"loss": 0.4464,
"step": 151
},
{
"epoch": 0.22,
"learning_rate": 4.675727229375299e-05,
"loss": 0.4589,
"step": 152
},
{
"epoch": 0.22,
"learning_rate": 4.6733428707677635e-05,
"loss": 0.4375,
"step": 153
},
{
"epoch": 0.22,
"learning_rate": 4.670958512160229e-05,
"loss": 0.4465,
"step": 154
},
{
"epoch": 0.22,
"learning_rate": 4.668574153552694e-05,
"loss": 0.4257,
"step": 155
},
{
"epoch": 0.22,
"learning_rate": 4.66618979494516e-05,
"loss": 0.43,
"step": 156
},
{
"epoch": 0.22,
"learning_rate": 4.663805436337626e-05,
"loss": 0.4512,
"step": 157
},
{
"epoch": 0.22,
"learning_rate": 4.661421077730091e-05,
"loss": 0.4393,
"step": 158
},
{
"epoch": 0.23,
"learning_rate": 4.6590367191225565e-05,
"loss": 0.4319,
"step": 159
},
{
"epoch": 0.23,
"learning_rate": 4.656652360515021e-05,
"loss": 0.4515,
"step": 160
},
{
"epoch": 0.23,
"learning_rate": 4.6542680019074866e-05,
"loss": 0.44,
"step": 161
},
{
"epoch": 0.23,
"learning_rate": 4.651883643299953e-05,
"loss": 0.4329,
"step": 162
},
{
"epoch": 0.23,
"learning_rate": 4.649499284692418e-05,
"loss": 0.4447,
"step": 163
},
{
"epoch": 0.23,
"learning_rate": 4.6471149260848835e-05,
"loss": 0.4546,
"step": 164
},
{
"epoch": 0.23,
"learning_rate": 4.644730567477349e-05,
"loss": 0.4473,
"step": 165
},
{
"epoch": 0.24,
"learning_rate": 4.642346208869814e-05,
"loss": 0.4443,
"step": 166
},
{
"epoch": 0.24,
"learning_rate": 4.6399618502622796e-05,
"loss": 0.429,
"step": 167
},
{
"epoch": 0.24,
"learning_rate": 4.637577491654745e-05,
"loss": 0.4514,
"step": 168
},
{
"epoch": 0.24,
"learning_rate": 4.6351931330472104e-05,
"loss": 0.4492,
"step": 169
},
{
"epoch": 0.24,
"learning_rate": 4.632808774439676e-05,
"loss": 0.4327,
"step": 170
},
{
"epoch": 0.24,
"learning_rate": 4.630424415832141e-05,
"loss": 0.417,
"step": 171
},
{
"epoch": 0.24,
"learning_rate": 4.6280400572246065e-05,
"loss": 0.4552,
"step": 172
},
{
"epoch": 0.25,
"learning_rate": 4.6256556986170726e-05,
"loss": 0.4356,
"step": 173
},
{
"epoch": 0.25,
"learning_rate": 4.623271340009538e-05,
"loss": 0.4585,
"step": 174
},
{
"epoch": 0.25,
"learning_rate": 4.620886981402003e-05,
"loss": 0.4408,
"step": 175
},
{
"epoch": 0.25,
"learning_rate": 4.618502622794468e-05,
"loss": 0.4306,
"step": 176
},
{
"epoch": 0.25,
"learning_rate": 4.616118264186934e-05,
"loss": 0.4605,
"step": 177
},
{
"epoch": 0.25,
"learning_rate": 4.6137339055793995e-05,
"loss": 0.4402,
"step": 178
},
{
"epoch": 0.25,
"learning_rate": 4.611349546971865e-05,
"loss": 0.4224,
"step": 179
},
{
"epoch": 0.26,
"learning_rate": 4.60896518836433e-05,
"loss": 0.457,
"step": 180
},
{
"epoch": 0.26,
"learning_rate": 4.606580829756796e-05,
"loss": 0.4161,
"step": 181
},
{
"epoch": 0.26,
"learning_rate": 4.604196471149261e-05,
"loss": 0.4513,
"step": 182
},
{
"epoch": 0.26,
"learning_rate": 4.6018121125417265e-05,
"loss": 0.4359,
"step": 183
},
{
"epoch": 0.26,
"learning_rate": 4.599427753934192e-05,
"loss": 0.4604,
"step": 184
},
{
"epoch": 0.26,
"learning_rate": 4.597043395326657e-05,
"loss": 0.4302,
"step": 185
},
{
"epoch": 0.26,
"learning_rate": 4.5946590367191226e-05,
"loss": 0.4181,
"step": 186
},
{
"epoch": 0.27,
"learning_rate": 4.592274678111588e-05,
"loss": 0.4369,
"step": 187
},
{
"epoch": 0.27,
"learning_rate": 4.589890319504054e-05,
"loss": 0.4443,
"step": 188
},
{
"epoch": 0.27,
"learning_rate": 4.5875059608965195e-05,
"loss": 0.4211,
"step": 189
},
{
"epoch": 0.27,
"learning_rate": 4.585121602288984e-05,
"loss": 0.446,
"step": 190
},
{
"epoch": 0.27,
"learning_rate": 4.5827372436814496e-05,
"loss": 0.4533,
"step": 191
},
{
"epoch": 0.27,
"learning_rate": 4.580352885073915e-05,
"loss": 0.4508,
"step": 192
},
{
"epoch": 0.27,
"learning_rate": 4.577968526466381e-05,
"loss": 0.4435,
"step": 193
},
{
"epoch": 0.28,
"learning_rate": 4.5755841678588464e-05,
"loss": 0.452,
"step": 194
},
{
"epoch": 0.28,
"learning_rate": 4.573199809251312e-05,
"loss": 0.4551,
"step": 195
},
{
"epoch": 0.28,
"learning_rate": 4.570815450643777e-05,
"loss": 0.4219,
"step": 196
},
{
"epoch": 0.28,
"learning_rate": 4.568431092036242e-05,
"loss": 0.4235,
"step": 197
},
{
"epoch": 0.28,
"learning_rate": 4.566046733428708e-05,
"loss": 0.4254,
"step": 198
},
{
"epoch": 0.28,
"learning_rate": 4.563662374821173e-05,
"loss": 0.4466,
"step": 199
},
{
"epoch": 0.28,
"learning_rate": 4.561278016213639e-05,
"loss": 0.4336,
"step": 200
},
{
"epoch": 0.29,
"learning_rate": 4.558893657606104e-05,
"loss": 0.4167,
"step": 201
},
{
"epoch": 0.29,
"learning_rate": 4.5565092989985695e-05,
"loss": 0.4383,
"step": 202
},
{
"epoch": 0.29,
"learning_rate": 4.5541249403910355e-05,
"loss": 0.4364,
"step": 203
},
{
"epoch": 0.29,
"learning_rate": 4.551740581783501e-05,
"loss": 0.4603,
"step": 204
},
{
"epoch": 0.29,
"learning_rate": 4.5493562231759656e-05,
"loss": 0.4446,
"step": 205
},
{
"epoch": 0.29,
"learning_rate": 4.546971864568431e-05,
"loss": 0.4362,
"step": 206
},
{
"epoch": 0.29,
"learning_rate": 4.5445875059608964e-05,
"loss": 0.4395,
"step": 207
},
{
"epoch": 0.3,
"learning_rate": 4.5422031473533625e-05,
"loss": 0.4303,
"step": 208
},
{
"epoch": 0.3,
"learning_rate": 4.539818788745828e-05,
"loss": 0.4456,
"step": 209
},
{
"epoch": 0.3,
"learning_rate": 4.537434430138293e-05,
"loss": 0.4516,
"step": 210
},
{
"epoch": 0.3,
"learning_rate": 4.5350500715307586e-05,
"loss": 0.4309,
"step": 211
},
{
"epoch": 0.3,
"learning_rate": 4.5326657129232233e-05,
"loss": 0.424,
"step": 212
},
{
"epoch": 0.3,
"learning_rate": 4.5302813543156894e-05,
"loss": 0.4472,
"step": 213
},
{
"epoch": 0.3,
"learning_rate": 4.527896995708155e-05,
"loss": 0.449,
"step": 214
},
{
"epoch": 0.31,
"learning_rate": 4.52551263710062e-05,
"loss": 0.4371,
"step": 215
},
{
"epoch": 0.31,
"learning_rate": 4.5231282784930856e-05,
"loss": 0.4096,
"step": 216
},
{
"epoch": 0.31,
"learning_rate": 4.520743919885551e-05,
"loss": 0.4294,
"step": 217
},
{
"epoch": 0.31,
"learning_rate": 4.518359561278016e-05,
"loss": 0.434,
"step": 218
},
{
"epoch": 0.31,
"learning_rate": 4.5159752026704824e-05,
"loss": 0.4338,
"step": 219
},
{
"epoch": 0.31,
"learning_rate": 4.513590844062947e-05,
"loss": 0.456,
"step": 220
},
{
"epoch": 0.31,
"learning_rate": 4.5112064854554125e-05,
"loss": 0.4511,
"step": 221
},
{
"epoch": 0.31,
"learning_rate": 4.508822126847878e-05,
"loss": 0.4335,
"step": 222
},
{
"epoch": 0.32,
"learning_rate": 4.506437768240343e-05,
"loss": 0.4452,
"step": 223
},
{
"epoch": 0.32,
"learning_rate": 4.504053409632809e-05,
"loss": 0.4272,
"step": 224
},
{
"epoch": 0.32,
"learning_rate": 4.501669051025275e-05,
"loss": 0.4283,
"step": 225
},
{
"epoch": 0.32,
"learning_rate": 4.49928469241774e-05,
"loss": 0.4408,
"step": 226
},
{
"epoch": 0.32,
"learning_rate": 4.496900333810205e-05,
"loss": 0.4238,
"step": 227
},
{
"epoch": 0.32,
"learning_rate": 4.494515975202671e-05,
"loss": 0.4394,
"step": 228
},
{
"epoch": 0.32,
"learning_rate": 4.492131616595136e-05,
"loss": 0.421,
"step": 229
},
{
"epoch": 0.33,
"learning_rate": 4.4897472579876016e-05,
"loss": 0.4452,
"step": 230
},
{
"epoch": 0.33,
"learning_rate": 4.487362899380067e-05,
"loss": 0.4179,
"step": 231
},
{
"epoch": 0.33,
"learning_rate": 4.4849785407725324e-05,
"loss": 0.4346,
"step": 232
},
{
"epoch": 0.33,
"learning_rate": 4.482594182164998e-05,
"loss": 0.4207,
"step": 233
},
{
"epoch": 0.33,
"learning_rate": 4.480209823557463e-05,
"loss": 0.4304,
"step": 234
},
{
"epoch": 0.33,
"learning_rate": 4.4778254649499286e-05,
"loss": 0.4367,
"step": 235
},
{
"epoch": 0.33,
"learning_rate": 4.475441106342394e-05,
"loss": 0.4358,
"step": 236
},
{
"epoch": 0.34,
"learning_rate": 4.4730567477348593e-05,
"loss": 0.4096,
"step": 237
},
{
"epoch": 0.34,
"learning_rate": 4.470672389127325e-05,
"loss": 0.4615,
"step": 238
},
{
"epoch": 0.34,
"learning_rate": 4.468288030519791e-05,
"loss": 0.4201,
"step": 239
},
{
"epoch": 0.34,
"learning_rate": 4.465903671912256e-05,
"loss": 0.4213,
"step": 240
},
{
"epoch": 0.34,
"learning_rate": 4.4635193133047216e-05,
"loss": 0.4435,
"step": 241
},
{
"epoch": 0.34,
"learning_rate": 4.461134954697186e-05,
"loss": 0.4192,
"step": 242
},
{
"epoch": 0.34,
"learning_rate": 4.458750596089652e-05,
"loss": 0.4438,
"step": 243
},
{
"epoch": 0.35,
"learning_rate": 4.456366237482118e-05,
"loss": 0.4147,
"step": 244
},
{
"epoch": 0.35,
"learning_rate": 4.453981878874583e-05,
"loss": 0.4161,
"step": 245
},
{
"epoch": 0.35,
"learning_rate": 4.4515975202670485e-05,
"loss": 0.4302,
"step": 246
},
{
"epoch": 0.35,
"learning_rate": 4.449213161659514e-05,
"loss": 0.424,
"step": 247
},
{
"epoch": 0.35,
"learning_rate": 4.446828803051979e-05,
"loss": 0.4446,
"step": 248
},
{
"epoch": 0.35,
"learning_rate": 4.4444444444444447e-05,
"loss": 0.4123,
"step": 249
},
{
"epoch": 0.35,
"learning_rate": 4.44206008583691e-05,
"loss": 0.4187,
"step": 250
},
{
"epoch": 0.36,
"learning_rate": 4.4396757272293754e-05,
"loss": 0.4316,
"step": 251
},
{
"epoch": 0.36,
"learning_rate": 4.437291368621841e-05,
"loss": 0.4294,
"step": 252
},
{
"epoch": 0.36,
"learning_rate": 4.434907010014306e-05,
"loss": 0.4462,
"step": 253
},
{
"epoch": 0.36,
"learning_rate": 4.4325226514067716e-05,
"loss": 0.4413,
"step": 254
},
{
"epoch": 0.36,
"learning_rate": 4.4301382927992377e-05,
"loss": 0.414,
"step": 255
},
{
"epoch": 0.36,
"learning_rate": 4.4277539341917024e-05,
"loss": 0.4279,
"step": 256
},
{
"epoch": 0.36,
"learning_rate": 4.425369575584168e-05,
"loss": 0.4207,
"step": 257
},
{
"epoch": 0.37,
"learning_rate": 4.422985216976633e-05,
"loss": 0.4512,
"step": 258
},
{
"epoch": 0.37,
"learning_rate": 4.420600858369099e-05,
"loss": 0.4407,
"step": 259
},
{
"epoch": 0.37,
"learning_rate": 4.4182164997615646e-05,
"loss": 0.4131,
"step": 260
},
{
"epoch": 0.37,
"learning_rate": 4.41583214115403e-05,
"loss": 0.4306,
"step": 261
},
{
"epoch": 0.37,
"learning_rate": 4.4134477825464954e-05,
"loss": 0.4245,
"step": 262
},
{
"epoch": 0.37,
"learning_rate": 4.411063423938961e-05,
"loss": 0.445,
"step": 263
},
{
"epoch": 0.37,
"learning_rate": 4.408679065331426e-05,
"loss": 0.4182,
"step": 264
},
{
"epoch": 0.38,
"learning_rate": 4.4062947067238915e-05,
"loss": 0.4482,
"step": 265
},
{
"epoch": 0.38,
"learning_rate": 4.403910348116357e-05,
"loss": 0.4409,
"step": 266
},
{
"epoch": 0.38,
"learning_rate": 4.401525989508822e-05,
"loss": 0.4253,
"step": 267
},
{
"epoch": 0.38,
"learning_rate": 4.399141630901288e-05,
"loss": 0.4194,
"step": 268
},
{
"epoch": 0.38,
"learning_rate": 4.396757272293753e-05,
"loss": 0.4185,
"step": 269
},
{
"epoch": 0.38,
"learning_rate": 4.394372913686219e-05,
"loss": 0.4257,
"step": 270
},
{
"epoch": 0.38,
"learning_rate": 4.391988555078684e-05,
"loss": 0.4209,
"step": 271
},
{
"epoch": 0.39,
"learning_rate": 4.389604196471149e-05,
"loss": 0.4291,
"step": 272
},
{
"epoch": 0.39,
"learning_rate": 4.3872198378636146e-05,
"loss": 0.403,
"step": 273
},
{
"epoch": 0.39,
"learning_rate": 4.38483547925608e-05,
"loss": 0.4043,
"step": 274
},
{
"epoch": 0.39,
"learning_rate": 4.382451120648546e-05,
"loss": 0.4165,
"step": 275
},
{
"epoch": 0.39,
"learning_rate": 4.3800667620410114e-05,
"loss": 0.4274,
"step": 276
},
{
"epoch": 0.39,
"learning_rate": 4.377682403433477e-05,
"loss": 0.427,
"step": 277
},
{
"epoch": 0.39,
"learning_rate": 4.375298044825942e-05,
"loss": 0.4368,
"step": 278
},
{
"epoch": 0.4,
"learning_rate": 4.372913686218407e-05,
"loss": 0.4301,
"step": 279
},
{
"epoch": 0.4,
"learning_rate": 4.370529327610873e-05,
"loss": 0.4225,
"step": 280
},
{
"epoch": 0.4,
"learning_rate": 4.3681449690033384e-05,
"loss": 0.4209,
"step": 281
},
{
"epoch": 0.4,
"learning_rate": 4.365760610395804e-05,
"loss": 0.4156,
"step": 282
},
{
"epoch": 0.4,
"learning_rate": 4.363376251788269e-05,
"loss": 0.4046,
"step": 283
},
{
"epoch": 0.4,
"learning_rate": 4.3609918931807345e-05,
"loss": 0.4397,
"step": 284
},
{
"epoch": 0.4,
"learning_rate": 4.3586075345732006e-05,
"loss": 0.4344,
"step": 285
},
{
"epoch": 0.41,
"learning_rate": 4.356223175965665e-05,
"loss": 0.421,
"step": 286
},
{
"epoch": 0.41,
"learning_rate": 4.353838817358131e-05,
"loss": 0.4358,
"step": 287
},
{
"epoch": 0.41,
"learning_rate": 4.351454458750596e-05,
"loss": 0.4308,
"step": 288
},
{
"epoch": 0.41,
"learning_rate": 4.3490701001430615e-05,
"loss": 0.4256,
"step": 289
},
{
"epoch": 0.41,
"learning_rate": 4.3466857415355275e-05,
"loss": 0.4035,
"step": 290
},
{
"epoch": 0.41,
"learning_rate": 4.344301382927993e-05,
"loss": 0.4197,
"step": 291
},
{
"epoch": 0.41,
"learning_rate": 4.341917024320458e-05,
"loss": 0.4115,
"step": 292
},
{
"epoch": 0.42,
"learning_rate": 4.339532665712923e-05,
"loss": 0.4242,
"step": 293
},
{
"epoch": 0.42,
"learning_rate": 4.3371483071053884e-05,
"loss": 0.431,
"step": 294
},
{
"epoch": 0.42,
"learning_rate": 4.3347639484978544e-05,
"loss": 0.4149,
"step": 295
},
{
"epoch": 0.42,
"learning_rate": 4.33237958989032e-05,
"loss": 0.426,
"step": 296
},
{
"epoch": 0.42,
"learning_rate": 4.329995231282785e-05,
"loss": 0.4248,
"step": 297
},
{
"epoch": 0.42,
"learning_rate": 4.3276108726752506e-05,
"loss": 0.4275,
"step": 298
},
{
"epoch": 0.42,
"learning_rate": 4.325226514067716e-05,
"loss": 0.4285,
"step": 299
},
{
"epoch": 0.43,
"learning_rate": 4.3228421554601814e-05,
"loss": 0.4246,
"step": 300
},
{
"epoch": 0.43,
"learning_rate": 4.320457796852647e-05,
"loss": 0.4375,
"step": 301
},
{
"epoch": 0.43,
"learning_rate": 4.318073438245112e-05,
"loss": 0.406,
"step": 302
},
{
"epoch": 0.43,
"learning_rate": 4.3156890796375775e-05,
"loss": 0.4376,
"step": 303
},
{
"epoch": 0.43,
"learning_rate": 4.313304721030043e-05,
"loss": 0.416,
"step": 304
},
{
"epoch": 0.43,
"learning_rate": 4.310920362422508e-05,
"loss": 0.3924,
"step": 305
},
{
"epoch": 0.43,
"learning_rate": 4.3085360038149744e-05,
"loss": 0.4497,
"step": 306
},
{
"epoch": 0.44,
"learning_rate": 4.30615164520744e-05,
"loss": 0.4063,
"step": 307
},
{
"epoch": 0.44,
"learning_rate": 4.3037672865999045e-05,
"loss": 0.419,
"step": 308
},
{
"epoch": 0.44,
"learning_rate": 4.30138292799237e-05,
"loss": 0.3914,
"step": 309
},
{
"epoch": 0.44,
"learning_rate": 4.298998569384836e-05,
"loss": 0.4157,
"step": 310
},
{
"epoch": 0.44,
"learning_rate": 4.296614210777301e-05,
"loss": 0.4178,
"step": 311
},
{
"epoch": 0.44,
"learning_rate": 4.294229852169767e-05,
"loss": 0.4328,
"step": 312
},
{
"epoch": 0.44,
"learning_rate": 4.291845493562232e-05,
"loss": 0.434,
"step": 313
},
{
"epoch": 0.45,
"learning_rate": 4.2894611349546975e-05,
"loss": 0.4294,
"step": 314
},
{
"epoch": 0.45,
"learning_rate": 4.287076776347163e-05,
"loss": 0.427,
"step": 315
},
{
"epoch": 0.45,
"learning_rate": 4.284692417739628e-05,
"loss": 0.4103,
"step": 316
},
{
"epoch": 0.45,
"learning_rate": 4.2823080591320936e-05,
"loss": 0.4009,
"step": 317
},
{
"epoch": 0.45,
"learning_rate": 4.279923700524559e-05,
"loss": 0.4389,
"step": 318
},
{
"epoch": 0.45,
"learning_rate": 4.2775393419170244e-05,
"loss": 0.4086,
"step": 319
},
{
"epoch": 0.45,
"learning_rate": 4.27515498330949e-05,
"loss": 0.4403,
"step": 320
},
{
"epoch": 0.46,
"learning_rate": 4.272770624701956e-05,
"loss": 0.413,
"step": 321
},
{
"epoch": 0.46,
"learning_rate": 4.270386266094421e-05,
"loss": 0.433,
"step": 322
},
{
"epoch": 0.46,
"learning_rate": 4.268001907486886e-05,
"loss": 0.4249,
"step": 323
},
{
"epoch": 0.46,
"learning_rate": 4.265617548879351e-05,
"loss": 0.4279,
"step": 324
},
{
"epoch": 0.46,
"learning_rate": 4.263233190271817e-05,
"loss": 0.4089,
"step": 325
},
{
"epoch": 0.46,
"learning_rate": 4.260848831664283e-05,
"loss": 0.4416,
"step": 326
},
{
"epoch": 0.46,
"learning_rate": 4.258464473056748e-05,
"loss": 0.4211,
"step": 327
},
{
"epoch": 0.47,
"learning_rate": 4.2560801144492135e-05,
"loss": 0.3975,
"step": 328
},
{
"epoch": 0.47,
"learning_rate": 4.253695755841679e-05,
"loss": 0.4216,
"step": 329
},
{
"epoch": 0.47,
"learning_rate": 4.2513113972341436e-05,
"loss": 0.3844,
"step": 330
},
{
"epoch": 0.47,
"learning_rate": 4.24892703862661e-05,
"loss": 0.4144,
"step": 331
},
{
"epoch": 0.47,
"learning_rate": 4.246542680019075e-05,
"loss": 0.4121,
"step": 332
},
{
"epoch": 0.47,
"learning_rate": 4.2441583214115405e-05,
"loss": 0.4246,
"step": 333
},
{
"epoch": 0.47,
"learning_rate": 4.241773962804006e-05,
"loss": 0.4149,
"step": 334
},
{
"epoch": 0.48,
"learning_rate": 4.239389604196471e-05,
"loss": 0.4217,
"step": 335
},
{
"epoch": 0.48,
"learning_rate": 4.237005245588937e-05,
"loss": 0.4324,
"step": 336
},
{
"epoch": 0.48,
"learning_rate": 4.234620886981403e-05,
"loss": 0.4261,
"step": 337
},
{
"epoch": 0.48,
"learning_rate": 4.2322365283738674e-05,
"loss": 0.4192,
"step": 338
},
{
"epoch": 0.48,
"learning_rate": 4.229852169766333e-05,
"loss": 0.42,
"step": 339
},
{
"epoch": 0.48,
"learning_rate": 4.227467811158798e-05,
"loss": 0.4221,
"step": 340
},
{
"epoch": 0.48,
"learning_rate": 4.225083452551264e-05,
"loss": 0.4244,
"step": 341
},
{
"epoch": 0.49,
"learning_rate": 4.2226990939437296e-05,
"loss": 0.4528,
"step": 342
},
{
"epoch": 0.49,
"learning_rate": 4.220314735336195e-05,
"loss": 0.4225,
"step": 343
},
{
"epoch": 0.49,
"learning_rate": 4.2179303767286604e-05,
"loss": 0.4394,
"step": 344
},
{
"epoch": 0.49,
"learning_rate": 4.215546018121125e-05,
"loss": 0.4198,
"step": 345
},
{
"epoch": 0.49,
"learning_rate": 4.213161659513591e-05,
"loss": 0.4425,
"step": 346
},
{
"epoch": 0.49,
"learning_rate": 4.2107773009060565e-05,
"loss": 0.4371,
"step": 347
},
{
"epoch": 0.49,
"learning_rate": 4.208392942298522e-05,
"loss": 0.4275,
"step": 348
},
{
"epoch": 0.5,
"learning_rate": 4.206008583690987e-05,
"loss": 0.4186,
"step": 349
},
{
"epoch": 0.5,
"learning_rate": 4.203624225083453e-05,
"loss": 0.4219,
"step": 350
},
{
"epoch": 0.5,
"learning_rate": 4.201239866475918e-05,
"loss": 0.4179,
"step": 351
},
{
"epoch": 0.5,
"learning_rate": 4.1988555078683835e-05,
"loss": 0.3877,
"step": 352
},
{
"epoch": 0.5,
"learning_rate": 4.196471149260849e-05,
"loss": 0.3955,
"step": 353
},
{
"epoch": 0.5,
"learning_rate": 4.194086790653314e-05,
"loss": 0.4243,
"step": 354
},
{
"epoch": 0.5,
"learning_rate": 4.1917024320457796e-05,
"loss": 0.4153,
"step": 355
},
{
"epoch": 0.51,
"learning_rate": 4.189318073438245e-05,
"loss": 0.4492,
"step": 356
},
{
"epoch": 0.51,
"learning_rate": 4.186933714830711e-05,
"loss": 0.4202,
"step": 357
},
{
"epoch": 0.51,
"learning_rate": 4.1845493562231765e-05,
"loss": 0.4111,
"step": 358
},
{
"epoch": 0.51,
"learning_rate": 4.182164997615642e-05,
"loss": 0.4096,
"step": 359
},
{
"epoch": 0.51,
"learning_rate": 4.1797806390081066e-05,
"loss": 0.4198,
"step": 360
},
{
"epoch": 0.51,
"learning_rate": 4.1773962804005726e-05,
"loss": 0.4102,
"step": 361
},
{
"epoch": 0.51,
"learning_rate": 4.175011921793038e-05,
"loss": 0.4251,
"step": 362
},
{
"epoch": 0.51,
"learning_rate": 4.1726275631855034e-05,
"loss": 0.4123,
"step": 363
},
{
"epoch": 0.52,
"learning_rate": 4.170243204577969e-05,
"loss": 0.4007,
"step": 364
},
{
"epoch": 0.52,
"learning_rate": 4.167858845970434e-05,
"loss": 0.4238,
"step": 365
},
{
"epoch": 0.52,
"learning_rate": 4.1654744873628996e-05,
"loss": 0.4204,
"step": 366
},
{
"epoch": 0.52,
"learning_rate": 4.163090128755365e-05,
"loss": 0.4196,
"step": 367
},
{
"epoch": 0.52,
"learning_rate": 4.16070577014783e-05,
"loss": 0.4182,
"step": 368
},
{
"epoch": 0.52,
"learning_rate": 4.158321411540296e-05,
"loss": 0.4227,
"step": 369
},
{
"epoch": 0.52,
"learning_rate": 4.155937052932761e-05,
"loss": 0.4167,
"step": 370
},
{
"epoch": 0.53,
"learning_rate": 4.1535526943252265e-05,
"loss": 0.421,
"step": 371
},
{
"epoch": 0.53,
"learning_rate": 4.1511683357176926e-05,
"loss": 0.4244,
"step": 372
},
{
"epoch": 0.53,
"learning_rate": 4.148783977110158e-05,
"loss": 0.4066,
"step": 373
},
{
"epoch": 0.53,
"learning_rate": 4.146399618502623e-05,
"loss": 0.4449,
"step": 374
},
{
"epoch": 0.53,
"learning_rate": 4.144015259895088e-05,
"loss": 0.4308,
"step": 375
},
{
"epoch": 0.53,
"learning_rate": 4.1416309012875534e-05,
"loss": 0.4146,
"step": 376
},
{
"epoch": 0.53,
"learning_rate": 4.1392465426800195e-05,
"loss": 0.4178,
"step": 377
},
{
"epoch": 0.54,
"learning_rate": 4.136862184072485e-05,
"loss": 0.41,
"step": 378
},
{
"epoch": 0.54,
"learning_rate": 4.13447782546495e-05,
"loss": 0.4251,
"step": 379
},
{
"epoch": 0.54,
"learning_rate": 4.1320934668574156e-05,
"loss": 0.4209,
"step": 380
},
{
"epoch": 0.54,
"learning_rate": 4.129709108249881e-05,
"loss": 0.4224,
"step": 381
},
{
"epoch": 0.54,
"learning_rate": 4.1273247496423464e-05,
"loss": 0.4183,
"step": 382
},
{
"epoch": 0.54,
"learning_rate": 4.124940391034812e-05,
"loss": 0.4376,
"step": 383
},
{
"epoch": 0.54,
"learning_rate": 4.122556032427277e-05,
"loss": 0.4106,
"step": 384
},
{
"epoch": 0.55,
"learning_rate": 4.1201716738197426e-05,
"loss": 0.4081,
"step": 385
},
{
"epoch": 0.55,
"learning_rate": 4.117787315212208e-05,
"loss": 0.4195,
"step": 386
},
{
"epoch": 0.55,
"learning_rate": 4.1154029566046733e-05,
"loss": 0.427,
"step": 387
},
{
"epoch": 0.55,
"learning_rate": 4.1130185979971394e-05,
"loss": 0.4198,
"step": 388
},
{
"epoch": 0.55,
"learning_rate": 4.110634239389604e-05,
"loss": 0.4082,
"step": 389
},
{
"epoch": 0.55,
"learning_rate": 4.1082498807820695e-05,
"loss": 0.4101,
"step": 390
},
{
"epoch": 0.55,
"learning_rate": 4.105865522174535e-05,
"loss": 0.4148,
"step": 391
},
{
"epoch": 0.56,
"learning_rate": 4.103481163567001e-05,
"loss": 0.4288,
"step": 392
},
{
"epoch": 0.56,
"learning_rate": 4.101096804959466e-05,
"loss": 0.4277,
"step": 393
},
{
"epoch": 0.56,
"learning_rate": 4.098712446351932e-05,
"loss": 0.4207,
"step": 394
},
{
"epoch": 0.56,
"learning_rate": 4.096328087744397e-05,
"loss": 0.4072,
"step": 395
},
{
"epoch": 0.56,
"learning_rate": 4.0939437291368625e-05,
"loss": 0.4238,
"step": 396
},
{
"epoch": 0.56,
"learning_rate": 4.091559370529328e-05,
"loss": 0.4035,
"step": 397
},
{
"epoch": 0.56,
"learning_rate": 4.089175011921793e-05,
"loss": 0.4009,
"step": 398
},
{
"epoch": 0.57,
"learning_rate": 4.0867906533142587e-05,
"loss": 0.405,
"step": 399
},
{
"epoch": 0.57,
"learning_rate": 4.084406294706724e-05,
"loss": 0.4159,
"step": 400
},
{
"epoch": 0.57,
"learning_rate": 4.0820219360991894e-05,
"loss": 0.3982,
"step": 401
},
{
"epoch": 0.57,
"learning_rate": 4.079637577491655e-05,
"loss": 0.4182,
"step": 402
},
{
"epoch": 0.57,
"learning_rate": 4.077253218884121e-05,
"loss": 0.4132,
"step": 403
},
{
"epoch": 0.57,
"learning_rate": 4.0748688602765856e-05,
"loss": 0.4232,
"step": 404
},
{
"epoch": 0.57,
"learning_rate": 4.072484501669051e-05,
"loss": 0.4178,
"step": 405
},
{
"epoch": 0.58,
"learning_rate": 4.0701001430615164e-05,
"loss": 0.428,
"step": 406
},
{
"epoch": 0.58,
"learning_rate": 4.067715784453982e-05,
"loss": 0.4101,
"step": 407
},
{
"epoch": 0.58,
"learning_rate": 4.065331425846448e-05,
"loss": 0.4247,
"step": 408
},
{
"epoch": 0.58,
"learning_rate": 4.062947067238913e-05,
"loss": 0.4068,
"step": 409
},
{
"epoch": 0.58,
"learning_rate": 4.0605627086313786e-05,
"loss": 0.3983,
"step": 410
},
{
"epoch": 0.58,
"learning_rate": 4.058178350023844e-05,
"loss": 0.4269,
"step": 411
},
{
"epoch": 0.58,
"learning_rate": 4.055793991416309e-05,
"loss": 0.4105,
"step": 412
},
{
"epoch": 0.59,
"learning_rate": 4.053409632808775e-05,
"loss": 0.4342,
"step": 413
},
{
"epoch": 0.59,
"learning_rate": 4.05102527420124e-05,
"loss": 0.4178,
"step": 414
},
{
"epoch": 0.59,
"learning_rate": 4.0486409155937055e-05,
"loss": 0.4348,
"step": 415
},
{
"epoch": 0.59,
"learning_rate": 4.046256556986171e-05,
"loss": 0.4138,
"step": 416
},
{
"epoch": 0.59,
"learning_rate": 4.043872198378636e-05,
"loss": 0.4237,
"step": 417
},
{
"epoch": 0.59,
"learning_rate": 4.0414878397711023e-05,
"loss": 0.3965,
"step": 418
},
{
"epoch": 0.59,
"learning_rate": 4.039103481163567e-05,
"loss": 0.4235,
"step": 419
},
{
"epoch": 0.6,
"learning_rate": 4.0367191225560324e-05,
"loss": 0.4247,
"step": 420
},
{
"epoch": 0.6,
"learning_rate": 4.034334763948498e-05,
"loss": 0.4032,
"step": 421
},
{
"epoch": 0.6,
"learning_rate": 4.031950405340963e-05,
"loss": 0.4184,
"step": 422
},
{
"epoch": 0.6,
"learning_rate": 4.029566046733429e-05,
"loss": 0.3911,
"step": 423
},
{
"epoch": 0.6,
"learning_rate": 4.0271816881258947e-05,
"loss": 0.3885,
"step": 424
},
{
"epoch": 0.6,
"learning_rate": 4.02479732951836e-05,
"loss": 0.4302,
"step": 425
},
{
"epoch": 0.6,
"learning_rate": 4.022412970910825e-05,
"loss": 0.4158,
"step": 426
},
{
"epoch": 0.61,
"learning_rate": 4.02002861230329e-05,
"loss": 0.4098,
"step": 427
},
{
"epoch": 0.61,
"learning_rate": 4.017644253695756e-05,
"loss": 0.4139,
"step": 428
},
{
"epoch": 0.61,
"learning_rate": 4.0152598950882216e-05,
"loss": 0.4221,
"step": 429
},
{
"epoch": 0.61,
"learning_rate": 4.012875536480687e-05,
"loss": 0.4304,
"step": 430
},
{
"epoch": 0.61,
"learning_rate": 4.0104911778731524e-05,
"loss": 0.4283,
"step": 431
},
{
"epoch": 0.61,
"learning_rate": 4.008106819265618e-05,
"loss": 0.4186,
"step": 432
},
{
"epoch": 0.61,
"learning_rate": 4.005722460658083e-05,
"loss": 0.4339,
"step": 433
},
{
"epoch": 0.62,
"learning_rate": 4.0033381020505485e-05,
"loss": 0.438,
"step": 434
},
{
"epoch": 0.62,
"learning_rate": 4.000953743443014e-05,
"loss": 0.4068,
"step": 435
},
{
"epoch": 0.62,
"learning_rate": 3.998569384835479e-05,
"loss": 0.416,
"step": 436
},
{
"epoch": 0.62,
"learning_rate": 3.996185026227945e-05,
"loss": 0.4215,
"step": 437
},
{
"epoch": 0.62,
"learning_rate": 3.99380066762041e-05,
"loss": 0.4235,
"step": 438
},
{
"epoch": 0.62,
"learning_rate": 3.991416309012876e-05,
"loss": 0.409,
"step": 439
},
{
"epoch": 0.62,
"learning_rate": 3.9890319504053415e-05,
"loss": 0.4085,
"step": 440
},
{
"epoch": 0.63,
"learning_rate": 3.986647591797806e-05,
"loss": 0.4064,
"step": 441
},
{
"epoch": 0.63,
"learning_rate": 3.9842632331902716e-05,
"loss": 0.4272,
"step": 442
},
{
"epoch": 0.63,
"learning_rate": 3.981878874582738e-05,
"loss": 0.4196,
"step": 443
},
{
"epoch": 0.63,
"learning_rate": 3.979494515975203e-05,
"loss": 0.4206,
"step": 444
},
{
"epoch": 0.63,
"learning_rate": 3.9771101573676684e-05,
"loss": 0.4059,
"step": 445
},
{
"epoch": 0.63,
"learning_rate": 3.974725798760134e-05,
"loss": 0.4142,
"step": 446
},
{
"epoch": 0.63,
"learning_rate": 3.972341440152599e-05,
"loss": 0.4118,
"step": 447
},
{
"epoch": 0.64,
"learning_rate": 3.9699570815450646e-05,
"loss": 0.4407,
"step": 448
},
{
"epoch": 0.64,
"learning_rate": 3.96757272293753e-05,
"loss": 0.39,
"step": 449
},
{
"epoch": 0.64,
"learning_rate": 3.9651883643299954e-05,
"loss": 0.4009,
"step": 450
},
{
"epoch": 0.64,
"learning_rate": 3.962804005722461e-05,
"loss": 0.4212,
"step": 451
},
{
"epoch": 0.64,
"learning_rate": 3.960419647114926e-05,
"loss": 0.412,
"step": 452
},
{
"epoch": 0.64,
"learning_rate": 3.9580352885073915e-05,
"loss": 0.4004,
"step": 453
},
{
"epoch": 0.64,
"learning_rate": 3.9556509298998576e-05,
"loss": 0.4292,
"step": 454
},
{
"epoch": 0.65,
"learning_rate": 3.953266571292323e-05,
"loss": 0.4004,
"step": 455
},
{
"epoch": 0.65,
"learning_rate": 3.950882212684788e-05,
"loss": 0.4112,
"step": 456
},
{
"epoch": 0.65,
"learning_rate": 3.948497854077253e-05,
"loss": 0.41,
"step": 457
},
{
"epoch": 0.65,
"learning_rate": 3.9461134954697185e-05,
"loss": 0.393,
"step": 458
},
{
"epoch": 0.65,
"learning_rate": 3.9437291368621845e-05,
"loss": 0.3934,
"step": 459
},
{
"epoch": 0.65,
"learning_rate": 3.94134477825465e-05,
"loss": 0.399,
"step": 460
},
{
"epoch": 0.65,
"learning_rate": 3.938960419647115e-05,
"loss": 0.4372,
"step": 461
},
{
"epoch": 0.66,
"learning_rate": 3.936576061039581e-05,
"loss": 0.4419,
"step": 462
},
{
"epoch": 0.66,
"learning_rate": 3.9341917024320454e-05,
"loss": 0.4273,
"step": 463
},
{
"epoch": 0.66,
"learning_rate": 3.9318073438245115e-05,
"loss": 0.3922,
"step": 464
},
{
"epoch": 0.66,
"learning_rate": 3.929422985216977e-05,
"loss": 0.4043,
"step": 465
},
{
"epoch": 0.66,
"learning_rate": 3.927038626609442e-05,
"loss": 0.421,
"step": 466
},
{
"epoch": 0.66,
"learning_rate": 3.9246542680019076e-05,
"loss": 0.4236,
"step": 467
},
{
"epoch": 0.66,
"learning_rate": 3.922269909394373e-05,
"loss": 0.4134,
"step": 468
},
{
"epoch": 0.67,
"learning_rate": 3.919885550786839e-05,
"loss": 0.3827,
"step": 469
},
{
"epoch": 0.67,
"learning_rate": 3.9175011921793044e-05,
"loss": 0.3952,
"step": 470
},
{
"epoch": 0.67,
"learning_rate": 3.915116833571769e-05,
"loss": 0.4203,
"step": 471
},
{
"epoch": 0.67,
"learning_rate": 3.9127324749642345e-05,
"loss": 0.4076,
"step": 472
},
{
"epoch": 0.67,
"learning_rate": 3.9103481163567e-05,
"loss": 0.3838,
"step": 473
},
{
"epoch": 0.67,
"learning_rate": 3.907963757749166e-05,
"loss": 0.4388,
"step": 474
},
{
"epoch": 0.67,
"learning_rate": 3.9055793991416314e-05,
"loss": 0.4407,
"step": 475
},
{
"epoch": 0.68,
"learning_rate": 3.903195040534097e-05,
"loss": 0.4022,
"step": 476
},
{
"epoch": 0.68,
"learning_rate": 3.900810681926562e-05,
"loss": 0.427,
"step": 477
},
{
"epoch": 0.68,
"learning_rate": 3.898426323319027e-05,
"loss": 0.407,
"step": 478
},
{
"epoch": 0.68,
"learning_rate": 3.896041964711493e-05,
"loss": 0.4176,
"step": 479
},
{
"epoch": 0.68,
"learning_rate": 3.893657606103958e-05,
"loss": 0.4073,
"step": 480
},
{
"epoch": 0.68,
"learning_rate": 3.891273247496424e-05,
"loss": 0.4339,
"step": 481
},
{
"epoch": 0.68,
"learning_rate": 3.888888888888889e-05,
"loss": 0.4199,
"step": 482
},
{
"epoch": 0.69,
"learning_rate": 3.8865045302813545e-05,
"loss": 0.4209,
"step": 483
},
{
"epoch": 0.69,
"learning_rate": 3.88412017167382e-05,
"loss": 0.3905,
"step": 484
},
{
"epoch": 0.69,
"learning_rate": 3.881735813066285e-05,
"loss": 0.4267,
"step": 485
},
{
"epoch": 0.69,
"learning_rate": 3.8793514544587506e-05,
"loss": 0.3786,
"step": 486
},
{
"epoch": 0.69,
"learning_rate": 3.876967095851216e-05,
"loss": 0.3966,
"step": 487
},
{
"epoch": 0.69,
"learning_rate": 3.8745827372436814e-05,
"loss": 0.4181,
"step": 488
},
{
"epoch": 0.69,
"learning_rate": 3.872198378636147e-05,
"loss": 0.4096,
"step": 489
},
{
"epoch": 0.7,
"learning_rate": 3.869814020028613e-05,
"loss": 0.426,
"step": 490
},
{
"epoch": 0.7,
"learning_rate": 3.867429661421078e-05,
"loss": 0.4161,
"step": 491
},
{
"epoch": 0.7,
"learning_rate": 3.8650453028135436e-05,
"loss": 0.4073,
"step": 492
},
{
"epoch": 0.7,
"learning_rate": 3.862660944206008e-05,
"loss": 0.4249,
"step": 493
},
{
"epoch": 0.7,
"learning_rate": 3.8602765855984744e-05,
"loss": 0.4035,
"step": 494
},
{
"epoch": 0.7,
"learning_rate": 3.85789222699094e-05,
"loss": 0.3933,
"step": 495
},
{
"epoch": 0.7,
"learning_rate": 3.855507868383405e-05,
"loss": 0.4233,
"step": 496
},
{
"epoch": 0.71,
"learning_rate": 3.8531235097758705e-05,
"loss": 0.4101,
"step": 497
},
{
"epoch": 0.71,
"learning_rate": 3.850739151168336e-05,
"loss": 0.4071,
"step": 498
},
{
"epoch": 0.71,
"learning_rate": 3.848354792560801e-05,
"loss": 0.4113,
"step": 499
},
{
"epoch": 0.71,
"learning_rate": 3.845970433953267e-05,
"loss": 0.4002,
"step": 500
},
{
"epoch": 0.71,
"learning_rate": 3.843586075345732e-05,
"loss": 0.4016,
"step": 501
},
{
"epoch": 0.71,
"learning_rate": 3.8412017167381975e-05,
"loss": 0.4128,
"step": 502
},
{
"epoch": 0.71,
"learning_rate": 3.838817358130663e-05,
"loss": 0.4113,
"step": 503
},
{
"epoch": 0.71,
"learning_rate": 3.836432999523128e-05,
"loss": 0.3946,
"step": 504
},
{
"epoch": 0.72,
"learning_rate": 3.834048640915594e-05,
"loss": 0.3931,
"step": 505
},
{
"epoch": 0.72,
"learning_rate": 3.83166428230806e-05,
"loss": 0.412,
"step": 506
},
{
"epoch": 0.72,
"learning_rate": 3.829279923700525e-05,
"loss": 0.4394,
"step": 507
},
{
"epoch": 0.72,
"learning_rate": 3.82689556509299e-05,
"loss": 0.3829,
"step": 508
},
{
"epoch": 0.72,
"learning_rate": 3.824511206485455e-05,
"loss": 0.4442,
"step": 509
},
{
"epoch": 0.72,
"learning_rate": 3.822126847877921e-05,
"loss": 0.4153,
"step": 510
},
{
"epoch": 0.72,
"learning_rate": 3.8197424892703866e-05,
"loss": 0.403,
"step": 511
},
{
"epoch": 0.73,
"learning_rate": 3.817358130662852e-05,
"loss": 0.4078,
"step": 512
},
{
"epoch": 0.73,
"learning_rate": 3.8149737720553174e-05,
"loss": 0.413,
"step": 513
},
{
"epoch": 0.73,
"learning_rate": 3.812589413447783e-05,
"loss": 0.407,
"step": 514
},
{
"epoch": 0.73,
"learning_rate": 3.810205054840248e-05,
"loss": 0.4088,
"step": 515
},
{
"epoch": 0.73,
"learning_rate": 3.8078206962327136e-05,
"loss": 0.4119,
"step": 516
},
{
"epoch": 0.73,
"learning_rate": 3.805436337625179e-05,
"loss": 0.4191,
"step": 517
},
{
"epoch": 0.73,
"learning_rate": 3.803051979017644e-05,
"loss": 0.3974,
"step": 518
},
{
"epoch": 0.74,
"learning_rate": 3.80066762041011e-05,
"loss": 0.4198,
"step": 519
},
{
"epoch": 0.74,
"learning_rate": 3.798283261802575e-05,
"loss": 0.4176,
"step": 520
},
{
"epoch": 0.74,
"learning_rate": 3.795898903195041e-05,
"loss": 0.3938,
"step": 521
},
{
"epoch": 0.74,
"learning_rate": 3.793514544587506e-05,
"loss": 0.414,
"step": 522
},
{
"epoch": 0.74,
"learning_rate": 3.791130185979971e-05,
"loss": 0.4349,
"step": 523
},
{
"epoch": 0.74,
"learning_rate": 3.7887458273724366e-05,
"loss": 0.3845,
"step": 524
},
{
"epoch": 0.74,
"learning_rate": 3.786361468764903e-05,
"loss": 0.4072,
"step": 525
},
{
"epoch": 0.75,
"learning_rate": 3.783977110157368e-05,
"loss": 0.4224,
"step": 526
},
{
"epoch": 0.75,
"learning_rate": 3.7815927515498335e-05,
"loss": 0.4031,
"step": 527
},
{
"epoch": 0.75,
"learning_rate": 3.779208392942299e-05,
"loss": 0.4142,
"step": 528
},
{
"epoch": 0.75,
"learning_rate": 3.776824034334764e-05,
"loss": 0.3726,
"step": 529
},
{
"epoch": 0.75,
"learning_rate": 3.7744396757272296e-05,
"loss": 0.4047,
"step": 530
},
{
"epoch": 0.75,
"learning_rate": 3.772055317119695e-05,
"loss": 0.3957,
"step": 531
},
{
"epoch": 0.75,
"learning_rate": 3.7696709585121604e-05,
"loss": 0.4179,
"step": 532
},
{
"epoch": 0.76,
"learning_rate": 3.767286599904626e-05,
"loss": 0.3976,
"step": 533
},
{
"epoch": 0.76,
"learning_rate": 3.764902241297091e-05,
"loss": 0.4101,
"step": 534
},
{
"epoch": 0.76,
"learning_rate": 3.7625178826895566e-05,
"loss": 0.3982,
"step": 535
},
{
"epoch": 0.76,
"learning_rate": 3.7601335240820226e-05,
"loss": 0.3911,
"step": 536
},
{
"epoch": 0.76,
"learning_rate": 3.757749165474487e-05,
"loss": 0.4157,
"step": 537
},
{
"epoch": 0.76,
"learning_rate": 3.755364806866953e-05,
"loss": 0.3968,
"step": 538
},
{
"epoch": 0.76,
"learning_rate": 3.752980448259418e-05,
"loss": 0.4022,
"step": 539
},
{
"epoch": 0.77,
"learning_rate": 3.7505960896518835e-05,
"loss": 0.4099,
"step": 540
},
{
"epoch": 0.77,
"learning_rate": 3.7482117310443496e-05,
"loss": 0.4053,
"step": 541
},
{
"epoch": 0.77,
"learning_rate": 3.745827372436815e-05,
"loss": 0.3908,
"step": 542
},
{
"epoch": 0.77,
"learning_rate": 3.74344301382928e-05,
"loss": 0.4189,
"step": 543
},
{
"epoch": 0.77,
"learning_rate": 3.741058655221745e-05,
"loss": 0.4034,
"step": 544
},
{
"epoch": 0.77,
"learning_rate": 3.7386742966142104e-05,
"loss": 0.394,
"step": 545
},
{
"epoch": 0.77,
"learning_rate": 3.7362899380066765e-05,
"loss": 0.3851,
"step": 546
},
{
"epoch": 0.78,
"learning_rate": 3.733905579399142e-05,
"loss": 0.4037,
"step": 547
},
{
"epoch": 0.78,
"learning_rate": 3.731521220791607e-05,
"loss": 0.3952,
"step": 548
},
{
"epoch": 0.78,
"learning_rate": 3.7291368621840726e-05,
"loss": 0.4086,
"step": 549
},
{
"epoch": 0.78,
"learning_rate": 3.726752503576538e-05,
"loss": 0.3877,
"step": 550
},
{
"epoch": 0.78,
"learning_rate": 3.724368144969004e-05,
"loss": 0.3984,
"step": 551
},
{
"epoch": 0.78,
"learning_rate": 3.721983786361469e-05,
"loss": 0.4414,
"step": 552
},
{
"epoch": 0.78,
"learning_rate": 3.719599427753934e-05,
"loss": 0.3957,
"step": 553
},
{
"epoch": 0.79,
"learning_rate": 3.7172150691463996e-05,
"loss": 0.4187,
"step": 554
},
{
"epoch": 0.79,
"learning_rate": 3.714830710538865e-05,
"loss": 0.4111,
"step": 555
},
{
"epoch": 0.79,
"learning_rate": 3.712446351931331e-05,
"loss": 0.4264,
"step": 556
},
{
"epoch": 0.79,
"learning_rate": 3.7100619933237964e-05,
"loss": 0.4074,
"step": 557
},
{
"epoch": 0.79,
"learning_rate": 3.707677634716262e-05,
"loss": 0.4193,
"step": 558
},
{
"epoch": 0.79,
"learning_rate": 3.7052932761087265e-05,
"loss": 0.3993,
"step": 559
},
{
"epoch": 0.79,
"learning_rate": 3.702908917501192e-05,
"loss": 0.3825,
"step": 560
},
{
"epoch": 0.8,
"learning_rate": 3.700524558893658e-05,
"loss": 0.4156,
"step": 561
},
{
"epoch": 0.8,
"learning_rate": 3.6981402002861233e-05,
"loss": 0.4211,
"step": 562
},
{
"epoch": 0.8,
"learning_rate": 3.695755841678589e-05,
"loss": 0.4191,
"step": 563
},
{
"epoch": 0.8,
"learning_rate": 3.693371483071054e-05,
"loss": 0.4098,
"step": 564
},
{
"epoch": 0.8,
"learning_rate": 3.6909871244635195e-05,
"loss": 0.4107,
"step": 565
},
{
"epoch": 0.8,
"learning_rate": 3.688602765855985e-05,
"loss": 0.3818,
"step": 566
},
{
"epoch": 0.8,
"learning_rate": 3.68621840724845e-05,
"loss": 0.4165,
"step": 567
},
{
"epoch": 0.81,
"learning_rate": 3.6838340486409157e-05,
"loss": 0.3971,
"step": 568
},
{
"epoch": 0.81,
"learning_rate": 3.681449690033381e-05,
"loss": 0.3901,
"step": 569
},
{
"epoch": 0.81,
"learning_rate": 3.6790653314258464e-05,
"loss": 0.4065,
"step": 570
},
{
"epoch": 0.81,
"learning_rate": 3.676680972818312e-05,
"loss": 0.3875,
"step": 571
},
{
"epoch": 0.81,
"learning_rate": 3.674296614210778e-05,
"loss": 0.3834,
"step": 572
},
{
"epoch": 0.81,
"learning_rate": 3.671912255603243e-05,
"loss": 0.4119,
"step": 573
},
{
"epoch": 0.81,
"learning_rate": 3.669527896995708e-05,
"loss": 0.3992,
"step": 574
},
{
"epoch": 0.82,
"learning_rate": 3.6671435383881734e-05,
"loss": 0.4091,
"step": 575
},
{
"epoch": 0.82,
"learning_rate": 3.6647591797806394e-05,
"loss": 0.4168,
"step": 576
},
{
"epoch": 0.82,
"learning_rate": 3.662374821173105e-05,
"loss": 0.4166,
"step": 577
},
{
"epoch": 0.82,
"learning_rate": 3.65999046256557e-05,
"loss": 0.3883,
"step": 578
},
{
"epoch": 0.82,
"learning_rate": 3.6576061039580356e-05,
"loss": 0.4036,
"step": 579
},
{
"epoch": 0.82,
"learning_rate": 3.655221745350501e-05,
"loss": 0.4069,
"step": 580
},
{
"epoch": 0.82,
"learning_rate": 3.6528373867429664e-05,
"loss": 0.4033,
"step": 581
},
{
"epoch": 0.83,
"learning_rate": 3.650453028135432e-05,
"loss": 0.4006,
"step": 582
},
{
"epoch": 0.83,
"learning_rate": 3.648068669527897e-05,
"loss": 0.4205,
"step": 583
},
{
"epoch": 0.83,
"learning_rate": 3.6456843109203625e-05,
"loss": 0.4223,
"step": 584
},
{
"epoch": 0.83,
"learning_rate": 3.643299952312828e-05,
"loss": 0.3968,
"step": 585
},
{
"epoch": 0.83,
"learning_rate": 3.640915593705293e-05,
"loss": 0.4147,
"step": 586
},
{
"epoch": 0.83,
"learning_rate": 3.6385312350977593e-05,
"loss": 0.4031,
"step": 587
},
{
"epoch": 0.83,
"learning_rate": 3.636146876490225e-05,
"loss": 0.4355,
"step": 588
},
{
"epoch": 0.84,
"learning_rate": 3.6337625178826894e-05,
"loss": 0.4377,
"step": 589
},
{
"epoch": 0.84,
"learning_rate": 3.631378159275155e-05,
"loss": 0.4115,
"step": 590
},
{
"epoch": 0.84,
"learning_rate": 3.62899380066762e-05,
"loss": 0.3998,
"step": 591
},
{
"epoch": 0.84,
"learning_rate": 3.626609442060086e-05,
"loss": 0.4032,
"step": 592
},
{
"epoch": 0.84,
"learning_rate": 3.624225083452552e-05,
"loss": 0.3999,
"step": 593
},
{
"epoch": 0.84,
"learning_rate": 3.621840724845017e-05,
"loss": 0.4049,
"step": 594
},
{
"epoch": 0.84,
"learning_rate": 3.6194563662374824e-05,
"loss": 0.42,
"step": 595
},
{
"epoch": 0.85,
"learning_rate": 3.617072007629947e-05,
"loss": 0.3802,
"step": 596
},
{
"epoch": 0.85,
"learning_rate": 3.614687649022413e-05,
"loss": 0.3801,
"step": 597
},
{
"epoch": 0.85,
"learning_rate": 3.6123032904148786e-05,
"loss": 0.4001,
"step": 598
},
{
"epoch": 0.85,
"learning_rate": 3.609918931807344e-05,
"loss": 0.3925,
"step": 599
},
{
"epoch": 0.85,
"learning_rate": 3.6075345731998094e-05,
"loss": 0.3618,
"step": 600
},
{
"epoch": 0.85,
"learning_rate": 3.605150214592275e-05,
"loss": 0.4082,
"step": 601
},
{
"epoch": 0.85,
"learning_rate": 3.602765855984741e-05,
"loss": 0.4116,
"step": 602
},
{
"epoch": 0.86,
"learning_rate": 3.600381497377206e-05,
"loss": 0.3958,
"step": 603
},
{
"epoch": 0.86,
"learning_rate": 3.597997138769671e-05,
"loss": 0.4072,
"step": 604
},
{
"epoch": 0.86,
"learning_rate": 3.595612780162136e-05,
"loss": 0.386,
"step": 605
},
{
"epoch": 0.86,
"learning_rate": 3.593228421554602e-05,
"loss": 0.3848,
"step": 606
},
{
"epoch": 0.86,
"learning_rate": 3.590844062947068e-05,
"loss": 0.3845,
"step": 607
},
{
"epoch": 0.86,
"learning_rate": 3.588459704339533e-05,
"loss": 0.4115,
"step": 608
},
{
"epoch": 0.86,
"learning_rate": 3.5860753457319985e-05,
"loss": 0.3938,
"step": 609
},
{
"epoch": 0.87,
"learning_rate": 3.583690987124464e-05,
"loss": 0.3806,
"step": 610
},
{
"epoch": 0.87,
"learning_rate": 3.5813066285169286e-05,
"loss": 0.3955,
"step": 611
},
{
"epoch": 0.87,
"learning_rate": 3.578922269909395e-05,
"loss": 0.393,
"step": 612
},
{
"epoch": 0.87,
"learning_rate": 3.57653791130186e-05,
"loss": 0.4206,
"step": 613
},
{
"epoch": 0.87,
"learning_rate": 3.5741535526943254e-05,
"loss": 0.4157,
"step": 614
},
{
"epoch": 0.87,
"learning_rate": 3.571769194086791e-05,
"loss": 0.4099,
"step": 615
},
{
"epoch": 0.87,
"learning_rate": 3.569384835479256e-05,
"loss": 0.4095,
"step": 616
},
{
"epoch": 0.88,
"learning_rate": 3.5670004768717216e-05,
"loss": 0.3944,
"step": 617
},
{
"epoch": 0.88,
"learning_rate": 3.564616118264187e-05,
"loss": 0.3892,
"step": 618
},
{
"epoch": 0.88,
"learning_rate": 3.5622317596566524e-05,
"loss": 0.4158,
"step": 619
},
{
"epoch": 0.88,
"learning_rate": 3.559847401049118e-05,
"loss": 0.4067,
"step": 620
},
{
"epoch": 0.88,
"learning_rate": 3.557463042441583e-05,
"loss": 0.4255,
"step": 621
},
{
"epoch": 0.88,
"learning_rate": 3.5550786838340485e-05,
"loss": 0.3958,
"step": 622
},
{
"epoch": 0.88,
"learning_rate": 3.5526943252265146e-05,
"loss": 0.4017,
"step": 623
},
{
"epoch": 0.89,
"learning_rate": 3.55030996661898e-05,
"loss": 0.391,
"step": 624
},
{
"epoch": 0.89,
"learning_rate": 3.5479256080114454e-05,
"loss": 0.4068,
"step": 625
},
{
"epoch": 0.89,
"learning_rate": 3.54554124940391e-05,
"loss": 0.3955,
"step": 626
},
{
"epoch": 0.89,
"learning_rate": 3.543156890796376e-05,
"loss": 0.3921,
"step": 627
},
{
"epoch": 0.89,
"learning_rate": 3.5407725321888415e-05,
"loss": 0.3929,
"step": 628
},
{
"epoch": 0.89,
"learning_rate": 3.538388173581307e-05,
"loss": 0.411,
"step": 629
},
{
"epoch": 0.89,
"learning_rate": 3.536003814973772e-05,
"loss": 0.3868,
"step": 630
},
{
"epoch": 0.9,
"learning_rate": 3.533619456366238e-05,
"loss": 0.3745,
"step": 631
},
{
"epoch": 0.9,
"learning_rate": 3.531235097758703e-05,
"loss": 0.4239,
"step": 632
},
{
"epoch": 0.9,
"learning_rate": 3.5288507391511685e-05,
"loss": 0.4004,
"step": 633
},
{
"epoch": 0.9,
"learning_rate": 3.526466380543634e-05,
"loss": 0.3775,
"step": 634
},
{
"epoch": 0.9,
"learning_rate": 3.524082021936099e-05,
"loss": 0.4021,
"step": 635
},
{
"epoch": 0.9,
"learning_rate": 3.5216976633285646e-05,
"loss": 0.3995,
"step": 636
},
{
"epoch": 0.9,
"learning_rate": 3.51931330472103e-05,
"loss": 0.407,
"step": 637
},
{
"epoch": 0.91,
"learning_rate": 3.516928946113496e-05,
"loss": 0.397,
"step": 638
},
{
"epoch": 0.91,
"learning_rate": 3.5145445875059614e-05,
"loss": 0.3874,
"step": 639
},
{
"epoch": 0.91,
"learning_rate": 3.512160228898426e-05,
"loss": 0.4067,
"step": 640
},
{
"epoch": 0.91,
"learning_rate": 3.5097758702908915e-05,
"loss": 0.4121,
"step": 641
},
{
"epoch": 0.91,
"learning_rate": 3.507391511683357e-05,
"loss": 0.3994,
"step": 642
},
{
"epoch": 0.91,
"learning_rate": 3.505007153075823e-05,
"loss": 0.4152,
"step": 643
},
{
"epoch": 0.91,
"learning_rate": 3.5026227944682884e-05,
"loss": 0.3936,
"step": 644
},
{
"epoch": 0.92,
"learning_rate": 3.500238435860754e-05,
"loss": 0.3981,
"step": 645
},
{
"epoch": 0.92,
"learning_rate": 3.497854077253219e-05,
"loss": 0.4106,
"step": 646
},
{
"epoch": 0.92,
"learning_rate": 3.4954697186456845e-05,
"loss": 0.4021,
"step": 647
},
{
"epoch": 0.92,
"learning_rate": 3.49308536003815e-05,
"loss": 0.3855,
"step": 648
},
{
"epoch": 0.92,
"learning_rate": 3.490701001430615e-05,
"loss": 0.3768,
"step": 649
},
{
"epoch": 0.92,
"learning_rate": 3.488316642823081e-05,
"loss": 0.4207,
"step": 650
},
{
"epoch": 0.92,
"learning_rate": 3.485932284215546e-05,
"loss": 0.3949,
"step": 651
},
{
"epoch": 0.92,
"learning_rate": 3.4835479256080115e-05,
"loss": 0.3883,
"step": 652
},
{
"epoch": 0.93,
"learning_rate": 3.481163567000477e-05,
"loss": 0.4037,
"step": 653
},
{
"epoch": 0.93,
"learning_rate": 3.478779208392943e-05,
"loss": 0.3858,
"step": 654
},
{
"epoch": 0.93,
"learning_rate": 3.4763948497854076e-05,
"loss": 0.398,
"step": 655
},
{
"epoch": 0.93,
"learning_rate": 3.474010491177873e-05,
"loss": 0.4112,
"step": 656
},
{
"epoch": 0.93,
"learning_rate": 3.4716261325703384e-05,
"loss": 0.3821,
"step": 657
},
{
"epoch": 0.93,
"learning_rate": 3.4692417739628045e-05,
"loss": 0.4214,
"step": 658
},
{
"epoch": 0.93,
"learning_rate": 3.46685741535527e-05,
"loss": 0.4072,
"step": 659
},
{
"epoch": 0.94,
"learning_rate": 3.464473056747735e-05,
"loss": 0.4014,
"step": 660
},
{
"epoch": 0.94,
"learning_rate": 3.4620886981402006e-05,
"loss": 0.3965,
"step": 661
},
{
"epoch": 0.94,
"learning_rate": 3.459704339532666e-05,
"loss": 0.4191,
"step": 662
},
{
"epoch": 0.94,
"learning_rate": 3.4573199809251314e-05,
"loss": 0.3855,
"step": 663
},
{
"epoch": 0.94,
"learning_rate": 3.454935622317597e-05,
"loss": 0.3804,
"step": 664
},
{
"epoch": 0.94,
"learning_rate": 3.452551263710062e-05,
"loss": 0.4182,
"step": 665
},
{
"epoch": 0.94,
"learning_rate": 3.4501669051025275e-05,
"loss": 0.4109,
"step": 666
},
{
"epoch": 0.95,
"learning_rate": 3.447782546494993e-05,
"loss": 0.416,
"step": 667
},
{
"epoch": 0.95,
"learning_rate": 3.445398187887458e-05,
"loss": 0.3863,
"step": 668
},
{
"epoch": 0.95,
"learning_rate": 3.4430138292799244e-05,
"loss": 0.4007,
"step": 669
},
{
"epoch": 0.95,
"learning_rate": 3.440629470672389e-05,
"loss": 0.3921,
"step": 670
},
{
"epoch": 0.95,
"learning_rate": 3.4382451120648545e-05,
"loss": 0.3665,
"step": 671
},
{
"epoch": 0.95,
"learning_rate": 3.43586075345732e-05,
"loss": 0.4014,
"step": 672
},
{
"epoch": 0.95,
"learning_rate": 3.433476394849785e-05,
"loss": 0.4062,
"step": 673
},
{
"epoch": 0.96,
"learning_rate": 3.431092036242251e-05,
"loss": 0.4049,
"step": 674
},
{
"epoch": 0.96,
"learning_rate": 3.428707677634717e-05,
"loss": 0.3937,
"step": 675
},
{
"epoch": 0.96,
"learning_rate": 3.426323319027182e-05,
"loss": 0.3969,
"step": 676
},
{
"epoch": 0.96,
"learning_rate": 3.423938960419647e-05,
"loss": 0.3827,
"step": 677
},
{
"epoch": 0.96,
"learning_rate": 3.421554601812112e-05,
"loss": 0.4271,
"step": 678
},
{
"epoch": 0.96,
"learning_rate": 3.419170243204578e-05,
"loss": 0.4083,
"step": 679
},
{
"epoch": 0.96,
"learning_rate": 3.4167858845970436e-05,
"loss": 0.3981,
"step": 680
},
{
"epoch": 0.97,
"learning_rate": 3.414401525989509e-05,
"loss": 0.3877,
"step": 681
},
{
"epoch": 0.97,
"learning_rate": 3.4120171673819744e-05,
"loss": 0.3947,
"step": 682
},
{
"epoch": 0.97,
"learning_rate": 3.40963280877444e-05,
"loss": 0.3974,
"step": 683
},
{
"epoch": 0.97,
"learning_rate": 3.407248450166906e-05,
"loss": 0.3816,
"step": 684
},
{
"epoch": 0.97,
"learning_rate": 3.4048640915593706e-05,
"loss": 0.3984,
"step": 685
},
{
"epoch": 0.97,
"learning_rate": 3.402479732951836e-05,
"loss": 0.3823,
"step": 686
},
{
"epoch": 0.97,
"learning_rate": 3.400095374344301e-05,
"loss": 0.3922,
"step": 687
},
{
"epoch": 0.98,
"learning_rate": 3.397711015736767e-05,
"loss": 0.3985,
"step": 688
},
{
"epoch": 0.98,
"learning_rate": 3.395326657129233e-05,
"loss": 0.3971,
"step": 689
},
{
"epoch": 0.98,
"learning_rate": 3.392942298521698e-05,
"loss": 0.4024,
"step": 690
},
{
"epoch": 0.98,
"learning_rate": 3.3905579399141636e-05,
"loss": 0.4028,
"step": 691
},
{
"epoch": 0.98,
"learning_rate": 3.388173581306628e-05,
"loss": 0.3957,
"step": 692
},
{
"epoch": 0.98,
"learning_rate": 3.3857892226990936e-05,
"loss": 0.4151,
"step": 693
},
{
"epoch": 0.98,
"learning_rate": 3.38340486409156e-05,
"loss": 0.3974,
"step": 694
},
{
"epoch": 0.99,
"learning_rate": 3.381020505484025e-05,
"loss": 0.4179,
"step": 695
},
{
"epoch": 0.99,
"learning_rate": 3.3786361468764905e-05,
"loss": 0.3854,
"step": 696
},
{
"epoch": 0.99,
"learning_rate": 3.376251788268956e-05,
"loss": 0.4167,
"step": 697
},
{
"epoch": 0.99,
"learning_rate": 3.373867429661421e-05,
"loss": 0.4183,
"step": 698
},
{
"epoch": 0.99,
"learning_rate": 3.3714830710538866e-05,
"loss": 0.4206,
"step": 699
},
{
"epoch": 0.99,
"learning_rate": 3.369098712446352e-05,
"loss": 0.3828,
"step": 700
},
{
"epoch": 0.99,
"learning_rate": 3.3667143538388174e-05,
"loss": 0.3853,
"step": 701
},
{
"epoch": 1.0,
"learning_rate": 3.364329995231283e-05,
"loss": 0.3962,
"step": 702
},
{
"epoch": 1.0,
"learning_rate": 3.361945636623748e-05,
"loss": 0.4024,
"step": 703
},
{
"epoch": 1.0,
"learning_rate": 3.3595612780162136e-05,
"loss": 0.4071,
"step": 704
},
{
"epoch": 1.0,
"learning_rate": 3.3571769194086796e-05,
"loss": 0.4045,
"step": 705
},
{
"epoch": 1.0,
"learning_rate": 3.354792560801145e-05,
"loss": 0.3416,
"step": 706
},
{
"epoch": 1.0,
"learning_rate": 3.35240820219361e-05,
"loss": 0.3414,
"step": 707
},
{
"epoch": 1.0,
"learning_rate": 3.350023843586075e-05,
"loss": 0.3413,
"step": 708
},
{
"epoch": 1.01,
"learning_rate": 3.347639484978541e-05,
"loss": 0.3575,
"step": 709
},
{
"epoch": 1.01,
"learning_rate": 3.3452551263710066e-05,
"loss": 0.3492,
"step": 710
},
{
"epoch": 1.01,
"learning_rate": 3.342870767763472e-05,
"loss": 0.3369,
"step": 711
},
{
"epoch": 1.01,
"learning_rate": 3.340486409155937e-05,
"loss": 0.3496,
"step": 712
},
{
"epoch": 1.01,
"learning_rate": 3.338102050548403e-05,
"loss": 0.3294,
"step": 713
},
{
"epoch": 1.01,
"learning_rate": 3.335717691940868e-05,
"loss": 0.3431,
"step": 714
},
{
"epoch": 1.01,
"learning_rate": 3.3333333333333335e-05,
"loss": 0.3344,
"step": 715
},
{
"epoch": 1.02,
"learning_rate": 3.330948974725799e-05,
"loss": 0.3532,
"step": 716
},
{
"epoch": 1.02,
"learning_rate": 3.328564616118264e-05,
"loss": 0.3379,
"step": 717
},
{
"epoch": 1.02,
"learning_rate": 3.3261802575107297e-05,
"loss": 0.3471,
"step": 718
},
{
"epoch": 1.02,
"learning_rate": 3.323795898903195e-05,
"loss": 0.3463,
"step": 719
},
{
"epoch": 1.02,
"learning_rate": 3.321411540295661e-05,
"loss": 0.3207,
"step": 720
},
{
"epoch": 1.02,
"learning_rate": 3.3190271816881265e-05,
"loss": 0.3515,
"step": 721
},
{
"epoch": 1.02,
"learning_rate": 3.316642823080591e-05,
"loss": 0.3301,
"step": 722
},
{
"epoch": 1.03,
"learning_rate": 3.3142584644730566e-05,
"loss": 0.344,
"step": 723
},
{
"epoch": 1.03,
"learning_rate": 3.311874105865522e-05,
"loss": 0.3287,
"step": 724
},
{
"epoch": 1.03,
"learning_rate": 3.309489747257988e-05,
"loss": 0.3524,
"step": 725
},
{
"epoch": 1.03,
"learning_rate": 3.3071053886504534e-05,
"loss": 0.3464,
"step": 726
},
{
"epoch": 1.03,
"learning_rate": 3.304721030042919e-05,
"loss": 0.357,
"step": 727
},
{
"epoch": 1.03,
"learning_rate": 3.302336671435384e-05,
"loss": 0.343,
"step": 728
},
{
"epoch": 1.03,
"learning_rate": 3.299952312827849e-05,
"loss": 0.3257,
"step": 729
},
{
"epoch": 1.04,
"learning_rate": 3.297567954220315e-05,
"loss": 0.3326,
"step": 730
},
{
"epoch": 1.04,
"learning_rate": 3.2951835956127803e-05,
"loss": 0.3378,
"step": 731
},
{
"epoch": 1.04,
"learning_rate": 3.292799237005246e-05,
"loss": 0.325,
"step": 732
},
{
"epoch": 1.04,
"learning_rate": 3.290414878397711e-05,
"loss": 0.3578,
"step": 733
},
{
"epoch": 1.04,
"learning_rate": 3.2880305197901765e-05,
"loss": 0.3451,
"step": 734
},
{
"epoch": 1.04,
"learning_rate": 3.2856461611826426e-05,
"loss": 0.3369,
"step": 735
},
{
"epoch": 1.04,
"learning_rate": 3.283261802575107e-05,
"loss": 0.329,
"step": 736
},
{
"epoch": 1.05,
"learning_rate": 3.280877443967573e-05,
"loss": 0.3499,
"step": 737
},
{
"epoch": 1.05,
"learning_rate": 3.278493085360038e-05,
"loss": 0.32,
"step": 738
},
{
"epoch": 1.05,
"learning_rate": 3.2761087267525034e-05,
"loss": 0.3232,
"step": 739
},
{
"epoch": 1.05,
"learning_rate": 3.2737243681449695e-05,
"loss": 0.3358,
"step": 740
},
{
"epoch": 1.05,
"learning_rate": 3.271340009537435e-05,
"loss": 0.336,
"step": 741
},
{
"epoch": 1.05,
"learning_rate": 3.2689556509299e-05,
"loss": 0.3277,
"step": 742
},
{
"epoch": 1.05,
"learning_rate": 3.2665712923223657e-05,
"loss": 0.331,
"step": 743
},
{
"epoch": 1.06,
"learning_rate": 3.2641869337148304e-05,
"loss": 0.3289,
"step": 744
},
{
"epoch": 1.06,
"learning_rate": 3.2618025751072964e-05,
"loss": 0.3509,
"step": 745
},
{
"epoch": 1.06,
"learning_rate": 3.259418216499762e-05,
"loss": 0.3354,
"step": 746
},
{
"epoch": 1.06,
"learning_rate": 3.257033857892227e-05,
"loss": 0.3585,
"step": 747
},
{
"epoch": 1.06,
"learning_rate": 3.2546494992846926e-05,
"loss": 0.3285,
"step": 748
},
{
"epoch": 1.06,
"learning_rate": 3.252265140677158e-05,
"loss": 0.3214,
"step": 749
},
{
"epoch": 1.06,
"learning_rate": 3.2498807820696234e-05,
"loss": 0.3118,
"step": 750
},
{
"epoch": 1.07,
"learning_rate": 3.247496423462089e-05,
"loss": 0.3376,
"step": 751
},
{
"epoch": 1.07,
"learning_rate": 3.245112064854554e-05,
"loss": 0.3374,
"step": 752
},
{
"epoch": 1.07,
"learning_rate": 3.2427277062470195e-05,
"loss": 0.3516,
"step": 753
},
{
"epoch": 1.07,
"learning_rate": 3.240343347639485e-05,
"loss": 0.32,
"step": 754
},
{
"epoch": 1.07,
"learning_rate": 3.23795898903195e-05,
"loss": 0.3561,
"step": 755
},
{
"epoch": 1.07,
"learning_rate": 3.2355746304244164e-05,
"loss": 0.3214,
"step": 756
},
{
"epoch": 1.07,
"learning_rate": 3.233190271816882e-05,
"loss": 0.324,
"step": 757
},
{
"epoch": 1.08,
"learning_rate": 3.230805913209347e-05,
"loss": 0.3404,
"step": 758
},
{
"epoch": 1.08,
"learning_rate": 3.228421554601812e-05,
"loss": 0.3401,
"step": 759
},
{
"epoch": 1.08,
"learning_rate": 3.226037195994278e-05,
"loss": 0.3245,
"step": 760
},
{
"epoch": 1.08,
"learning_rate": 3.223652837386743e-05,
"loss": 0.3295,
"step": 761
},
{
"epoch": 1.08,
"learning_rate": 3.221268478779209e-05,
"loss": 0.3389,
"step": 762
},
{
"epoch": 1.08,
"learning_rate": 3.218884120171674e-05,
"loss": 0.3266,
"step": 763
},
{
"epoch": 1.08,
"learning_rate": 3.2164997615641394e-05,
"loss": 0.3397,
"step": 764
},
{
"epoch": 1.09,
"learning_rate": 3.214115402956605e-05,
"loss": 0.3317,
"step": 765
},
{
"epoch": 1.09,
"learning_rate": 3.21173104434907e-05,
"loss": 0.3273,
"step": 766
},
{
"epoch": 1.09,
"learning_rate": 3.2093466857415356e-05,
"loss": 0.3201,
"step": 767
},
{
"epoch": 1.09,
"learning_rate": 3.206962327134001e-05,
"loss": 0.3455,
"step": 768
},
{
"epoch": 1.09,
"learning_rate": 3.2045779685264664e-05,
"loss": 0.3451,
"step": 769
},
{
"epoch": 1.09,
"learning_rate": 3.202193609918932e-05,
"loss": 0.3208,
"step": 770
},
{
"epoch": 1.09,
"learning_rate": 3.199809251311398e-05,
"loss": 0.3333,
"step": 771
},
{
"epoch": 1.1,
"learning_rate": 3.197424892703863e-05,
"loss": 0.3426,
"step": 772
},
{
"epoch": 1.1,
"learning_rate": 3.195040534096328e-05,
"loss": 0.3259,
"step": 773
},
{
"epoch": 1.1,
"learning_rate": 3.192656175488793e-05,
"loss": 0.3212,
"step": 774
},
{
"epoch": 1.1,
"learning_rate": 3.190271816881259e-05,
"loss": 0.325,
"step": 775
},
{
"epoch": 1.1,
"learning_rate": 3.187887458273725e-05,
"loss": 0.3264,
"step": 776
},
{
"epoch": 1.1,
"learning_rate": 3.18550309966619e-05,
"loss": 0.3415,
"step": 777
},
{
"epoch": 1.1,
"learning_rate": 3.1831187410586555e-05,
"loss": 0.3419,
"step": 778
},
{
"epoch": 1.11,
"learning_rate": 3.180734382451121e-05,
"loss": 0.3361,
"step": 779
},
{
"epoch": 1.11,
"learning_rate": 3.178350023843586e-05,
"loss": 0.3057,
"step": 780
},
{
"epoch": 1.11,
"learning_rate": 3.175965665236052e-05,
"loss": 0.3441,
"step": 781
},
{
"epoch": 1.11,
"learning_rate": 3.173581306628517e-05,
"loss": 0.3435,
"step": 782
},
{
"epoch": 1.11,
"learning_rate": 3.1711969480209824e-05,
"loss": 0.3327,
"step": 783
},
{
"epoch": 1.11,
"learning_rate": 3.168812589413448e-05,
"loss": 0.337,
"step": 784
},
{
"epoch": 1.11,
"learning_rate": 3.166428230805913e-05,
"loss": 0.3496,
"step": 785
},
{
"epoch": 1.12,
"learning_rate": 3.1640438721983786e-05,
"loss": 0.3332,
"step": 786
},
{
"epoch": 1.12,
"learning_rate": 3.161659513590845e-05,
"loss": 0.3418,
"step": 787
},
{
"epoch": 1.12,
"learning_rate": 3.1592751549833094e-05,
"loss": 0.3449,
"step": 788
},
{
"epoch": 1.12,
"learning_rate": 3.156890796375775e-05,
"loss": 0.3332,
"step": 789
},
{
"epoch": 1.12,
"learning_rate": 3.15450643776824e-05,
"loss": 0.3203,
"step": 790
},
{
"epoch": 1.12,
"learning_rate": 3.152122079160706e-05,
"loss": 0.3376,
"step": 791
},
{
"epoch": 1.12,
"learning_rate": 3.1497377205531716e-05,
"loss": 0.3351,
"step": 792
},
{
"epoch": 1.12,
"learning_rate": 3.147353361945637e-05,
"loss": 0.3299,
"step": 793
},
{
"epoch": 1.13,
"learning_rate": 3.1449690033381024e-05,
"loss": 0.3336,
"step": 794
},
{
"epoch": 1.13,
"learning_rate": 3.142584644730567e-05,
"loss": 0.3326,
"step": 795
},
{
"epoch": 1.13,
"learning_rate": 3.140200286123033e-05,
"loss": 0.3328,
"step": 796
},
{
"epoch": 1.13,
"learning_rate": 3.1378159275154985e-05,
"loss": 0.3489,
"step": 797
},
{
"epoch": 1.13,
"learning_rate": 3.135431568907964e-05,
"loss": 0.3355,
"step": 798
},
{
"epoch": 1.13,
"learning_rate": 3.133047210300429e-05,
"loss": 0.3295,
"step": 799
},
{
"epoch": 1.13,
"learning_rate": 3.130662851692895e-05,
"loss": 0.3255,
"step": 800
},
{
"epoch": 1.14,
"learning_rate": 3.12827849308536e-05,
"loss": 0.3339,
"step": 801
},
{
"epoch": 1.14,
"learning_rate": 3.125894134477826e-05,
"loss": 0.3393,
"step": 802
},
{
"epoch": 1.14,
"learning_rate": 3.123509775870291e-05,
"loss": 0.3327,
"step": 803
},
{
"epoch": 1.14,
"learning_rate": 3.121125417262756e-05,
"loss": 0.3392,
"step": 804
},
{
"epoch": 1.14,
"learning_rate": 3.1187410586552216e-05,
"loss": 0.3354,
"step": 805
},
{
"epoch": 1.14,
"learning_rate": 3.116356700047687e-05,
"loss": 0.3384,
"step": 806
},
{
"epoch": 1.14,
"learning_rate": 3.113972341440153e-05,
"loss": 0.3148,
"step": 807
},
{
"epoch": 1.15,
"learning_rate": 3.1115879828326185e-05,
"loss": 0.3255,
"step": 808
},
{
"epoch": 1.15,
"learning_rate": 3.109203624225084e-05,
"loss": 0.3383,
"step": 809
},
{
"epoch": 1.15,
"learning_rate": 3.1068192656175485e-05,
"loss": 0.3325,
"step": 810
},
{
"epoch": 1.15,
"learning_rate": 3.104434907010014e-05,
"loss": 0.3243,
"step": 811
},
{
"epoch": 1.15,
"learning_rate": 3.10205054840248e-05,
"loss": 0.3429,
"step": 812
},
{
"epoch": 1.15,
"learning_rate": 3.0996661897949454e-05,
"loss": 0.3228,
"step": 813
},
{
"epoch": 1.15,
"learning_rate": 3.097281831187411e-05,
"loss": 0.3188,
"step": 814
},
{
"epoch": 1.16,
"learning_rate": 3.094897472579876e-05,
"loss": 0.3482,
"step": 815
},
{
"epoch": 1.16,
"learning_rate": 3.0925131139723415e-05,
"loss": 0.3271,
"step": 816
},
{
"epoch": 1.16,
"learning_rate": 3.0901287553648076e-05,
"loss": 0.3171,
"step": 817
},
{
"epoch": 1.16,
"learning_rate": 3.087744396757272e-05,
"loss": 0.3053,
"step": 818
},
{
"epoch": 1.16,
"learning_rate": 3.085360038149738e-05,
"loss": 0.3311,
"step": 819
},
{
"epoch": 1.16,
"learning_rate": 3.082975679542203e-05,
"loss": 0.321,
"step": 820
},
{
"epoch": 1.16,
"learning_rate": 3.0805913209346685e-05,
"loss": 0.3423,
"step": 821
},
{
"epoch": 1.17,
"learning_rate": 3.0782069623271345e-05,
"loss": 0.3294,
"step": 822
},
{
"epoch": 1.17,
"learning_rate": 3.0758226037196e-05,
"loss": 0.3457,
"step": 823
},
{
"epoch": 1.17,
"learning_rate": 3.073438245112065e-05,
"loss": 0.3488,
"step": 824
},
{
"epoch": 1.17,
"learning_rate": 3.07105388650453e-05,
"loss": 0.3318,
"step": 825
},
{
"epoch": 1.17,
"learning_rate": 3.0686695278969954e-05,
"loss": 0.3256,
"step": 826
},
{
"epoch": 1.17,
"learning_rate": 3.0662851692894615e-05,
"loss": 0.3245,
"step": 827
},
{
"epoch": 1.17,
"learning_rate": 3.063900810681927e-05,
"loss": 0.3216,
"step": 828
},
{
"epoch": 1.18,
"learning_rate": 3.061516452074392e-05,
"loss": 0.3213,
"step": 829
},
{
"epoch": 1.18,
"learning_rate": 3.0591320934668576e-05,
"loss": 0.3434,
"step": 830
},
{
"epoch": 1.18,
"learning_rate": 3.056747734859323e-05,
"loss": 0.3206,
"step": 831
},
{
"epoch": 1.18,
"learning_rate": 3.0543633762517884e-05,
"loss": 0.3282,
"step": 832
},
{
"epoch": 1.18,
"learning_rate": 3.051979017644254e-05,
"loss": 0.3243,
"step": 833
},
{
"epoch": 1.18,
"learning_rate": 3.049594659036719e-05,
"loss": 0.3235,
"step": 834
},
{
"epoch": 1.18,
"learning_rate": 3.0472103004291846e-05,
"loss": 0.3278,
"step": 835
},
{
"epoch": 1.19,
"learning_rate": 3.0448259418216503e-05,
"loss": 0.3517,
"step": 836
},
{
"epoch": 1.19,
"learning_rate": 3.0424415832141157e-05,
"loss": 0.3275,
"step": 837
},
{
"epoch": 1.19,
"learning_rate": 3.040057224606581e-05,
"loss": 0.3415,
"step": 838
},
{
"epoch": 1.19,
"learning_rate": 3.0376728659990468e-05,
"loss": 0.3302,
"step": 839
},
{
"epoch": 1.19,
"learning_rate": 3.0352885073915115e-05,
"loss": 0.3437,
"step": 840
},
{
"epoch": 1.19,
"learning_rate": 3.0329041487839772e-05,
"loss": 0.3239,
"step": 841
},
{
"epoch": 1.19,
"learning_rate": 3.0305197901764426e-05,
"loss": 0.3305,
"step": 842
},
{
"epoch": 1.2,
"learning_rate": 3.028135431568908e-05,
"loss": 0.3365,
"step": 843
},
{
"epoch": 1.2,
"learning_rate": 3.0257510729613737e-05,
"loss": 0.3207,
"step": 844
},
{
"epoch": 1.2,
"learning_rate": 3.023366714353839e-05,
"loss": 0.326,
"step": 845
},
{
"epoch": 1.2,
"learning_rate": 3.0209823557463045e-05,
"loss": 0.3359,
"step": 846
},
{
"epoch": 1.2,
"learning_rate": 3.0185979971387695e-05,
"loss": 0.3345,
"step": 847
},
{
"epoch": 1.2,
"learning_rate": 3.016213638531235e-05,
"loss": 0.3272,
"step": 848
},
{
"epoch": 1.2,
"learning_rate": 3.0138292799237006e-05,
"loss": 0.3499,
"step": 849
},
{
"epoch": 1.21,
"learning_rate": 3.011444921316166e-05,
"loss": 0.346,
"step": 850
},
{
"epoch": 1.21,
"learning_rate": 3.0090605627086314e-05,
"loss": 0.3472,
"step": 851
},
{
"epoch": 1.21,
"learning_rate": 3.006676204101097e-05,
"loss": 0.3433,
"step": 852
},
{
"epoch": 1.21,
"learning_rate": 3.0042918454935625e-05,
"loss": 0.3245,
"step": 853
},
{
"epoch": 1.21,
"learning_rate": 3.0019074868860282e-05,
"loss": 0.3329,
"step": 854
},
{
"epoch": 1.21,
"learning_rate": 2.999523128278493e-05,
"loss": 0.3408,
"step": 855
},
{
"epoch": 1.21,
"learning_rate": 2.9971387696709587e-05,
"loss": 0.3372,
"step": 856
},
{
"epoch": 1.22,
"learning_rate": 2.994754411063424e-05,
"loss": 0.3491,
"step": 857
},
{
"epoch": 1.22,
"learning_rate": 2.9923700524558894e-05,
"loss": 0.3468,
"step": 858
},
{
"epoch": 1.22,
"learning_rate": 2.9899856938483552e-05,
"loss": 0.3194,
"step": 859
},
{
"epoch": 1.22,
"learning_rate": 2.9876013352408206e-05,
"loss": 0.3475,
"step": 860
},
{
"epoch": 1.22,
"learning_rate": 2.985216976633286e-05,
"loss": 0.3018,
"step": 861
},
{
"epoch": 1.22,
"learning_rate": 2.982832618025751e-05,
"loss": 0.3045,
"step": 862
},
{
"epoch": 1.22,
"learning_rate": 2.9804482594182164e-05,
"loss": 0.3314,
"step": 863
},
{
"epoch": 1.23,
"learning_rate": 2.978063900810682e-05,
"loss": 0.3267,
"step": 864
},
{
"epoch": 1.23,
"learning_rate": 2.9756795422031475e-05,
"loss": 0.3399,
"step": 865
},
{
"epoch": 1.23,
"learning_rate": 2.973295183595613e-05,
"loss": 0.3405,
"step": 866
},
{
"epoch": 1.23,
"learning_rate": 2.9709108249880786e-05,
"loss": 0.3425,
"step": 867
},
{
"epoch": 1.23,
"learning_rate": 2.968526466380544e-05,
"loss": 0.3583,
"step": 868
},
{
"epoch": 1.23,
"learning_rate": 2.966142107773009e-05,
"loss": 0.3213,
"step": 869
},
{
"epoch": 1.23,
"learning_rate": 2.9637577491654744e-05,
"loss": 0.3308,
"step": 870
},
{
"epoch": 1.24,
"learning_rate": 2.9613733905579398e-05,
"loss": 0.3254,
"step": 871
},
{
"epoch": 1.24,
"learning_rate": 2.9589890319504055e-05,
"loss": 0.3426,
"step": 872
},
{
"epoch": 1.24,
"learning_rate": 2.956604673342871e-05,
"loss": 0.3247,
"step": 873
},
{
"epoch": 1.24,
"learning_rate": 2.9542203147353363e-05,
"loss": 0.3174,
"step": 874
},
{
"epoch": 1.24,
"learning_rate": 2.951835956127802e-05,
"loss": 0.3202,
"step": 875
},
{
"epoch": 1.24,
"learning_rate": 2.9494515975202674e-05,
"loss": 0.3477,
"step": 876
},
{
"epoch": 1.24,
"learning_rate": 2.9470672389127325e-05,
"loss": 0.3251,
"step": 877
},
{
"epoch": 1.25,
"learning_rate": 2.944682880305198e-05,
"loss": 0.3323,
"step": 878
},
{
"epoch": 1.25,
"learning_rate": 2.9422985216976636e-05,
"loss": 0.3147,
"step": 879
},
{
"epoch": 1.25,
"learning_rate": 2.939914163090129e-05,
"loss": 0.3436,
"step": 880
},
{
"epoch": 1.25,
"learning_rate": 2.9375298044825943e-05,
"loss": 0.333,
"step": 881
},
{
"epoch": 1.25,
"learning_rate": 2.93514544587506e-05,
"loss": 0.3439,
"step": 882
},
{
"epoch": 1.25,
"learning_rate": 2.9327610872675255e-05,
"loss": 0.3309,
"step": 883
},
{
"epoch": 1.25,
"learning_rate": 2.9303767286599905e-05,
"loss": 0.3178,
"step": 884
},
{
"epoch": 1.26,
"learning_rate": 2.927992370052456e-05,
"loss": 0.3271,
"step": 885
},
{
"epoch": 1.26,
"learning_rate": 2.9256080114449213e-05,
"loss": 0.3431,
"step": 886
},
{
"epoch": 1.26,
"learning_rate": 2.923223652837387e-05,
"loss": 0.3061,
"step": 887
},
{
"epoch": 1.26,
"learning_rate": 2.9208392942298524e-05,
"loss": 0.3181,
"step": 888
},
{
"epoch": 1.26,
"learning_rate": 2.9184549356223178e-05,
"loss": 0.3334,
"step": 889
},
{
"epoch": 1.26,
"learning_rate": 2.9160705770147835e-05,
"loss": 0.3182,
"step": 890
},
{
"epoch": 1.26,
"learning_rate": 2.9136862184072482e-05,
"loss": 0.313,
"step": 891
},
{
"epoch": 1.27,
"learning_rate": 2.911301859799714e-05,
"loss": 0.3148,
"step": 892
},
{
"epoch": 1.27,
"learning_rate": 2.9089175011921793e-05,
"loss": 0.3602,
"step": 893
},
{
"epoch": 1.27,
"learning_rate": 2.9065331425846447e-05,
"loss": 0.3373,
"step": 894
},
{
"epoch": 1.27,
"learning_rate": 2.9041487839771104e-05,
"loss": 0.341,
"step": 895
},
{
"epoch": 1.27,
"learning_rate": 2.9017644253695758e-05,
"loss": 0.3297,
"step": 896
},
{
"epoch": 1.27,
"learning_rate": 2.8993800667620412e-05,
"loss": 0.3237,
"step": 897
},
{
"epoch": 1.27,
"learning_rate": 2.896995708154507e-05,
"loss": 0.3421,
"step": 898
},
{
"epoch": 1.28,
"learning_rate": 2.8946113495469716e-05,
"loss": 0.3311,
"step": 899
},
{
"epoch": 1.28,
"learning_rate": 2.8922269909394374e-05,
"loss": 0.3391,
"step": 900
},
{
"epoch": 1.28,
"learning_rate": 2.8898426323319027e-05,
"loss": 0.3253,
"step": 901
},
{
"epoch": 1.28,
"learning_rate": 2.887458273724368e-05,
"loss": 0.3288,
"step": 902
},
{
"epoch": 1.28,
"learning_rate": 2.885073915116834e-05,
"loss": 0.3208,
"step": 903
},
{
"epoch": 1.28,
"learning_rate": 2.8826895565092992e-05,
"loss": 0.3174,
"step": 904
},
{
"epoch": 1.28,
"learning_rate": 2.8803051979017646e-05,
"loss": 0.3241,
"step": 905
},
{
"epoch": 1.29,
"learning_rate": 2.8779208392942297e-05,
"loss": 0.331,
"step": 906
},
{
"epoch": 1.29,
"learning_rate": 2.8755364806866954e-05,
"loss": 0.3445,
"step": 907
},
{
"epoch": 1.29,
"learning_rate": 2.8731521220791608e-05,
"loss": 0.3218,
"step": 908
},
{
"epoch": 1.29,
"learning_rate": 2.870767763471626e-05,
"loss": 0.3214,
"step": 909
},
{
"epoch": 1.29,
"learning_rate": 2.868383404864092e-05,
"loss": 0.3412,
"step": 910
},
{
"epoch": 1.29,
"learning_rate": 2.8659990462565573e-05,
"loss": 0.326,
"step": 911
},
{
"epoch": 1.29,
"learning_rate": 2.8636146876490227e-05,
"loss": 0.3356,
"step": 912
},
{
"epoch": 1.3,
"learning_rate": 2.8612303290414884e-05,
"loss": 0.3362,
"step": 913
},
{
"epoch": 1.3,
"learning_rate": 2.858845970433953e-05,
"loss": 0.3133,
"step": 914
},
{
"epoch": 1.3,
"learning_rate": 2.8564616118264188e-05,
"loss": 0.3065,
"step": 915
},
{
"epoch": 1.3,
"learning_rate": 2.8540772532188842e-05,
"loss": 0.3335,
"step": 916
},
{
"epoch": 1.3,
"learning_rate": 2.8516928946113496e-05,
"loss": 0.3214,
"step": 917
},
{
"epoch": 1.3,
"learning_rate": 2.8493085360038153e-05,
"loss": 0.3289,
"step": 918
},
{
"epoch": 1.3,
"learning_rate": 2.8469241773962807e-05,
"loss": 0.3107,
"step": 919
},
{
"epoch": 1.31,
"learning_rate": 2.844539818788746e-05,
"loss": 0.3374,
"step": 920
},
{
"epoch": 1.31,
"learning_rate": 2.842155460181211e-05,
"loss": 0.3378,
"step": 921
},
{
"epoch": 1.31,
"learning_rate": 2.8397711015736765e-05,
"loss": 0.3316,
"step": 922
},
{
"epoch": 1.31,
"learning_rate": 2.8373867429661422e-05,
"loss": 0.3385,
"step": 923
},
{
"epoch": 1.31,
"learning_rate": 2.8350023843586076e-05,
"loss": 0.3195,
"step": 924
},
{
"epoch": 1.31,
"learning_rate": 2.832618025751073e-05,
"loss": 0.3179,
"step": 925
},
{
"epoch": 1.31,
"learning_rate": 2.8302336671435387e-05,
"loss": 0.3282,
"step": 926
},
{
"epoch": 1.32,
"learning_rate": 2.827849308536004e-05,
"loss": 0.3313,
"step": 927
},
{
"epoch": 1.32,
"learning_rate": 2.8254649499284692e-05,
"loss": 0.3426,
"step": 928
},
{
"epoch": 1.32,
"learning_rate": 2.8230805913209346e-05,
"loss": 0.3494,
"step": 929
},
{
"epoch": 1.32,
"learning_rate": 2.8206962327134e-05,
"loss": 0.3318,
"step": 930
},
{
"epoch": 1.32,
"learning_rate": 2.8183118741058657e-05,
"loss": 0.3338,
"step": 931
},
{
"epoch": 1.32,
"learning_rate": 2.815927515498331e-05,
"loss": 0.3273,
"step": 932
},
{
"epoch": 1.32,
"learning_rate": 2.8135431568907968e-05,
"loss": 0.3254,
"step": 933
},
{
"epoch": 1.32,
"learning_rate": 2.811158798283262e-05,
"loss": 0.3351,
"step": 934
},
{
"epoch": 1.33,
"learning_rate": 2.8087744396757276e-05,
"loss": 0.3485,
"step": 935
},
{
"epoch": 1.33,
"learning_rate": 2.8063900810681926e-05,
"loss": 0.3423,
"step": 936
},
{
"epoch": 1.33,
"learning_rate": 2.804005722460658e-05,
"loss": 0.3346,
"step": 937
},
{
"epoch": 1.33,
"learning_rate": 2.8016213638531237e-05,
"loss": 0.3222,
"step": 938
},
{
"epoch": 1.33,
"learning_rate": 2.799237005245589e-05,
"loss": 0.3199,
"step": 939
},
{
"epoch": 1.33,
"learning_rate": 2.7968526466380545e-05,
"loss": 0.34,
"step": 940
},
{
"epoch": 1.33,
"learning_rate": 2.7944682880305202e-05,
"loss": 0.3185,
"step": 941
},
{
"epoch": 1.34,
"learning_rate": 2.7920839294229856e-05,
"loss": 0.3298,
"step": 942
},
{
"epoch": 1.34,
"learning_rate": 2.7896995708154506e-05,
"loss": 0.329,
"step": 943
},
{
"epoch": 1.34,
"learning_rate": 2.787315212207916e-05,
"loss": 0.3455,
"step": 944
},
{
"epoch": 1.34,
"learning_rate": 2.7849308536003814e-05,
"loss": 0.3465,
"step": 945
},
{
"epoch": 1.34,
"learning_rate": 2.782546494992847e-05,
"loss": 0.3353,
"step": 946
},
{
"epoch": 1.34,
"learning_rate": 2.7801621363853125e-05,
"loss": 0.3281,
"step": 947
},
{
"epoch": 1.34,
"learning_rate": 2.777777777777778e-05,
"loss": 0.355,
"step": 948
},
{
"epoch": 1.35,
"learning_rate": 2.7753934191702436e-05,
"loss": 0.3338,
"step": 949
},
{
"epoch": 1.35,
"learning_rate": 2.773009060562709e-05,
"loss": 0.3251,
"step": 950
},
{
"epoch": 1.35,
"learning_rate": 2.770624701955174e-05,
"loss": 0.3341,
"step": 951
},
{
"epoch": 1.35,
"learning_rate": 2.7682403433476395e-05,
"loss": 0.3235,
"step": 952
},
{
"epoch": 1.35,
"learning_rate": 2.765855984740105e-05,
"loss": 0.3461,
"step": 953
},
{
"epoch": 1.35,
"learning_rate": 2.7634716261325706e-05,
"loss": 0.3278,
"step": 954
},
{
"epoch": 1.35,
"learning_rate": 2.761087267525036e-05,
"loss": 0.3223,
"step": 955
},
{
"epoch": 1.36,
"learning_rate": 2.7587029089175013e-05,
"loss": 0.3341,
"step": 956
},
{
"epoch": 1.36,
"learning_rate": 2.756318550309967e-05,
"loss": 0.3214,
"step": 957
},
{
"epoch": 1.36,
"learning_rate": 2.753934191702432e-05,
"loss": 0.3337,
"step": 958
},
{
"epoch": 1.36,
"learning_rate": 2.7515498330948975e-05,
"loss": 0.3144,
"step": 959
},
{
"epoch": 1.36,
"learning_rate": 2.749165474487363e-05,
"loss": 0.3511,
"step": 960
},
{
"epoch": 1.36,
"learning_rate": 2.7467811158798286e-05,
"loss": 0.3409,
"step": 961
},
{
"epoch": 1.36,
"learning_rate": 2.744396757272294e-05,
"loss": 0.3199,
"step": 962
},
{
"epoch": 1.37,
"learning_rate": 2.7420123986647594e-05,
"loss": 0.3168,
"step": 963
},
{
"epoch": 1.37,
"learning_rate": 2.739628040057225e-05,
"loss": 0.3419,
"step": 964
},
{
"epoch": 1.37,
"learning_rate": 2.7372436814496898e-05,
"loss": 0.3373,
"step": 965
},
{
"epoch": 1.37,
"learning_rate": 2.7348593228421555e-05,
"loss": 0.339,
"step": 966
},
{
"epoch": 1.37,
"learning_rate": 2.732474964234621e-05,
"loss": 0.3404,
"step": 967
},
{
"epoch": 1.37,
"learning_rate": 2.7300906056270863e-05,
"loss": 0.3253,
"step": 968
},
{
"epoch": 1.37,
"learning_rate": 2.727706247019552e-05,
"loss": 0.3304,
"step": 969
},
{
"epoch": 1.38,
"learning_rate": 2.7253218884120174e-05,
"loss": 0.3286,
"step": 970
},
{
"epoch": 1.38,
"learning_rate": 2.7229375298044828e-05,
"loss": 0.3315,
"step": 971
},
{
"epoch": 1.38,
"learning_rate": 2.7205531711969485e-05,
"loss": 0.3342,
"step": 972
},
{
"epoch": 1.38,
"learning_rate": 2.7181688125894132e-05,
"loss": 0.3282,
"step": 973
},
{
"epoch": 1.38,
"learning_rate": 2.715784453981879e-05,
"loss": 0.3395,
"step": 974
},
{
"epoch": 1.38,
"learning_rate": 2.7134000953743443e-05,
"loss": 0.3312,
"step": 975
},
{
"epoch": 1.38,
"learning_rate": 2.7110157367668097e-05,
"loss": 0.3273,
"step": 976
},
{
"epoch": 1.39,
"learning_rate": 2.7086313781592755e-05,
"loss": 0.337,
"step": 977
},
{
"epoch": 1.39,
"learning_rate": 2.706247019551741e-05,
"loss": 0.3231,
"step": 978
},
{
"epoch": 1.39,
"learning_rate": 2.7038626609442062e-05,
"loss": 0.3303,
"step": 979
},
{
"epoch": 1.39,
"learning_rate": 2.7014783023366713e-05,
"loss": 0.3217,
"step": 980
},
{
"epoch": 1.39,
"learning_rate": 2.6990939437291367e-05,
"loss": 0.3315,
"step": 981
},
{
"epoch": 1.39,
"learning_rate": 2.6967095851216024e-05,
"loss": 0.3298,
"step": 982
},
{
"epoch": 1.39,
"learning_rate": 2.6943252265140678e-05,
"loss": 0.3261,
"step": 983
},
{
"epoch": 1.4,
"learning_rate": 2.691940867906533e-05,
"loss": 0.3412,
"step": 984
},
{
"epoch": 1.4,
"learning_rate": 2.689556509298999e-05,
"loss": 0.3264,
"step": 985
},
{
"epoch": 1.4,
"learning_rate": 2.6871721506914643e-05,
"loss": 0.3297,
"step": 986
},
{
"epoch": 1.4,
"learning_rate": 2.6847877920839293e-05,
"loss": 0.3248,
"step": 987
},
{
"epoch": 1.4,
"learning_rate": 2.6824034334763947e-05,
"loss": 0.3317,
"step": 988
},
{
"epoch": 1.4,
"learning_rate": 2.6800190748688604e-05,
"loss": 0.3265,
"step": 989
},
{
"epoch": 1.4,
"learning_rate": 2.6776347162613258e-05,
"loss": 0.3348,
"step": 990
},
{
"epoch": 1.41,
"learning_rate": 2.6752503576537912e-05,
"loss": 0.3242,
"step": 991
},
{
"epoch": 1.41,
"learning_rate": 2.672865999046257e-05,
"loss": 0.3295,
"step": 992
},
{
"epoch": 1.41,
"learning_rate": 2.6704816404387223e-05,
"loss": 0.3245,
"step": 993
},
{
"epoch": 1.41,
"learning_rate": 2.6680972818311877e-05,
"loss": 0.3205,
"step": 994
},
{
"epoch": 1.41,
"learning_rate": 2.6657129232236527e-05,
"loss": 0.3302,
"step": 995
},
{
"epoch": 1.41,
"learning_rate": 2.663328564616118e-05,
"loss": 0.335,
"step": 996
},
{
"epoch": 1.41,
"learning_rate": 2.660944206008584e-05,
"loss": 0.3239,
"step": 997
},
{
"epoch": 1.42,
"learning_rate": 2.6585598474010492e-05,
"loss": 0.3368,
"step": 998
},
{
"epoch": 1.42,
"learning_rate": 2.6561754887935146e-05,
"loss": 0.3361,
"step": 999
},
{
"epoch": 1.42,
"learning_rate": 2.6537911301859804e-05,
"loss": 0.3478,
"step": 1000
},
{
"epoch": 1.42,
"learning_rate": 2.6514067715784457e-05,
"loss": 0.332,
"step": 1001
},
{
"epoch": 1.42,
"learning_rate": 2.6490224129709108e-05,
"loss": 0.341,
"step": 1002
},
{
"epoch": 1.42,
"learning_rate": 2.6466380543633762e-05,
"loss": 0.3396,
"step": 1003
},
{
"epoch": 1.43,
"learning_rate": 2.6442536957558416e-05,
"loss": 0.3245,
"step": 1004
},
{
"epoch": 1.43,
"learning_rate": 2.6418693371483073e-05,
"loss": 0.3337,
"step": 1005
},
{
"epoch": 1.43,
"learning_rate": 2.6394849785407727e-05,
"loss": 0.3364,
"step": 1006
},
{
"epoch": 1.43,
"learning_rate": 2.637100619933238e-05,
"loss": 0.3276,
"step": 1007
},
{
"epoch": 1.43,
"learning_rate": 2.6347162613257038e-05,
"loss": 0.329,
"step": 1008
},
{
"epoch": 1.43,
"learning_rate": 2.632331902718169e-05,
"loss": 0.331,
"step": 1009
},
{
"epoch": 1.43,
"learning_rate": 2.6299475441106342e-05,
"loss": 0.3312,
"step": 1010
},
{
"epoch": 1.44,
"learning_rate": 2.6275631855030996e-05,
"loss": 0.3219,
"step": 1011
},
{
"epoch": 1.44,
"learning_rate": 2.6251788268955653e-05,
"loss": 0.3512,
"step": 1012
},
{
"epoch": 1.44,
"learning_rate": 2.6227944682880307e-05,
"loss": 0.3252,
"step": 1013
},
{
"epoch": 1.44,
"learning_rate": 2.620410109680496e-05,
"loss": 0.3286,
"step": 1014
},
{
"epoch": 1.44,
"learning_rate": 2.6180257510729618e-05,
"loss": 0.3414,
"step": 1015
},
{
"epoch": 1.44,
"learning_rate": 2.6156413924654272e-05,
"loss": 0.337,
"step": 1016
},
{
"epoch": 1.44,
"learning_rate": 2.6132570338578923e-05,
"loss": 0.3302,
"step": 1017
},
{
"epoch": 1.45,
"learning_rate": 2.6108726752503576e-05,
"loss": 0.358,
"step": 1018
},
{
"epoch": 1.45,
"learning_rate": 2.608488316642823e-05,
"loss": 0.3227,
"step": 1019
},
{
"epoch": 1.45,
"learning_rate": 2.6061039580352888e-05,
"loss": 0.3269,
"step": 1020
},
{
"epoch": 1.45,
"learning_rate": 2.603719599427754e-05,
"loss": 0.3181,
"step": 1021
},
{
"epoch": 1.45,
"learning_rate": 2.6013352408202195e-05,
"loss": 0.3403,
"step": 1022
},
{
"epoch": 1.45,
"learning_rate": 2.5989508822126852e-05,
"loss": 0.3401,
"step": 1023
},
{
"epoch": 1.45,
"learning_rate": 2.59656652360515e-05,
"loss": 0.3257,
"step": 1024
},
{
"epoch": 1.46,
"learning_rate": 2.5941821649976157e-05,
"loss": 0.3477,
"step": 1025
},
{
"epoch": 1.46,
"learning_rate": 2.591797806390081e-05,
"loss": 0.3438,
"step": 1026
},
{
"epoch": 1.46,
"learning_rate": 2.5894134477825465e-05,
"loss": 0.3354,
"step": 1027
},
{
"epoch": 1.46,
"learning_rate": 2.5870290891750122e-05,
"loss": 0.3313,
"step": 1028
},
{
"epoch": 1.46,
"learning_rate": 2.5846447305674776e-05,
"loss": 0.3156,
"step": 1029
},
{
"epoch": 1.46,
"learning_rate": 2.582260371959943e-05,
"loss": 0.3185,
"step": 1030
},
{
"epoch": 1.46,
"learning_rate": 2.5798760133524087e-05,
"loss": 0.3382,
"step": 1031
},
{
"epoch": 1.47,
"learning_rate": 2.5774916547448734e-05,
"loss": 0.328,
"step": 1032
},
{
"epoch": 1.47,
"learning_rate": 2.575107296137339e-05,
"loss": 0.3292,
"step": 1033
},
{
"epoch": 1.47,
"learning_rate": 2.5727229375298045e-05,
"loss": 0.3367,
"step": 1034
},
{
"epoch": 1.47,
"learning_rate": 2.57033857892227e-05,
"loss": 0.3404,
"step": 1035
},
{
"epoch": 1.47,
"learning_rate": 2.5679542203147356e-05,
"loss": 0.3464,
"step": 1036
},
{
"epoch": 1.47,
"learning_rate": 2.565569861707201e-05,
"loss": 0.3319,
"step": 1037
},
{
"epoch": 1.47,
"learning_rate": 2.5631855030996664e-05,
"loss": 0.3353,
"step": 1038
},
{
"epoch": 1.48,
"learning_rate": 2.5608011444921314e-05,
"loss": 0.3259,
"step": 1039
},
{
"epoch": 1.48,
"learning_rate": 2.558416785884597e-05,
"loss": 0.3292,
"step": 1040
},
{
"epoch": 1.48,
"learning_rate": 2.5560324272770625e-05,
"loss": 0.327,
"step": 1041
},
{
"epoch": 1.48,
"learning_rate": 2.553648068669528e-05,
"loss": 0.3427,
"step": 1042
},
{
"epoch": 1.48,
"learning_rate": 2.5512637100619936e-05,
"loss": 0.3306,
"step": 1043
},
{
"epoch": 1.48,
"learning_rate": 2.548879351454459e-05,
"loss": 0.3312,
"step": 1044
},
{
"epoch": 1.48,
"learning_rate": 2.5464949928469244e-05,
"loss": 0.3477,
"step": 1045
},
{
"epoch": 1.49,
"learning_rate": 2.54411063423939e-05,
"loss": 0.3461,
"step": 1046
},
{
"epoch": 1.49,
"learning_rate": 2.541726275631855e-05,
"loss": 0.315,
"step": 1047
},
{
"epoch": 1.49,
"learning_rate": 2.5393419170243206e-05,
"loss": 0.3443,
"step": 1048
},
{
"epoch": 1.49,
"learning_rate": 2.536957558416786e-05,
"loss": 0.3394,
"step": 1049
},
{
"epoch": 1.49,
"learning_rate": 2.5345731998092513e-05,
"loss": 0.3188,
"step": 1050
},
{
"epoch": 1.49,
"learning_rate": 2.532188841201717e-05,
"loss": 0.3428,
"step": 1051
},
{
"epoch": 1.49,
"learning_rate": 2.5298044825941825e-05,
"loss": 0.3371,
"step": 1052
},
{
"epoch": 1.5,
"learning_rate": 2.527420123986648e-05,
"loss": 0.3338,
"step": 1053
},
{
"epoch": 1.5,
"learning_rate": 2.525035765379113e-05,
"loss": 0.3283,
"step": 1054
},
{
"epoch": 1.5,
"learning_rate": 2.5226514067715783e-05,
"loss": 0.308,
"step": 1055
},
{
"epoch": 1.5,
"learning_rate": 2.520267048164044e-05,
"loss": 0.3226,
"step": 1056
},
{
"epoch": 1.5,
"learning_rate": 2.5178826895565094e-05,
"loss": 0.3231,
"step": 1057
},
{
"epoch": 1.5,
"learning_rate": 2.5154983309489748e-05,
"loss": 0.3474,
"step": 1058
},
{
"epoch": 1.5,
"learning_rate": 2.5131139723414405e-05,
"loss": 0.3378,
"step": 1059
},
{
"epoch": 1.51,
"learning_rate": 2.510729613733906e-05,
"loss": 0.3019,
"step": 1060
},
{
"epoch": 1.51,
"learning_rate": 2.508345255126371e-05,
"loss": 0.3466,
"step": 1061
},
{
"epoch": 1.51,
"learning_rate": 2.5059608965188363e-05,
"loss": 0.3294,
"step": 1062
},
{
"epoch": 1.51,
"learning_rate": 2.5035765379113017e-05,
"loss": 0.3413,
"step": 1063
},
{
"epoch": 1.51,
"learning_rate": 2.5011921793037674e-05,
"loss": 0.3287,
"step": 1064
},
{
"epoch": 1.51,
"learning_rate": 2.4988078206962328e-05,
"loss": 0.3124,
"step": 1065
},
{
"epoch": 1.51,
"learning_rate": 2.4964234620886985e-05,
"loss": 0.3072,
"step": 1066
},
{
"epoch": 1.51,
"learning_rate": 2.4940391034811636e-05,
"loss": 0.3226,
"step": 1067
},
{
"epoch": 1.52,
"learning_rate": 2.491654744873629e-05,
"loss": 0.3305,
"step": 1068
},
{
"epoch": 1.52,
"learning_rate": 2.4892703862660947e-05,
"loss": 0.3353,
"step": 1069
},
{
"epoch": 1.52,
"learning_rate": 2.48688602765856e-05,
"loss": 0.3312,
"step": 1070
},
{
"epoch": 1.52,
"learning_rate": 2.4845016690510255e-05,
"loss": 0.3482,
"step": 1071
},
{
"epoch": 1.52,
"learning_rate": 2.482117310443491e-05,
"loss": 0.3356,
"step": 1072
},
{
"epoch": 1.52,
"learning_rate": 2.4797329518359562e-05,
"loss": 0.3251,
"step": 1073
},
{
"epoch": 1.52,
"learning_rate": 2.4773485932284216e-05,
"loss": 0.3366,
"step": 1074
},
{
"epoch": 1.53,
"learning_rate": 2.474964234620887e-05,
"loss": 0.3152,
"step": 1075
},
{
"epoch": 1.53,
"learning_rate": 2.4725798760133524e-05,
"loss": 0.324,
"step": 1076
},
{
"epoch": 1.53,
"learning_rate": 2.470195517405818e-05,
"loss": 0.3322,
"step": 1077
},
{
"epoch": 1.53,
"learning_rate": 2.467811158798283e-05,
"loss": 0.3454,
"step": 1078
},
{
"epoch": 1.53,
"learning_rate": 2.465426800190749e-05,
"loss": 0.3111,
"step": 1079
},
{
"epoch": 1.53,
"learning_rate": 2.4630424415832143e-05,
"loss": 0.3353,
"step": 1080
},
{
"epoch": 1.53,
"learning_rate": 2.4606580829756797e-05,
"loss": 0.306,
"step": 1081
},
{
"epoch": 1.54,
"learning_rate": 2.458273724368145e-05,
"loss": 0.3248,
"step": 1082
},
{
"epoch": 1.54,
"learning_rate": 2.4558893657606104e-05,
"loss": 0.3397,
"step": 1083
},
{
"epoch": 1.54,
"learning_rate": 2.453505007153076e-05,
"loss": 0.3328,
"step": 1084
},
{
"epoch": 1.54,
"learning_rate": 2.4511206485455412e-05,
"loss": 0.352,
"step": 1085
},
{
"epoch": 1.54,
"learning_rate": 2.4487362899380066e-05,
"loss": 0.3361,
"step": 1086
},
{
"epoch": 1.54,
"learning_rate": 2.4463519313304723e-05,
"loss": 0.3175,
"step": 1087
},
{
"epoch": 1.54,
"learning_rate": 2.4439675727229377e-05,
"loss": 0.3379,
"step": 1088
},
{
"epoch": 1.55,
"learning_rate": 2.441583214115403e-05,
"loss": 0.3415,
"step": 1089
},
{
"epoch": 1.55,
"learning_rate": 2.4391988555078685e-05,
"loss": 0.342,
"step": 1090
},
{
"epoch": 1.55,
"learning_rate": 2.436814496900334e-05,
"loss": 0.347,
"step": 1091
},
{
"epoch": 1.55,
"learning_rate": 2.4344301382927996e-05,
"loss": 0.3505,
"step": 1092
},
{
"epoch": 1.55,
"learning_rate": 2.4320457796852646e-05,
"loss": 0.3274,
"step": 1093
},
{
"epoch": 1.55,
"learning_rate": 2.4296614210777304e-05,
"loss": 0.3285,
"step": 1094
},
{
"epoch": 1.55,
"learning_rate": 2.4272770624701957e-05,
"loss": 0.3325,
"step": 1095
},
{
"epoch": 1.56,
"learning_rate": 2.4248927038626608e-05,
"loss": 0.3354,
"step": 1096
},
{
"epoch": 1.56,
"learning_rate": 2.4225083452551265e-05,
"loss": 0.3401,
"step": 1097
},
{
"epoch": 1.56,
"learning_rate": 2.420123986647592e-05,
"loss": 0.3215,
"step": 1098
},
{
"epoch": 1.56,
"learning_rate": 2.4177396280400573e-05,
"loss": 0.3188,
"step": 1099
},
{
"epoch": 1.56,
"learning_rate": 2.4153552694325227e-05,
"loss": 0.3242,
"step": 1100
},
{
"epoch": 1.56,
"learning_rate": 2.412970910824988e-05,
"loss": 0.3374,
"step": 1101
},
{
"epoch": 1.56,
"learning_rate": 2.4105865522174538e-05,
"loss": 0.3284,
"step": 1102
},
{
"epoch": 1.57,
"learning_rate": 2.4082021936099192e-05,
"loss": 0.3452,
"step": 1103
},
{
"epoch": 1.57,
"learning_rate": 2.4058178350023846e-05,
"loss": 0.3239,
"step": 1104
},
{
"epoch": 1.57,
"learning_rate": 2.40343347639485e-05,
"loss": 0.3177,
"step": 1105
},
{
"epoch": 1.57,
"learning_rate": 2.4010491177873153e-05,
"loss": 0.3193,
"step": 1106
},
{
"epoch": 1.57,
"learning_rate": 2.3986647591797807e-05,
"loss": 0.3524,
"step": 1107
},
{
"epoch": 1.57,
"learning_rate": 2.396280400572246e-05,
"loss": 0.3308,
"step": 1108
},
{
"epoch": 1.57,
"learning_rate": 2.3938960419647115e-05,
"loss": 0.319,
"step": 1109
},
{
"epoch": 1.58,
"learning_rate": 2.3915116833571772e-05,
"loss": 0.3455,
"step": 1110
},
{
"epoch": 1.58,
"learning_rate": 2.3891273247496423e-05,
"loss": 0.3374,
"step": 1111
},
{
"epoch": 1.58,
"learning_rate": 2.386742966142108e-05,
"loss": 0.321,
"step": 1112
},
{
"epoch": 1.58,
"learning_rate": 2.3843586075345734e-05,
"loss": 0.3253,
"step": 1113
},
{
"epoch": 1.58,
"learning_rate": 2.3819742489270388e-05,
"loss": 0.3127,
"step": 1114
},
{
"epoch": 1.58,
"learning_rate": 2.379589890319504e-05,
"loss": 0.3248,
"step": 1115
},
{
"epoch": 1.58,
"learning_rate": 2.3772055317119695e-05,
"loss": 0.3384,
"step": 1116
},
{
"epoch": 1.59,
"learning_rate": 2.374821173104435e-05,
"loss": 0.3345,
"step": 1117
},
{
"epoch": 1.59,
"learning_rate": 2.3724368144969006e-05,
"loss": 0.3265,
"step": 1118
},
{
"epoch": 1.59,
"learning_rate": 2.3700524558893657e-05,
"loss": 0.3253,
"step": 1119
},
{
"epoch": 1.59,
"learning_rate": 2.3676680972818314e-05,
"loss": 0.3167,
"step": 1120
},
{
"epoch": 1.59,
"learning_rate": 2.3652837386742968e-05,
"loss": 0.3299,
"step": 1121
},
{
"epoch": 1.59,
"learning_rate": 2.3628993800667622e-05,
"loss": 0.3402,
"step": 1122
},
{
"epoch": 1.59,
"learning_rate": 2.3605150214592276e-05,
"loss": 0.3169,
"step": 1123
},
{
"epoch": 1.6,
"learning_rate": 2.358130662851693e-05,
"loss": 0.3247,
"step": 1124
},
{
"epoch": 1.6,
"learning_rate": 2.3557463042441587e-05,
"loss": 0.321,
"step": 1125
},
{
"epoch": 1.6,
"learning_rate": 2.3533619456366237e-05,
"loss": 0.3289,
"step": 1126
},
{
"epoch": 1.6,
"learning_rate": 2.350977587029089e-05,
"loss": 0.3276,
"step": 1127
},
{
"epoch": 1.6,
"learning_rate": 2.348593228421555e-05,
"loss": 0.3211,
"step": 1128
},
{
"epoch": 1.6,
"learning_rate": 2.3462088698140202e-05,
"loss": 0.3177,
"step": 1129
},
{
"epoch": 1.6,
"learning_rate": 2.3438245112064856e-05,
"loss": 0.3106,
"step": 1130
},
{
"epoch": 1.61,
"learning_rate": 2.341440152598951e-05,
"loss": 0.326,
"step": 1131
},
{
"epoch": 1.61,
"learning_rate": 2.3390557939914164e-05,
"loss": 0.3321,
"step": 1132
},
{
"epoch": 1.61,
"learning_rate": 2.3366714353838818e-05,
"loss": 0.3128,
"step": 1133
},
{
"epoch": 1.61,
"learning_rate": 2.334287076776347e-05,
"loss": 0.312,
"step": 1134
},
{
"epoch": 1.61,
"learning_rate": 2.331902718168813e-05,
"loss": 0.3126,
"step": 1135
},
{
"epoch": 1.61,
"learning_rate": 2.3295183595612783e-05,
"loss": 0.3041,
"step": 1136
},
{
"epoch": 1.61,
"learning_rate": 2.3271340009537433e-05,
"loss": 0.3266,
"step": 1137
},
{
"epoch": 1.62,
"learning_rate": 2.324749642346209e-05,
"loss": 0.3237,
"step": 1138
},
{
"epoch": 1.62,
"learning_rate": 2.3223652837386744e-05,
"loss": 0.321,
"step": 1139
},
{
"epoch": 1.62,
"learning_rate": 2.3199809251311398e-05,
"loss": 0.3178,
"step": 1140
},
{
"epoch": 1.62,
"learning_rate": 2.3175965665236052e-05,
"loss": 0.3309,
"step": 1141
},
{
"epoch": 1.62,
"learning_rate": 2.3152122079160706e-05,
"loss": 0.3187,
"step": 1142
},
{
"epoch": 1.62,
"learning_rate": 2.3128278493085363e-05,
"loss": 0.3411,
"step": 1143
},
{
"epoch": 1.62,
"learning_rate": 2.3104434907010014e-05,
"loss": 0.3401,
"step": 1144
},
{
"epoch": 1.63,
"learning_rate": 2.308059132093467e-05,
"loss": 0.3225,
"step": 1145
},
{
"epoch": 1.63,
"learning_rate": 2.3056747734859325e-05,
"loss": 0.3426,
"step": 1146
},
{
"epoch": 1.63,
"learning_rate": 2.303290414878398e-05,
"loss": 0.3119,
"step": 1147
},
{
"epoch": 1.63,
"learning_rate": 2.3009060562708632e-05,
"loss": 0.3291,
"step": 1148
},
{
"epoch": 1.63,
"learning_rate": 2.2985216976633286e-05,
"loss": 0.3333,
"step": 1149
},
{
"epoch": 1.63,
"learning_rate": 2.296137339055794e-05,
"loss": 0.335,
"step": 1150
},
{
"epoch": 1.63,
"learning_rate": 2.2937529804482597e-05,
"loss": 0.3185,
"step": 1151
},
{
"epoch": 1.64,
"learning_rate": 2.2913686218407248e-05,
"loss": 0.3206,
"step": 1152
},
{
"epoch": 1.64,
"learning_rate": 2.2889842632331905e-05,
"loss": 0.3203,
"step": 1153
},
{
"epoch": 1.64,
"learning_rate": 2.286599904625656e-05,
"loss": 0.3314,
"step": 1154
},
{
"epoch": 1.64,
"learning_rate": 2.284215546018121e-05,
"loss": 0.3255,
"step": 1155
},
{
"epoch": 1.64,
"learning_rate": 2.2818311874105867e-05,
"loss": 0.3161,
"step": 1156
},
{
"epoch": 1.64,
"learning_rate": 2.279446828803052e-05,
"loss": 0.3225,
"step": 1157
},
{
"epoch": 1.64,
"learning_rate": 2.2770624701955178e-05,
"loss": 0.336,
"step": 1158
},
{
"epoch": 1.65,
"learning_rate": 2.2746781115879828e-05,
"loss": 0.3164,
"step": 1159
},
{
"epoch": 1.65,
"learning_rate": 2.2722937529804482e-05,
"loss": 0.3175,
"step": 1160
},
{
"epoch": 1.65,
"learning_rate": 2.269909394372914e-05,
"loss": 0.3332,
"step": 1161
},
{
"epoch": 1.65,
"learning_rate": 2.2675250357653793e-05,
"loss": 0.3262,
"step": 1162
},
{
"epoch": 1.65,
"learning_rate": 2.2651406771578447e-05,
"loss": 0.3149,
"step": 1163
},
{
"epoch": 1.65,
"learning_rate": 2.26275631855031e-05,
"loss": 0.3272,
"step": 1164
},
{
"epoch": 1.65,
"learning_rate": 2.2603719599427755e-05,
"loss": 0.3319,
"step": 1165
},
{
"epoch": 1.66,
"learning_rate": 2.2579876013352412e-05,
"loss": 0.3135,
"step": 1166
},
{
"epoch": 1.66,
"learning_rate": 2.2556032427277062e-05,
"loss": 0.3147,
"step": 1167
},
{
"epoch": 1.66,
"learning_rate": 2.2532188841201716e-05,
"loss": 0.3257,
"step": 1168
},
{
"epoch": 1.66,
"learning_rate": 2.2508345255126374e-05,
"loss": 0.3235,
"step": 1169
},
{
"epoch": 1.66,
"learning_rate": 2.2484501669051024e-05,
"loss": 0.3274,
"step": 1170
},
{
"epoch": 1.66,
"learning_rate": 2.246065808297568e-05,
"loss": 0.3214,
"step": 1171
},
{
"epoch": 1.66,
"learning_rate": 2.2436814496900335e-05,
"loss": 0.3329,
"step": 1172
},
{
"epoch": 1.67,
"learning_rate": 2.241297091082499e-05,
"loss": 0.3211,
"step": 1173
},
{
"epoch": 1.67,
"learning_rate": 2.2389127324749643e-05,
"loss": 0.3377,
"step": 1174
},
{
"epoch": 1.67,
"learning_rate": 2.2365283738674297e-05,
"loss": 0.3271,
"step": 1175
},
{
"epoch": 1.67,
"learning_rate": 2.2341440152598954e-05,
"loss": 0.3253,
"step": 1176
},
{
"epoch": 1.67,
"learning_rate": 2.2317596566523608e-05,
"loss": 0.3075,
"step": 1177
},
{
"epoch": 1.67,
"learning_rate": 2.229375298044826e-05,
"loss": 0.3235,
"step": 1178
},
{
"epoch": 1.67,
"learning_rate": 2.2269909394372916e-05,
"loss": 0.3093,
"step": 1179
},
{
"epoch": 1.68,
"learning_rate": 2.224606580829757e-05,
"loss": 0.3228,
"step": 1180
},
{
"epoch": 1.68,
"learning_rate": 2.2222222222222223e-05,
"loss": 0.3405,
"step": 1181
},
{
"epoch": 1.68,
"learning_rate": 2.2198378636146877e-05,
"loss": 0.3082,
"step": 1182
},
{
"epoch": 1.68,
"learning_rate": 2.217453505007153e-05,
"loss": 0.3186,
"step": 1183
},
{
"epoch": 1.68,
"learning_rate": 2.2150691463996188e-05,
"loss": 0.3151,
"step": 1184
},
{
"epoch": 1.68,
"learning_rate": 2.212684787792084e-05,
"loss": 0.323,
"step": 1185
},
{
"epoch": 1.68,
"learning_rate": 2.2103004291845496e-05,
"loss": 0.3311,
"step": 1186
},
{
"epoch": 1.69,
"learning_rate": 2.207916070577015e-05,
"loss": 0.3341,
"step": 1187
},
{
"epoch": 1.69,
"learning_rate": 2.2055317119694804e-05,
"loss": 0.328,
"step": 1188
},
{
"epoch": 1.69,
"learning_rate": 2.2031473533619458e-05,
"loss": 0.341,
"step": 1189
},
{
"epoch": 1.69,
"learning_rate": 2.200762994754411e-05,
"loss": 0.3354,
"step": 1190
},
{
"epoch": 1.69,
"learning_rate": 2.1983786361468765e-05,
"loss": 0.3458,
"step": 1191
},
{
"epoch": 1.69,
"learning_rate": 2.195994277539342e-05,
"loss": 0.3505,
"step": 1192
},
{
"epoch": 1.69,
"learning_rate": 2.1936099189318073e-05,
"loss": 0.3328,
"step": 1193
},
{
"epoch": 1.7,
"learning_rate": 2.191225560324273e-05,
"loss": 0.3251,
"step": 1194
},
{
"epoch": 1.7,
"learning_rate": 2.1888412017167384e-05,
"loss": 0.316,
"step": 1195
},
{
"epoch": 1.7,
"learning_rate": 2.1864568431092035e-05,
"loss": 0.352,
"step": 1196
},
{
"epoch": 1.7,
"learning_rate": 2.1840724845016692e-05,
"loss": 0.3251,
"step": 1197
},
{
"epoch": 1.7,
"learning_rate": 2.1816881258941346e-05,
"loss": 0.3123,
"step": 1198
},
{
"epoch": 1.7,
"learning_rate": 2.1793037672866003e-05,
"loss": 0.3257,
"step": 1199
},
{
"epoch": 1.7,
"learning_rate": 2.1769194086790653e-05,
"loss": 0.3156,
"step": 1200
},
{
"epoch": 1.71,
"learning_rate": 2.1745350500715307e-05,
"loss": 0.3279,
"step": 1201
},
{
"epoch": 1.71,
"learning_rate": 2.1721506914639965e-05,
"loss": 0.3339,
"step": 1202
},
{
"epoch": 1.71,
"learning_rate": 2.1697663328564615e-05,
"loss": 0.3346,
"step": 1203
},
{
"epoch": 1.71,
"learning_rate": 2.1673819742489272e-05,
"loss": 0.3492,
"step": 1204
},
{
"epoch": 1.71,
"learning_rate": 2.1649976156413926e-05,
"loss": 0.3281,
"step": 1205
},
{
"epoch": 1.71,
"learning_rate": 2.162613257033858e-05,
"loss": 0.3269,
"step": 1206
},
{
"epoch": 1.71,
"learning_rate": 2.1602288984263234e-05,
"loss": 0.3447,
"step": 1207
},
{
"epoch": 1.71,
"learning_rate": 2.1578445398187888e-05,
"loss": 0.3292,
"step": 1208
},
{
"epoch": 1.72,
"learning_rate": 2.155460181211254e-05,
"loss": 0.3375,
"step": 1209
},
{
"epoch": 1.72,
"learning_rate": 2.15307582260372e-05,
"loss": 0.3223,
"step": 1210
},
{
"epoch": 1.72,
"learning_rate": 2.150691463996185e-05,
"loss": 0.3235,
"step": 1211
},
{
"epoch": 1.72,
"learning_rate": 2.1483071053886506e-05,
"loss": 0.3126,
"step": 1212
},
{
"epoch": 1.72,
"learning_rate": 2.145922746781116e-05,
"loss": 0.3133,
"step": 1213
},
{
"epoch": 1.72,
"learning_rate": 2.1435383881735814e-05,
"loss": 0.3365,
"step": 1214
},
{
"epoch": 1.72,
"learning_rate": 2.1411540295660468e-05,
"loss": 0.3254,
"step": 1215
},
{
"epoch": 1.73,
"learning_rate": 2.1387696709585122e-05,
"loss": 0.3393,
"step": 1216
},
{
"epoch": 1.73,
"learning_rate": 2.136385312350978e-05,
"loss": 0.3454,
"step": 1217
},
{
"epoch": 1.73,
"learning_rate": 2.134000953743443e-05,
"loss": 0.3376,
"step": 1218
},
{
"epoch": 1.73,
"learning_rate": 2.1316165951359084e-05,
"loss": 0.329,
"step": 1219
},
{
"epoch": 1.73,
"learning_rate": 2.129232236528374e-05,
"loss": 0.3295,
"step": 1220
},
{
"epoch": 1.73,
"learning_rate": 2.1268478779208395e-05,
"loss": 0.3209,
"step": 1221
},
{
"epoch": 1.73,
"learning_rate": 2.124463519313305e-05,
"loss": 0.3122,
"step": 1222
},
{
"epoch": 1.74,
"learning_rate": 2.1220791607057702e-05,
"loss": 0.3285,
"step": 1223
},
{
"epoch": 1.74,
"learning_rate": 2.1196948020982356e-05,
"loss": 0.3407,
"step": 1224
},
{
"epoch": 1.74,
"learning_rate": 2.1173104434907013e-05,
"loss": 0.308,
"step": 1225
},
{
"epoch": 1.74,
"learning_rate": 2.1149260848831664e-05,
"loss": 0.3347,
"step": 1226
},
{
"epoch": 1.74,
"learning_rate": 2.112541726275632e-05,
"loss": 0.3342,
"step": 1227
},
{
"epoch": 1.74,
"learning_rate": 2.1101573676680975e-05,
"loss": 0.3232,
"step": 1228
},
{
"epoch": 1.74,
"learning_rate": 2.1077730090605625e-05,
"loss": 0.3163,
"step": 1229
},
{
"epoch": 1.75,
"learning_rate": 2.1053886504530283e-05,
"loss": 0.3326,
"step": 1230
},
{
"epoch": 1.75,
"learning_rate": 2.1030042918454937e-05,
"loss": 0.3413,
"step": 1231
},
{
"epoch": 1.75,
"learning_rate": 2.100619933237959e-05,
"loss": 0.3444,
"step": 1232
},
{
"epoch": 1.75,
"learning_rate": 2.0982355746304244e-05,
"loss": 0.3035,
"step": 1233
},
{
"epoch": 1.75,
"learning_rate": 2.0958512160228898e-05,
"loss": 0.316,
"step": 1234
},
{
"epoch": 1.75,
"learning_rate": 2.0934668574153555e-05,
"loss": 0.302,
"step": 1235
},
{
"epoch": 1.75,
"learning_rate": 2.091082498807821e-05,
"loss": 0.3135,
"step": 1236
},
{
"epoch": 1.76,
"learning_rate": 2.0886981402002863e-05,
"loss": 0.319,
"step": 1237
},
{
"epoch": 1.76,
"learning_rate": 2.0863137815927517e-05,
"loss": 0.2999,
"step": 1238
},
{
"epoch": 1.76,
"learning_rate": 2.083929422985217e-05,
"loss": 0.3128,
"step": 1239
},
{
"epoch": 1.76,
"learning_rate": 2.0815450643776825e-05,
"loss": 0.3148,
"step": 1240
},
{
"epoch": 1.76,
"learning_rate": 2.079160705770148e-05,
"loss": 0.3436,
"step": 1241
},
{
"epoch": 1.76,
"learning_rate": 2.0767763471626132e-05,
"loss": 0.3249,
"step": 1242
},
{
"epoch": 1.76,
"learning_rate": 2.074391988555079e-05,
"loss": 0.3244,
"step": 1243
},
{
"epoch": 1.77,
"learning_rate": 2.072007629947544e-05,
"loss": 0.3236,
"step": 1244
},
{
"epoch": 1.77,
"learning_rate": 2.0696232713400097e-05,
"loss": 0.3322,
"step": 1245
},
{
"epoch": 1.77,
"learning_rate": 2.067238912732475e-05,
"loss": 0.325,
"step": 1246
},
{
"epoch": 1.77,
"learning_rate": 2.0648545541249405e-05,
"loss": 0.326,
"step": 1247
},
{
"epoch": 1.77,
"learning_rate": 2.062470195517406e-05,
"loss": 0.306,
"step": 1248
},
{
"epoch": 1.77,
"learning_rate": 2.0600858369098713e-05,
"loss": 0.3374,
"step": 1249
},
{
"epoch": 1.77,
"learning_rate": 2.0577014783023367e-05,
"loss": 0.3298,
"step": 1250
},
{
"epoch": 1.78,
"learning_rate": 2.055317119694802e-05,
"loss": 0.3135,
"step": 1251
},
{
"epoch": 1.78,
"learning_rate": 2.0529327610872674e-05,
"loss": 0.3266,
"step": 1252
},
{
"epoch": 1.78,
"learning_rate": 2.050548402479733e-05,
"loss": 0.3043,
"step": 1253
},
{
"epoch": 1.78,
"learning_rate": 2.0481640438721986e-05,
"loss": 0.3157,
"step": 1254
},
{
"epoch": 1.78,
"learning_rate": 2.045779685264664e-05,
"loss": 0.3154,
"step": 1255
},
{
"epoch": 1.78,
"learning_rate": 2.0433953266571293e-05,
"loss": 0.3091,
"step": 1256
},
{
"epoch": 1.78,
"learning_rate": 2.0410109680495947e-05,
"loss": 0.3126,
"step": 1257
},
{
"epoch": 1.79,
"learning_rate": 2.0386266094420604e-05,
"loss": 0.3245,
"step": 1258
},
{
"epoch": 1.79,
"learning_rate": 2.0362422508345255e-05,
"loss": 0.3195,
"step": 1259
},
{
"epoch": 1.79,
"learning_rate": 2.033857892226991e-05,
"loss": 0.3226,
"step": 1260
},
{
"epoch": 1.79,
"learning_rate": 2.0314735336194566e-05,
"loss": 0.3189,
"step": 1261
},
{
"epoch": 1.79,
"learning_rate": 2.029089175011922e-05,
"loss": 0.3198,
"step": 1262
},
{
"epoch": 1.79,
"learning_rate": 2.0267048164043874e-05,
"loss": 0.3212,
"step": 1263
},
{
"epoch": 1.79,
"learning_rate": 2.0243204577968528e-05,
"loss": 0.3267,
"step": 1264
},
{
"epoch": 1.8,
"learning_rate": 2.021936099189318e-05,
"loss": 0.3252,
"step": 1265
},
{
"epoch": 1.8,
"learning_rate": 2.0195517405817835e-05,
"loss": 0.3151,
"step": 1266
},
{
"epoch": 1.8,
"learning_rate": 2.017167381974249e-05,
"loss": 0.3082,
"step": 1267
},
{
"epoch": 1.8,
"learning_rate": 2.0147830233667146e-05,
"loss": 0.3098,
"step": 1268
},
{
"epoch": 1.8,
"learning_rate": 2.01239866475918e-05,
"loss": 0.3183,
"step": 1269
},
{
"epoch": 1.8,
"learning_rate": 2.010014306151645e-05,
"loss": 0.3457,
"step": 1270
},
{
"epoch": 1.8,
"learning_rate": 2.0076299475441108e-05,
"loss": 0.3319,
"step": 1271
},
{
"epoch": 1.81,
"learning_rate": 2.0052455889365762e-05,
"loss": 0.3351,
"step": 1272
},
{
"epoch": 1.81,
"learning_rate": 2.0028612303290416e-05,
"loss": 0.3301,
"step": 1273
},
{
"epoch": 1.81,
"learning_rate": 2.000476871721507e-05,
"loss": 0.3235,
"step": 1274
},
{
"epoch": 1.81,
"learning_rate": 1.9980925131139723e-05,
"loss": 0.3185,
"step": 1275
},
{
"epoch": 1.81,
"learning_rate": 1.995708154506438e-05,
"loss": 0.3245,
"step": 1276
},
{
"epoch": 1.81,
"learning_rate": 1.993323795898903e-05,
"loss": 0.3061,
"step": 1277
},
{
"epoch": 1.81,
"learning_rate": 1.990939437291369e-05,
"loss": 0.3125,
"step": 1278
},
{
"epoch": 1.82,
"learning_rate": 1.9885550786838342e-05,
"loss": 0.3158,
"step": 1279
},
{
"epoch": 1.82,
"learning_rate": 1.9861707200762996e-05,
"loss": 0.3049,
"step": 1280
},
{
"epoch": 1.82,
"learning_rate": 1.983786361468765e-05,
"loss": 0.3183,
"step": 1281
},
{
"epoch": 1.82,
"learning_rate": 1.9814020028612304e-05,
"loss": 0.3434,
"step": 1282
},
{
"epoch": 1.82,
"learning_rate": 1.9790176442536958e-05,
"loss": 0.3322,
"step": 1283
},
{
"epoch": 1.82,
"learning_rate": 1.9766332856461615e-05,
"loss": 0.3229,
"step": 1284
},
{
"epoch": 1.82,
"learning_rate": 1.9742489270386265e-05,
"loss": 0.333,
"step": 1285
},
{
"epoch": 1.83,
"learning_rate": 1.9718645684310923e-05,
"loss": 0.3275,
"step": 1286
},
{
"epoch": 1.83,
"learning_rate": 1.9694802098235576e-05,
"loss": 0.3488,
"step": 1287
},
{
"epoch": 1.83,
"learning_rate": 1.9670958512160227e-05,
"loss": 0.3308,
"step": 1288
},
{
"epoch": 1.83,
"learning_rate": 1.9647114926084884e-05,
"loss": 0.3161,
"step": 1289
},
{
"epoch": 1.83,
"learning_rate": 1.9623271340009538e-05,
"loss": 0.3081,
"step": 1290
},
{
"epoch": 1.83,
"learning_rate": 1.9599427753934195e-05,
"loss": 0.3348,
"step": 1291
},
{
"epoch": 1.83,
"learning_rate": 1.9575584167858846e-05,
"loss": 0.3271,
"step": 1292
},
{
"epoch": 1.84,
"learning_rate": 1.95517405817835e-05,
"loss": 0.3131,
"step": 1293
},
{
"epoch": 1.84,
"learning_rate": 1.9527896995708157e-05,
"loss": 0.313,
"step": 1294
},
{
"epoch": 1.84,
"learning_rate": 1.950405340963281e-05,
"loss": 0.3181,
"step": 1295
},
{
"epoch": 1.84,
"learning_rate": 1.9480209823557465e-05,
"loss": 0.3206,
"step": 1296
},
{
"epoch": 1.84,
"learning_rate": 1.945636623748212e-05,
"loss": 0.3329,
"step": 1297
},
{
"epoch": 1.84,
"learning_rate": 1.9432522651406772e-05,
"loss": 0.3427,
"step": 1298
},
{
"epoch": 1.84,
"learning_rate": 1.9408679065331426e-05,
"loss": 0.3196,
"step": 1299
},
{
"epoch": 1.85,
"learning_rate": 1.938483547925608e-05,
"loss": 0.3124,
"step": 1300
},
{
"epoch": 1.85,
"learning_rate": 1.9360991893180734e-05,
"loss": 0.3404,
"step": 1301
},
{
"epoch": 1.85,
"learning_rate": 1.933714830710539e-05,
"loss": 0.3243,
"step": 1302
},
{
"epoch": 1.85,
"learning_rate": 1.931330472103004e-05,
"loss": 0.3322,
"step": 1303
},
{
"epoch": 1.85,
"learning_rate": 1.92894611349547e-05,
"loss": 0.3228,
"step": 1304
},
{
"epoch": 1.85,
"learning_rate": 1.9265617548879353e-05,
"loss": 0.332,
"step": 1305
},
{
"epoch": 1.85,
"learning_rate": 1.9241773962804007e-05,
"loss": 0.3284,
"step": 1306
},
{
"epoch": 1.86,
"learning_rate": 1.921793037672866e-05,
"loss": 0.3289,
"step": 1307
},
{
"epoch": 1.86,
"learning_rate": 1.9194086790653314e-05,
"loss": 0.318,
"step": 1308
},
{
"epoch": 1.86,
"learning_rate": 1.917024320457797e-05,
"loss": 0.3482,
"step": 1309
},
{
"epoch": 1.86,
"learning_rate": 1.9146399618502625e-05,
"loss": 0.3328,
"step": 1310
},
{
"epoch": 1.86,
"learning_rate": 1.9122556032427276e-05,
"loss": 0.329,
"step": 1311
},
{
"epoch": 1.86,
"learning_rate": 1.9098712446351933e-05,
"loss": 0.333,
"step": 1312
},
{
"epoch": 1.86,
"learning_rate": 1.9074868860276587e-05,
"loss": 0.3264,
"step": 1313
},
{
"epoch": 1.87,
"learning_rate": 1.905102527420124e-05,
"loss": 0.3157,
"step": 1314
},
{
"epoch": 1.87,
"learning_rate": 1.9027181688125895e-05,
"loss": 0.3091,
"step": 1315
},
{
"epoch": 1.87,
"learning_rate": 1.900333810205055e-05,
"loss": 0.3336,
"step": 1316
},
{
"epoch": 1.87,
"learning_rate": 1.8979494515975206e-05,
"loss": 0.3254,
"step": 1317
},
{
"epoch": 1.87,
"learning_rate": 1.8955650929899856e-05,
"loss": 0.3419,
"step": 1318
},
{
"epoch": 1.87,
"learning_rate": 1.8931807343824514e-05,
"loss": 0.3156,
"step": 1319
},
{
"epoch": 1.87,
"learning_rate": 1.8907963757749167e-05,
"loss": 0.318,
"step": 1320
},
{
"epoch": 1.88,
"learning_rate": 1.888412017167382e-05,
"loss": 0.3218,
"step": 1321
},
{
"epoch": 1.88,
"learning_rate": 1.8860276585598475e-05,
"loss": 0.3203,
"step": 1322
},
{
"epoch": 1.88,
"learning_rate": 1.883643299952313e-05,
"loss": 0.3197,
"step": 1323
},
{
"epoch": 1.88,
"learning_rate": 1.8812589413447783e-05,
"loss": 0.3497,
"step": 1324
},
{
"epoch": 1.88,
"learning_rate": 1.8788745827372437e-05,
"loss": 0.3298,
"step": 1325
},
{
"epoch": 1.88,
"learning_rate": 1.876490224129709e-05,
"loss": 0.3164,
"step": 1326
},
{
"epoch": 1.88,
"learning_rate": 1.8741058655221748e-05,
"loss": 0.3243,
"step": 1327
},
{
"epoch": 1.89,
"learning_rate": 1.87172150691464e-05,
"loss": 0.3199,
"step": 1328
},
{
"epoch": 1.89,
"learning_rate": 1.8693371483071052e-05,
"loss": 0.3225,
"step": 1329
},
{
"epoch": 1.89,
"learning_rate": 1.866952789699571e-05,
"loss": 0.3357,
"step": 1330
},
{
"epoch": 1.89,
"learning_rate": 1.8645684310920363e-05,
"loss": 0.3244,
"step": 1331
},
{
"epoch": 1.89,
"learning_rate": 1.862184072484502e-05,
"loss": 0.3338,
"step": 1332
},
{
"epoch": 1.89,
"learning_rate": 1.859799713876967e-05,
"loss": 0.3296,
"step": 1333
},
{
"epoch": 1.89,
"learning_rate": 1.8574153552694325e-05,
"loss": 0.338,
"step": 1334
},
{
"epoch": 1.9,
"learning_rate": 1.8550309966618982e-05,
"loss": 0.3221,
"step": 1335
},
{
"epoch": 1.9,
"learning_rate": 1.8526466380543633e-05,
"loss": 0.3509,
"step": 1336
},
{
"epoch": 1.9,
"learning_rate": 1.850262279446829e-05,
"loss": 0.3185,
"step": 1337
},
{
"epoch": 1.9,
"learning_rate": 1.8478779208392944e-05,
"loss": 0.3187,
"step": 1338
},
{
"epoch": 1.9,
"learning_rate": 1.8454935622317597e-05,
"loss": 0.3214,
"step": 1339
},
{
"epoch": 1.9,
"learning_rate": 1.843109203624225e-05,
"loss": 0.3148,
"step": 1340
},
{
"epoch": 1.9,
"learning_rate": 1.8407248450166905e-05,
"loss": 0.3053,
"step": 1341
},
{
"epoch": 1.91,
"learning_rate": 1.838340486409156e-05,
"loss": 0.3335,
"step": 1342
},
{
"epoch": 1.91,
"learning_rate": 1.8359561278016216e-05,
"loss": 0.3334,
"step": 1343
},
{
"epoch": 1.91,
"learning_rate": 1.8335717691940867e-05,
"loss": 0.3305,
"step": 1344
},
{
"epoch": 1.91,
"learning_rate": 1.8311874105865524e-05,
"loss": 0.3169,
"step": 1345
},
{
"epoch": 1.91,
"learning_rate": 1.8288030519790178e-05,
"loss": 0.3308,
"step": 1346
},
{
"epoch": 1.91,
"learning_rate": 1.8264186933714832e-05,
"loss": 0.3172,
"step": 1347
},
{
"epoch": 1.91,
"learning_rate": 1.8240343347639486e-05,
"loss": 0.3288,
"step": 1348
},
{
"epoch": 1.92,
"learning_rate": 1.821649976156414e-05,
"loss": 0.3304,
"step": 1349
},
{
"epoch": 1.92,
"learning_rate": 1.8192656175488797e-05,
"loss": 0.3177,
"step": 1350
},
{
"epoch": 1.92,
"learning_rate": 1.8168812589413447e-05,
"loss": 0.3054,
"step": 1351
},
{
"epoch": 1.92,
"learning_rate": 1.81449690033381e-05,
"loss": 0.3252,
"step": 1352
},
{
"epoch": 1.92,
"learning_rate": 1.812112541726276e-05,
"loss": 0.3211,
"step": 1353
},
{
"epoch": 1.92,
"learning_rate": 1.8097281831187412e-05,
"loss": 0.3297,
"step": 1354
},
{
"epoch": 1.92,
"learning_rate": 1.8073438245112066e-05,
"loss": 0.3336,
"step": 1355
},
{
"epoch": 1.92,
"learning_rate": 1.804959465903672e-05,
"loss": 0.3373,
"step": 1356
},
{
"epoch": 1.93,
"learning_rate": 1.8025751072961374e-05,
"loss": 0.3247,
"step": 1357
},
{
"epoch": 1.93,
"learning_rate": 1.800190748688603e-05,
"loss": 0.3204,
"step": 1358
},
{
"epoch": 1.93,
"learning_rate": 1.797806390081068e-05,
"loss": 0.321,
"step": 1359
},
{
"epoch": 1.93,
"learning_rate": 1.795422031473534e-05,
"loss": 0.3172,
"step": 1360
},
{
"epoch": 1.93,
"learning_rate": 1.7930376728659993e-05,
"loss": 0.3263,
"step": 1361
},
{
"epoch": 1.93,
"learning_rate": 1.7906533142584643e-05,
"loss": 0.3275,
"step": 1362
},
{
"epoch": 1.93,
"learning_rate": 1.78826895565093e-05,
"loss": 0.3261,
"step": 1363
},
{
"epoch": 1.94,
"learning_rate": 1.7858845970433954e-05,
"loss": 0.3448,
"step": 1364
},
{
"epoch": 1.94,
"learning_rate": 1.7835002384358608e-05,
"loss": 0.3119,
"step": 1365
},
{
"epoch": 1.94,
"learning_rate": 1.7811158798283262e-05,
"loss": 0.3165,
"step": 1366
},
{
"epoch": 1.94,
"learning_rate": 1.7787315212207916e-05,
"loss": 0.3359,
"step": 1367
},
{
"epoch": 1.94,
"learning_rate": 1.7763471626132573e-05,
"loss": 0.3245,
"step": 1368
},
{
"epoch": 1.94,
"learning_rate": 1.7739628040057227e-05,
"loss": 0.3219,
"step": 1369
},
{
"epoch": 1.94,
"learning_rate": 1.771578445398188e-05,
"loss": 0.326,
"step": 1370
},
{
"epoch": 1.95,
"learning_rate": 1.7691940867906535e-05,
"loss": 0.3004,
"step": 1371
},
{
"epoch": 1.95,
"learning_rate": 1.766809728183119e-05,
"loss": 0.3249,
"step": 1372
},
{
"epoch": 1.95,
"learning_rate": 1.7644253695755842e-05,
"loss": 0.3093,
"step": 1373
},
{
"epoch": 1.95,
"learning_rate": 1.7620410109680496e-05,
"loss": 0.3017,
"step": 1374
},
{
"epoch": 1.95,
"learning_rate": 1.759656652360515e-05,
"loss": 0.3435,
"step": 1375
},
{
"epoch": 1.95,
"learning_rate": 1.7572722937529807e-05,
"loss": 0.3139,
"step": 1376
},
{
"epoch": 1.95,
"learning_rate": 1.7548879351454458e-05,
"loss": 0.324,
"step": 1377
},
{
"epoch": 1.96,
"learning_rate": 1.7525035765379115e-05,
"loss": 0.323,
"step": 1378
},
{
"epoch": 1.96,
"learning_rate": 1.750119217930377e-05,
"loss": 0.3146,
"step": 1379
},
{
"epoch": 1.96,
"learning_rate": 1.7477348593228423e-05,
"loss": 0.3316,
"step": 1380
},
{
"epoch": 1.96,
"learning_rate": 1.7453505007153077e-05,
"loss": 0.3357,
"step": 1381
},
{
"epoch": 1.96,
"learning_rate": 1.742966142107773e-05,
"loss": 0.3396,
"step": 1382
},
{
"epoch": 1.96,
"learning_rate": 1.7405817835002384e-05,
"loss": 0.3394,
"step": 1383
},
{
"epoch": 1.96,
"learning_rate": 1.7381974248927038e-05,
"loss": 0.3128,
"step": 1384
},
{
"epoch": 1.97,
"learning_rate": 1.7358130662851692e-05,
"loss": 0.3236,
"step": 1385
},
{
"epoch": 1.97,
"learning_rate": 1.733428707677635e-05,
"loss": 0.3202,
"step": 1386
},
{
"epoch": 1.97,
"learning_rate": 1.7310443490701003e-05,
"loss": 0.3342,
"step": 1387
},
{
"epoch": 1.97,
"learning_rate": 1.7286599904625657e-05,
"loss": 0.3438,
"step": 1388
},
{
"epoch": 1.97,
"learning_rate": 1.726275631855031e-05,
"loss": 0.3321,
"step": 1389
},
{
"epoch": 1.97,
"learning_rate": 1.7238912732474965e-05,
"loss": 0.3263,
"step": 1390
},
{
"epoch": 1.97,
"learning_rate": 1.7215069146399622e-05,
"loss": 0.3275,
"step": 1391
},
{
"epoch": 1.98,
"learning_rate": 1.7191225560324272e-05,
"loss": 0.3233,
"step": 1392
},
{
"epoch": 1.98,
"learning_rate": 1.7167381974248926e-05,
"loss": 0.3073,
"step": 1393
},
{
"epoch": 1.98,
"learning_rate": 1.7143538388173583e-05,
"loss": 0.3221,
"step": 1394
},
{
"epoch": 1.98,
"learning_rate": 1.7119694802098234e-05,
"loss": 0.3287,
"step": 1395
},
{
"epoch": 1.98,
"learning_rate": 1.709585121602289e-05,
"loss": 0.3158,
"step": 1396
},
{
"epoch": 1.98,
"learning_rate": 1.7072007629947545e-05,
"loss": 0.33,
"step": 1397
},
{
"epoch": 1.98,
"learning_rate": 1.70481640438722e-05,
"loss": 0.3221,
"step": 1398
},
{
"epoch": 1.99,
"learning_rate": 1.7024320457796853e-05,
"loss": 0.3083,
"step": 1399
},
{
"epoch": 1.99,
"learning_rate": 1.7000476871721507e-05,
"loss": 0.3174,
"step": 1400
},
{
"epoch": 1.99,
"learning_rate": 1.6976633285646164e-05,
"loss": 0.3063,
"step": 1401
},
{
"epoch": 1.99,
"learning_rate": 1.6952789699570818e-05,
"loss": 0.3245,
"step": 1402
},
{
"epoch": 1.99,
"learning_rate": 1.6928946113495468e-05,
"loss": 0.3275,
"step": 1403
},
{
"epoch": 1.99,
"learning_rate": 1.6905102527420125e-05,
"loss": 0.3107,
"step": 1404
},
{
"epoch": 1.99,
"learning_rate": 1.688125894134478e-05,
"loss": 0.3323,
"step": 1405
},
{
"epoch": 2.0,
"learning_rate": 1.6857415355269433e-05,
"loss": 0.312,
"step": 1406
},
{
"epoch": 2.0,
"learning_rate": 1.6833571769194087e-05,
"loss": 0.3295,
"step": 1407
},
{
"epoch": 2.0,
"learning_rate": 1.680972818311874e-05,
"loss": 0.3161,
"step": 1408
},
{
"epoch": 2.0,
"learning_rate": 1.6785884597043398e-05,
"loss": 0.3252,
"step": 1409
},
{
"epoch": 2.0,
"learning_rate": 1.676204101096805e-05,
"loss": 0.2355,
"step": 1410
},
{
"epoch": 2.0,
"learning_rate": 1.6738197424892706e-05,
"loss": 0.2399,
"step": 1411
},
{
"epoch": 2.0,
"learning_rate": 1.671435383881736e-05,
"loss": 0.237,
"step": 1412
},
{
"epoch": 2.01,
"learning_rate": 1.6690510252742014e-05,
"loss": 0.2405,
"step": 1413
},
{
"epoch": 2.01,
"learning_rate": 1.6666666666666667e-05,
"loss": 0.2485,
"step": 1414
},
{
"epoch": 2.01,
"learning_rate": 1.664282308059132e-05,
"loss": 0.2256,
"step": 1415
},
{
"epoch": 2.01,
"learning_rate": 1.6618979494515975e-05,
"loss": 0.2373,
"step": 1416
},
{
"epoch": 2.01,
"learning_rate": 1.6595135908440632e-05,
"loss": 0.2322,
"step": 1417
},
{
"epoch": 2.01,
"learning_rate": 1.6571292322365283e-05,
"loss": 0.2433,
"step": 1418
},
{
"epoch": 2.01,
"learning_rate": 1.654744873628994e-05,
"loss": 0.2349,
"step": 1419
},
{
"epoch": 2.02,
"learning_rate": 1.6523605150214594e-05,
"loss": 0.2378,
"step": 1420
},
{
"epoch": 2.02,
"learning_rate": 1.6499761564139244e-05,
"loss": 0.2499,
"step": 1421
},
{
"epoch": 2.02,
"learning_rate": 1.6475917978063902e-05,
"loss": 0.2257,
"step": 1422
},
{
"epoch": 2.02,
"learning_rate": 1.6452074391988556e-05,
"loss": 0.2323,
"step": 1423
},
{
"epoch": 2.02,
"learning_rate": 1.6428230805913213e-05,
"loss": 0.2374,
"step": 1424
},
{
"epoch": 2.02,
"learning_rate": 1.6404387219837863e-05,
"loss": 0.2313,
"step": 1425
},
{
"epoch": 2.02,
"learning_rate": 1.6380543633762517e-05,
"loss": 0.2296,
"step": 1426
},
{
"epoch": 2.03,
"learning_rate": 1.6356700047687174e-05,
"loss": 0.2171,
"step": 1427
},
{
"epoch": 2.03,
"learning_rate": 1.6332856461611828e-05,
"loss": 0.2311,
"step": 1428
},
{
"epoch": 2.03,
"learning_rate": 1.6309012875536482e-05,
"loss": 0.241,
"step": 1429
},
{
"epoch": 2.03,
"learning_rate": 1.6285169289461136e-05,
"loss": 0.2453,
"step": 1430
},
{
"epoch": 2.03,
"learning_rate": 1.626132570338579e-05,
"loss": 0.2351,
"step": 1431
},
{
"epoch": 2.03,
"learning_rate": 1.6237482117310444e-05,
"loss": 0.2319,
"step": 1432
},
{
"epoch": 2.03,
"learning_rate": 1.6213638531235098e-05,
"loss": 0.2407,
"step": 1433
},
{
"epoch": 2.04,
"learning_rate": 1.618979494515975e-05,
"loss": 0.239,
"step": 1434
},
{
"epoch": 2.04,
"learning_rate": 1.616595135908441e-05,
"loss": 0.2172,
"step": 1435
},
{
"epoch": 2.04,
"learning_rate": 1.614210777300906e-05,
"loss": 0.233,
"step": 1436
},
{
"epoch": 2.04,
"learning_rate": 1.6118264186933716e-05,
"loss": 0.2536,
"step": 1437
},
{
"epoch": 2.04,
"learning_rate": 1.609442060085837e-05,
"loss": 0.2512,
"step": 1438
},
{
"epoch": 2.04,
"learning_rate": 1.6070577014783024e-05,
"loss": 0.2474,
"step": 1439
},
{
"epoch": 2.04,
"learning_rate": 1.6046733428707678e-05,
"loss": 0.2268,
"step": 1440
},
{
"epoch": 2.05,
"learning_rate": 1.6022889842632332e-05,
"loss": 0.2295,
"step": 1441
},
{
"epoch": 2.05,
"learning_rate": 1.599904625655699e-05,
"loss": 0.2364,
"step": 1442
},
{
"epoch": 2.05,
"learning_rate": 1.597520267048164e-05,
"loss": 0.2251,
"step": 1443
},
{
"epoch": 2.05,
"learning_rate": 1.5951359084406293e-05,
"loss": 0.2494,
"step": 1444
},
{
"epoch": 2.05,
"learning_rate": 1.592751549833095e-05,
"loss": 0.2279,
"step": 1445
},
{
"epoch": 2.05,
"learning_rate": 1.5903671912255605e-05,
"loss": 0.2447,
"step": 1446
},
{
"epoch": 2.05,
"learning_rate": 1.587982832618026e-05,
"loss": 0.2267,
"step": 1447
},
{
"epoch": 2.06,
"learning_rate": 1.5855984740104912e-05,
"loss": 0.2351,
"step": 1448
},
{
"epoch": 2.06,
"learning_rate": 1.5832141154029566e-05,
"loss": 0.2297,
"step": 1449
},
{
"epoch": 2.06,
"learning_rate": 1.5808297567954223e-05,
"loss": 0.2279,
"step": 1450
},
{
"epoch": 2.06,
"learning_rate": 1.5784453981878874e-05,
"loss": 0.228,
"step": 1451
},
{
"epoch": 2.06,
"learning_rate": 1.576061039580353e-05,
"loss": 0.2255,
"step": 1452
},
{
"epoch": 2.06,
"learning_rate": 1.5736766809728185e-05,
"loss": 0.2332,
"step": 1453
},
{
"epoch": 2.06,
"learning_rate": 1.5712923223652835e-05,
"loss": 0.2201,
"step": 1454
},
{
"epoch": 2.07,
"learning_rate": 1.5689079637577493e-05,
"loss": 0.2398,
"step": 1455
},
{
"epoch": 2.07,
"learning_rate": 1.5665236051502147e-05,
"loss": 0.2225,
"step": 1456
},
{
"epoch": 2.07,
"learning_rate": 1.56413924654268e-05,
"loss": 0.2309,
"step": 1457
},
{
"epoch": 2.07,
"learning_rate": 1.5617548879351454e-05,
"loss": 0.2367,
"step": 1458
},
{
"epoch": 2.07,
"learning_rate": 1.5593705293276108e-05,
"loss": 0.2381,
"step": 1459
},
{
"epoch": 2.07,
"learning_rate": 1.5569861707200765e-05,
"loss": 0.2361,
"step": 1460
},
{
"epoch": 2.07,
"learning_rate": 1.554601812112542e-05,
"loss": 0.2351,
"step": 1461
},
{
"epoch": 2.08,
"learning_rate": 1.552217453505007e-05,
"loss": 0.2368,
"step": 1462
},
{
"epoch": 2.08,
"learning_rate": 1.5498330948974727e-05,
"loss": 0.2365,
"step": 1463
},
{
"epoch": 2.08,
"learning_rate": 1.547448736289938e-05,
"loss": 0.2333,
"step": 1464
},
{
"epoch": 2.08,
"learning_rate": 1.5450643776824038e-05,
"loss": 0.2336,
"step": 1465
},
{
"epoch": 2.08,
"learning_rate": 1.542680019074869e-05,
"loss": 0.2312,
"step": 1466
},
{
"epoch": 2.08,
"learning_rate": 1.5402956604673342e-05,
"loss": 0.228,
"step": 1467
},
{
"epoch": 2.08,
"learning_rate": 1.5379113018598e-05,
"loss": 0.2482,
"step": 1468
},
{
"epoch": 2.09,
"learning_rate": 1.535526943252265e-05,
"loss": 0.2295,
"step": 1469
},
{
"epoch": 2.09,
"learning_rate": 1.5331425846447307e-05,
"loss": 0.2319,
"step": 1470
},
{
"epoch": 2.09,
"learning_rate": 1.530758226037196e-05,
"loss": 0.2321,
"step": 1471
},
{
"epoch": 2.09,
"learning_rate": 1.5283738674296615e-05,
"loss": 0.2232,
"step": 1472
},
{
"epoch": 2.09,
"learning_rate": 1.525989508822127e-05,
"loss": 0.2355,
"step": 1473
},
{
"epoch": 2.09,
"learning_rate": 1.5236051502145923e-05,
"loss": 0.2249,
"step": 1474
},
{
"epoch": 2.09,
"learning_rate": 1.5212207916070578e-05,
"loss": 0.24,
"step": 1475
},
{
"epoch": 2.1,
"learning_rate": 1.5188364329995234e-05,
"loss": 0.2261,
"step": 1476
},
{
"epoch": 2.1,
"learning_rate": 1.5164520743919886e-05,
"loss": 0.237,
"step": 1477
},
{
"epoch": 2.1,
"learning_rate": 1.514067715784454e-05,
"loss": 0.2263,
"step": 1478
},
{
"epoch": 2.1,
"learning_rate": 1.5116833571769195e-05,
"loss": 0.2392,
"step": 1479
},
{
"epoch": 2.1,
"learning_rate": 1.5092989985693848e-05,
"loss": 0.2314,
"step": 1480
},
{
"epoch": 2.1,
"learning_rate": 1.5069146399618503e-05,
"loss": 0.2287,
"step": 1481
},
{
"epoch": 2.1,
"learning_rate": 1.5045302813543157e-05,
"loss": 0.2379,
"step": 1482
},
{
"epoch": 2.11,
"learning_rate": 1.5021459227467813e-05,
"loss": 0.236,
"step": 1483
},
{
"epoch": 2.11,
"learning_rate": 1.4997615641392465e-05,
"loss": 0.2328,
"step": 1484
},
{
"epoch": 2.11,
"learning_rate": 1.497377205531712e-05,
"loss": 0.2305,
"step": 1485
},
{
"epoch": 2.11,
"learning_rate": 1.4949928469241776e-05,
"loss": 0.2277,
"step": 1486
},
{
"epoch": 2.11,
"learning_rate": 1.492608488316643e-05,
"loss": 0.2333,
"step": 1487
},
{
"epoch": 2.11,
"learning_rate": 1.4902241297091082e-05,
"loss": 0.2397,
"step": 1488
},
{
"epoch": 2.11,
"learning_rate": 1.4878397711015737e-05,
"loss": 0.222,
"step": 1489
},
{
"epoch": 2.12,
"learning_rate": 1.4854554124940393e-05,
"loss": 0.2313,
"step": 1490
},
{
"epoch": 2.12,
"learning_rate": 1.4830710538865045e-05,
"loss": 0.2349,
"step": 1491
},
{
"epoch": 2.12,
"learning_rate": 1.4806866952789699e-05,
"loss": 0.2355,
"step": 1492
},
{
"epoch": 2.12,
"learning_rate": 1.4783023366714355e-05,
"loss": 0.2311,
"step": 1493
},
{
"epoch": 2.12,
"learning_rate": 1.475917978063901e-05,
"loss": 0.2305,
"step": 1494
},
{
"epoch": 2.12,
"learning_rate": 1.4735336194563662e-05,
"loss": 0.2292,
"step": 1495
},
{
"epoch": 2.12,
"learning_rate": 1.4711492608488318e-05,
"loss": 0.2386,
"step": 1496
},
{
"epoch": 2.12,
"learning_rate": 1.4687649022412972e-05,
"loss": 0.2292,
"step": 1497
},
{
"epoch": 2.13,
"learning_rate": 1.4663805436337627e-05,
"loss": 0.2243,
"step": 1498
},
{
"epoch": 2.13,
"learning_rate": 1.463996185026228e-05,
"loss": 0.2259,
"step": 1499
},
{
"epoch": 2.13,
"learning_rate": 1.4616118264186935e-05,
"loss": 0.2261,
"step": 1500
},
{
"epoch": 2.13,
"learning_rate": 1.4592274678111589e-05,
"loss": 0.2261,
"step": 1501
},
{
"epoch": 2.13,
"learning_rate": 1.4568431092036241e-05,
"loss": 0.2431,
"step": 1502
},
{
"epoch": 2.13,
"learning_rate": 1.4544587505960897e-05,
"loss": 0.2386,
"step": 1503
},
{
"epoch": 2.13,
"learning_rate": 1.4520743919885552e-05,
"loss": 0.2343,
"step": 1504
},
{
"epoch": 2.14,
"learning_rate": 1.4496900333810206e-05,
"loss": 0.2353,
"step": 1505
},
{
"epoch": 2.14,
"learning_rate": 1.4473056747734858e-05,
"loss": 0.2306,
"step": 1506
},
{
"epoch": 2.14,
"learning_rate": 1.4449213161659514e-05,
"loss": 0.232,
"step": 1507
},
{
"epoch": 2.14,
"learning_rate": 1.442536957558417e-05,
"loss": 0.2252,
"step": 1508
},
{
"epoch": 2.14,
"learning_rate": 1.4401525989508823e-05,
"loss": 0.226,
"step": 1509
},
{
"epoch": 2.14,
"learning_rate": 1.4377682403433477e-05,
"loss": 0.2368,
"step": 1510
},
{
"epoch": 2.14,
"learning_rate": 1.435383881735813e-05,
"loss": 0.2316,
"step": 1511
},
{
"epoch": 2.15,
"learning_rate": 1.4329995231282786e-05,
"loss": 0.2274,
"step": 1512
},
{
"epoch": 2.15,
"learning_rate": 1.4306151645207442e-05,
"loss": 0.2162,
"step": 1513
},
{
"epoch": 2.15,
"learning_rate": 1.4282308059132094e-05,
"loss": 0.2509,
"step": 1514
},
{
"epoch": 2.15,
"learning_rate": 1.4258464473056748e-05,
"loss": 0.2149,
"step": 1515
},
{
"epoch": 2.15,
"learning_rate": 1.4234620886981404e-05,
"loss": 0.2338,
"step": 1516
},
{
"epoch": 2.15,
"learning_rate": 1.4210777300906056e-05,
"loss": 0.2323,
"step": 1517
},
{
"epoch": 2.15,
"learning_rate": 1.4186933714830711e-05,
"loss": 0.2294,
"step": 1518
},
{
"epoch": 2.16,
"learning_rate": 1.4163090128755365e-05,
"loss": 0.2281,
"step": 1519
},
{
"epoch": 2.16,
"learning_rate": 1.413924654268002e-05,
"loss": 0.2244,
"step": 1520
},
{
"epoch": 2.16,
"learning_rate": 1.4115402956604673e-05,
"loss": 0.2425,
"step": 1521
},
{
"epoch": 2.16,
"learning_rate": 1.4091559370529328e-05,
"loss": 0.242,
"step": 1522
},
{
"epoch": 2.16,
"learning_rate": 1.4067715784453984e-05,
"loss": 0.2391,
"step": 1523
},
{
"epoch": 2.16,
"learning_rate": 1.4043872198378638e-05,
"loss": 0.2294,
"step": 1524
},
{
"epoch": 2.16,
"learning_rate": 1.402002861230329e-05,
"loss": 0.2289,
"step": 1525
},
{
"epoch": 2.17,
"learning_rate": 1.3996185026227945e-05,
"loss": 0.2214,
"step": 1526
},
{
"epoch": 2.17,
"learning_rate": 1.3972341440152601e-05,
"loss": 0.2267,
"step": 1527
},
{
"epoch": 2.17,
"learning_rate": 1.3948497854077253e-05,
"loss": 0.2513,
"step": 1528
},
{
"epoch": 2.17,
"learning_rate": 1.3924654268001907e-05,
"loss": 0.2409,
"step": 1529
},
{
"epoch": 2.17,
"learning_rate": 1.3900810681926563e-05,
"loss": 0.2281,
"step": 1530
},
{
"epoch": 2.17,
"learning_rate": 1.3876967095851218e-05,
"loss": 0.243,
"step": 1531
},
{
"epoch": 2.17,
"learning_rate": 1.385312350977587e-05,
"loss": 0.2324,
"step": 1532
},
{
"epoch": 2.18,
"learning_rate": 1.3829279923700524e-05,
"loss": 0.2258,
"step": 1533
},
{
"epoch": 2.18,
"learning_rate": 1.380543633762518e-05,
"loss": 0.233,
"step": 1534
},
{
"epoch": 2.18,
"learning_rate": 1.3781592751549835e-05,
"loss": 0.2321,
"step": 1535
},
{
"epoch": 2.18,
"learning_rate": 1.3757749165474487e-05,
"loss": 0.2571,
"step": 1536
},
{
"epoch": 2.18,
"learning_rate": 1.3733905579399143e-05,
"loss": 0.2382,
"step": 1537
},
{
"epoch": 2.18,
"learning_rate": 1.3710061993323797e-05,
"loss": 0.2312,
"step": 1538
},
{
"epoch": 2.18,
"learning_rate": 1.3686218407248449e-05,
"loss": 0.23,
"step": 1539
},
{
"epoch": 2.19,
"learning_rate": 1.3662374821173105e-05,
"loss": 0.2269,
"step": 1540
},
{
"epoch": 2.19,
"learning_rate": 1.363853123509776e-05,
"loss": 0.2444,
"step": 1541
},
{
"epoch": 2.19,
"learning_rate": 1.3614687649022414e-05,
"loss": 0.2376,
"step": 1542
},
{
"epoch": 2.19,
"learning_rate": 1.3590844062947066e-05,
"loss": 0.2296,
"step": 1543
},
{
"epoch": 2.19,
"learning_rate": 1.3567000476871722e-05,
"loss": 0.2273,
"step": 1544
},
{
"epoch": 2.19,
"learning_rate": 1.3543156890796377e-05,
"loss": 0.2442,
"step": 1545
},
{
"epoch": 2.19,
"learning_rate": 1.3519313304721031e-05,
"loss": 0.23,
"step": 1546
},
{
"epoch": 2.2,
"learning_rate": 1.3495469718645683e-05,
"loss": 0.238,
"step": 1547
},
{
"epoch": 2.2,
"learning_rate": 1.3471626132570339e-05,
"loss": 0.2385,
"step": 1548
},
{
"epoch": 2.2,
"learning_rate": 1.3447782546494994e-05,
"loss": 0.2225,
"step": 1549
},
{
"epoch": 2.2,
"learning_rate": 1.3423938960419647e-05,
"loss": 0.2499,
"step": 1550
},
{
"epoch": 2.2,
"learning_rate": 1.3400095374344302e-05,
"loss": 0.2155,
"step": 1551
},
{
"epoch": 2.2,
"learning_rate": 1.3376251788268956e-05,
"loss": 0.2504,
"step": 1552
},
{
"epoch": 2.2,
"learning_rate": 1.3352408202193612e-05,
"loss": 0.2302,
"step": 1553
},
{
"epoch": 2.21,
"learning_rate": 1.3328564616118264e-05,
"loss": 0.2461,
"step": 1554
},
{
"epoch": 2.21,
"learning_rate": 1.330472103004292e-05,
"loss": 0.2386,
"step": 1555
},
{
"epoch": 2.21,
"learning_rate": 1.3280877443967573e-05,
"loss": 0.2302,
"step": 1556
},
{
"epoch": 2.21,
"learning_rate": 1.3257033857892229e-05,
"loss": 0.2226,
"step": 1557
},
{
"epoch": 2.21,
"learning_rate": 1.3233190271816881e-05,
"loss": 0.2183,
"step": 1558
},
{
"epoch": 2.21,
"learning_rate": 1.3209346685741536e-05,
"loss": 0.2252,
"step": 1559
},
{
"epoch": 2.21,
"learning_rate": 1.318550309966619e-05,
"loss": 0.2313,
"step": 1560
},
{
"epoch": 2.22,
"learning_rate": 1.3161659513590846e-05,
"loss": 0.2325,
"step": 1561
},
{
"epoch": 2.22,
"learning_rate": 1.3137815927515498e-05,
"loss": 0.2253,
"step": 1562
},
{
"epoch": 2.22,
"learning_rate": 1.3113972341440154e-05,
"loss": 0.2399,
"step": 1563
},
{
"epoch": 2.22,
"learning_rate": 1.3090128755364809e-05,
"loss": 0.232,
"step": 1564
},
{
"epoch": 2.22,
"learning_rate": 1.3066285169289461e-05,
"loss": 0.2173,
"step": 1565
},
{
"epoch": 2.22,
"learning_rate": 1.3042441583214115e-05,
"loss": 0.2293,
"step": 1566
},
{
"epoch": 2.22,
"learning_rate": 1.301859799713877e-05,
"loss": 0.2199,
"step": 1567
},
{
"epoch": 2.23,
"learning_rate": 1.2994754411063426e-05,
"loss": 0.2154,
"step": 1568
},
{
"epoch": 2.23,
"learning_rate": 1.2970910824988078e-05,
"loss": 0.2485,
"step": 1569
},
{
"epoch": 2.23,
"learning_rate": 1.2947067238912732e-05,
"loss": 0.2168,
"step": 1570
},
{
"epoch": 2.23,
"learning_rate": 1.2923223652837388e-05,
"loss": 0.2295,
"step": 1571
},
{
"epoch": 2.23,
"learning_rate": 1.2899380066762043e-05,
"loss": 0.2455,
"step": 1572
},
{
"epoch": 2.23,
"learning_rate": 1.2875536480686696e-05,
"loss": 0.2301,
"step": 1573
},
{
"epoch": 2.23,
"learning_rate": 1.285169289461135e-05,
"loss": 0.2371,
"step": 1574
},
{
"epoch": 2.24,
"learning_rate": 1.2827849308536005e-05,
"loss": 0.2227,
"step": 1575
},
{
"epoch": 2.24,
"learning_rate": 1.2804005722460657e-05,
"loss": 0.2376,
"step": 1576
},
{
"epoch": 2.24,
"learning_rate": 1.2780162136385313e-05,
"loss": 0.2259,
"step": 1577
},
{
"epoch": 2.24,
"learning_rate": 1.2756318550309968e-05,
"loss": 0.2236,
"step": 1578
},
{
"epoch": 2.24,
"learning_rate": 1.2732474964234622e-05,
"loss": 0.223,
"step": 1579
},
{
"epoch": 2.24,
"learning_rate": 1.2708631378159274e-05,
"loss": 0.2217,
"step": 1580
},
{
"epoch": 2.24,
"learning_rate": 1.268478779208393e-05,
"loss": 0.2209,
"step": 1581
},
{
"epoch": 2.25,
"learning_rate": 1.2660944206008585e-05,
"loss": 0.2341,
"step": 1582
},
{
"epoch": 2.25,
"learning_rate": 1.263710061993324e-05,
"loss": 0.2231,
"step": 1583
},
{
"epoch": 2.25,
"learning_rate": 1.2613257033857891e-05,
"loss": 0.2382,
"step": 1584
},
{
"epoch": 2.25,
"learning_rate": 1.2589413447782547e-05,
"loss": 0.2366,
"step": 1585
},
{
"epoch": 2.25,
"learning_rate": 1.2565569861707202e-05,
"loss": 0.2268,
"step": 1586
},
{
"epoch": 2.25,
"learning_rate": 1.2541726275631855e-05,
"loss": 0.2186,
"step": 1587
},
{
"epoch": 2.25,
"learning_rate": 1.2517882689556509e-05,
"loss": 0.2422,
"step": 1588
},
{
"epoch": 2.26,
"learning_rate": 1.2494039103481164e-05,
"loss": 0.2387,
"step": 1589
},
{
"epoch": 2.26,
"learning_rate": 1.2470195517405818e-05,
"loss": 0.2431,
"step": 1590
},
{
"epoch": 2.26,
"learning_rate": 1.2446351931330473e-05,
"loss": 0.2465,
"step": 1591
},
{
"epoch": 2.26,
"learning_rate": 1.2422508345255127e-05,
"loss": 0.2347,
"step": 1592
},
{
"epoch": 2.26,
"learning_rate": 1.2398664759179781e-05,
"loss": 0.2275,
"step": 1593
},
{
"epoch": 2.26,
"learning_rate": 1.2374821173104435e-05,
"loss": 0.2335,
"step": 1594
},
{
"epoch": 2.26,
"learning_rate": 1.235097758702909e-05,
"loss": 0.2301,
"step": 1595
},
{
"epoch": 2.27,
"learning_rate": 1.2327134000953744e-05,
"loss": 0.2222,
"step": 1596
},
{
"epoch": 2.27,
"learning_rate": 1.2303290414878398e-05,
"loss": 0.2314,
"step": 1597
},
{
"epoch": 2.27,
"learning_rate": 1.2279446828803052e-05,
"loss": 0.2309,
"step": 1598
},
{
"epoch": 2.27,
"learning_rate": 1.2255603242727706e-05,
"loss": 0.2381,
"step": 1599
},
{
"epoch": 2.27,
"learning_rate": 1.2231759656652362e-05,
"loss": 0.2376,
"step": 1600
},
{
"epoch": 2.27,
"learning_rate": 1.2207916070577015e-05,
"loss": 0.2388,
"step": 1601
},
{
"epoch": 2.27,
"learning_rate": 1.218407248450167e-05,
"loss": 0.231,
"step": 1602
},
{
"epoch": 2.28,
"learning_rate": 1.2160228898426323e-05,
"loss": 0.2327,
"step": 1603
},
{
"epoch": 2.28,
"learning_rate": 1.2136385312350979e-05,
"loss": 0.2392,
"step": 1604
},
{
"epoch": 2.28,
"learning_rate": 1.2112541726275633e-05,
"loss": 0.2321,
"step": 1605
},
{
"epoch": 2.28,
"learning_rate": 1.2088698140200286e-05,
"loss": 0.2388,
"step": 1606
},
{
"epoch": 2.28,
"learning_rate": 1.206485455412494e-05,
"loss": 0.2286,
"step": 1607
},
{
"epoch": 2.28,
"learning_rate": 1.2041010968049596e-05,
"loss": 0.2318,
"step": 1608
},
{
"epoch": 2.28,
"learning_rate": 1.201716738197425e-05,
"loss": 0.2411,
"step": 1609
},
{
"epoch": 2.29,
"learning_rate": 1.1993323795898904e-05,
"loss": 0.2392,
"step": 1610
},
{
"epoch": 2.29,
"learning_rate": 1.1969480209823557e-05,
"loss": 0.2285,
"step": 1611
},
{
"epoch": 2.29,
"learning_rate": 1.1945636623748211e-05,
"loss": 0.2311,
"step": 1612
},
{
"epoch": 2.29,
"learning_rate": 1.1921793037672867e-05,
"loss": 0.2289,
"step": 1613
},
{
"epoch": 2.29,
"learning_rate": 1.189794945159752e-05,
"loss": 0.2453,
"step": 1614
},
{
"epoch": 2.29,
"learning_rate": 1.1874105865522175e-05,
"loss": 0.234,
"step": 1615
},
{
"epoch": 2.29,
"learning_rate": 1.1850262279446828e-05,
"loss": 0.2513,
"step": 1616
},
{
"epoch": 2.3,
"learning_rate": 1.1826418693371484e-05,
"loss": 0.2287,
"step": 1617
},
{
"epoch": 2.3,
"learning_rate": 1.1802575107296138e-05,
"loss": 0.2171,
"step": 1618
},
{
"epoch": 2.3,
"learning_rate": 1.1778731521220793e-05,
"loss": 0.217,
"step": 1619
},
{
"epoch": 2.3,
"learning_rate": 1.1754887935145446e-05,
"loss": 0.2359,
"step": 1620
},
{
"epoch": 2.3,
"learning_rate": 1.1731044349070101e-05,
"loss": 0.2233,
"step": 1621
},
{
"epoch": 2.3,
"learning_rate": 1.1707200762994755e-05,
"loss": 0.2152,
"step": 1622
},
{
"epoch": 2.3,
"learning_rate": 1.1683357176919409e-05,
"loss": 0.2307,
"step": 1623
},
{
"epoch": 2.31,
"learning_rate": 1.1659513590844064e-05,
"loss": 0.2296,
"step": 1624
},
{
"epoch": 2.31,
"learning_rate": 1.1635670004768717e-05,
"loss": 0.2162,
"step": 1625
},
{
"epoch": 2.31,
"learning_rate": 1.1611826418693372e-05,
"loss": 0.2314,
"step": 1626
},
{
"epoch": 2.31,
"learning_rate": 1.1587982832618026e-05,
"loss": 0.2345,
"step": 1627
},
{
"epoch": 2.31,
"learning_rate": 1.1564139246542682e-05,
"loss": 0.2408,
"step": 1628
},
{
"epoch": 2.31,
"learning_rate": 1.1540295660467335e-05,
"loss": 0.2298,
"step": 1629
},
{
"epoch": 2.31,
"learning_rate": 1.151645207439199e-05,
"loss": 0.2361,
"step": 1630
},
{
"epoch": 2.32,
"learning_rate": 1.1492608488316643e-05,
"loss": 0.2233,
"step": 1631
},
{
"epoch": 2.32,
"learning_rate": 1.1468764902241299e-05,
"loss": 0.2415,
"step": 1632
},
{
"epoch": 2.32,
"learning_rate": 1.1444921316165953e-05,
"loss": 0.2419,
"step": 1633
},
{
"epoch": 2.32,
"learning_rate": 1.1421077730090605e-05,
"loss": 0.2258,
"step": 1634
},
{
"epoch": 2.32,
"learning_rate": 1.139723414401526e-05,
"loss": 0.2327,
"step": 1635
},
{
"epoch": 2.32,
"learning_rate": 1.1373390557939914e-05,
"loss": 0.227,
"step": 1636
},
{
"epoch": 2.32,
"learning_rate": 1.134954697186457e-05,
"loss": 0.2379,
"step": 1637
},
{
"epoch": 2.32,
"learning_rate": 1.1325703385789224e-05,
"loss": 0.2185,
"step": 1638
},
{
"epoch": 2.33,
"learning_rate": 1.1301859799713877e-05,
"loss": 0.2291,
"step": 1639
},
{
"epoch": 2.33,
"learning_rate": 1.1278016213638531e-05,
"loss": 0.2262,
"step": 1640
},
{
"epoch": 2.33,
"learning_rate": 1.1254172627563187e-05,
"loss": 0.2357,
"step": 1641
},
{
"epoch": 2.33,
"learning_rate": 1.123032904148784e-05,
"loss": 0.2297,
"step": 1642
},
{
"epoch": 2.33,
"learning_rate": 1.1206485455412495e-05,
"loss": 0.2316,
"step": 1643
},
{
"epoch": 2.33,
"learning_rate": 1.1182641869337148e-05,
"loss": 0.234,
"step": 1644
},
{
"epoch": 2.33,
"learning_rate": 1.1158798283261804e-05,
"loss": 0.2347,
"step": 1645
},
{
"epoch": 2.34,
"learning_rate": 1.1134954697186458e-05,
"loss": 0.2217,
"step": 1646
},
{
"epoch": 2.34,
"learning_rate": 1.1111111111111112e-05,
"loss": 0.2334,
"step": 1647
},
{
"epoch": 2.34,
"learning_rate": 1.1087267525035766e-05,
"loss": 0.2366,
"step": 1648
},
{
"epoch": 2.34,
"learning_rate": 1.106342393896042e-05,
"loss": 0.2412,
"step": 1649
},
{
"epoch": 2.34,
"learning_rate": 1.1039580352885075e-05,
"loss": 0.2208,
"step": 1650
},
{
"epoch": 2.34,
"learning_rate": 1.1015736766809729e-05,
"loss": 0.2312,
"step": 1651
},
{
"epoch": 2.34,
"learning_rate": 1.0991893180734383e-05,
"loss": 0.2493,
"step": 1652
},
{
"epoch": 2.35,
"learning_rate": 1.0968049594659036e-05,
"loss": 0.2348,
"step": 1653
},
{
"epoch": 2.35,
"learning_rate": 1.0944206008583692e-05,
"loss": 0.2222,
"step": 1654
},
{
"epoch": 2.35,
"learning_rate": 1.0920362422508346e-05,
"loss": 0.2242,
"step": 1655
},
{
"epoch": 2.35,
"learning_rate": 1.0896518836433001e-05,
"loss": 0.2371,
"step": 1656
},
{
"epoch": 2.35,
"learning_rate": 1.0872675250357654e-05,
"loss": 0.2361,
"step": 1657
},
{
"epoch": 2.35,
"learning_rate": 1.0848831664282307e-05,
"loss": 0.229,
"step": 1658
},
{
"epoch": 2.35,
"learning_rate": 1.0824988078206963e-05,
"loss": 0.2345,
"step": 1659
},
{
"epoch": 2.36,
"learning_rate": 1.0801144492131617e-05,
"loss": 0.2281,
"step": 1660
},
{
"epoch": 2.36,
"learning_rate": 1.077730090605627e-05,
"loss": 0.234,
"step": 1661
},
{
"epoch": 2.36,
"learning_rate": 1.0753457319980925e-05,
"loss": 0.239,
"step": 1662
},
{
"epoch": 2.36,
"learning_rate": 1.072961373390558e-05,
"loss": 0.2246,
"step": 1663
},
{
"epoch": 2.36,
"learning_rate": 1.0705770147830234e-05,
"loss": 0.2286,
"step": 1664
},
{
"epoch": 2.36,
"learning_rate": 1.068192656175489e-05,
"loss": 0.2421,
"step": 1665
},
{
"epoch": 2.36,
"learning_rate": 1.0658082975679542e-05,
"loss": 0.2244,
"step": 1666
},
{
"epoch": 2.37,
"learning_rate": 1.0634239389604197e-05,
"loss": 0.2281,
"step": 1667
},
{
"epoch": 2.37,
"learning_rate": 1.0610395803528851e-05,
"loss": 0.2384,
"step": 1668
},
{
"epoch": 2.37,
"learning_rate": 1.0586552217453507e-05,
"loss": 0.2327,
"step": 1669
},
{
"epoch": 2.37,
"learning_rate": 1.056270863137816e-05,
"loss": 0.2276,
"step": 1670
},
{
"epoch": 2.37,
"learning_rate": 1.0538865045302813e-05,
"loss": 0.2225,
"step": 1671
},
{
"epoch": 2.37,
"learning_rate": 1.0515021459227468e-05,
"loss": 0.2166,
"step": 1672
},
{
"epoch": 2.37,
"learning_rate": 1.0491177873152122e-05,
"loss": 0.2369,
"step": 1673
},
{
"epoch": 2.38,
"learning_rate": 1.0467334287076778e-05,
"loss": 0.2329,
"step": 1674
},
{
"epoch": 2.38,
"learning_rate": 1.0443490701001432e-05,
"loss": 0.2497,
"step": 1675
},
{
"epoch": 2.38,
"learning_rate": 1.0419647114926085e-05,
"loss": 0.2272,
"step": 1676
},
{
"epoch": 2.38,
"learning_rate": 1.039580352885074e-05,
"loss": 0.227,
"step": 1677
},
{
"epoch": 2.38,
"learning_rate": 1.0371959942775395e-05,
"loss": 0.221,
"step": 1678
},
{
"epoch": 2.38,
"learning_rate": 1.0348116356700049e-05,
"loss": 0.2352,
"step": 1679
},
{
"epoch": 2.38,
"learning_rate": 1.0324272770624703e-05,
"loss": 0.2275,
"step": 1680
},
{
"epoch": 2.39,
"learning_rate": 1.0300429184549356e-05,
"loss": 0.2307,
"step": 1681
},
{
"epoch": 2.39,
"learning_rate": 1.027658559847401e-05,
"loss": 0.2263,
"step": 1682
},
{
"epoch": 2.39,
"learning_rate": 1.0252742012398666e-05,
"loss": 0.2367,
"step": 1683
},
{
"epoch": 2.39,
"learning_rate": 1.022889842632332e-05,
"loss": 0.2193,
"step": 1684
},
{
"epoch": 2.39,
"learning_rate": 1.0205054840247974e-05,
"loss": 0.2253,
"step": 1685
},
{
"epoch": 2.39,
"learning_rate": 1.0181211254172627e-05,
"loss": 0.2171,
"step": 1686
},
{
"epoch": 2.39,
"learning_rate": 1.0157367668097283e-05,
"loss": 0.243,
"step": 1687
},
{
"epoch": 2.4,
"learning_rate": 1.0133524082021937e-05,
"loss": 0.2333,
"step": 1688
},
{
"epoch": 2.4,
"learning_rate": 1.010968049594659e-05,
"loss": 0.2342,
"step": 1689
},
{
"epoch": 2.4,
"learning_rate": 1.0085836909871245e-05,
"loss": 0.23,
"step": 1690
},
{
"epoch": 2.4,
"learning_rate": 1.00619933237959e-05,
"loss": 0.2253,
"step": 1691
},
{
"epoch": 2.4,
"learning_rate": 1.0038149737720554e-05,
"loss": 0.2329,
"step": 1692
},
{
"epoch": 2.4,
"learning_rate": 1.0014306151645208e-05,
"loss": 0.2344,
"step": 1693
},
{
"epoch": 2.4,
"learning_rate": 9.990462565569862e-06,
"loss": 0.2322,
"step": 1694
},
{
"epoch": 2.41,
"learning_rate": 9.966618979494516e-06,
"loss": 0.2395,
"step": 1695
},
{
"epoch": 2.41,
"learning_rate": 9.942775393419171e-06,
"loss": 0.2379,
"step": 1696
},
{
"epoch": 2.41,
"learning_rate": 9.918931807343825e-06,
"loss": 0.23,
"step": 1697
},
{
"epoch": 2.41,
"learning_rate": 9.895088221268479e-06,
"loss": 0.2339,
"step": 1698
},
{
"epoch": 2.41,
"learning_rate": 9.871244635193133e-06,
"loss": 0.2265,
"step": 1699
},
{
"epoch": 2.41,
"learning_rate": 9.847401049117788e-06,
"loss": 0.219,
"step": 1700
},
{
"epoch": 2.41,
"learning_rate": 9.823557463042442e-06,
"loss": 0.2309,
"step": 1701
},
{
"epoch": 2.42,
"learning_rate": 9.799713876967098e-06,
"loss": 0.2348,
"step": 1702
},
{
"epoch": 2.42,
"learning_rate": 9.77587029089175e-06,
"loss": 0.2175,
"step": 1703
},
{
"epoch": 2.42,
"learning_rate": 9.752026704816405e-06,
"loss": 0.2273,
"step": 1704
},
{
"epoch": 2.42,
"learning_rate": 9.72818311874106e-06,
"loss": 0.2301,
"step": 1705
},
{
"epoch": 2.42,
"learning_rate": 9.704339532665713e-06,
"loss": 0.238,
"step": 1706
},
{
"epoch": 2.42,
"learning_rate": 9.680495946590367e-06,
"loss": 0.2143,
"step": 1707
},
{
"epoch": 2.42,
"learning_rate": 9.65665236051502e-06,
"loss": 0.2391,
"step": 1708
},
{
"epoch": 2.43,
"learning_rate": 9.632808774439676e-06,
"loss": 0.239,
"step": 1709
},
{
"epoch": 2.43,
"learning_rate": 9.60896518836433e-06,
"loss": 0.2425,
"step": 1710
},
{
"epoch": 2.43,
"learning_rate": 9.585121602288986e-06,
"loss": 0.2196,
"step": 1711
},
{
"epoch": 2.43,
"learning_rate": 9.561278016213638e-06,
"loss": 0.2313,
"step": 1712
},
{
"epoch": 2.43,
"learning_rate": 9.537434430138293e-06,
"loss": 0.2409,
"step": 1713
},
{
"epoch": 2.43,
"learning_rate": 9.513590844062947e-06,
"loss": 0.2213,
"step": 1714
},
{
"epoch": 2.43,
"learning_rate": 9.489747257987603e-06,
"loss": 0.2363,
"step": 1715
},
{
"epoch": 2.44,
"learning_rate": 9.465903671912257e-06,
"loss": 0.2255,
"step": 1716
},
{
"epoch": 2.44,
"learning_rate": 9.44206008583691e-06,
"loss": 0.2315,
"step": 1717
},
{
"epoch": 2.44,
"learning_rate": 9.418216499761564e-06,
"loss": 0.2384,
"step": 1718
},
{
"epoch": 2.44,
"learning_rate": 9.394372913686218e-06,
"loss": 0.2321,
"step": 1719
},
{
"epoch": 2.44,
"learning_rate": 9.370529327610874e-06,
"loss": 0.2186,
"step": 1720
},
{
"epoch": 2.44,
"learning_rate": 9.346685741535526e-06,
"loss": 0.2384,
"step": 1721
},
{
"epoch": 2.44,
"learning_rate": 9.322842155460182e-06,
"loss": 0.2307,
"step": 1722
},
{
"epoch": 2.45,
"learning_rate": 9.298998569384835e-06,
"loss": 0.2205,
"step": 1723
},
{
"epoch": 2.45,
"learning_rate": 9.275154983309491e-06,
"loss": 0.2352,
"step": 1724
},
{
"epoch": 2.45,
"learning_rate": 9.251311397234145e-06,
"loss": 0.228,
"step": 1725
},
{
"epoch": 2.45,
"learning_rate": 9.227467811158799e-06,
"loss": 0.2331,
"step": 1726
},
{
"epoch": 2.45,
"learning_rate": 9.203624225083453e-06,
"loss": 0.2241,
"step": 1727
},
{
"epoch": 2.45,
"learning_rate": 9.179780639008108e-06,
"loss": 0.2394,
"step": 1728
},
{
"epoch": 2.45,
"learning_rate": 9.155937052932762e-06,
"loss": 0.2306,
"step": 1729
},
{
"epoch": 2.46,
"learning_rate": 9.132093466857416e-06,
"loss": 0.2465,
"step": 1730
},
{
"epoch": 2.46,
"learning_rate": 9.10824988078207e-06,
"loss": 0.2273,
"step": 1731
},
{
"epoch": 2.46,
"learning_rate": 9.084406294706724e-06,
"loss": 0.2378,
"step": 1732
},
{
"epoch": 2.46,
"learning_rate": 9.06056270863138e-06,
"loss": 0.2481,
"step": 1733
},
{
"epoch": 2.46,
"learning_rate": 9.036719122556033e-06,
"loss": 0.2255,
"step": 1734
},
{
"epoch": 2.46,
"learning_rate": 9.012875536480687e-06,
"loss": 0.232,
"step": 1735
},
{
"epoch": 2.46,
"learning_rate": 8.98903195040534e-06,
"loss": 0.2266,
"step": 1736
},
{
"epoch": 2.47,
"learning_rate": 8.965188364329996e-06,
"loss": 0.2391,
"step": 1737
},
{
"epoch": 2.47,
"learning_rate": 8.94134477825465e-06,
"loss": 0.2399,
"step": 1738
},
{
"epoch": 2.47,
"learning_rate": 8.917501192179304e-06,
"loss": 0.2342,
"step": 1739
},
{
"epoch": 2.47,
"learning_rate": 8.893657606103958e-06,
"loss": 0.2381,
"step": 1740
},
{
"epoch": 2.47,
"learning_rate": 8.869814020028613e-06,
"loss": 0.2221,
"step": 1741
},
{
"epoch": 2.47,
"learning_rate": 8.845970433953267e-06,
"loss": 0.2247,
"step": 1742
},
{
"epoch": 2.47,
"learning_rate": 8.822126847877921e-06,
"loss": 0.2375,
"step": 1743
},
{
"epoch": 2.48,
"learning_rate": 8.798283261802575e-06,
"loss": 0.2249,
"step": 1744
},
{
"epoch": 2.48,
"learning_rate": 8.774439675727229e-06,
"loss": 0.2389,
"step": 1745
},
{
"epoch": 2.48,
"learning_rate": 8.750596089651884e-06,
"loss": 0.2374,
"step": 1746
},
{
"epoch": 2.48,
"learning_rate": 8.726752503576538e-06,
"loss": 0.2215,
"step": 1747
},
{
"epoch": 2.48,
"learning_rate": 8.702908917501192e-06,
"loss": 0.2363,
"step": 1748
},
{
"epoch": 2.48,
"learning_rate": 8.679065331425846e-06,
"loss": 0.2446,
"step": 1749
},
{
"epoch": 2.48,
"learning_rate": 8.655221745350502e-06,
"loss": 0.2252,
"step": 1750
},
{
"epoch": 2.49,
"learning_rate": 8.631378159275155e-06,
"loss": 0.2372,
"step": 1751
},
{
"epoch": 2.49,
"learning_rate": 8.607534573199811e-06,
"loss": 0.2146,
"step": 1752
},
{
"epoch": 2.49,
"learning_rate": 8.583690987124463e-06,
"loss": 0.2194,
"step": 1753
},
{
"epoch": 2.49,
"learning_rate": 8.559847401049117e-06,
"loss": 0.2407,
"step": 1754
},
{
"epoch": 2.49,
"learning_rate": 8.536003814973773e-06,
"loss": 0.2263,
"step": 1755
},
{
"epoch": 2.49,
"learning_rate": 8.512160228898426e-06,
"loss": 0.2187,
"step": 1756
},
{
"epoch": 2.49,
"learning_rate": 8.488316642823082e-06,
"loss": 0.2156,
"step": 1757
},
{
"epoch": 2.5,
"learning_rate": 8.464473056747734e-06,
"loss": 0.2142,
"step": 1758
},
{
"epoch": 2.5,
"learning_rate": 8.44062947067239e-06,
"loss": 0.2331,
"step": 1759
},
{
"epoch": 2.5,
"learning_rate": 8.416785884597044e-06,
"loss": 0.2312,
"step": 1760
},
{
"epoch": 2.5,
"learning_rate": 8.392942298521699e-06,
"loss": 0.2357,
"step": 1761
},
{
"epoch": 2.5,
"learning_rate": 8.369098712446353e-06,
"loss": 0.2267,
"step": 1762
},
{
"epoch": 2.5,
"learning_rate": 8.345255126371007e-06,
"loss": 0.2385,
"step": 1763
},
{
"epoch": 2.5,
"learning_rate": 8.32141154029566e-06,
"loss": 0.2373,
"step": 1764
},
{
"epoch": 2.51,
"learning_rate": 8.297567954220316e-06,
"loss": 0.2312,
"step": 1765
},
{
"epoch": 2.51,
"learning_rate": 8.27372436814497e-06,
"loss": 0.2188,
"step": 1766
},
{
"epoch": 2.51,
"learning_rate": 8.249880782069622e-06,
"loss": 0.2304,
"step": 1767
},
{
"epoch": 2.51,
"learning_rate": 8.226037195994278e-06,
"loss": 0.2132,
"step": 1768
},
{
"epoch": 2.51,
"learning_rate": 8.202193609918932e-06,
"loss": 0.2288,
"step": 1769
},
{
"epoch": 2.51,
"learning_rate": 8.178350023843587e-06,
"loss": 0.2231,
"step": 1770
},
{
"epoch": 2.51,
"learning_rate": 8.154506437768241e-06,
"loss": 0.2229,
"step": 1771
},
{
"epoch": 2.52,
"learning_rate": 8.130662851692895e-06,
"loss": 0.2248,
"step": 1772
},
{
"epoch": 2.52,
"learning_rate": 8.106819265617549e-06,
"loss": 0.2237,
"step": 1773
},
{
"epoch": 2.52,
"learning_rate": 8.082975679542204e-06,
"loss": 0.24,
"step": 1774
},
{
"epoch": 2.52,
"learning_rate": 8.059132093466858e-06,
"loss": 0.239,
"step": 1775
},
{
"epoch": 2.52,
"learning_rate": 8.035288507391512e-06,
"loss": 0.2392,
"step": 1776
},
{
"epoch": 2.52,
"learning_rate": 8.011444921316166e-06,
"loss": 0.222,
"step": 1777
},
{
"epoch": 2.52,
"learning_rate": 7.98760133524082e-06,
"loss": 0.2257,
"step": 1778
},
{
"epoch": 2.53,
"learning_rate": 7.963757749165475e-06,
"loss": 0.2267,
"step": 1779
},
{
"epoch": 2.53,
"learning_rate": 7.93991416309013e-06,
"loss": 0.2272,
"step": 1780
},
{
"epoch": 2.53,
"learning_rate": 7.916070577014783e-06,
"loss": 0.2229,
"step": 1781
},
{
"epoch": 2.53,
"learning_rate": 7.892226990939437e-06,
"loss": 0.2099,
"step": 1782
},
{
"epoch": 2.53,
"learning_rate": 7.868383404864092e-06,
"loss": 0.2154,
"step": 1783
},
{
"epoch": 2.53,
"learning_rate": 7.844539818788746e-06,
"loss": 0.2194,
"step": 1784
},
{
"epoch": 2.53,
"learning_rate": 7.8206962327134e-06,
"loss": 0.2392,
"step": 1785
},
{
"epoch": 2.53,
"learning_rate": 7.796852646638054e-06,
"loss": 0.2233,
"step": 1786
},
{
"epoch": 2.54,
"learning_rate": 7.77300906056271e-06,
"loss": 0.2193,
"step": 1787
},
{
"epoch": 2.54,
"learning_rate": 7.749165474487363e-06,
"loss": 0.2232,
"step": 1788
},
{
"epoch": 2.54,
"learning_rate": 7.725321888412019e-06,
"loss": 0.2429,
"step": 1789
},
{
"epoch": 2.54,
"learning_rate": 7.701478302336671e-06,
"loss": 0.2185,
"step": 1790
},
{
"epoch": 2.54,
"learning_rate": 7.677634716261325e-06,
"loss": 0.2262,
"step": 1791
},
{
"epoch": 2.54,
"learning_rate": 7.65379113018598e-06,
"loss": 0.2332,
"step": 1792
},
{
"epoch": 2.54,
"learning_rate": 7.629947544110634e-06,
"loss": 0.221,
"step": 1793
},
{
"epoch": 2.55,
"learning_rate": 7.606103958035289e-06,
"loss": 0.2405,
"step": 1794
},
{
"epoch": 2.55,
"learning_rate": 7.582260371959943e-06,
"loss": 0.2506,
"step": 1795
},
{
"epoch": 2.55,
"learning_rate": 7.558416785884598e-06,
"loss": 0.2211,
"step": 1796
},
{
"epoch": 2.55,
"learning_rate": 7.534573199809252e-06,
"loss": 0.2279,
"step": 1797
},
{
"epoch": 2.55,
"learning_rate": 7.510729613733906e-06,
"loss": 0.2219,
"step": 1798
},
{
"epoch": 2.55,
"learning_rate": 7.48688602765856e-06,
"loss": 0.235,
"step": 1799
},
{
"epoch": 2.55,
"learning_rate": 7.463042441583215e-06,
"loss": 0.2164,
"step": 1800
},
{
"epoch": 2.56,
"learning_rate": 7.439198855507869e-06,
"loss": 0.2291,
"step": 1801
},
{
"epoch": 2.56,
"learning_rate": 7.415355269432523e-06,
"loss": 0.2258,
"step": 1802
},
{
"epoch": 2.56,
"learning_rate": 7.391511683357177e-06,
"loss": 0.2403,
"step": 1803
},
{
"epoch": 2.56,
"learning_rate": 7.367668097281831e-06,
"loss": 0.2382,
"step": 1804
},
{
"epoch": 2.56,
"learning_rate": 7.343824511206486e-06,
"loss": 0.2299,
"step": 1805
},
{
"epoch": 2.56,
"learning_rate": 7.31998092513114e-06,
"loss": 0.2404,
"step": 1806
},
{
"epoch": 2.56,
"learning_rate": 7.296137339055794e-06,
"loss": 0.2243,
"step": 1807
},
{
"epoch": 2.57,
"learning_rate": 7.272293752980448e-06,
"loss": 0.2289,
"step": 1808
},
{
"epoch": 2.57,
"learning_rate": 7.248450166905103e-06,
"loss": 0.2184,
"step": 1809
},
{
"epoch": 2.57,
"learning_rate": 7.224606580829757e-06,
"loss": 0.2274,
"step": 1810
},
{
"epoch": 2.57,
"learning_rate": 7.2007629947544116e-06,
"loss": 0.2178,
"step": 1811
},
{
"epoch": 2.57,
"learning_rate": 7.176919408679065e-06,
"loss": 0.2211,
"step": 1812
},
{
"epoch": 2.57,
"learning_rate": 7.153075822603721e-06,
"loss": 0.2303,
"step": 1813
},
{
"epoch": 2.57,
"learning_rate": 7.129232236528374e-06,
"loss": 0.2192,
"step": 1814
},
{
"epoch": 2.58,
"learning_rate": 7.105388650453028e-06,
"loss": 0.2357,
"step": 1815
},
{
"epoch": 2.58,
"learning_rate": 7.0815450643776825e-06,
"loss": 0.2341,
"step": 1816
},
{
"epoch": 2.58,
"learning_rate": 7.057701478302336e-06,
"loss": 0.2257,
"step": 1817
},
{
"epoch": 2.58,
"learning_rate": 7.033857892226992e-06,
"loss": 0.224,
"step": 1818
},
{
"epoch": 2.58,
"learning_rate": 7.010014306151645e-06,
"loss": 0.2306,
"step": 1819
},
{
"epoch": 2.58,
"learning_rate": 6.9861707200763005e-06,
"loss": 0.2188,
"step": 1820
},
{
"epoch": 2.58,
"learning_rate": 6.9623271340009535e-06,
"loss": 0.2423,
"step": 1821
},
{
"epoch": 2.59,
"learning_rate": 6.938483547925609e-06,
"loss": 0.2192,
"step": 1822
},
{
"epoch": 2.59,
"learning_rate": 6.914639961850262e-06,
"loss": 0.2175,
"step": 1823
},
{
"epoch": 2.59,
"learning_rate": 6.890796375774918e-06,
"loss": 0.2314,
"step": 1824
},
{
"epoch": 2.59,
"learning_rate": 6.8669527896995715e-06,
"loss": 0.2267,
"step": 1825
},
{
"epoch": 2.59,
"learning_rate": 6.8431092036242245e-06,
"loss": 0.2382,
"step": 1826
},
{
"epoch": 2.59,
"learning_rate": 6.81926561754888e-06,
"loss": 0.2262,
"step": 1827
},
{
"epoch": 2.59,
"learning_rate": 6.795422031473533e-06,
"loss": 0.2115,
"step": 1828
},
{
"epoch": 2.6,
"learning_rate": 6.771578445398189e-06,
"loss": 0.2238,
"step": 1829
},
{
"epoch": 2.6,
"learning_rate": 6.747734859322842e-06,
"loss": 0.2162,
"step": 1830
},
{
"epoch": 2.6,
"learning_rate": 6.723891273247497e-06,
"loss": 0.2404,
"step": 1831
},
{
"epoch": 2.6,
"learning_rate": 6.700047687172151e-06,
"loss": 0.2175,
"step": 1832
},
{
"epoch": 2.6,
"learning_rate": 6.676204101096806e-06,
"loss": 0.2394,
"step": 1833
},
{
"epoch": 2.6,
"learning_rate": 6.65236051502146e-06,
"loss": 0.2339,
"step": 1834
},
{
"epoch": 2.6,
"learning_rate": 6.628516928946114e-06,
"loss": 0.2362,
"step": 1835
},
{
"epoch": 2.61,
"learning_rate": 6.604673342870768e-06,
"loss": 0.2158,
"step": 1836
},
{
"epoch": 2.61,
"learning_rate": 6.580829756795423e-06,
"loss": 0.2302,
"step": 1837
},
{
"epoch": 2.61,
"learning_rate": 6.556986170720077e-06,
"loss": 0.2135,
"step": 1838
},
{
"epoch": 2.61,
"learning_rate": 6.533142584644731e-06,
"loss": 0.2277,
"step": 1839
},
{
"epoch": 2.61,
"learning_rate": 6.509298998569385e-06,
"loss": 0.2269,
"step": 1840
},
{
"epoch": 2.61,
"learning_rate": 6.485455412494039e-06,
"loss": 0.2265,
"step": 1841
},
{
"epoch": 2.61,
"learning_rate": 6.461611826418694e-06,
"loss": 0.224,
"step": 1842
},
{
"epoch": 2.62,
"learning_rate": 6.437768240343348e-06,
"loss": 0.2284,
"step": 1843
},
{
"epoch": 2.62,
"learning_rate": 6.4139246542680025e-06,
"loss": 0.2409,
"step": 1844
},
{
"epoch": 2.62,
"learning_rate": 6.390081068192656e-06,
"loss": 0.2285,
"step": 1845
},
{
"epoch": 2.62,
"learning_rate": 6.366237482117311e-06,
"loss": 0.2273,
"step": 1846
},
{
"epoch": 2.62,
"learning_rate": 6.342393896041965e-06,
"loss": 0.2307,
"step": 1847
},
{
"epoch": 2.62,
"learning_rate": 6.31855030996662e-06,
"loss": 0.2277,
"step": 1848
},
{
"epoch": 2.62,
"learning_rate": 6.2947067238912735e-06,
"loss": 0.2173,
"step": 1849
},
{
"epoch": 2.63,
"learning_rate": 6.270863137815927e-06,
"loss": 0.2201,
"step": 1850
},
{
"epoch": 2.63,
"learning_rate": 6.247019551740582e-06,
"loss": 0.2091,
"step": 1851
},
{
"epoch": 2.63,
"learning_rate": 6.223175965665237e-06,
"loss": 0.219,
"step": 1852
},
{
"epoch": 2.63,
"learning_rate": 6.199332379589891e-06,
"loss": 0.2129,
"step": 1853
},
{
"epoch": 2.63,
"learning_rate": 6.175488793514545e-06,
"loss": 0.2484,
"step": 1854
},
{
"epoch": 2.63,
"learning_rate": 6.151645207439199e-06,
"loss": 0.2224,
"step": 1855
},
{
"epoch": 2.63,
"learning_rate": 6.127801621363853e-06,
"loss": 0.2285,
"step": 1856
},
{
"epoch": 2.64,
"learning_rate": 6.103958035288508e-06,
"loss": 0.2353,
"step": 1857
},
{
"epoch": 2.64,
"learning_rate": 6.080114449213162e-06,
"loss": 0.2196,
"step": 1858
},
{
"epoch": 2.64,
"learning_rate": 6.056270863137816e-06,
"loss": 0.2303,
"step": 1859
},
{
"epoch": 2.64,
"learning_rate": 6.03242727706247e-06,
"loss": 0.2185,
"step": 1860
},
{
"epoch": 2.64,
"learning_rate": 6.008583690987125e-06,
"loss": 0.2157,
"step": 1861
},
{
"epoch": 2.64,
"learning_rate": 5.984740104911779e-06,
"loss": 0.2256,
"step": 1862
},
{
"epoch": 2.64,
"learning_rate": 5.9608965188364334e-06,
"loss": 0.2424,
"step": 1863
},
{
"epoch": 2.65,
"learning_rate": 5.937052932761087e-06,
"loss": 0.2242,
"step": 1864
},
{
"epoch": 2.65,
"learning_rate": 5.913209346685742e-06,
"loss": 0.2276,
"step": 1865
},
{
"epoch": 2.65,
"learning_rate": 5.889365760610397e-06,
"loss": 0.2246,
"step": 1866
},
{
"epoch": 2.65,
"learning_rate": 5.8655221745350506e-06,
"loss": 0.2247,
"step": 1867
},
{
"epoch": 2.65,
"learning_rate": 5.841678588459704e-06,
"loss": 0.2324,
"step": 1868
},
{
"epoch": 2.65,
"learning_rate": 5.817835002384358e-06,
"loss": 0.2288,
"step": 1869
},
{
"epoch": 2.65,
"learning_rate": 5.793991416309013e-06,
"loss": 0.2307,
"step": 1870
},
{
"epoch": 2.66,
"learning_rate": 5.770147830233668e-06,
"loss": 0.2185,
"step": 1871
},
{
"epoch": 2.66,
"learning_rate": 5.7463042441583216e-06,
"loss": 0.2298,
"step": 1872
},
{
"epoch": 2.66,
"learning_rate": 5.722460658082976e-06,
"loss": 0.2262,
"step": 1873
},
{
"epoch": 2.66,
"learning_rate": 5.69861707200763e-06,
"loss": 0.2312,
"step": 1874
},
{
"epoch": 2.66,
"learning_rate": 5.674773485932285e-06,
"loss": 0.2141,
"step": 1875
},
{
"epoch": 2.66,
"learning_rate": 5.650929899856939e-06,
"loss": 0.2306,
"step": 1876
},
{
"epoch": 2.66,
"learning_rate": 5.627086313781593e-06,
"loss": 0.2331,
"step": 1877
},
{
"epoch": 2.67,
"learning_rate": 5.603242727706247e-06,
"loss": 0.2279,
"step": 1878
},
{
"epoch": 2.67,
"learning_rate": 5.579399141630902e-06,
"loss": 0.2235,
"step": 1879
},
{
"epoch": 2.67,
"learning_rate": 5.555555555555556e-06,
"loss": 0.2321,
"step": 1880
},
{
"epoch": 2.67,
"learning_rate": 5.53171196948021e-06,
"loss": 0.2102,
"step": 1881
},
{
"epoch": 2.67,
"learning_rate": 5.507868383404864e-06,
"loss": 0.2378,
"step": 1882
},
{
"epoch": 2.67,
"learning_rate": 5.484024797329518e-06,
"loss": 0.2209,
"step": 1883
},
{
"epoch": 2.67,
"learning_rate": 5.460181211254173e-06,
"loss": 0.2141,
"step": 1884
},
{
"epoch": 2.68,
"learning_rate": 5.436337625178827e-06,
"loss": 0.2286,
"step": 1885
},
{
"epoch": 2.68,
"learning_rate": 5.4124940391034815e-06,
"loss": 0.2142,
"step": 1886
},
{
"epoch": 2.68,
"learning_rate": 5.388650453028135e-06,
"loss": 0.2288,
"step": 1887
},
{
"epoch": 2.68,
"learning_rate": 5.36480686695279e-06,
"loss": 0.2148,
"step": 1888
},
{
"epoch": 2.68,
"learning_rate": 5.340963280877445e-06,
"loss": 0.2258,
"step": 1889
},
{
"epoch": 2.68,
"learning_rate": 5.317119694802099e-06,
"loss": 0.2312,
"step": 1890
},
{
"epoch": 2.68,
"learning_rate": 5.293276108726753e-06,
"loss": 0.2305,
"step": 1891
},
{
"epoch": 2.69,
"learning_rate": 5.269432522651406e-06,
"loss": 0.2217,
"step": 1892
},
{
"epoch": 2.69,
"learning_rate": 5.245588936576061e-06,
"loss": 0.2401,
"step": 1893
},
{
"epoch": 2.69,
"learning_rate": 5.221745350500716e-06,
"loss": 0.2337,
"step": 1894
},
{
"epoch": 2.69,
"learning_rate": 5.19790176442537e-06,
"loss": 0.2274,
"step": 1895
},
{
"epoch": 2.69,
"learning_rate": 5.174058178350024e-06,
"loss": 0.2179,
"step": 1896
},
{
"epoch": 2.69,
"learning_rate": 5.150214592274678e-06,
"loss": 0.2153,
"step": 1897
},
{
"epoch": 2.69,
"learning_rate": 5.126371006199333e-06,
"loss": 0.2313,
"step": 1898
},
{
"epoch": 2.7,
"learning_rate": 5.102527420123987e-06,
"loss": 0.2201,
"step": 1899
},
{
"epoch": 2.7,
"learning_rate": 5.0786838340486415e-06,
"loss": 0.2262,
"step": 1900
},
{
"epoch": 2.7,
"learning_rate": 5.054840247973295e-06,
"loss": 0.2319,
"step": 1901
},
{
"epoch": 2.7,
"learning_rate": 5.03099666189795e-06,
"loss": 0.2351,
"step": 1902
},
{
"epoch": 2.7,
"learning_rate": 5.007153075822604e-06,
"loss": 0.2219,
"step": 1903
},
{
"epoch": 2.7,
"learning_rate": 4.983309489747258e-06,
"loss": 0.212,
"step": 1904
},
{
"epoch": 2.7,
"learning_rate": 4.9594659036719125e-06,
"loss": 0.2282,
"step": 1905
},
{
"epoch": 2.71,
"learning_rate": 4.935622317596566e-06,
"loss": 0.2221,
"step": 1906
},
{
"epoch": 2.71,
"learning_rate": 4.911778731521221e-06,
"loss": 0.2253,
"step": 1907
},
{
"epoch": 2.71,
"learning_rate": 4.887935145445875e-06,
"loss": 0.2264,
"step": 1908
},
{
"epoch": 2.71,
"learning_rate": 4.86409155937053e-06,
"loss": 0.2253,
"step": 1909
},
{
"epoch": 2.71,
"learning_rate": 4.8402479732951835e-06,
"loss": 0.2303,
"step": 1910
},
{
"epoch": 2.71,
"learning_rate": 4.816404387219838e-06,
"loss": 0.2186,
"step": 1911
},
{
"epoch": 2.71,
"learning_rate": 4.792560801144493e-06,
"loss": 0.229,
"step": 1912
},
{
"epoch": 2.72,
"learning_rate": 4.768717215069147e-06,
"loss": 0.2426,
"step": 1913
},
{
"epoch": 2.72,
"learning_rate": 4.7448736289938015e-06,
"loss": 0.2342,
"step": 1914
},
{
"epoch": 2.72,
"learning_rate": 4.721030042918455e-06,
"loss": 0.2335,
"step": 1915
},
{
"epoch": 2.72,
"learning_rate": 4.697186456843109e-06,
"loss": 0.2189,
"step": 1916
},
{
"epoch": 2.72,
"learning_rate": 4.673342870767763e-06,
"loss": 0.2129,
"step": 1917
},
{
"epoch": 2.72,
"learning_rate": 4.649499284692418e-06,
"loss": 0.2286,
"step": 1918
},
{
"epoch": 2.72,
"learning_rate": 4.6256556986170724e-06,
"loss": 0.2337,
"step": 1919
},
{
"epoch": 2.73,
"learning_rate": 4.601812112541726e-06,
"loss": 0.2077,
"step": 1920
},
{
"epoch": 2.73,
"learning_rate": 4.577968526466381e-06,
"loss": 0.2296,
"step": 1921
},
{
"epoch": 2.73,
"learning_rate": 4.554124940391035e-06,
"loss": 0.2263,
"step": 1922
},
{
"epoch": 2.73,
"learning_rate": 4.53028135431569e-06,
"loss": 0.2209,
"step": 1923
},
{
"epoch": 2.73,
"learning_rate": 4.5064377682403434e-06,
"loss": 0.2247,
"step": 1924
},
{
"epoch": 2.73,
"learning_rate": 4.482594182164998e-06,
"loss": 0.2154,
"step": 1925
},
{
"epoch": 2.73,
"learning_rate": 4.458750596089652e-06,
"loss": 0.2351,
"step": 1926
},
{
"epoch": 2.73,
"learning_rate": 4.434907010014307e-06,
"loss": 0.2226,
"step": 1927
},
{
"epoch": 2.74,
"learning_rate": 4.4110634239389606e-06,
"loss": 0.2342,
"step": 1928
},
{
"epoch": 2.74,
"learning_rate": 4.3872198378636144e-06,
"loss": 0.2239,
"step": 1929
},
{
"epoch": 2.74,
"learning_rate": 4.363376251788269e-06,
"loss": 0.2315,
"step": 1930
},
{
"epoch": 2.74,
"learning_rate": 4.339532665712923e-06,
"loss": 0.2234,
"step": 1931
},
{
"epoch": 2.74,
"learning_rate": 4.315689079637578e-06,
"loss": 0.229,
"step": 1932
},
{
"epoch": 2.74,
"learning_rate": 4.2918454935622316e-06,
"loss": 0.2312,
"step": 1933
},
{
"epoch": 2.74,
"learning_rate": 4.268001907486886e-06,
"loss": 0.2361,
"step": 1934
},
{
"epoch": 2.75,
"learning_rate": 4.244158321411541e-06,
"loss": 0.2366,
"step": 1935
},
{
"epoch": 2.75,
"learning_rate": 4.220314735336195e-06,
"loss": 0.2168,
"step": 1936
},
{
"epoch": 2.75,
"learning_rate": 4.1964711492608495e-06,
"loss": 0.2272,
"step": 1937
},
{
"epoch": 2.75,
"learning_rate": 4.172627563185503e-06,
"loss": 0.2308,
"step": 1938
},
{
"epoch": 2.75,
"learning_rate": 4.148783977110158e-06,
"loss": 0.2315,
"step": 1939
},
{
"epoch": 2.75,
"learning_rate": 4.124940391034811e-06,
"loss": 0.2134,
"step": 1940
},
{
"epoch": 2.75,
"learning_rate": 4.101096804959466e-06,
"loss": 0.2287,
"step": 1941
},
{
"epoch": 2.76,
"learning_rate": 4.0772532188841205e-06,
"loss": 0.2199,
"step": 1942
},
{
"epoch": 2.76,
"learning_rate": 4.053409632808774e-06,
"loss": 0.2412,
"step": 1943
},
{
"epoch": 2.76,
"learning_rate": 4.029566046733429e-06,
"loss": 0.2262,
"step": 1944
},
{
"epoch": 2.76,
"learning_rate": 4.005722460658083e-06,
"loss": 0.227,
"step": 1945
},
{
"epoch": 2.76,
"learning_rate": 3.981878874582738e-06,
"loss": 0.2307,
"step": 1946
},
{
"epoch": 2.76,
"learning_rate": 3.9580352885073915e-06,
"loss": 0.2265,
"step": 1947
},
{
"epoch": 2.76,
"learning_rate": 3.934191702432046e-06,
"loss": 0.235,
"step": 1948
},
{
"epoch": 2.77,
"learning_rate": 3.9103481163567e-06,
"loss": 0.2315,
"step": 1949
},
{
"epoch": 2.77,
"learning_rate": 3.886504530281355e-06,
"loss": 0.2332,
"step": 1950
},
{
"epoch": 2.77,
"learning_rate": 3.8626609442060095e-06,
"loss": 0.2189,
"step": 1951
},
{
"epoch": 2.77,
"learning_rate": 3.8388173581306625e-06,
"loss": 0.2156,
"step": 1952
},
{
"epoch": 2.77,
"learning_rate": 3.814973772055317e-06,
"loss": 0.2137,
"step": 1953
},
{
"epoch": 2.77,
"learning_rate": 3.7911301859799715e-06,
"loss": 0.2223,
"step": 1954
},
{
"epoch": 2.77,
"learning_rate": 3.767286599904626e-06,
"loss": 0.2377,
"step": 1955
},
{
"epoch": 2.78,
"learning_rate": 3.74344301382928e-06,
"loss": 0.2301,
"step": 1956
},
{
"epoch": 2.78,
"learning_rate": 3.7195994277539344e-06,
"loss": 0.2298,
"step": 1957
},
{
"epoch": 2.78,
"learning_rate": 3.6957558416785886e-06,
"loss": 0.208,
"step": 1958
},
{
"epoch": 2.78,
"learning_rate": 3.671912255603243e-06,
"loss": 0.2175,
"step": 1959
},
{
"epoch": 2.78,
"learning_rate": 3.648068669527897e-06,
"loss": 0.2388,
"step": 1960
},
{
"epoch": 2.78,
"learning_rate": 3.6242250834525515e-06,
"loss": 0.2227,
"step": 1961
},
{
"epoch": 2.78,
"learning_rate": 3.6003814973772058e-06,
"loss": 0.2236,
"step": 1962
},
{
"epoch": 2.79,
"learning_rate": 3.5765379113018605e-06,
"loss": 0.2349,
"step": 1963
},
{
"epoch": 2.79,
"learning_rate": 3.552694325226514e-06,
"loss": 0.2092,
"step": 1964
},
{
"epoch": 2.79,
"learning_rate": 3.528850739151168e-06,
"loss": 0.2134,
"step": 1965
},
{
"epoch": 2.79,
"learning_rate": 3.5050071530758225e-06,
"loss": 0.2321,
"step": 1966
},
{
"epoch": 2.79,
"learning_rate": 3.4811635670004768e-06,
"loss": 0.2244,
"step": 1967
},
{
"epoch": 2.79,
"learning_rate": 3.457319980925131e-06,
"loss": 0.2294,
"step": 1968
},
{
"epoch": 2.79,
"learning_rate": 3.4334763948497858e-06,
"loss": 0.209,
"step": 1969
},
{
"epoch": 2.8,
"learning_rate": 3.40963280877444e-06,
"loss": 0.2257,
"step": 1970
},
{
"epoch": 2.8,
"learning_rate": 3.3857892226990943e-06,
"loss": 0.2212,
"step": 1971
},
{
"epoch": 2.8,
"learning_rate": 3.3619456366237486e-06,
"loss": 0.2219,
"step": 1972
},
{
"epoch": 2.8,
"learning_rate": 3.338102050548403e-06,
"loss": 0.2285,
"step": 1973
},
{
"epoch": 2.8,
"learning_rate": 3.314258464473057e-06,
"loss": 0.2222,
"step": 1974
},
{
"epoch": 2.8,
"learning_rate": 3.2904148783977115e-06,
"loss": 0.2111,
"step": 1975
},
{
"epoch": 2.8,
"learning_rate": 3.2665712923223653e-06,
"loss": 0.2159,
"step": 1976
},
{
"epoch": 2.81,
"learning_rate": 3.2427277062470196e-06,
"loss": 0.224,
"step": 1977
},
{
"epoch": 2.81,
"learning_rate": 3.218884120171674e-06,
"loss": 0.2362,
"step": 1978
},
{
"epoch": 2.81,
"learning_rate": 3.195040534096328e-06,
"loss": 0.2245,
"step": 1979
},
{
"epoch": 2.81,
"learning_rate": 3.1711969480209824e-06,
"loss": 0.2121,
"step": 1980
},
{
"epoch": 2.81,
"learning_rate": 3.1473533619456367e-06,
"loss": 0.2273,
"step": 1981
},
{
"epoch": 2.81,
"learning_rate": 3.123509775870291e-06,
"loss": 0.2374,
"step": 1982
},
{
"epoch": 2.81,
"learning_rate": 3.0996661897949453e-06,
"loss": 0.2261,
"step": 1983
},
{
"epoch": 2.82,
"learning_rate": 3.0758226037195996e-06,
"loss": 0.2138,
"step": 1984
},
{
"epoch": 2.82,
"learning_rate": 3.051979017644254e-06,
"loss": 0.2342,
"step": 1985
},
{
"epoch": 2.82,
"learning_rate": 3.028135431568908e-06,
"loss": 0.2206,
"step": 1986
},
{
"epoch": 2.82,
"learning_rate": 3.0042918454935624e-06,
"loss": 0.2381,
"step": 1987
},
{
"epoch": 2.82,
"learning_rate": 2.9804482594182167e-06,
"loss": 0.2219,
"step": 1988
},
{
"epoch": 2.82,
"learning_rate": 2.956604673342871e-06,
"loss": 0.227,
"step": 1989
},
{
"epoch": 2.82,
"learning_rate": 2.9327610872675253e-06,
"loss": 0.2332,
"step": 1990
},
{
"epoch": 2.83,
"learning_rate": 2.908917501192179e-06,
"loss": 0.2344,
"step": 1991
},
{
"epoch": 2.83,
"learning_rate": 2.885073915116834e-06,
"loss": 0.2206,
"step": 1992
},
{
"epoch": 2.83,
"learning_rate": 2.861230329041488e-06,
"loss": 0.2378,
"step": 1993
},
{
"epoch": 2.83,
"learning_rate": 2.8373867429661424e-06,
"loss": 0.2204,
"step": 1994
},
{
"epoch": 2.83,
"learning_rate": 2.8135431568907967e-06,
"loss": 0.2302,
"step": 1995
},
{
"epoch": 2.83,
"learning_rate": 2.789699570815451e-06,
"loss": 0.224,
"step": 1996
},
{
"epoch": 2.83,
"learning_rate": 2.765855984740105e-06,
"loss": 0.2357,
"step": 1997
},
{
"epoch": 2.84,
"learning_rate": 2.742012398664759e-06,
"loss": 0.2252,
"step": 1998
},
{
"epoch": 2.84,
"learning_rate": 2.7181688125894134e-06,
"loss": 0.2229,
"step": 1999
},
{
"epoch": 2.84,
"learning_rate": 2.6943252265140677e-06,
"loss": 0.2173,
"step": 2000
},
{
"epoch": 2.84,
"learning_rate": 2.6704816404387224e-06,
"loss": 0.2249,
"step": 2001
},
{
"epoch": 2.84,
"learning_rate": 2.6466380543633767e-06,
"loss": 0.2236,
"step": 2002
},
{
"epoch": 2.84,
"learning_rate": 2.6227944682880305e-06,
"loss": 0.2128,
"step": 2003
},
{
"epoch": 2.84,
"learning_rate": 2.598950882212685e-06,
"loss": 0.2335,
"step": 2004
},
{
"epoch": 2.85,
"learning_rate": 2.575107296137339e-06,
"loss": 0.2221,
"step": 2005
},
{
"epoch": 2.85,
"learning_rate": 2.5512637100619934e-06,
"loss": 0.228,
"step": 2006
},
{
"epoch": 2.85,
"learning_rate": 2.5274201239866477e-06,
"loss": 0.2314,
"step": 2007
},
{
"epoch": 2.85,
"learning_rate": 2.503576537911302e-06,
"loss": 0.2296,
"step": 2008
},
{
"epoch": 2.85,
"learning_rate": 2.4797329518359562e-06,
"loss": 0.2278,
"step": 2009
},
{
"epoch": 2.85,
"learning_rate": 2.4558893657606105e-06,
"loss": 0.229,
"step": 2010
},
{
"epoch": 2.85,
"learning_rate": 2.432045779685265e-06,
"loss": 0.2208,
"step": 2011
},
{
"epoch": 2.86,
"learning_rate": 2.408202193609919e-06,
"loss": 0.2198,
"step": 2012
},
{
"epoch": 2.86,
"learning_rate": 2.3843586075345734e-06,
"loss": 0.228,
"step": 2013
},
{
"epoch": 2.86,
"learning_rate": 2.3605150214592277e-06,
"loss": 0.2232,
"step": 2014
},
{
"epoch": 2.86,
"learning_rate": 2.3366714353838815e-06,
"loss": 0.2201,
"step": 2015
},
{
"epoch": 2.86,
"learning_rate": 2.3128278493085362e-06,
"loss": 0.2122,
"step": 2016
},
{
"epoch": 2.86,
"learning_rate": 2.2889842632331905e-06,
"loss": 0.2222,
"step": 2017
},
{
"epoch": 2.86,
"learning_rate": 2.265140677157845e-06,
"loss": 0.2285,
"step": 2018
},
{
"epoch": 2.87,
"learning_rate": 2.241297091082499e-06,
"loss": 0.2307,
"step": 2019
},
{
"epoch": 2.87,
"learning_rate": 2.2174535050071534e-06,
"loss": 0.2214,
"step": 2020
},
{
"epoch": 2.87,
"learning_rate": 2.1936099189318072e-06,
"loss": 0.2248,
"step": 2021
},
{
"epoch": 2.87,
"learning_rate": 2.1697663328564615e-06,
"loss": 0.2191,
"step": 2022
},
{
"epoch": 2.87,
"learning_rate": 2.1459227467811158e-06,
"loss": 0.2242,
"step": 2023
},
{
"epoch": 2.87,
"learning_rate": 2.1220791607057705e-06,
"loss": 0.2195,
"step": 2024
},
{
"epoch": 2.87,
"learning_rate": 2.0982355746304248e-06,
"loss": 0.209,
"step": 2025
},
{
"epoch": 2.88,
"learning_rate": 2.074391988555079e-06,
"loss": 0.2316,
"step": 2026
},
{
"epoch": 2.88,
"learning_rate": 2.050548402479733e-06,
"loss": 0.2341,
"step": 2027
},
{
"epoch": 2.88,
"learning_rate": 2.026704816404387e-06,
"loss": 0.2226,
"step": 2028
},
{
"epoch": 2.88,
"learning_rate": 2.0028612303290415e-06,
"loss": 0.2256,
"step": 2029
},
{
"epoch": 2.88,
"learning_rate": 1.9790176442536958e-06,
"loss": 0.2318,
"step": 2030
},
{
"epoch": 2.88,
"learning_rate": 1.95517405817835e-06,
"loss": 0.2236,
"step": 2031
},
{
"epoch": 2.88,
"learning_rate": 1.9313304721030048e-06,
"loss": 0.222,
"step": 2032
},
{
"epoch": 2.89,
"learning_rate": 1.9074868860276586e-06,
"loss": 0.2256,
"step": 2033
},
{
"epoch": 2.89,
"learning_rate": 1.883643299952313e-06,
"loss": 0.2308,
"step": 2034
},
{
"epoch": 2.89,
"learning_rate": 1.8597997138769672e-06,
"loss": 0.2227,
"step": 2035
},
{
"epoch": 2.89,
"learning_rate": 1.8359561278016215e-06,
"loss": 0.2178,
"step": 2036
},
{
"epoch": 2.89,
"learning_rate": 1.8121125417262757e-06,
"loss": 0.2109,
"step": 2037
},
{
"epoch": 2.89,
"learning_rate": 1.7882689556509302e-06,
"loss": 0.2216,
"step": 2038
},
{
"epoch": 2.89,
"learning_rate": 1.764425369575584e-06,
"loss": 0.233,
"step": 2039
},
{
"epoch": 2.9,
"learning_rate": 1.7405817835002384e-06,
"loss": 0.2249,
"step": 2040
},
{
"epoch": 2.9,
"learning_rate": 1.7167381974248929e-06,
"loss": 0.2277,
"step": 2041
},
{
"epoch": 2.9,
"learning_rate": 1.6928946113495472e-06,
"loss": 0.2283,
"step": 2042
},
{
"epoch": 2.9,
"learning_rate": 1.6690510252742014e-06,
"loss": 0.225,
"step": 2043
},
{
"epoch": 2.9,
"learning_rate": 1.6452074391988557e-06,
"loss": 0.2131,
"step": 2044
},
{
"epoch": 2.9,
"learning_rate": 1.6213638531235098e-06,
"loss": 0.2171,
"step": 2045
},
{
"epoch": 2.9,
"learning_rate": 1.597520267048164e-06,
"loss": 0.2299,
"step": 2046
},
{
"epoch": 2.91,
"learning_rate": 1.5736766809728184e-06,
"loss": 0.2308,
"step": 2047
},
{
"epoch": 2.91,
"learning_rate": 1.5498330948974726e-06,
"loss": 0.2448,
"step": 2048
},
{
"epoch": 2.91,
"learning_rate": 1.525989508822127e-06,
"loss": 0.2255,
"step": 2049
},
{
"epoch": 2.91,
"learning_rate": 1.5021459227467812e-06,
"loss": 0.2296,
"step": 2050
},
{
"epoch": 2.91,
"learning_rate": 1.4783023366714355e-06,
"loss": 0.2232,
"step": 2051
},
{
"epoch": 2.91,
"learning_rate": 1.4544587505960896e-06,
"loss": 0.2194,
"step": 2052
},
{
"epoch": 2.91,
"learning_rate": 1.430615164520744e-06,
"loss": 0.2435,
"step": 2053
},
{
"epoch": 2.92,
"learning_rate": 1.4067715784453983e-06,
"loss": 0.2201,
"step": 2054
},
{
"epoch": 2.92,
"learning_rate": 1.3829279923700524e-06,
"loss": 0.2393,
"step": 2055
},
{
"epoch": 2.92,
"learning_rate": 1.3590844062947067e-06,
"loss": 0.2147,
"step": 2056
},
{
"epoch": 2.92,
"learning_rate": 1.3352408202193612e-06,
"loss": 0.2225,
"step": 2057
},
{
"epoch": 2.92,
"learning_rate": 1.3113972341440153e-06,
"loss": 0.2356,
"step": 2058
},
{
"epoch": 2.92,
"learning_rate": 1.2875536480686696e-06,
"loss": 0.2255,
"step": 2059
},
{
"epoch": 2.92,
"learning_rate": 1.2637100619933238e-06,
"loss": 0.2132,
"step": 2060
},
{
"epoch": 2.93,
"learning_rate": 1.2398664759179781e-06,
"loss": 0.2136,
"step": 2061
},
{
"epoch": 2.93,
"learning_rate": 1.2160228898426324e-06,
"loss": 0.2362,
"step": 2062
},
{
"epoch": 2.93,
"learning_rate": 1.1921793037672867e-06,
"loss": 0.2196,
"step": 2063
},
{
"epoch": 2.93,
"learning_rate": 1.1683357176919408e-06,
"loss": 0.2305,
"step": 2064
},
{
"epoch": 2.93,
"learning_rate": 1.1444921316165953e-06,
"loss": 0.2205,
"step": 2065
},
{
"epoch": 2.93,
"learning_rate": 1.1206485455412495e-06,
"loss": 0.2239,
"step": 2066
},
{
"epoch": 2.93,
"learning_rate": 1.0968049594659036e-06,
"loss": 0.2199,
"step": 2067
},
{
"epoch": 2.94,
"learning_rate": 1.0729613733905579e-06,
"loss": 0.2234,
"step": 2068
},
{
"epoch": 2.94,
"learning_rate": 1.0491177873152124e-06,
"loss": 0.2231,
"step": 2069
},
{
"epoch": 2.94,
"learning_rate": 1.0252742012398665e-06,
"loss": 0.2177,
"step": 2070
},
{
"epoch": 2.94,
"learning_rate": 1.0014306151645207e-06,
"loss": 0.2223,
"step": 2071
},
{
"epoch": 2.94,
"learning_rate": 9.77587029089175e-07,
"loss": 0.219,
"step": 2072
},
{
"epoch": 2.94,
"learning_rate": 9.537434430138293e-07,
"loss": 0.2228,
"step": 2073
},
{
"epoch": 2.94,
"learning_rate": 9.298998569384836e-07,
"loss": 0.222,
"step": 2074
},
{
"epoch": 2.94,
"learning_rate": 9.060562708631379e-07,
"loss": 0.2158,
"step": 2075
},
{
"epoch": 2.95,
"learning_rate": 8.82212684787792e-07,
"loss": 0.2192,
"step": 2076
},
{
"epoch": 2.95,
"learning_rate": 8.583690987124464e-07,
"loss": 0.2335,
"step": 2077
},
{
"epoch": 2.95,
"learning_rate": 8.345255126371007e-07,
"loss": 0.2146,
"step": 2078
},
{
"epoch": 2.95,
"learning_rate": 8.106819265617549e-07,
"loss": 0.2347,
"step": 2079
},
{
"epoch": 2.95,
"learning_rate": 7.868383404864092e-07,
"loss": 0.2272,
"step": 2080
},
{
"epoch": 2.95,
"learning_rate": 7.629947544110635e-07,
"loss": 0.2301,
"step": 2081
},
{
"epoch": 2.95,
"learning_rate": 7.391511683357177e-07,
"loss": 0.2073,
"step": 2082
},
{
"epoch": 2.96,
"learning_rate": 7.15307582260372e-07,
"loss": 0.2117,
"step": 2083
},
{
"epoch": 2.96,
"learning_rate": 6.914639961850262e-07,
"loss": 0.2227,
"step": 2084
},
{
"epoch": 2.96,
"learning_rate": 6.676204101096806e-07,
"loss": 0.2312,
"step": 2085
},
{
"epoch": 2.96,
"learning_rate": 6.437768240343348e-07,
"loss": 0.2369,
"step": 2086
},
{
"epoch": 2.96,
"learning_rate": 6.199332379589891e-07,
"loss": 0.2354,
"step": 2087
},
{
"epoch": 2.96,
"learning_rate": 5.960896518836433e-07,
"loss": 0.2388,
"step": 2088
},
{
"epoch": 2.96,
"learning_rate": 5.722460658082976e-07,
"loss": 0.2327,
"step": 2089
},
{
"epoch": 2.97,
"learning_rate": 5.484024797329518e-07,
"loss": 0.2306,
"step": 2090
},
{
"epoch": 2.97,
"learning_rate": 5.245588936576062e-07,
"loss": 0.2148,
"step": 2091
},
{
"epoch": 2.97,
"learning_rate": 5.007153075822604e-07,
"loss": 0.2143,
"step": 2092
},
{
"epoch": 2.97,
"learning_rate": 4.768717215069147e-07,
"loss": 0.2304,
"step": 2093
},
{
"epoch": 2.97,
"learning_rate": 4.5302813543156894e-07,
"loss": 0.2285,
"step": 2094
},
{
"epoch": 2.97,
"learning_rate": 4.291845493562232e-07,
"loss": 0.219,
"step": 2095
},
{
"epoch": 2.97,
"learning_rate": 4.0534096328087745e-07,
"loss": 0.2157,
"step": 2096
},
{
"epoch": 2.98,
"learning_rate": 3.8149737720553173e-07,
"loss": 0.2237,
"step": 2097
},
{
"epoch": 2.98,
"learning_rate": 3.57653791130186e-07,
"loss": 0.2117,
"step": 2098
},
{
"epoch": 2.98,
"learning_rate": 3.338102050548403e-07,
"loss": 0.2309,
"step": 2099
},
{
"epoch": 2.98,
"learning_rate": 3.0996661897949453e-07,
"loss": 0.23,
"step": 2100
},
{
"epoch": 2.98,
"learning_rate": 2.861230329041488e-07,
"loss": 0.2318,
"step": 2101
},
{
"epoch": 2.98,
"learning_rate": 2.622794468288031e-07,
"loss": 0.2228,
"step": 2102
},
{
"epoch": 2.98,
"learning_rate": 2.3843586075345733e-07,
"loss": 0.2249,
"step": 2103
},
{
"epoch": 2.99,
"learning_rate": 2.145922746781116e-07,
"loss": 0.2199,
"step": 2104
},
{
"epoch": 2.99,
"learning_rate": 1.9074868860276587e-07,
"loss": 0.202,
"step": 2105
},
{
"epoch": 2.99,
"learning_rate": 1.6690510252742015e-07,
"loss": 0.2102,
"step": 2106
},
{
"epoch": 2.99,
"learning_rate": 1.430615164520744e-07,
"loss": 0.2304,
"step": 2107
},
{
"epoch": 2.99,
"learning_rate": 1.1921793037672866e-07,
"loss": 0.2076,
"step": 2108
},
{
"epoch": 2.99,
"learning_rate": 9.537434430138293e-08,
"loss": 0.2177,
"step": 2109
},
{
"epoch": 2.99,
"learning_rate": 7.15307582260372e-08,
"loss": 0.2023,
"step": 2110
},
{
"epoch": 3.0,
"learning_rate": 4.7687172150691467e-08,
"loss": 0.2201,
"step": 2111
},
{
"epoch": 3.0,
"learning_rate": 2.3843586075345733e-08,
"loss": 0.2403,
"step": 2112
},
{
"epoch": 3.0,
"step": 2112,
"total_flos": 475126126837760.0,
"train_loss": 0.13942427967787918,
"train_runtime": 86201.3655,
"train_samples_per_second": 6.28,
"train_steps_per_second": 0.025
}
],
"logging_steps": 1.0,
"max_steps": 2112,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 475126126837760.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}