exp4 / trainer_state.json
Dragonhead's picture
Upload folder using huggingface_hub
2303dbc verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 500,
"global_step": 3002,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0006662780044973765,
"learning_rate": 0.0,
"loss": 6.0407,
"step": 1
},
{
"epoch": 0.001332556008994753,
"learning_rate": 5.494505494505495e-07,
"loss": 6.0155,
"step": 2
},
{
"epoch": 0.0019988340134921294,
"learning_rate": 1.098901098901099e-06,
"loss": 6.0367,
"step": 3
},
{
"epoch": 0.002665112017989506,
"learning_rate": 1.6483516483516484e-06,
"loss": 6.0308,
"step": 4
},
{
"epoch": 0.0033313900224868826,
"learning_rate": 2.197802197802198e-06,
"loss": 6.0202,
"step": 5
},
{
"epoch": 0.003997668026984259,
"learning_rate": 2.747252747252747e-06,
"loss": 6.0369,
"step": 6
},
{
"epoch": 0.004663946031481635,
"learning_rate": 3.2967032967032968e-06,
"loss": 6.0004,
"step": 7
},
{
"epoch": 0.005330224035979012,
"learning_rate": 3.846153846153847e-06,
"loss": 6.0081,
"step": 8
},
{
"epoch": 0.005996502040476389,
"learning_rate": 4.395604395604396e-06,
"loss": 6.0258,
"step": 9
},
{
"epoch": 0.006662780044973765,
"learning_rate": 4.945054945054945e-06,
"loss": 5.965,
"step": 10
},
{
"epoch": 0.007329058049471142,
"learning_rate": 5.494505494505494e-06,
"loss": 5.9869,
"step": 11
},
{
"epoch": 0.007995336053968518,
"learning_rate": 6.043956043956044e-06,
"loss": 5.9934,
"step": 12
},
{
"epoch": 0.008661614058465895,
"learning_rate": 6.5934065934065935e-06,
"loss": 5.9449,
"step": 13
},
{
"epoch": 0.00932789206296327,
"learning_rate": 7.142857142857143e-06,
"loss": 5.9949,
"step": 14
},
{
"epoch": 0.009994170067460648,
"learning_rate": 7.692307692307694e-06,
"loss": 5.921,
"step": 15
},
{
"epoch": 0.010660448071958024,
"learning_rate": 8.241758241758243e-06,
"loss": 5.9454,
"step": 16
},
{
"epoch": 0.011326726076455402,
"learning_rate": 8.791208791208792e-06,
"loss": 5.9267,
"step": 17
},
{
"epoch": 0.011993004080952777,
"learning_rate": 9.340659340659341e-06,
"loss": 5.9296,
"step": 18
},
{
"epoch": 0.012659282085450155,
"learning_rate": 9.89010989010989e-06,
"loss": 5.8777,
"step": 19
},
{
"epoch": 0.01332556008994753,
"learning_rate": 1.0439560439560441e-05,
"loss": 5.8725,
"step": 20
},
{
"epoch": 0.013991838094444908,
"learning_rate": 1.0989010989010989e-05,
"loss": 5.8727,
"step": 21
},
{
"epoch": 0.014658116098942284,
"learning_rate": 1.153846153846154e-05,
"loss": 5.7883,
"step": 22
},
{
"epoch": 0.01532439410343966,
"learning_rate": 1.2087912087912089e-05,
"loss": 5.8406,
"step": 23
},
{
"epoch": 0.015990672107937035,
"learning_rate": 1.2637362637362638e-05,
"loss": 5.8149,
"step": 24
},
{
"epoch": 0.016656950112434413,
"learning_rate": 1.3186813186813187e-05,
"loss": 5.7868,
"step": 25
},
{
"epoch": 0.01732322811693179,
"learning_rate": 1.3736263736263738e-05,
"loss": 5.7693,
"step": 26
},
{
"epoch": 0.017989506121429168,
"learning_rate": 1.4285714285714285e-05,
"loss": 5.7749,
"step": 27
},
{
"epoch": 0.01865578412592654,
"learning_rate": 1.4835164835164836e-05,
"loss": 5.7304,
"step": 28
},
{
"epoch": 0.01932206213042392,
"learning_rate": 1.5384615384615387e-05,
"loss": 5.7311,
"step": 29
},
{
"epoch": 0.019988340134921297,
"learning_rate": 1.5934065934065933e-05,
"loss": 5.6832,
"step": 30
},
{
"epoch": 0.020654618139418674,
"learning_rate": 1.6483516483516486e-05,
"loss": 5.691,
"step": 31
},
{
"epoch": 0.021320896143916048,
"learning_rate": 1.7032967032967035e-05,
"loss": 5.65,
"step": 32
},
{
"epoch": 0.021987174148413426,
"learning_rate": 1.7582417582417584e-05,
"loss": 5.6071,
"step": 33
},
{
"epoch": 0.022653452152910803,
"learning_rate": 1.8131868131868133e-05,
"loss": 5.6248,
"step": 34
},
{
"epoch": 0.023319730157408177,
"learning_rate": 1.8681318681318682e-05,
"loss": 5.6343,
"step": 35
},
{
"epoch": 0.023986008161905555,
"learning_rate": 1.923076923076923e-05,
"loss": 5.5838,
"step": 36
},
{
"epoch": 0.024652286166402932,
"learning_rate": 1.978021978021978e-05,
"loss": 5.5894,
"step": 37
},
{
"epoch": 0.02531856417090031,
"learning_rate": 2.032967032967033e-05,
"loss": 5.5606,
"step": 38
},
{
"epoch": 0.025984842175397684,
"learning_rate": 2.0879120879120882e-05,
"loss": 5.5264,
"step": 39
},
{
"epoch": 0.02665112017989506,
"learning_rate": 2.1428571428571428e-05,
"loss": 5.4988,
"step": 40
},
{
"epoch": 0.02731739818439244,
"learning_rate": 2.1978021978021977e-05,
"loss": 5.4701,
"step": 41
},
{
"epoch": 0.027983676188889816,
"learning_rate": 2.252747252747253e-05,
"loss": 5.4785,
"step": 42
},
{
"epoch": 0.02864995419338719,
"learning_rate": 2.307692307692308e-05,
"loss": 5.4427,
"step": 43
},
{
"epoch": 0.029316232197884567,
"learning_rate": 2.3626373626373628e-05,
"loss": 5.4109,
"step": 44
},
{
"epoch": 0.029982510202381945,
"learning_rate": 2.4175824175824177e-05,
"loss": 5.4286,
"step": 45
},
{
"epoch": 0.03064878820687932,
"learning_rate": 2.4725274725274727e-05,
"loss": 5.4021,
"step": 46
},
{
"epoch": 0.031315066211376696,
"learning_rate": 2.5274725274725276e-05,
"loss": 5.38,
"step": 47
},
{
"epoch": 0.03198134421587407,
"learning_rate": 2.582417582417583e-05,
"loss": 5.3758,
"step": 48
},
{
"epoch": 0.03264762222037145,
"learning_rate": 2.6373626373626374e-05,
"loss": 5.3265,
"step": 49
},
{
"epoch": 0.033313900224868825,
"learning_rate": 2.6923076923076923e-05,
"loss": 5.3135,
"step": 50
},
{
"epoch": 0.033980178229366206,
"learning_rate": 2.7472527472527476e-05,
"loss": 5.3547,
"step": 51
},
{
"epoch": 0.03464645623386358,
"learning_rate": 2.8021978021978025e-05,
"loss": 5.311,
"step": 52
},
{
"epoch": 0.035312734238360954,
"learning_rate": 2.857142857142857e-05,
"loss": 5.3107,
"step": 53
},
{
"epoch": 0.035979012242858335,
"learning_rate": 2.9120879120879123e-05,
"loss": 5.2785,
"step": 54
},
{
"epoch": 0.03664529024735571,
"learning_rate": 2.9670329670329673e-05,
"loss": 5.1847,
"step": 55
},
{
"epoch": 0.03731156825185308,
"learning_rate": 3.021978021978022e-05,
"loss": 5.2295,
"step": 56
},
{
"epoch": 0.037977846256350464,
"learning_rate": 3.0769230769230774e-05,
"loss": 5.2033,
"step": 57
},
{
"epoch": 0.03864412426084784,
"learning_rate": 3.131868131868132e-05,
"loss": 5.1961,
"step": 58
},
{
"epoch": 0.03931040226534521,
"learning_rate": 3.1868131868131866e-05,
"loss": 5.1697,
"step": 59
},
{
"epoch": 0.03997668026984259,
"learning_rate": 3.241758241758242e-05,
"loss": 5.1378,
"step": 60
},
{
"epoch": 0.04064295827433997,
"learning_rate": 3.296703296703297e-05,
"loss": 5.1154,
"step": 61
},
{
"epoch": 0.04130923627883735,
"learning_rate": 3.3516483516483513e-05,
"loss": 5.119,
"step": 62
},
{
"epoch": 0.04197551428333472,
"learning_rate": 3.406593406593407e-05,
"loss": 5.1348,
"step": 63
},
{
"epoch": 0.042641792287832096,
"learning_rate": 3.461538461538462e-05,
"loss": 5.0983,
"step": 64
},
{
"epoch": 0.04330807029232948,
"learning_rate": 3.516483516483517e-05,
"loss": 5.0798,
"step": 65
},
{
"epoch": 0.04397434829682685,
"learning_rate": 3.571428571428572e-05,
"loss": 5.0724,
"step": 66
},
{
"epoch": 0.044640626301324225,
"learning_rate": 3.6263736263736266e-05,
"loss": 5.068,
"step": 67
},
{
"epoch": 0.045306904305821606,
"learning_rate": 3.6813186813186815e-05,
"loss": 5.0335,
"step": 68
},
{
"epoch": 0.04597318231031898,
"learning_rate": 3.7362637362637365e-05,
"loss": 5.0003,
"step": 69
},
{
"epoch": 0.046639460314816354,
"learning_rate": 3.7912087912087914e-05,
"loss": 5.0059,
"step": 70
},
{
"epoch": 0.047305738319313735,
"learning_rate": 3.846153846153846e-05,
"loss": 5.0186,
"step": 71
},
{
"epoch": 0.04797201632381111,
"learning_rate": 3.901098901098901e-05,
"loss": 4.9817,
"step": 72
},
{
"epoch": 0.04863829432830849,
"learning_rate": 3.956043956043956e-05,
"loss": 4.9817,
"step": 73
},
{
"epoch": 0.049304572332805864,
"learning_rate": 4.010989010989011e-05,
"loss": 4.9954,
"step": 74
},
{
"epoch": 0.04997085033730324,
"learning_rate": 4.065934065934066e-05,
"loss": 4.9764,
"step": 75
},
{
"epoch": 0.05063712834180062,
"learning_rate": 4.120879120879121e-05,
"loss": 4.9578,
"step": 76
},
{
"epoch": 0.05130340634629799,
"learning_rate": 4.1758241758241765e-05,
"loss": 4.9679,
"step": 77
},
{
"epoch": 0.05196968435079537,
"learning_rate": 4.230769230769231e-05,
"loss": 4.9494,
"step": 78
},
{
"epoch": 0.05263596235529275,
"learning_rate": 4.2857142857142856e-05,
"loss": 4.9266,
"step": 79
},
{
"epoch": 0.05330224035979012,
"learning_rate": 4.340659340659341e-05,
"loss": 4.9264,
"step": 80
},
{
"epoch": 0.053968518364287496,
"learning_rate": 4.3956043956043955e-05,
"loss": 4.9325,
"step": 81
},
{
"epoch": 0.05463479636878488,
"learning_rate": 4.4505494505494504e-05,
"loss": 4.9036,
"step": 82
},
{
"epoch": 0.05530107437328225,
"learning_rate": 4.505494505494506e-05,
"loss": 4.933,
"step": 83
},
{
"epoch": 0.05596735237777963,
"learning_rate": 4.56043956043956e-05,
"loss": 4.9094,
"step": 84
},
{
"epoch": 0.056633630382277006,
"learning_rate": 4.615384615384616e-05,
"loss": 4.8983,
"step": 85
},
{
"epoch": 0.05729990838677438,
"learning_rate": 4.670329670329671e-05,
"loss": 4.9235,
"step": 86
},
{
"epoch": 0.05796618639127176,
"learning_rate": 4.7252747252747257e-05,
"loss": 4.8999,
"step": 87
},
{
"epoch": 0.058632464395769135,
"learning_rate": 4.7802197802197806e-05,
"loss": 4.8887,
"step": 88
},
{
"epoch": 0.05929874240026651,
"learning_rate": 4.8351648351648355e-05,
"loss": 4.8888,
"step": 89
},
{
"epoch": 0.05996502040476389,
"learning_rate": 4.8901098901098904e-05,
"loss": 4.896,
"step": 90
},
{
"epoch": 0.060631298409261264,
"learning_rate": 4.945054945054945e-05,
"loss": 4.8584,
"step": 91
},
{
"epoch": 0.06129757641375864,
"learning_rate": 5e-05,
"loss": 4.8635,
"step": 92
},
{
"epoch": 0.06196385441825602,
"learning_rate": 4.9999985441209204e-05,
"loss": 4.8335,
"step": 93
},
{
"epoch": 0.06263013242275339,
"learning_rate": 4.9999941764853785e-05,
"loss": 4.8425,
"step": 94
},
{
"epoch": 0.06329641042725077,
"learning_rate": 4.99998689709846e-05,
"loss": 4.8485,
"step": 95
},
{
"epoch": 0.06396268843174814,
"learning_rate": 4.999976705968644e-05,
"loss": 4.7787,
"step": 96
},
{
"epoch": 0.06462896643624552,
"learning_rate": 4.9999636031078e-05,
"loss": 4.8111,
"step": 97
},
{
"epoch": 0.0652952444407429,
"learning_rate": 4.9999475885311884e-05,
"loss": 4.8388,
"step": 98
},
{
"epoch": 0.06596152244524027,
"learning_rate": 4.9999286622574626e-05,
"loss": 4.8166,
"step": 99
},
{
"epoch": 0.06662780044973765,
"learning_rate": 4.9999068243086644e-05,
"loss": 4.7999,
"step": 100
},
{
"epoch": 0.06729407845423503,
"learning_rate": 4.9998820747102305e-05,
"loss": 4.8313,
"step": 101
},
{
"epoch": 0.06796035645873241,
"learning_rate": 4.999854413490985e-05,
"loss": 4.8158,
"step": 102
},
{
"epoch": 0.06862663446322978,
"learning_rate": 4.999823840683147e-05,
"loss": 4.7823,
"step": 103
},
{
"epoch": 0.06929291246772716,
"learning_rate": 4.999790356322323e-05,
"loss": 4.8122,
"step": 104
},
{
"epoch": 0.06995919047222454,
"learning_rate": 4.999753960447513e-05,
"loss": 4.7459,
"step": 105
},
{
"epoch": 0.07062546847672191,
"learning_rate": 4.9997146531011076e-05,
"loss": 4.7865,
"step": 106
},
{
"epoch": 0.07129174648121929,
"learning_rate": 4.9996724343288875e-05,
"loss": 4.7432,
"step": 107
},
{
"epoch": 0.07195802448571667,
"learning_rate": 4.9996273041800257e-05,
"loss": 4.7635,
"step": 108
},
{
"epoch": 0.07262430249021404,
"learning_rate": 4.9995792627070856e-05,
"loss": 4.7816,
"step": 109
},
{
"epoch": 0.07329058049471142,
"learning_rate": 4.999528309966021e-05,
"loss": 4.7464,
"step": 110
},
{
"epoch": 0.0739568584992088,
"learning_rate": 4.999474446016176e-05,
"loss": 4.7717,
"step": 111
},
{
"epoch": 0.07462313650370617,
"learning_rate": 4.999417670920287e-05,
"loss": 4.7658,
"step": 112
},
{
"epoch": 0.07528941450820355,
"learning_rate": 4.999357984744479e-05,
"loss": 4.7398,
"step": 113
},
{
"epoch": 0.07595569251270093,
"learning_rate": 4.999295387558271e-05,
"loss": 4.7242,
"step": 114
},
{
"epoch": 0.0766219705171983,
"learning_rate": 4.999229879434568e-05,
"loss": 4.6888,
"step": 115
},
{
"epoch": 0.07728824852169568,
"learning_rate": 4.999161460449669e-05,
"loss": 4.7404,
"step": 116
},
{
"epoch": 0.07795452652619306,
"learning_rate": 4.99909013068326e-05,
"loss": 4.7114,
"step": 117
},
{
"epoch": 0.07862080453069042,
"learning_rate": 4.999015890218421e-05,
"loss": 4.7017,
"step": 118
},
{
"epoch": 0.0792870825351878,
"learning_rate": 4.9989387391416185e-05,
"loss": 4.7107,
"step": 119
},
{
"epoch": 0.07995336053968519,
"learning_rate": 4.998858677542711e-05,
"loss": 4.7256,
"step": 120
},
{
"epoch": 0.08061963854418255,
"learning_rate": 4.998775705514947e-05,
"loss": 4.7365,
"step": 121
},
{
"epoch": 0.08128591654867993,
"learning_rate": 4.998689823154965e-05,
"loss": 4.723,
"step": 122
},
{
"epoch": 0.08195219455317732,
"learning_rate": 4.998601030562791e-05,
"loss": 4.6716,
"step": 123
},
{
"epoch": 0.0826184725576747,
"learning_rate": 4.9985093278418426e-05,
"loss": 4.7119,
"step": 124
},
{
"epoch": 0.08328475056217206,
"learning_rate": 4.998414715098926e-05,
"loss": 4.6685,
"step": 125
},
{
"epoch": 0.08395102856666944,
"learning_rate": 4.9983171924442374e-05,
"loss": 4.6733,
"step": 126
},
{
"epoch": 0.08461730657116683,
"learning_rate": 4.998216759991361e-05,
"loss": 4.7082,
"step": 127
},
{
"epoch": 0.08528358457566419,
"learning_rate": 4.998113417857272e-05,
"loss": 4.6918,
"step": 128
},
{
"epoch": 0.08594986258016157,
"learning_rate": 4.998007166162333e-05,
"loss": 4.6874,
"step": 129
},
{
"epoch": 0.08661614058465895,
"learning_rate": 4.997898005030295e-05,
"loss": 4.683,
"step": 130
},
{
"epoch": 0.08728241858915632,
"learning_rate": 4.997785934588298e-05,
"loss": 4.6609,
"step": 131
},
{
"epoch": 0.0879486965936537,
"learning_rate": 4.997670954966872e-05,
"loss": 4.647,
"step": 132
},
{
"epoch": 0.08861497459815108,
"learning_rate": 4.9975530662999344e-05,
"loss": 4.6885,
"step": 133
},
{
"epoch": 0.08928125260264845,
"learning_rate": 4.997432268724789e-05,
"loss": 4.6627,
"step": 134
},
{
"epoch": 0.08994753060714583,
"learning_rate": 4.9973085623821304e-05,
"loss": 4.6224,
"step": 135
},
{
"epoch": 0.09061380861164321,
"learning_rate": 4.9971819474160384e-05,
"loss": 4.6601,
"step": 136
},
{
"epoch": 0.09128008661614058,
"learning_rate": 4.997052423973983e-05,
"loss": 4.6226,
"step": 137
},
{
"epoch": 0.09194636462063796,
"learning_rate": 4.996919992206821e-05,
"loss": 4.666,
"step": 138
},
{
"epoch": 0.09261264262513534,
"learning_rate": 4.996784652268795e-05,
"loss": 4.6424,
"step": 139
},
{
"epoch": 0.09327892062963271,
"learning_rate": 4.996646404317537e-05,
"loss": 4.6593,
"step": 140
},
{
"epoch": 0.09394519863413009,
"learning_rate": 4.996505248514063e-05,
"loss": 4.6517,
"step": 141
},
{
"epoch": 0.09461147663862747,
"learning_rate": 4.996361185022779e-05,
"loss": 4.6581,
"step": 142
},
{
"epoch": 0.09527775464312484,
"learning_rate": 4.996214214011476e-05,
"loss": 4.6286,
"step": 143
},
{
"epoch": 0.09594403264762222,
"learning_rate": 4.996064335651332e-05,
"loss": 4.649,
"step": 144
},
{
"epoch": 0.0966103106521196,
"learning_rate": 4.995911550116911e-05,
"loss": 4.6143,
"step": 145
},
{
"epoch": 0.09727658865661698,
"learning_rate": 4.9957558575861606e-05,
"loss": 4.6136,
"step": 146
},
{
"epoch": 0.09794286666111435,
"learning_rate": 4.9955972582404185e-05,
"loss": 4.6261,
"step": 147
},
{
"epoch": 0.09860914466561173,
"learning_rate": 4.995435752264406e-05,
"loss": 4.6352,
"step": 148
},
{
"epoch": 0.09927542267010911,
"learning_rate": 4.995271339846229e-05,
"loss": 4.6492,
"step": 149
},
{
"epoch": 0.09994170067460648,
"learning_rate": 4.9951040211773795e-05,
"loss": 4.5977,
"step": 150
},
{
"epoch": 0.10060797867910386,
"learning_rate": 4.9949337964527334e-05,
"loss": 4.6205,
"step": 151
},
{
"epoch": 0.10127425668360124,
"learning_rate": 4.994760665870552e-05,
"loss": 4.6554,
"step": 152
},
{
"epoch": 0.1019405346880986,
"learning_rate": 4.994584629632482e-05,
"loss": 4.6201,
"step": 153
},
{
"epoch": 0.10260681269259599,
"learning_rate": 4.994405687943552e-05,
"loss": 4.5537,
"step": 154
},
{
"epoch": 0.10327309069709337,
"learning_rate": 4.994223841012178e-05,
"loss": 4.5944,
"step": 155
},
{
"epoch": 0.10393936870159073,
"learning_rate": 4.994039089050156e-05,
"loss": 4.5989,
"step": 156
},
{
"epoch": 0.10460564670608812,
"learning_rate": 4.9938514322726676e-05,
"loss": 4.5668,
"step": 157
},
{
"epoch": 0.1052719247105855,
"learning_rate": 4.993660870898278e-05,
"loss": 4.5856,
"step": 158
},
{
"epoch": 0.10593820271508286,
"learning_rate": 4.9934674051489334e-05,
"loss": 4.5817,
"step": 159
},
{
"epoch": 0.10660448071958024,
"learning_rate": 4.9932710352499644e-05,
"loss": 4.6066,
"step": 160
},
{
"epoch": 0.10727075872407763,
"learning_rate": 4.9930717614300846e-05,
"loss": 4.6158,
"step": 161
},
{
"epoch": 0.10793703672857499,
"learning_rate": 4.992869583921388e-05,
"loss": 4.5853,
"step": 162
},
{
"epoch": 0.10860331473307237,
"learning_rate": 4.992664502959351e-05,
"loss": 4.594,
"step": 163
},
{
"epoch": 0.10926959273756975,
"learning_rate": 4.9924565187828334e-05,
"loss": 4.5727,
"step": 164
},
{
"epoch": 0.10993587074206712,
"learning_rate": 4.9922456316340746e-05,
"loss": 4.6132,
"step": 165
},
{
"epoch": 0.1106021487465645,
"learning_rate": 4.9920318417586944e-05,
"loss": 4.5477,
"step": 166
},
{
"epoch": 0.11126842675106188,
"learning_rate": 4.9918151494056956e-05,
"loss": 4.572,
"step": 167
},
{
"epoch": 0.11193470475555926,
"learning_rate": 4.9915955548274606e-05,
"loss": 4.5472,
"step": 168
},
{
"epoch": 0.11260098276005663,
"learning_rate": 4.9913730582797514e-05,
"loss": 4.5751,
"step": 169
},
{
"epoch": 0.11326726076455401,
"learning_rate": 4.991147660021711e-05,
"loss": 4.5776,
"step": 170
},
{
"epoch": 0.11393353876905139,
"learning_rate": 4.99091936031586e-05,
"loss": 4.5879,
"step": 171
},
{
"epoch": 0.11459981677354876,
"learning_rate": 4.9906881594281016e-05,
"loss": 4.5459,
"step": 172
},
{
"epoch": 0.11526609477804614,
"learning_rate": 4.9904540576277164e-05,
"loss": 4.6133,
"step": 173
},
{
"epoch": 0.11593237278254352,
"learning_rate": 4.990217055187362e-05,
"loss": 4.5549,
"step": 174
},
{
"epoch": 0.11659865078704089,
"learning_rate": 4.9899771523830776e-05,
"loss": 4.5956,
"step": 175
},
{
"epoch": 0.11726492879153827,
"learning_rate": 4.989734349494277e-05,
"loss": 4.553,
"step": 176
},
{
"epoch": 0.11793120679603565,
"learning_rate": 4.989488646803754e-05,
"loss": 4.5065,
"step": 177
},
{
"epoch": 0.11859748480053302,
"learning_rate": 4.98924004459768e-05,
"loss": 4.6218,
"step": 178
},
{
"epoch": 0.1192637628050304,
"learning_rate": 4.9889885431656024e-05,
"loss": 4.5372,
"step": 179
},
{
"epoch": 0.11993004080952778,
"learning_rate": 4.9887341428004466e-05,
"loss": 4.4715,
"step": 180
},
{
"epoch": 0.12059631881402515,
"learning_rate": 4.988476843798512e-05,
"loss": 4.5993,
"step": 181
},
{
"epoch": 0.12126259681852253,
"learning_rate": 4.988216646459477e-05,
"loss": 4.5548,
"step": 182
},
{
"epoch": 0.12192887482301991,
"learning_rate": 4.9879535510863926e-05,
"loss": 4.4919,
"step": 183
},
{
"epoch": 0.12259515282751728,
"learning_rate": 4.9876875579856884e-05,
"loss": 4.5132,
"step": 184
},
{
"epoch": 0.12326143083201466,
"learning_rate": 4.987418667467167e-05,
"loss": 4.5502,
"step": 185
},
{
"epoch": 0.12392770883651204,
"learning_rate": 4.987146879844006e-05,
"loss": 4.5136,
"step": 186
},
{
"epoch": 0.1245939868410094,
"learning_rate": 4.986872195432757e-05,
"loss": 4.5594,
"step": 187
},
{
"epoch": 0.12526026484550679,
"learning_rate": 4.986594614553346e-05,
"loss": 4.5294,
"step": 188
},
{
"epoch": 0.12592654285000415,
"learning_rate": 4.9863141375290726e-05,
"loss": 4.5162,
"step": 189
},
{
"epoch": 0.12659282085450155,
"learning_rate": 4.986030764686609e-05,
"loss": 4.5408,
"step": 190
},
{
"epoch": 0.12725909885899891,
"learning_rate": 4.985744496356002e-05,
"loss": 4.5451,
"step": 191
},
{
"epoch": 0.12792537686349628,
"learning_rate": 4.9854553328706667e-05,
"loss": 4.5246,
"step": 192
},
{
"epoch": 0.12859165486799368,
"learning_rate": 4.985163274567394e-05,
"loss": 4.549,
"step": 193
},
{
"epoch": 0.12925793287249104,
"learning_rate": 4.984868321786345e-05,
"loss": 4.5119,
"step": 194
},
{
"epoch": 0.1299242108769884,
"learning_rate": 4.984570474871053e-05,
"loss": 4.5146,
"step": 195
},
{
"epoch": 0.1305904888814858,
"learning_rate": 4.9842697341684195e-05,
"loss": 4.4813,
"step": 196
},
{
"epoch": 0.13125676688598317,
"learning_rate": 4.983966100028721e-05,
"loss": 4.4736,
"step": 197
},
{
"epoch": 0.13192304489048054,
"learning_rate": 4.983659572805598e-05,
"loss": 4.5111,
"step": 198
},
{
"epoch": 0.13258932289497793,
"learning_rate": 4.9833501528560654e-05,
"loss": 4.5274,
"step": 199
},
{
"epoch": 0.1332556008994753,
"learning_rate": 4.9830378405405056e-05,
"loss": 4.5251,
"step": 200
},
{
"epoch": 0.1339218789039727,
"learning_rate": 4.9827226362226695e-05,
"loss": 4.4726,
"step": 201
},
{
"epoch": 0.13458815690847006,
"learning_rate": 4.982404540269677e-05,
"loss": 4.5376,
"step": 202
},
{
"epoch": 0.13525443491296743,
"learning_rate": 4.982083553052015e-05,
"loss": 4.5167,
"step": 203
},
{
"epoch": 0.13592071291746483,
"learning_rate": 4.981759674943538e-05,
"loss": 4.5353,
"step": 204
},
{
"epoch": 0.1365869909219622,
"learning_rate": 4.981432906321469e-05,
"loss": 4.524,
"step": 205
},
{
"epoch": 0.13725326892645956,
"learning_rate": 4.981103247566396e-05,
"loss": 4.5275,
"step": 206
},
{
"epoch": 0.13791954693095695,
"learning_rate": 4.980770699062273e-05,
"loss": 4.5068,
"step": 207
},
{
"epoch": 0.13858582493545432,
"learning_rate": 4.980435261196421e-05,
"loss": 4.4791,
"step": 208
},
{
"epoch": 0.1392521029399517,
"learning_rate": 4.980096934359526e-05,
"loss": 4.5073,
"step": 209
},
{
"epoch": 0.13991838094444908,
"learning_rate": 4.9797557189456376e-05,
"loss": 4.5383,
"step": 210
},
{
"epoch": 0.14058465894894645,
"learning_rate": 4.9794116153521695e-05,
"loss": 4.5615,
"step": 211
},
{
"epoch": 0.14125093695344382,
"learning_rate": 4.9790646239799035e-05,
"loss": 4.4864,
"step": 212
},
{
"epoch": 0.1419172149579412,
"learning_rate": 4.978714745232979e-05,
"loss": 4.4746,
"step": 213
},
{
"epoch": 0.14258349296243858,
"learning_rate": 4.978361979518901e-05,
"loss": 4.5139,
"step": 214
},
{
"epoch": 0.14324977096693595,
"learning_rate": 4.978006327248537e-05,
"loss": 4.4864,
"step": 215
},
{
"epoch": 0.14391604897143334,
"learning_rate": 4.977647788836117e-05,
"loss": 4.4936,
"step": 216
},
{
"epoch": 0.1445823269759307,
"learning_rate": 4.977286364699232e-05,
"loss": 4.497,
"step": 217
},
{
"epoch": 0.14524860498042808,
"learning_rate": 4.976922055258833e-05,
"loss": 4.4785,
"step": 218
},
{
"epoch": 0.14591488298492547,
"learning_rate": 4.976554860939233e-05,
"loss": 4.4677,
"step": 219
},
{
"epoch": 0.14658116098942284,
"learning_rate": 4.9761847821681045e-05,
"loss": 4.4901,
"step": 220
},
{
"epoch": 0.1472474389939202,
"learning_rate": 4.9758118193764794e-05,
"loss": 4.5107,
"step": 221
},
{
"epoch": 0.1479137169984176,
"learning_rate": 4.9754359729987475e-05,
"loss": 4.5,
"step": 222
},
{
"epoch": 0.14857999500291497,
"learning_rate": 4.9750572434726603e-05,
"loss": 4.4358,
"step": 223
},
{
"epoch": 0.14924627300741233,
"learning_rate": 4.974675631239324e-05,
"loss": 4.4847,
"step": 224
},
{
"epoch": 0.14991255101190973,
"learning_rate": 4.974291136743204e-05,
"loss": 4.5072,
"step": 225
},
{
"epoch": 0.1505788290164071,
"learning_rate": 4.973903760432123e-05,
"loss": 4.4339,
"step": 226
},
{
"epoch": 0.15124510702090446,
"learning_rate": 4.9735135027572576e-05,
"loss": 4.4287,
"step": 227
},
{
"epoch": 0.15191138502540186,
"learning_rate": 4.973120364173144e-05,
"loss": 4.4386,
"step": 228
},
{
"epoch": 0.15257766302989922,
"learning_rate": 4.972724345137671e-05,
"loss": 4.5081,
"step": 229
},
{
"epoch": 0.1532439410343966,
"learning_rate": 4.9723254461120826e-05,
"loss": 4.4975,
"step": 230
},
{
"epoch": 0.153910219038894,
"learning_rate": 4.97192366756098e-05,
"loss": 4.4634,
"step": 231
},
{
"epoch": 0.15457649704339135,
"learning_rate": 4.9715190099523146e-05,
"loss": 4.4864,
"step": 232
},
{
"epoch": 0.15524277504788872,
"learning_rate": 4.971111473757392e-05,
"loss": 4.5022,
"step": 233
},
{
"epoch": 0.15590905305238612,
"learning_rate": 4.970701059450872e-05,
"loss": 4.4954,
"step": 234
},
{
"epoch": 0.15657533105688348,
"learning_rate": 4.970287767510764e-05,
"loss": 4.4835,
"step": 235
},
{
"epoch": 0.15724160906138085,
"learning_rate": 4.9698715984184326e-05,
"loss": 4.4899,
"step": 236
},
{
"epoch": 0.15790788706587824,
"learning_rate": 4.96945255265859e-05,
"loss": 4.5067,
"step": 237
},
{
"epoch": 0.1585741650703756,
"learning_rate": 4.9690306307192996e-05,
"loss": 4.4808,
"step": 238
},
{
"epoch": 0.15924044307487298,
"learning_rate": 4.9686058330919764e-05,
"loss": 4.4594,
"step": 239
},
{
"epoch": 0.15990672107937037,
"learning_rate": 4.9681781602713826e-05,
"loss": 4.4624,
"step": 240
},
{
"epoch": 0.16057299908386774,
"learning_rate": 4.967747612755632e-05,
"loss": 4.4525,
"step": 241
},
{
"epoch": 0.1612392770883651,
"learning_rate": 4.9673141910461826e-05,
"loss": 4.441,
"step": 242
},
{
"epoch": 0.1619055550928625,
"learning_rate": 4.966877895647843e-05,
"loss": 4.426,
"step": 243
},
{
"epoch": 0.16257183309735987,
"learning_rate": 4.966438727068767e-05,
"loss": 4.4253,
"step": 244
},
{
"epoch": 0.16323811110185724,
"learning_rate": 4.9659966858204576e-05,
"loss": 4.4875,
"step": 245
},
{
"epoch": 0.16390438910635463,
"learning_rate": 4.96555177241776e-05,
"loss": 4.4981,
"step": 246
},
{
"epoch": 0.164570667110852,
"learning_rate": 4.965103987378866e-05,
"loss": 4.4453,
"step": 247
},
{
"epoch": 0.1652369451153494,
"learning_rate": 4.964653331225314e-05,
"loss": 4.453,
"step": 248
},
{
"epoch": 0.16590322311984676,
"learning_rate": 4.964199804481984e-05,
"loss": 4.4701,
"step": 249
},
{
"epoch": 0.16656950112434413,
"learning_rate": 4.9637434076770994e-05,
"loss": 4.4298,
"step": 250
},
{
"epoch": 0.16723577912884152,
"learning_rate": 4.9632841413422276e-05,
"loss": 4.4781,
"step": 251
},
{
"epoch": 0.1679020571333389,
"learning_rate": 4.962822006012278e-05,
"loss": 4.4735,
"step": 252
},
{
"epoch": 0.16856833513783626,
"learning_rate": 4.962357002225499e-05,
"loss": 4.4732,
"step": 253
},
{
"epoch": 0.16923461314233365,
"learning_rate": 4.961889130523485e-05,
"loss": 4.4595,
"step": 254
},
{
"epoch": 0.16990089114683102,
"learning_rate": 4.961418391451166e-05,
"loss": 4.4666,
"step": 255
},
{
"epoch": 0.17056716915132838,
"learning_rate": 4.960944785556814e-05,
"loss": 4.4482,
"step": 256
},
{
"epoch": 0.17123344715582578,
"learning_rate": 4.960468313392039e-05,
"loss": 4.4532,
"step": 257
},
{
"epoch": 0.17189972516032315,
"learning_rate": 4.9599889755117895e-05,
"loss": 4.4472,
"step": 258
},
{
"epoch": 0.17256600316482051,
"learning_rate": 4.959506772474352e-05,
"loss": 4.4018,
"step": 259
},
{
"epoch": 0.1732322811693179,
"learning_rate": 4.95902170484135e-05,
"loss": 4.4424,
"step": 260
},
{
"epoch": 0.17389855917381528,
"learning_rate": 4.9585337731777434e-05,
"loss": 4.4756,
"step": 261
},
{
"epoch": 0.17456483717831264,
"learning_rate": 4.958042978051829e-05,
"loss": 4.3812,
"step": 262
},
{
"epoch": 0.17523111518281004,
"learning_rate": 4.957549320035235e-05,
"loss": 4.4625,
"step": 263
},
{
"epoch": 0.1758973931873074,
"learning_rate": 4.957052799702928e-05,
"loss": 4.4497,
"step": 264
},
{
"epoch": 0.17656367119180477,
"learning_rate": 4.956553417633207e-05,
"loss": 4.4164,
"step": 265
},
{
"epoch": 0.17722994919630217,
"learning_rate": 4.956051174407703e-05,
"loss": 4.373,
"step": 266
},
{
"epoch": 0.17789622720079953,
"learning_rate": 4.955546070611381e-05,
"loss": 4.3998,
"step": 267
},
{
"epoch": 0.1785625052052969,
"learning_rate": 4.955038106832537e-05,
"loss": 4.4354,
"step": 268
},
{
"epoch": 0.1792287832097943,
"learning_rate": 4.9545272836627986e-05,
"loss": 4.4223,
"step": 269
},
{
"epoch": 0.17989506121429166,
"learning_rate": 4.9540136016971215e-05,
"loss": 4.4381,
"step": 270
},
{
"epoch": 0.18056133921878903,
"learning_rate": 4.953497061533795e-05,
"loss": 4.4095,
"step": 271
},
{
"epoch": 0.18122761722328642,
"learning_rate": 4.952977663774434e-05,
"loss": 4.4331,
"step": 272
},
{
"epoch": 0.1818938952277838,
"learning_rate": 4.952455409023982e-05,
"loss": 4.489,
"step": 273
},
{
"epoch": 0.18256017323228116,
"learning_rate": 4.9519302978907125e-05,
"loss": 4.4181,
"step": 274
},
{
"epoch": 0.18322645123677855,
"learning_rate": 4.951402330986222e-05,
"loss": 4.4823,
"step": 275
},
{
"epoch": 0.18389272924127592,
"learning_rate": 4.950871508925437e-05,
"loss": 4.4344,
"step": 276
},
{
"epoch": 0.1845590072457733,
"learning_rate": 4.9503378323266076e-05,
"loss": 4.4195,
"step": 277
},
{
"epoch": 0.18522528525027068,
"learning_rate": 4.9498013018113076e-05,
"loss": 4.44,
"step": 278
},
{
"epoch": 0.18589156325476805,
"learning_rate": 4.949261918004437e-05,
"loss": 4.4229,
"step": 279
},
{
"epoch": 0.18655784125926542,
"learning_rate": 4.948719681534218e-05,
"loss": 4.422,
"step": 280
},
{
"epoch": 0.1872241192637628,
"learning_rate": 4.9481745930321935e-05,
"loss": 4.4572,
"step": 281
},
{
"epoch": 0.18789039726826018,
"learning_rate": 4.94762665313323e-05,
"loss": 4.445,
"step": 282
},
{
"epoch": 0.18855667527275755,
"learning_rate": 4.947075862475518e-05,
"loss": 4.4102,
"step": 283
},
{
"epoch": 0.18922295327725494,
"learning_rate": 4.946522221700562e-05,
"loss": 4.39,
"step": 284
},
{
"epoch": 0.1898892312817523,
"learning_rate": 4.94596573145319e-05,
"loss": 4.3899,
"step": 285
},
{
"epoch": 0.19055550928624967,
"learning_rate": 4.9454063923815485e-05,
"loss": 4.4474,
"step": 286
},
{
"epoch": 0.19122178729074707,
"learning_rate": 4.944844205137101e-05,
"loss": 4.4055,
"step": 287
},
{
"epoch": 0.19188806529524444,
"learning_rate": 4.94427917037463e-05,
"loss": 4.4216,
"step": 288
},
{
"epoch": 0.1925543432997418,
"learning_rate": 4.9437112887522316e-05,
"loss": 4.4892,
"step": 289
},
{
"epoch": 0.1932206213042392,
"learning_rate": 4.943140560931321e-05,
"loss": 4.4498,
"step": 290
},
{
"epoch": 0.19388689930873657,
"learning_rate": 4.942566987576625e-05,
"loss": 4.3976,
"step": 291
},
{
"epoch": 0.19455317731323396,
"learning_rate": 4.941990569356187e-05,
"loss": 4.3917,
"step": 292
},
{
"epoch": 0.19521945531773133,
"learning_rate": 4.9414113069413646e-05,
"loss": 4.4213,
"step": 293
},
{
"epoch": 0.1958857333222287,
"learning_rate": 4.9408292010068244e-05,
"loss": 4.4582,
"step": 294
},
{
"epoch": 0.1965520113267261,
"learning_rate": 4.9402442522305494e-05,
"loss": 4.3994,
"step": 295
},
{
"epoch": 0.19721828933122346,
"learning_rate": 4.939656461293829e-05,
"loss": 4.4307,
"step": 296
},
{
"epoch": 0.19788456733572082,
"learning_rate": 4.9390658288812675e-05,
"loss": 4.3798,
"step": 297
},
{
"epoch": 0.19855084534021822,
"learning_rate": 4.9384723556807744e-05,
"loss": 4.4153,
"step": 298
},
{
"epoch": 0.19921712334471559,
"learning_rate": 4.937876042383571e-05,
"loss": 4.3923,
"step": 299
},
{
"epoch": 0.19988340134921295,
"learning_rate": 4.937276889684185e-05,
"loss": 4.3945,
"step": 300
},
{
"epoch": 0.20054967935371035,
"learning_rate": 4.9366748982804516e-05,
"loss": 4.3906,
"step": 301
},
{
"epoch": 0.20121595735820771,
"learning_rate": 4.9360700688735124e-05,
"loss": 4.4157,
"step": 302
},
{
"epoch": 0.20188223536270508,
"learning_rate": 4.935462402167814e-05,
"loss": 4.4023,
"step": 303
},
{
"epoch": 0.20254851336720248,
"learning_rate": 4.9348518988711066e-05,
"loss": 4.3835,
"step": 304
},
{
"epoch": 0.20321479137169984,
"learning_rate": 4.934238559694448e-05,
"loss": 4.4232,
"step": 305
},
{
"epoch": 0.2038810693761972,
"learning_rate": 4.933622385352194e-05,
"loss": 4.394,
"step": 306
},
{
"epoch": 0.2045473473806946,
"learning_rate": 4.933003376562006e-05,
"loss": 4.4142,
"step": 307
},
{
"epoch": 0.20521362538519197,
"learning_rate": 4.932381534044845e-05,
"loss": 4.414,
"step": 308
},
{
"epoch": 0.20587990338968934,
"learning_rate": 4.931756858524973e-05,
"loss": 4.3999,
"step": 309
},
{
"epoch": 0.20654618139418673,
"learning_rate": 4.931129350729953e-05,
"loss": 4.469,
"step": 310
},
{
"epoch": 0.2072124593986841,
"learning_rate": 4.930499011390644e-05,
"loss": 4.3687,
"step": 311
},
{
"epoch": 0.20787873740318147,
"learning_rate": 4.9298658412412036e-05,
"loss": 4.3746,
"step": 312
},
{
"epoch": 0.20854501540767886,
"learning_rate": 4.929229841019089e-05,
"loss": 4.3859,
"step": 313
},
{
"epoch": 0.20921129341217623,
"learning_rate": 4.92859101146505e-05,
"loss": 4.407,
"step": 314
},
{
"epoch": 0.2098775714166736,
"learning_rate": 4.9279493533231346e-05,
"loss": 4.356,
"step": 315
},
{
"epoch": 0.210543849421171,
"learning_rate": 4.927304867340684e-05,
"loss": 4.3561,
"step": 316
},
{
"epoch": 0.21121012742566836,
"learning_rate": 4.926657554268333e-05,
"loss": 4.3962,
"step": 317
},
{
"epoch": 0.21187640543016573,
"learning_rate": 4.926007414860009e-05,
"loss": 4.3516,
"step": 318
},
{
"epoch": 0.21254268343466312,
"learning_rate": 4.925354449872932e-05,
"loss": 4.389,
"step": 319
},
{
"epoch": 0.2132089614391605,
"learning_rate": 4.924698660067612e-05,
"loss": 4.3703,
"step": 320
},
{
"epoch": 0.21387523944365786,
"learning_rate": 4.924040046207849e-05,
"loss": 4.3894,
"step": 321
},
{
"epoch": 0.21454151744815525,
"learning_rate": 4.923378609060734e-05,
"loss": 4.3517,
"step": 322
},
{
"epoch": 0.21520779545265262,
"learning_rate": 4.9227143493966446e-05,
"loss": 4.4595,
"step": 323
},
{
"epoch": 0.21587407345714998,
"learning_rate": 4.922047267989246e-05,
"loss": 4.3718,
"step": 324
},
{
"epoch": 0.21654035146164738,
"learning_rate": 4.921377365615489e-05,
"loss": 4.3841,
"step": 325
},
{
"epoch": 0.21720662946614475,
"learning_rate": 4.9207046430556135e-05,
"loss": 4.3538,
"step": 326
},
{
"epoch": 0.2178729074706421,
"learning_rate": 4.92002910109314e-05,
"loss": 4.3913,
"step": 327
},
{
"epoch": 0.2185391854751395,
"learning_rate": 4.919350740514874e-05,
"loss": 4.3916,
"step": 328
},
{
"epoch": 0.21920546347963688,
"learning_rate": 4.918669562110906e-05,
"loss": 4.4403,
"step": 329
},
{
"epoch": 0.21987174148413424,
"learning_rate": 4.9179855666746054e-05,
"loss": 4.366,
"step": 330
},
{
"epoch": 0.22053801948863164,
"learning_rate": 4.917298755002624e-05,
"loss": 4.3779,
"step": 331
},
{
"epoch": 0.221204297493129,
"learning_rate": 4.916609127894895e-05,
"loss": 4.4494,
"step": 332
},
{
"epoch": 0.22187057549762637,
"learning_rate": 4.915916686154626e-05,
"loss": 4.3516,
"step": 333
},
{
"epoch": 0.22253685350212377,
"learning_rate": 4.91522143058831e-05,
"loss": 4.4395,
"step": 334
},
{
"epoch": 0.22320313150662113,
"learning_rate": 4.914523362005711e-05,
"loss": 4.4005,
"step": 335
},
{
"epoch": 0.22386940951111853,
"learning_rate": 4.913822481219873e-05,
"loss": 4.4068,
"step": 336
},
{
"epoch": 0.2245356875156159,
"learning_rate": 4.9131187890471134e-05,
"loss": 4.4002,
"step": 337
},
{
"epoch": 0.22520196552011326,
"learning_rate": 4.9124122863070255e-05,
"loss": 4.388,
"step": 338
},
{
"epoch": 0.22586824352461066,
"learning_rate": 4.911702973822474e-05,
"loss": 4.3542,
"step": 339
},
{
"epoch": 0.22653452152910802,
"learning_rate": 4.910990852419599e-05,
"loss": 4.3643,
"step": 340
},
{
"epoch": 0.2272007995336054,
"learning_rate": 4.910275922927809e-05,
"loss": 4.395,
"step": 341
},
{
"epoch": 0.22786707753810279,
"learning_rate": 4.9095581861797876e-05,
"loss": 4.3709,
"step": 342
},
{
"epoch": 0.22853335554260015,
"learning_rate": 4.9088376430114816e-05,
"loss": 4.3973,
"step": 343
},
{
"epoch": 0.22919963354709752,
"learning_rate": 4.9081142942621125e-05,
"loss": 4.4222,
"step": 344
},
{
"epoch": 0.22986591155159491,
"learning_rate": 4.907388140774165e-05,
"loss": 4.3416,
"step": 345
},
{
"epoch": 0.23053218955609228,
"learning_rate": 4.9066591833933946e-05,
"loss": 4.4016,
"step": 346
},
{
"epoch": 0.23119846756058965,
"learning_rate": 4.905927422968819e-05,
"loss": 4.3485,
"step": 347
},
{
"epoch": 0.23186474556508704,
"learning_rate": 4.905192860352722e-05,
"loss": 4.3736,
"step": 348
},
{
"epoch": 0.2325310235695844,
"learning_rate": 4.9044554964006505e-05,
"loss": 4.3722,
"step": 349
},
{
"epoch": 0.23319730157408178,
"learning_rate": 4.903715331971417e-05,
"loss": 4.311,
"step": 350
},
{
"epoch": 0.23386357957857917,
"learning_rate": 4.902972367927091e-05,
"loss": 4.3522,
"step": 351
},
{
"epoch": 0.23452985758307654,
"learning_rate": 4.9022266051330055e-05,
"loss": 4.3447,
"step": 352
},
{
"epoch": 0.2351961355875739,
"learning_rate": 4.9014780444577544e-05,
"loss": 4.4169,
"step": 353
},
{
"epoch": 0.2358624135920713,
"learning_rate": 4.900726686773187e-05,
"loss": 4.3726,
"step": 354
},
{
"epoch": 0.23652869159656867,
"learning_rate": 4.899972532954413e-05,
"loss": 4.4393,
"step": 355
},
{
"epoch": 0.23719496960106604,
"learning_rate": 4.8992155838797976e-05,
"loss": 4.3376,
"step": 356
},
{
"epoch": 0.23786124760556343,
"learning_rate": 4.898455840430962e-05,
"loss": 4.325,
"step": 357
},
{
"epoch": 0.2385275256100608,
"learning_rate": 4.8976933034927816e-05,
"loss": 4.3556,
"step": 358
},
{
"epoch": 0.23919380361455816,
"learning_rate": 4.8969279739533856e-05,
"loss": 4.3251,
"step": 359
},
{
"epoch": 0.23986008161905556,
"learning_rate": 4.896159852704156e-05,
"loss": 4.3675,
"step": 360
},
{
"epoch": 0.24052635962355293,
"learning_rate": 4.895388940639727e-05,
"loss": 4.396,
"step": 361
},
{
"epoch": 0.2411926376280503,
"learning_rate": 4.894615238657981e-05,
"loss": 4.404,
"step": 362
},
{
"epoch": 0.2418589156325477,
"learning_rate": 4.893838747660052e-05,
"loss": 4.4052,
"step": 363
},
{
"epoch": 0.24252519363704506,
"learning_rate": 4.893059468550321e-05,
"loss": 4.3857,
"step": 364
},
{
"epoch": 0.24319147164154242,
"learning_rate": 4.892277402236417e-05,
"loss": 4.3674,
"step": 365
},
{
"epoch": 0.24385774964603982,
"learning_rate": 4.8914925496292165e-05,
"loss": 4.3845,
"step": 366
},
{
"epoch": 0.24452402765053718,
"learning_rate": 4.890704911642838e-05,
"loss": 4.4042,
"step": 367
},
{
"epoch": 0.24519030565503455,
"learning_rate": 4.889914489194647e-05,
"loss": 4.3262,
"step": 368
},
{
"epoch": 0.24585658365953195,
"learning_rate": 4.8891212832052516e-05,
"loss": 4.3431,
"step": 369
},
{
"epoch": 0.2465228616640293,
"learning_rate": 4.8883252945985e-05,
"loss": 4.3381,
"step": 370
},
{
"epoch": 0.24718913966852668,
"learning_rate": 4.887526524301484e-05,
"loss": 4.3895,
"step": 371
},
{
"epoch": 0.24785541767302408,
"learning_rate": 4.886724973244533e-05,
"loss": 4.3479,
"step": 372
},
{
"epoch": 0.24852169567752144,
"learning_rate": 4.885920642361217e-05,
"loss": 4.3252,
"step": 373
},
{
"epoch": 0.2491879736820188,
"learning_rate": 4.8851135325883424e-05,
"loss": 4.3856,
"step": 374
},
{
"epoch": 0.2498542516865162,
"learning_rate": 4.884303644865953e-05,
"loss": 4.3647,
"step": 375
},
{
"epoch": 0.25052052969101357,
"learning_rate": 4.8834909801373264e-05,
"loss": 4.4015,
"step": 376
},
{
"epoch": 0.25118680769551094,
"learning_rate": 4.8826755393489774e-05,
"loss": 4.3679,
"step": 377
},
{
"epoch": 0.2518530857000083,
"learning_rate": 4.881857323450652e-05,
"loss": 4.3645,
"step": 378
},
{
"epoch": 0.25251936370450573,
"learning_rate": 4.881036333395329e-05,
"loss": 4.3342,
"step": 379
},
{
"epoch": 0.2531856417090031,
"learning_rate": 4.880212570139218e-05,
"loss": 4.4006,
"step": 380
},
{
"epoch": 0.25385191971350046,
"learning_rate": 4.8793860346417574e-05,
"loss": 4.3553,
"step": 381
},
{
"epoch": 0.25451819771799783,
"learning_rate": 4.8785567278656186e-05,
"loss": 4.3129,
"step": 382
},
{
"epoch": 0.2551844757224952,
"learning_rate": 4.877724650776696e-05,
"loss": 4.3494,
"step": 383
},
{
"epoch": 0.25585075372699256,
"learning_rate": 4.8768898043441136e-05,
"loss": 4.4031,
"step": 384
},
{
"epoch": 0.25651703173149,
"learning_rate": 4.876052189540219e-05,
"loss": 4.3717,
"step": 385
},
{
"epoch": 0.25718330973598735,
"learning_rate": 4.875211807340584e-05,
"loss": 4.3652,
"step": 386
},
{
"epoch": 0.2578495877404847,
"learning_rate": 4.874368658724007e-05,
"loss": 4.3544,
"step": 387
},
{
"epoch": 0.2585158657449821,
"learning_rate": 4.873522744672503e-05,
"loss": 4.3551,
"step": 388
},
{
"epoch": 0.25918214374947945,
"learning_rate": 4.8726740661713135e-05,
"loss": 4.3392,
"step": 389
},
{
"epoch": 0.2598484217539768,
"learning_rate": 4.871822624208895e-05,
"loss": 4.3595,
"step": 390
},
{
"epoch": 0.26051469975847424,
"learning_rate": 4.8709684197769266e-05,
"loss": 4.3586,
"step": 391
},
{
"epoch": 0.2611809777629716,
"learning_rate": 4.870111453870302e-05,
"loss": 4.3792,
"step": 392
},
{
"epoch": 0.261847255767469,
"learning_rate": 4.869251727487132e-05,
"loss": 4.3683,
"step": 393
},
{
"epoch": 0.26251353377196635,
"learning_rate": 4.868389241628742e-05,
"loss": 4.3693,
"step": 394
},
{
"epoch": 0.2631798117764637,
"learning_rate": 4.8675239972996746e-05,
"loss": 4.3389,
"step": 395
},
{
"epoch": 0.2638460897809611,
"learning_rate": 4.8666559955076804e-05,
"loss": 4.3907,
"step": 396
},
{
"epoch": 0.2645123677854585,
"learning_rate": 4.8657852372637253e-05,
"loss": 4.3473,
"step": 397
},
{
"epoch": 0.26517864578995587,
"learning_rate": 4.8649117235819835e-05,
"loss": 4.3294,
"step": 398
},
{
"epoch": 0.26584492379445324,
"learning_rate": 4.86403545547984e-05,
"loss": 4.3471,
"step": 399
},
{
"epoch": 0.2665112017989506,
"learning_rate": 4.863156433977884e-05,
"loss": 4.3125,
"step": 400
},
{
"epoch": 0.26717747980344797,
"learning_rate": 4.86227466009992e-05,
"loss": 4.395,
"step": 401
},
{
"epoch": 0.2678437578079454,
"learning_rate": 4.861390134872949e-05,
"loss": 4.336,
"step": 402
},
{
"epoch": 0.26851003581244276,
"learning_rate": 4.860502859327182e-05,
"loss": 4.3663,
"step": 403
},
{
"epoch": 0.2691763138169401,
"learning_rate": 4.8596128344960304e-05,
"loss": 4.3622,
"step": 404
},
{
"epoch": 0.2698425918214375,
"learning_rate": 4.858720061416111e-05,
"loss": 4.3397,
"step": 405
},
{
"epoch": 0.27050886982593486,
"learning_rate": 4.857824541127238e-05,
"loss": 4.3679,
"step": 406
},
{
"epoch": 0.27117514783043223,
"learning_rate": 4.856926274672427e-05,
"loss": 4.3421,
"step": 407
},
{
"epoch": 0.27184142583492965,
"learning_rate": 4.856025263097892e-05,
"loss": 4.3228,
"step": 408
},
{
"epoch": 0.272507703839427,
"learning_rate": 4.855121507453045e-05,
"loss": 4.2952,
"step": 409
},
{
"epoch": 0.2731739818439244,
"learning_rate": 4.854215008790492e-05,
"loss": 4.3737,
"step": 410
},
{
"epoch": 0.27384025984842175,
"learning_rate": 4.8533057681660356e-05,
"loss": 4.3474,
"step": 411
},
{
"epoch": 0.2745065378529191,
"learning_rate": 4.85239378663867e-05,
"loss": 4.3744,
"step": 412
},
{
"epoch": 0.2751728158574165,
"learning_rate": 4.8514790652705847e-05,
"loss": 4.337,
"step": 413
},
{
"epoch": 0.2758390938619139,
"learning_rate": 4.8505616051271584e-05,
"loss": 4.3603,
"step": 414
},
{
"epoch": 0.2765053718664113,
"learning_rate": 4.8496414072769594e-05,
"loss": 4.3149,
"step": 415
},
{
"epoch": 0.27717164987090864,
"learning_rate": 4.848718472791746e-05,
"loss": 4.2901,
"step": 416
},
{
"epoch": 0.277837927875406,
"learning_rate": 4.847792802746461e-05,
"loss": 4.3632,
"step": 417
},
{
"epoch": 0.2785042058799034,
"learning_rate": 4.846864398219237e-05,
"loss": 4.3358,
"step": 418
},
{
"epoch": 0.27917048388440074,
"learning_rate": 4.8459332602913904e-05,
"loss": 4.352,
"step": 419
},
{
"epoch": 0.27983676188889817,
"learning_rate": 4.8449993900474187e-05,
"loss": 4.3812,
"step": 420
},
{
"epoch": 0.28050303989339553,
"learning_rate": 4.844062788575005e-05,
"loss": 4.3054,
"step": 421
},
{
"epoch": 0.2811693178978929,
"learning_rate": 4.843123456965012e-05,
"loss": 4.3336,
"step": 422
},
{
"epoch": 0.28183559590239027,
"learning_rate": 4.8421813963114815e-05,
"loss": 4.3006,
"step": 423
},
{
"epoch": 0.28250187390688764,
"learning_rate": 4.8412366077116344e-05,
"loss": 4.3658,
"step": 424
},
{
"epoch": 0.283168151911385,
"learning_rate": 4.840289092265871e-05,
"loss": 4.2924,
"step": 425
},
{
"epoch": 0.2838344299158824,
"learning_rate": 4.8393388510777635e-05,
"loss": 4.3534,
"step": 426
},
{
"epoch": 0.2845007079203798,
"learning_rate": 4.838385885254062e-05,
"loss": 4.3209,
"step": 427
},
{
"epoch": 0.28516698592487716,
"learning_rate": 4.8374301959046886e-05,
"loss": 4.2978,
"step": 428
},
{
"epoch": 0.2858332639293745,
"learning_rate": 4.836471784142738e-05,
"loss": 4.3677,
"step": 429
},
{
"epoch": 0.2864995419338719,
"learning_rate": 4.835510651084475e-05,
"loss": 4.3094,
"step": 430
},
{
"epoch": 0.28716581993836926,
"learning_rate": 4.834546797849335e-05,
"loss": 4.2839,
"step": 431
},
{
"epoch": 0.2878320979428667,
"learning_rate": 4.8335802255599217e-05,
"loss": 4.3324,
"step": 432
},
{
"epoch": 0.28849837594736405,
"learning_rate": 4.832610935342003e-05,
"loss": 4.3461,
"step": 433
},
{
"epoch": 0.2891646539518614,
"learning_rate": 4.8316389283245155e-05,
"loss": 4.2987,
"step": 434
},
{
"epoch": 0.2898309319563588,
"learning_rate": 4.830664205639559e-05,
"loss": 4.2962,
"step": 435
},
{
"epoch": 0.29049720996085615,
"learning_rate": 4.829686768422397e-05,
"loss": 4.3337,
"step": 436
},
{
"epoch": 0.2911634879653535,
"learning_rate": 4.828706617811452e-05,
"loss": 4.3322,
"step": 437
},
{
"epoch": 0.29182976596985094,
"learning_rate": 4.82772375494831e-05,
"loss": 4.293,
"step": 438
},
{
"epoch": 0.2924960439743483,
"learning_rate": 4.826738180977714e-05,
"loss": 4.2618,
"step": 439
},
{
"epoch": 0.2931623219788457,
"learning_rate": 4.825749897047565e-05,
"loss": 4.2737,
"step": 440
},
{
"epoch": 0.29382859998334304,
"learning_rate": 4.8247589043089215e-05,
"loss": 4.3273,
"step": 441
},
{
"epoch": 0.2944948779878404,
"learning_rate": 4.823765203915995e-05,
"loss": 4.273,
"step": 442
},
{
"epoch": 0.2951611559923378,
"learning_rate": 4.822768797026151e-05,
"loss": 4.3042,
"step": 443
},
{
"epoch": 0.2958274339968352,
"learning_rate": 4.821769684799911e-05,
"loss": 4.367,
"step": 444
},
{
"epoch": 0.29649371200133257,
"learning_rate": 4.8207678684009404e-05,
"loss": 4.3187,
"step": 445
},
{
"epoch": 0.29715999000582993,
"learning_rate": 4.81976334899606e-05,
"loss": 4.3225,
"step": 446
},
{
"epoch": 0.2978262680103273,
"learning_rate": 4.8187561277552374e-05,
"loss": 4.3644,
"step": 447
},
{
"epoch": 0.29849254601482467,
"learning_rate": 4.817746205851584e-05,
"loss": 4.3395,
"step": 448
},
{
"epoch": 0.2991588240193221,
"learning_rate": 4.816733584461362e-05,
"loss": 4.3006,
"step": 449
},
{
"epoch": 0.29982510202381946,
"learning_rate": 4.815718264763973e-05,
"loss": 4.3122,
"step": 450
},
{
"epoch": 0.3004913800283168,
"learning_rate": 4.814700247941964e-05,
"loss": 4.3761,
"step": 451
},
{
"epoch": 0.3011576580328142,
"learning_rate": 4.813679535181022e-05,
"loss": 4.3505,
"step": 452
},
{
"epoch": 0.30182393603731156,
"learning_rate": 4.812656127669975e-05,
"loss": 4.3503,
"step": 453
},
{
"epoch": 0.3024902140418089,
"learning_rate": 4.8116300266007884e-05,
"loss": 4.294,
"step": 454
},
{
"epoch": 0.30315649204630635,
"learning_rate": 4.810601233168567e-05,
"loss": 4.2903,
"step": 455
},
{
"epoch": 0.3038227700508037,
"learning_rate": 4.809569748571547e-05,
"loss": 4.3025,
"step": 456
},
{
"epoch": 0.3044890480553011,
"learning_rate": 4.8085355740111046e-05,
"loss": 4.3524,
"step": 457
},
{
"epoch": 0.30515532605979845,
"learning_rate": 4.807498710691746e-05,
"loss": 4.3089,
"step": 458
},
{
"epoch": 0.3058216040642958,
"learning_rate": 4.806459159821107e-05,
"loss": 4.3293,
"step": 459
},
{
"epoch": 0.3064878820687932,
"learning_rate": 4.805416922609959e-05,
"loss": 4.32,
"step": 460
},
{
"epoch": 0.3071541600732906,
"learning_rate": 4.804372000272196e-05,
"loss": 4.3316,
"step": 461
},
{
"epoch": 0.307820438077788,
"learning_rate": 4.803324394024845e-05,
"loss": 4.3145,
"step": 462
},
{
"epoch": 0.30848671608228534,
"learning_rate": 4.8022741050880546e-05,
"loss": 4.3416,
"step": 463
},
{
"epoch": 0.3091529940867827,
"learning_rate": 4.8012211346851024e-05,
"loss": 4.2449,
"step": 464
},
{
"epoch": 0.3098192720912801,
"learning_rate": 4.8001654840423826e-05,
"loss": 4.3183,
"step": 465
},
{
"epoch": 0.31048555009577744,
"learning_rate": 4.799107154389418e-05,
"loss": 4.3192,
"step": 466
},
{
"epoch": 0.31115182810027486,
"learning_rate": 4.798046146958848e-05,
"loss": 4.3324,
"step": 467
},
{
"epoch": 0.31181810610477223,
"learning_rate": 4.79698246298643e-05,
"loss": 4.3416,
"step": 468
},
{
"epoch": 0.3124843841092696,
"learning_rate": 4.795916103711042e-05,
"loss": 4.3037,
"step": 469
},
{
"epoch": 0.31315066211376696,
"learning_rate": 4.794847070374675e-05,
"loss": 4.3129,
"step": 470
},
{
"epoch": 0.31381694011826433,
"learning_rate": 4.793775364222436e-05,
"loss": 4.3191,
"step": 471
},
{
"epoch": 0.3144832181227617,
"learning_rate": 4.792700986502544e-05,
"loss": 4.3342,
"step": 472
},
{
"epoch": 0.3151494961272591,
"learning_rate": 4.791623938466332e-05,
"loss": 4.3435,
"step": 473
},
{
"epoch": 0.3158157741317565,
"learning_rate": 4.7905442213682396e-05,
"loss": 4.2463,
"step": 474
},
{
"epoch": 0.31648205213625386,
"learning_rate": 4.789461836465817e-05,
"loss": 4.2915,
"step": 475
},
{
"epoch": 0.3171483301407512,
"learning_rate": 4.7883767850197225e-05,
"loss": 4.3703,
"step": 476
},
{
"epoch": 0.3178146081452486,
"learning_rate": 4.787289068293718e-05,
"loss": 4.3181,
"step": 477
},
{
"epoch": 0.31848088614974596,
"learning_rate": 4.786198687554672e-05,
"loss": 4.24,
"step": 478
},
{
"epoch": 0.3191471641542434,
"learning_rate": 4.7851056440725526e-05,
"loss": 4.2733,
"step": 479
},
{
"epoch": 0.31981344215874075,
"learning_rate": 4.784009939120433e-05,
"loss": 4.3235,
"step": 480
},
{
"epoch": 0.3204797201632381,
"learning_rate": 4.782911573974482e-05,
"loss": 4.2917,
"step": 481
},
{
"epoch": 0.3211459981677355,
"learning_rate": 4.781810549913972e-05,
"loss": 4.3617,
"step": 482
},
{
"epoch": 0.32181227617223285,
"learning_rate": 4.780706868221267e-05,
"loss": 4.3159,
"step": 483
},
{
"epoch": 0.3224785541767302,
"learning_rate": 4.7796005301818305e-05,
"loss": 4.3571,
"step": 484
},
{
"epoch": 0.32314483218122764,
"learning_rate": 4.778491537084216e-05,
"loss": 4.3384,
"step": 485
},
{
"epoch": 0.323811110185725,
"learning_rate": 4.777379890220074e-05,
"loss": 4.3371,
"step": 486
},
{
"epoch": 0.32447738819022237,
"learning_rate": 4.77626559088414e-05,
"loss": 4.318,
"step": 487
},
{
"epoch": 0.32514366619471974,
"learning_rate": 4.775148640374245e-05,
"loss": 4.2472,
"step": 488
},
{
"epoch": 0.3258099441992171,
"learning_rate": 4.7740290399913033e-05,
"loss": 4.301,
"step": 489
},
{
"epoch": 0.3264762222037145,
"learning_rate": 4.772906791039317e-05,
"loss": 4.3924,
"step": 490
},
{
"epoch": 0.3271425002082119,
"learning_rate": 4.771781894825374e-05,
"loss": 4.3328,
"step": 491
},
{
"epoch": 0.32780877821270926,
"learning_rate": 4.7706543526596445e-05,
"loss": 4.3356,
"step": 492
},
{
"epoch": 0.32847505621720663,
"learning_rate": 4.7695241658553794e-05,
"loss": 4.2828,
"step": 493
},
{
"epoch": 0.329141334221704,
"learning_rate": 4.7683913357289124e-05,
"loss": 4.3,
"step": 494
},
{
"epoch": 0.32980761222620136,
"learning_rate": 4.767255863599653e-05,
"loss": 4.3164,
"step": 495
},
{
"epoch": 0.3304738902306988,
"learning_rate": 4.766117750790091e-05,
"loss": 4.3029,
"step": 496
},
{
"epoch": 0.33114016823519615,
"learning_rate": 4.764976998625789e-05,
"loss": 4.3285,
"step": 497
},
{
"epoch": 0.3318064462396935,
"learning_rate": 4.763833608435385e-05,
"loss": 4.2908,
"step": 498
},
{
"epoch": 0.3324727242441909,
"learning_rate": 4.7626875815505894e-05,
"loss": 4.2819,
"step": 499
},
{
"epoch": 0.33313900224868825,
"learning_rate": 4.761538919306183e-05,
"loss": 4.3024,
"step": 500
},
{
"epoch": 0.3338052802531856,
"learning_rate": 4.7603876230400166e-05,
"loss": 4.2631,
"step": 501
},
{
"epoch": 0.33447155825768304,
"learning_rate": 4.75923369409301e-05,
"loss": 4.2957,
"step": 502
},
{
"epoch": 0.3351378362621804,
"learning_rate": 4.7580771338091464e-05,
"loss": 4.3331,
"step": 503
},
{
"epoch": 0.3358041142666778,
"learning_rate": 4.7569179435354756e-05,
"loss": 4.338,
"step": 504
},
{
"epoch": 0.33647039227117514,
"learning_rate": 4.755756124622111e-05,
"loss": 4.3519,
"step": 505
},
{
"epoch": 0.3371366702756725,
"learning_rate": 4.7545916784222254e-05,
"loss": 4.2666,
"step": 506
},
{
"epoch": 0.3378029482801699,
"learning_rate": 4.753424606292055e-05,
"loss": 4.3137,
"step": 507
},
{
"epoch": 0.3384692262846673,
"learning_rate": 4.752254909590892e-05,
"loss": 4.265,
"step": 508
},
{
"epoch": 0.33913550428916467,
"learning_rate": 4.7510825896810845e-05,
"loss": 4.3073,
"step": 509
},
{
"epoch": 0.33980178229366204,
"learning_rate": 4.749907647928039e-05,
"loss": 4.2592,
"step": 510
},
{
"epoch": 0.3404680602981594,
"learning_rate": 4.7487300857002134e-05,
"loss": 4.2814,
"step": 511
},
{
"epoch": 0.34113433830265677,
"learning_rate": 4.7475499043691186e-05,
"loss": 4.2944,
"step": 512
},
{
"epoch": 0.34180061630715414,
"learning_rate": 4.746367105309315e-05,
"loss": 4.2777,
"step": 513
},
{
"epoch": 0.34246689431165156,
"learning_rate": 4.7451816898984137e-05,
"loss": 4.3257,
"step": 514
},
{
"epoch": 0.3431331723161489,
"learning_rate": 4.74399365951707e-05,
"loss": 4.3083,
"step": 515
},
{
"epoch": 0.3437994503206463,
"learning_rate": 4.7428030155489875e-05,
"loss": 4.2947,
"step": 516
},
{
"epoch": 0.34446572832514366,
"learning_rate": 4.741609759380915e-05,
"loss": 4.2583,
"step": 517
},
{
"epoch": 0.34513200632964103,
"learning_rate": 4.740413892402639e-05,
"loss": 4.3088,
"step": 518
},
{
"epoch": 0.3457982843341384,
"learning_rate": 4.739215416006992e-05,
"loss": 4.2661,
"step": 519
},
{
"epoch": 0.3464645623386358,
"learning_rate": 4.738014331589842e-05,
"loss": 4.3365,
"step": 520
},
{
"epoch": 0.3471308403431332,
"learning_rate": 4.736810640550096e-05,
"loss": 4.2673,
"step": 521
},
{
"epoch": 0.34779711834763055,
"learning_rate": 4.735604344289697e-05,
"loss": 4.2513,
"step": 522
},
{
"epoch": 0.3484633963521279,
"learning_rate": 4.734395444213622e-05,
"loss": 4.299,
"step": 523
},
{
"epoch": 0.3491296743566253,
"learning_rate": 4.7331839417298825e-05,
"loss": 4.3144,
"step": 524
},
{
"epoch": 0.34979595236112265,
"learning_rate": 4.731969838249517e-05,
"loss": 4.2797,
"step": 525
},
{
"epoch": 0.3504622303656201,
"learning_rate": 4.7307531351865976e-05,
"loss": 4.3216,
"step": 526
},
{
"epoch": 0.35112850837011744,
"learning_rate": 4.7295338339582215e-05,
"loss": 4.323,
"step": 527
},
{
"epoch": 0.3517947863746148,
"learning_rate": 4.728311935984513e-05,
"loss": 4.2798,
"step": 528
},
{
"epoch": 0.3524610643791122,
"learning_rate": 4.7270874426886205e-05,
"loss": 4.3008,
"step": 529
},
{
"epoch": 0.35312734238360954,
"learning_rate": 4.7258603554967154e-05,
"loss": 4.3164,
"step": 530
},
{
"epoch": 0.3537936203881069,
"learning_rate": 4.72463067583799e-05,
"loss": 4.3061,
"step": 531
},
{
"epoch": 0.35445989839260433,
"learning_rate": 4.7233984051446564e-05,
"loss": 4.2878,
"step": 532
},
{
"epoch": 0.3551261763971017,
"learning_rate": 4.722163544851945e-05,
"loss": 4.2692,
"step": 533
},
{
"epoch": 0.35579245440159907,
"learning_rate": 4.7209260963981003e-05,
"loss": 4.2621,
"step": 534
},
{
"epoch": 0.35645873240609643,
"learning_rate": 4.719686061224383e-05,
"loss": 4.3184,
"step": 535
},
{
"epoch": 0.3571250104105938,
"learning_rate": 4.7184434407750664e-05,
"loss": 4.2968,
"step": 536
},
{
"epoch": 0.3577912884150912,
"learning_rate": 4.7171982364974346e-05,
"loss": 4.2835,
"step": 537
},
{
"epoch": 0.3584575664195886,
"learning_rate": 4.7159504498417814e-05,
"loss": 4.2714,
"step": 538
},
{
"epoch": 0.35912384442408596,
"learning_rate": 4.714700082261407e-05,
"loss": 4.2761,
"step": 539
},
{
"epoch": 0.3597901224285833,
"learning_rate": 4.71344713521262e-05,
"loss": 4.2837,
"step": 540
},
{
"epoch": 0.3604564004330807,
"learning_rate": 4.71219161015473e-05,
"loss": 4.2803,
"step": 541
},
{
"epoch": 0.36112267843757806,
"learning_rate": 4.710933508550053e-05,
"loss": 4.3266,
"step": 542
},
{
"epoch": 0.3617889564420755,
"learning_rate": 4.7096728318639025e-05,
"loss": 4.3041,
"step": 543
},
{
"epoch": 0.36245523444657285,
"learning_rate": 4.708409581564594e-05,
"loss": 4.2871,
"step": 544
},
{
"epoch": 0.3631215124510702,
"learning_rate": 4.707143759123439e-05,
"loss": 4.2918,
"step": 545
},
{
"epoch": 0.3637877904555676,
"learning_rate": 4.705875366014745e-05,
"loss": 4.3206,
"step": 546
},
{
"epoch": 0.36445406846006495,
"learning_rate": 4.7046044037158126e-05,
"loss": 4.286,
"step": 547
},
{
"epoch": 0.3651203464645623,
"learning_rate": 4.703330873706937e-05,
"loss": 4.2914,
"step": 548
},
{
"epoch": 0.36578662446905974,
"learning_rate": 4.7020547774714016e-05,
"loss": 4.2762,
"step": 549
},
{
"epoch": 0.3664529024735571,
"learning_rate": 4.700776116495481e-05,
"loss": 4.2879,
"step": 550
},
{
"epoch": 0.3671191804780545,
"learning_rate": 4.699494892268436e-05,
"loss": 4.3085,
"step": 551
},
{
"epoch": 0.36778545848255184,
"learning_rate": 4.698211106282511e-05,
"loss": 4.3023,
"step": 552
},
{
"epoch": 0.3684517364870492,
"learning_rate": 4.696924760032937e-05,
"loss": 4.2972,
"step": 553
},
{
"epoch": 0.3691180144915466,
"learning_rate": 4.6956358550179255e-05,
"loss": 4.3259,
"step": 554
},
{
"epoch": 0.369784292496044,
"learning_rate": 4.694344392738668e-05,
"loss": 4.2715,
"step": 555
},
{
"epoch": 0.37045057050054137,
"learning_rate": 4.693050374699335e-05,
"loss": 4.27,
"step": 556
},
{
"epoch": 0.37111684850503873,
"learning_rate": 4.691753802407074e-05,
"loss": 4.3548,
"step": 557
},
{
"epoch": 0.3717831265095361,
"learning_rate": 4.690454677372007e-05,
"loss": 4.3184,
"step": 558
},
{
"epoch": 0.37244940451403347,
"learning_rate": 4.689153001107228e-05,
"loss": 4.326,
"step": 559
},
{
"epoch": 0.37311568251853083,
"learning_rate": 4.687848775128804e-05,
"loss": 4.2631,
"step": 560
},
{
"epoch": 0.37378196052302826,
"learning_rate": 4.686542000955772e-05,
"loss": 4.3253,
"step": 561
},
{
"epoch": 0.3744482385275256,
"learning_rate": 4.685232680110136e-05,
"loss": 4.2675,
"step": 562
},
{
"epoch": 0.375114516532023,
"learning_rate": 4.683920814116866e-05,
"loss": 4.26,
"step": 563
},
{
"epoch": 0.37578079453652036,
"learning_rate": 4.682606404503896e-05,
"loss": 4.3134,
"step": 564
},
{
"epoch": 0.3764470725410177,
"learning_rate": 4.681289452802125e-05,
"loss": 4.2914,
"step": 565
},
{
"epoch": 0.3771133505455151,
"learning_rate": 4.679969960545409e-05,
"loss": 4.2463,
"step": 566
},
{
"epoch": 0.3777796285500125,
"learning_rate": 4.678647929270565e-05,
"loss": 4.2999,
"step": 567
},
{
"epoch": 0.3784459065545099,
"learning_rate": 4.677323360517369e-05,
"loss": 4.2751,
"step": 568
},
{
"epoch": 0.37911218455900725,
"learning_rate": 4.675996255828549e-05,
"loss": 4.2648,
"step": 569
},
{
"epoch": 0.3797784625635046,
"learning_rate": 4.674666616749789e-05,
"loss": 4.2676,
"step": 570
},
{
"epoch": 0.380444740568002,
"learning_rate": 4.6733344448297224e-05,
"loss": 4.3179,
"step": 571
},
{
"epoch": 0.38111101857249935,
"learning_rate": 4.6719997416199354e-05,
"loss": 4.2805,
"step": 572
},
{
"epoch": 0.38177729657699677,
"learning_rate": 4.670662508674962e-05,
"loss": 4.3175,
"step": 573
},
{
"epoch": 0.38244357458149414,
"learning_rate": 4.66932274755228e-05,
"loss": 4.2639,
"step": 574
},
{
"epoch": 0.3831098525859915,
"learning_rate": 4.6679804598123145e-05,
"loss": 4.2832,
"step": 575
},
{
"epoch": 0.3837761305904889,
"learning_rate": 4.666635647018433e-05,
"loss": 4.2793,
"step": 576
},
{
"epoch": 0.38444240859498624,
"learning_rate": 4.665288310736942e-05,
"loss": 4.2557,
"step": 577
},
{
"epoch": 0.3851086865994836,
"learning_rate": 4.663938452537089e-05,
"loss": 4.2854,
"step": 578
},
{
"epoch": 0.38577496460398103,
"learning_rate": 4.662586073991059e-05,
"loss": 4.3111,
"step": 579
},
{
"epoch": 0.3864412426084784,
"learning_rate": 4.6612311766739703e-05,
"loss": 4.28,
"step": 580
},
{
"epoch": 0.38710752061297576,
"learning_rate": 4.6598737621638774e-05,
"loss": 4.2775,
"step": 581
},
{
"epoch": 0.38777379861747313,
"learning_rate": 4.658513832041765e-05,
"loss": 4.2968,
"step": 582
},
{
"epoch": 0.3884400766219705,
"learning_rate": 4.657151387891548e-05,
"loss": 4.2953,
"step": 583
},
{
"epoch": 0.3891063546264679,
"learning_rate": 4.6557864313000695e-05,
"loss": 4.263,
"step": 584
},
{
"epoch": 0.3897726326309653,
"learning_rate": 4.6544189638570994e-05,
"loss": 4.2777,
"step": 585
},
{
"epoch": 0.39043891063546265,
"learning_rate": 4.653048987155332e-05,
"loss": 4.2276,
"step": 586
},
{
"epoch": 0.39110518863996,
"learning_rate": 4.651676502790381e-05,
"loss": 4.3269,
"step": 587
},
{
"epoch": 0.3917714666444574,
"learning_rate": 4.650301512360787e-05,
"loss": 4.2756,
"step": 588
},
{
"epoch": 0.39243774464895476,
"learning_rate": 4.648924017468003e-05,
"loss": 4.3051,
"step": 589
},
{
"epoch": 0.3931040226534522,
"learning_rate": 4.647544019716403e-05,
"loss": 4.2598,
"step": 590
},
{
"epoch": 0.39377030065794955,
"learning_rate": 4.6461615207132756e-05,
"loss": 4.2566,
"step": 591
},
{
"epoch": 0.3944365786624469,
"learning_rate": 4.6447765220688204e-05,
"loss": 4.2586,
"step": 592
},
{
"epoch": 0.3951028566669443,
"learning_rate": 4.6433890253961506e-05,
"loss": 4.2448,
"step": 593
},
{
"epoch": 0.39576913467144165,
"learning_rate": 4.641999032311288e-05,
"loss": 4.2516,
"step": 594
},
{
"epoch": 0.396435412675939,
"learning_rate": 4.6406065444331624e-05,
"loss": 4.2631,
"step": 595
},
{
"epoch": 0.39710169068043644,
"learning_rate": 4.639211563383609e-05,
"loss": 4.2723,
"step": 596
},
{
"epoch": 0.3977679686849338,
"learning_rate": 4.637814090787365e-05,
"loss": 4.2688,
"step": 597
},
{
"epoch": 0.39843424668943117,
"learning_rate": 4.6364141282720744e-05,
"loss": 4.2832,
"step": 598
},
{
"epoch": 0.39910052469392854,
"learning_rate": 4.635011677468275e-05,
"loss": 4.2598,
"step": 599
},
{
"epoch": 0.3997668026984259,
"learning_rate": 4.633606740009408e-05,
"loss": 4.3138,
"step": 600
},
{
"epoch": 0.40043308070292327,
"learning_rate": 4.632199317531808e-05,
"loss": 4.2201,
"step": 601
},
{
"epoch": 0.4010993587074207,
"learning_rate": 4.630789411674704e-05,
"loss": 4.3189,
"step": 602
},
{
"epoch": 0.40176563671191806,
"learning_rate": 4.6293770240802195e-05,
"loss": 4.2446,
"step": 603
},
{
"epoch": 0.40243191471641543,
"learning_rate": 4.627962156393365e-05,
"loss": 4.2733,
"step": 604
},
{
"epoch": 0.4030981927209128,
"learning_rate": 4.6265448102620424e-05,
"loss": 4.3175,
"step": 605
},
{
"epoch": 0.40376447072541016,
"learning_rate": 4.6251249873370396e-05,
"loss": 4.2339,
"step": 606
},
{
"epoch": 0.40443074872990753,
"learning_rate": 4.623702689272029e-05,
"loss": 4.3064,
"step": 607
},
{
"epoch": 0.40509702673440495,
"learning_rate": 4.622277917723565e-05,
"loss": 4.2997,
"step": 608
},
{
"epoch": 0.4057633047389023,
"learning_rate": 4.620850674351084e-05,
"loss": 4.2887,
"step": 609
},
{
"epoch": 0.4064295827433997,
"learning_rate": 4.619420960816902e-05,
"loss": 4.2593,
"step": 610
},
{
"epoch": 0.40709586074789705,
"learning_rate": 4.61798877878621e-05,
"loss": 4.2811,
"step": 611
},
{
"epoch": 0.4077621387523944,
"learning_rate": 4.6165541299270756e-05,
"loss": 4.2913,
"step": 612
},
{
"epoch": 0.4084284167568918,
"learning_rate": 4.615117015910438e-05,
"loss": 4.3161,
"step": 613
},
{
"epoch": 0.4090946947613892,
"learning_rate": 4.61367743841011e-05,
"loss": 4.2581,
"step": 614
},
{
"epoch": 0.4097609727658866,
"learning_rate": 4.612235399102771e-05,
"loss": 4.243,
"step": 615
},
{
"epoch": 0.41042725077038394,
"learning_rate": 4.61079089966797e-05,
"loss": 4.3219,
"step": 616
},
{
"epoch": 0.4110935287748813,
"learning_rate": 4.609343941788119e-05,
"loss": 4.3108,
"step": 617
},
{
"epoch": 0.4117598067793787,
"learning_rate": 4.6078945271484956e-05,
"loss": 4.2747,
"step": 618
},
{
"epoch": 0.41242608478387605,
"learning_rate": 4.6064426574372374e-05,
"loss": 4.3115,
"step": 619
},
{
"epoch": 0.41309236278837347,
"learning_rate": 4.6049883343453416e-05,
"loss": 4.2493,
"step": 620
},
{
"epoch": 0.41375864079287084,
"learning_rate": 4.603531559566664e-05,
"loss": 4.2897,
"step": 621
},
{
"epoch": 0.4144249187973682,
"learning_rate": 4.602072334797913e-05,
"loss": 4.2619,
"step": 622
},
{
"epoch": 0.41509119680186557,
"learning_rate": 4.600610661738654e-05,
"loss": 4.2506,
"step": 623
},
{
"epoch": 0.41575747480636294,
"learning_rate": 4.599146542091302e-05,
"loss": 4.2382,
"step": 624
},
{
"epoch": 0.41642375281086036,
"learning_rate": 4.597679977561122e-05,
"loss": 4.3003,
"step": 625
},
{
"epoch": 0.4170900308153577,
"learning_rate": 4.5962109698562264e-05,
"loss": 4.2225,
"step": 626
},
{
"epoch": 0.4177563088198551,
"learning_rate": 4.5947395206875735e-05,
"loss": 4.3036,
"step": 627
},
{
"epoch": 0.41842258682435246,
"learning_rate": 4.5932656317689635e-05,
"loss": 4.2312,
"step": 628
},
{
"epoch": 0.4190888648288498,
"learning_rate": 4.591789304817042e-05,
"loss": 4.254,
"step": 629
},
{
"epoch": 0.4197551428333472,
"learning_rate": 4.5903105415512905e-05,
"loss": 4.2782,
"step": 630
},
{
"epoch": 0.4204214208378446,
"learning_rate": 4.588829343694029e-05,
"loss": 4.2447,
"step": 631
},
{
"epoch": 0.421087698842342,
"learning_rate": 4.587345712970415e-05,
"loss": 4.2315,
"step": 632
},
{
"epoch": 0.42175397684683935,
"learning_rate": 4.585859651108437e-05,
"loss": 4.2479,
"step": 633
},
{
"epoch": 0.4224202548513367,
"learning_rate": 4.5843711598389156e-05,
"loss": 4.2562,
"step": 634
},
{
"epoch": 0.4230865328558341,
"learning_rate": 4.582880240895502e-05,
"loss": 4.2834,
"step": 635
},
{
"epoch": 0.42375281086033145,
"learning_rate": 4.581386896014674e-05,
"loss": 4.3125,
"step": 636
},
{
"epoch": 0.4244190888648289,
"learning_rate": 4.579891126935737e-05,
"loss": 4.2427,
"step": 637
},
{
"epoch": 0.42508536686932624,
"learning_rate": 4.5783929354008164e-05,
"loss": 4.2338,
"step": 638
},
{
"epoch": 0.4257516448738236,
"learning_rate": 4.5768923231548604e-05,
"loss": 4.2505,
"step": 639
},
{
"epoch": 0.426417922878321,
"learning_rate": 4.5753892919456386e-05,
"loss": 4.2493,
"step": 640
},
{
"epoch": 0.42708420088281834,
"learning_rate": 4.573883843523735e-05,
"loss": 4.2572,
"step": 641
},
{
"epoch": 0.4277504788873157,
"learning_rate": 4.5723759796425506e-05,
"loss": 4.266,
"step": 642
},
{
"epoch": 0.42841675689181313,
"learning_rate": 4.5708657020583e-05,
"loss": 4.3052,
"step": 643
},
{
"epoch": 0.4290830348963105,
"learning_rate": 4.5693530125300074e-05,
"loss": 4.2828,
"step": 644
},
{
"epoch": 0.42974931290080787,
"learning_rate": 4.5678379128195085e-05,
"loss": 4.3018,
"step": 645
},
{
"epoch": 0.43041559090530523,
"learning_rate": 4.5663204046914435e-05,
"loss": 4.2437,
"step": 646
},
{
"epoch": 0.4310818689098026,
"learning_rate": 4.56480048991326e-05,
"loss": 4.3304,
"step": 647
},
{
"epoch": 0.43174814691429997,
"learning_rate": 4.563278170255208e-05,
"loss": 4.2885,
"step": 648
},
{
"epoch": 0.4324144249187974,
"learning_rate": 4.5617534474903375e-05,
"loss": 4.2569,
"step": 649
},
{
"epoch": 0.43308070292329476,
"learning_rate": 4.560226323394498e-05,
"loss": 4.2464,
"step": 650
},
{
"epoch": 0.4337469809277921,
"learning_rate": 4.558696799746336e-05,
"loss": 4.2618,
"step": 651
},
{
"epoch": 0.4344132589322895,
"learning_rate": 4.557164878327292e-05,
"loss": 4.275,
"step": 652
},
{
"epoch": 0.43507953693678686,
"learning_rate": 4.555630560921602e-05,
"loss": 4.2522,
"step": 653
},
{
"epoch": 0.4357458149412842,
"learning_rate": 4.554093849316289e-05,
"loss": 4.3075,
"step": 654
},
{
"epoch": 0.43641209294578165,
"learning_rate": 4.5525547453011663e-05,
"loss": 4.2954,
"step": 655
},
{
"epoch": 0.437078370950279,
"learning_rate": 4.551013250668833e-05,
"loss": 4.2741,
"step": 656
},
{
"epoch": 0.4377446489547764,
"learning_rate": 4.549469367214673e-05,
"loss": 4.2786,
"step": 657
},
{
"epoch": 0.43841092695927375,
"learning_rate": 4.5479230967368525e-05,
"loss": 4.2383,
"step": 658
},
{
"epoch": 0.4390772049637711,
"learning_rate": 4.546374441036319e-05,
"loss": 4.2614,
"step": 659
},
{
"epoch": 0.4397434829682685,
"learning_rate": 4.5448234019167945e-05,
"loss": 4.2433,
"step": 660
},
{
"epoch": 0.4404097609727659,
"learning_rate": 4.543269981184781e-05,
"loss": 4.2641,
"step": 661
},
{
"epoch": 0.4410760389772633,
"learning_rate": 4.5417141806495524e-05,
"loss": 4.2505,
"step": 662
},
{
"epoch": 0.44174231698176064,
"learning_rate": 4.540156002123154e-05,
"loss": 4.3148,
"step": 663
},
{
"epoch": 0.442408594986258,
"learning_rate": 4.5385954474204026e-05,
"loss": 4.2294,
"step": 664
},
{
"epoch": 0.4430748729907554,
"learning_rate": 4.5370325183588804e-05,
"loss": 4.2602,
"step": 665
},
{
"epoch": 0.44374115099525274,
"learning_rate": 4.535467216758936e-05,
"loss": 4.2285,
"step": 666
},
{
"epoch": 0.44440742899975016,
"learning_rate": 4.533899544443682e-05,
"loss": 4.2565,
"step": 667
},
{
"epoch": 0.44507370700424753,
"learning_rate": 4.53232950323899e-05,
"loss": 4.2251,
"step": 668
},
{
"epoch": 0.4457399850087449,
"learning_rate": 4.530757094973494e-05,
"loss": 4.2846,
"step": 669
},
{
"epoch": 0.44640626301324227,
"learning_rate": 4.529182321478582e-05,
"loss": 4.2559,
"step": 670
},
{
"epoch": 0.44707254101773963,
"learning_rate": 4.527605184588398e-05,
"loss": 4.2151,
"step": 671
},
{
"epoch": 0.44773881902223706,
"learning_rate": 4.5260256861398386e-05,
"loss": 4.2438,
"step": 672
},
{
"epoch": 0.4484050970267344,
"learning_rate": 4.52444382797255e-05,
"loss": 4.2494,
"step": 673
},
{
"epoch": 0.4490713750312318,
"learning_rate": 4.522859611928929e-05,
"loss": 4.2726,
"step": 674
},
{
"epoch": 0.44973765303572916,
"learning_rate": 4.521273039854116e-05,
"loss": 4.2323,
"step": 675
},
{
"epoch": 0.4504039310402265,
"learning_rate": 4.5196841135959975e-05,
"loss": 4.2525,
"step": 676
},
{
"epoch": 0.4510702090447239,
"learning_rate": 4.5180928350052e-05,
"loss": 4.2437,
"step": 677
},
{
"epoch": 0.4517364870492213,
"learning_rate": 4.516499205935092e-05,
"loss": 4.281,
"step": 678
},
{
"epoch": 0.4524027650537187,
"learning_rate": 4.514903228241778e-05,
"loss": 4.2821,
"step": 679
},
{
"epoch": 0.45306904305821605,
"learning_rate": 4.513304903784099e-05,
"loss": 4.2997,
"step": 680
},
{
"epoch": 0.4537353210627134,
"learning_rate": 4.5117042344236274e-05,
"loss": 4.2286,
"step": 681
},
{
"epoch": 0.4544015990672108,
"learning_rate": 4.510101222024669e-05,
"loss": 4.2808,
"step": 682
},
{
"epoch": 0.45506787707170815,
"learning_rate": 4.5084958684542576e-05,
"loss": 4.2706,
"step": 683
},
{
"epoch": 0.45573415507620557,
"learning_rate": 4.506888175582153e-05,
"loss": 4.2944,
"step": 684
},
{
"epoch": 0.45640043308070294,
"learning_rate": 4.5052781452808416e-05,
"loss": 4.255,
"step": 685
},
{
"epoch": 0.4570667110852003,
"learning_rate": 4.50366577942553e-05,
"loss": 4.2678,
"step": 686
},
{
"epoch": 0.4577329890896977,
"learning_rate": 4.502051079894146e-05,
"loss": 4.2141,
"step": 687
},
{
"epoch": 0.45839926709419504,
"learning_rate": 4.500434048567336e-05,
"loss": 4.2604,
"step": 688
},
{
"epoch": 0.4590655450986924,
"learning_rate": 4.498814687328461e-05,
"loss": 4.2354,
"step": 689
},
{
"epoch": 0.45973182310318983,
"learning_rate": 4.4971929980635964e-05,
"loss": 4.2456,
"step": 690
},
{
"epoch": 0.4603981011076872,
"learning_rate": 4.4955689826615296e-05,
"loss": 4.2048,
"step": 691
},
{
"epoch": 0.46106437911218456,
"learning_rate": 4.493942643013756e-05,
"loss": 4.2111,
"step": 692
},
{
"epoch": 0.46173065711668193,
"learning_rate": 4.492313981014479e-05,
"loss": 4.2683,
"step": 693
},
{
"epoch": 0.4623969351211793,
"learning_rate": 4.490682998560606e-05,
"loss": 4.2924,
"step": 694
},
{
"epoch": 0.46306321312567666,
"learning_rate": 4.489049697551747e-05,
"loss": 4.2802,
"step": 695
},
{
"epoch": 0.4637294911301741,
"learning_rate": 4.4874140798902164e-05,
"loss": 4.229,
"step": 696
},
{
"epoch": 0.46439576913467145,
"learning_rate": 4.48577614748102e-05,
"loss": 4.2068,
"step": 697
},
{
"epoch": 0.4650620471391688,
"learning_rate": 4.4841359022318636e-05,
"loss": 4.2129,
"step": 698
},
{
"epoch": 0.4657283251436662,
"learning_rate": 4.482493346053147e-05,
"loss": 4.2323,
"step": 699
},
{
"epoch": 0.46639460314816356,
"learning_rate": 4.480848480857961e-05,
"loss": 4.2851,
"step": 700
},
{
"epoch": 0.4670608811526609,
"learning_rate": 4.4792013085620844e-05,
"loss": 4.2009,
"step": 701
},
{
"epoch": 0.46772715915715835,
"learning_rate": 4.477551831083985e-05,
"loss": 4.2571,
"step": 702
},
{
"epoch": 0.4683934371616557,
"learning_rate": 4.4759000503448137e-05,
"loss": 4.2715,
"step": 703
},
{
"epoch": 0.4690597151661531,
"learning_rate": 4.4742459682684054e-05,
"loss": 4.2592,
"step": 704
},
{
"epoch": 0.46972599317065045,
"learning_rate": 4.472589586781275e-05,
"loss": 4.2726,
"step": 705
},
{
"epoch": 0.4703922711751478,
"learning_rate": 4.470930907812616e-05,
"loss": 4.2587,
"step": 706
},
{
"epoch": 0.4710585491796452,
"learning_rate": 4.469269933294296e-05,
"loss": 4.2211,
"step": 707
},
{
"epoch": 0.4717248271841426,
"learning_rate": 4.4676066651608587e-05,
"loss": 4.2239,
"step": 708
},
{
"epoch": 0.47239110518863997,
"learning_rate": 4.465941105349516e-05,
"loss": 4.2517,
"step": 709
},
{
"epoch": 0.47305738319313734,
"learning_rate": 4.4642732558001534e-05,
"loss": 4.2599,
"step": 710
},
{
"epoch": 0.4737236611976347,
"learning_rate": 4.4626031184553186e-05,
"loss": 4.2144,
"step": 711
},
{
"epoch": 0.47438993920213207,
"learning_rate": 4.460930695260227e-05,
"loss": 4.2287,
"step": 712
},
{
"epoch": 0.47505621720662944,
"learning_rate": 4.459255988162755e-05,
"loss": 4.2708,
"step": 713
},
{
"epoch": 0.47572249521112686,
"learning_rate": 4.45757899911344e-05,
"loss": 4.2292,
"step": 714
},
{
"epoch": 0.47638877321562423,
"learning_rate": 4.455899730065476e-05,
"loss": 4.2043,
"step": 715
},
{
"epoch": 0.4770550512201216,
"learning_rate": 4.454218182974713e-05,
"loss": 4.237,
"step": 716
},
{
"epoch": 0.47772132922461896,
"learning_rate": 4.4525343597996556e-05,
"loss": 4.2446,
"step": 717
},
{
"epoch": 0.47838760722911633,
"learning_rate": 4.4508482625014566e-05,
"loss": 4.2293,
"step": 718
},
{
"epoch": 0.47905388523361375,
"learning_rate": 4.4491598930439205e-05,
"loss": 4.2516,
"step": 719
},
{
"epoch": 0.4797201632381111,
"learning_rate": 4.4474692533934944e-05,
"loss": 4.2391,
"step": 720
},
{
"epoch": 0.4803864412426085,
"learning_rate": 4.4457763455192746e-05,
"loss": 4.2249,
"step": 721
},
{
"epoch": 0.48105271924710585,
"learning_rate": 4.444081171392995e-05,
"loss": 4.2725,
"step": 722
},
{
"epoch": 0.4817189972516032,
"learning_rate": 4.4423837329890304e-05,
"loss": 4.2293,
"step": 723
},
{
"epoch": 0.4823852752561006,
"learning_rate": 4.440684032284394e-05,
"loss": 4.3053,
"step": 724
},
{
"epoch": 0.483051553260598,
"learning_rate": 4.4389820712587305e-05,
"loss": 4.2321,
"step": 725
},
{
"epoch": 0.4837178312650954,
"learning_rate": 4.4372778518943215e-05,
"loss": 4.2247,
"step": 726
},
{
"epoch": 0.48438410926959274,
"learning_rate": 4.435571376176076e-05,
"loss": 4.205,
"step": 727
},
{
"epoch": 0.4850503872740901,
"learning_rate": 4.4338626460915317e-05,
"loss": 4.2773,
"step": 728
},
{
"epoch": 0.4857166652785875,
"learning_rate": 4.432151663630853e-05,
"loss": 4.2562,
"step": 729
},
{
"epoch": 0.48638294328308485,
"learning_rate": 4.430438430786825e-05,
"loss": 4.2592,
"step": 730
},
{
"epoch": 0.48704922128758227,
"learning_rate": 4.428722949554857e-05,
"loss": 4.2315,
"step": 731
},
{
"epoch": 0.48771549929207964,
"learning_rate": 4.427005221932976e-05,
"loss": 4.2966,
"step": 732
},
{
"epoch": 0.488381777296577,
"learning_rate": 4.425285249921825e-05,
"loss": 4.247,
"step": 733
},
{
"epoch": 0.48904805530107437,
"learning_rate": 4.423563035524658e-05,
"loss": 4.2325,
"step": 734
},
{
"epoch": 0.48971433330557174,
"learning_rate": 4.4218385807473475e-05,
"loss": 4.2208,
"step": 735
},
{
"epoch": 0.4903806113100691,
"learning_rate": 4.42011188759837e-05,
"loss": 4.3079,
"step": 736
},
{
"epoch": 0.4910468893145665,
"learning_rate": 4.418382958088811e-05,
"loss": 4.2137,
"step": 737
},
{
"epoch": 0.4917131673190639,
"learning_rate": 4.4166517942323596e-05,
"loss": 4.2672,
"step": 738
},
{
"epoch": 0.49237944532356126,
"learning_rate": 4.414918398045309e-05,
"loss": 4.2671,
"step": 739
},
{
"epoch": 0.4930457233280586,
"learning_rate": 4.41318277154655e-05,
"loss": 4.2664,
"step": 740
},
{
"epoch": 0.493712001332556,
"learning_rate": 4.4114449167575744e-05,
"loss": 4.2004,
"step": 741
},
{
"epoch": 0.49437827933705336,
"learning_rate": 4.4097048357024665e-05,
"loss": 4.2822,
"step": 742
},
{
"epoch": 0.4950445573415508,
"learning_rate": 4.407962530407903e-05,
"loss": 4.2319,
"step": 743
},
{
"epoch": 0.49571083534604815,
"learning_rate": 4.4062180029031544e-05,
"loss": 4.2761,
"step": 744
},
{
"epoch": 0.4963771133505455,
"learning_rate": 4.404471255220076e-05,
"loss": 4.2015,
"step": 745
},
{
"epoch": 0.4970433913550429,
"learning_rate": 4.402722289393113e-05,
"loss": 4.2396,
"step": 746
},
{
"epoch": 0.49770966935954025,
"learning_rate": 4.400971107459288e-05,
"loss": 4.2327,
"step": 747
},
{
"epoch": 0.4983759473640376,
"learning_rate": 4.3992177114582124e-05,
"loss": 4.2155,
"step": 748
},
{
"epoch": 0.49904222536853504,
"learning_rate": 4.397462103432069e-05,
"loss": 4.2446,
"step": 749
},
{
"epoch": 0.4997085033730324,
"learning_rate": 4.395704285425623e-05,
"loss": 4.2276,
"step": 750
},
{
"epoch": 0.5003747813775298,
"learning_rate": 4.393944259486208e-05,
"loss": 4.2468,
"step": 751
},
{
"epoch": 0.5010410593820271,
"learning_rate": 4.392182027663733e-05,
"loss": 4.2422,
"step": 752
},
{
"epoch": 0.5017073373865245,
"learning_rate": 4.390417592010675e-05,
"loss": 4.251,
"step": 753
},
{
"epoch": 0.5023736153910219,
"learning_rate": 4.3886509545820786e-05,
"loss": 4.24,
"step": 754
},
{
"epoch": 0.5030398933955192,
"learning_rate": 4.3868821174355525e-05,
"loss": 4.2764,
"step": 755
},
{
"epoch": 0.5037061714000166,
"learning_rate": 4.3851110826312656e-05,
"loss": 4.2686,
"step": 756
},
{
"epoch": 0.504372449404514,
"learning_rate": 4.383337852231949e-05,
"loss": 4.244,
"step": 757
},
{
"epoch": 0.5050387274090115,
"learning_rate": 4.38156242830289e-05,
"loss": 4.1726,
"step": 758
},
{
"epoch": 0.5057050054135088,
"learning_rate": 4.37978481291193e-05,
"loss": 4.2363,
"step": 759
},
{
"epoch": 0.5063712834180062,
"learning_rate": 4.378005008129463e-05,
"loss": 4.251,
"step": 760
},
{
"epoch": 0.5070375614225036,
"learning_rate": 4.376223016028435e-05,
"loss": 4.231,
"step": 761
},
{
"epoch": 0.5077038394270009,
"learning_rate": 4.374438838684337e-05,
"loss": 4.2909,
"step": 762
},
{
"epoch": 0.5083701174314983,
"learning_rate": 4.3726524781752065e-05,
"loss": 4.2289,
"step": 763
},
{
"epoch": 0.5090363954359957,
"learning_rate": 4.370863936581624e-05,
"loss": 4.2608,
"step": 764
},
{
"epoch": 0.509702673440493,
"learning_rate": 4.369073215986708e-05,
"loss": 4.1911,
"step": 765
},
{
"epoch": 0.5103689514449904,
"learning_rate": 4.367280318476118e-05,
"loss": 4.2843,
"step": 766
},
{
"epoch": 0.5110352294494878,
"learning_rate": 4.365485246138048e-05,
"loss": 4.2515,
"step": 767
},
{
"epoch": 0.5117015074539851,
"learning_rate": 4.363688001063222e-05,
"loss": 4.2588,
"step": 768
},
{
"epoch": 0.5123677854584826,
"learning_rate": 4.361888585344901e-05,
"loss": 4.2155,
"step": 769
},
{
"epoch": 0.51303406346298,
"learning_rate": 4.3600870010788675e-05,
"loss": 4.2719,
"step": 770
},
{
"epoch": 0.5137003414674773,
"learning_rate": 4.358283250363434e-05,
"loss": 4.2391,
"step": 771
},
{
"epoch": 0.5143666194719747,
"learning_rate": 4.356477335299434e-05,
"loss": 4.247,
"step": 772
},
{
"epoch": 0.5150328974764721,
"learning_rate": 4.354669257990223e-05,
"loss": 4.1949,
"step": 773
},
{
"epoch": 0.5156991754809694,
"learning_rate": 4.352859020541674e-05,
"loss": 4.2437,
"step": 774
},
{
"epoch": 0.5163654534854668,
"learning_rate": 4.351046625062177e-05,
"loss": 4.2356,
"step": 775
},
{
"epoch": 0.5170317314899642,
"learning_rate": 4.3492320736626355e-05,
"loss": 4.1832,
"step": 776
},
{
"epoch": 0.5176980094944615,
"learning_rate": 4.347415368456463e-05,
"loss": 4.1989,
"step": 777
},
{
"epoch": 0.5183642874989589,
"learning_rate": 4.345596511559582e-05,
"loss": 4.2583,
"step": 778
},
{
"epoch": 0.5190305655034563,
"learning_rate": 4.34377550509042e-05,
"loss": 4.2389,
"step": 779
},
{
"epoch": 0.5196968435079536,
"learning_rate": 4.341952351169911e-05,
"loss": 4.2399,
"step": 780
},
{
"epoch": 0.5203631215124511,
"learning_rate": 4.340127051921488e-05,
"loss": 4.2071,
"step": 781
},
{
"epoch": 0.5210293995169485,
"learning_rate": 4.338299609471081e-05,
"loss": 4.2212,
"step": 782
},
{
"epoch": 0.5216956775214459,
"learning_rate": 4.3364700259471205e-05,
"loss": 4.2261,
"step": 783
},
{
"epoch": 0.5223619555259432,
"learning_rate": 4.3346383034805264e-05,
"loss": 4.245,
"step": 784
},
{
"epoch": 0.5230282335304406,
"learning_rate": 4.332804444204714e-05,
"loss": 4.2142,
"step": 785
},
{
"epoch": 0.523694511534938,
"learning_rate": 4.3309684502555834e-05,
"loss": 4.2483,
"step": 786
},
{
"epoch": 0.5243607895394353,
"learning_rate": 4.329130323771524e-05,
"loss": 4.2738,
"step": 787
},
{
"epoch": 0.5250270675439327,
"learning_rate": 4.327290066893407e-05,
"loss": 4.2719,
"step": 788
},
{
"epoch": 0.5256933455484301,
"learning_rate": 4.325447681764586e-05,
"loss": 4.2152,
"step": 789
},
{
"epoch": 0.5263596235529274,
"learning_rate": 4.323603170530892e-05,
"loss": 4.2065,
"step": 790
},
{
"epoch": 0.5270259015574248,
"learning_rate": 4.3217565353406346e-05,
"loss": 4.258,
"step": 791
},
{
"epoch": 0.5276921795619222,
"learning_rate": 4.319907778344595e-05,
"loss": 4.1857,
"step": 792
},
{
"epoch": 0.5283584575664196,
"learning_rate": 4.318056901696027e-05,
"loss": 4.2331,
"step": 793
},
{
"epoch": 0.529024735570917,
"learning_rate": 4.316203907550652e-05,
"loss": 4.2316,
"step": 794
},
{
"epoch": 0.5296910135754144,
"learning_rate": 4.31434879806666e-05,
"loss": 4.221,
"step": 795
},
{
"epoch": 0.5303572915799117,
"learning_rate": 4.3124915754047004e-05,
"loss": 4.231,
"step": 796
},
{
"epoch": 0.5310235695844091,
"learning_rate": 4.310632241727888e-05,
"loss": 4.2364,
"step": 797
},
{
"epoch": 0.5316898475889065,
"learning_rate": 4.3087707992017954e-05,
"loss": 4.2342,
"step": 798
},
{
"epoch": 0.5323561255934038,
"learning_rate": 4.3069072499944494e-05,
"loss": 4.2463,
"step": 799
},
{
"epoch": 0.5330224035979012,
"learning_rate": 4.305041596276333e-05,
"loss": 4.2112,
"step": 800
},
{
"epoch": 0.5336886816023986,
"learning_rate": 4.3031738402203784e-05,
"loss": 4.2026,
"step": 801
},
{
"epoch": 0.5343549596068959,
"learning_rate": 4.301303984001967e-05,
"loss": 4.2276,
"step": 802
},
{
"epoch": 0.5350212376113933,
"learning_rate": 4.2994320297989285e-05,
"loss": 4.2252,
"step": 803
},
{
"epoch": 0.5356875156158908,
"learning_rate": 4.2975579797915314e-05,
"loss": 4.2762,
"step": 804
},
{
"epoch": 0.5363537936203882,
"learning_rate": 4.295681836162489e-05,
"loss": 4.1782,
"step": 805
},
{
"epoch": 0.5370200716248855,
"learning_rate": 4.293803601096952e-05,
"loss": 4.2288,
"step": 806
},
{
"epoch": 0.5376863496293829,
"learning_rate": 4.291923276782507e-05,
"loss": 4.2366,
"step": 807
},
{
"epoch": 0.5383526276338803,
"learning_rate": 4.2900408654091726e-05,
"loss": 4.2069,
"step": 808
},
{
"epoch": 0.5390189056383776,
"learning_rate": 4.2881563691694015e-05,
"loss": 4.2132,
"step": 809
},
{
"epoch": 0.539685183642875,
"learning_rate": 4.2862697902580715e-05,
"loss": 4.2309,
"step": 810
},
{
"epoch": 0.5403514616473724,
"learning_rate": 4.284381130872487e-05,
"loss": 4.2408,
"step": 811
},
{
"epoch": 0.5410177396518697,
"learning_rate": 4.282490393212376e-05,
"loss": 4.226,
"step": 812
},
{
"epoch": 0.5416840176563671,
"learning_rate": 4.2805975794798866e-05,
"loss": 4.1678,
"step": 813
},
{
"epoch": 0.5423502956608645,
"learning_rate": 4.278702691879587e-05,
"loss": 4.2434,
"step": 814
},
{
"epoch": 0.5430165736653618,
"learning_rate": 4.2768057326184565e-05,
"loss": 4.2417,
"step": 815
},
{
"epoch": 0.5436828516698593,
"learning_rate": 4.27490670390589e-05,
"loss": 4.2283,
"step": 816
},
{
"epoch": 0.5443491296743567,
"learning_rate": 4.273005607953694e-05,
"loss": 4.2347,
"step": 817
},
{
"epoch": 0.545015407678854,
"learning_rate": 4.27110244697608e-05,
"loss": 4.2459,
"step": 818
},
{
"epoch": 0.5456816856833514,
"learning_rate": 4.2691972231896656e-05,
"loss": 4.2998,
"step": 819
},
{
"epoch": 0.5463479636878488,
"learning_rate": 4.2672899388134715e-05,
"loss": 4.1929,
"step": 820
},
{
"epoch": 0.5470142416923461,
"learning_rate": 4.265380596068919e-05,
"loss": 4.2207,
"step": 821
},
{
"epoch": 0.5476805196968435,
"learning_rate": 4.2634691971798246e-05,
"loss": 4.2642,
"step": 822
},
{
"epoch": 0.5483467977013409,
"learning_rate": 4.2615557443724005e-05,
"loss": 4.2372,
"step": 823
},
{
"epoch": 0.5490130757058382,
"learning_rate": 4.2596402398752533e-05,
"loss": 4.1944,
"step": 824
},
{
"epoch": 0.5496793537103356,
"learning_rate": 4.257722685919375e-05,
"loss": 4.2493,
"step": 825
},
{
"epoch": 0.550345631714833,
"learning_rate": 4.2558030847381495e-05,
"loss": 4.2457,
"step": 826
},
{
"epoch": 0.5510119097193303,
"learning_rate": 4.2538814385673405e-05,
"loss": 4.2236,
"step": 827
},
{
"epoch": 0.5516781877238278,
"learning_rate": 4.251957749645096e-05,
"loss": 4.193,
"step": 828
},
{
"epoch": 0.5523444657283252,
"learning_rate": 4.2500320202119434e-05,
"loss": 4.2588,
"step": 829
},
{
"epoch": 0.5530107437328226,
"learning_rate": 4.2481042525107854e-05,
"loss": 4.2357,
"step": 830
},
{
"epoch": 0.5536770217373199,
"learning_rate": 4.2461744487868996e-05,
"loss": 4.2646,
"step": 831
},
{
"epoch": 0.5543432997418173,
"learning_rate": 4.2442426112879354e-05,
"loss": 4.1719,
"step": 832
},
{
"epoch": 0.5550095777463147,
"learning_rate": 4.2423087422639085e-05,
"loss": 4.1706,
"step": 833
},
{
"epoch": 0.555675855750812,
"learning_rate": 4.240372843967203e-05,
"loss": 4.1958,
"step": 834
},
{
"epoch": 0.5563421337553094,
"learning_rate": 4.238434918652568e-05,
"loss": 4.2535,
"step": 835
},
{
"epoch": 0.5570084117598068,
"learning_rate": 4.2364949685771094e-05,
"loss": 4.2599,
"step": 836
},
{
"epoch": 0.5576746897643041,
"learning_rate": 4.234552996000294e-05,
"loss": 4.2108,
"step": 837
},
{
"epoch": 0.5583409677688015,
"learning_rate": 4.232609003183943e-05,
"loss": 4.2315,
"step": 838
},
{
"epoch": 0.5590072457732989,
"learning_rate": 4.230662992392232e-05,
"loss": 4.2531,
"step": 839
},
{
"epoch": 0.5596735237777963,
"learning_rate": 4.228714965891686e-05,
"loss": 4.2042,
"step": 840
},
{
"epoch": 0.5603398017822937,
"learning_rate": 4.226764925951177e-05,
"loss": 4.2279,
"step": 841
},
{
"epoch": 0.5610060797867911,
"learning_rate": 4.224812874841923e-05,
"loss": 4.248,
"step": 842
},
{
"epoch": 0.5616723577912884,
"learning_rate": 4.222858814837487e-05,
"loss": 4.2185,
"step": 843
},
{
"epoch": 0.5623386357957858,
"learning_rate": 4.220902748213765e-05,
"loss": 4.2127,
"step": 844
},
{
"epoch": 0.5630049138002832,
"learning_rate": 4.218944677248996e-05,
"loss": 4.2062,
"step": 845
},
{
"epoch": 0.5636711918047805,
"learning_rate": 4.2169846042237525e-05,
"loss": 4.2546,
"step": 846
},
{
"epoch": 0.5643374698092779,
"learning_rate": 4.215022531420937e-05,
"loss": 4.2368,
"step": 847
},
{
"epoch": 0.5650037478137753,
"learning_rate": 4.213058461125781e-05,
"loss": 4.1808,
"step": 848
},
{
"epoch": 0.5656700258182726,
"learning_rate": 4.211092395625846e-05,
"loss": 4.2232,
"step": 849
},
{
"epoch": 0.56633630382277,
"learning_rate": 4.209124337211013e-05,
"loss": 4.2647,
"step": 850
},
{
"epoch": 0.5670025818272675,
"learning_rate": 4.207154288173488e-05,
"loss": 4.1633,
"step": 851
},
{
"epoch": 0.5676688598317648,
"learning_rate": 4.205182250807791e-05,
"loss": 4.2835,
"step": 852
},
{
"epoch": 0.5683351378362622,
"learning_rate": 4.203208227410762e-05,
"loss": 4.2192,
"step": 853
},
{
"epoch": 0.5690014158407596,
"learning_rate": 4.2012322202815525e-05,
"loss": 4.2358,
"step": 854
},
{
"epoch": 0.569667693845257,
"learning_rate": 4.199254231721624e-05,
"loss": 4.2915,
"step": 855
},
{
"epoch": 0.5703339718497543,
"learning_rate": 4.197274264034746e-05,
"loss": 4.2737,
"step": 856
},
{
"epoch": 0.5710002498542517,
"learning_rate": 4.195292319526995e-05,
"loss": 4.2371,
"step": 857
},
{
"epoch": 0.571666527858749,
"learning_rate": 4.193308400506745e-05,
"loss": 4.2163,
"step": 858
},
{
"epoch": 0.5723328058632464,
"learning_rate": 4.191322509284675e-05,
"loss": 4.2461,
"step": 859
},
{
"epoch": 0.5729990838677438,
"learning_rate": 4.189334648173761e-05,
"loss": 4.1969,
"step": 860
},
{
"epoch": 0.5736653618722412,
"learning_rate": 4.1873448194892675e-05,
"loss": 4.2311,
"step": 861
},
{
"epoch": 0.5743316398767385,
"learning_rate": 4.1853530255487557e-05,
"loss": 4.2437,
"step": 862
},
{
"epoch": 0.574997917881236,
"learning_rate": 4.183359268672076e-05,
"loss": 4.2013,
"step": 863
},
{
"epoch": 0.5756641958857334,
"learning_rate": 4.181363551181361e-05,
"loss": 4.1796,
"step": 864
},
{
"epoch": 0.5763304738902307,
"learning_rate": 4.179365875401032e-05,
"loss": 4.2242,
"step": 865
},
{
"epoch": 0.5769967518947281,
"learning_rate": 4.1773662436577876e-05,
"loss": 4.308,
"step": 866
},
{
"epoch": 0.5776630298992255,
"learning_rate": 4.1753646582806046e-05,
"loss": 4.206,
"step": 867
},
{
"epoch": 0.5783293079037228,
"learning_rate": 4.173361121600737e-05,
"loss": 4.1659,
"step": 868
},
{
"epoch": 0.5789955859082202,
"learning_rate": 4.171355635951709e-05,
"loss": 4.282,
"step": 869
},
{
"epoch": 0.5796618639127176,
"learning_rate": 4.169348203669319e-05,
"loss": 4.2339,
"step": 870
},
{
"epoch": 0.5803281419172149,
"learning_rate": 4.167338827091627e-05,
"loss": 4.2394,
"step": 871
},
{
"epoch": 0.5809944199217123,
"learning_rate": 4.1653275085589625e-05,
"loss": 4.2392,
"step": 872
},
{
"epoch": 0.5816606979262097,
"learning_rate": 4.163314250413913e-05,
"loss": 4.2317,
"step": 873
},
{
"epoch": 0.582326975930707,
"learning_rate": 4.16129905500133e-05,
"loss": 4.2525,
"step": 874
},
{
"epoch": 0.5829932539352045,
"learning_rate": 4.159281924668314e-05,
"loss": 4.2303,
"step": 875
},
{
"epoch": 0.5836595319397019,
"learning_rate": 4.1572628617642255e-05,
"loss": 4.2183,
"step": 876
},
{
"epoch": 0.5843258099441992,
"learning_rate": 4.155241868640674e-05,
"loss": 4.2334,
"step": 877
},
{
"epoch": 0.5849920879486966,
"learning_rate": 4.153218947651516e-05,
"loss": 4.2457,
"step": 878
},
{
"epoch": 0.585658365953194,
"learning_rate": 4.151194101152855e-05,
"loss": 4.2006,
"step": 879
},
{
"epoch": 0.5863246439576913,
"learning_rate": 4.149167331503036e-05,
"loss": 4.2277,
"step": 880
},
{
"epoch": 0.5869909219621887,
"learning_rate": 4.147138641062642e-05,
"loss": 4.2306,
"step": 881
},
{
"epoch": 0.5876571999666861,
"learning_rate": 4.1451080321945e-05,
"loss": 4.2417,
"step": 882
},
{
"epoch": 0.5883234779711835,
"learning_rate": 4.1430755072636626e-05,
"loss": 4.1818,
"step": 883
},
{
"epoch": 0.5889897559756808,
"learning_rate": 4.1410410686374195e-05,
"loss": 4.1559,
"step": 884
},
{
"epoch": 0.5896560339801782,
"learning_rate": 4.139004718685289e-05,
"loss": 4.1916,
"step": 885
},
{
"epoch": 0.5903223119846756,
"learning_rate": 4.1369664597790134e-05,
"loss": 4.2387,
"step": 886
},
{
"epoch": 0.590988589989173,
"learning_rate": 4.13492629429256e-05,
"loss": 4.2322,
"step": 887
},
{
"epoch": 0.5916548679936704,
"learning_rate": 4.132884224602116e-05,
"loss": 4.2554,
"step": 888
},
{
"epoch": 0.5923211459981678,
"learning_rate": 4.130840253086087e-05,
"loss": 4.2454,
"step": 889
},
{
"epoch": 0.5929874240026651,
"learning_rate": 4.128794382125093e-05,
"loss": 4.1987,
"step": 890
},
{
"epoch": 0.5936537020071625,
"learning_rate": 4.126746614101966e-05,
"loss": 4.2116,
"step": 891
},
{
"epoch": 0.5943199800116599,
"learning_rate": 4.1246969514017494e-05,
"loss": 4.2571,
"step": 892
},
{
"epoch": 0.5949862580161572,
"learning_rate": 4.122645396411691e-05,
"loss": 4.2463,
"step": 893
},
{
"epoch": 0.5956525360206546,
"learning_rate": 4.120591951521244e-05,
"loss": 4.2308,
"step": 894
},
{
"epoch": 0.596318814025152,
"learning_rate": 4.118536619122062e-05,
"loss": 4.1972,
"step": 895
},
{
"epoch": 0.5969850920296493,
"learning_rate": 4.1164794016079985e-05,
"loss": 4.2093,
"step": 896
},
{
"epoch": 0.5976513700341467,
"learning_rate": 4.114420301375099e-05,
"loss": 4.2425,
"step": 897
},
{
"epoch": 0.5983176480386442,
"learning_rate": 4.112359320821607e-05,
"loss": 4.1783,
"step": 898
},
{
"epoch": 0.5989839260431415,
"learning_rate": 4.1102964623479523e-05,
"loss": 4.2436,
"step": 899
},
{
"epoch": 0.5996502040476389,
"learning_rate": 4.1082317283567526e-05,
"loss": 4.1911,
"step": 900
},
{
"epoch": 0.6003164820521363,
"learning_rate": 4.106165121252811e-05,
"loss": 4.2198,
"step": 901
},
{
"epoch": 0.6009827600566336,
"learning_rate": 4.10409664344311e-05,
"loss": 4.21,
"step": 902
},
{
"epoch": 0.601649038061131,
"learning_rate": 4.102026297336814e-05,
"loss": 4.2593,
"step": 903
},
{
"epoch": 0.6023153160656284,
"learning_rate": 4.0999540853452605e-05,
"loss": 4.2448,
"step": 904
},
{
"epoch": 0.6029815940701257,
"learning_rate": 4.0978800098819636e-05,
"loss": 4.2628,
"step": 905
},
{
"epoch": 0.6036478720746231,
"learning_rate": 4.0958040733626036e-05,
"loss": 4.2293,
"step": 906
},
{
"epoch": 0.6043141500791205,
"learning_rate": 4.093726278205031e-05,
"loss": 4.2572,
"step": 907
},
{
"epoch": 0.6049804280836178,
"learning_rate": 4.091646626829263e-05,
"loss": 4.1672,
"step": 908
},
{
"epoch": 0.6056467060881152,
"learning_rate": 4.0895651216574725e-05,
"loss": 4.2201,
"step": 909
},
{
"epoch": 0.6063129840926127,
"learning_rate": 4.087481765113999e-05,
"loss": 4.1759,
"step": 910
},
{
"epoch": 0.6069792620971101,
"learning_rate": 4.0853965596253315e-05,
"loss": 4.2364,
"step": 911
},
{
"epoch": 0.6076455401016074,
"learning_rate": 4.083309507620118e-05,
"loss": 4.1883,
"step": 912
},
{
"epoch": 0.6083118181061048,
"learning_rate": 4.081220611529153e-05,
"loss": 4.2004,
"step": 913
},
{
"epoch": 0.6089780961106022,
"learning_rate": 4.079129873785382e-05,
"loss": 4.2148,
"step": 914
},
{
"epoch": 0.6096443741150995,
"learning_rate": 4.077037296823893e-05,
"loss": 4.2,
"step": 915
},
{
"epoch": 0.6103106521195969,
"learning_rate": 4.0749428830819195e-05,
"loss": 4.1947,
"step": 916
},
{
"epoch": 0.6109769301240943,
"learning_rate": 4.072846634998829e-05,
"loss": 4.226,
"step": 917
},
{
"epoch": 0.6116432081285916,
"learning_rate": 4.07074855501613e-05,
"loss": 4.2578,
"step": 918
},
{
"epoch": 0.612309486133089,
"learning_rate": 4.068648645577462e-05,
"loss": 4.2288,
"step": 919
},
{
"epoch": 0.6129757641375864,
"learning_rate": 4.066546909128598e-05,
"loss": 4.2197,
"step": 920
},
{
"epoch": 0.6136420421420837,
"learning_rate": 4.064443348117436e-05,
"loss": 4.23,
"step": 921
},
{
"epoch": 0.6143083201465812,
"learning_rate": 4.0623379649940026e-05,
"loss": 4.2508,
"step": 922
},
{
"epoch": 0.6149745981510786,
"learning_rate": 4.060230762210441e-05,
"loss": 4.2537,
"step": 923
},
{
"epoch": 0.615640876155576,
"learning_rate": 4.0581217422210197e-05,
"loss": 4.1968,
"step": 924
},
{
"epoch": 0.6163071541600733,
"learning_rate": 4.0560109074821195e-05,
"loss": 4.1874,
"step": 925
},
{
"epoch": 0.6169734321645707,
"learning_rate": 4.0538982604522376e-05,
"loss": 4.1946,
"step": 926
},
{
"epoch": 0.617639710169068,
"learning_rate": 4.051783803591982e-05,
"loss": 4.2286,
"step": 927
},
{
"epoch": 0.6183059881735654,
"learning_rate": 4.0496675393640645e-05,
"loss": 4.1978,
"step": 928
},
{
"epoch": 0.6189722661780628,
"learning_rate": 4.0475494702333075e-05,
"loss": 4.1638,
"step": 929
},
{
"epoch": 0.6196385441825601,
"learning_rate": 4.045429598666632e-05,
"loss": 4.2151,
"step": 930
},
{
"epoch": 0.6203048221870575,
"learning_rate": 4.0433079271330586e-05,
"loss": 4.1608,
"step": 931
},
{
"epoch": 0.6209711001915549,
"learning_rate": 4.0411844581037075e-05,
"loss": 4.219,
"step": 932
},
{
"epoch": 0.6216373781960522,
"learning_rate": 4.0390591940517874e-05,
"loss": 4.1692,
"step": 933
},
{
"epoch": 0.6223036562005497,
"learning_rate": 4.0369321374526016e-05,
"loss": 4.1979,
"step": 934
},
{
"epoch": 0.6229699342050471,
"learning_rate": 4.0348032907835386e-05,
"loss": 4.2169,
"step": 935
},
{
"epoch": 0.6236362122095445,
"learning_rate": 4.032672656524075e-05,
"loss": 4.2121,
"step": 936
},
{
"epoch": 0.6243024902140418,
"learning_rate": 4.0305402371557664e-05,
"loss": 4.2254,
"step": 937
},
{
"epoch": 0.6249687682185392,
"learning_rate": 4.0284060351622485e-05,
"loss": 4.2227,
"step": 938
},
{
"epoch": 0.6256350462230366,
"learning_rate": 4.026270053029233e-05,
"loss": 4.2157,
"step": 939
},
{
"epoch": 0.6263013242275339,
"learning_rate": 4.024132293244507e-05,
"loss": 4.2488,
"step": 940
},
{
"epoch": 0.6269676022320313,
"learning_rate": 4.021992758297925e-05,
"loss": 4.2065,
"step": 941
},
{
"epoch": 0.6276338802365287,
"learning_rate": 4.0198514506814097e-05,
"loss": 4.2889,
"step": 942
},
{
"epoch": 0.628300158241026,
"learning_rate": 4.0177083728889495e-05,
"loss": 4.2318,
"step": 943
},
{
"epoch": 0.6289664362455234,
"learning_rate": 4.015563527416595e-05,
"loss": 4.1941,
"step": 944
},
{
"epoch": 0.6296327142500209,
"learning_rate": 4.013416916762455e-05,
"loss": 4.2394,
"step": 945
},
{
"epoch": 0.6302989922545182,
"learning_rate": 4.011268543426692e-05,
"loss": 4.1886,
"step": 946
},
{
"epoch": 0.6309652702590156,
"learning_rate": 4.0091184099115245e-05,
"loss": 4.2066,
"step": 947
},
{
"epoch": 0.631631548263513,
"learning_rate": 4.006966518721219e-05,
"loss": 4.1906,
"step": 948
},
{
"epoch": 0.6322978262680103,
"learning_rate": 4.004812872362093e-05,
"loss": 4.2071,
"step": 949
},
{
"epoch": 0.6329641042725077,
"learning_rate": 4.002657473342503e-05,
"loss": 4.1946,
"step": 950
},
{
"epoch": 0.6336303822770051,
"learning_rate": 4.000500324172849e-05,
"loss": 4.2157,
"step": 951
},
{
"epoch": 0.6342966602815024,
"learning_rate": 3.998341427365572e-05,
"loss": 4.2002,
"step": 952
},
{
"epoch": 0.6349629382859998,
"learning_rate": 3.996180785435144e-05,
"loss": 4.2172,
"step": 953
},
{
"epoch": 0.6356292162904972,
"learning_rate": 3.994018400898072e-05,
"loss": 4.2399,
"step": 954
},
{
"epoch": 0.6362954942949945,
"learning_rate": 3.991854276272894e-05,
"loss": 4.2244,
"step": 955
},
{
"epoch": 0.6369617722994919,
"learning_rate": 3.989688414080171e-05,
"loss": 4.2035,
"step": 956
},
{
"epoch": 0.6376280503039894,
"learning_rate": 3.987520816842491e-05,
"loss": 4.2239,
"step": 957
},
{
"epoch": 0.6382943283084868,
"learning_rate": 3.98535148708446e-05,
"loss": 4.2266,
"step": 958
},
{
"epoch": 0.6389606063129841,
"learning_rate": 3.9831804273327054e-05,
"loss": 4.1804,
"step": 959
},
{
"epoch": 0.6396268843174815,
"learning_rate": 3.981007640115867e-05,
"loss": 4.2055,
"step": 960
},
{
"epoch": 0.6402931623219789,
"learning_rate": 3.978833127964596e-05,
"loss": 4.2253,
"step": 961
},
{
"epoch": 0.6409594403264762,
"learning_rate": 3.9766568934115556e-05,
"loss": 4.2573,
"step": 962
},
{
"epoch": 0.6416257183309736,
"learning_rate": 3.9744789389914124e-05,
"loss": 4.1668,
"step": 963
},
{
"epoch": 0.642291996335471,
"learning_rate": 3.9722992672408374e-05,
"loss": 4.2275,
"step": 964
},
{
"epoch": 0.6429582743399683,
"learning_rate": 3.9701178806985004e-05,
"loss": 4.2245,
"step": 965
},
{
"epoch": 0.6436245523444657,
"learning_rate": 3.96793478190507e-05,
"loss": 4.2108,
"step": 966
},
{
"epoch": 0.6442908303489631,
"learning_rate": 3.9657499734032086e-05,
"loss": 4.1881,
"step": 967
},
{
"epoch": 0.6449571083534604,
"learning_rate": 3.963563457737569e-05,
"loss": 4.2355,
"step": 968
},
{
"epoch": 0.6456233863579579,
"learning_rate": 3.961375237454795e-05,
"loss": 4.2139,
"step": 969
},
{
"epoch": 0.6462896643624553,
"learning_rate": 3.9591853151035123e-05,
"loss": 4.2079,
"step": 970
},
{
"epoch": 0.6469559423669526,
"learning_rate": 3.956993693234331e-05,
"loss": 4.2299,
"step": 971
},
{
"epoch": 0.64762222037145,
"learning_rate": 3.9548003743998406e-05,
"loss": 4.1861,
"step": 972
},
{
"epoch": 0.6482884983759474,
"learning_rate": 3.9526053611546064e-05,
"loss": 4.1676,
"step": 973
},
{
"epoch": 0.6489547763804447,
"learning_rate": 3.950408656055168e-05,
"loss": 4.2492,
"step": 974
},
{
"epoch": 0.6496210543849421,
"learning_rate": 3.9482102616600336e-05,
"loss": 4.2625,
"step": 975
},
{
"epoch": 0.6502873323894395,
"learning_rate": 3.946010180529681e-05,
"loss": 4.2408,
"step": 976
},
{
"epoch": 0.6509536103939368,
"learning_rate": 3.943808415226553e-05,
"loss": 4.2515,
"step": 977
},
{
"epoch": 0.6516198883984342,
"learning_rate": 3.941604968315052e-05,
"loss": 4.2261,
"step": 978
},
{
"epoch": 0.6522861664029316,
"learning_rate": 3.93939984236154e-05,
"loss": 4.1979,
"step": 979
},
{
"epoch": 0.652952444407429,
"learning_rate": 3.937193039934333e-05,
"loss": 4.2479,
"step": 980
},
{
"epoch": 0.6536187224119264,
"learning_rate": 3.934984563603703e-05,
"loss": 4.1933,
"step": 981
},
{
"epoch": 0.6542850004164238,
"learning_rate": 3.9327744159418675e-05,
"loss": 4.1862,
"step": 982
},
{
"epoch": 0.6549512784209212,
"learning_rate": 3.9305625995229954e-05,
"loss": 4.1988,
"step": 983
},
{
"epoch": 0.6556175564254185,
"learning_rate": 3.9283491169231944e-05,
"loss": 4.2475,
"step": 984
},
{
"epoch": 0.6562838344299159,
"learning_rate": 3.926133970720516e-05,
"loss": 4.1495,
"step": 985
},
{
"epoch": 0.6569501124344133,
"learning_rate": 3.923917163494947e-05,
"loss": 4.2448,
"step": 986
},
{
"epoch": 0.6576163904389106,
"learning_rate": 3.921698697828411e-05,
"loss": 4.2459,
"step": 987
},
{
"epoch": 0.658282668443408,
"learning_rate": 3.919478576304762e-05,
"loss": 4.2188,
"step": 988
},
{
"epoch": 0.6589489464479054,
"learning_rate": 3.917256801509783e-05,
"loss": 4.1891,
"step": 989
},
{
"epoch": 0.6596152244524027,
"learning_rate": 3.915033376031182e-05,
"loss": 4.2108,
"step": 990
},
{
"epoch": 0.6602815024569001,
"learning_rate": 3.91280830245859e-05,
"loss": 4.122,
"step": 991
},
{
"epoch": 0.6609477804613976,
"learning_rate": 3.910581583383557e-05,
"loss": 4.2183,
"step": 992
},
{
"epoch": 0.6616140584658949,
"learning_rate": 3.908353221399551e-05,
"loss": 4.174,
"step": 993
},
{
"epoch": 0.6622803364703923,
"learning_rate": 3.906123219101952e-05,
"loss": 4.1827,
"step": 994
},
{
"epoch": 0.6629466144748897,
"learning_rate": 3.90389157908805e-05,
"loss": 4.1739,
"step": 995
},
{
"epoch": 0.663612892479387,
"learning_rate": 3.9016583039570454e-05,
"loss": 4.2417,
"step": 996
},
{
"epoch": 0.6642791704838844,
"learning_rate": 3.899423396310039e-05,
"loss": 4.2193,
"step": 997
},
{
"epoch": 0.6649454484883818,
"learning_rate": 3.897186858750036e-05,
"loss": 4.2034,
"step": 998
},
{
"epoch": 0.6656117264928791,
"learning_rate": 3.8949486938819394e-05,
"loss": 4.2127,
"step": 999
},
{
"epoch": 0.6662780044973765,
"learning_rate": 3.892708904312546e-05,
"loss": 4.154,
"step": 1000
},
{
"epoch": 0.6669442825018739,
"learning_rate": 3.890467492650548e-05,
"loss": 4.2419,
"step": 1001
},
{
"epoch": 0.6676105605063712,
"learning_rate": 3.888224461506522e-05,
"loss": 4.2043,
"step": 1002
},
{
"epoch": 0.6682768385108686,
"learning_rate": 3.8859798134929365e-05,
"loss": 4.1697,
"step": 1003
},
{
"epoch": 0.6689431165153661,
"learning_rate": 3.8837335512241376e-05,
"loss": 4.1658,
"step": 1004
},
{
"epoch": 0.6696093945198635,
"learning_rate": 3.8814856773163576e-05,
"loss": 4.1931,
"step": 1005
},
{
"epoch": 0.6702756725243608,
"learning_rate": 3.8792361943877e-05,
"loss": 4.2376,
"step": 1006
},
{
"epoch": 0.6709419505288582,
"learning_rate": 3.876985105058145e-05,
"loss": 4.1789,
"step": 1007
},
{
"epoch": 0.6716082285333556,
"learning_rate": 3.874732411949545e-05,
"loss": 4.1591,
"step": 1008
},
{
"epoch": 0.6722745065378529,
"learning_rate": 3.8724781176856184e-05,
"loss": 4.2283,
"step": 1009
},
{
"epoch": 0.6729407845423503,
"learning_rate": 3.8702222248919494e-05,
"loss": 4.2157,
"step": 1010
},
{
"epoch": 0.6736070625468477,
"learning_rate": 3.867964736195983e-05,
"loss": 4.2075,
"step": 1011
},
{
"epoch": 0.674273340551345,
"learning_rate": 3.865705654227024e-05,
"loss": 4.2088,
"step": 1012
},
{
"epoch": 0.6749396185558424,
"learning_rate": 3.863444981616232e-05,
"loss": 4.2248,
"step": 1013
},
{
"epoch": 0.6756058965603398,
"learning_rate": 3.861182720996621e-05,
"loss": 4.2482,
"step": 1014
},
{
"epoch": 0.6762721745648371,
"learning_rate": 3.858918875003053e-05,
"loss": 4.1895,
"step": 1015
},
{
"epoch": 0.6769384525693346,
"learning_rate": 3.856653446272236e-05,
"loss": 4.2083,
"step": 1016
},
{
"epoch": 0.677604730573832,
"learning_rate": 3.854386437442723e-05,
"loss": 4.2116,
"step": 1017
},
{
"epoch": 0.6782710085783293,
"learning_rate": 3.852117851154906e-05,
"loss": 4.2645,
"step": 1018
},
{
"epoch": 0.6789372865828267,
"learning_rate": 3.8498476900510154e-05,
"loss": 4.2239,
"step": 1019
},
{
"epoch": 0.6796035645873241,
"learning_rate": 3.8475759567751144e-05,
"loss": 4.2048,
"step": 1020
},
{
"epoch": 0.6802698425918214,
"learning_rate": 3.8453026539731e-05,
"loss": 4.1515,
"step": 1021
},
{
"epoch": 0.6809361205963188,
"learning_rate": 3.843027784292693e-05,
"loss": 4.1805,
"step": 1022
},
{
"epoch": 0.6816023986008162,
"learning_rate": 3.840751350383443e-05,
"loss": 4.2233,
"step": 1023
},
{
"epoch": 0.6822686766053135,
"learning_rate": 3.83847335489672e-05,
"loss": 4.1952,
"step": 1024
},
{
"epoch": 0.6829349546098109,
"learning_rate": 3.8361938004857124e-05,
"loss": 4.2039,
"step": 1025
},
{
"epoch": 0.6836012326143083,
"learning_rate": 3.8339126898054246e-05,
"loss": 4.1632,
"step": 1026
},
{
"epoch": 0.6842675106188058,
"learning_rate": 3.8316300255126735e-05,
"loss": 4.2085,
"step": 1027
},
{
"epoch": 0.6849337886233031,
"learning_rate": 3.829345810266086e-05,
"loss": 4.1645,
"step": 1028
},
{
"epoch": 0.6856000666278005,
"learning_rate": 3.8270600467260954e-05,
"loss": 4.2249,
"step": 1029
},
{
"epoch": 0.6862663446322979,
"learning_rate": 3.824772737554937e-05,
"loss": 4.164,
"step": 1030
},
{
"epoch": 0.6869326226367952,
"learning_rate": 3.822483885416649e-05,
"loss": 4.2246,
"step": 1031
},
{
"epoch": 0.6875989006412926,
"learning_rate": 3.820193492977062e-05,
"loss": 4.1365,
"step": 1032
},
{
"epoch": 0.68826517864579,
"learning_rate": 3.817901562903807e-05,
"loss": 4.2022,
"step": 1033
},
{
"epoch": 0.6889314566502873,
"learning_rate": 3.8156080978663e-05,
"loss": 4.1927,
"step": 1034
},
{
"epoch": 0.6895977346547847,
"learning_rate": 3.813313100535747e-05,
"loss": 4.1731,
"step": 1035
},
{
"epoch": 0.6902640126592821,
"learning_rate": 3.8110165735851413e-05,
"loss": 4.2305,
"step": 1036
},
{
"epoch": 0.6909302906637794,
"learning_rate": 3.8087185196892526e-05,
"loss": 4.2104,
"step": 1037
},
{
"epoch": 0.6915965686682768,
"learning_rate": 3.8064189415246346e-05,
"loss": 4.1984,
"step": 1038
},
{
"epoch": 0.6922628466727743,
"learning_rate": 3.804117841769611e-05,
"loss": 4.1534,
"step": 1039
},
{
"epoch": 0.6929291246772716,
"learning_rate": 3.801815223104281e-05,
"loss": 4.224,
"step": 1040
},
{
"epoch": 0.693595402681769,
"learning_rate": 3.7995110882105125e-05,
"loss": 4.1879,
"step": 1041
},
{
"epoch": 0.6942616806862664,
"learning_rate": 3.797205439771938e-05,
"loss": 4.257,
"step": 1042
},
{
"epoch": 0.6949279586907637,
"learning_rate": 3.794898280473955e-05,
"loss": 4.1959,
"step": 1043
},
{
"epoch": 0.6955942366952611,
"learning_rate": 3.792589613003719e-05,
"loss": 4.2129,
"step": 1044
},
{
"epoch": 0.6962605146997585,
"learning_rate": 3.790279440050143e-05,
"loss": 4.1745,
"step": 1045
},
{
"epoch": 0.6969267927042558,
"learning_rate": 3.7879677643038905e-05,
"loss": 4.2004,
"step": 1046
},
{
"epoch": 0.6975930707087532,
"learning_rate": 3.7856545884573816e-05,
"loss": 4.1924,
"step": 1047
},
{
"epoch": 0.6982593487132506,
"learning_rate": 3.783339915204777e-05,
"loss": 4.2131,
"step": 1048
},
{
"epoch": 0.6989256267177479,
"learning_rate": 3.781023747241985e-05,
"loss": 4.2264,
"step": 1049
},
{
"epoch": 0.6995919047222453,
"learning_rate": 3.7787060872666536e-05,
"loss": 4.184,
"step": 1050
},
{
"epoch": 0.7002581827267428,
"learning_rate": 3.776386937978169e-05,
"loss": 4.1944,
"step": 1051
},
{
"epoch": 0.7009244607312402,
"learning_rate": 3.7740663020776534e-05,
"loss": 4.2569,
"step": 1052
},
{
"epoch": 0.7015907387357375,
"learning_rate": 3.7717441822679576e-05,
"loss": 4.1988,
"step": 1053
},
{
"epoch": 0.7022570167402349,
"learning_rate": 3.769420581253662e-05,
"loss": 4.1987,
"step": 1054
},
{
"epoch": 0.7029232947447323,
"learning_rate": 3.767095501741073e-05,
"loss": 4.1454,
"step": 1055
},
{
"epoch": 0.7035895727492296,
"learning_rate": 3.764768946438219e-05,
"loss": 4.2162,
"step": 1056
},
{
"epoch": 0.704255850753727,
"learning_rate": 3.762440918054844e-05,
"loss": 4.2183,
"step": 1057
},
{
"epoch": 0.7049221287582244,
"learning_rate": 3.760111419302412e-05,
"loss": 4.1844,
"step": 1058
},
{
"epoch": 0.7055884067627217,
"learning_rate": 3.757780452894098e-05,
"loss": 4.2091,
"step": 1059
},
{
"epoch": 0.7062546847672191,
"learning_rate": 3.755448021544785e-05,
"loss": 4.1752,
"step": 1060
},
{
"epoch": 0.7069209627717165,
"learning_rate": 3.753114127971065e-05,
"loss": 4.1877,
"step": 1061
},
{
"epoch": 0.7075872407762138,
"learning_rate": 3.75077877489123e-05,
"loss": 4.2283,
"step": 1062
},
{
"epoch": 0.7082535187807113,
"learning_rate": 3.748441965025275e-05,
"loss": 4.1877,
"step": 1063
},
{
"epoch": 0.7089197967852087,
"learning_rate": 3.7461037010948884e-05,
"loss": 4.2186,
"step": 1064
},
{
"epoch": 0.709586074789706,
"learning_rate": 3.743763985823454e-05,
"loss": 4.2735,
"step": 1065
},
{
"epoch": 0.7102523527942034,
"learning_rate": 3.7414228219360474e-05,
"loss": 4.1688,
"step": 1066
},
{
"epoch": 0.7109186307987008,
"learning_rate": 3.739080212159429e-05,
"loss": 4.241,
"step": 1067
},
{
"epoch": 0.7115849088031981,
"learning_rate": 3.736736159222042e-05,
"loss": 4.1769,
"step": 1068
},
{
"epoch": 0.7122511868076955,
"learning_rate": 3.7343906658540154e-05,
"loss": 4.2333,
"step": 1069
},
{
"epoch": 0.7129174648121929,
"learning_rate": 3.732043734787152e-05,
"loss": 4.2128,
"step": 1070
},
{
"epoch": 0.7135837428166902,
"learning_rate": 3.72969536875493e-05,
"loss": 4.181,
"step": 1071
},
{
"epoch": 0.7142500208211876,
"learning_rate": 3.727345570492499e-05,
"loss": 4.1877,
"step": 1072
},
{
"epoch": 0.714916298825685,
"learning_rate": 3.724994342736676e-05,
"loss": 4.1797,
"step": 1073
},
{
"epoch": 0.7155825768301824,
"learning_rate": 3.722641688225944e-05,
"loss": 4.2154,
"step": 1074
},
{
"epoch": 0.7162488548346798,
"learning_rate": 3.7202876097004494e-05,
"loss": 4.1815,
"step": 1075
},
{
"epoch": 0.7169151328391772,
"learning_rate": 3.7179321099019916e-05,
"loss": 4.1886,
"step": 1076
},
{
"epoch": 0.7175814108436745,
"learning_rate": 3.715575191574031e-05,
"loss": 4.1979,
"step": 1077
},
{
"epoch": 0.7182476888481719,
"learning_rate": 3.7132168574616786e-05,
"loss": 4.1575,
"step": 1078
},
{
"epoch": 0.7189139668526693,
"learning_rate": 3.710857110311692e-05,
"loss": 4.187,
"step": 1079
},
{
"epoch": 0.7195802448571667,
"learning_rate": 3.7084959528724785e-05,
"loss": 4.1499,
"step": 1080
},
{
"epoch": 0.720246522861664,
"learning_rate": 3.706133387894084e-05,
"loss": 4.1859,
"step": 1081
},
{
"epoch": 0.7209128008661614,
"learning_rate": 3.703769418128197e-05,
"loss": 4.2187,
"step": 1082
},
{
"epoch": 0.7215790788706588,
"learning_rate": 3.7014040463281395e-05,
"loss": 4.2081,
"step": 1083
},
{
"epoch": 0.7222453568751561,
"learning_rate": 3.699037275248869e-05,
"loss": 4.1752,
"step": 1084
},
{
"epoch": 0.7229116348796535,
"learning_rate": 3.696669107646971e-05,
"loss": 4.1821,
"step": 1085
},
{
"epoch": 0.723577912884151,
"learning_rate": 3.694299546280657e-05,
"loss": 4.2302,
"step": 1086
},
{
"epoch": 0.7242441908886483,
"learning_rate": 3.691928593909766e-05,
"loss": 4.135,
"step": 1087
},
{
"epoch": 0.7249104688931457,
"learning_rate": 3.68955625329575e-05,
"loss": 4.1695,
"step": 1088
},
{
"epoch": 0.7255767468976431,
"learning_rate": 3.687182527201684e-05,
"loss": 4.1922,
"step": 1089
},
{
"epoch": 0.7262430249021404,
"learning_rate": 3.684807418392255e-05,
"loss": 4.1641,
"step": 1090
},
{
"epoch": 0.7269093029066378,
"learning_rate": 3.6824309296337584e-05,
"loss": 4.1859,
"step": 1091
},
{
"epoch": 0.7275755809111352,
"learning_rate": 3.6800530636941e-05,
"loss": 4.2159,
"step": 1092
},
{
"epoch": 0.7282418589156325,
"learning_rate": 3.677673823342786e-05,
"loss": 4.215,
"step": 1093
},
{
"epoch": 0.7289081369201299,
"learning_rate": 3.675293211350928e-05,
"loss": 4.1805,
"step": 1094
},
{
"epoch": 0.7295744149246273,
"learning_rate": 3.6729112304912305e-05,
"loss": 4.2059,
"step": 1095
},
{
"epoch": 0.7302406929291246,
"learning_rate": 3.6705278835379945e-05,
"loss": 4.2213,
"step": 1096
},
{
"epoch": 0.730906970933622,
"learning_rate": 3.6681431732671135e-05,
"loss": 4.2346,
"step": 1097
},
{
"epoch": 0.7315732489381195,
"learning_rate": 3.665757102456067e-05,
"loss": 4.1891,
"step": 1098
},
{
"epoch": 0.7322395269426168,
"learning_rate": 3.6633696738839176e-05,
"loss": 4.2064,
"step": 1099
},
{
"epoch": 0.7329058049471142,
"learning_rate": 3.660980890331313e-05,
"loss": 4.2071,
"step": 1100
},
{
"epoch": 0.7335720829516116,
"learning_rate": 3.658590754580476e-05,
"loss": 4.2776,
"step": 1101
},
{
"epoch": 0.734238360956109,
"learning_rate": 3.656199269415206e-05,
"loss": 4.1662,
"step": 1102
},
{
"epoch": 0.7349046389606063,
"learning_rate": 3.6538064376208745e-05,
"loss": 4.1613,
"step": 1103
},
{
"epoch": 0.7355709169651037,
"learning_rate": 3.651412261984419e-05,
"loss": 4.1852,
"step": 1104
},
{
"epoch": 0.736237194969601,
"learning_rate": 3.649016745294345e-05,
"loss": 4.1927,
"step": 1105
},
{
"epoch": 0.7369034729740984,
"learning_rate": 3.646619890340718e-05,
"loss": 4.1849,
"step": 1106
},
{
"epoch": 0.7375697509785958,
"learning_rate": 3.644221699915162e-05,
"loss": 4.1726,
"step": 1107
},
{
"epoch": 0.7382360289830932,
"learning_rate": 3.6418221768108586e-05,
"loss": 4.1943,
"step": 1108
},
{
"epoch": 0.7389023069875905,
"learning_rate": 3.639421323822539e-05,
"loss": 4.2299,
"step": 1109
},
{
"epoch": 0.739568584992088,
"learning_rate": 3.637019143746485e-05,
"loss": 4.1848,
"step": 1110
},
{
"epoch": 0.7402348629965854,
"learning_rate": 3.634615639380524e-05,
"loss": 4.2036,
"step": 1111
},
{
"epoch": 0.7409011410010827,
"learning_rate": 3.632210813524025e-05,
"loss": 4.2011,
"step": 1112
},
{
"epoch": 0.7415674190055801,
"learning_rate": 3.629804668977897e-05,
"loss": 4.1585,
"step": 1113
},
{
"epoch": 0.7422336970100775,
"learning_rate": 3.627397208544583e-05,
"loss": 4.2146,
"step": 1114
},
{
"epoch": 0.7428999750145748,
"learning_rate": 3.624988435028062e-05,
"loss": 4.1856,
"step": 1115
},
{
"epoch": 0.7435662530190722,
"learning_rate": 3.622578351233838e-05,
"loss": 4.1601,
"step": 1116
},
{
"epoch": 0.7442325310235696,
"learning_rate": 3.6201669599689465e-05,
"loss": 4.2063,
"step": 1117
},
{
"epoch": 0.7448988090280669,
"learning_rate": 3.6177542640419404e-05,
"loss": 4.1957,
"step": 1118
},
{
"epoch": 0.7455650870325643,
"learning_rate": 3.615340266262895e-05,
"loss": 4.1461,
"step": 1119
},
{
"epoch": 0.7462313650370617,
"learning_rate": 3.612924969443401e-05,
"loss": 4.1819,
"step": 1120
},
{
"epoch": 0.7468976430415591,
"learning_rate": 3.610508376396564e-05,
"loss": 4.2322,
"step": 1121
},
{
"epoch": 0.7475639210460565,
"learning_rate": 3.608090489936997e-05,
"loss": 4.2183,
"step": 1122
},
{
"epoch": 0.7482301990505539,
"learning_rate": 3.60567131288082e-05,
"loss": 4.1577,
"step": 1123
},
{
"epoch": 0.7488964770550512,
"learning_rate": 3.6032508480456555e-05,
"loss": 4.2218,
"step": 1124
},
{
"epoch": 0.7495627550595486,
"learning_rate": 3.600829098250629e-05,
"loss": 4.2198,
"step": 1125
},
{
"epoch": 0.750229033064046,
"learning_rate": 3.5984060663163586e-05,
"loss": 4.1907,
"step": 1126
},
{
"epoch": 0.7508953110685433,
"learning_rate": 3.595981755064959e-05,
"loss": 4.2181,
"step": 1127
},
{
"epoch": 0.7515615890730407,
"learning_rate": 3.5935561673200314e-05,
"loss": 4.208,
"step": 1128
},
{
"epoch": 0.7522278670775381,
"learning_rate": 3.591129305906668e-05,
"loss": 4.1721,
"step": 1129
},
{
"epoch": 0.7528941450820354,
"learning_rate": 3.5887011736514406e-05,
"loss": 4.215,
"step": 1130
},
{
"epoch": 0.7535604230865328,
"learning_rate": 3.586271773382403e-05,
"loss": 4.1384,
"step": 1131
},
{
"epoch": 0.7542267010910302,
"learning_rate": 3.5838411079290865e-05,
"loss": 4.2322,
"step": 1132
},
{
"epoch": 0.7548929790955277,
"learning_rate": 3.581409180122494e-05,
"loss": 4.1533,
"step": 1133
},
{
"epoch": 0.755559257100025,
"learning_rate": 3.5789759927951e-05,
"loss": 4.2363,
"step": 1134
},
{
"epoch": 0.7562255351045224,
"learning_rate": 3.576541548780847e-05,
"loss": 4.1495,
"step": 1135
},
{
"epoch": 0.7568918131090198,
"learning_rate": 3.5741058509151383e-05,
"loss": 4.2378,
"step": 1136
},
{
"epoch": 0.7575580911135171,
"learning_rate": 3.57166890203484e-05,
"loss": 4.1475,
"step": 1137
},
{
"epoch": 0.7582243691180145,
"learning_rate": 3.569230704978274e-05,
"loss": 4.1475,
"step": 1138
},
{
"epoch": 0.7588906471225119,
"learning_rate": 3.5667912625852164e-05,
"loss": 4.2308,
"step": 1139
},
{
"epoch": 0.7595569251270092,
"learning_rate": 3.5643505776968935e-05,
"loss": 4.1621,
"step": 1140
},
{
"epoch": 0.7602232031315066,
"learning_rate": 3.5619086531559796e-05,
"loss": 4.1705,
"step": 1141
},
{
"epoch": 0.760889481136004,
"learning_rate": 3.559465491806592e-05,
"loss": 4.2135,
"step": 1142
},
{
"epoch": 0.7615557591405013,
"learning_rate": 3.557021096494288e-05,
"loss": 4.2271,
"step": 1143
},
{
"epoch": 0.7622220371449987,
"learning_rate": 3.554575470066064e-05,
"loss": 4.1981,
"step": 1144
},
{
"epoch": 0.7628883151494962,
"learning_rate": 3.552128615370348e-05,
"loss": 4.1845,
"step": 1145
},
{
"epoch": 0.7635545931539935,
"learning_rate": 3.5496805352570006e-05,
"loss": 4.2063,
"step": 1146
},
{
"epoch": 0.7642208711584909,
"learning_rate": 3.5472312325773075e-05,
"loss": 4.191,
"step": 1147
},
{
"epoch": 0.7648871491629883,
"learning_rate": 3.544780710183981e-05,
"loss": 4.2667,
"step": 1148
},
{
"epoch": 0.7655534271674856,
"learning_rate": 3.5423289709311516e-05,
"loss": 4.2344,
"step": 1149
},
{
"epoch": 0.766219705171983,
"learning_rate": 3.539876017674367e-05,
"loss": 4.1621,
"step": 1150
},
{
"epoch": 0.7668859831764804,
"learning_rate": 3.537421853270592e-05,
"loss": 4.1964,
"step": 1151
},
{
"epoch": 0.7675522611809777,
"learning_rate": 3.534966480578198e-05,
"loss": 4.1933,
"step": 1152
},
{
"epoch": 0.7682185391854751,
"learning_rate": 3.532509902456968e-05,
"loss": 4.1885,
"step": 1153
},
{
"epoch": 0.7688848171899725,
"learning_rate": 3.530052121768084e-05,
"loss": 4.218,
"step": 1154
},
{
"epoch": 0.7695510951944698,
"learning_rate": 3.5275931413741324e-05,
"loss": 4.2018,
"step": 1155
},
{
"epoch": 0.7702173731989672,
"learning_rate": 3.525132964139096e-05,
"loss": 4.2346,
"step": 1156
},
{
"epoch": 0.7708836512034647,
"learning_rate": 3.5226715929283506e-05,
"loss": 4.1779,
"step": 1157
},
{
"epoch": 0.7715499292079621,
"learning_rate": 3.520209030608662e-05,
"loss": 4.1478,
"step": 1158
},
{
"epoch": 0.7722162072124594,
"learning_rate": 3.517745280048188e-05,
"loss": 4.2018,
"step": 1159
},
{
"epoch": 0.7728824852169568,
"learning_rate": 3.515280344116464e-05,
"loss": 4.1713,
"step": 1160
},
{
"epoch": 0.7735487632214542,
"learning_rate": 3.51281422568441e-05,
"loss": 4.2154,
"step": 1161
},
{
"epoch": 0.7742150412259515,
"learning_rate": 3.5103469276243216e-05,
"loss": 4.2739,
"step": 1162
},
{
"epoch": 0.7748813192304489,
"learning_rate": 3.50787845280987e-05,
"loss": 4.2029,
"step": 1163
},
{
"epoch": 0.7755475972349463,
"learning_rate": 3.505408804116095e-05,
"loss": 4.2501,
"step": 1164
},
{
"epoch": 0.7762138752394436,
"learning_rate": 3.502937984419405e-05,
"loss": 4.1968,
"step": 1165
},
{
"epoch": 0.776880153243941,
"learning_rate": 3.500465996597571e-05,
"loss": 4.1449,
"step": 1166
},
{
"epoch": 0.7775464312484384,
"learning_rate": 3.497992843529726e-05,
"loss": 4.1505,
"step": 1167
},
{
"epoch": 0.7782127092529358,
"learning_rate": 3.495518528096359e-05,
"loss": 4.2196,
"step": 1168
},
{
"epoch": 0.7788789872574332,
"learning_rate": 3.493043053179314e-05,
"loss": 4.192,
"step": 1169
},
{
"epoch": 0.7795452652619306,
"learning_rate": 3.4905664216617836e-05,
"loss": 4.2375,
"step": 1170
},
{
"epoch": 0.7802115432664279,
"learning_rate": 3.4880886364283095e-05,
"loss": 4.1485,
"step": 1171
},
{
"epoch": 0.7808778212709253,
"learning_rate": 3.4856097003647756e-05,
"loss": 4.1517,
"step": 1172
},
{
"epoch": 0.7815440992754227,
"learning_rate": 3.4831296163584074e-05,
"loss": 4.2255,
"step": 1173
},
{
"epoch": 0.78221037727992,
"learning_rate": 3.480648387297767e-05,
"loss": 4.2144,
"step": 1174
},
{
"epoch": 0.7828766552844174,
"learning_rate": 3.4781660160727493e-05,
"loss": 4.2423,
"step": 1175
},
{
"epoch": 0.7835429332889148,
"learning_rate": 3.47568250557458e-05,
"loss": 4.2127,
"step": 1176
},
{
"epoch": 0.7842092112934121,
"learning_rate": 3.4731978586958134e-05,
"loss": 4.1583,
"step": 1177
},
{
"epoch": 0.7848754892979095,
"learning_rate": 3.470712078330324e-05,
"loss": 4.1945,
"step": 1178
},
{
"epoch": 0.7855417673024069,
"learning_rate": 3.4682251673733094e-05,
"loss": 4.2334,
"step": 1179
},
{
"epoch": 0.7862080453069044,
"learning_rate": 3.465737128721281e-05,
"loss": 4.1714,
"step": 1180
},
{
"epoch": 0.7868743233114017,
"learning_rate": 3.463247965272069e-05,
"loss": 4.1948,
"step": 1181
},
{
"epoch": 0.7875406013158991,
"learning_rate": 3.460757679924808e-05,
"loss": 4.1697,
"step": 1182
},
{
"epoch": 0.7882068793203965,
"learning_rate": 3.4582662755799414e-05,
"loss": 4.1957,
"step": 1183
},
{
"epoch": 0.7888731573248938,
"learning_rate": 3.4557737551392174e-05,
"loss": 4.1802,
"step": 1184
},
{
"epoch": 0.7895394353293912,
"learning_rate": 3.45328012150568e-05,
"loss": 4.1995,
"step": 1185
},
{
"epoch": 0.7902057133338886,
"learning_rate": 3.4507853775836745e-05,
"loss": 4.2059,
"step": 1186
},
{
"epoch": 0.7908719913383859,
"learning_rate": 3.4482895262788375e-05,
"loss": 4.2229,
"step": 1187
},
{
"epoch": 0.7915382693428833,
"learning_rate": 3.4457925704980944e-05,
"loss": 4.1879,
"step": 1188
},
{
"epoch": 0.7922045473473807,
"learning_rate": 3.4432945131496576e-05,
"loss": 4.1924,
"step": 1189
},
{
"epoch": 0.792870825351878,
"learning_rate": 3.440795357143023e-05,
"loss": 4.2252,
"step": 1190
},
{
"epoch": 0.7935371033563754,
"learning_rate": 3.438295105388966e-05,
"loss": 4.2348,
"step": 1191
},
{
"epoch": 0.7942033813608729,
"learning_rate": 3.4357937607995364e-05,
"loss": 4.2074,
"step": 1192
},
{
"epoch": 0.7948696593653702,
"learning_rate": 3.4332913262880606e-05,
"loss": 4.2365,
"step": 1193
},
{
"epoch": 0.7955359373698676,
"learning_rate": 3.430787804769131e-05,
"loss": 4.2631,
"step": 1194
},
{
"epoch": 0.796202215374365,
"learning_rate": 3.428283199158609e-05,
"loss": 4.1769,
"step": 1195
},
{
"epoch": 0.7968684933788623,
"learning_rate": 3.425777512373613e-05,
"loss": 4.1932,
"step": 1196
},
{
"epoch": 0.7975347713833597,
"learning_rate": 3.4232707473325285e-05,
"loss": 4.2005,
"step": 1197
},
{
"epoch": 0.7982010493878571,
"learning_rate": 3.420762906954992e-05,
"loss": 4.1937,
"step": 1198
},
{
"epoch": 0.7988673273923544,
"learning_rate": 3.418253994161892e-05,
"loss": 4.198,
"step": 1199
},
{
"epoch": 0.7995336053968518,
"learning_rate": 3.415744011875369e-05,
"loss": 4.2279,
"step": 1200
},
{
"epoch": 0.8001998834013492,
"learning_rate": 3.4132329630188065e-05,
"loss": 4.2062,
"step": 1201
},
{
"epoch": 0.8008661614058465,
"learning_rate": 3.4107208505168315e-05,
"loss": 4.1865,
"step": 1202
},
{
"epoch": 0.8015324394103439,
"learning_rate": 3.40820767729531e-05,
"loss": 4.2269,
"step": 1203
},
{
"epoch": 0.8021987174148414,
"learning_rate": 3.405693446281343e-05,
"loss": 4.1911,
"step": 1204
},
{
"epoch": 0.8028649954193388,
"learning_rate": 3.403178160403263e-05,
"loss": 4.1989,
"step": 1205
},
{
"epoch": 0.8035312734238361,
"learning_rate": 3.400661822590632e-05,
"loss": 4.1575,
"step": 1206
},
{
"epoch": 0.8041975514283335,
"learning_rate": 3.398144435774237e-05,
"loss": 4.1985,
"step": 1207
},
{
"epoch": 0.8048638294328309,
"learning_rate": 3.395626002886087e-05,
"loss": 4.1742,
"step": 1208
},
{
"epoch": 0.8055301074373282,
"learning_rate": 3.393106526859408e-05,
"loss": 4.1822,
"step": 1209
},
{
"epoch": 0.8061963854418256,
"learning_rate": 3.390586010628643e-05,
"loss": 4.2005,
"step": 1210
},
{
"epoch": 0.806862663446323,
"learning_rate": 3.3880644571294445e-05,
"loss": 4.2298,
"step": 1211
},
{
"epoch": 0.8075289414508203,
"learning_rate": 3.3855418692986755e-05,
"loss": 4.1902,
"step": 1212
},
{
"epoch": 0.8081952194553177,
"learning_rate": 3.383018250074401e-05,
"loss": 4.2057,
"step": 1213
},
{
"epoch": 0.8088614974598151,
"learning_rate": 3.380493602395888e-05,
"loss": 4.1824,
"step": 1214
},
{
"epoch": 0.8095277754643125,
"learning_rate": 3.3779679292036036e-05,
"loss": 4.1724,
"step": 1215
},
{
"epoch": 0.8101940534688099,
"learning_rate": 3.375441233439207e-05,
"loss": 4.178,
"step": 1216
},
{
"epoch": 0.8108603314733073,
"learning_rate": 3.372913518045548e-05,
"loss": 4.1541,
"step": 1217
},
{
"epoch": 0.8115266094778046,
"learning_rate": 3.370384785966667e-05,
"loss": 4.2326,
"step": 1218
},
{
"epoch": 0.812192887482302,
"learning_rate": 3.367855040147785e-05,
"loss": 4.2239,
"step": 1219
},
{
"epoch": 0.8128591654867994,
"learning_rate": 3.365324283535305e-05,
"loss": 4.1752,
"step": 1220
},
{
"epoch": 0.8135254434912967,
"learning_rate": 3.362792519076808e-05,
"loss": 4.146,
"step": 1221
},
{
"epoch": 0.8141917214957941,
"learning_rate": 3.3602597497210496e-05,
"loss": 4.1812,
"step": 1222
},
{
"epoch": 0.8148579995002915,
"learning_rate": 3.3577259784179514e-05,
"loss": 4.161,
"step": 1223
},
{
"epoch": 0.8155242775047888,
"learning_rate": 3.355191208118608e-05,
"loss": 4.1857,
"step": 1224
},
{
"epoch": 0.8161905555092862,
"learning_rate": 3.352655441775273e-05,
"loss": 4.18,
"step": 1225
},
{
"epoch": 0.8168568335137836,
"learning_rate": 3.3501186823413636e-05,
"loss": 4.174,
"step": 1226
},
{
"epoch": 0.817523111518281,
"learning_rate": 3.34758093277145e-05,
"loss": 4.236,
"step": 1227
},
{
"epoch": 0.8181893895227784,
"learning_rate": 3.3450421960212566e-05,
"loss": 4.1865,
"step": 1228
},
{
"epoch": 0.8188556675272758,
"learning_rate": 3.342502475047661e-05,
"loss": 4.1717,
"step": 1229
},
{
"epoch": 0.8195219455317732,
"learning_rate": 3.339961772808683e-05,
"loss": 4.1692,
"step": 1230
},
{
"epoch": 0.8201882235362705,
"learning_rate": 3.337420092263487e-05,
"loss": 4.1551,
"step": 1231
},
{
"epoch": 0.8208545015407679,
"learning_rate": 3.3348774363723764e-05,
"loss": 4.2164,
"step": 1232
},
{
"epoch": 0.8215207795452653,
"learning_rate": 3.332333808096792e-05,
"loss": 4.1438,
"step": 1233
},
{
"epoch": 0.8221870575497626,
"learning_rate": 3.329789210399304e-05,
"loss": 4.1848,
"step": 1234
},
{
"epoch": 0.82285333555426,
"learning_rate": 3.327243646243615e-05,
"loss": 4.1501,
"step": 1235
},
{
"epoch": 0.8235196135587574,
"learning_rate": 3.324697118594552e-05,
"loss": 4.207,
"step": 1236
},
{
"epoch": 0.8241858915632547,
"learning_rate": 3.322149630418062e-05,
"loss": 4.1693,
"step": 1237
},
{
"epoch": 0.8248521695677521,
"learning_rate": 3.319601184681216e-05,
"loss": 4.1725,
"step": 1238
},
{
"epoch": 0.8255184475722496,
"learning_rate": 3.3170517843521945e-05,
"loss": 4.2209,
"step": 1239
},
{
"epoch": 0.8261847255767469,
"learning_rate": 3.3145014324002944e-05,
"loss": 4.1924,
"step": 1240
},
{
"epoch": 0.8268510035812443,
"learning_rate": 3.311950131795917e-05,
"loss": 4.1881,
"step": 1241
},
{
"epoch": 0.8275172815857417,
"learning_rate": 3.309397885510571e-05,
"loss": 4.1776,
"step": 1242
},
{
"epoch": 0.828183559590239,
"learning_rate": 3.306844696516867e-05,
"loss": 4.1847,
"step": 1243
},
{
"epoch": 0.8288498375947364,
"learning_rate": 3.304290567788512e-05,
"loss": 4.2368,
"step": 1244
},
{
"epoch": 0.8295161155992338,
"learning_rate": 3.3017355023003074e-05,
"loss": 4.1638,
"step": 1245
},
{
"epoch": 0.8301823936037311,
"learning_rate": 3.2991795030281466e-05,
"loss": 4.1773,
"step": 1246
},
{
"epoch": 0.8308486716082285,
"learning_rate": 3.2966225729490115e-05,
"loss": 4.2235,
"step": 1247
},
{
"epoch": 0.8315149496127259,
"learning_rate": 3.294064715040965e-05,
"loss": 4.1574,
"step": 1248
},
{
"epoch": 0.8321812276172232,
"learning_rate": 3.291505932283154e-05,
"loss": 4.1527,
"step": 1249
},
{
"epoch": 0.8328475056217207,
"learning_rate": 3.2889462276558006e-05,
"loss": 4.1635,
"step": 1250
},
{
"epoch": 0.8335137836262181,
"learning_rate": 3.286385604140201e-05,
"loss": 4.2183,
"step": 1251
},
{
"epoch": 0.8341800616307155,
"learning_rate": 3.2838240647187215e-05,
"loss": 4.1622,
"step": 1252
},
{
"epoch": 0.8348463396352128,
"learning_rate": 3.281261612374796e-05,
"loss": 4.2247,
"step": 1253
},
{
"epoch": 0.8355126176397102,
"learning_rate": 3.278698250092922e-05,
"loss": 4.1852,
"step": 1254
},
{
"epoch": 0.8361788956442076,
"learning_rate": 3.2761339808586536e-05,
"loss": 4.2069,
"step": 1255
},
{
"epoch": 0.8368451736487049,
"learning_rate": 3.273568807658605e-05,
"loss": 4.1801,
"step": 1256
},
{
"epoch": 0.8375114516532023,
"learning_rate": 3.271002733480441e-05,
"loss": 4.1809,
"step": 1257
},
{
"epoch": 0.8381777296576997,
"learning_rate": 3.268435761312879e-05,
"loss": 4.1678,
"step": 1258
},
{
"epoch": 0.838844007662197,
"learning_rate": 3.2658678941456764e-05,
"loss": 4.1984,
"step": 1259
},
{
"epoch": 0.8395102856666944,
"learning_rate": 3.2632991349696386e-05,
"loss": 4.171,
"step": 1260
},
{
"epoch": 0.8401765636711918,
"learning_rate": 3.260729486776608e-05,
"loss": 4.1957,
"step": 1261
},
{
"epoch": 0.8408428416756892,
"learning_rate": 3.25815895255946e-05,
"loss": 4.172,
"step": 1262
},
{
"epoch": 0.8415091196801866,
"learning_rate": 3.2555875353121066e-05,
"loss": 4.2088,
"step": 1263
},
{
"epoch": 0.842175397684684,
"learning_rate": 3.253015238029485e-05,
"loss": 4.1847,
"step": 1264
},
{
"epoch": 0.8428416756891813,
"learning_rate": 3.2504420637075585e-05,
"loss": 4.1772,
"step": 1265
},
{
"epoch": 0.8435079536936787,
"learning_rate": 3.247868015343311e-05,
"loss": 4.1804,
"step": 1266
},
{
"epoch": 0.8441742316981761,
"learning_rate": 3.245293095934745e-05,
"loss": 4.2242,
"step": 1267
},
{
"epoch": 0.8448405097026734,
"learning_rate": 3.2427173084808794e-05,
"loss": 4.1317,
"step": 1268
},
{
"epoch": 0.8455067877071708,
"learning_rate": 3.240140655981739e-05,
"loss": 4.1682,
"step": 1269
},
{
"epoch": 0.8461730657116682,
"learning_rate": 3.2375631414383616e-05,
"loss": 4.1477,
"step": 1270
},
{
"epoch": 0.8468393437161655,
"learning_rate": 3.2349847678527874e-05,
"loss": 4.2516,
"step": 1271
},
{
"epoch": 0.8475056217206629,
"learning_rate": 3.2324055382280546e-05,
"loss": 4.2314,
"step": 1272
},
{
"epoch": 0.8481718997251603,
"learning_rate": 3.229825455568201e-05,
"loss": 4.2002,
"step": 1273
},
{
"epoch": 0.8488381777296578,
"learning_rate": 3.227244522878258e-05,
"loss": 4.1379,
"step": 1274
},
{
"epoch": 0.8495044557341551,
"learning_rate": 3.224662743164246e-05,
"loss": 4.1655,
"step": 1275
},
{
"epoch": 0.8501707337386525,
"learning_rate": 3.222080119433171e-05,
"loss": 4.1165,
"step": 1276
},
{
"epoch": 0.8508370117431499,
"learning_rate": 3.219496654693026e-05,
"loss": 4.2215,
"step": 1277
},
{
"epoch": 0.8515032897476472,
"learning_rate": 3.216912351952778e-05,
"loss": 4.1923,
"step": 1278
},
{
"epoch": 0.8521695677521446,
"learning_rate": 3.214327214222375e-05,
"loss": 4.1695,
"step": 1279
},
{
"epoch": 0.852835845756642,
"learning_rate": 3.211741244512733e-05,
"loss": 4.2303,
"step": 1280
},
{
"epoch": 0.8535021237611393,
"learning_rate": 3.209154445835742e-05,
"loss": 4.1826,
"step": 1281
},
{
"epoch": 0.8541684017656367,
"learning_rate": 3.206566821204254e-05,
"loss": 4.1582,
"step": 1282
},
{
"epoch": 0.854834679770134,
"learning_rate": 3.203978373632082e-05,
"loss": 4.1805,
"step": 1283
},
{
"epoch": 0.8555009577746314,
"learning_rate": 3.201389106134001e-05,
"loss": 4.1363,
"step": 1284
},
{
"epoch": 0.8561672357791288,
"learning_rate": 3.198799021725741e-05,
"loss": 4.2343,
"step": 1285
},
{
"epoch": 0.8568335137836263,
"learning_rate": 3.196208123423978e-05,
"loss": 4.1874,
"step": 1286
},
{
"epoch": 0.8574997917881236,
"learning_rate": 3.1936164142463416e-05,
"loss": 4.1847,
"step": 1287
},
{
"epoch": 0.858166069792621,
"learning_rate": 3.191023897211405e-05,
"loss": 4.2139,
"step": 1288
},
{
"epoch": 0.8588323477971184,
"learning_rate": 3.1884305753386797e-05,
"loss": 4.2081,
"step": 1289
},
{
"epoch": 0.8594986258016157,
"learning_rate": 3.185836451648616e-05,
"loss": 4.1769,
"step": 1290
},
{
"epoch": 0.8601649038061131,
"learning_rate": 3.1832415291625995e-05,
"loss": 4.2019,
"step": 1291
},
{
"epoch": 0.8608311818106105,
"learning_rate": 3.1806458109029444e-05,
"loss": 4.1711,
"step": 1292
},
{
"epoch": 0.8614974598151078,
"learning_rate": 3.1780492998928916e-05,
"loss": 4.1914,
"step": 1293
},
{
"epoch": 0.8621637378196052,
"learning_rate": 3.175451999156607e-05,
"loss": 4.1314,
"step": 1294
},
{
"epoch": 0.8628300158241026,
"learning_rate": 3.1728539117191744e-05,
"loss": 4.1966,
"step": 1295
},
{
"epoch": 0.8634962938285999,
"learning_rate": 3.170255040606595e-05,
"loss": 4.1655,
"step": 1296
},
{
"epoch": 0.8641625718330974,
"learning_rate": 3.1676553888457824e-05,
"loss": 4.2147,
"step": 1297
},
{
"epoch": 0.8648288498375948,
"learning_rate": 3.165054959464558e-05,
"loss": 4.1501,
"step": 1298
},
{
"epoch": 0.8654951278420921,
"learning_rate": 3.162453755491655e-05,
"loss": 4.2317,
"step": 1299
},
{
"epoch": 0.8661614058465895,
"learning_rate": 3.159851779956699e-05,
"loss": 4.2176,
"step": 1300
},
{
"epoch": 0.8668276838510869,
"learning_rate": 3.157249035890222e-05,
"loss": 4.182,
"step": 1301
},
{
"epoch": 0.8674939618555843,
"learning_rate": 3.154645526323647e-05,
"loss": 4.1717,
"step": 1302
},
{
"epoch": 0.8681602398600816,
"learning_rate": 3.152041254289293e-05,
"loss": 4.2025,
"step": 1303
},
{
"epoch": 0.868826517864579,
"learning_rate": 3.14943622282036e-05,
"loss": 4.1741,
"step": 1304
},
{
"epoch": 0.8694927958690764,
"learning_rate": 3.146830434950941e-05,
"loss": 4.1703,
"step": 1305
},
{
"epoch": 0.8701590738735737,
"learning_rate": 3.144223893716003e-05,
"loss": 4.2098,
"step": 1306
},
{
"epoch": 0.8708253518780711,
"learning_rate": 3.1416166021513925e-05,
"loss": 4.2207,
"step": 1307
},
{
"epoch": 0.8714916298825685,
"learning_rate": 3.139008563293832e-05,
"loss": 4.1605,
"step": 1308
},
{
"epoch": 0.8721579078870659,
"learning_rate": 3.136399780180913e-05,
"loss": 4.2133,
"step": 1309
},
{
"epoch": 0.8728241858915633,
"learning_rate": 3.133790255851093e-05,
"loss": 4.1608,
"step": 1310
},
{
"epoch": 0.8734904638960607,
"learning_rate": 3.131179993343693e-05,
"loss": 4.141,
"step": 1311
},
{
"epoch": 0.874156741900558,
"learning_rate": 3.128568995698895e-05,
"loss": 4.1674,
"step": 1312
},
{
"epoch": 0.8748230199050554,
"learning_rate": 3.125957265957737e-05,
"loss": 4.195,
"step": 1313
},
{
"epoch": 0.8754892979095528,
"learning_rate": 3.1233448071621084e-05,
"loss": 4.2007,
"step": 1314
},
{
"epoch": 0.8761555759140501,
"learning_rate": 3.1207316223547484e-05,
"loss": 4.1968,
"step": 1315
},
{
"epoch": 0.8768218539185475,
"learning_rate": 3.1181177145792425e-05,
"loss": 4.1533,
"step": 1316
},
{
"epoch": 0.8774881319230449,
"learning_rate": 3.115503086880017e-05,
"loss": 4.1973,
"step": 1317
},
{
"epoch": 0.8781544099275422,
"learning_rate": 3.112887742302337e-05,
"loss": 4.1821,
"step": 1318
},
{
"epoch": 0.8788206879320396,
"learning_rate": 3.110271683892304e-05,
"loss": 4.175,
"step": 1319
},
{
"epoch": 0.879486965936537,
"learning_rate": 3.107654914696849e-05,
"loss": 4.159,
"step": 1320
},
{
"epoch": 0.8801532439410344,
"learning_rate": 3.105037437763732e-05,
"loss": 4.1871,
"step": 1321
},
{
"epoch": 0.8808195219455318,
"learning_rate": 3.102419256141536e-05,
"loss": 4.1653,
"step": 1322
},
{
"epoch": 0.8814857999500292,
"learning_rate": 3.0998003728796674e-05,
"loss": 4.1505,
"step": 1323
},
{
"epoch": 0.8821520779545265,
"learning_rate": 3.0971807910283465e-05,
"loss": 4.2243,
"step": 1324
},
{
"epoch": 0.8828183559590239,
"learning_rate": 3.094560513638609e-05,
"loss": 4.1488,
"step": 1325
},
{
"epoch": 0.8834846339635213,
"learning_rate": 3.091939543762301e-05,
"loss": 4.1625,
"step": 1326
},
{
"epoch": 0.8841509119680186,
"learning_rate": 3.089317884452076e-05,
"loss": 4.1729,
"step": 1327
},
{
"epoch": 0.884817189972516,
"learning_rate": 3.086695538761386e-05,
"loss": 4.1655,
"step": 1328
},
{
"epoch": 0.8854834679770134,
"learning_rate": 3.084072509744488e-05,
"loss": 4.2072,
"step": 1329
},
{
"epoch": 0.8861497459815108,
"learning_rate": 3.0814488004564323e-05,
"loss": 4.1859,
"step": 1330
},
{
"epoch": 0.8868160239860081,
"learning_rate": 3.078824413953061e-05,
"loss": 4.1646,
"step": 1331
},
{
"epoch": 0.8874823019905055,
"learning_rate": 3.076199353291005e-05,
"loss": 4.136,
"step": 1332
},
{
"epoch": 0.888148579995003,
"learning_rate": 3.073573621527682e-05,
"loss": 4.2069,
"step": 1333
},
{
"epoch": 0.8888148579995003,
"learning_rate": 3.070947221721291e-05,
"loss": 4.1074,
"step": 1334
},
{
"epoch": 0.8894811360039977,
"learning_rate": 3.0683201569308077e-05,
"loss": 4.2027,
"step": 1335
},
{
"epoch": 0.8901474140084951,
"learning_rate": 3.065692430215982e-05,
"loss": 4.1333,
"step": 1336
},
{
"epoch": 0.8908136920129924,
"learning_rate": 3.063064044637337e-05,
"loss": 4.176,
"step": 1337
},
{
"epoch": 0.8914799700174898,
"learning_rate": 3.060435003256161e-05,
"loss": 4.1859,
"step": 1338
},
{
"epoch": 0.8921462480219872,
"learning_rate": 3.0578053091345086e-05,
"loss": 4.202,
"step": 1339
},
{
"epoch": 0.8928125260264845,
"learning_rate": 3.055174965335192e-05,
"loss": 4.199,
"step": 1340
},
{
"epoch": 0.8934788040309819,
"learning_rate": 3.0525439749217824e-05,
"loss": 4.1932,
"step": 1341
},
{
"epoch": 0.8941450820354793,
"learning_rate": 3.0499123409586004e-05,
"loss": 4.1309,
"step": 1342
},
{
"epoch": 0.8948113600399766,
"learning_rate": 3.0472800665107205e-05,
"loss": 4.2043,
"step": 1343
},
{
"epoch": 0.8954776380444741,
"learning_rate": 3.044647154643962e-05,
"loss": 4.1564,
"step": 1344
},
{
"epoch": 0.8961439160489715,
"learning_rate": 3.0420136084248847e-05,
"loss": 4.2096,
"step": 1345
},
{
"epoch": 0.8968101940534688,
"learning_rate": 3.0393794309207884e-05,
"loss": 4.1411,
"step": 1346
},
{
"epoch": 0.8974764720579662,
"learning_rate": 3.036744625199709e-05,
"loss": 4.1767,
"step": 1347
},
{
"epoch": 0.8981427500624636,
"learning_rate": 3.0341091943304136e-05,
"loss": 4.1549,
"step": 1348
},
{
"epoch": 0.898809028066961,
"learning_rate": 3.031473141382396e-05,
"loss": 4.1656,
"step": 1349
},
{
"epoch": 0.8994753060714583,
"learning_rate": 3.0288364694258752e-05,
"loss": 4.2039,
"step": 1350
},
{
"epoch": 0.9001415840759557,
"learning_rate": 3.026199181531794e-05,
"loss": 4.1848,
"step": 1351
},
{
"epoch": 0.900807862080453,
"learning_rate": 3.0235612807718084e-05,
"loss": 4.1699,
"step": 1352
},
{
"epoch": 0.9014741400849504,
"learning_rate": 3.0209227702182903e-05,
"loss": 4.2408,
"step": 1353
},
{
"epoch": 0.9021404180894478,
"learning_rate": 3.0182836529443216e-05,
"loss": 4.1646,
"step": 1354
},
{
"epoch": 0.9028066960939451,
"learning_rate": 3.015643932023691e-05,
"loss": 4.1557,
"step": 1355
},
{
"epoch": 0.9034729740984426,
"learning_rate": 3.0130036105308902e-05,
"loss": 4.2057,
"step": 1356
},
{
"epoch": 0.90413925210294,
"learning_rate": 3.0103626915411093e-05,
"loss": 4.1323,
"step": 1357
},
{
"epoch": 0.9048055301074374,
"learning_rate": 3.007721178130237e-05,
"loss": 4.143,
"step": 1358
},
{
"epoch": 0.9054718081119347,
"learning_rate": 3.0050790733748514e-05,
"loss": 4.1681,
"step": 1359
},
{
"epoch": 0.9061380861164321,
"learning_rate": 3.00243638035222e-05,
"loss": 4.2,
"step": 1360
},
{
"epoch": 0.9068043641209295,
"learning_rate": 2.9997931021402975e-05,
"loss": 4.152,
"step": 1361
},
{
"epoch": 0.9074706421254268,
"learning_rate": 2.997149241817718e-05,
"loss": 4.1748,
"step": 1362
},
{
"epoch": 0.9081369201299242,
"learning_rate": 2.9945048024637935e-05,
"loss": 4.1845,
"step": 1363
},
{
"epoch": 0.9088031981344216,
"learning_rate": 2.991859787158512e-05,
"loss": 4.197,
"step": 1364
},
{
"epoch": 0.9094694761389189,
"learning_rate": 2.989214198982531e-05,
"loss": 4.1846,
"step": 1365
},
{
"epoch": 0.9101357541434163,
"learning_rate": 2.9865680410171764e-05,
"loss": 4.1566,
"step": 1366
},
{
"epoch": 0.9108020321479137,
"learning_rate": 2.9839213163444358e-05,
"loss": 4.1297,
"step": 1367
},
{
"epoch": 0.9114683101524111,
"learning_rate": 2.9812740280469596e-05,
"loss": 4.1658,
"step": 1368
},
{
"epoch": 0.9121345881569085,
"learning_rate": 2.9786261792080522e-05,
"loss": 4.1426,
"step": 1369
},
{
"epoch": 0.9128008661614059,
"learning_rate": 2.975977772911671e-05,
"loss": 4.1999,
"step": 1370
},
{
"epoch": 0.9134671441659032,
"learning_rate": 2.9733288122424246e-05,
"loss": 4.155,
"step": 1371
},
{
"epoch": 0.9141334221704006,
"learning_rate": 2.970679300285567e-05,
"loss": 4.1734,
"step": 1372
},
{
"epoch": 0.914799700174898,
"learning_rate": 2.9680292401269915e-05,
"loss": 4.2032,
"step": 1373
},
{
"epoch": 0.9154659781793953,
"learning_rate": 2.965378634853232e-05,
"loss": 4.1747,
"step": 1374
},
{
"epoch": 0.9161322561838927,
"learning_rate": 2.9627274875514588e-05,
"loss": 4.1845,
"step": 1375
},
{
"epoch": 0.9167985341883901,
"learning_rate": 2.9600758013094704e-05,
"loss": 4.1636,
"step": 1376
},
{
"epoch": 0.9174648121928874,
"learning_rate": 2.957423579215695e-05,
"loss": 4.2149,
"step": 1377
},
{
"epoch": 0.9181310901973848,
"learning_rate": 2.9547708243591836e-05,
"loss": 4.2194,
"step": 1378
},
{
"epoch": 0.9187973682018822,
"learning_rate": 2.952117539829609e-05,
"loss": 4.1763,
"step": 1379
},
{
"epoch": 0.9194636462063797,
"learning_rate": 2.949463728717261e-05,
"loss": 4.245,
"step": 1380
},
{
"epoch": 0.920129924210877,
"learning_rate": 2.9468093941130404e-05,
"loss": 4.1469,
"step": 1381
},
{
"epoch": 0.9207962022153744,
"learning_rate": 2.9441545391084608e-05,
"loss": 4.174,
"step": 1382
},
{
"epoch": 0.9214624802198718,
"learning_rate": 2.9414991667956403e-05,
"loss": 4.1846,
"step": 1383
},
{
"epoch": 0.9221287582243691,
"learning_rate": 2.9388432802672984e-05,
"loss": 4.2096,
"step": 1384
},
{
"epoch": 0.9227950362288665,
"learning_rate": 2.936186882616756e-05,
"loss": 4.2139,
"step": 1385
},
{
"epoch": 0.9234613142333639,
"learning_rate": 2.933529976937929e-05,
"loss": 4.1711,
"step": 1386
},
{
"epoch": 0.9241275922378612,
"learning_rate": 2.9308725663253223e-05,
"loss": 4.2087,
"step": 1387
},
{
"epoch": 0.9247938702423586,
"learning_rate": 2.928214653874031e-05,
"loss": 4.2039,
"step": 1388
},
{
"epoch": 0.925460148246856,
"learning_rate": 2.9255562426797362e-05,
"loss": 4.1971,
"step": 1389
},
{
"epoch": 0.9261264262513533,
"learning_rate": 2.922897335838696e-05,
"loss": 4.1804,
"step": 1390
},
{
"epoch": 0.9267927042558508,
"learning_rate": 2.920237936447749e-05,
"loss": 4.1864,
"step": 1391
},
{
"epoch": 0.9274589822603482,
"learning_rate": 2.917578047604305e-05,
"loss": 4.1159,
"step": 1392
},
{
"epoch": 0.9281252602648455,
"learning_rate": 2.9149176724063472e-05,
"loss": 4.1594,
"step": 1393
},
{
"epoch": 0.9287915382693429,
"learning_rate": 2.912256813952422e-05,
"loss": 4.2039,
"step": 1394
},
{
"epoch": 0.9294578162738403,
"learning_rate": 2.9095954753416397e-05,
"loss": 4.1591,
"step": 1395
},
{
"epoch": 0.9301240942783376,
"learning_rate": 2.9069336596736712e-05,
"loss": 4.1827,
"step": 1396
},
{
"epoch": 0.930790372282835,
"learning_rate": 2.9042713700487412e-05,
"loss": 4.1792,
"step": 1397
},
{
"epoch": 0.9314566502873324,
"learning_rate": 2.9016086095676264e-05,
"loss": 4.0958,
"step": 1398
},
{
"epoch": 0.9321229282918297,
"learning_rate": 2.8989453813316535e-05,
"loss": 4.1769,
"step": 1399
},
{
"epoch": 0.9327892062963271,
"learning_rate": 2.8962816884426945e-05,
"loss": 4.1237,
"step": 1400
},
{
"epoch": 0.9334554843008245,
"learning_rate": 2.8936175340031586e-05,
"loss": 4.1924,
"step": 1401
},
{
"epoch": 0.9341217623053218,
"learning_rate": 2.8909529211159958e-05,
"loss": 4.1995,
"step": 1402
},
{
"epoch": 0.9347880403098193,
"learning_rate": 2.888287852884691e-05,
"loss": 4.2064,
"step": 1403
},
{
"epoch": 0.9354543183143167,
"learning_rate": 2.885622332413256e-05,
"loss": 4.1767,
"step": 1404
},
{
"epoch": 0.9361205963188141,
"learning_rate": 2.882956362806232e-05,
"loss": 4.19,
"step": 1405
},
{
"epoch": 0.9367868743233114,
"learning_rate": 2.880289947168683e-05,
"loss": 4.2011,
"step": 1406
},
{
"epoch": 0.9374531523278088,
"learning_rate": 2.877623088606191e-05,
"loss": 4.2116,
"step": 1407
},
{
"epoch": 0.9381194303323062,
"learning_rate": 2.8749557902248558e-05,
"loss": 4.1801,
"step": 1408
},
{
"epoch": 0.9387857083368035,
"learning_rate": 2.8722880551312876e-05,
"loss": 4.1376,
"step": 1409
},
{
"epoch": 0.9394519863413009,
"learning_rate": 2.869619886432607e-05,
"loss": 4.1801,
"step": 1410
},
{
"epoch": 0.9401182643457983,
"learning_rate": 2.8669512872364386e-05,
"loss": 4.2048,
"step": 1411
},
{
"epoch": 0.9407845423502956,
"learning_rate": 2.8642822606509075e-05,
"loss": 4.1797,
"step": 1412
},
{
"epoch": 0.941450820354793,
"learning_rate": 2.861612809784639e-05,
"loss": 4.1792,
"step": 1413
},
{
"epoch": 0.9421170983592904,
"learning_rate": 2.8589429377467514e-05,
"loss": 4.1836,
"step": 1414
},
{
"epoch": 0.9427833763637878,
"learning_rate": 2.856272647646852e-05,
"loss": 4.1556,
"step": 1415
},
{
"epoch": 0.9434496543682852,
"learning_rate": 2.8536019425950373e-05,
"loss": 4.1608,
"step": 1416
},
{
"epoch": 0.9441159323727826,
"learning_rate": 2.8509308257018863e-05,
"loss": 4.1406,
"step": 1417
},
{
"epoch": 0.9447822103772799,
"learning_rate": 2.848259300078458e-05,
"loss": 4.1548,
"step": 1418
},
{
"epoch": 0.9454484883817773,
"learning_rate": 2.8455873688362862e-05,
"loss": 4.2307,
"step": 1419
},
{
"epoch": 0.9461147663862747,
"learning_rate": 2.8429150350873768e-05,
"loss": 4.213,
"step": 1420
},
{
"epoch": 0.946781044390772,
"learning_rate": 2.840242301944208e-05,
"loss": 4.1926,
"step": 1421
},
{
"epoch": 0.9474473223952694,
"learning_rate": 2.83756917251972e-05,
"loss": 4.1769,
"step": 1422
},
{
"epoch": 0.9481136003997668,
"learning_rate": 2.8348956499273144e-05,
"loss": 4.1715,
"step": 1423
},
{
"epoch": 0.9487798784042641,
"learning_rate": 2.832221737280853e-05,
"loss": 4.1705,
"step": 1424
},
{
"epoch": 0.9494461564087615,
"learning_rate": 2.8295474376946496e-05,
"loss": 4.1361,
"step": 1425
},
{
"epoch": 0.9501124344132589,
"learning_rate": 2.82687275428347e-05,
"loss": 4.1744,
"step": 1426
},
{
"epoch": 0.9507787124177564,
"learning_rate": 2.824197690162526e-05,
"loss": 4.1061,
"step": 1427
},
{
"epoch": 0.9514449904222537,
"learning_rate": 2.8215222484474762e-05,
"loss": 4.2072,
"step": 1428
},
{
"epoch": 0.9521112684267511,
"learning_rate": 2.8188464322544127e-05,
"loss": 4.1508,
"step": 1429
},
{
"epoch": 0.9527775464312485,
"learning_rate": 2.8161702446998694e-05,
"loss": 4.153,
"step": 1430
},
{
"epoch": 0.9534438244357458,
"learning_rate": 2.813493688900811e-05,
"loss": 4.1635,
"step": 1431
},
{
"epoch": 0.9541101024402432,
"learning_rate": 2.8108167679746294e-05,
"loss": 4.1986,
"step": 1432
},
{
"epoch": 0.9547763804447406,
"learning_rate": 2.8081394850391442e-05,
"loss": 4.1771,
"step": 1433
},
{
"epoch": 0.9554426584492379,
"learning_rate": 2.8054618432125947e-05,
"loss": 4.208,
"step": 1434
},
{
"epoch": 0.9561089364537353,
"learning_rate": 2.8027838456136397e-05,
"loss": 4.135,
"step": 1435
},
{
"epoch": 0.9567752144582327,
"learning_rate": 2.8001054953613513e-05,
"loss": 4.2017,
"step": 1436
},
{
"epoch": 0.95744149246273,
"learning_rate": 2.797426795575213e-05,
"loss": 4.1272,
"step": 1437
},
{
"epoch": 0.9581077704672275,
"learning_rate": 2.794747749375116e-05,
"loss": 4.1857,
"step": 1438
},
{
"epoch": 0.9587740484717249,
"learning_rate": 2.7920683598813536e-05,
"loss": 4.1879,
"step": 1439
},
{
"epoch": 0.9594403264762222,
"learning_rate": 2.7893886302146182e-05,
"loss": 4.2102,
"step": 1440
},
{
"epoch": 0.9601066044807196,
"learning_rate": 2.7867085634960016e-05,
"loss": 4.1789,
"step": 1441
},
{
"epoch": 0.960772882485217,
"learning_rate": 2.784028162846985e-05,
"loss": 4.1838,
"step": 1442
},
{
"epoch": 0.9614391604897143,
"learning_rate": 2.78134743138944e-05,
"loss": 4.1577,
"step": 1443
},
{
"epoch": 0.9621054384942117,
"learning_rate": 2.7786663722456235e-05,
"loss": 4.19,
"step": 1444
},
{
"epoch": 0.9627717164987091,
"learning_rate": 2.775984988538175e-05,
"loss": 4.1857,
"step": 1445
},
{
"epoch": 0.9634379945032064,
"learning_rate": 2.7733032833901085e-05,
"loss": 4.1385,
"step": 1446
},
{
"epoch": 0.9641042725077038,
"learning_rate": 2.7706212599248165e-05,
"loss": 4.1667,
"step": 1447
},
{
"epoch": 0.9647705505122012,
"learning_rate": 2.767938921266059e-05,
"loss": 4.1302,
"step": 1448
},
{
"epoch": 0.9654368285166985,
"learning_rate": 2.7652562705379663e-05,
"loss": 4.1594,
"step": 1449
},
{
"epoch": 0.966103106521196,
"learning_rate": 2.7625733108650298e-05,
"loss": 4.1545,
"step": 1450
},
{
"epoch": 0.9667693845256934,
"learning_rate": 2.7598900453721005e-05,
"loss": 4.1727,
"step": 1451
},
{
"epoch": 0.9674356625301908,
"learning_rate": 2.757206477184388e-05,
"loss": 4.1644,
"step": 1452
},
{
"epoch": 0.9681019405346881,
"learning_rate": 2.754522609427452e-05,
"loss": 4.1677,
"step": 1453
},
{
"epoch": 0.9687682185391855,
"learning_rate": 2.7518384452272013e-05,
"loss": 4.1916,
"step": 1454
},
{
"epoch": 0.9694344965436829,
"learning_rate": 2.7491539877098927e-05,
"loss": 4.1815,
"step": 1455
},
{
"epoch": 0.9701007745481802,
"learning_rate": 2.746469240002121e-05,
"loss": 4.2179,
"step": 1456
},
{
"epoch": 0.9707670525526776,
"learning_rate": 2.7437842052308206e-05,
"loss": 4.1488,
"step": 1457
},
{
"epoch": 0.971433330557175,
"learning_rate": 2.7410988865232612e-05,
"loss": 4.1159,
"step": 1458
},
{
"epoch": 0.9720996085616723,
"learning_rate": 2.7384132870070422e-05,
"loss": 4.1715,
"step": 1459
},
{
"epoch": 0.9727658865661697,
"learning_rate": 2.7357274098100895e-05,
"loss": 4.1435,
"step": 1460
},
{
"epoch": 0.9734321645706671,
"learning_rate": 2.7330412580606534e-05,
"loss": 4.1653,
"step": 1461
},
{
"epoch": 0.9740984425751645,
"learning_rate": 2.7303548348873032e-05,
"loss": 4.1381,
"step": 1462
},
{
"epoch": 0.9747647205796619,
"learning_rate": 2.7276681434189255e-05,
"loss": 4.1792,
"step": 1463
},
{
"epoch": 0.9754309985841593,
"learning_rate": 2.724981186784718e-05,
"loss": 4.102,
"step": 1464
},
{
"epoch": 0.9760972765886566,
"learning_rate": 2.722293968114188e-05,
"loss": 4.1529,
"step": 1465
},
{
"epoch": 0.976763554593154,
"learning_rate": 2.7196064905371478e-05,
"loss": 4.198,
"step": 1466
},
{
"epoch": 0.9774298325976514,
"learning_rate": 2.7169187571837118e-05,
"loss": 4.1859,
"step": 1467
},
{
"epoch": 0.9780961106021487,
"learning_rate": 2.7142307711842906e-05,
"loss": 4.1567,
"step": 1468
},
{
"epoch": 0.9787623886066461,
"learning_rate": 2.7115425356695918e-05,
"loss": 4.1669,
"step": 1469
},
{
"epoch": 0.9794286666111435,
"learning_rate": 2.708854053770611e-05,
"loss": 4.1741,
"step": 1470
},
{
"epoch": 0.9800949446156408,
"learning_rate": 2.7061653286186317e-05,
"loss": 4.1972,
"step": 1471
},
{
"epoch": 0.9807612226201382,
"learning_rate": 2.7034763633452227e-05,
"loss": 4.1864,
"step": 1472
},
{
"epoch": 0.9814275006246357,
"learning_rate": 2.7007871610822282e-05,
"loss": 4.1316,
"step": 1473
},
{
"epoch": 0.982093778629133,
"learning_rate": 2.6980977249617718e-05,
"loss": 4.2117,
"step": 1474
},
{
"epoch": 0.9827600566336304,
"learning_rate": 2.6954080581162494e-05,
"loss": 4.151,
"step": 1475
},
{
"epoch": 0.9834263346381278,
"learning_rate": 2.6927181636783232e-05,
"loss": 4.183,
"step": 1476
},
{
"epoch": 0.9840926126426252,
"learning_rate": 2.6900280447809235e-05,
"loss": 4.1645,
"step": 1477
},
{
"epoch": 0.9847588906471225,
"learning_rate": 2.6873377045572395e-05,
"loss": 4.142,
"step": 1478
},
{
"epoch": 0.9854251686516199,
"learning_rate": 2.6846471461407197e-05,
"loss": 4.1316,
"step": 1479
},
{
"epoch": 0.9860914466561173,
"learning_rate": 2.6819563726650665e-05,
"loss": 4.2257,
"step": 1480
},
{
"epoch": 0.9867577246606146,
"learning_rate": 2.679265387264232e-05,
"loss": 4.1344,
"step": 1481
},
{
"epoch": 0.987424002665112,
"learning_rate": 2.676574193072416e-05,
"loss": 4.2045,
"step": 1482
},
{
"epoch": 0.9880902806696094,
"learning_rate": 2.6738827932240612e-05,
"loss": 4.2043,
"step": 1483
},
{
"epoch": 0.9887565586741067,
"learning_rate": 2.6711911908538494e-05,
"loss": 4.1548,
"step": 1484
},
{
"epoch": 0.9894228366786042,
"learning_rate": 2.6684993890966985e-05,
"loss": 4.1902,
"step": 1485
},
{
"epoch": 0.9900891146831016,
"learning_rate": 2.6658073910877603e-05,
"loss": 4.1492,
"step": 1486
},
{
"epoch": 0.9907553926875989,
"learning_rate": 2.663115199962412e-05,
"loss": 4.1645,
"step": 1487
},
{
"epoch": 0.9914216706920963,
"learning_rate": 2.660422818856258e-05,
"loss": 4.1998,
"step": 1488
},
{
"epoch": 0.9920879486965937,
"learning_rate": 2.6577302509051232e-05,
"loss": 4.2077,
"step": 1489
},
{
"epoch": 0.992754226701091,
"learning_rate": 2.6550374992450504e-05,
"loss": 4.1839,
"step": 1490
},
{
"epoch": 0.9934205047055884,
"learning_rate": 2.6523445670122965e-05,
"loss": 4.185,
"step": 1491
},
{
"epoch": 0.9940867827100858,
"learning_rate": 2.6496514573433283e-05,
"loss": 4.1437,
"step": 1492
},
{
"epoch": 0.9947530607145831,
"learning_rate": 2.6469581733748196e-05,
"loss": 4.1607,
"step": 1493
},
{
"epoch": 0.9954193387190805,
"learning_rate": 2.6442647182436465e-05,
"loss": 4.1835,
"step": 1494
},
{
"epoch": 0.9960856167235779,
"learning_rate": 2.641571095086885e-05,
"loss": 4.1136,
"step": 1495
},
{
"epoch": 0.9967518947280752,
"learning_rate": 2.638877307041807e-05,
"loss": 4.1866,
"step": 1496
},
{
"epoch": 0.9974181727325727,
"learning_rate": 2.6361833572458767e-05,
"loss": 4.1814,
"step": 1497
},
{
"epoch": 0.9980844507370701,
"learning_rate": 2.6334892488367456e-05,
"loss": 4.1532,
"step": 1498
},
{
"epoch": 0.9987507287415675,
"learning_rate": 2.6307949849522506e-05,
"loss": 4.1098,
"step": 1499
},
{
"epoch": 0.9994170067460648,
"learning_rate": 2.62810056873041e-05,
"loss": 4.2455,
"step": 1500
},
{
"epoch": 1.0,
"learning_rate": 2.625406003309419e-05,
"loss": 4.1771,
"step": 1501
},
{
"epoch": 1.0006662780044975,
"learning_rate": 2.622711291827646e-05,
"loss": 4.1266,
"step": 1502
},
{
"epoch": 1.0013325560089947,
"learning_rate": 2.620016437423632e-05,
"loss": 4.147,
"step": 1503
},
{
"epoch": 1.0019988340134922,
"learning_rate": 2.6173214432360806e-05,
"loss": 4.1251,
"step": 1504
},
{
"epoch": 1.0026651120179895,
"learning_rate": 2.6146263124038617e-05,
"loss": 4.2098,
"step": 1505
},
{
"epoch": 1.003331390022487,
"learning_rate": 2.611931048066003e-05,
"loss": 4.1448,
"step": 1506
},
{
"epoch": 1.0039976680269842,
"learning_rate": 2.609235653361687e-05,
"loss": 4.1709,
"step": 1507
},
{
"epoch": 1.0046639460314817,
"learning_rate": 2.6065401314302494e-05,
"loss": 4.129,
"step": 1508
},
{
"epoch": 1.005330224035979,
"learning_rate": 2.6038444854111732e-05,
"loss": 4.1714,
"step": 1509
},
{
"epoch": 1.0059965020404764,
"learning_rate": 2.6011487184440853e-05,
"loss": 4.1849,
"step": 1510
},
{
"epoch": 1.0066627800449737,
"learning_rate": 2.598452833668755e-05,
"loss": 4.1568,
"step": 1511
},
{
"epoch": 1.0073290580494711,
"learning_rate": 2.595756834225089e-05,
"loss": 4.2149,
"step": 1512
},
{
"epoch": 1.0079953360539686,
"learning_rate": 2.5930607232531247e-05,
"loss": 4.1465,
"step": 1513
},
{
"epoch": 1.0086616140584659,
"learning_rate": 2.5903645038930326e-05,
"loss": 4.1686,
"step": 1514
},
{
"epoch": 1.0093278920629634,
"learning_rate": 2.5876681792851083e-05,
"loss": 4.2141,
"step": 1515
},
{
"epoch": 1.0099941700674606,
"learning_rate": 2.5849717525697687e-05,
"loss": 4.1441,
"step": 1516
},
{
"epoch": 1.010660448071958,
"learning_rate": 2.5822752268875517e-05,
"loss": 4.1267,
"step": 1517
},
{
"epoch": 1.0113267260764554,
"learning_rate": 2.5795786053791098e-05,
"loss": 4.2115,
"step": 1518
},
{
"epoch": 1.0119930040809528,
"learning_rate": 2.576881891185206e-05,
"loss": 4.1316,
"step": 1519
},
{
"epoch": 1.01265928208545,
"learning_rate": 2.574185087446714e-05,
"loss": 4.1417,
"step": 1520
},
{
"epoch": 1.0133255600899476,
"learning_rate": 2.5714881973046067e-05,
"loss": 4.1786,
"step": 1521
},
{
"epoch": 1.0139918380944448,
"learning_rate": 2.568791223899964e-05,
"loss": 4.2103,
"step": 1522
},
{
"epoch": 1.0146581160989423,
"learning_rate": 2.5660941703739576e-05,
"loss": 4.1982,
"step": 1523
},
{
"epoch": 1.0153243941034396,
"learning_rate": 2.5633970398678557e-05,
"loss": 4.115,
"step": 1524
},
{
"epoch": 1.015990672107937,
"learning_rate": 2.5606998355230143e-05,
"loss": 4.1682,
"step": 1525
},
{
"epoch": 1.0166569501124345,
"learning_rate": 2.5580025604808772e-05,
"loss": 4.1832,
"step": 1526
},
{
"epoch": 1.0173232281169318,
"learning_rate": 2.555305217882967e-05,
"loss": 4.1563,
"step": 1527
},
{
"epoch": 1.0179895061214292,
"learning_rate": 2.5526078108708906e-05,
"loss": 4.1292,
"step": 1528
},
{
"epoch": 1.0186557841259265,
"learning_rate": 2.5499103425863246e-05,
"loss": 4.1805,
"step": 1529
},
{
"epoch": 1.019322062130424,
"learning_rate": 2.5472128161710197e-05,
"loss": 4.176,
"step": 1530
},
{
"epoch": 1.0199883401349212,
"learning_rate": 2.544515234766794e-05,
"loss": 4.1919,
"step": 1531
},
{
"epoch": 1.0206546181394187,
"learning_rate": 2.5418176015155287e-05,
"loss": 4.1638,
"step": 1532
},
{
"epoch": 1.021320896143916,
"learning_rate": 2.5391199195591663e-05,
"loss": 4.1805,
"step": 1533
},
{
"epoch": 1.0219871741484134,
"learning_rate": 2.5364221920397064e-05,
"loss": 4.1425,
"step": 1534
},
{
"epoch": 1.0226534521529107,
"learning_rate": 2.5337244220991996e-05,
"loss": 4.2028,
"step": 1535
},
{
"epoch": 1.0233197301574082,
"learning_rate": 2.5310266128797493e-05,
"loss": 4.1684,
"step": 1536
},
{
"epoch": 1.0239860081619057,
"learning_rate": 2.5283287675235018e-05,
"loss": 4.141,
"step": 1537
},
{
"epoch": 1.024652286166403,
"learning_rate": 2.525630889172646e-05,
"loss": 4.2051,
"step": 1538
},
{
"epoch": 1.0253185641709004,
"learning_rate": 2.5229329809694097e-05,
"loss": 4.202,
"step": 1539
},
{
"epoch": 1.0259848421753976,
"learning_rate": 2.5202350460560563e-05,
"loss": 4.1491,
"step": 1540
},
{
"epoch": 1.0266511201798951,
"learning_rate": 2.5175370875748778e-05,
"loss": 4.1997,
"step": 1541
},
{
"epoch": 1.0273173981843924,
"learning_rate": 2.514839108668197e-05,
"loss": 4.2229,
"step": 1542
},
{
"epoch": 1.0279836761888899,
"learning_rate": 2.512141112478358e-05,
"loss": 4.174,
"step": 1543
},
{
"epoch": 1.0286499541933871,
"learning_rate": 2.5094431021477245e-05,
"loss": 4.1598,
"step": 1544
},
{
"epoch": 1.0293162321978846,
"learning_rate": 2.50674508081868e-05,
"loss": 4.2215,
"step": 1545
},
{
"epoch": 1.0299825102023819,
"learning_rate": 2.504047051633618e-05,
"loss": 4.1654,
"step": 1546
},
{
"epoch": 1.0306487882068793,
"learning_rate": 2.5013490177349414e-05,
"loss": 4.2187,
"step": 1547
},
{
"epoch": 1.0313150662113766,
"learning_rate": 2.498650982265059e-05,
"loss": 4.1835,
"step": 1548
},
{
"epoch": 1.031981344215874,
"learning_rate": 2.4959529483663826e-05,
"loss": 4.2273,
"step": 1549
},
{
"epoch": 1.0326476222203715,
"learning_rate": 2.493254919181321e-05,
"loss": 4.1518,
"step": 1550
},
{
"epoch": 1.0333139002248688,
"learning_rate": 2.4905568978522754e-05,
"loss": 4.2227,
"step": 1551
},
{
"epoch": 1.0339801782293663,
"learning_rate": 2.487858887521643e-05,
"loss": 4.1928,
"step": 1552
},
{
"epoch": 1.0346464562338635,
"learning_rate": 2.4851608913318035e-05,
"loss": 4.168,
"step": 1553
},
{
"epoch": 1.035312734238361,
"learning_rate": 2.482462912425122e-05,
"loss": 4.1058,
"step": 1554
},
{
"epoch": 1.0359790122428583,
"learning_rate": 2.4797649539439443e-05,
"loss": 4.2087,
"step": 1555
},
{
"epoch": 1.0366452902473557,
"learning_rate": 2.4770670190305905e-05,
"loss": 4.1917,
"step": 1556
},
{
"epoch": 1.037311568251853,
"learning_rate": 2.4743691108273542e-05,
"loss": 4.1941,
"step": 1557
},
{
"epoch": 1.0379778462563505,
"learning_rate": 2.4716712324764988e-05,
"loss": 4.1881,
"step": 1558
},
{
"epoch": 1.0386441242608477,
"learning_rate": 2.4689733871202513e-05,
"loss": 4.1521,
"step": 1559
},
{
"epoch": 1.0393104022653452,
"learning_rate": 2.4662755779008e-05,
"loss": 4.1559,
"step": 1560
},
{
"epoch": 1.0399766802698427,
"learning_rate": 2.4635778079602942e-05,
"loss": 4.1822,
"step": 1561
},
{
"epoch": 1.04064295827434,
"learning_rate": 2.4608800804408342e-05,
"loss": 4.1995,
"step": 1562
},
{
"epoch": 1.0413092362788374,
"learning_rate": 2.4581823984844722e-05,
"loss": 4.2087,
"step": 1563
},
{
"epoch": 1.0419755142833347,
"learning_rate": 2.455484765233207e-05,
"loss": 4.0842,
"step": 1564
},
{
"epoch": 1.0426417922878322,
"learning_rate": 2.4527871838289812e-05,
"loss": 4.1475,
"step": 1565
},
{
"epoch": 1.0433080702923294,
"learning_rate": 2.4500896574136764e-05,
"loss": 4.1403,
"step": 1566
},
{
"epoch": 1.043974348296827,
"learning_rate": 2.44739218912911e-05,
"loss": 4.149,
"step": 1567
},
{
"epoch": 1.0446406263013241,
"learning_rate": 2.444694782117033e-05,
"loss": 4.1652,
"step": 1568
},
{
"epoch": 1.0453069043058216,
"learning_rate": 2.4419974395191243e-05,
"loss": 4.2032,
"step": 1569
},
{
"epoch": 1.0459731823103189,
"learning_rate": 2.4393001644769863e-05,
"loss": 4.1557,
"step": 1570
},
{
"epoch": 1.0466394603148164,
"learning_rate": 2.436602960132145e-05,
"loss": 4.1891,
"step": 1571
},
{
"epoch": 1.0473057383193138,
"learning_rate": 2.433905829626043e-05,
"loss": 4.17,
"step": 1572
},
{
"epoch": 1.047972016323811,
"learning_rate": 2.431208776100036e-05,
"loss": 4.1507,
"step": 1573
},
{
"epoch": 1.0486382943283086,
"learning_rate": 2.428511802695394e-05,
"loss": 4.196,
"step": 1574
},
{
"epoch": 1.0493045723328058,
"learning_rate": 2.4258149125532876e-05,
"loss": 4.1504,
"step": 1575
},
{
"epoch": 1.0499708503373033,
"learning_rate": 2.4231181088147935e-05,
"loss": 4.123,
"step": 1576
},
{
"epoch": 1.0506371283418006,
"learning_rate": 2.4204213946208904e-05,
"loss": 4.1012,
"step": 1577
},
{
"epoch": 1.051303406346298,
"learning_rate": 2.4177247731124493e-05,
"loss": 4.1361,
"step": 1578
},
{
"epoch": 1.0519696843507953,
"learning_rate": 2.4150282474302315e-05,
"loss": 4.1387,
"step": 1579
},
{
"epoch": 1.0526359623552928,
"learning_rate": 2.4123318207148927e-05,
"loss": 4.196,
"step": 1580
},
{
"epoch": 1.05330224035979,
"learning_rate": 2.4096354961069687e-05,
"loss": 4.1385,
"step": 1581
},
{
"epoch": 1.0539685183642875,
"learning_rate": 2.4069392767468756e-05,
"loss": 4.1504,
"step": 1582
},
{
"epoch": 1.0546347963687848,
"learning_rate": 2.4042431657749117e-05,
"loss": 4.1817,
"step": 1583
},
{
"epoch": 1.0553010743732822,
"learning_rate": 2.401547166331245e-05,
"loss": 4.1837,
"step": 1584
},
{
"epoch": 1.0559673523777797,
"learning_rate": 2.3988512815559146e-05,
"loss": 4.1516,
"step": 1585
},
{
"epoch": 1.056633630382277,
"learning_rate": 2.3961555145888274e-05,
"loss": 4.1185,
"step": 1586
},
{
"epoch": 1.0572999083867745,
"learning_rate": 2.393459868569751e-05,
"loss": 4.1925,
"step": 1587
},
{
"epoch": 1.0579661863912717,
"learning_rate": 2.390764346638313e-05,
"loss": 4.1807,
"step": 1588
},
{
"epoch": 1.0586324643957692,
"learning_rate": 2.3880689519339972e-05,
"loss": 4.1997,
"step": 1589
},
{
"epoch": 1.0592987424002664,
"learning_rate": 2.3853736875961386e-05,
"loss": 4.1518,
"step": 1590
},
{
"epoch": 1.059965020404764,
"learning_rate": 2.38267855676392e-05,
"loss": 4.2105,
"step": 1591
},
{
"epoch": 1.0606312984092612,
"learning_rate": 2.379983562576369e-05,
"loss": 4.158,
"step": 1592
},
{
"epoch": 1.0612975764137587,
"learning_rate": 2.3772887081723546e-05,
"loss": 4.1829,
"step": 1593
},
{
"epoch": 1.061963854418256,
"learning_rate": 2.374593996690582e-05,
"loss": 4.2276,
"step": 1594
},
{
"epoch": 1.0626301324227534,
"learning_rate": 2.371899431269591e-05,
"loss": 4.1852,
"step": 1595
},
{
"epoch": 1.0632964104272509,
"learning_rate": 2.3692050150477503e-05,
"loss": 4.1746,
"step": 1596
},
{
"epoch": 1.0639626884317481,
"learning_rate": 2.3665107511632556e-05,
"loss": 4.1614,
"step": 1597
},
{
"epoch": 1.0646289664362456,
"learning_rate": 2.363816642754124e-05,
"loss": 4.1863,
"step": 1598
},
{
"epoch": 1.0652952444407429,
"learning_rate": 2.3611226929581935e-05,
"loss": 4.1471,
"step": 1599
},
{
"epoch": 1.0659615224452403,
"learning_rate": 2.358428904913116e-05,
"loss": 4.1799,
"step": 1600
},
{
"epoch": 1.0666278004497376,
"learning_rate": 2.3557352817563544e-05,
"loss": 4.114,
"step": 1601
},
{
"epoch": 1.067294078454235,
"learning_rate": 2.3530418266251814e-05,
"loss": 4.1329,
"step": 1602
},
{
"epoch": 1.0679603564587323,
"learning_rate": 2.3503485426566722e-05,
"loss": 4.182,
"step": 1603
},
{
"epoch": 1.0686266344632298,
"learning_rate": 2.347655432987703e-05,
"loss": 4.1453,
"step": 1604
},
{
"epoch": 1.069292912467727,
"learning_rate": 2.34496250075495e-05,
"loss": 4.1532,
"step": 1605
},
{
"epoch": 1.0699591904722245,
"learning_rate": 2.3422697490948777e-05,
"loss": 4.1546,
"step": 1606
},
{
"epoch": 1.070625468476722,
"learning_rate": 2.3395771811437422e-05,
"loss": 4.1732,
"step": 1607
},
{
"epoch": 1.0712917464812193,
"learning_rate": 2.3368848000375883e-05,
"loss": 4.1819,
"step": 1608
},
{
"epoch": 1.0719580244857168,
"learning_rate": 2.334192608912241e-05,
"loss": 4.1984,
"step": 1609
},
{
"epoch": 1.072624302490214,
"learning_rate": 2.331500610903301e-05,
"loss": 4.1557,
"step": 1610
},
{
"epoch": 1.0732905804947115,
"learning_rate": 2.3288088091461512e-05,
"loss": 4.1639,
"step": 1611
},
{
"epoch": 1.0739568584992087,
"learning_rate": 2.3261172067759397e-05,
"loss": 4.1588,
"step": 1612
},
{
"epoch": 1.0746231365037062,
"learning_rate": 2.323425806927584e-05,
"loss": 4.1852,
"step": 1613
},
{
"epoch": 1.0752894145082035,
"learning_rate": 2.320734612735768e-05,
"loss": 4.1902,
"step": 1614
},
{
"epoch": 1.075955692512701,
"learning_rate": 2.318043627334934e-05,
"loss": 4.1311,
"step": 1615
},
{
"epoch": 1.0766219705171982,
"learning_rate": 2.3153528538592802e-05,
"loss": 4.1346,
"step": 1616
},
{
"epoch": 1.0772882485216957,
"learning_rate": 2.3126622954427607e-05,
"loss": 4.1346,
"step": 1617
},
{
"epoch": 1.077954526526193,
"learning_rate": 2.309971955219077e-05,
"loss": 4.168,
"step": 1618
},
{
"epoch": 1.0786208045306904,
"learning_rate": 2.3072818363216774e-05,
"loss": 4.1658,
"step": 1619
},
{
"epoch": 1.079287082535188,
"learning_rate": 2.3045919418837515e-05,
"loss": 4.1444,
"step": 1620
},
{
"epoch": 1.0799533605396852,
"learning_rate": 2.3019022750382285e-05,
"loss": 4.1644,
"step": 1621
},
{
"epoch": 1.0806196385441826,
"learning_rate": 2.2992128389177727e-05,
"loss": 4.183,
"step": 1622
},
{
"epoch": 1.08128591654868,
"learning_rate": 2.2965236366547782e-05,
"loss": 4.1573,
"step": 1623
},
{
"epoch": 1.0819521945531774,
"learning_rate": 2.2938346713813685e-05,
"loss": 4.1677,
"step": 1624
},
{
"epoch": 1.0826184725576746,
"learning_rate": 2.29114594622939e-05,
"loss": 4.0771,
"step": 1625
},
{
"epoch": 1.083284750562172,
"learning_rate": 2.288457464330409e-05,
"loss": 4.1597,
"step": 1626
},
{
"epoch": 1.0839510285666694,
"learning_rate": 2.28576922881571e-05,
"loss": 4.1221,
"step": 1627
},
{
"epoch": 1.0846173065711668,
"learning_rate": 2.2830812428162894e-05,
"loss": 4.1429,
"step": 1628
},
{
"epoch": 1.085283584575664,
"learning_rate": 2.2803935094628528e-05,
"loss": 4.1708,
"step": 1629
},
{
"epoch": 1.0859498625801616,
"learning_rate": 2.277706031885813e-05,
"loss": 4.2211,
"step": 1630
},
{
"epoch": 1.086616140584659,
"learning_rate": 2.2750188132152832e-05,
"loss": 4.1532,
"step": 1631
},
{
"epoch": 1.0872824185891563,
"learning_rate": 2.2723318565810744e-05,
"loss": 4.15,
"step": 1632
},
{
"epoch": 1.0879486965936538,
"learning_rate": 2.2696451651126974e-05,
"loss": 4.214,
"step": 1633
},
{
"epoch": 1.088614974598151,
"learning_rate": 2.2669587419393475e-05,
"loss": 4.234,
"step": 1634
},
{
"epoch": 1.0892812526026485,
"learning_rate": 2.2642725901899108e-05,
"loss": 4.1828,
"step": 1635
},
{
"epoch": 1.0899475306071458,
"learning_rate": 2.261586712992958e-05,
"loss": 4.1571,
"step": 1636
},
{
"epoch": 1.0906138086116433,
"learning_rate": 2.2589011134767397e-05,
"loss": 4.1719,
"step": 1637
},
{
"epoch": 1.0912800866161405,
"learning_rate": 2.2562157947691793e-05,
"loss": 4.1932,
"step": 1638
},
{
"epoch": 1.091946364620638,
"learning_rate": 2.2535307599978797e-05,
"loss": 4.1734,
"step": 1639
},
{
"epoch": 1.0926126426251352,
"learning_rate": 2.2508460122901083e-05,
"loss": 4.1427,
"step": 1640
},
{
"epoch": 1.0932789206296327,
"learning_rate": 2.2481615547727986e-05,
"loss": 4.1217,
"step": 1641
},
{
"epoch": 1.0939451986341302,
"learning_rate": 2.2454773905725487e-05,
"loss": 4.1671,
"step": 1642
},
{
"epoch": 1.0946114766386275,
"learning_rate": 2.242793522815613e-05,
"loss": 4.1558,
"step": 1643
},
{
"epoch": 1.095277754643125,
"learning_rate": 2.2401099546279007e-05,
"loss": 4.1795,
"step": 1644
},
{
"epoch": 1.0959440326476222,
"learning_rate": 2.2374266891349708e-05,
"loss": 4.1487,
"step": 1645
},
{
"epoch": 1.0966103106521197,
"learning_rate": 2.234743729462034e-05,
"loss": 4.1751,
"step": 1646
},
{
"epoch": 1.097276588656617,
"learning_rate": 2.2320610787339413e-05,
"loss": 4.1375,
"step": 1647
},
{
"epoch": 1.0979428666611144,
"learning_rate": 2.229378740075184e-05,
"loss": 4.1725,
"step": 1648
},
{
"epoch": 1.0986091446656117,
"learning_rate": 2.226696716609892e-05,
"loss": 4.1392,
"step": 1649
},
{
"epoch": 1.0992754226701091,
"learning_rate": 2.224015011461826e-05,
"loss": 4.1669,
"step": 1650
},
{
"epoch": 1.0999417006746064,
"learning_rate": 2.2213336277543764e-05,
"loss": 4.1703,
"step": 1651
},
{
"epoch": 1.1006079786791039,
"learning_rate": 2.2186525686105605e-05,
"loss": 4.1482,
"step": 1652
},
{
"epoch": 1.1012742566836011,
"learning_rate": 2.215971837153016e-05,
"loss": 4.167,
"step": 1653
},
{
"epoch": 1.1019405346880986,
"learning_rate": 2.2132914365039993e-05,
"loss": 4.1659,
"step": 1654
},
{
"epoch": 1.102606812692596,
"learning_rate": 2.2106113697853824e-05,
"loss": 4.196,
"step": 1655
},
{
"epoch": 1.1032730906970933,
"learning_rate": 2.2079316401186477e-05,
"loss": 4.1652,
"step": 1656
},
{
"epoch": 1.1039393687015908,
"learning_rate": 2.2052522506248844e-05,
"loss": 4.1353,
"step": 1657
},
{
"epoch": 1.104605646706088,
"learning_rate": 2.2025732044247872e-05,
"loss": 4.1299,
"step": 1658
},
{
"epoch": 1.1052719247105856,
"learning_rate": 2.1998945046386492e-05,
"loss": 4.1411,
"step": 1659
},
{
"epoch": 1.1059382027150828,
"learning_rate": 2.1972161543863602e-05,
"loss": 4.2049,
"step": 1660
},
{
"epoch": 1.1066044807195803,
"learning_rate": 2.1945381567874062e-05,
"loss": 4.1947,
"step": 1661
},
{
"epoch": 1.1072707587240775,
"learning_rate": 2.191860514960857e-05,
"loss": 4.1529,
"step": 1662
},
{
"epoch": 1.107937036728575,
"learning_rate": 2.1891832320253708e-05,
"loss": 4.1358,
"step": 1663
},
{
"epoch": 1.1086033147330723,
"learning_rate": 2.1865063110991895e-05,
"loss": 4.1636,
"step": 1664
},
{
"epoch": 1.1092695927375698,
"learning_rate": 2.1838297553001312e-05,
"loss": 4.1548,
"step": 1665
},
{
"epoch": 1.109935870742067,
"learning_rate": 2.1811535677455872e-05,
"loss": 4.1734,
"step": 1666
},
{
"epoch": 1.1106021487465645,
"learning_rate": 2.1784777515525247e-05,
"loss": 4.1616,
"step": 1667
},
{
"epoch": 1.111268426751062,
"learning_rate": 2.1758023098374742e-05,
"loss": 4.1578,
"step": 1668
},
{
"epoch": 1.1119347047555592,
"learning_rate": 2.17312724571653e-05,
"loss": 4.2165,
"step": 1669
},
{
"epoch": 1.1126009827600567,
"learning_rate": 2.170452562305351e-05,
"loss": 4.2199,
"step": 1670
},
{
"epoch": 1.113267260764554,
"learning_rate": 2.167778262719148e-05,
"loss": 4.1263,
"step": 1671
},
{
"epoch": 1.1139335387690514,
"learning_rate": 2.165104350072687e-05,
"loss": 4.1388,
"step": 1672
},
{
"epoch": 1.1145998167735487,
"learning_rate": 2.162430827480281e-05,
"loss": 4.1135,
"step": 1673
},
{
"epoch": 1.1152660947780462,
"learning_rate": 2.1597576980557922e-05,
"loss": 4.1185,
"step": 1674
},
{
"epoch": 1.1159323727825434,
"learning_rate": 2.1570849649126234e-05,
"loss": 4.1131,
"step": 1675
},
{
"epoch": 1.116598650787041,
"learning_rate": 2.1544126311637147e-05,
"loss": 4.1537,
"step": 1676
},
{
"epoch": 1.1172649287915384,
"learning_rate": 2.1517406999215428e-05,
"loss": 4.151,
"step": 1677
},
{
"epoch": 1.1179312067960356,
"learning_rate": 2.149069174298114e-05,
"loss": 4.2136,
"step": 1678
},
{
"epoch": 1.1185974848005331,
"learning_rate": 2.1463980574049626e-05,
"loss": 4.1395,
"step": 1679
},
{
"epoch": 1.1192637628050304,
"learning_rate": 2.1437273523531483e-05,
"loss": 4.1511,
"step": 1680
},
{
"epoch": 1.1199300408095278,
"learning_rate": 2.1410570622532498e-05,
"loss": 4.1743,
"step": 1681
},
{
"epoch": 1.120596318814025,
"learning_rate": 2.1383871902153614e-05,
"loss": 4.1053,
"step": 1682
},
{
"epoch": 1.1212625968185226,
"learning_rate": 2.1357177393490927e-05,
"loss": 4.1701,
"step": 1683
},
{
"epoch": 1.1219288748230198,
"learning_rate": 2.133048712763563e-05,
"loss": 4.1251,
"step": 1684
},
{
"epoch": 1.1225951528275173,
"learning_rate": 2.1303801135673936e-05,
"loss": 4.1877,
"step": 1685
},
{
"epoch": 1.1232614308320146,
"learning_rate": 2.127711944868713e-05,
"loss": 4.1441,
"step": 1686
},
{
"epoch": 1.123927708836512,
"learning_rate": 2.125044209775145e-05,
"loss": 4.1814,
"step": 1687
},
{
"epoch": 1.1245939868410093,
"learning_rate": 2.1223769113938095e-05,
"loss": 4.1895,
"step": 1688
},
{
"epoch": 1.1252602648455068,
"learning_rate": 2.1197100528313176e-05,
"loss": 4.1351,
"step": 1689
},
{
"epoch": 1.1259265428500043,
"learning_rate": 2.1170436371937685e-05,
"loss": 4.1791,
"step": 1690
},
{
"epoch": 1.1265928208545015,
"learning_rate": 2.114377667586744e-05,
"loss": 4.1446,
"step": 1691
},
{
"epoch": 1.127259098858999,
"learning_rate": 2.11171214711531e-05,
"loss": 4.1671,
"step": 1692
},
{
"epoch": 1.1279253768634963,
"learning_rate": 2.109047078884005e-05,
"loss": 4.1565,
"step": 1693
},
{
"epoch": 1.1285916548679937,
"learning_rate": 2.1063824659968417e-05,
"loss": 4.1843,
"step": 1694
},
{
"epoch": 1.129257932872491,
"learning_rate": 2.103718311557306e-05,
"loss": 4.2088,
"step": 1695
},
{
"epoch": 1.1299242108769885,
"learning_rate": 2.101054618668347e-05,
"loss": 4.154,
"step": 1696
},
{
"epoch": 1.1305904888814857,
"learning_rate": 2.0983913904323735e-05,
"loss": 4.1592,
"step": 1697
},
{
"epoch": 1.1312567668859832,
"learning_rate": 2.0957286299512597e-05,
"loss": 4.1455,
"step": 1698
},
{
"epoch": 1.1319230448904805,
"learning_rate": 2.0930663403263294e-05,
"loss": 4.1477,
"step": 1699
},
{
"epoch": 1.132589322894978,
"learning_rate": 2.0904045246583615e-05,
"loss": 4.1645,
"step": 1700
},
{
"epoch": 1.1332556008994752,
"learning_rate": 2.087743186047579e-05,
"loss": 4.143,
"step": 1701
},
{
"epoch": 1.1339218789039727,
"learning_rate": 2.0850823275936534e-05,
"loss": 4.2064,
"step": 1702
},
{
"epoch": 1.1345881569084701,
"learning_rate": 2.0824219523956954e-05,
"loss": 4.1352,
"step": 1703
},
{
"epoch": 1.1352544349129674,
"learning_rate": 2.079762063552252e-05,
"loss": 4.153,
"step": 1704
},
{
"epoch": 1.1359207129174649,
"learning_rate": 2.0771026641613044e-05,
"loss": 4.1924,
"step": 1705
},
{
"epoch": 1.1365869909219621,
"learning_rate": 2.0744437573202647e-05,
"loss": 4.134,
"step": 1706
},
{
"epoch": 1.1372532689264596,
"learning_rate": 2.0717853461259688e-05,
"loss": 4.171,
"step": 1707
},
{
"epoch": 1.1379195469309569,
"learning_rate": 2.0691274336746783e-05,
"loss": 4.1819,
"step": 1708
},
{
"epoch": 1.1385858249354543,
"learning_rate": 2.0664700230620717e-05,
"loss": 4.1761,
"step": 1709
},
{
"epoch": 1.1392521029399516,
"learning_rate": 2.0638131173832438e-05,
"loss": 4.209,
"step": 1710
},
{
"epoch": 1.139918380944449,
"learning_rate": 2.061156719732702e-05,
"loss": 4.1478,
"step": 1711
},
{
"epoch": 1.1405846589489466,
"learning_rate": 2.058500833204361e-05,
"loss": 4.1382,
"step": 1712
},
{
"epoch": 1.1412509369534438,
"learning_rate": 2.0558454608915398e-05,
"loss": 4.1551,
"step": 1713
},
{
"epoch": 1.1419172149579413,
"learning_rate": 2.0531906058869602e-05,
"loss": 4.141,
"step": 1714
},
{
"epoch": 1.1425834929624386,
"learning_rate": 2.0505362712827402e-05,
"loss": 4.1631,
"step": 1715
},
{
"epoch": 1.143249770966936,
"learning_rate": 2.0478824601703915e-05,
"loss": 4.23,
"step": 1716
},
{
"epoch": 1.1439160489714333,
"learning_rate": 2.045229175640817e-05,
"loss": 4.1911,
"step": 1717
},
{
"epoch": 1.1445823269759308,
"learning_rate": 2.042576420784306e-05,
"loss": 4.1691,
"step": 1718
},
{
"epoch": 1.145248604980428,
"learning_rate": 2.0399241986905292e-05,
"loss": 4.1545,
"step": 1719
},
{
"epoch": 1.1459148829849255,
"learning_rate": 2.0372725124485418e-05,
"loss": 4.1688,
"step": 1720
},
{
"epoch": 1.1465811609894228,
"learning_rate": 2.0346213651467684e-05,
"loss": 4.133,
"step": 1721
},
{
"epoch": 1.1472474389939202,
"learning_rate": 2.0319707598730088e-05,
"loss": 4.2236,
"step": 1722
},
{
"epoch": 1.1479137169984175,
"learning_rate": 2.0293206997144333e-05,
"loss": 4.1939,
"step": 1723
},
{
"epoch": 1.148579995002915,
"learning_rate": 2.026671187757576e-05,
"loss": 4.1326,
"step": 1724
},
{
"epoch": 1.1492462730074124,
"learning_rate": 2.0240222270883288e-05,
"loss": 4.1486,
"step": 1725
},
{
"epoch": 1.1499125510119097,
"learning_rate": 2.0213738207919487e-05,
"loss": 4.1815,
"step": 1726
},
{
"epoch": 1.1505788290164072,
"learning_rate": 2.018725971953041e-05,
"loss": 4.1563,
"step": 1727
},
{
"epoch": 1.1512451070209044,
"learning_rate": 2.016078683655565e-05,
"loss": 4.1746,
"step": 1728
},
{
"epoch": 1.151911385025402,
"learning_rate": 2.0134319589828242e-05,
"loss": 4.179,
"step": 1729
},
{
"epoch": 1.1525776630298992,
"learning_rate": 2.0107858010174695e-05,
"loss": 4.1043,
"step": 1730
},
{
"epoch": 1.1532439410343966,
"learning_rate": 2.0081402128414893e-05,
"loss": 4.1819,
"step": 1731
},
{
"epoch": 1.153910219038894,
"learning_rate": 2.0054951975362067e-05,
"loss": 4.1545,
"step": 1732
},
{
"epoch": 1.1545764970433914,
"learning_rate": 2.0028507581822827e-05,
"loss": 4.2212,
"step": 1733
},
{
"epoch": 1.1552427750478886,
"learning_rate": 2.000206897859703e-05,
"loss": 4.1246,
"step": 1734
},
{
"epoch": 1.1559090530523861,
"learning_rate": 1.9975636196477797e-05,
"loss": 4.1796,
"step": 1735
},
{
"epoch": 1.1565753310568834,
"learning_rate": 1.994920926625149e-05,
"loss": 4.1265,
"step": 1736
},
{
"epoch": 1.1572416090613808,
"learning_rate": 1.9922788218697634e-05,
"loss": 4.1905,
"step": 1737
},
{
"epoch": 1.1579078870658783,
"learning_rate": 1.9896373084588906e-05,
"loss": 4.1937,
"step": 1738
},
{
"epoch": 1.1585741650703756,
"learning_rate": 1.9869963894691104e-05,
"loss": 4.1811,
"step": 1739
},
{
"epoch": 1.159240443074873,
"learning_rate": 1.9843560679763095e-05,
"loss": 4.1519,
"step": 1740
},
{
"epoch": 1.1599067210793703,
"learning_rate": 1.9817163470556787e-05,
"loss": 4.1812,
"step": 1741
},
{
"epoch": 1.1605729990838678,
"learning_rate": 1.9790772297817107e-05,
"loss": 4.175,
"step": 1742
},
{
"epoch": 1.161239277088365,
"learning_rate": 1.9764387192281928e-05,
"loss": 4.1833,
"step": 1743
},
{
"epoch": 1.1619055550928625,
"learning_rate": 1.9738008184682068e-05,
"loss": 4.2118,
"step": 1744
},
{
"epoch": 1.1625718330973598,
"learning_rate": 1.971163530574125e-05,
"loss": 4.1194,
"step": 1745
},
{
"epoch": 1.1632381111018573,
"learning_rate": 1.9685268586176055e-05,
"loss": 4.1701,
"step": 1746
},
{
"epoch": 1.1639043891063547,
"learning_rate": 1.9658908056695867e-05,
"loss": 4.1723,
"step": 1747
},
{
"epoch": 1.164570667110852,
"learning_rate": 1.9632553748002914e-05,
"loss": 4.1291,
"step": 1748
},
{
"epoch": 1.1652369451153495,
"learning_rate": 1.9606205690792122e-05,
"loss": 4.158,
"step": 1749
},
{
"epoch": 1.1659032231198467,
"learning_rate": 1.9579863915751152e-05,
"loss": 4.1467,
"step": 1750
},
{
"epoch": 1.1665695011243442,
"learning_rate": 1.955352845356039e-05,
"loss": 4.1347,
"step": 1751
},
{
"epoch": 1.1672357791288415,
"learning_rate": 1.9527199334892798e-05,
"loss": 4.2235,
"step": 1752
},
{
"epoch": 1.167902057133339,
"learning_rate": 1.9500876590413998e-05,
"loss": 4.1635,
"step": 1753
},
{
"epoch": 1.1685683351378362,
"learning_rate": 1.9474560250782185e-05,
"loss": 4.1472,
"step": 1754
},
{
"epoch": 1.1692346131423337,
"learning_rate": 1.944825034664809e-05,
"loss": 4.1558,
"step": 1755
},
{
"epoch": 1.169900891146831,
"learning_rate": 1.9421946908654927e-05,
"loss": 4.1941,
"step": 1756
},
{
"epoch": 1.1705671691513284,
"learning_rate": 1.939564996743839e-05,
"loss": 4.201,
"step": 1757
},
{
"epoch": 1.1712334471558257,
"learning_rate": 1.9369359553626636e-05,
"loss": 4.1414,
"step": 1758
},
{
"epoch": 1.1718997251603231,
"learning_rate": 1.9343075697840195e-05,
"loss": 4.1876,
"step": 1759
},
{
"epoch": 1.1725660031648206,
"learning_rate": 1.9316798430691933e-05,
"loss": 4.1502,
"step": 1760
},
{
"epoch": 1.1732322811693179,
"learning_rate": 1.9290527782787092e-05,
"loss": 4.1574,
"step": 1761
},
{
"epoch": 1.1738985591738154,
"learning_rate": 1.926426378472318e-05,
"loss": 4.1474,
"step": 1762
},
{
"epoch": 1.1745648371783126,
"learning_rate": 1.9238006467089953e-05,
"loss": 4.0937,
"step": 1763
},
{
"epoch": 1.17523111518281,
"learning_rate": 1.92117558604694e-05,
"loss": 4.1249,
"step": 1764
},
{
"epoch": 1.1758973931873073,
"learning_rate": 1.9185511995435686e-05,
"loss": 4.1398,
"step": 1765
},
{
"epoch": 1.1765636711918048,
"learning_rate": 1.915927490255512e-05,
"loss": 4.1524,
"step": 1766
},
{
"epoch": 1.177229949196302,
"learning_rate": 1.913304461238614e-05,
"loss": 4.1758,
"step": 1767
},
{
"epoch": 1.1778962272007996,
"learning_rate": 1.9106821155479252e-05,
"loss": 4.1911,
"step": 1768
},
{
"epoch": 1.1785625052052968,
"learning_rate": 1.9080604562376987e-05,
"loss": 4.1528,
"step": 1769
},
{
"epoch": 1.1792287832097943,
"learning_rate": 1.9054394863613914e-05,
"loss": 4.2171,
"step": 1770
},
{
"epoch": 1.1798950612142916,
"learning_rate": 1.9028192089716544e-05,
"loss": 4.1627,
"step": 1771
},
{
"epoch": 1.180561339218789,
"learning_rate": 1.9001996271203332e-05,
"loss": 4.175,
"step": 1772
},
{
"epoch": 1.1812276172232865,
"learning_rate": 1.8975807438584642e-05,
"loss": 4.1223,
"step": 1773
},
{
"epoch": 1.1818938952277838,
"learning_rate": 1.894962562236269e-05,
"loss": 4.1225,
"step": 1774
},
{
"epoch": 1.1825601732322812,
"learning_rate": 1.8923450853031515e-05,
"loss": 4.1663,
"step": 1775
},
{
"epoch": 1.1832264512367785,
"learning_rate": 1.889728316107697e-05,
"loss": 4.1619,
"step": 1776
},
{
"epoch": 1.183892729241276,
"learning_rate": 1.8871122576976634e-05,
"loss": 4.1513,
"step": 1777
},
{
"epoch": 1.1845590072457732,
"learning_rate": 1.8844969131199832e-05,
"loss": 4.0926,
"step": 1778
},
{
"epoch": 1.1852252852502707,
"learning_rate": 1.881882285420758e-05,
"loss": 4.1892,
"step": 1779
},
{
"epoch": 1.185891563254768,
"learning_rate": 1.8792683776452525e-05,
"loss": 4.1195,
"step": 1780
},
{
"epoch": 1.1865578412592654,
"learning_rate": 1.8766551928378925e-05,
"loss": 4.1026,
"step": 1781
},
{
"epoch": 1.187224119263763,
"learning_rate": 1.8740427340422628e-05,
"loss": 4.1652,
"step": 1782
},
{
"epoch": 1.1878903972682602,
"learning_rate": 1.8714310043011054e-05,
"loss": 4.1768,
"step": 1783
},
{
"epoch": 1.1885566752727574,
"learning_rate": 1.8688200066563076e-05,
"loss": 4.1658,
"step": 1784
},
{
"epoch": 1.189222953277255,
"learning_rate": 1.8662097441489073e-05,
"loss": 4.1761,
"step": 1785
},
{
"epoch": 1.1898892312817524,
"learning_rate": 1.8636002198190874e-05,
"loss": 4.184,
"step": 1786
},
{
"epoch": 1.1905555092862496,
"learning_rate": 1.8609914367061688e-05,
"loss": 4.1576,
"step": 1787
},
{
"epoch": 1.1912217872907471,
"learning_rate": 1.8583833978486078e-05,
"loss": 4.1695,
"step": 1788
},
{
"epoch": 1.1918880652952444,
"learning_rate": 1.855776106283998e-05,
"loss": 4.1016,
"step": 1789
},
{
"epoch": 1.1925543432997419,
"learning_rate": 1.8531695650490597e-05,
"loss": 4.1164,
"step": 1790
},
{
"epoch": 1.1932206213042391,
"learning_rate": 1.850563777179639e-05,
"loss": 4.1272,
"step": 1791
},
{
"epoch": 1.1938868993087366,
"learning_rate": 1.847958745710708e-05,
"loss": 4.1689,
"step": 1792
},
{
"epoch": 1.1945531773132338,
"learning_rate": 1.8453544736763527e-05,
"loss": 4.1729,
"step": 1793
},
{
"epoch": 1.1952194553177313,
"learning_rate": 1.8427509641097785e-05,
"loss": 4.1716,
"step": 1794
},
{
"epoch": 1.1958857333222288,
"learning_rate": 1.8401482200433014e-05,
"loss": 4.1945,
"step": 1795
},
{
"epoch": 1.196552011326726,
"learning_rate": 1.8375462445083464e-05,
"loss": 4.1452,
"step": 1796
},
{
"epoch": 1.1972182893312235,
"learning_rate": 1.834945040535441e-05,
"loss": 4.1802,
"step": 1797
},
{
"epoch": 1.1978845673357208,
"learning_rate": 1.8323446111542185e-05,
"loss": 4.178,
"step": 1798
},
{
"epoch": 1.1985508453402183,
"learning_rate": 1.829744959393406e-05,
"loss": 4.2035,
"step": 1799
},
{
"epoch": 1.1992171233447155,
"learning_rate": 1.827146088280826e-05,
"loss": 4.2089,
"step": 1800
},
{
"epoch": 1.199883401349213,
"learning_rate": 1.8245480008433936e-05,
"loss": 4.0668,
"step": 1801
},
{
"epoch": 1.2005496793537103,
"learning_rate": 1.821950700107109e-05,
"loss": 4.1548,
"step": 1802
},
{
"epoch": 1.2012159573582077,
"learning_rate": 1.8193541890970562e-05,
"loss": 4.1734,
"step": 1803
},
{
"epoch": 1.201882235362705,
"learning_rate": 1.8167584708374008e-05,
"loss": 4.1707,
"step": 1804
},
{
"epoch": 1.2025485133672025,
"learning_rate": 1.8141635483513845e-05,
"loss": 4.1539,
"step": 1805
},
{
"epoch": 1.2032147913716997,
"learning_rate": 1.8115694246613206e-05,
"loss": 4.1811,
"step": 1806
},
{
"epoch": 1.2038810693761972,
"learning_rate": 1.808976102788596e-05,
"loss": 4.1624,
"step": 1807
},
{
"epoch": 1.2045473473806947,
"learning_rate": 1.8063835857536587e-05,
"loss": 4.2106,
"step": 1808
},
{
"epoch": 1.205213625385192,
"learning_rate": 1.8037918765760232e-05,
"loss": 4.1894,
"step": 1809
},
{
"epoch": 1.2058799033896894,
"learning_rate": 1.8012009782742602e-05,
"loss": 4.1519,
"step": 1810
},
{
"epoch": 1.2065461813941867,
"learning_rate": 1.7986108938659993e-05,
"loss": 4.1733,
"step": 1811
},
{
"epoch": 1.2072124593986842,
"learning_rate": 1.7960216263679193e-05,
"loss": 4.1611,
"step": 1812
},
{
"epoch": 1.2078787374031814,
"learning_rate": 1.793433178795747e-05,
"loss": 4.111,
"step": 1813
},
{
"epoch": 1.208545015407679,
"learning_rate": 1.7908455541642584e-05,
"loss": 4.1546,
"step": 1814
},
{
"epoch": 1.2092112934121761,
"learning_rate": 1.7882587554872678e-05,
"loss": 4.1464,
"step": 1815
},
{
"epoch": 1.2098775714166736,
"learning_rate": 1.7856727857776258e-05,
"loss": 4.1535,
"step": 1816
},
{
"epoch": 1.210543849421171,
"learning_rate": 1.7830876480472226e-05,
"loss": 4.1518,
"step": 1817
},
{
"epoch": 1.2112101274256684,
"learning_rate": 1.7805033453069755e-05,
"loss": 4.1373,
"step": 1818
},
{
"epoch": 1.2118764054301656,
"learning_rate": 1.777919880566829e-05,
"loss": 4.1723,
"step": 1819
},
{
"epoch": 1.212542683434663,
"learning_rate": 1.7753372568357548e-05,
"loss": 4.1465,
"step": 1820
},
{
"epoch": 1.2132089614391606,
"learning_rate": 1.7727554771217427e-05,
"loss": 4.1712,
"step": 1821
},
{
"epoch": 1.2138752394436578,
"learning_rate": 1.770174544431799e-05,
"loss": 4.1678,
"step": 1822
},
{
"epoch": 1.2145415174481553,
"learning_rate": 1.7675944617719463e-05,
"loss": 4.1397,
"step": 1823
},
{
"epoch": 1.2152077954526526,
"learning_rate": 1.7650152321472135e-05,
"loss": 4.1393,
"step": 1824
},
{
"epoch": 1.21587407345715,
"learning_rate": 1.7624368585616383e-05,
"loss": 4.1325,
"step": 1825
},
{
"epoch": 1.2165403514616473,
"learning_rate": 1.7598593440182615e-05,
"loss": 4.1601,
"step": 1826
},
{
"epoch": 1.2172066294661448,
"learning_rate": 1.757282691519122e-05,
"loss": 4.1669,
"step": 1827
},
{
"epoch": 1.217872907470642,
"learning_rate": 1.754706904065255e-05,
"loss": 4.125,
"step": 1828
},
{
"epoch": 1.2185391854751395,
"learning_rate": 1.7521319846566896e-05,
"loss": 4.143,
"step": 1829
},
{
"epoch": 1.219205463479637,
"learning_rate": 1.749557936292442e-05,
"loss": 4.1471,
"step": 1830
},
{
"epoch": 1.2198717414841342,
"learning_rate": 1.746984761970515e-05,
"loss": 4.1661,
"step": 1831
},
{
"epoch": 1.2205380194886317,
"learning_rate": 1.7444124646878933e-05,
"loss": 4.1453,
"step": 1832
},
{
"epoch": 1.221204297493129,
"learning_rate": 1.7418410474405403e-05,
"loss": 4.1692,
"step": 1833
},
{
"epoch": 1.2218705754976265,
"learning_rate": 1.7392705132233922e-05,
"loss": 4.1535,
"step": 1834
},
{
"epoch": 1.2225368535021237,
"learning_rate": 1.7367008650303616e-05,
"loss": 4.153,
"step": 1835
},
{
"epoch": 1.2232031315066212,
"learning_rate": 1.734132105854324e-05,
"loss": 4.1132,
"step": 1836
},
{
"epoch": 1.2238694095111184,
"learning_rate": 1.7315642386871222e-05,
"loss": 4.1089,
"step": 1837
},
{
"epoch": 1.224535687515616,
"learning_rate": 1.728997266519559e-05,
"loss": 4.1416,
"step": 1838
},
{
"epoch": 1.2252019655201132,
"learning_rate": 1.726431192341396e-05,
"loss": 4.1537,
"step": 1839
},
{
"epoch": 1.2258682435246107,
"learning_rate": 1.7238660191413477e-05,
"loss": 4.1193,
"step": 1840
},
{
"epoch": 1.226534521529108,
"learning_rate": 1.721301749907079e-05,
"loss": 4.1576,
"step": 1841
},
{
"epoch": 1.2272007995336054,
"learning_rate": 1.7187383876252044e-05,
"loss": 4.1252,
"step": 1842
},
{
"epoch": 1.2278670775381029,
"learning_rate": 1.7161759352812794e-05,
"loss": 4.1074,
"step": 1843
},
{
"epoch": 1.2285333555426001,
"learning_rate": 1.7136143958597996e-05,
"loss": 4.1315,
"step": 1844
},
{
"epoch": 1.2291996335470976,
"learning_rate": 1.7110537723442e-05,
"loss": 4.1312,
"step": 1845
},
{
"epoch": 1.2298659115515949,
"learning_rate": 1.708494067716847e-05,
"loss": 4.1469,
"step": 1846
},
{
"epoch": 1.2305321895560923,
"learning_rate": 1.705935284959035e-05,
"loss": 4.1112,
"step": 1847
},
{
"epoch": 1.2311984675605896,
"learning_rate": 1.703377427050989e-05,
"loss": 4.1532,
"step": 1848
},
{
"epoch": 1.231864745565087,
"learning_rate": 1.7008204969718536e-05,
"loss": 4.1505,
"step": 1849
},
{
"epoch": 1.2325310235695843,
"learning_rate": 1.698264497699693e-05,
"loss": 4.2101,
"step": 1850
},
{
"epoch": 1.2331973015740818,
"learning_rate": 1.6957094322114888e-05,
"loss": 4.1324,
"step": 1851
},
{
"epoch": 1.2338635795785793,
"learning_rate": 1.6931553034831334e-05,
"loss": 4.1746,
"step": 1852
},
{
"epoch": 1.2345298575830765,
"learning_rate": 1.690602114489429e-05,
"loss": 4.1612,
"step": 1853
},
{
"epoch": 1.2351961355875738,
"learning_rate": 1.6880498682040836e-05,
"loss": 4.188,
"step": 1854
},
{
"epoch": 1.2358624135920713,
"learning_rate": 1.6854985675997066e-05,
"loss": 4.1738,
"step": 1855
},
{
"epoch": 1.2365286915965688,
"learning_rate": 1.6829482156478054e-05,
"loss": 4.2075,
"step": 1856
},
{
"epoch": 1.237194969601066,
"learning_rate": 1.6803988153187844e-05,
"loss": 4.1236,
"step": 1857
},
{
"epoch": 1.2378612476055635,
"learning_rate": 1.6778503695819382e-05,
"loss": 4.1468,
"step": 1858
},
{
"epoch": 1.2385275256100607,
"learning_rate": 1.675302881405449e-05,
"loss": 4.1502,
"step": 1859
},
{
"epoch": 1.2391938036145582,
"learning_rate": 1.6727563537563857e-05,
"loss": 4.1842,
"step": 1860
},
{
"epoch": 1.2398600816190555,
"learning_rate": 1.6702107896006966e-05,
"loss": 4.1674,
"step": 1861
},
{
"epoch": 1.240526359623553,
"learning_rate": 1.6676661919032085e-05,
"loss": 4.128,
"step": 1862
},
{
"epoch": 1.2411926376280502,
"learning_rate": 1.6651225636276235e-05,
"loss": 4.1365,
"step": 1863
},
{
"epoch": 1.2418589156325477,
"learning_rate": 1.6625799077365133e-05,
"loss": 4.2016,
"step": 1864
},
{
"epoch": 1.2425251936370452,
"learning_rate": 1.6600382271913177e-05,
"loss": 4.1926,
"step": 1865
},
{
"epoch": 1.2431914716415424,
"learning_rate": 1.6574975249523395e-05,
"loss": 4.1349,
"step": 1866
},
{
"epoch": 1.24385774964604,
"learning_rate": 1.6549578039787436e-05,
"loss": 4.172,
"step": 1867
},
{
"epoch": 1.2445240276505372,
"learning_rate": 1.6524190672285513e-05,
"loss": 4.2072,
"step": 1868
},
{
"epoch": 1.2451903056550346,
"learning_rate": 1.6498813176586367e-05,
"loss": 4.1512,
"step": 1869
},
{
"epoch": 1.245856583659532,
"learning_rate": 1.647344558224727e-05,
"loss": 4.1881,
"step": 1870
},
{
"epoch": 1.2465228616640294,
"learning_rate": 1.6448087918813925e-05,
"loss": 4.1498,
"step": 1871
},
{
"epoch": 1.2471891396685266,
"learning_rate": 1.6422740215820482e-05,
"loss": 4.1508,
"step": 1872
},
{
"epoch": 1.247855417673024,
"learning_rate": 1.639740250278951e-05,
"loss": 4.1761,
"step": 1873
},
{
"epoch": 1.2485216956775214,
"learning_rate": 1.6372074809231924e-05,
"loss": 4.1611,
"step": 1874
},
{
"epoch": 1.2491879736820188,
"learning_rate": 1.634675716464695e-05,
"loss": 4.1932,
"step": 1875
},
{
"epoch": 1.249854251686516,
"learning_rate": 1.6321449598522154e-05,
"loss": 4.1401,
"step": 1876
},
{
"epoch": 1.2505205296910136,
"learning_rate": 1.6296152140333332e-05,
"loss": 4.122,
"step": 1877
},
{
"epoch": 1.251186807695511,
"learning_rate": 1.627086481954451e-05,
"loss": 4.1579,
"step": 1878
},
{
"epoch": 1.2518530857000083,
"learning_rate": 1.624558766560793e-05,
"loss": 4.1168,
"step": 1879
},
{
"epoch": 1.2525193637045058,
"learning_rate": 1.6220320707963964e-05,
"loss": 4.1438,
"step": 1880
},
{
"epoch": 1.253185641709003,
"learning_rate": 1.619506397604112e-05,
"loss": 4.1494,
"step": 1881
},
{
"epoch": 1.2538519197135005,
"learning_rate": 1.6169817499255997e-05,
"loss": 4.1433,
"step": 1882
},
{
"epoch": 1.2545181977179978,
"learning_rate": 1.614458130701325e-05,
"loss": 4.1545,
"step": 1883
},
{
"epoch": 1.2551844757224953,
"learning_rate": 1.6119355428705554e-05,
"loss": 4.1769,
"step": 1884
},
{
"epoch": 1.2558507537269925,
"learning_rate": 1.6094139893713576e-05,
"loss": 4.132,
"step": 1885
},
{
"epoch": 1.25651703173149,
"learning_rate": 1.6068934731405927e-05,
"loss": 4.1344,
"step": 1886
},
{
"epoch": 1.2571833097359875,
"learning_rate": 1.6043739971139134e-05,
"loss": 4.1573,
"step": 1887
},
{
"epoch": 1.2578495877404847,
"learning_rate": 1.6018555642257633e-05,
"loss": 4.1301,
"step": 1888
},
{
"epoch": 1.258515865744982,
"learning_rate": 1.599338177409369e-05,
"loss": 4.142,
"step": 1889
},
{
"epoch": 1.2591821437494795,
"learning_rate": 1.5968218395967377e-05,
"loss": 4.1634,
"step": 1890
},
{
"epoch": 1.259848421753977,
"learning_rate": 1.594306553718658e-05,
"loss": 4.198,
"step": 1891
},
{
"epoch": 1.2605146997584742,
"learning_rate": 1.591792322704691e-05,
"loss": 4.1597,
"step": 1892
},
{
"epoch": 1.2611809777629717,
"learning_rate": 1.5892791494831694e-05,
"loss": 4.1542,
"step": 1893
},
{
"epoch": 1.261847255767469,
"learning_rate": 1.5867670369811944e-05,
"loss": 4.1838,
"step": 1894
},
{
"epoch": 1.2625135337719664,
"learning_rate": 1.584255988124632e-05,
"loss": 4.1769,
"step": 1895
},
{
"epoch": 1.2631798117764637,
"learning_rate": 1.5817460058381088e-05,
"loss": 4.1369,
"step": 1896
},
{
"epoch": 1.2638460897809611,
"learning_rate": 1.5792370930450083e-05,
"loss": 4.1831,
"step": 1897
},
{
"epoch": 1.2645123677854584,
"learning_rate": 1.5767292526674718e-05,
"loss": 4.1425,
"step": 1898
},
{
"epoch": 1.2651786457899559,
"learning_rate": 1.5742224876263873e-05,
"loss": 4.1213,
"step": 1899
},
{
"epoch": 1.2658449237944533,
"learning_rate": 1.571716800841392e-05,
"loss": 4.1252,
"step": 1900
},
{
"epoch": 1.2665112017989506,
"learning_rate": 1.5692121952308693e-05,
"loss": 4.1249,
"step": 1901
},
{
"epoch": 1.2671774798034479,
"learning_rate": 1.56670867371194e-05,
"loss": 4.1743,
"step": 1902
},
{
"epoch": 1.2678437578079453,
"learning_rate": 1.5642062392004635e-05,
"loss": 4.1841,
"step": 1903
},
{
"epoch": 1.2685100358124428,
"learning_rate": 1.5617048946110348e-05,
"loss": 4.1036,
"step": 1904
},
{
"epoch": 1.26917631381694,
"learning_rate": 1.5592046428569786e-05,
"loss": 4.2117,
"step": 1905
},
{
"epoch": 1.2698425918214375,
"learning_rate": 1.556705486850343e-05,
"loss": 4.1409,
"step": 1906
},
{
"epoch": 1.2705088698259348,
"learning_rate": 1.5542074295019062e-05,
"loss": 4.1425,
"step": 1907
},
{
"epoch": 1.2711751478304323,
"learning_rate": 1.551710473721163e-05,
"loss": 4.1179,
"step": 1908
},
{
"epoch": 1.2718414258349298,
"learning_rate": 1.5492146224163257e-05,
"loss": 4.1361,
"step": 1909
},
{
"epoch": 1.272507703839427,
"learning_rate": 1.5467198784943204e-05,
"loss": 4.1454,
"step": 1910
},
{
"epoch": 1.2731739818439243,
"learning_rate": 1.544226244860784e-05,
"loss": 4.1509,
"step": 1911
},
{
"epoch": 1.2738402598484218,
"learning_rate": 1.5417337244200588e-05,
"loss": 4.1488,
"step": 1912
},
{
"epoch": 1.2745065378529192,
"learning_rate": 1.5392423200751926e-05,
"loss": 4.1024,
"step": 1913
},
{
"epoch": 1.2751728158574165,
"learning_rate": 1.5367520347279317e-05,
"loss": 4.1259,
"step": 1914
},
{
"epoch": 1.275839093861914,
"learning_rate": 1.5342628712787188e-05,
"loss": 4.1098,
"step": 1915
},
{
"epoch": 1.2765053718664112,
"learning_rate": 1.531774832626692e-05,
"loss": 4.1637,
"step": 1916
},
{
"epoch": 1.2771716498709087,
"learning_rate": 1.529287921669677e-05,
"loss": 4.1379,
"step": 1917
},
{
"epoch": 1.277837927875406,
"learning_rate": 1.5268021413041875e-05,
"loss": 4.1332,
"step": 1918
},
{
"epoch": 1.2785042058799034,
"learning_rate": 1.5243174944254204e-05,
"loss": 4.2549,
"step": 1919
},
{
"epoch": 1.2791704838844007,
"learning_rate": 1.5218339839272516e-05,
"loss": 4.1097,
"step": 1920
},
{
"epoch": 1.2798367618888982,
"learning_rate": 1.519351612702234e-05,
"loss": 4.1371,
"step": 1921
},
{
"epoch": 1.2805030398933956,
"learning_rate": 1.5168703836415932e-05,
"loss": 4.1223,
"step": 1922
},
{
"epoch": 1.281169317897893,
"learning_rate": 1.514390299635225e-05,
"loss": 4.1553,
"step": 1923
},
{
"epoch": 1.2818355959023902,
"learning_rate": 1.5119113635716914e-05,
"loss": 4.18,
"step": 1924
},
{
"epoch": 1.2825018739068876,
"learning_rate": 1.5094335783382168e-05,
"loss": 4.1875,
"step": 1925
},
{
"epoch": 1.2831681519113851,
"learning_rate": 1.5069569468206871e-05,
"loss": 4.1741,
"step": 1926
},
{
"epoch": 1.2838344299158824,
"learning_rate": 1.5044814719036422e-05,
"loss": 4.175,
"step": 1927
},
{
"epoch": 1.2845007079203798,
"learning_rate": 1.5020071564702742e-05,
"loss": 4.1654,
"step": 1928
},
{
"epoch": 1.285166985924877,
"learning_rate": 1.4995340034024297e-05,
"loss": 4.2006,
"step": 1929
},
{
"epoch": 1.2858332639293746,
"learning_rate": 1.4970620155805964e-05,
"loss": 4.1615,
"step": 1930
},
{
"epoch": 1.2864995419338718,
"learning_rate": 1.494591195883905e-05,
"loss": 4.1482,
"step": 1931
},
{
"epoch": 1.2871658199383693,
"learning_rate": 1.4921215471901304e-05,
"loss": 4.1657,
"step": 1932
},
{
"epoch": 1.2878320979428666,
"learning_rate": 1.4896530723756791e-05,
"loss": 4.1787,
"step": 1933
},
{
"epoch": 1.288498375947364,
"learning_rate": 1.4871857743155904e-05,
"loss": 4.1849,
"step": 1934
},
{
"epoch": 1.2891646539518615,
"learning_rate": 1.4847196558835364e-05,
"loss": 4.1942,
"step": 1935
},
{
"epoch": 1.2898309319563588,
"learning_rate": 1.4822547199518127e-05,
"loss": 4.1473,
"step": 1936
},
{
"epoch": 1.290497209960856,
"learning_rate": 1.4797909693913376e-05,
"loss": 4.1503,
"step": 1937
},
{
"epoch": 1.2911634879653535,
"learning_rate": 1.4773284070716503e-05,
"loss": 4.1239,
"step": 1938
},
{
"epoch": 1.291829765969851,
"learning_rate": 1.4748670358609048e-05,
"loss": 4.1975,
"step": 1939
},
{
"epoch": 1.2924960439743483,
"learning_rate": 1.4724068586258677e-05,
"loss": 4.1379,
"step": 1940
},
{
"epoch": 1.2931623219788457,
"learning_rate": 1.4699478782319164e-05,
"loss": 4.1714,
"step": 1941
},
{
"epoch": 1.293828599983343,
"learning_rate": 1.4674900975430327e-05,
"loss": 4.1684,
"step": 1942
},
{
"epoch": 1.2944948779878405,
"learning_rate": 1.4650335194218016e-05,
"loss": 4.1197,
"step": 1943
},
{
"epoch": 1.2951611559923377,
"learning_rate": 1.4625781467294083e-05,
"loss": 4.181,
"step": 1944
},
{
"epoch": 1.2958274339968352,
"learning_rate": 1.4601239823256334e-05,
"loss": 4.1212,
"step": 1945
},
{
"epoch": 1.2964937120013325,
"learning_rate": 1.4576710290688497e-05,
"loss": 4.1922,
"step": 1946
},
{
"epoch": 1.29715999000583,
"learning_rate": 1.455219289816019e-05,
"loss": 4.1477,
"step": 1947
},
{
"epoch": 1.2978262680103274,
"learning_rate": 1.4527687674226926e-05,
"loss": 4.1951,
"step": 1948
},
{
"epoch": 1.2984925460148247,
"learning_rate": 1.4503194647430007e-05,
"loss": 4.1689,
"step": 1949
},
{
"epoch": 1.2991588240193221,
"learning_rate": 1.4478713846296521e-05,
"loss": 4.1337,
"step": 1950
},
{
"epoch": 1.2998251020238194,
"learning_rate": 1.4454245299339364e-05,
"loss": 4.1907,
"step": 1951
},
{
"epoch": 1.3004913800283169,
"learning_rate": 1.4429789035057124e-05,
"loss": 4.1447,
"step": 1952
},
{
"epoch": 1.3011576580328141,
"learning_rate": 1.440534508193408e-05,
"loss": 4.1843,
"step": 1953
},
{
"epoch": 1.3018239360373116,
"learning_rate": 1.4380913468440205e-05,
"loss": 4.2032,
"step": 1954
},
{
"epoch": 1.3024902140418089,
"learning_rate": 1.4356494223031067e-05,
"loss": 4.1684,
"step": 1955
},
{
"epoch": 1.3031564920463063,
"learning_rate": 1.4332087374147843e-05,
"loss": 4.1313,
"step": 1956
},
{
"epoch": 1.3038227700508038,
"learning_rate": 1.430769295021727e-05,
"loss": 4.162,
"step": 1957
},
{
"epoch": 1.304489048055301,
"learning_rate": 1.428331097965161e-05,
"loss": 4.1958,
"step": 1958
},
{
"epoch": 1.3051553260597983,
"learning_rate": 1.4258941490848616e-05,
"loss": 4.1261,
"step": 1959
},
{
"epoch": 1.3058216040642958,
"learning_rate": 1.4234584512191532e-05,
"loss": 4.1613,
"step": 1960
},
{
"epoch": 1.3064878820687933,
"learning_rate": 1.4210240072048996e-05,
"loss": 4.1623,
"step": 1961
},
{
"epoch": 1.3071541600732905,
"learning_rate": 1.4185908198775066e-05,
"loss": 4.1174,
"step": 1962
},
{
"epoch": 1.307820438077788,
"learning_rate": 1.4161588920709146e-05,
"loss": 4.1569,
"step": 1963
},
{
"epoch": 1.3084867160822853,
"learning_rate": 1.4137282266175977e-05,
"loss": 4.1277,
"step": 1964
},
{
"epoch": 1.3091529940867828,
"learning_rate": 1.41129882634856e-05,
"loss": 4.1431,
"step": 1965
},
{
"epoch": 1.30981927209128,
"learning_rate": 1.4088706940933327e-05,
"loss": 4.2031,
"step": 1966
},
{
"epoch": 1.3104855500957775,
"learning_rate": 1.406443832679969e-05,
"loss": 4.1856,
"step": 1967
},
{
"epoch": 1.3111518281002748,
"learning_rate": 1.4040182449350408e-05,
"loss": 4.1447,
"step": 1968
},
{
"epoch": 1.3118181061047722,
"learning_rate": 1.401593933683642e-05,
"loss": 4.1525,
"step": 1969
},
{
"epoch": 1.3124843841092697,
"learning_rate": 1.399170901749372e-05,
"loss": 4.1609,
"step": 1970
},
{
"epoch": 1.313150662113767,
"learning_rate": 1.3967491519543444e-05,
"loss": 4.1564,
"step": 1971
},
{
"epoch": 1.3138169401182642,
"learning_rate": 1.3943286871191807e-05,
"loss": 4.1747,
"step": 1972
},
{
"epoch": 1.3144832181227617,
"learning_rate": 1.3919095100630037e-05,
"loss": 4.1397,
"step": 1973
},
{
"epoch": 1.3151494961272592,
"learning_rate": 1.3894916236034367e-05,
"loss": 4.1275,
"step": 1974
},
{
"epoch": 1.3158157741317564,
"learning_rate": 1.3870750305565985e-05,
"loss": 4.1765,
"step": 1975
},
{
"epoch": 1.316482052136254,
"learning_rate": 1.3846597337371064e-05,
"loss": 4.1429,
"step": 1976
},
{
"epoch": 1.3171483301407512,
"learning_rate": 1.382245735958061e-05,
"loss": 4.1918,
"step": 1977
},
{
"epoch": 1.3178146081452486,
"learning_rate": 1.3798330400310539e-05,
"loss": 4.1976,
"step": 1978
},
{
"epoch": 1.318480886149746,
"learning_rate": 1.3774216487661618e-05,
"loss": 4.1291,
"step": 1979
},
{
"epoch": 1.3191471641542434,
"learning_rate": 1.3750115649719389e-05,
"loss": 4.1367,
"step": 1980
},
{
"epoch": 1.3198134421587406,
"learning_rate": 1.3726027914554166e-05,
"loss": 4.1315,
"step": 1981
},
{
"epoch": 1.3204797201632381,
"learning_rate": 1.3701953310221033e-05,
"loss": 4.1615,
"step": 1982
},
{
"epoch": 1.3211459981677356,
"learning_rate": 1.3677891864759751e-05,
"loss": 4.1542,
"step": 1983
},
{
"epoch": 1.3218122761722328,
"learning_rate": 1.365384360619476e-05,
"loss": 4.135,
"step": 1984
},
{
"epoch": 1.32247855417673,
"learning_rate": 1.3629808562535154e-05,
"loss": 4.1915,
"step": 1985
},
{
"epoch": 1.3231448321812276,
"learning_rate": 1.360578676177462e-05,
"loss": 4.1682,
"step": 1986
},
{
"epoch": 1.323811110185725,
"learning_rate": 1.3581778231891418e-05,
"loss": 4.1778,
"step": 1987
},
{
"epoch": 1.3244773881902223,
"learning_rate": 1.3557783000848384e-05,
"loss": 4.1569,
"step": 1988
},
{
"epoch": 1.3251436661947198,
"learning_rate": 1.353380109659283e-05,
"loss": 4.1514,
"step": 1989
},
{
"epoch": 1.325809944199217,
"learning_rate": 1.3509832547056556e-05,
"loss": 4.1935,
"step": 1990
},
{
"epoch": 1.3264762222037145,
"learning_rate": 1.3485877380155815e-05,
"loss": 4.1495,
"step": 1991
},
{
"epoch": 1.327142500208212,
"learning_rate": 1.3461935623791266e-05,
"loss": 4.1464,
"step": 1992
},
{
"epoch": 1.3278087782127093,
"learning_rate": 1.3438007305847939e-05,
"loss": 4.1768,
"step": 1993
},
{
"epoch": 1.3284750562172065,
"learning_rate": 1.3414092454195245e-05,
"loss": 4.0995,
"step": 1994
},
{
"epoch": 1.329141334221704,
"learning_rate": 1.3390191096686883e-05,
"loss": 4.1451,
"step": 1995
},
{
"epoch": 1.3298076122262015,
"learning_rate": 1.3366303261160823e-05,
"loss": 4.1967,
"step": 1996
},
{
"epoch": 1.3304738902306987,
"learning_rate": 1.3342428975439347e-05,
"loss": 4.1422,
"step": 1997
},
{
"epoch": 1.3311401682351962,
"learning_rate": 1.3318568267328874e-05,
"loss": 4.199,
"step": 1998
},
{
"epoch": 1.3318064462396935,
"learning_rate": 1.3294721164620053e-05,
"loss": 4.1926,
"step": 1999
},
{
"epoch": 1.332472724244191,
"learning_rate": 1.3270887695087703e-05,
"loss": 4.1509,
"step": 2000
},
{
"epoch": 1.3331390022486882,
"learning_rate": 1.3247067886490727e-05,
"loss": 4.1693,
"step": 2001
},
{
"epoch": 1.3338052802531857,
"learning_rate": 1.3223261766572143e-05,
"loss": 4.1255,
"step": 2002
},
{
"epoch": 1.334471558257683,
"learning_rate": 1.3199469363059002e-05,
"loss": 4.1436,
"step": 2003
},
{
"epoch": 1.3351378362621804,
"learning_rate": 1.3175690703662427e-05,
"loss": 4.1677,
"step": 2004
},
{
"epoch": 1.3358041142666779,
"learning_rate": 1.3151925816077464e-05,
"loss": 4.1928,
"step": 2005
},
{
"epoch": 1.3364703922711751,
"learning_rate": 1.312817472798316e-05,
"loss": 4.1515,
"step": 2006
},
{
"epoch": 1.3371366702756724,
"learning_rate": 1.3104437467042507e-05,
"loss": 4.1502,
"step": 2007
},
{
"epoch": 1.3378029482801699,
"learning_rate": 1.3080714060902355e-05,
"loss": 4.1407,
"step": 2008
},
{
"epoch": 1.3384692262846674,
"learning_rate": 1.3057004537193423e-05,
"loss": 4.1516,
"step": 2009
},
{
"epoch": 1.3391355042891646,
"learning_rate": 1.3033308923530296e-05,
"loss": 4.1333,
"step": 2010
},
{
"epoch": 1.339801782293662,
"learning_rate": 1.3009627247511313e-05,
"loss": 4.1679,
"step": 2011
},
{
"epoch": 1.3404680602981593,
"learning_rate": 1.2985959536718612e-05,
"loss": 4.1339,
"step": 2012
},
{
"epoch": 1.3411343383026568,
"learning_rate": 1.296230581871804e-05,
"loss": 4.1259,
"step": 2013
},
{
"epoch": 1.341800616307154,
"learning_rate": 1.2938666121059167e-05,
"loss": 4.1639,
"step": 2014
},
{
"epoch": 1.3424668943116516,
"learning_rate": 1.2915040471275219e-05,
"loss": 4.1583,
"step": 2015
},
{
"epoch": 1.3431331723161488,
"learning_rate": 1.2891428896883079e-05,
"loss": 4.1174,
"step": 2016
},
{
"epoch": 1.3437994503206463,
"learning_rate": 1.286783142538322e-05,
"loss": 4.1204,
"step": 2017
},
{
"epoch": 1.3444657283251438,
"learning_rate": 1.2844248084259691e-05,
"loss": 4.1802,
"step": 2018
},
{
"epoch": 1.345132006329641,
"learning_rate": 1.2820678900980093e-05,
"loss": 4.1857,
"step": 2019
},
{
"epoch": 1.3457982843341383,
"learning_rate": 1.2797123902995522e-05,
"loss": 4.1878,
"step": 2020
},
{
"epoch": 1.3464645623386358,
"learning_rate": 1.2773583117740555e-05,
"loss": 4.145,
"step": 2021
},
{
"epoch": 1.3471308403431332,
"learning_rate": 1.2750056572633246e-05,
"loss": 4.1622,
"step": 2022
},
{
"epoch": 1.3477971183476305,
"learning_rate": 1.2726544295075018e-05,
"loss": 4.1661,
"step": 2023
},
{
"epoch": 1.348463396352128,
"learning_rate": 1.2703046312450706e-05,
"loss": 4.1599,
"step": 2024
},
{
"epoch": 1.3491296743566252,
"learning_rate": 1.2679562652128485e-05,
"loss": 4.2027,
"step": 2025
},
{
"epoch": 1.3497959523611227,
"learning_rate": 1.2656093341459852e-05,
"loss": 4.0843,
"step": 2026
},
{
"epoch": 1.3504622303656202,
"learning_rate": 1.263263840777958e-05,
"loss": 4.1995,
"step": 2027
},
{
"epoch": 1.3511285083701174,
"learning_rate": 1.260919787840572e-05,
"loss": 4.157,
"step": 2028
},
{
"epoch": 1.3517947863746147,
"learning_rate": 1.258577178063953e-05,
"loss": 4.1624,
"step": 2029
},
{
"epoch": 1.3524610643791122,
"learning_rate": 1.256236014176546e-05,
"loss": 4.125,
"step": 2030
},
{
"epoch": 1.3531273423836097,
"learning_rate": 1.2538962989051115e-05,
"loss": 4.1289,
"step": 2031
},
{
"epoch": 1.353793620388107,
"learning_rate": 1.251558034974726e-05,
"loss": 4.1138,
"step": 2032
},
{
"epoch": 1.3544598983926044,
"learning_rate": 1.2492212251087706e-05,
"loss": 4.1221,
"step": 2033
},
{
"epoch": 1.3551261763971016,
"learning_rate": 1.2468858720289353e-05,
"loss": 4.1894,
"step": 2034
},
{
"epoch": 1.3557924544015991,
"learning_rate": 1.2445519784552153e-05,
"loss": 4.1257,
"step": 2035
},
{
"epoch": 1.3564587324060964,
"learning_rate": 1.2422195471059031e-05,
"loss": 4.1946,
"step": 2036
},
{
"epoch": 1.3571250104105939,
"learning_rate": 1.2398885806975883e-05,
"loss": 4.1905,
"step": 2037
},
{
"epoch": 1.3577912884150911,
"learning_rate": 1.2375590819451566e-05,
"loss": 4.1312,
"step": 2038
},
{
"epoch": 1.3584575664195886,
"learning_rate": 1.2352310535617823e-05,
"loss": 4.1403,
"step": 2039
},
{
"epoch": 1.359123844424086,
"learning_rate": 1.2329044982589275e-05,
"loss": 4.1388,
"step": 2040
},
{
"epoch": 1.3597901224285833,
"learning_rate": 1.2305794187463384e-05,
"loss": 4.1412,
"step": 2041
},
{
"epoch": 1.3604564004330806,
"learning_rate": 1.2282558177320434e-05,
"loss": 4.2034,
"step": 2042
},
{
"epoch": 1.361122678437578,
"learning_rate": 1.2259336979223465e-05,
"loss": 4.1321,
"step": 2043
},
{
"epoch": 1.3617889564420755,
"learning_rate": 1.2236130620218305e-05,
"loss": 4.0973,
"step": 2044
},
{
"epoch": 1.3624552344465728,
"learning_rate": 1.221293912733347e-05,
"loss": 4.1514,
"step": 2045
},
{
"epoch": 1.3631215124510703,
"learning_rate": 1.2189762527580159e-05,
"loss": 4.2265,
"step": 2046
},
{
"epoch": 1.3637877904555675,
"learning_rate": 1.2166600847952242e-05,
"loss": 4.1987,
"step": 2047
},
{
"epoch": 1.364454068460065,
"learning_rate": 1.2143454115426197e-05,
"loss": 4.1546,
"step": 2048
},
{
"epoch": 1.3651203464645623,
"learning_rate": 1.2120322356961092e-05,
"loss": 4.1229,
"step": 2049
},
{
"epoch": 1.3657866244690597,
"learning_rate": 1.2097205599498578e-05,
"loss": 4.198,
"step": 2050
},
{
"epoch": 1.366452902473557,
"learning_rate": 1.2074103869962814e-05,
"loss": 4.1491,
"step": 2051
},
{
"epoch": 1.3671191804780545,
"learning_rate": 1.2051017195260453e-05,
"loss": 4.1647,
"step": 2052
},
{
"epoch": 1.367785458482552,
"learning_rate": 1.2027945602280624e-05,
"loss": 4.2034,
"step": 2053
},
{
"epoch": 1.3684517364870492,
"learning_rate": 1.2004889117894885e-05,
"loss": 4.1265,
"step": 2054
},
{
"epoch": 1.3691180144915465,
"learning_rate": 1.1981847768957192e-05,
"loss": 4.176,
"step": 2055
},
{
"epoch": 1.369784292496044,
"learning_rate": 1.1958821582303898e-05,
"loss": 4.1558,
"step": 2056
},
{
"epoch": 1.3704505705005414,
"learning_rate": 1.1935810584753662e-05,
"loss": 4.1591,
"step": 2057
},
{
"epoch": 1.3711168485050387,
"learning_rate": 1.1912814803107474e-05,
"loss": 4.1143,
"step": 2058
},
{
"epoch": 1.3717831265095362,
"learning_rate": 1.1889834264148589e-05,
"loss": 4.1486,
"step": 2059
},
{
"epoch": 1.3724494045140334,
"learning_rate": 1.1866868994642535e-05,
"loss": 4.1338,
"step": 2060
},
{
"epoch": 1.3731156825185309,
"learning_rate": 1.1843919021337016e-05,
"loss": 4.12,
"step": 2061
},
{
"epoch": 1.3737819605230284,
"learning_rate": 1.1820984370961937e-05,
"loss": 4.1511,
"step": 2062
},
{
"epoch": 1.3744482385275256,
"learning_rate": 1.1798065070229383e-05,
"loss": 4.136,
"step": 2063
},
{
"epoch": 1.3751145165320229,
"learning_rate": 1.1775161145833524e-05,
"loss": 4.1585,
"step": 2064
},
{
"epoch": 1.3757807945365204,
"learning_rate": 1.1752272624450628e-05,
"loss": 4.2002,
"step": 2065
},
{
"epoch": 1.3764470725410178,
"learning_rate": 1.1729399532739047e-05,
"loss": 4.1449,
"step": 2066
},
{
"epoch": 1.377113350545515,
"learning_rate": 1.1706541897339151e-05,
"loss": 4.152,
"step": 2067
},
{
"epoch": 1.3777796285500126,
"learning_rate": 1.168369974487327e-05,
"loss": 4.1659,
"step": 2068
},
{
"epoch": 1.3784459065545098,
"learning_rate": 1.1660873101945763e-05,
"loss": 4.1042,
"step": 2069
},
{
"epoch": 1.3791121845590073,
"learning_rate": 1.1638061995142888e-05,
"loss": 4.1899,
"step": 2070
},
{
"epoch": 1.3797784625635046,
"learning_rate": 1.16152664510328e-05,
"loss": 4.1686,
"step": 2071
},
{
"epoch": 1.380444740568002,
"learning_rate": 1.159248649616557e-05,
"loss": 4.1565,
"step": 2072
},
{
"epoch": 1.3811110185724993,
"learning_rate": 1.1569722157073074e-05,
"loss": 4.1974,
"step": 2073
},
{
"epoch": 1.3817772965769968,
"learning_rate": 1.1546973460269009e-05,
"loss": 4.1308,
"step": 2074
},
{
"epoch": 1.3824435745814942,
"learning_rate": 1.1524240432248858e-05,
"loss": 4.1347,
"step": 2075
},
{
"epoch": 1.3831098525859915,
"learning_rate": 1.1501523099489855e-05,
"loss": 4.1657,
"step": 2076
},
{
"epoch": 1.3837761305904888,
"learning_rate": 1.147882148845094e-05,
"loss": 4.1685,
"step": 2077
},
{
"epoch": 1.3844424085949862,
"learning_rate": 1.1456135625572772e-05,
"loss": 4.1662,
"step": 2078
},
{
"epoch": 1.3851086865994837,
"learning_rate": 1.1433465537277641e-05,
"loss": 4.1231,
"step": 2079
},
{
"epoch": 1.385774964603981,
"learning_rate": 1.1410811249969475e-05,
"loss": 4.1173,
"step": 2080
},
{
"epoch": 1.3864412426084785,
"learning_rate": 1.138817279003379e-05,
"loss": 4.1305,
"step": 2081
},
{
"epoch": 1.3871075206129757,
"learning_rate": 1.1365550183837685e-05,
"loss": 4.1332,
"step": 2082
},
{
"epoch": 1.3877737986174732,
"learning_rate": 1.1342943457729763e-05,
"loss": 4.1684,
"step": 2083
},
{
"epoch": 1.3884400766219704,
"learning_rate": 1.1320352638040174e-05,
"loss": 4.1294,
"step": 2084
},
{
"epoch": 1.389106354626468,
"learning_rate": 1.1297777751080512e-05,
"loss": 4.145,
"step": 2085
},
{
"epoch": 1.3897726326309652,
"learning_rate": 1.1275218823143819e-05,
"loss": 4.1549,
"step": 2086
},
{
"epoch": 1.3904389106354627,
"learning_rate": 1.1252675880504553e-05,
"loss": 4.1602,
"step": 2087
},
{
"epoch": 1.3911051886399601,
"learning_rate": 1.1230148949418557e-05,
"loss": 4.1491,
"step": 2088
},
{
"epoch": 1.3917714666444574,
"learning_rate": 1.1207638056123012e-05,
"loss": 4.1736,
"step": 2089
},
{
"epoch": 1.3924377446489546,
"learning_rate": 1.1185143226836428e-05,
"loss": 4.1685,
"step": 2090
},
{
"epoch": 1.3931040226534521,
"learning_rate": 1.1162664487758623e-05,
"loss": 4.1321,
"step": 2091
},
{
"epoch": 1.3937703006579496,
"learning_rate": 1.1140201865070643e-05,
"loss": 4.1302,
"step": 2092
},
{
"epoch": 1.3944365786624469,
"learning_rate": 1.1117755384934774e-05,
"loss": 4.1064,
"step": 2093
},
{
"epoch": 1.3951028566669443,
"learning_rate": 1.1095325073494522e-05,
"loss": 4.1933,
"step": 2094
},
{
"epoch": 1.3957691346714416,
"learning_rate": 1.1072910956874544e-05,
"loss": 4.1408,
"step": 2095
},
{
"epoch": 1.396435412675939,
"learning_rate": 1.1050513061180606e-05,
"loss": 4.1576,
"step": 2096
},
{
"epoch": 1.3971016906804365,
"learning_rate": 1.102813141249964e-05,
"loss": 4.1404,
"step": 2097
},
{
"epoch": 1.3977679686849338,
"learning_rate": 1.1005766036899614e-05,
"loss": 4.1406,
"step": 2098
},
{
"epoch": 1.398434246689431,
"learning_rate": 1.0983416960429547e-05,
"loss": 4.2539,
"step": 2099
},
{
"epoch": 1.3991005246939285,
"learning_rate": 1.0961084209119496e-05,
"loss": 4.1896,
"step": 2100
},
{
"epoch": 1.399766802698426,
"learning_rate": 1.0938767808980486e-05,
"loss": 4.1376,
"step": 2101
},
{
"epoch": 1.4004330807029233,
"learning_rate": 1.0916467786004492e-05,
"loss": 4.1533,
"step": 2102
},
{
"epoch": 1.4010993587074208,
"learning_rate": 1.0894184166164435e-05,
"loss": 4.1597,
"step": 2103
},
{
"epoch": 1.401765636711918,
"learning_rate": 1.087191697541411e-05,
"loss": 4.1626,
"step": 2104
},
{
"epoch": 1.4024319147164155,
"learning_rate": 1.084966623968818e-05,
"loss": 4.1129,
"step": 2105
},
{
"epoch": 1.4030981927209127,
"learning_rate": 1.082743198490217e-05,
"loss": 4.1908,
"step": 2106
},
{
"epoch": 1.4037644707254102,
"learning_rate": 1.080521423695238e-05,
"loss": 4.1267,
"step": 2107
},
{
"epoch": 1.4044307487299075,
"learning_rate": 1.0783013021715892e-05,
"loss": 4.1645,
"step": 2108
},
{
"epoch": 1.405097026734405,
"learning_rate": 1.0760828365050535e-05,
"loss": 4.1551,
"step": 2109
},
{
"epoch": 1.4057633047389024,
"learning_rate": 1.073866029279485e-05,
"loss": 4.1136,
"step": 2110
},
{
"epoch": 1.4064295827433997,
"learning_rate": 1.0716508830768065e-05,
"loss": 4.1962,
"step": 2111
},
{
"epoch": 1.407095860747897,
"learning_rate": 1.0694374004770047e-05,
"loss": 4.1463,
"step": 2112
},
{
"epoch": 1.4077621387523944,
"learning_rate": 1.0672255840581324e-05,
"loss": 4.1154,
"step": 2113
},
{
"epoch": 1.408428416756892,
"learning_rate": 1.065015436396298e-05,
"loss": 4.137,
"step": 2114
},
{
"epoch": 1.4090946947613892,
"learning_rate": 1.0628069600656678e-05,
"loss": 4.1301,
"step": 2115
},
{
"epoch": 1.4097609727658866,
"learning_rate": 1.0606001576384617e-05,
"loss": 4.1813,
"step": 2116
},
{
"epoch": 1.410427250770384,
"learning_rate": 1.0583950316849491e-05,
"loss": 4.1407,
"step": 2117
},
{
"epoch": 1.4110935287748814,
"learning_rate": 1.056191584773447e-05,
"loss": 4.1954,
"step": 2118
},
{
"epoch": 1.4117598067793786,
"learning_rate": 1.0539898194703188e-05,
"loss": 4.1612,
"step": 2119
},
{
"epoch": 1.412426084783876,
"learning_rate": 1.0517897383399672e-05,
"loss": 4.1458,
"step": 2120
},
{
"epoch": 1.4130923627883734,
"learning_rate": 1.0495913439448324e-05,
"loss": 4.1742,
"step": 2121
},
{
"epoch": 1.4137586407928708,
"learning_rate": 1.0473946388453933e-05,
"loss": 4.1383,
"step": 2122
},
{
"epoch": 1.4144249187973683,
"learning_rate": 1.0451996256001603e-05,
"loss": 4.175,
"step": 2123
},
{
"epoch": 1.4150911968018656,
"learning_rate": 1.043006306765669e-05,
"loss": 4.1684,
"step": 2124
},
{
"epoch": 1.4157574748063628,
"learning_rate": 1.040814684896488e-05,
"loss": 4.1351,
"step": 2125
},
{
"epoch": 1.4164237528108603,
"learning_rate": 1.0386247625452056e-05,
"loss": 4.1072,
"step": 2126
},
{
"epoch": 1.4170900308153578,
"learning_rate": 1.0364365422624305e-05,
"loss": 4.1495,
"step": 2127
},
{
"epoch": 1.417756308819855,
"learning_rate": 1.034250026596792e-05,
"loss": 4.1337,
"step": 2128
},
{
"epoch": 1.4184225868243525,
"learning_rate": 1.0320652180949305e-05,
"loss": 4.2267,
"step": 2129
},
{
"epoch": 1.4190888648288498,
"learning_rate": 1.0298821193015005e-05,
"loss": 4.1527,
"step": 2130
},
{
"epoch": 1.4197551428333473,
"learning_rate": 1.0277007327591636e-05,
"loss": 4.1185,
"step": 2131
},
{
"epoch": 1.4204214208378447,
"learning_rate": 1.0255210610085882e-05,
"loss": 4.1308,
"step": 2132
},
{
"epoch": 1.421087698842342,
"learning_rate": 1.0233431065884441e-05,
"loss": 4.147,
"step": 2133
},
{
"epoch": 1.4217539768468392,
"learning_rate": 1.0211668720354037e-05,
"loss": 4.1293,
"step": 2134
},
{
"epoch": 1.4224202548513367,
"learning_rate": 1.0189923598841333e-05,
"loss": 4.1794,
"step": 2135
},
{
"epoch": 1.4230865328558342,
"learning_rate": 1.016819572667295e-05,
"loss": 4.1518,
"step": 2136
},
{
"epoch": 1.4237528108603315,
"learning_rate": 1.0146485129155405e-05,
"loss": 4.1413,
"step": 2137
},
{
"epoch": 1.424419088864829,
"learning_rate": 1.0124791831575103e-05,
"loss": 4.1157,
"step": 2138
},
{
"epoch": 1.4250853668693262,
"learning_rate": 1.0103115859198303e-05,
"loss": 4.1394,
"step": 2139
},
{
"epoch": 1.4257516448738237,
"learning_rate": 1.0081457237271066e-05,
"loss": 4.1324,
"step": 2140
},
{
"epoch": 1.426417922878321,
"learning_rate": 1.0059815991019281e-05,
"loss": 4.1256,
"step": 2141
},
{
"epoch": 1.4270842008828184,
"learning_rate": 1.0038192145648567e-05,
"loss": 4.1035,
"step": 2142
},
{
"epoch": 1.4277504788873157,
"learning_rate": 1.001658572634429e-05,
"loss": 4.1484,
"step": 2143
},
{
"epoch": 1.4284167568918131,
"learning_rate": 9.994996758271517e-06,
"loss": 4.1673,
"step": 2144
},
{
"epoch": 1.4290830348963106,
"learning_rate": 9.973425266574984e-06,
"loss": 4.1534,
"step": 2145
},
{
"epoch": 1.4297493129008079,
"learning_rate": 9.951871276379076e-06,
"loss": 4.1291,
"step": 2146
},
{
"epoch": 1.4304155909053051,
"learning_rate": 9.930334812787812e-06,
"loss": 4.1362,
"step": 2147
},
{
"epoch": 1.4310818689098026,
"learning_rate": 9.908815900884766e-06,
"loss": 4.1451,
"step": 2148
},
{
"epoch": 1.4317481469143,
"learning_rate": 9.887314565733086e-06,
"loss": 4.1402,
"step": 2149
},
{
"epoch": 1.4324144249187973,
"learning_rate": 9.865830832375467e-06,
"loss": 4.143,
"step": 2150
},
{
"epoch": 1.4330807029232948,
"learning_rate": 9.844364725834057e-06,
"loss": 4.098,
"step": 2151
},
{
"epoch": 1.433746980927792,
"learning_rate": 9.822916271110505e-06,
"loss": 4.1457,
"step": 2152
},
{
"epoch": 1.4344132589322895,
"learning_rate": 9.801485493185908e-06,
"loss": 4.1718,
"step": 2153
},
{
"epoch": 1.4350795369367868,
"learning_rate": 9.78007241702076e-06,
"loss": 4.12,
"step": 2154
},
{
"epoch": 1.4357458149412843,
"learning_rate": 9.758677067554927e-06,
"loss": 4.1273,
"step": 2155
},
{
"epoch": 1.4364120929457815,
"learning_rate": 9.737299469707663e-06,
"loss": 4.1247,
"step": 2156
},
{
"epoch": 1.437078370950279,
"learning_rate": 9.715939648377517e-06,
"loss": 4.1979,
"step": 2157
},
{
"epoch": 1.4377446489547765,
"learning_rate": 9.69459762844234e-06,
"loss": 4.0758,
"step": 2158
},
{
"epoch": 1.4384109269592738,
"learning_rate": 9.673273434759256e-06,
"loss": 4.1577,
"step": 2159
},
{
"epoch": 1.439077204963771,
"learning_rate": 9.651967092164618e-06,
"loss": 4.0743,
"step": 2160
},
{
"epoch": 1.4397434829682685,
"learning_rate": 9.630678625473988e-06,
"loss": 4.1485,
"step": 2161
},
{
"epoch": 1.440409760972766,
"learning_rate": 9.60940805948213e-06,
"loss": 4.1655,
"step": 2162
},
{
"epoch": 1.4410760389772632,
"learning_rate": 9.588155418962932e-06,
"loss": 4.1785,
"step": 2163
},
{
"epoch": 1.4417423169817607,
"learning_rate": 9.566920728669415e-06,
"loss": 4.124,
"step": 2164
},
{
"epoch": 1.442408594986258,
"learning_rate": 9.54570401333369e-06,
"loss": 4.1186,
"step": 2165
},
{
"epoch": 1.4430748729907554,
"learning_rate": 9.524505297666933e-06,
"loss": 4.1733,
"step": 2166
},
{
"epoch": 1.4437411509952527,
"learning_rate": 9.503324606359362e-06,
"loss": 4.1,
"step": 2167
},
{
"epoch": 1.4444074289997502,
"learning_rate": 9.482161964080185e-06,
"loss": 4.1322,
"step": 2168
},
{
"epoch": 1.4450737070042474,
"learning_rate": 9.46101739547762e-06,
"loss": 4.1682,
"step": 2169
},
{
"epoch": 1.445739985008745,
"learning_rate": 9.439890925178808e-06,
"loss": 4.1553,
"step": 2170
},
{
"epoch": 1.4464062630132424,
"learning_rate": 9.418782577789811e-06,
"loss": 4.2035,
"step": 2171
},
{
"epoch": 1.4470725410177396,
"learning_rate": 9.397692377895597e-06,
"loss": 4.1604,
"step": 2172
},
{
"epoch": 1.4477388190222371,
"learning_rate": 9.37662035005999e-06,
"loss": 4.1905,
"step": 2173
},
{
"epoch": 1.4484050970267344,
"learning_rate": 9.355566518825635e-06,
"loss": 4.1725,
"step": 2174
},
{
"epoch": 1.4490713750312318,
"learning_rate": 9.334530908714023e-06,
"loss": 4.1524,
"step": 2175
},
{
"epoch": 1.449737653035729,
"learning_rate": 9.313513544225383e-06,
"loss": 4.1319,
"step": 2176
},
{
"epoch": 1.4504039310402266,
"learning_rate": 9.292514449838705e-06,
"loss": 4.1615,
"step": 2177
},
{
"epoch": 1.4510702090447238,
"learning_rate": 9.271533650011721e-06,
"loss": 4.1528,
"step": 2178
},
{
"epoch": 1.4517364870492213,
"learning_rate": 9.25057116918082e-06,
"loss": 4.1682,
"step": 2179
},
{
"epoch": 1.4524027650537188,
"learning_rate": 9.229627031761065e-06,
"loss": 4.1663,
"step": 2180
},
{
"epoch": 1.453069043058216,
"learning_rate": 9.208701262146182e-06,
"loss": 4.1267,
"step": 2181
},
{
"epoch": 1.4537353210627133,
"learning_rate": 9.187793884708473e-06,
"loss": 4.1966,
"step": 2182
},
{
"epoch": 1.4544015990672108,
"learning_rate": 9.166904923798821e-06,
"loss": 4.1945,
"step": 2183
},
{
"epoch": 1.4550678770717083,
"learning_rate": 9.146034403746687e-06,
"loss": 4.1776,
"step": 2184
},
{
"epoch": 1.4557341550762055,
"learning_rate": 9.125182348860017e-06,
"loss": 4.1475,
"step": 2185
},
{
"epoch": 1.456400433080703,
"learning_rate": 9.104348783425276e-06,
"loss": 4.1838,
"step": 2186
},
{
"epoch": 1.4570667110852003,
"learning_rate": 9.083533731707381e-06,
"loss": 4.1835,
"step": 2187
},
{
"epoch": 1.4577329890896977,
"learning_rate": 9.06273721794969e-06,
"loss": 4.19,
"step": 2188
},
{
"epoch": 1.458399267094195,
"learning_rate": 9.041959266373964e-06,
"loss": 4.1828,
"step": 2189
},
{
"epoch": 1.4590655450986925,
"learning_rate": 9.021199901180369e-06,
"loss": 4.1207,
"step": 2190
},
{
"epoch": 1.4597318231031897,
"learning_rate": 9.000459146547397e-06,
"loss": 4.1708,
"step": 2191
},
{
"epoch": 1.4603981011076872,
"learning_rate": 8.979737026631869e-06,
"loss": 4.1672,
"step": 2192
},
{
"epoch": 1.4610643791121847,
"learning_rate": 8.959033565568909e-06,
"loss": 4.1615,
"step": 2193
},
{
"epoch": 1.461730657116682,
"learning_rate": 8.938348787471903e-06,
"loss": 4.1389,
"step": 2194
},
{
"epoch": 1.4623969351211792,
"learning_rate": 8.917682716432483e-06,
"loss": 4.1457,
"step": 2195
},
{
"epoch": 1.4630632131256767,
"learning_rate": 8.897035376520477e-06,
"loss": 4.1819,
"step": 2196
},
{
"epoch": 1.4637294911301741,
"learning_rate": 8.876406791783929e-06,
"loss": 4.1521,
"step": 2197
},
{
"epoch": 1.4643957691346714,
"learning_rate": 8.85579698624901e-06,
"loss": 4.1173,
"step": 2198
},
{
"epoch": 1.4650620471391689,
"learning_rate": 8.835205983920026e-06,
"loss": 4.1563,
"step": 2199
},
{
"epoch": 1.4657283251436661,
"learning_rate": 8.814633808779388e-06,
"loss": 4.1305,
"step": 2200
},
{
"epoch": 1.4663946031481636,
"learning_rate": 8.79408048478757e-06,
"loss": 4.1131,
"step": 2201
},
{
"epoch": 1.4670608811526609,
"learning_rate": 8.773546035883093e-06,
"loss": 4.1643,
"step": 2202
},
{
"epoch": 1.4677271591571583,
"learning_rate": 8.753030485982514e-06,
"loss": 4.1386,
"step": 2203
},
{
"epoch": 1.4683934371616556,
"learning_rate": 8.732533858980347e-06,
"loss": 4.1364,
"step": 2204
},
{
"epoch": 1.469059715166153,
"learning_rate": 8.712056178749074e-06,
"loss": 4.1571,
"step": 2205
},
{
"epoch": 1.4697259931706506,
"learning_rate": 8.69159746913914e-06,
"loss": 4.1739,
"step": 2206
},
{
"epoch": 1.4703922711751478,
"learning_rate": 8.671157753978851e-06,
"loss": 4.1901,
"step": 2207
},
{
"epoch": 1.471058549179645,
"learning_rate": 8.650737057074404e-06,
"loss": 4.1681,
"step": 2208
},
{
"epoch": 1.4717248271841425,
"learning_rate": 8.630335402209872e-06,
"loss": 4.1922,
"step": 2209
},
{
"epoch": 1.47239110518864,
"learning_rate": 8.609952813147117e-06,
"loss": 4.1674,
"step": 2210
},
{
"epoch": 1.4730573831931373,
"learning_rate": 8.589589313625804e-06,
"loss": 4.1848,
"step": 2211
},
{
"epoch": 1.4737236611976348,
"learning_rate": 8.56924492736338e-06,
"loss": 4.1224,
"step": 2212
},
{
"epoch": 1.474389939202132,
"learning_rate": 8.548919678055015e-06,
"loss": 4.1696,
"step": 2213
},
{
"epoch": 1.4750562172066295,
"learning_rate": 8.528613589373577e-06,
"loss": 4.1516,
"step": 2214
},
{
"epoch": 1.475722495211127,
"learning_rate": 8.50832668496965e-06,
"loss": 4.1925,
"step": 2215
},
{
"epoch": 1.4763887732156242,
"learning_rate": 8.488058988471457e-06,
"loss": 4.1135,
"step": 2216
},
{
"epoch": 1.4770550512201215,
"learning_rate": 8.467810523484835e-06,
"loss": 4.1902,
"step": 2217
},
{
"epoch": 1.477721329224619,
"learning_rate": 8.447581313593259e-06,
"loss": 4.1559,
"step": 2218
},
{
"epoch": 1.4783876072291164,
"learning_rate": 8.427371382357744e-06,
"loss": 4.1414,
"step": 2219
},
{
"epoch": 1.4790538852336137,
"learning_rate": 8.407180753316865e-06,
"loss": 4.185,
"step": 2220
},
{
"epoch": 1.4797201632381112,
"learning_rate": 8.387009449986713e-06,
"loss": 4.1735,
"step": 2221
},
{
"epoch": 1.4803864412426084,
"learning_rate": 8.36685749586087e-06,
"loss": 4.1466,
"step": 2222
},
{
"epoch": 1.481052719247106,
"learning_rate": 8.346724914410385e-06,
"loss": 4.1443,
"step": 2223
},
{
"epoch": 1.4817189972516032,
"learning_rate": 8.32661172908373e-06,
"loss": 4.0921,
"step": 2224
},
{
"epoch": 1.4823852752561006,
"learning_rate": 8.306517963306817e-06,
"loss": 4.1475,
"step": 2225
},
{
"epoch": 1.483051553260598,
"learning_rate": 8.286443640482911e-06,
"loss": 4.1613,
"step": 2226
},
{
"epoch": 1.4837178312650954,
"learning_rate": 8.26638878399264e-06,
"loss": 4.1867,
"step": 2227
},
{
"epoch": 1.4843841092695929,
"learning_rate": 8.246353417193961e-06,
"loss": 4.1495,
"step": 2228
},
{
"epoch": 1.4850503872740901,
"learning_rate": 8.226337563422134e-06,
"loss": 4.1919,
"step": 2229
},
{
"epoch": 1.4857166652785874,
"learning_rate": 8.20634124598968e-06,
"loss": 4.2219,
"step": 2230
},
{
"epoch": 1.4863829432830848,
"learning_rate": 8.18636448818639e-06,
"loss": 4.1121,
"step": 2231
},
{
"epoch": 1.4870492212875823,
"learning_rate": 8.16640731327925e-06,
"loss": 4.1118,
"step": 2232
},
{
"epoch": 1.4877154992920796,
"learning_rate": 8.14646974451245e-06,
"loss": 4.1217,
"step": 2233
},
{
"epoch": 1.488381777296577,
"learning_rate": 8.126551805107341e-06,
"loss": 4.1626,
"step": 2234
},
{
"epoch": 1.4890480553010743,
"learning_rate": 8.106653518262407e-06,
"loss": 4.209,
"step": 2235
},
{
"epoch": 1.4897143333055718,
"learning_rate": 8.086774907153246e-06,
"loss": 4.2018,
"step": 2236
},
{
"epoch": 1.490380611310069,
"learning_rate": 8.066915994932554e-06,
"loss": 4.17,
"step": 2237
},
{
"epoch": 1.4910468893145665,
"learning_rate": 8.047076804730064e-06,
"loss": 4.1392,
"step": 2238
},
{
"epoch": 1.4917131673190638,
"learning_rate": 8.027257359652535e-06,
"loss": 4.1085,
"step": 2239
},
{
"epoch": 1.4923794453235613,
"learning_rate": 8.007457682783758e-06,
"loss": 4.1508,
"step": 2240
},
{
"epoch": 1.4930457233280587,
"learning_rate": 7.987677797184484e-06,
"loss": 4.1115,
"step": 2241
},
{
"epoch": 1.493712001332556,
"learning_rate": 7.967917725892379e-06,
"loss": 4.1269,
"step": 2242
},
{
"epoch": 1.4943782793370533,
"learning_rate": 7.948177491922094e-06,
"loss": 4.1366,
"step": 2243
},
{
"epoch": 1.4950445573415507,
"learning_rate": 7.928457118265128e-06,
"loss": 4.1918,
"step": 2244
},
{
"epoch": 1.4957108353460482,
"learning_rate": 7.908756627889863e-06,
"loss": 4.1268,
"step": 2245
},
{
"epoch": 1.4963771133505455,
"learning_rate": 7.889076043741538e-06,
"loss": 4.1517,
"step": 2246
},
{
"epoch": 1.497043391355043,
"learning_rate": 7.869415388742187e-06,
"loss": 4.1581,
"step": 2247
},
{
"epoch": 1.4977096693595402,
"learning_rate": 7.84977468579064e-06,
"loss": 4.1811,
"step": 2248
},
{
"epoch": 1.4983759473640377,
"learning_rate": 7.830153957762481e-06,
"loss": 4.1389,
"step": 2249
},
{
"epoch": 1.4990422253685352,
"learning_rate": 7.810553227510045e-06,
"loss": 4.1065,
"step": 2250
},
{
"epoch": 1.4997085033730324,
"learning_rate": 7.79097251786236e-06,
"loss": 4.1547,
"step": 2251
},
{
"epoch": 1.5003747813775297,
"learning_rate": 7.771411851625138e-06,
"loss": 4.176,
"step": 2252
},
{
"epoch": 1.5010410593820271,
"learning_rate": 7.751871251580764e-06,
"loss": 4.1274,
"step": 2253
},
{
"epoch": 1.5017073373865246,
"learning_rate": 7.732350740488234e-06,
"loss": 4.1943,
"step": 2254
},
{
"epoch": 1.5023736153910219,
"learning_rate": 7.71285034108315e-06,
"loss": 4.1544,
"step": 2255
},
{
"epoch": 1.5030398933955191,
"learning_rate": 7.693370076077688e-06,
"loss": 4.1835,
"step": 2256
},
{
"epoch": 1.5037061714000166,
"learning_rate": 7.673909968160579e-06,
"loss": 4.1955,
"step": 2257
},
{
"epoch": 1.504372449404514,
"learning_rate": 7.654470039997064e-06,
"loss": 4.1487,
"step": 2258
},
{
"epoch": 1.5050387274090116,
"learning_rate": 7.635050314228909e-06,
"loss": 4.1666,
"step": 2259
},
{
"epoch": 1.5057050054135088,
"learning_rate": 7.615650813474323e-06,
"loss": 4.1405,
"step": 2260
},
{
"epoch": 1.506371283418006,
"learning_rate": 7.596271560327967e-06,
"loss": 4.1522,
"step": 2261
},
{
"epoch": 1.5070375614225036,
"learning_rate": 7.576912577360923e-06,
"loss": 4.0985,
"step": 2262
},
{
"epoch": 1.507703839427001,
"learning_rate": 7.557573887120662e-06,
"loss": 4.0924,
"step": 2263
},
{
"epoch": 1.5083701174314983,
"learning_rate": 7.538255512131007e-06,
"loss": 4.1306,
"step": 2264
},
{
"epoch": 1.5090363954359955,
"learning_rate": 7.518957474892149e-06,
"loss": 4.1809,
"step": 2265
},
{
"epoch": 1.509702673440493,
"learning_rate": 7.499679797880571e-06,
"loss": 4.1218,
"step": 2266
},
{
"epoch": 1.5103689514449905,
"learning_rate": 7.480422503549037e-06,
"loss": 4.1858,
"step": 2267
},
{
"epoch": 1.5110352294494878,
"learning_rate": 7.461185614326596e-06,
"loss": 4.1495,
"step": 2268
},
{
"epoch": 1.511701507453985,
"learning_rate": 7.441969152618516e-06,
"loss": 4.1415,
"step": 2269
},
{
"epoch": 1.5123677854584825,
"learning_rate": 7.4227731408062465e-06,
"loss": 4.1505,
"step": 2270
},
{
"epoch": 1.51303406346298,
"learning_rate": 7.403597601247472e-06,
"loss": 4.1147,
"step": 2271
},
{
"epoch": 1.5137003414674775,
"learning_rate": 7.384442556275997e-06,
"loss": 4.1378,
"step": 2272
},
{
"epoch": 1.5143666194719747,
"learning_rate": 7.365308028201756e-06,
"loss": 4.1571,
"step": 2273
},
{
"epoch": 1.515032897476472,
"learning_rate": 7.346194039310814e-06,
"loss": 4.0884,
"step": 2274
},
{
"epoch": 1.5156991754809694,
"learning_rate": 7.327100611865284e-06,
"loss": 4.1996,
"step": 2275
},
{
"epoch": 1.516365453485467,
"learning_rate": 7.308027768103357e-06,
"loss": 4.1597,
"step": 2276
},
{
"epoch": 1.5170317314899642,
"learning_rate": 7.288975530239211e-06,
"loss": 4.1587,
"step": 2277
},
{
"epoch": 1.5176980094944614,
"learning_rate": 7.269943920463071e-06,
"loss": 4.128,
"step": 2278
},
{
"epoch": 1.518364287498959,
"learning_rate": 7.250932960941109e-06,
"loss": 4.1729,
"step": 2279
},
{
"epoch": 1.5190305655034564,
"learning_rate": 7.231942673815442e-06,
"loss": 4.0899,
"step": 2280
},
{
"epoch": 1.5196968435079536,
"learning_rate": 7.212973081204136e-06,
"loss": 4.126,
"step": 2281
},
{
"epoch": 1.5203631215124511,
"learning_rate": 7.194024205201133e-06,
"loss": 4.1381,
"step": 2282
},
{
"epoch": 1.5210293995169484,
"learning_rate": 7.175096067876244e-06,
"loss": 4.1703,
"step": 2283
},
{
"epoch": 1.5216956775214459,
"learning_rate": 7.15618869127514e-06,
"loss": 4.1583,
"step": 2284
},
{
"epoch": 1.5223619555259433,
"learning_rate": 7.137302097419296e-06,
"loss": 4.1311,
"step": 2285
},
{
"epoch": 1.5230282335304406,
"learning_rate": 7.118436308305987e-06,
"loss": 4.1378,
"step": 2286
},
{
"epoch": 1.5236945115349378,
"learning_rate": 7.099591345908274e-06,
"loss": 4.1566,
"step": 2287
},
{
"epoch": 1.5243607895394353,
"learning_rate": 7.08076723217494e-06,
"loss": 4.1793,
"step": 2288
},
{
"epoch": 1.5250270675439328,
"learning_rate": 7.061963989030487e-06,
"loss": 4.1565,
"step": 2289
},
{
"epoch": 1.52569334554843,
"learning_rate": 7.043181638375118e-06,
"loss": 4.1525,
"step": 2290
},
{
"epoch": 1.5263596235529273,
"learning_rate": 7.024420202084694e-06,
"loss": 4.1882,
"step": 2291
},
{
"epoch": 1.5270259015574248,
"learning_rate": 7.00567970201072e-06,
"loss": 4.2149,
"step": 2292
},
{
"epoch": 1.5276921795619223,
"learning_rate": 6.986960159980327e-06,
"loss": 4.1334,
"step": 2293
},
{
"epoch": 1.5283584575664197,
"learning_rate": 6.968261597796219e-06,
"loss": 4.1355,
"step": 2294
},
{
"epoch": 1.529024735570917,
"learning_rate": 6.949584037236667e-06,
"loss": 4.1586,
"step": 2295
},
{
"epoch": 1.5296910135754143,
"learning_rate": 6.930927500055504e-06,
"loss": 4.1751,
"step": 2296
},
{
"epoch": 1.5303572915799117,
"learning_rate": 6.9122920079820544e-06,
"loss": 4.1157,
"step": 2297
},
{
"epoch": 1.5310235695844092,
"learning_rate": 6.89367758272112e-06,
"loss": 4.182,
"step": 2298
},
{
"epoch": 1.5316898475889065,
"learning_rate": 6.875084245953001e-06,
"loss": 4.1482,
"step": 2299
},
{
"epoch": 1.5323561255934037,
"learning_rate": 6.856512019333411e-06,
"loss": 4.1714,
"step": 2300
},
{
"epoch": 1.5330224035979012,
"learning_rate": 6.837960924493473e-06,
"loss": 4.158,
"step": 2301
},
{
"epoch": 1.5336886816023987,
"learning_rate": 6.819430983039726e-06,
"loss": 4.163,
"step": 2302
},
{
"epoch": 1.534354959606896,
"learning_rate": 6.800922216554048e-06,
"loss": 4.1953,
"step": 2303
},
{
"epoch": 1.5350212376113932,
"learning_rate": 6.78243464659366e-06,
"loss": 4.1075,
"step": 2304
},
{
"epoch": 1.5356875156158907,
"learning_rate": 6.763968294691081e-06,
"loss": 4.0955,
"step": 2305
},
{
"epoch": 1.5363537936203882,
"learning_rate": 6.745523182354147e-06,
"loss": 4.2111,
"step": 2306
},
{
"epoch": 1.5370200716248856,
"learning_rate": 6.727099331065936e-06,
"loss": 4.1404,
"step": 2307
},
{
"epoch": 1.5376863496293829,
"learning_rate": 6.70869676228476e-06,
"loss": 4.1254,
"step": 2308
},
{
"epoch": 1.5383526276338801,
"learning_rate": 6.690315497444166e-06,
"loss": 4.1657,
"step": 2309
},
{
"epoch": 1.5390189056383776,
"learning_rate": 6.671955557952867e-06,
"loss": 4.1255,
"step": 2310
},
{
"epoch": 1.539685183642875,
"learning_rate": 6.653616965194739e-06,
"loss": 4.1442,
"step": 2311
},
{
"epoch": 1.5403514616473724,
"learning_rate": 6.635299740528808e-06,
"loss": 4.1948,
"step": 2312
},
{
"epoch": 1.5410177396518696,
"learning_rate": 6.617003905289199e-06,
"loss": 4.2138,
"step": 2313
},
{
"epoch": 1.541684017656367,
"learning_rate": 6.5987294807851295e-06,
"loss": 4.1869,
"step": 2314
},
{
"epoch": 1.5423502956608646,
"learning_rate": 6.580476488300891e-06,
"loss": 4.1885,
"step": 2315
},
{
"epoch": 1.5430165736653618,
"learning_rate": 6.5622449490958e-06,
"loss": 4.1158,
"step": 2316
},
{
"epoch": 1.5436828516698593,
"learning_rate": 6.5440348844041875e-06,
"loss": 4.1476,
"step": 2317
},
{
"epoch": 1.5443491296743566,
"learning_rate": 6.525846315435375e-06,
"loss": 4.1366,
"step": 2318
},
{
"epoch": 1.545015407678854,
"learning_rate": 6.507679263373648e-06,
"loss": 4.1416,
"step": 2319
},
{
"epoch": 1.5456816856833515,
"learning_rate": 6.489533749378226e-06,
"loss": 4.1678,
"step": 2320
},
{
"epoch": 1.5463479636878488,
"learning_rate": 6.471409794583264e-06,
"loss": 4.0998,
"step": 2321
},
{
"epoch": 1.547014241692346,
"learning_rate": 6.453307420097779e-06,
"loss": 4.1555,
"step": 2322
},
{
"epoch": 1.5476805196968435,
"learning_rate": 6.435226647005663e-06,
"loss": 4.1038,
"step": 2323
},
{
"epoch": 1.548346797701341,
"learning_rate": 6.417167496365673e-06,
"loss": 4.1324,
"step": 2324
},
{
"epoch": 1.5490130757058382,
"learning_rate": 6.3991299892113336e-06,
"loss": 4.1668,
"step": 2325
},
{
"epoch": 1.5496793537103355,
"learning_rate": 6.3811141465509924e-06,
"loss": 4.1257,
"step": 2326
},
{
"epoch": 1.550345631714833,
"learning_rate": 6.363119989367777e-06,
"loss": 4.2066,
"step": 2327
},
{
"epoch": 1.5510119097193305,
"learning_rate": 6.345147538619531e-06,
"loss": 4.1389,
"step": 2328
},
{
"epoch": 1.551678187723828,
"learning_rate": 6.327196815238817e-06,
"loss": 4.1262,
"step": 2329
},
{
"epoch": 1.5523444657283252,
"learning_rate": 6.309267840132918e-06,
"loss": 4.1342,
"step": 2330
},
{
"epoch": 1.5530107437328224,
"learning_rate": 6.291360634183765e-06,
"loss": 4.1126,
"step": 2331
},
{
"epoch": 1.55367702173732,
"learning_rate": 6.2734752182479425e-06,
"loss": 4.1405,
"step": 2332
},
{
"epoch": 1.5543432997418174,
"learning_rate": 6.255611613156631e-06,
"loss": 4.113,
"step": 2333
},
{
"epoch": 1.5550095777463147,
"learning_rate": 6.237769839715654e-06,
"loss": 4.1639,
"step": 2334
},
{
"epoch": 1.555675855750812,
"learning_rate": 6.2199499187053755e-06,
"loss": 4.1519,
"step": 2335
},
{
"epoch": 1.5563421337553094,
"learning_rate": 6.2021518708807065e-06,
"loss": 4.2014,
"step": 2336
},
{
"epoch": 1.5570084117598069,
"learning_rate": 6.184375716971108e-06,
"loss": 4.1747,
"step": 2337
},
{
"epoch": 1.5576746897643041,
"learning_rate": 6.166621477680515e-06,
"loss": 4.163,
"step": 2338
},
{
"epoch": 1.5583409677688014,
"learning_rate": 6.1488891736873496e-06,
"loss": 4.1644,
"step": 2339
},
{
"epoch": 1.5590072457732989,
"learning_rate": 6.131178825644485e-06,
"loss": 4.1816,
"step": 2340
},
{
"epoch": 1.5596735237777963,
"learning_rate": 6.113490454179219e-06,
"loss": 4.1047,
"step": 2341
},
{
"epoch": 1.5603398017822938,
"learning_rate": 6.09582407989325e-06,
"loss": 4.1784,
"step": 2342
},
{
"epoch": 1.561006079786791,
"learning_rate": 6.078179723362676e-06,
"loss": 4.1998,
"step": 2343
},
{
"epoch": 1.5616723577912883,
"learning_rate": 6.060557405137929e-06,
"loss": 4.1336,
"step": 2344
},
{
"epoch": 1.5623386357957858,
"learning_rate": 6.04295714574378e-06,
"loss": 4.0874,
"step": 2345
},
{
"epoch": 1.5630049138002833,
"learning_rate": 6.02537896567931e-06,
"loss": 4.1127,
"step": 2346
},
{
"epoch": 1.5636711918047805,
"learning_rate": 6.007822885417882e-06,
"loss": 4.1295,
"step": 2347
},
{
"epoch": 1.5643374698092778,
"learning_rate": 5.9902889254071116e-06,
"loss": 4.1511,
"step": 2348
},
{
"epoch": 1.5650037478137753,
"learning_rate": 5.972777106068874e-06,
"loss": 4.1331,
"step": 2349
},
{
"epoch": 1.5656700258182727,
"learning_rate": 5.95528744779924e-06,
"loss": 4.1651,
"step": 2350
},
{
"epoch": 1.56633630382277,
"learning_rate": 5.937819970968458e-06,
"loss": 4.1689,
"step": 2351
},
{
"epoch": 1.5670025818272675,
"learning_rate": 5.9203746959209775e-06,
"loss": 4.1783,
"step": 2352
},
{
"epoch": 1.5676688598317647,
"learning_rate": 5.902951642975349e-06,
"loss": 4.173,
"step": 2353
},
{
"epoch": 1.5683351378362622,
"learning_rate": 5.885550832424258e-06,
"loss": 4.121,
"step": 2354
},
{
"epoch": 1.5690014158407597,
"learning_rate": 5.868172284534498e-06,
"loss": 4.2004,
"step": 2355
},
{
"epoch": 1.569667693845257,
"learning_rate": 5.850816019546918e-06,
"loss": 4.1281,
"step": 2356
},
{
"epoch": 1.5703339718497542,
"learning_rate": 5.833482057676401e-06,
"loss": 4.1862,
"step": 2357
},
{
"epoch": 1.5710002498542517,
"learning_rate": 5.816170419111891e-06,
"loss": 4.1197,
"step": 2358
},
{
"epoch": 1.5716665278587492,
"learning_rate": 5.7988811240163005e-06,
"loss": 4.1184,
"step": 2359
},
{
"epoch": 1.5723328058632464,
"learning_rate": 5.781614192526532e-06,
"loss": 4.1686,
"step": 2360
},
{
"epoch": 1.5729990838677437,
"learning_rate": 5.76436964475342e-06,
"loss": 4.1688,
"step": 2361
},
{
"epoch": 1.5736653618722412,
"learning_rate": 5.7471475007817635e-06,
"loss": 4.0964,
"step": 2362
},
{
"epoch": 1.5743316398767386,
"learning_rate": 5.7299477806702445e-06,
"loss": 4.1645,
"step": 2363
},
{
"epoch": 1.574997917881236,
"learning_rate": 5.712770504451426e-06,
"loss": 4.1302,
"step": 2364
},
{
"epoch": 1.5756641958857334,
"learning_rate": 5.695615692131751e-06,
"loss": 4.1565,
"step": 2365
},
{
"epoch": 1.5763304738902306,
"learning_rate": 5.678483363691478e-06,
"loss": 4.0973,
"step": 2366
},
{
"epoch": 1.576996751894728,
"learning_rate": 5.6613735390846884e-06,
"loss": 4.135,
"step": 2367
},
{
"epoch": 1.5776630298992256,
"learning_rate": 5.644286238239249e-06,
"loss": 4.1455,
"step": 2368
},
{
"epoch": 1.5783293079037228,
"learning_rate": 5.627221481056794e-06,
"loss": 4.1298,
"step": 2369
},
{
"epoch": 1.57899558590822,
"learning_rate": 5.610179287412695e-06,
"loss": 4.1317,
"step": 2370
},
{
"epoch": 1.5796618639127176,
"learning_rate": 5.593159677156068e-06,
"loss": 4.1367,
"step": 2371
},
{
"epoch": 1.580328141917215,
"learning_rate": 5.576162670109697e-06,
"loss": 4.2087,
"step": 2372
},
{
"epoch": 1.5809944199217123,
"learning_rate": 5.559188286070052e-06,
"loss": 4.1468,
"step": 2373
},
{
"epoch": 1.5816606979262096,
"learning_rate": 5.542236544807256e-06,
"loss": 4.2008,
"step": 2374
},
{
"epoch": 1.582326975930707,
"learning_rate": 5.525307466065058e-06,
"loss": 4.1386,
"step": 2375
},
{
"epoch": 1.5829932539352045,
"learning_rate": 5.508401069560801e-06,
"loss": 4.1347,
"step": 2376
},
{
"epoch": 1.583659531939702,
"learning_rate": 5.4915173749854335e-06,
"loss": 4.1597,
"step": 2377
},
{
"epoch": 1.5843258099441992,
"learning_rate": 5.474656402003448e-06,
"loss": 4.1668,
"step": 2378
},
{
"epoch": 1.5849920879486965,
"learning_rate": 5.457818170252862e-06,
"loss": 4.1236,
"step": 2379
},
{
"epoch": 1.585658365953194,
"learning_rate": 5.441002699345246e-06,
"loss": 4.1765,
"step": 2380
},
{
"epoch": 1.5863246439576915,
"learning_rate": 5.424210008865607e-06,
"loss": 4.1439,
"step": 2381
},
{
"epoch": 1.5869909219621887,
"learning_rate": 5.407440118372451e-06,
"loss": 4.1581,
"step": 2382
},
{
"epoch": 1.587657199966686,
"learning_rate": 5.390693047397735e-06,
"loss": 4.1311,
"step": 2383
},
{
"epoch": 1.5883234779711835,
"learning_rate": 5.37396881544682e-06,
"loss": 4.184,
"step": 2384
},
{
"epoch": 1.588989755975681,
"learning_rate": 5.3572674419984675e-06,
"loss": 4.1052,
"step": 2385
},
{
"epoch": 1.5896560339801782,
"learning_rate": 5.340588946504837e-06,
"loss": 4.1744,
"step": 2386
},
{
"epoch": 1.5903223119846754,
"learning_rate": 5.323933348391427e-06,
"loss": 4.1654,
"step": 2387
},
{
"epoch": 1.590988589989173,
"learning_rate": 5.307300667057049e-06,
"loss": 4.1573,
"step": 2388
},
{
"epoch": 1.5916548679936704,
"learning_rate": 5.2906909218738445e-06,
"loss": 4.1522,
"step": 2389
},
{
"epoch": 1.5923211459981679,
"learning_rate": 5.274104132187252e-06,
"loss": 4.123,
"step": 2390
},
{
"epoch": 1.5929874240026651,
"learning_rate": 5.257540317315951e-06,
"loss": 4.1682,
"step": 2391
},
{
"epoch": 1.5936537020071624,
"learning_rate": 5.240999496551866e-06,
"loss": 4.1348,
"step": 2392
},
{
"epoch": 1.5943199800116599,
"learning_rate": 5.2244816891601575e-06,
"loss": 4.1541,
"step": 2393
},
{
"epoch": 1.5949862580161573,
"learning_rate": 5.207986914379162e-06,
"loss": 4.1922,
"step": 2394
},
{
"epoch": 1.5956525360206546,
"learning_rate": 5.191515191420396e-06,
"loss": 4.1435,
"step": 2395
},
{
"epoch": 1.5963188140251519,
"learning_rate": 5.175066539468534e-06,
"loss": 4.2017,
"step": 2396
},
{
"epoch": 1.5969850920296493,
"learning_rate": 5.15864097768137e-06,
"loss": 4.1342,
"step": 2397
},
{
"epoch": 1.5976513700341468,
"learning_rate": 5.142238525189804e-06,
"loss": 4.1694,
"step": 2398
},
{
"epoch": 1.5983176480386443,
"learning_rate": 5.125859201097841e-06,
"loss": 4.0999,
"step": 2399
},
{
"epoch": 1.5989839260431415,
"learning_rate": 5.109503024482526e-06,
"loss": 4.1193,
"step": 2400
},
{
"epoch": 1.5996502040476388,
"learning_rate": 5.09317001439395e-06,
"loss": 4.133,
"step": 2401
},
{
"epoch": 1.6003164820521363,
"learning_rate": 5.076860189855223e-06,
"loss": 4.1481,
"step": 2402
},
{
"epoch": 1.6009827600566338,
"learning_rate": 5.060573569862451e-06,
"loss": 4.1395,
"step": 2403
},
{
"epoch": 1.601649038061131,
"learning_rate": 5.0443101733847085e-06,
"loss": 4.1176,
"step": 2404
},
{
"epoch": 1.6023153160656283,
"learning_rate": 5.0280700193640395e-06,
"loss": 4.1493,
"step": 2405
},
{
"epoch": 1.6029815940701257,
"learning_rate": 5.011853126715396e-06,
"loss": 4.1845,
"step": 2406
},
{
"epoch": 1.6036478720746232,
"learning_rate": 4.995659514326645e-06,
"loss": 4.1006,
"step": 2407
},
{
"epoch": 1.6043141500791205,
"learning_rate": 4.979489201058543e-06,
"loss": 4.1333,
"step": 2408
},
{
"epoch": 1.6049804280836177,
"learning_rate": 4.963342205744706e-06,
"loss": 4.1038,
"step": 2409
},
{
"epoch": 1.6056467060881152,
"learning_rate": 4.947218547191585e-06,
"loss": 4.1208,
"step": 2410
},
{
"epoch": 1.6063129840926127,
"learning_rate": 4.931118244178468e-06,
"loss": 4.1528,
"step": 2411
},
{
"epoch": 1.6069792620971102,
"learning_rate": 4.915041315457428e-06,
"loss": 4.1384,
"step": 2412
},
{
"epoch": 1.6076455401016074,
"learning_rate": 4.898987779753314e-06,
"loss": 4.2233,
"step": 2413
},
{
"epoch": 1.6083118181061047,
"learning_rate": 4.8829576557637255e-06,
"loss": 4.122,
"step": 2414
},
{
"epoch": 1.6089780961106022,
"learning_rate": 4.86695096215902e-06,
"loss": 4.1806,
"step": 2415
},
{
"epoch": 1.6096443741150996,
"learning_rate": 4.850967717582228e-06,
"loss": 4.1683,
"step": 2416
},
{
"epoch": 1.610310652119597,
"learning_rate": 4.83500794064908e-06,
"loss": 4.1762,
"step": 2417
},
{
"epoch": 1.6109769301240942,
"learning_rate": 4.819071649948004e-06,
"loss": 4.1435,
"step": 2418
},
{
"epoch": 1.6116432081285916,
"learning_rate": 4.803158864040033e-06,
"loss": 4.0909,
"step": 2419
},
{
"epoch": 1.612309486133089,
"learning_rate": 4.787269601458841e-06,
"loss": 4.0991,
"step": 2420
},
{
"epoch": 1.6129757641375864,
"learning_rate": 4.771403880710712e-06,
"loss": 4.0856,
"step": 2421
},
{
"epoch": 1.6136420421420836,
"learning_rate": 4.755561720274501e-06,
"loss": 4.1568,
"step": 2422
},
{
"epoch": 1.614308320146581,
"learning_rate": 4.739743138601621e-06,
"loss": 4.1142,
"step": 2423
},
{
"epoch": 1.6149745981510786,
"learning_rate": 4.7239481541160255e-06,
"loss": 4.1173,
"step": 2424
},
{
"epoch": 1.615640876155576,
"learning_rate": 4.708176785214188e-06,
"loss": 4.1896,
"step": 2425
},
{
"epoch": 1.6163071541600733,
"learning_rate": 4.692429050265062e-06,
"loss": 4.1323,
"step": 2426
},
{
"epoch": 1.6169734321645706,
"learning_rate": 4.676704967610101e-06,
"loss": 4.1396,
"step": 2427
},
{
"epoch": 1.617639710169068,
"learning_rate": 4.661004555563189e-06,
"loss": 4.1878,
"step": 2428
},
{
"epoch": 1.6183059881735655,
"learning_rate": 4.645327832410648e-06,
"loss": 4.1964,
"step": 2429
},
{
"epoch": 1.6189722661780628,
"learning_rate": 4.629674816411206e-06,
"loss": 4.1971,
"step": 2430
},
{
"epoch": 1.61963854418256,
"learning_rate": 4.614045525795985e-06,
"loss": 4.1523,
"step": 2431
},
{
"epoch": 1.6203048221870575,
"learning_rate": 4.598439978768462e-06,
"loss": 4.1397,
"step": 2432
},
{
"epoch": 1.620971100191555,
"learning_rate": 4.582858193504483e-06,
"loss": 4.1179,
"step": 2433
},
{
"epoch": 1.6216373781960522,
"learning_rate": 4.567300188152196e-06,
"loss": 4.1402,
"step": 2434
},
{
"epoch": 1.6223036562005497,
"learning_rate": 4.551765980832059e-06,
"loss": 4.1469,
"step": 2435
},
{
"epoch": 1.622969934205047,
"learning_rate": 4.53625558963682e-06,
"loss": 4.1374,
"step": 2436
},
{
"epoch": 1.6236362122095445,
"learning_rate": 4.520769032631478e-06,
"loss": 4.1348,
"step": 2437
},
{
"epoch": 1.624302490214042,
"learning_rate": 4.5053063278532735e-06,
"loss": 4.1733,
"step": 2438
},
{
"epoch": 1.6249687682185392,
"learning_rate": 4.489867493311676e-06,
"loss": 4.1227,
"step": 2439
},
{
"epoch": 1.6256350462230365,
"learning_rate": 4.474452546988342e-06,
"loss": 4.1042,
"step": 2440
},
{
"epoch": 1.626301324227534,
"learning_rate": 4.459061506837114e-06,
"loss": 4.1039,
"step": 2441
},
{
"epoch": 1.6269676022320314,
"learning_rate": 4.443694390783979e-06,
"loss": 4.1586,
"step": 2442
},
{
"epoch": 1.6276338802365287,
"learning_rate": 4.428351216727081e-06,
"loss": 4.1643,
"step": 2443
},
{
"epoch": 1.628300158241026,
"learning_rate": 4.413032002536652e-06,
"loss": 4.1724,
"step": 2444
},
{
"epoch": 1.6289664362455234,
"learning_rate": 4.3977367660550275e-06,
"loss": 4.1834,
"step": 2445
},
{
"epoch": 1.6296327142500209,
"learning_rate": 4.382465525096632e-06,
"loss": 4.1741,
"step": 2446
},
{
"epoch": 1.6302989922545184,
"learning_rate": 4.3672182974479255e-06,
"loss": 4.1434,
"step": 2447
},
{
"epoch": 1.6309652702590156,
"learning_rate": 4.351995100867398e-06,
"loss": 4.1799,
"step": 2448
},
{
"epoch": 1.6316315482635129,
"learning_rate": 4.336795953085565e-06,
"loss": 4.1687,
"step": 2449
},
{
"epoch": 1.6322978262680103,
"learning_rate": 4.321620871804926e-06,
"loss": 4.157,
"step": 2450
},
{
"epoch": 1.6329641042725078,
"learning_rate": 4.306469874699928e-06,
"loss": 4.1593,
"step": 2451
},
{
"epoch": 1.633630382277005,
"learning_rate": 4.291342979417007e-06,
"loss": 4.1422,
"step": 2452
},
{
"epoch": 1.6342966602815023,
"learning_rate": 4.276240203574499e-06,
"loss": 4.1436,
"step": 2453
},
{
"epoch": 1.6349629382859998,
"learning_rate": 4.261161564762653e-06,
"loss": 4.1697,
"step": 2454
},
{
"epoch": 1.6356292162904973,
"learning_rate": 4.246107080543618e-06,
"loss": 4.1403,
"step": 2455
},
{
"epoch": 1.6362954942949945,
"learning_rate": 4.231076768451397e-06,
"loss": 4.1645,
"step": 2456
},
{
"epoch": 1.6369617722994918,
"learning_rate": 4.216070645991843e-06,
"loss": 4.1398,
"step": 2457
},
{
"epoch": 1.6376280503039893,
"learning_rate": 4.201088730642633e-06,
"loss": 4.1561,
"step": 2458
},
{
"epoch": 1.6382943283084868,
"learning_rate": 4.186131039853258e-06,
"loss": 4.1205,
"step": 2459
},
{
"epoch": 1.6389606063129842,
"learning_rate": 4.1711975910449785e-06,
"loss": 4.1107,
"step": 2460
},
{
"epoch": 1.6396268843174815,
"learning_rate": 4.156288401610847e-06,
"loss": 4.145,
"step": 2461
},
{
"epoch": 1.6402931623219787,
"learning_rate": 4.141403488915638e-06,
"loss": 4.1401,
"step": 2462
},
{
"epoch": 1.6409594403264762,
"learning_rate": 4.126542870295855e-06,
"loss": 4.1618,
"step": 2463
},
{
"epoch": 1.6416257183309737,
"learning_rate": 4.111706563059711e-06,
"loss": 4.179,
"step": 2464
},
{
"epoch": 1.642291996335471,
"learning_rate": 4.096894584487102e-06,
"loss": 4.1313,
"step": 2465
},
{
"epoch": 1.6429582743399682,
"learning_rate": 4.082106951829581e-06,
"loss": 4.1469,
"step": 2466
},
{
"epoch": 1.6436245523444657,
"learning_rate": 4.067343682310365e-06,
"loss": 4.1381,
"step": 2467
},
{
"epoch": 1.6442908303489632,
"learning_rate": 4.052604793124273e-06,
"loss": 4.1245,
"step": 2468
},
{
"epoch": 1.6449571083534604,
"learning_rate": 4.037890301437744e-06,
"loss": 4.1308,
"step": 2469
},
{
"epoch": 1.645623386357958,
"learning_rate": 4.023200224388787e-06,
"loss": 4.1458,
"step": 2470
},
{
"epoch": 1.6462896643624552,
"learning_rate": 4.008534579086987e-06,
"loss": 4.1475,
"step": 2471
},
{
"epoch": 1.6469559423669526,
"learning_rate": 3.993893382613467e-06,
"loss": 4.1356,
"step": 2472
},
{
"epoch": 1.6476222203714501,
"learning_rate": 3.979276652020875e-06,
"loss": 4.1828,
"step": 2473
},
{
"epoch": 1.6482884983759474,
"learning_rate": 3.9646844043333685e-06,
"loss": 4.152,
"step": 2474
},
{
"epoch": 1.6489547763804446,
"learning_rate": 3.950116656546588e-06,
"loss": 4.1029,
"step": 2475
},
{
"epoch": 1.649621054384942,
"learning_rate": 3.935573425627626e-06,
"loss": 4.1255,
"step": 2476
},
{
"epoch": 1.6502873323894396,
"learning_rate": 3.9210547285150415e-06,
"loss": 4.1409,
"step": 2477
},
{
"epoch": 1.6509536103939368,
"learning_rate": 3.906560582118815e-06,
"loss": 4.1464,
"step": 2478
},
{
"epoch": 1.651619888398434,
"learning_rate": 3.8920910033203056e-06,
"loss": 4.1287,
"step": 2479
},
{
"epoch": 1.6522861664029316,
"learning_rate": 3.877646008972294e-06,
"loss": 4.2269,
"step": 2480
},
{
"epoch": 1.652952444407429,
"learning_rate": 3.863225615898908e-06,
"loss": 4.1186,
"step": 2481
},
{
"epoch": 1.6536187224119265,
"learning_rate": 3.848829840895621e-06,
"loss": 4.104,
"step": 2482
},
{
"epoch": 1.6542850004164238,
"learning_rate": 3.834458700729249e-06,
"loss": 4.1207,
"step": 2483
},
{
"epoch": 1.654951278420921,
"learning_rate": 3.820112212137903e-06,
"loss": 4.101,
"step": 2484
},
{
"epoch": 1.6556175564254185,
"learning_rate": 3.805790391830982e-06,
"loss": 4.1469,
"step": 2485
},
{
"epoch": 1.656283834429916,
"learning_rate": 3.791493256489162e-06,
"loss": 4.1599,
"step": 2486
},
{
"epoch": 1.6569501124344133,
"learning_rate": 3.7772208227643574e-06,
"loss": 4.1768,
"step": 2487
},
{
"epoch": 1.6576163904389105,
"learning_rate": 3.762973107279716e-06,
"loss": 4.143,
"step": 2488
},
{
"epoch": 1.658282668443408,
"learning_rate": 3.7487501266296078e-06,
"loss": 4.1633,
"step": 2489
},
{
"epoch": 1.6589489464479055,
"learning_rate": 3.734551897379579e-06,
"loss": 4.1719,
"step": 2490
},
{
"epoch": 1.6596152244524027,
"learning_rate": 3.7203784360663575e-06,
"loss": 4.1182,
"step": 2491
},
{
"epoch": 1.6602815024569,
"learning_rate": 3.7062297591978155e-06,
"loss": 4.1725,
"step": 2492
},
{
"epoch": 1.6609477804613975,
"learning_rate": 3.692105883252964e-06,
"loss": 4.2103,
"step": 2493
},
{
"epoch": 1.661614058465895,
"learning_rate": 3.6780068246819233e-06,
"loss": 4.1382,
"step": 2494
},
{
"epoch": 1.6622803364703924,
"learning_rate": 3.6639325999059225e-06,
"loss": 4.1651,
"step": 2495
},
{
"epoch": 1.6629466144748897,
"learning_rate": 3.649883225317255e-06,
"loss": 4.1335,
"step": 2496
},
{
"epoch": 1.663612892479387,
"learning_rate": 3.635858717279267e-06,
"loss": 4.1836,
"step": 2497
},
{
"epoch": 1.6642791704838844,
"learning_rate": 3.6218590921263534e-06,
"loss": 4.2023,
"step": 2498
},
{
"epoch": 1.6649454484883819,
"learning_rate": 3.6078843661639246e-06,
"loss": 4.0935,
"step": 2499
},
{
"epoch": 1.6656117264928791,
"learning_rate": 3.593934555668385e-06,
"loss": 4.1237,
"step": 2500
},
{
"epoch": 1.6662780044973764,
"learning_rate": 3.580009676887122e-06,
"loss": 4.1255,
"step": 2501
},
{
"epoch": 1.6669442825018739,
"learning_rate": 3.5661097460384985e-06,
"loss": 4.0888,
"step": 2502
},
{
"epoch": 1.6676105605063714,
"learning_rate": 3.5522347793118027e-06,
"loss": 4.1469,
"step": 2503
},
{
"epoch": 1.6682768385108686,
"learning_rate": 3.538384792867247e-06,
"loss": 4.1726,
"step": 2504
},
{
"epoch": 1.668943116515366,
"learning_rate": 3.5245598028359666e-06,
"loss": 4.1061,
"step": 2505
},
{
"epoch": 1.6696093945198633,
"learning_rate": 3.5107598253199758e-06,
"loss": 4.1712,
"step": 2506
},
{
"epoch": 1.6702756725243608,
"learning_rate": 3.496984876392137e-06,
"loss": 4.1276,
"step": 2507
},
{
"epoch": 1.6709419505288583,
"learning_rate": 3.483234972096189e-06,
"loss": 4.1601,
"step": 2508
},
{
"epoch": 1.6716082285333556,
"learning_rate": 3.4695101284466903e-06,
"loss": 4.1901,
"step": 2509
},
{
"epoch": 1.6722745065378528,
"learning_rate": 3.455810361429004e-06,
"loss": 4.1276,
"step": 2510
},
{
"epoch": 1.6729407845423503,
"learning_rate": 3.4421356869993037e-06,
"loss": 4.1992,
"step": 2511
},
{
"epoch": 1.6736070625468478,
"learning_rate": 3.428486121084523e-06,
"loss": 4.1446,
"step": 2512
},
{
"epoch": 1.674273340551345,
"learning_rate": 3.414861679582357e-06,
"loss": 4.1624,
"step": 2513
},
{
"epoch": 1.6749396185558423,
"learning_rate": 3.4012623783612333e-06,
"loss": 4.1902,
"step": 2514
},
{
"epoch": 1.6756058965603398,
"learning_rate": 3.3876882332603027e-06,
"loss": 4.1994,
"step": 2515
},
{
"epoch": 1.6762721745648372,
"learning_rate": 3.374139260089415e-06,
"loss": 4.0897,
"step": 2516
},
{
"epoch": 1.6769384525693347,
"learning_rate": 3.3606154746291114e-06,
"loss": 4.135,
"step": 2517
},
{
"epoch": 1.677604730573832,
"learning_rate": 3.3471168926305864e-06,
"loss": 4.1372,
"step": 2518
},
{
"epoch": 1.6782710085783292,
"learning_rate": 3.3336435298156775e-06,
"loss": 4.1175,
"step": 2519
},
{
"epoch": 1.6789372865828267,
"learning_rate": 3.3201954018768575e-06,
"loss": 4.1257,
"step": 2520
},
{
"epoch": 1.6796035645873242,
"learning_rate": 3.3067725244772058e-06,
"loss": 4.1555,
"step": 2521
},
{
"epoch": 1.6802698425918214,
"learning_rate": 3.2933749132503826e-06,
"loss": 4.178,
"step": 2522
},
{
"epoch": 1.6809361205963187,
"learning_rate": 3.2800025838006453e-06,
"loss": 4.0662,
"step": 2523
},
{
"epoch": 1.6816023986008162,
"learning_rate": 3.2666555517027796e-06,
"loss": 4.1526,
"step": 2524
},
{
"epoch": 1.6822686766053137,
"learning_rate": 3.25333383250212e-06,
"loss": 4.0991,
"step": 2525
},
{
"epoch": 1.682934954609811,
"learning_rate": 3.2400374417145135e-06,
"loss": 4.1516,
"step": 2526
},
{
"epoch": 1.6836012326143082,
"learning_rate": 3.2267663948263137e-06,
"loss": 4.1127,
"step": 2527
},
{
"epoch": 1.6842675106188056,
"learning_rate": 3.2135207072943512e-06,
"loss": 4.1672,
"step": 2528
},
{
"epoch": 1.6849337886233031,
"learning_rate": 3.200300394545913e-06,
"loss": 4.1527,
"step": 2529
},
{
"epoch": 1.6856000666278006,
"learning_rate": 3.1871054719787546e-06,
"loss": 4.1435,
"step": 2530
},
{
"epoch": 1.6862663446322979,
"learning_rate": 3.173935954961038e-06,
"loss": 4.1503,
"step": 2531
},
{
"epoch": 1.686932622636795,
"learning_rate": 3.1607918588313385e-06,
"loss": 4.1579,
"step": 2532
},
{
"epoch": 1.6875989006412926,
"learning_rate": 3.1476731988986456e-06,
"loss": 4.1658,
"step": 2533
},
{
"epoch": 1.68826517864579,
"learning_rate": 3.134579990442285e-06,
"loss": 4.1869,
"step": 2534
},
{
"epoch": 1.6889314566502873,
"learning_rate": 3.121512248711961e-06,
"loss": 4.1413,
"step": 2535
},
{
"epoch": 1.6895977346547846,
"learning_rate": 3.1084699889277284e-06,
"loss": 4.1717,
"step": 2536
},
{
"epoch": 1.690264012659282,
"learning_rate": 3.0954532262799404e-06,
"loss": 4.1521,
"step": 2537
},
{
"epoch": 1.6909302906637795,
"learning_rate": 3.08246197592926e-06,
"loss": 4.1462,
"step": 2538
},
{
"epoch": 1.6915965686682768,
"learning_rate": 3.069496253006651e-06,
"loss": 4.1451,
"step": 2539
},
{
"epoch": 1.6922628466727743,
"learning_rate": 3.056556072613323e-06,
"loss": 4.1559,
"step": 2540
},
{
"epoch": 1.6929291246772715,
"learning_rate": 3.04364144982075e-06,
"loss": 4.1708,
"step": 2541
},
{
"epoch": 1.693595402681769,
"learning_rate": 3.030752399670636e-06,
"loss": 4.0608,
"step": 2542
},
{
"epoch": 1.6942616806862665,
"learning_rate": 3.0178889371748953e-06,
"loss": 4.1698,
"step": 2543
},
{
"epoch": 1.6949279586907637,
"learning_rate": 3.005051077315643e-06,
"loss": 4.1663,
"step": 2544
},
{
"epoch": 1.695594236695261,
"learning_rate": 2.9922388350451886e-06,
"loss": 4.1494,
"step": 2545
},
{
"epoch": 1.6962605146997585,
"learning_rate": 2.9794522252859836e-06,
"loss": 4.1646,
"step": 2546
},
{
"epoch": 1.696926792704256,
"learning_rate": 2.966691262930635e-06,
"loss": 4.1354,
"step": 2547
},
{
"epoch": 1.6975930707087532,
"learning_rate": 2.9539559628418785e-06,
"loss": 4.1843,
"step": 2548
},
{
"epoch": 1.6982593487132505,
"learning_rate": 2.9412463398525577e-06,
"loss": 4.1718,
"step": 2549
},
{
"epoch": 1.698925626717748,
"learning_rate": 2.9285624087656142e-06,
"loss": 4.1476,
"step": 2550
},
{
"epoch": 1.6995919047222454,
"learning_rate": 2.915904184354057e-06,
"loss": 4.1648,
"step": 2551
},
{
"epoch": 1.700258182726743,
"learning_rate": 2.9032716813609723e-06,
"loss": 4.1797,
"step": 2552
},
{
"epoch": 1.7009244607312402,
"learning_rate": 2.8906649144994747e-06,
"loss": 4.1656,
"step": 2553
},
{
"epoch": 1.7015907387357374,
"learning_rate": 2.878083898452702e-06,
"loss": 4.1565,
"step": 2554
},
{
"epoch": 1.7022570167402349,
"learning_rate": 2.865528647873808e-06,
"loss": 4.1559,
"step": 2555
},
{
"epoch": 1.7029232947447324,
"learning_rate": 2.8529991773859314e-06,
"loss": 4.1851,
"step": 2556
},
{
"epoch": 1.7035895727492296,
"learning_rate": 2.8404955015821884e-06,
"loss": 4.1604,
"step": 2557
},
{
"epoch": 1.7042558507537269,
"learning_rate": 2.8280176350256536e-06,
"loss": 4.1478,
"step": 2558
},
{
"epoch": 1.7049221287582244,
"learning_rate": 2.8155655922493363e-06,
"loss": 4.2111,
"step": 2559
},
{
"epoch": 1.7055884067627218,
"learning_rate": 2.8031393877561706e-06,
"loss": 4.1227,
"step": 2560
},
{
"epoch": 1.706254684767219,
"learning_rate": 2.790739036019005e-06,
"loss": 4.161,
"step": 2561
},
{
"epoch": 1.7069209627717163,
"learning_rate": 2.7783645514805614e-06,
"loss": 4.2107,
"step": 2562
},
{
"epoch": 1.7075872407762138,
"learning_rate": 2.7660159485534383e-06,
"loss": 4.1659,
"step": 2563
},
{
"epoch": 1.7082535187807113,
"learning_rate": 2.753693241620106e-06,
"loss": 4.1097,
"step": 2564
},
{
"epoch": 1.7089197967852088,
"learning_rate": 2.7413964450328537e-06,
"loss": 4.0755,
"step": 2565
},
{
"epoch": 1.709586074789706,
"learning_rate": 2.7291255731138e-06,
"loss": 4.0998,
"step": 2566
},
{
"epoch": 1.7102523527942033,
"learning_rate": 2.7168806401548756e-06,
"loss": 4.1506,
"step": 2567
},
{
"epoch": 1.7109186307987008,
"learning_rate": 2.70466166041779e-06,
"loss": 4.174,
"step": 2568
},
{
"epoch": 1.7115849088031982,
"learning_rate": 2.692468648134028e-06,
"loss": 4.1685,
"step": 2569
},
{
"epoch": 1.7122511868076955,
"learning_rate": 2.6803016175048323e-06,
"loss": 4.2036,
"step": 2570
},
{
"epoch": 1.7129174648121928,
"learning_rate": 2.668160582701182e-06,
"loss": 4.1263,
"step": 2571
},
{
"epoch": 1.7135837428166902,
"learning_rate": 2.656045557863776e-06,
"loss": 4.1998,
"step": 2572
},
{
"epoch": 1.7142500208211877,
"learning_rate": 2.6439565571030334e-06,
"loss": 4.1586,
"step": 2573
},
{
"epoch": 1.714916298825685,
"learning_rate": 2.6318935944990464e-06,
"loss": 4.1617,
"step": 2574
},
{
"epoch": 1.7155825768301824,
"learning_rate": 2.6198566841015877e-06,
"loss": 4.204,
"step": 2575
},
{
"epoch": 1.7162488548346797,
"learning_rate": 2.607845839930087e-06,
"loss": 4.1798,
"step": 2576
},
{
"epoch": 1.7169151328391772,
"learning_rate": 2.595861075973613e-06,
"loss": 4.164,
"step": 2577
},
{
"epoch": 1.7175814108436747,
"learning_rate": 2.5839024061908577e-06,
"loss": 4.1194,
"step": 2578
},
{
"epoch": 1.718247688848172,
"learning_rate": 2.571969844510122e-06,
"loss": 4.1183,
"step": 2579
},
{
"epoch": 1.7189139668526692,
"learning_rate": 2.560063404829305e-06,
"loss": 4.1914,
"step": 2580
},
{
"epoch": 1.7195802448571667,
"learning_rate": 2.5481831010158717e-06,
"loss": 4.0869,
"step": 2581
},
{
"epoch": 1.7202465228616641,
"learning_rate": 2.536328946906852e-06,
"loss": 4.1397,
"step": 2582
},
{
"epoch": 1.7209128008661614,
"learning_rate": 2.5245009563088174e-06,
"loss": 4.1423,
"step": 2583
},
{
"epoch": 1.7215790788706586,
"learning_rate": 2.512699142997868e-06,
"loss": 4.1848,
"step": 2584
},
{
"epoch": 1.7222453568751561,
"learning_rate": 2.5009235207196115e-06,
"loss": 4.1496,
"step": 2585
},
{
"epoch": 1.7229116348796536,
"learning_rate": 2.489174103189157e-06,
"loss": 4.1579,
"step": 2586
},
{
"epoch": 1.723577912884151,
"learning_rate": 2.477450904091089e-06,
"loss": 4.1521,
"step": 2587
},
{
"epoch": 1.7242441908886483,
"learning_rate": 2.4657539370794486e-06,
"loss": 4.1377,
"step": 2588
},
{
"epoch": 1.7249104688931456,
"learning_rate": 2.45408321577775e-06,
"loss": 4.1347,
"step": 2589
},
{
"epoch": 1.725576746897643,
"learning_rate": 2.4424387537789e-06,
"loss": 4.1486,
"step": 2590
},
{
"epoch": 1.7262430249021405,
"learning_rate": 2.4308205646452474e-06,
"loss": 4.1733,
"step": 2591
},
{
"epoch": 1.7269093029066378,
"learning_rate": 2.419228661908543e-06,
"loss": 4.1548,
"step": 2592
},
{
"epoch": 1.727575580911135,
"learning_rate": 2.4076630590699062e-06,
"loss": 4.1483,
"step": 2593
},
{
"epoch": 1.7282418589156325,
"learning_rate": 2.3961237695998285e-06,
"loss": 4.1432,
"step": 2594
},
{
"epoch": 1.72890813692013,
"learning_rate": 2.38461080693817e-06,
"loss": 4.1739,
"step": 2595
},
{
"epoch": 1.7295744149246273,
"learning_rate": 2.3731241844941076e-06,
"loss": 4.1157,
"step": 2596
},
{
"epoch": 1.7302406929291245,
"learning_rate": 2.3616639156461505e-06,
"loss": 4.1344,
"step": 2597
},
{
"epoch": 1.730906970933622,
"learning_rate": 2.3502300137421134e-06,
"loss": 4.1509,
"step": 2598
},
{
"epoch": 1.7315732489381195,
"learning_rate": 2.3388224920990938e-06,
"loss": 4.1598,
"step": 2599
},
{
"epoch": 1.732239526942617,
"learning_rate": 2.3274413640034657e-06,
"loss": 4.0994,
"step": 2600
},
{
"epoch": 1.7329058049471142,
"learning_rate": 2.31608664271088e-06,
"loss": 4.1663,
"step": 2601
},
{
"epoch": 1.7335720829516115,
"learning_rate": 2.304758341446209e-06,
"loss": 4.1659,
"step": 2602
},
{
"epoch": 1.734238360956109,
"learning_rate": 2.293456473403563e-06,
"loss": 4.1737,
"step": 2603
},
{
"epoch": 1.7349046389606064,
"learning_rate": 2.2821810517462656e-06,
"loss": 4.1779,
"step": 2604
},
{
"epoch": 1.7355709169651037,
"learning_rate": 2.270932089606834e-06,
"loss": 4.1117,
"step": 2605
},
{
"epoch": 1.736237194969601,
"learning_rate": 2.259709600086976e-06,
"loss": 4.109,
"step": 2606
},
{
"epoch": 1.7369034729740984,
"learning_rate": 2.248513596257554e-06,
"loss": 4.1293,
"step": 2607
},
{
"epoch": 1.737569750978596,
"learning_rate": 2.2373440911586e-06,
"loss": 4.1422,
"step": 2608
},
{
"epoch": 1.7382360289830932,
"learning_rate": 2.226201097799266e-06,
"loss": 4.1602,
"step": 2609
},
{
"epoch": 1.7389023069875904,
"learning_rate": 2.2150846291578376e-06,
"loss": 4.131,
"step": 2610
},
{
"epoch": 1.7395685849920879,
"learning_rate": 2.2039946981816996e-06,
"loss": 4.1167,
"step": 2611
},
{
"epoch": 1.7402348629965854,
"learning_rate": 2.1929313177873305e-06,
"loss": 4.1483,
"step": 2612
},
{
"epoch": 1.7409011410010828,
"learning_rate": 2.181894500860282e-06,
"loss": 4.0999,
"step": 2613
},
{
"epoch": 1.74156741900558,
"learning_rate": 2.170884260255179e-06,
"loss": 4.1824,
"step": 2614
},
{
"epoch": 1.7422336970100774,
"learning_rate": 2.1599006087956786e-06,
"loss": 4.2052,
"step": 2615
},
{
"epoch": 1.7428999750145748,
"learning_rate": 2.1489435592744743e-06,
"loss": 4.1807,
"step": 2616
},
{
"epoch": 1.7435662530190723,
"learning_rate": 2.138013124453289e-06,
"loss": 4.1477,
"step": 2617
},
{
"epoch": 1.7442325310235696,
"learning_rate": 2.1271093170628254e-06,
"loss": 4.1538,
"step": 2618
},
{
"epoch": 1.7448988090280668,
"learning_rate": 2.116232149802777e-06,
"loss": 4.1106,
"step": 2619
},
{
"epoch": 1.7455650870325643,
"learning_rate": 2.1053816353418326e-06,
"loss": 4.1804,
"step": 2620
},
{
"epoch": 1.7462313650370618,
"learning_rate": 2.094557786317611e-06,
"loss": 4.1575,
"step": 2621
},
{
"epoch": 1.7468976430415593,
"learning_rate": 2.0837606153366827e-06,
"loss": 4.1377,
"step": 2622
},
{
"epoch": 1.7475639210460565,
"learning_rate": 2.072990134974559e-06,
"loss": 4.1385,
"step": 2623
},
{
"epoch": 1.7482301990505538,
"learning_rate": 2.062246357775649e-06,
"loss": 4.1718,
"step": 2624
},
{
"epoch": 1.7488964770550512,
"learning_rate": 2.0515292962532545e-06,
"loss": 4.142,
"step": 2625
},
{
"epoch": 1.7495627550595487,
"learning_rate": 2.040838962889585e-06,
"loss": 4.1355,
"step": 2626
},
{
"epoch": 1.750229033064046,
"learning_rate": 2.0301753701357034e-06,
"loss": 4.1449,
"step": 2627
},
{
"epoch": 1.7508953110685432,
"learning_rate": 2.0195385304115243e-06,
"loss": 4.1414,
"step": 2628
},
{
"epoch": 1.7515615890730407,
"learning_rate": 2.0089284561058213e-06,
"loss": 4.2214,
"step": 2629
},
{
"epoch": 1.7522278670775382,
"learning_rate": 1.998345159576173e-06,
"loss": 4.1859,
"step": 2630
},
{
"epoch": 1.7528941450820354,
"learning_rate": 1.987788653148984e-06,
"loss": 4.1578,
"step": 2631
},
{
"epoch": 1.7535604230865327,
"learning_rate": 1.977258949119451e-06,
"loss": 4.143,
"step": 2632
},
{
"epoch": 1.7542267010910302,
"learning_rate": 1.966756059751554e-06,
"loss": 4.1228,
"step": 2633
},
{
"epoch": 1.7548929790955277,
"learning_rate": 1.956279997278043e-06,
"loss": 4.1847,
"step": 2634
},
{
"epoch": 1.7555592571000251,
"learning_rate": 1.9458307739004174e-06,
"loss": 4.148,
"step": 2635
},
{
"epoch": 1.7562255351045224,
"learning_rate": 1.9354084017889324e-06,
"loss": 4.1016,
"step": 2636
},
{
"epoch": 1.7568918131090197,
"learning_rate": 1.9250128930825504e-06,
"loss": 4.1501,
"step": 2637
},
{
"epoch": 1.7575580911135171,
"learning_rate": 1.9146442598889564e-06,
"loss": 4.168,
"step": 2638
},
{
"epoch": 1.7582243691180146,
"learning_rate": 1.904302514284531e-06,
"loss": 4.1663,
"step": 2639
},
{
"epoch": 1.7588906471225119,
"learning_rate": 1.8939876683143398e-06,
"loss": 4.159,
"step": 2640
},
{
"epoch": 1.7595569251270091,
"learning_rate": 1.8836997339921143e-06,
"loss": 4.1096,
"step": 2641
},
{
"epoch": 1.7602232031315066,
"learning_rate": 1.8734387233002525e-06,
"loss": 4.1918,
"step": 2642
},
{
"epoch": 1.760889481136004,
"learning_rate": 1.8632046481897813e-06,
"loss": 4.1083,
"step": 2643
},
{
"epoch": 1.7615557591405013,
"learning_rate": 1.8529975205803628e-06,
"loss": 4.1539,
"step": 2644
},
{
"epoch": 1.7622220371449986,
"learning_rate": 1.8428173523602738e-06,
"loss": 4.1304,
"step": 2645
},
{
"epoch": 1.762888315149496,
"learning_rate": 1.832664155386385e-06,
"loss": 4.134,
"step": 2646
},
{
"epoch": 1.7635545931539935,
"learning_rate": 1.8225379414841592e-06,
"loss": 4.1175,
"step": 2647
},
{
"epoch": 1.764220871158491,
"learning_rate": 1.8124387224476347e-06,
"loss": 4.1384,
"step": 2648
},
{
"epoch": 1.7648871491629883,
"learning_rate": 1.8023665100394022e-06,
"loss": 4.1378,
"step": 2649
},
{
"epoch": 1.7655534271674855,
"learning_rate": 1.792321315990597e-06,
"loss": 4.1247,
"step": 2650
},
{
"epoch": 1.766219705171983,
"learning_rate": 1.7823031520008943e-06,
"loss": 4.1653,
"step": 2651
},
{
"epoch": 1.7668859831764805,
"learning_rate": 1.7723120297384877e-06,
"loss": 4.1376,
"step": 2652
},
{
"epoch": 1.7675522611809777,
"learning_rate": 1.762347960840055e-06,
"loss": 4.1746,
"step": 2653
},
{
"epoch": 1.768218539185475,
"learning_rate": 1.7524109569107911e-06,
"loss": 4.1294,
"step": 2654
},
{
"epoch": 1.7688848171899725,
"learning_rate": 1.7425010295243543e-06,
"loss": 4.0916,
"step": 2655
},
{
"epoch": 1.76955109519447,
"learning_rate": 1.7326181902228623e-06,
"loss": 4.156,
"step": 2656
},
{
"epoch": 1.7702173731989672,
"learning_rate": 1.7227624505169044e-06,
"loss": 4.1364,
"step": 2657
},
{
"epoch": 1.7708836512034647,
"learning_rate": 1.7129338218854818e-06,
"loss": 4.1405,
"step": 2658
},
{
"epoch": 1.771549929207962,
"learning_rate": 1.703132315776035e-06,
"loss": 4.2146,
"step": 2659
},
{
"epoch": 1.7722162072124594,
"learning_rate": 1.6933579436044094e-06,
"loss": 4.1722,
"step": 2660
},
{
"epoch": 1.772882485216957,
"learning_rate": 1.6836107167548493e-06,
"loss": 4.1037,
"step": 2661
},
{
"epoch": 1.7735487632214542,
"learning_rate": 1.6738906465799759e-06,
"loss": 4.1115,
"step": 2662
},
{
"epoch": 1.7742150412259514,
"learning_rate": 1.6641977444007888e-06,
"loss": 4.2328,
"step": 2663
},
{
"epoch": 1.774881319230449,
"learning_rate": 1.6545320215066496e-06,
"loss": 4.16,
"step": 2664
},
{
"epoch": 1.7755475972349464,
"learning_rate": 1.6448934891552526e-06,
"loss": 4.1812,
"step": 2665
},
{
"epoch": 1.7762138752394436,
"learning_rate": 1.6352821585726264e-06,
"loss": 4.1562,
"step": 2666
},
{
"epoch": 1.7768801532439409,
"learning_rate": 1.6256980409531192e-06,
"loss": 4.1683,
"step": 2667
},
{
"epoch": 1.7775464312484384,
"learning_rate": 1.6161411474593878e-06,
"loss": 4.1129,
"step": 2668
},
{
"epoch": 1.7782127092529358,
"learning_rate": 1.6066114892223676e-06,
"loss": 4.1318,
"step": 2669
},
{
"epoch": 1.7788789872574333,
"learning_rate": 1.5971090773412966e-06,
"loss": 4.1885,
"step": 2670
},
{
"epoch": 1.7795452652619306,
"learning_rate": 1.5876339228836579e-06,
"loss": 4.1311,
"step": 2671
},
{
"epoch": 1.7802115432664278,
"learning_rate": 1.578186036885193e-06,
"loss": 4.1314,
"step": 2672
},
{
"epoch": 1.7808778212709253,
"learning_rate": 1.5687654303498889e-06,
"loss": 4.1178,
"step": 2673
},
{
"epoch": 1.7815440992754228,
"learning_rate": 1.5593721142499545e-06,
"loss": 4.1938,
"step": 2674
},
{
"epoch": 1.78221037727992,
"learning_rate": 1.5500060995258137e-06,
"loss": 4.1825,
"step": 2675
},
{
"epoch": 1.7828766552844173,
"learning_rate": 1.5406673970861012e-06,
"loss": 4.1473,
"step": 2676
},
{
"epoch": 1.7835429332889148,
"learning_rate": 1.5313560178076307e-06,
"loss": 4.1953,
"step": 2677
},
{
"epoch": 1.7842092112934123,
"learning_rate": 1.522071972535391e-06,
"loss": 4.129,
"step": 2678
},
{
"epoch": 1.7848754892979095,
"learning_rate": 1.5128152720825462e-06,
"loss": 4.1813,
"step": 2679
},
{
"epoch": 1.7855417673024068,
"learning_rate": 1.503585927230411e-06,
"loss": 4.1631,
"step": 2680
},
{
"epoch": 1.7862080453069042,
"learning_rate": 1.4943839487284173e-06,
"loss": 4.109,
"step": 2681
},
{
"epoch": 1.7868743233114017,
"learning_rate": 1.485209347294153e-06,
"loss": 4.149,
"step": 2682
},
{
"epoch": 1.7875406013158992,
"learning_rate": 1.4760621336133013e-06,
"loss": 4.1707,
"step": 2683
},
{
"epoch": 1.7882068793203965,
"learning_rate": 1.4669423183396508e-06,
"loss": 4.1598,
"step": 2684
},
{
"epoch": 1.7888731573248937,
"learning_rate": 1.4578499120950829e-06,
"loss": 4.1886,
"step": 2685
},
{
"epoch": 1.7895394353293912,
"learning_rate": 1.448784925469554e-06,
"loss": 4.1403,
"step": 2686
},
{
"epoch": 1.7902057133338887,
"learning_rate": 1.43974736902108e-06,
"loss": 4.1691,
"step": 2687
},
{
"epoch": 1.790871991338386,
"learning_rate": 1.4307372532757324e-06,
"loss": 4.1826,
"step": 2688
},
{
"epoch": 1.7915382693428832,
"learning_rate": 1.4217545887276251e-06,
"loss": 4.2406,
"step": 2689
},
{
"epoch": 1.7922045473473807,
"learning_rate": 1.412799385838895e-06,
"loss": 4.1845,
"step": 2690
},
{
"epoch": 1.7928708253518781,
"learning_rate": 1.403871655039693e-06,
"loss": 4.1568,
"step": 2691
},
{
"epoch": 1.7935371033563754,
"learning_rate": 1.394971406728185e-06,
"loss": 4.1694,
"step": 2692
},
{
"epoch": 1.7942033813608729,
"learning_rate": 1.386098651270512e-06,
"loss": 4.1339,
"step": 2693
},
{
"epoch": 1.7948696593653701,
"learning_rate": 1.3772533990008053e-06,
"loss": 4.1451,
"step": 2694
},
{
"epoch": 1.7955359373698676,
"learning_rate": 1.368435660221154e-06,
"loss": 4.1425,
"step": 2695
},
{
"epoch": 1.796202215374365,
"learning_rate": 1.3596454452016128e-06,
"loss": 4.1463,
"step": 2696
},
{
"epoch": 1.7968684933788623,
"learning_rate": 1.3508827641801669e-06,
"loss": 4.1108,
"step": 2697
},
{
"epoch": 1.7975347713833596,
"learning_rate": 1.3421476273627498e-06,
"loss": 4.1339,
"step": 2698
},
{
"epoch": 1.798201049387857,
"learning_rate": 1.3334400449231954e-06,
"loss": 4.1574,
"step": 2699
},
{
"epoch": 1.7988673273923546,
"learning_rate": 1.324760027003255e-06,
"loss": 4.1617,
"step": 2700
},
{
"epoch": 1.7995336053968518,
"learning_rate": 1.316107583712578e-06,
"loss": 4.2025,
"step": 2701
},
{
"epoch": 1.800199883401349,
"learning_rate": 1.3074827251286892e-06,
"loss": 4.1051,
"step": 2702
},
{
"epoch": 1.8008661614058465,
"learning_rate": 1.2988854612969863e-06,
"loss": 4.1306,
"step": 2703
},
{
"epoch": 1.801532439410344,
"learning_rate": 1.2903158022307376e-06,
"loss": 4.1393,
"step": 2704
},
{
"epoch": 1.8021987174148415,
"learning_rate": 1.2817737579110506e-06,
"loss": 4.1759,
"step": 2705
},
{
"epoch": 1.8028649954193388,
"learning_rate": 1.2732593382868668e-06,
"loss": 4.1367,
"step": 2706
},
{
"epoch": 1.803531273423836,
"learning_rate": 1.2647725532749732e-06,
"loss": 4.1571,
"step": 2707
},
{
"epoch": 1.8041975514283335,
"learning_rate": 1.2563134127599407e-06,
"loss": 4.1177,
"step": 2708
},
{
"epoch": 1.804863829432831,
"learning_rate": 1.2478819265941604e-06,
"loss": 4.1713,
"step": 2709
},
{
"epoch": 1.8055301074373282,
"learning_rate": 1.2394781045978188e-06,
"loss": 4.1605,
"step": 2710
},
{
"epoch": 1.8061963854418255,
"learning_rate": 1.2311019565588694e-06,
"loss": 4.1477,
"step": 2711
},
{
"epoch": 1.806862663446323,
"learning_rate": 1.2227534922330391e-06,
"loss": 4.1815,
"step": 2712
},
{
"epoch": 1.8075289414508204,
"learning_rate": 1.2144327213438138e-06,
"loss": 4.1544,
"step": 2713
},
{
"epoch": 1.8081952194553177,
"learning_rate": 1.2061396535824249e-06,
"loss": 4.1721,
"step": 2714
},
{
"epoch": 1.808861497459815,
"learning_rate": 1.1978742986078316e-06,
"loss": 4.1233,
"step": 2715
},
{
"epoch": 1.8095277754643124,
"learning_rate": 1.1896366660467173e-06,
"loss": 4.1123,
"step": 2716
},
{
"epoch": 1.81019405346881,
"learning_rate": 1.1814267654934846e-06,
"loss": 4.1669,
"step": 2717
},
{
"epoch": 1.8108603314733074,
"learning_rate": 1.1732446065102292e-06,
"loss": 4.1485,
"step": 2718
},
{
"epoch": 1.8115266094778046,
"learning_rate": 1.1650901986267365e-06,
"loss": 4.1477,
"step": 2719
},
{
"epoch": 1.812192887482302,
"learning_rate": 1.1569635513404758e-06,
"loss": 4.1144,
"step": 2720
},
{
"epoch": 1.8128591654867994,
"learning_rate": 1.1488646741165787e-06,
"loss": 4.1732,
"step": 2721
},
{
"epoch": 1.8135254434912969,
"learning_rate": 1.140793576387833e-06,
"loss": 4.1297,
"step": 2722
},
{
"epoch": 1.814191721495794,
"learning_rate": 1.1327502675546748e-06,
"loss": 4.1103,
"step": 2723
},
{
"epoch": 1.8148579995002914,
"learning_rate": 1.1247347569851684e-06,
"loss": 4.1335,
"step": 2724
},
{
"epoch": 1.8155242775047888,
"learning_rate": 1.1167470540150048e-06,
"loss": 4.1703,
"step": 2725
},
{
"epoch": 1.8161905555092863,
"learning_rate": 1.1087871679474921e-06,
"loss": 4.1251,
"step": 2726
},
{
"epoch": 1.8168568335137836,
"learning_rate": 1.1008551080535334e-06,
"loss": 4.1214,
"step": 2727
},
{
"epoch": 1.817523111518281,
"learning_rate": 1.092950883571625e-06,
"loss": 4.1474,
"step": 2728
},
{
"epoch": 1.8181893895227783,
"learning_rate": 1.0850745037078419e-06,
"loss": 4.1792,
"step": 2729
},
{
"epoch": 1.8188556675272758,
"learning_rate": 1.0772259776358318e-06,
"loss": 4.1364,
"step": 2730
},
{
"epoch": 1.8195219455317733,
"learning_rate": 1.0694053144967936e-06,
"loss": 4.1854,
"step": 2731
},
{
"epoch": 1.8201882235362705,
"learning_rate": 1.0616125233994857e-06,
"loss": 4.1493,
"step": 2732
},
{
"epoch": 1.8208545015407678,
"learning_rate": 1.0538476134201919e-06,
"loss": 4.1946,
"step": 2733
},
{
"epoch": 1.8215207795452653,
"learning_rate": 1.046110593602731e-06,
"loss": 4.1756,
"step": 2734
},
{
"epoch": 1.8221870575497627,
"learning_rate": 1.038401472958439e-06,
"loss": 4.1122,
"step": 2735
},
{
"epoch": 1.82285333555426,
"learning_rate": 1.0307202604661448e-06,
"loss": 4.1285,
"step": 2736
},
{
"epoch": 1.8235196135587572,
"learning_rate": 1.0230669650721864e-06,
"loss": 4.1297,
"step": 2737
},
{
"epoch": 1.8241858915632547,
"learning_rate": 1.0154415956903834e-06,
"loss": 4.1373,
"step": 2738
},
{
"epoch": 1.8248521695677522,
"learning_rate": 1.0078441612020262e-06,
"loss": 4.1579,
"step": 2739
},
{
"epoch": 1.8255184475722497,
"learning_rate": 1.0002746704558725e-06,
"loss": 4.1326,
"step": 2740
},
{
"epoch": 1.826184725576747,
"learning_rate": 9.927331322681337e-07,
"loss": 4.1147,
"step": 2741
},
{
"epoch": 1.8268510035812442,
"learning_rate": 9.85219555422462e-07,
"loss": 4.1696,
"step": 2742
},
{
"epoch": 1.8275172815857417,
"learning_rate": 9.77733948669951e-07,
"loss": 4.1549,
"step": 2743
},
{
"epoch": 1.8281835595902391,
"learning_rate": 9.702763207290994e-07,
"loss": 4.1306,
"step": 2744
},
{
"epoch": 1.8288498375947364,
"learning_rate": 9.628466802858394e-07,
"loss": 4.1374,
"step": 2745
},
{
"epoch": 1.8295161155992337,
"learning_rate": 9.554450359934964e-07,
"loss": 4.2407,
"step": 2746
},
{
"epoch": 1.8301823936037311,
"learning_rate": 9.480713964727855e-07,
"loss": 4.1069,
"step": 2747
},
{
"epoch": 1.8308486716082286,
"learning_rate": 9.40725770311815e-07,
"loss": 4.2067,
"step": 2748
},
{
"epoch": 1.8315149496127259,
"learning_rate": 9.334081660660577e-07,
"loss": 4.1821,
"step": 2749
},
{
"epoch": 1.8321812276172231,
"learning_rate": 9.261185922583488e-07,
"loss": 4.1194,
"step": 2750
},
{
"epoch": 1.8328475056217206,
"learning_rate": 9.188570573788801e-07,
"loss": 4.1765,
"step": 2751
},
{
"epoch": 1.833513783626218,
"learning_rate": 9.116235698851866e-07,
"loss": 4.1414,
"step": 2752
},
{
"epoch": 1.8341800616307156,
"learning_rate": 9.044181382021289e-07,
"loss": 4.1696,
"step": 2753
},
{
"epoch": 1.8348463396352128,
"learning_rate": 8.972407707219049e-07,
"loss": 4.1508,
"step": 2754
},
{
"epoch": 1.83551261763971,
"learning_rate": 8.900914758040141e-07,
"loss": 4.1665,
"step": 2755
},
{
"epoch": 1.8361788956442076,
"learning_rate": 8.829702617752622e-07,
"loss": 4.1618,
"step": 2756
},
{
"epoch": 1.836845173648705,
"learning_rate": 8.758771369297536e-07,
"loss": 4.1606,
"step": 2757
},
{
"epoch": 1.8375114516532023,
"learning_rate": 8.688121095288715e-07,
"loss": 4.0852,
"step": 2758
},
{
"epoch": 1.8381777296576995,
"learning_rate": 8.617751878012726e-07,
"loss": 4.124,
"step": 2759
},
{
"epoch": 1.838844007662197,
"learning_rate": 8.547663799428924e-07,
"loss": 4.1193,
"step": 2760
},
{
"epoch": 1.8395102856666945,
"learning_rate": 8.477856941169066e-07,
"loss": 4.1405,
"step": 2761
},
{
"epoch": 1.8401765636711918,
"learning_rate": 8.408331384537393e-07,
"loss": 4.1744,
"step": 2762
},
{
"epoch": 1.8408428416756892,
"learning_rate": 8.339087210510632e-07,
"loss": 4.1227,
"step": 2763
},
{
"epoch": 1.8415091196801865,
"learning_rate": 8.270124499737631e-07,
"loss": 4.1077,
"step": 2764
},
{
"epoch": 1.842175397684684,
"learning_rate": 8.201443332539499e-07,
"loss": 4.148,
"step": 2765
},
{
"epoch": 1.8428416756891814,
"learning_rate": 8.133043788909417e-07,
"loss": 4.1869,
"step": 2766
},
{
"epoch": 1.8435079536936787,
"learning_rate": 8.064925948512575e-07,
"loss": 4.1648,
"step": 2767
},
{
"epoch": 1.844174231698176,
"learning_rate": 7.99708989068601e-07,
"loss": 4.1334,
"step": 2768
},
{
"epoch": 1.8448405097026734,
"learning_rate": 7.929535694438661e-07,
"loss": 4.148,
"step": 2769
},
{
"epoch": 1.845506787707171,
"learning_rate": 7.86226343845109e-07,
"loss": 4.1497,
"step": 2770
},
{
"epoch": 1.8461730657116682,
"learning_rate": 7.795273201075454e-07,
"loss": 4.0929,
"step": 2771
},
{
"epoch": 1.8468393437161654,
"learning_rate": 7.728565060335563e-07,
"loss": 4.1003,
"step": 2772
},
{
"epoch": 1.847505621720663,
"learning_rate": 7.662139093926601e-07,
"loss": 4.1927,
"step": 2773
},
{
"epoch": 1.8481718997251604,
"learning_rate": 7.595995379215098e-07,
"loss": 4.0996,
"step": 2774
},
{
"epoch": 1.8488381777296579,
"learning_rate": 7.530133993238847e-07,
"loss": 4.1406,
"step": 2775
},
{
"epoch": 1.8495044557341551,
"learning_rate": 7.464555012706847e-07,
"loss": 4.1598,
"step": 2776
},
{
"epoch": 1.8501707337386524,
"learning_rate": 7.399258513999113e-07,
"loss": 4.1274,
"step": 2777
},
{
"epoch": 1.8508370117431499,
"learning_rate": 7.334244573166726e-07,
"loss": 4.1558,
"step": 2778
},
{
"epoch": 1.8515032897476473,
"learning_rate": 7.269513265931644e-07,
"loss": 4.1426,
"step": 2779
},
{
"epoch": 1.8521695677521446,
"learning_rate": 7.205064667686584e-07,
"loss": 4.1362,
"step": 2780
},
{
"epoch": 1.8528358457566418,
"learning_rate": 7.140898853495032e-07,
"loss": 4.163,
"step": 2781
},
{
"epoch": 1.8535021237611393,
"learning_rate": 7.077015898091177e-07,
"loss": 4.1926,
"step": 2782
},
{
"epoch": 1.8541684017656368,
"learning_rate": 7.01341587587967e-07,
"loss": 4.1262,
"step": 2783
},
{
"epoch": 1.854834679770134,
"learning_rate": 6.9500988609357e-07,
"loss": 4.1547,
"step": 2784
},
{
"epoch": 1.8555009577746313,
"learning_rate": 6.88706492700475e-07,
"loss": 4.1259,
"step": 2785
},
{
"epoch": 1.8561672357791288,
"learning_rate": 6.824314147502703e-07,
"loss": 4.1335,
"step": 2786
},
{
"epoch": 1.8568335137836263,
"learning_rate": 6.761846595515515e-07,
"loss": 4.1376,
"step": 2787
},
{
"epoch": 1.8574997917881237,
"learning_rate": 6.699662343799428e-07,
"loss": 4.1658,
"step": 2788
},
{
"epoch": 1.858166069792621,
"learning_rate": 6.637761464780623e-07,
"loss": 4.1744,
"step": 2789
},
{
"epoch": 1.8588323477971183,
"learning_rate": 6.576144030555259e-07,
"loss": 4.1345,
"step": 2790
},
{
"epoch": 1.8594986258016157,
"learning_rate": 6.514810112889319e-07,
"loss": 4.183,
"step": 2791
},
{
"epoch": 1.8601649038061132,
"learning_rate": 6.453759783218688e-07,
"loss": 4.1296,
"step": 2792
},
{
"epoch": 1.8608311818106105,
"learning_rate": 6.392993112648793e-07,
"loss": 4.136,
"step": 2793
},
{
"epoch": 1.8614974598151077,
"learning_rate": 6.332510171954853e-07,
"loss": 4.1041,
"step": 2794
},
{
"epoch": 1.8621637378196052,
"learning_rate": 6.272311031581518e-07,
"loss": 4.1936,
"step": 2795
},
{
"epoch": 1.8628300158241027,
"learning_rate": 6.212395761642897e-07,
"loss": 4.1364,
"step": 2796
},
{
"epoch": 1.8634962938286,
"learning_rate": 6.152764431922586e-07,
"loss": 4.1739,
"step": 2797
},
{
"epoch": 1.8641625718330974,
"learning_rate": 6.093417111873306e-07,
"loss": 4.1264,
"step": 2798
},
{
"epoch": 1.8648288498375947,
"learning_rate": 6.034353870617127e-07,
"loss": 4.1592,
"step": 2799
},
{
"epoch": 1.8654951278420921,
"learning_rate": 5.975574776945103e-07,
"loss": 4.1894,
"step": 2800
},
{
"epoch": 1.8661614058465896,
"learning_rate": 5.917079899317557e-07,
"loss": 4.1448,
"step": 2801
},
{
"epoch": 1.8668276838510869,
"learning_rate": 5.858869305863601e-07,
"loss": 4.1653,
"step": 2802
},
{
"epoch": 1.8674939618555841,
"learning_rate": 5.800943064381282e-07,
"loss": 4.213,
"step": 2803
},
{
"epoch": 1.8681602398600816,
"learning_rate": 5.743301242337546e-07,
"loss": 4.1559,
"step": 2804
},
{
"epoch": 1.868826517864579,
"learning_rate": 5.685943906867996e-07,
"loss": 4.163,
"step": 2805
},
{
"epoch": 1.8694927958690764,
"learning_rate": 5.628871124776863e-07,
"loss": 4.1862,
"step": 2806
},
{
"epoch": 1.8701590738735736,
"learning_rate": 5.572082962537056e-07,
"loss": 4.1827,
"step": 2807
},
{
"epoch": 1.870825351878071,
"learning_rate": 5.515579486289891e-07,
"loss": 4.147,
"step": 2808
},
{
"epoch": 1.8714916298825686,
"learning_rate": 5.459360761845139e-07,
"loss": 4.1508,
"step": 2809
},
{
"epoch": 1.872157907887066,
"learning_rate": 5.403426854680982e-07,
"loss": 4.1067,
"step": 2810
},
{
"epoch": 1.8728241858915633,
"learning_rate": 5.347777829943835e-07,
"loss": 4.146,
"step": 2811
},
{
"epoch": 1.8734904638960606,
"learning_rate": 5.292413752448239e-07,
"loss": 4.1318,
"step": 2812
},
{
"epoch": 1.874156741900558,
"learning_rate": 5.237334686676948e-07,
"loss": 4.1479,
"step": 2813
},
{
"epoch": 1.8748230199050555,
"learning_rate": 5.18254069678073e-07,
"loss": 4.1064,
"step": 2814
},
{
"epoch": 1.8754892979095528,
"learning_rate": 5.128031846578285e-07,
"loss": 4.1443,
"step": 2815
},
{
"epoch": 1.87615557591405,
"learning_rate": 5.073808199556329e-07,
"loss": 4.1644,
"step": 2816
},
{
"epoch": 1.8768218539185475,
"learning_rate": 5.019869818869261e-07,
"loss": 4.0891,
"step": 2817
},
{
"epoch": 1.877488131923045,
"learning_rate": 4.966216767339299e-07,
"loss": 4.1562,
"step": 2818
},
{
"epoch": 1.8781544099275422,
"learning_rate": 4.912849107456318e-07,
"loss": 4.1529,
"step": 2819
},
{
"epoch": 1.8788206879320395,
"learning_rate": 4.859766901377849e-07,
"loss": 4.1639,
"step": 2820
},
{
"epoch": 1.879486965936537,
"learning_rate": 4.806970210928824e-07,
"loss": 4.1728,
"step": 2821
},
{
"epoch": 1.8801532439410344,
"learning_rate": 4.7544590976018324e-07,
"loss": 4.1312,
"step": 2822
},
{
"epoch": 1.880819521945532,
"learning_rate": 4.702233622556673e-07,
"loss": 4.166,
"step": 2823
},
{
"epoch": 1.8814857999500292,
"learning_rate": 4.650293846620496e-07,
"loss": 4.1841,
"step": 2824
},
{
"epoch": 1.8821520779545264,
"learning_rate": 4.598639830287799e-07,
"loss": 4.1688,
"step": 2825
},
{
"epoch": 1.882818355959024,
"learning_rate": 4.547271633720179e-07,
"loss": 4.1762,
"step": 2826
},
{
"epoch": 1.8834846339635214,
"learning_rate": 4.496189316746308e-07,
"loss": 4.2074,
"step": 2827
},
{
"epoch": 1.8841509119680186,
"learning_rate": 4.4453929388618976e-07,
"loss": 4.1532,
"step": 2828
},
{
"epoch": 1.884817189972516,
"learning_rate": 4.3948825592297347e-07,
"loss": 4.1062,
"step": 2829
},
{
"epoch": 1.8854834679770134,
"learning_rate": 4.344658236679372e-07,
"loss": 4.1571,
"step": 2830
},
{
"epoch": 1.8861497459815109,
"learning_rate": 4.294720029707211e-07,
"loss": 4.1198,
"step": 2831
},
{
"epoch": 1.8868160239860081,
"learning_rate": 4.2450679964765316e-07,
"loss": 4.1972,
"step": 2832
},
{
"epoch": 1.8874823019905054,
"learning_rate": 4.195702194817186e-07,
"loss": 4.218,
"step": 2833
},
{
"epoch": 1.8881485799950029,
"learning_rate": 4.146622682225626e-07,
"loss": 4.2091,
"step": 2834
},
{
"epoch": 1.8888148579995003,
"learning_rate": 4.097829515864987e-07,
"loss": 4.134,
"step": 2835
},
{
"epoch": 1.8894811360039978,
"learning_rate": 4.0493227525648105e-07,
"loss": 4.1305,
"step": 2836
},
{
"epoch": 1.890147414008495,
"learning_rate": 4.0011024488210703e-07,
"loss": 4.1516,
"step": 2837
},
{
"epoch": 1.8908136920129923,
"learning_rate": 3.953168660796119e-07,
"loss": 4.1092,
"step": 2838
},
{
"epoch": 1.8914799700174898,
"learning_rate": 3.905521444318605e-07,
"loss": 4.1737,
"step": 2839
},
{
"epoch": 1.8921462480219873,
"learning_rate": 3.8581608548833856e-07,
"loss": 4.1772,
"step": 2840
},
{
"epoch": 1.8928125260264845,
"learning_rate": 3.811086947651504e-07,
"loss": 4.1143,
"step": 2841
},
{
"epoch": 1.8934788040309818,
"learning_rate": 3.764299777450075e-07,
"loss": 4.1709,
"step": 2842
},
{
"epoch": 1.8941450820354793,
"learning_rate": 3.717799398772259e-07,
"loss": 4.1753,
"step": 2843
},
{
"epoch": 1.8948113600399767,
"learning_rate": 3.6715858657772604e-07,
"loss": 4.1413,
"step": 2844
},
{
"epoch": 1.8954776380444742,
"learning_rate": 3.6256592322900793e-07,
"loss": 4.1726,
"step": 2845
},
{
"epoch": 1.8961439160489715,
"learning_rate": 3.580019551801622e-07,
"loss": 4.1293,
"step": 2846
},
{
"epoch": 1.8968101940534687,
"learning_rate": 3.5346668774685897e-07,
"loss": 4.1837,
"step": 2847
},
{
"epoch": 1.8974764720579662,
"learning_rate": 3.489601262113368e-07,
"loss": 4.1904,
"step": 2848
},
{
"epoch": 1.8981427500624637,
"learning_rate": 3.4448227582240257e-07,
"loss": 4.1837,
"step": 2849
},
{
"epoch": 1.898809028066961,
"learning_rate": 3.400331417954289e-07,
"loss": 4.1112,
"step": 2850
},
{
"epoch": 1.8994753060714582,
"learning_rate": 3.35612729312329e-07,
"loss": 4.1321,
"step": 2851
},
{
"epoch": 1.9001415840759557,
"learning_rate": 3.3122104352157626e-07,
"loss": 4.1549,
"step": 2852
},
{
"epoch": 1.9008078620804532,
"learning_rate": 3.268580895381762e-07,
"loss": 4.1446,
"step": 2853
},
{
"epoch": 1.9014741400849504,
"learning_rate": 3.225238724436863e-07,
"loss": 4.1812,
"step": 2854
},
{
"epoch": 1.9021404180894477,
"learning_rate": 3.182183972861713e-07,
"loss": 4.1392,
"step": 2855
},
{
"epoch": 1.9028066960939451,
"learning_rate": 3.1394166908023936e-07,
"loss": 4.1523,
"step": 2856
},
{
"epoch": 1.9034729740984426,
"learning_rate": 3.09693692807006e-07,
"loss": 4.1383,
"step": 2857
},
{
"epoch": 1.90413925210294,
"learning_rate": 3.0547447341410797e-07,
"loss": 4.1178,
"step": 2858
},
{
"epoch": 1.9048055301074374,
"learning_rate": 3.0128401581567824e-07,
"loss": 4.1357,
"step": 2859
},
{
"epoch": 1.9054718081119346,
"learning_rate": 2.971223248923599e-07,
"loss": 4.1673,
"step": 2860
},
{
"epoch": 1.906138086116432,
"learning_rate": 2.9298940549128964e-07,
"loss": 4.1446,
"step": 2861
},
{
"epoch": 1.9068043641209296,
"learning_rate": 2.8888526242608347e-07,
"loss": 4.181,
"step": 2862
},
{
"epoch": 1.9074706421254268,
"learning_rate": 2.8480990047686227e-07,
"loss": 4.1636,
"step": 2863
},
{
"epoch": 1.908136920129924,
"learning_rate": 2.807633243902041e-07,
"loss": 4.1482,
"step": 2864
},
{
"epoch": 1.9088031981344216,
"learning_rate": 2.7674553887917234e-07,
"loss": 4.1226,
"step": 2865
},
{
"epoch": 1.909469476138919,
"learning_rate": 2.727565486232986e-07,
"loss": 4.1825,
"step": 2866
},
{
"epoch": 1.9101357541434163,
"learning_rate": 2.687963582685665e-07,
"loss": 4.1094,
"step": 2867
},
{
"epoch": 1.9108020321479136,
"learning_rate": 2.6486497242742827e-07,
"loss": 4.1565,
"step": 2868
},
{
"epoch": 1.911468310152411,
"learning_rate": 2.6096239567877656e-07,
"loss": 4.1671,
"step": 2869
},
{
"epoch": 1.9121345881569085,
"learning_rate": 2.570886325679617e-07,
"loss": 4.1107,
"step": 2870
},
{
"epoch": 1.912800866161406,
"learning_rate": 2.5324368760676066e-07,
"loss": 4.1423,
"step": 2871
},
{
"epoch": 1.9134671441659032,
"learning_rate": 2.494275652733968e-07,
"loss": 4.1561,
"step": 2872
},
{
"epoch": 1.9141334221704005,
"learning_rate": 2.456402700125232e-07,
"loss": 4.143,
"step": 2873
},
{
"epoch": 1.914799700174898,
"learning_rate": 2.418818062352113e-07,
"loss": 4.1643,
"step": 2874
},
{
"epoch": 1.9154659781793955,
"learning_rate": 2.3815217831895943e-07,
"loss": 4.2009,
"step": 2875
},
{
"epoch": 1.9161322561838927,
"learning_rate": 2.344513906076734e-07,
"loss": 4.1067,
"step": 2876
},
{
"epoch": 1.91679853418839,
"learning_rate": 2.30779447411672e-07,
"loss": 4.1757,
"step": 2877
},
{
"epoch": 1.9174648121928874,
"learning_rate": 2.2713635300768422e-07,
"loss": 4.1784,
"step": 2878
},
{
"epoch": 1.918131090197385,
"learning_rate": 2.2352211163883253e-07,
"loss": 4.1617,
"step": 2879
},
{
"epoch": 1.9187973682018822,
"learning_rate": 2.1993672751463579e-07,
"loss": 4.0886,
"step": 2880
},
{
"epoch": 1.9194636462063797,
"learning_rate": 2.1638020481100086e-07,
"loss": 4.1535,
"step": 2881
},
{
"epoch": 1.920129924210877,
"learning_rate": 2.1285254767022255e-07,
"loss": 4.1984,
"step": 2882
},
{
"epoch": 1.9207962022153744,
"learning_rate": 2.0935376020097263e-07,
"loss": 4.1113,
"step": 2883
},
{
"epoch": 1.9214624802198719,
"learning_rate": 2.058838464783025e-07,
"loss": 4.1652,
"step": 2884
},
{
"epoch": 1.9221287582243691,
"learning_rate": 2.0244281054363213e-07,
"loss": 4.15,
"step": 2885
},
{
"epoch": 1.9227950362288664,
"learning_rate": 1.990306564047445e-07,
"loss": 4.135,
"step": 2886
},
{
"epoch": 1.9234613142333639,
"learning_rate": 1.956473880357912e-07,
"loss": 4.2053,
"step": 2887
},
{
"epoch": 1.9241275922378613,
"learning_rate": 1.9229300937727291e-07,
"loss": 4.1906,
"step": 2888
},
{
"epoch": 1.9247938702423586,
"learning_rate": 1.889675243360478e-07,
"loss": 4.1761,
"step": 2889
},
{
"epoch": 1.9254601482468559,
"learning_rate": 1.8567093678531212e-07,
"loss": 4.1709,
"step": 2890
},
{
"epoch": 1.9261264262513533,
"learning_rate": 1.8240325056462227e-07,
"loss": 4.1371,
"step": 2891
},
{
"epoch": 1.9267927042558508,
"learning_rate": 1.791644694798561e-07,
"loss": 4.1329,
"step": 2892
},
{
"epoch": 1.9274589822603483,
"learning_rate": 1.7595459730323505e-07,
"loss": 4.1657,
"step": 2893
},
{
"epoch": 1.9281252602648455,
"learning_rate": 1.7277363777330745e-07,
"loss": 4.1333,
"step": 2894
},
{
"epoch": 1.9287915382693428,
"learning_rate": 1.6962159459494588e-07,
"loss": 4.1404,
"step": 2895
},
{
"epoch": 1.9294578162738403,
"learning_rate": 1.6649847143934972e-07,
"loss": 4.1422,
"step": 2896
},
{
"epoch": 1.9301240942783378,
"learning_rate": 1.634042719440232e-07,
"loss": 4.1798,
"step": 2897
},
{
"epoch": 1.930790372282835,
"learning_rate": 1.6033899971279743e-07,
"loss": 4.1616,
"step": 2898
},
{
"epoch": 1.9314566502873323,
"learning_rate": 1.573026583158027e-07,
"loss": 4.1858,
"step": 2899
},
{
"epoch": 1.9321229282918297,
"learning_rate": 1.542952512894741e-07,
"loss": 4.1286,
"step": 2900
},
{
"epoch": 1.9327892062963272,
"learning_rate": 1.5131678213655133e-07,
"loss": 4.1216,
"step": 2901
},
{
"epoch": 1.9334554843008245,
"learning_rate": 1.4836725432606503e-07,
"loss": 4.1881,
"step": 2902
},
{
"epoch": 1.9341217623053217,
"learning_rate": 1.4544667129333944e-07,
"loss": 4.1873,
"step": 2903
},
{
"epoch": 1.9347880403098192,
"learning_rate": 1.425550364399897e-07,
"loss": 4.0843,
"step": 2904
},
{
"epoch": 1.9354543183143167,
"learning_rate": 1.3969235313390782e-07,
"loss": 4.1932,
"step": 2905
},
{
"epoch": 1.9361205963188142,
"learning_rate": 1.3685862470927403e-07,
"loss": 4.1585,
"step": 2906
},
{
"epoch": 1.9367868743233114,
"learning_rate": 1.3405385446654261e-07,
"loss": 4.1353,
"step": 2907
},
{
"epoch": 1.9374531523278087,
"learning_rate": 1.312780456724366e-07,
"loss": 4.099,
"step": 2908
},
{
"epoch": 1.9381194303323062,
"learning_rate": 1.285312015599477e-07,
"loss": 4.149,
"step": 2909
},
{
"epoch": 1.9387857083368036,
"learning_rate": 1.2581332532833613e-07,
"loss": 4.1043,
"step": 2910
},
{
"epoch": 1.939451986341301,
"learning_rate": 1.2312442014311977e-07,
"loss": 4.2044,
"step": 2911
},
{
"epoch": 1.9401182643457981,
"learning_rate": 1.2046448913607678e-07,
"loss": 4.1839,
"step": 2912
},
{
"epoch": 1.9407845423502956,
"learning_rate": 1.1783353540523733e-07,
"loss": 4.1679,
"step": 2913
},
{
"epoch": 1.941450820354793,
"learning_rate": 1.1523156201488361e-07,
"loss": 4.1182,
"step": 2914
},
{
"epoch": 1.9421170983592904,
"learning_rate": 1.1265857199553864e-07,
"loss": 4.1569,
"step": 2915
},
{
"epoch": 1.9427833763637878,
"learning_rate": 1.101145683439747e-07,
"loss": 4.1428,
"step": 2916
},
{
"epoch": 1.943449654368285,
"learning_rate": 1.0759955402320221e-07,
"loss": 4.1166,
"step": 2917
},
{
"epoch": 1.9441159323727826,
"learning_rate": 1.0511353196246132e-07,
"loss": 4.1682,
"step": 2918
},
{
"epoch": 1.94478221037728,
"learning_rate": 1.0265650505723589e-07,
"loss": 4.1749,
"step": 2919
},
{
"epoch": 1.9454484883817773,
"learning_rate": 1.0022847616923126e-07,
"loss": 4.1354,
"step": 2920
},
{
"epoch": 1.9461147663862746,
"learning_rate": 9.782944812637973e-08,
"loss": 4.1626,
"step": 2921
},
{
"epoch": 1.946781044390772,
"learning_rate": 9.545942372283789e-08,
"loss": 4.1018,
"step": 2922
},
{
"epoch": 1.9474473223952695,
"learning_rate": 9.311840571898101e-08,
"loss": 4.1339,
"step": 2923
},
{
"epoch": 1.9481136003997668,
"learning_rate": 9.080639684139747e-08,
"loss": 4.1663,
"step": 2924
},
{
"epoch": 1.948779878404264,
"learning_rate": 8.852339978289714e-08,
"loss": 4.1958,
"step": 2925
},
{
"epoch": 1.9494461564087615,
"learning_rate": 8.626941720249193e-08,
"loss": 4.1762,
"step": 2926
},
{
"epoch": 1.950112434413259,
"learning_rate": 8.404445172539854e-08,
"loss": 4.1507,
"step": 2927
},
{
"epoch": 1.9507787124177565,
"learning_rate": 8.184850594304683e-08,
"loss": 4.1392,
"step": 2928
},
{
"epoch": 1.9514449904222537,
"learning_rate": 7.968158241306035e-08,
"loss": 4.1445,
"step": 2929
},
{
"epoch": 1.952111268426751,
"learning_rate": 7.75436836592619e-08,
"loss": 4.1743,
"step": 2930
},
{
"epoch": 1.9527775464312485,
"learning_rate": 7.543481217166803e-08,
"loss": 4.1426,
"step": 2931
},
{
"epoch": 1.953443824435746,
"learning_rate": 7.335497040648898e-08,
"loss": 4.1183,
"step": 2932
},
{
"epoch": 1.9541101024402432,
"learning_rate": 7.130416078612312e-08,
"loss": 4.071,
"step": 2933
},
{
"epoch": 1.9547763804447404,
"learning_rate": 6.928238569915701e-08,
"loss": 4.1564,
"step": 2934
},
{
"epoch": 1.955442658449238,
"learning_rate": 6.728964750035705e-08,
"loss": 4.1987,
"step": 2935
},
{
"epoch": 1.9561089364537354,
"learning_rate": 6.53259485106722e-08,
"loss": 4.1178,
"step": 2936
},
{
"epoch": 1.9567752144582327,
"learning_rate": 6.339129101722574e-08,
"loss": 4.1754,
"step": 2937
},
{
"epoch": 1.95744149246273,
"learning_rate": 6.148567727332633e-08,
"loss": 4.167,
"step": 2938
},
{
"epoch": 1.9581077704672274,
"learning_rate": 5.960910949844301e-08,
"loss": 4.179,
"step": 2939
},
{
"epoch": 1.9587740484717249,
"learning_rate": 5.776158987822467e-08,
"loss": 4.1288,
"step": 2940
},
{
"epoch": 1.9594403264762223,
"learning_rate": 5.5943120564477816e-08,
"loss": 4.1622,
"step": 2941
},
{
"epoch": 1.9601066044807196,
"learning_rate": 5.415370367518602e-08,
"loss": 4.1316,
"step": 2942
},
{
"epoch": 1.9607728824852169,
"learning_rate": 5.2393341294482145e-08,
"loss": 4.1531,
"step": 2943
},
{
"epoch": 1.9614391604897143,
"learning_rate": 5.0662035472673344e-08,
"loss": 4.1871,
"step": 2944
},
{
"epoch": 1.9621054384942118,
"learning_rate": 4.89597882262105e-08,
"loss": 4.1169,
"step": 2945
},
{
"epoch": 1.962771716498709,
"learning_rate": 4.728660153771047e-08,
"loss": 4.1663,
"step": 2946
},
{
"epoch": 1.9634379945032063,
"learning_rate": 4.564247735593941e-08,
"loss": 4.2037,
"step": 2947
},
{
"epoch": 1.9641042725077038,
"learning_rate": 4.402741759581275e-08,
"loss": 4.1727,
"step": 2948
},
{
"epoch": 1.9647705505122013,
"learning_rate": 4.244142413839525e-08,
"loss": 4.144,
"step": 2949
},
{
"epoch": 1.9654368285166985,
"learning_rate": 4.088449883089818e-08,
"loss": 4.1395,
"step": 2950
},
{
"epoch": 1.966103106521196,
"learning_rate": 3.935664348668211e-08,
"loss": 4.1512,
"step": 2951
},
{
"epoch": 1.9667693845256933,
"learning_rate": 3.7857859885240266e-08,
"loss": 4.0827,
"step": 2952
},
{
"epoch": 1.9674356625301908,
"learning_rate": 3.638814977221239e-08,
"loss": 4.1329,
"step": 2953
},
{
"epoch": 1.9681019405346882,
"learning_rate": 3.494751485937364e-08,
"loss": 4.1581,
"step": 2954
},
{
"epoch": 1.9687682185391855,
"learning_rate": 3.353595682463739e-08,
"loss": 4.1706,
"step": 2955
},
{
"epoch": 1.9694344965436827,
"learning_rate": 3.2153477312052424e-08,
"loss": 4.164,
"step": 2956
},
{
"epoch": 1.9701007745481802,
"learning_rate": 3.080007793179185e-08,
"loss": 4.1254,
"step": 2957
},
{
"epoch": 1.9707670525526777,
"learning_rate": 2.9475760260166962e-08,
"loss": 4.1213,
"step": 2958
},
{
"epoch": 1.971433330557175,
"learning_rate": 2.8180525839616168e-08,
"loss": 4.1404,
"step": 2959
},
{
"epoch": 1.9720996085616722,
"learning_rate": 2.6914376178702183e-08,
"loss": 4.1738,
"step": 2960
},
{
"epoch": 1.9727658865661697,
"learning_rate": 2.567731275211205e-08,
"loss": 4.1826,
"step": 2961
},
{
"epoch": 1.9734321645706672,
"learning_rate": 2.44693370006599e-08,
"loss": 4.1243,
"step": 2962
},
{
"epoch": 1.9740984425751646,
"learning_rate": 2.3290450331278635e-08,
"loss": 4.1297,
"step": 2963
},
{
"epoch": 1.974764720579662,
"learning_rate": 2.2140654117019933e-08,
"loss": 4.1226,
"step": 2964
},
{
"epoch": 1.9754309985841592,
"learning_rate": 2.1019949697054232e-08,
"loss": 4.1435,
"step": 2965
},
{
"epoch": 1.9760972765886566,
"learning_rate": 1.9928338376673517e-08,
"loss": 4.134,
"step": 2966
},
{
"epoch": 1.9767635545931541,
"learning_rate": 1.8865821427280216e-08,
"loss": 4.128,
"step": 2967
},
{
"epoch": 1.9774298325976514,
"learning_rate": 1.7832400086387202e-08,
"loss": 4.1379,
"step": 2968
},
{
"epoch": 1.9780961106021486,
"learning_rate": 1.6828075557628885e-08,
"loss": 4.1769,
"step": 2969
},
{
"epoch": 1.978762388606646,
"learning_rate": 1.58528490107418e-08,
"loss": 4.1665,
"step": 2970
},
{
"epoch": 1.9794286666111436,
"learning_rate": 1.490672158157569e-08,
"loss": 4.1496,
"step": 2971
},
{
"epoch": 1.9800949446156408,
"learning_rate": 1.398969437209352e-08,
"loss": 4.1585,
"step": 2972
},
{
"epoch": 1.980761222620138,
"learning_rate": 1.3101768450352048e-08,
"loss": 4.1311,
"step": 2973
},
{
"epoch": 1.9814275006246356,
"learning_rate": 1.2242944850524018e-08,
"loss": 4.117,
"step": 2974
},
{
"epoch": 1.982093778629133,
"learning_rate": 1.141322457288707e-08,
"loss": 4.1387,
"step": 2975
},
{
"epoch": 1.9827600566336305,
"learning_rate": 1.0612608583818185e-08,
"loss": 4.1512,
"step": 2976
},
{
"epoch": 1.9834263346381278,
"learning_rate": 9.841097815793675e-09,
"loss": 4.1339,
"step": 2977
},
{
"epoch": 1.984092612642625,
"learning_rate": 9.098693167400307e-09,
"loss": 4.1253,
"step": 2978
},
{
"epoch": 1.9847588906471225,
"learning_rate": 8.385395503315852e-09,
"loss": 4.1259,
"step": 2979
},
{
"epoch": 1.98542516865162,
"learning_rate": 7.701205654317422e-09,
"loss": 4.172,
"step": 2980
},
{
"epoch": 1.9860914466561173,
"learning_rate": 7.046124417289801e-09,
"loss": 4.1273,
"step": 2981
},
{
"epoch": 1.9867577246606145,
"learning_rate": 6.420152555206005e-09,
"loss": 4.1533,
"step": 2982
},
{
"epoch": 1.987424002665112,
"learning_rate": 5.823290797132841e-09,
"loss": 4.1364,
"step": 2983
},
{
"epoch": 1.9880902806696095,
"learning_rate": 5.255539838244783e-09,
"loss": 4.1655,
"step": 2984
},
{
"epoch": 1.9887565586741067,
"learning_rate": 4.716900339796215e-09,
"loss": 4.1528,
"step": 2985
},
{
"epoch": 1.9894228366786042,
"learning_rate": 4.2073729291464134e-09,
"loss": 4.2005,
"step": 2986
},
{
"epoch": 1.9900891146831015,
"learning_rate": 3.7269581997428916e-09,
"loss": 4.1511,
"step": 2987
},
{
"epoch": 1.990755392687599,
"learning_rate": 3.275656711126951e-09,
"loss": 4.1594,
"step": 2988
},
{
"epoch": 1.9914216706920964,
"learning_rate": 2.853468988928132e-09,
"loss": 4.1658,
"step": 2989
},
{
"epoch": 1.9920879486965937,
"learning_rate": 2.4603955248725386e-09,
"loss": 4.145,
"step": 2990
},
{
"epoch": 1.992754226701091,
"learning_rate": 2.0964367767717374e-09,
"loss": 4.1078,
"step": 2991
},
{
"epoch": 1.9934205047055884,
"learning_rate": 1.7615931685310837e-09,
"loss": 4.1668,
"step": 2992
},
{
"epoch": 1.9940867827100859,
"learning_rate": 1.4558650901469463e-09,
"loss": 4.169,
"step": 2993
},
{
"epoch": 1.9947530607145831,
"learning_rate": 1.1792528976983796e-09,
"loss": 4.1474,
"step": 2994
},
{
"epoch": 1.9954193387190804,
"learning_rate": 9.317569133554528e-10,
"loss": 4.1263,
"step": 2995
},
{
"epoch": 1.9960856167235779,
"learning_rate": 7.133774253792469e-10,
"loss": 4.1267,
"step": 2996
},
{
"epoch": 1.9967518947280753,
"learning_rate": 5.241146881163061e-10,
"loss": 4.147,
"step": 2997
},
{
"epoch": 1.9974181727325728,
"learning_rate": 3.639689220041875e-10,
"loss": 4.1386,
"step": 2998
},
{
"epoch": 1.99808445073707,
"learning_rate": 2.3294031356313473e-10,
"loss": 4.1631,
"step": 2999
},
{
"epoch": 1.9987507287415673,
"learning_rate": 1.3102901540162916e-10,
"loss": 4.1565,
"step": 3000
},
{
"epoch": 1.9994170067460648,
"learning_rate": 5.823514621638992e-11,
"loss": 4.102,
"step": 3001
},
{
"epoch": 2.0,
"learning_rate": 1.455879079237388e-11,
"loss": 4.1819,
"step": 3002
}
],
"logging_steps": 1,
"max_steps": 3002,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 5000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.554274880625731e+19,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}