| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 1812, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0033112582781456954, |
| "grad_norm": 0.609375, |
| "learning_rate": 1.3186813186813187e-07, |
| "loss": 1.9402873516082764, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.006622516556291391, |
| "grad_norm": 0.7890625, |
| "learning_rate": 3.956043956043956e-07, |
| "loss": 1.9065133333206177, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.009933774834437087, |
| "grad_norm": 0.984375, |
| "learning_rate": 6.593406593406594e-07, |
| "loss": 2.009326696395874, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.013245033112582781, |
| "grad_norm": 0.69921875, |
| "learning_rate": 9.230769230769231e-07, |
| "loss": 2.020029306411743, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.016556291390728478, |
| "grad_norm": 0.98828125, |
| "learning_rate": 1.1868131868131868e-06, |
| "loss": 2.017151355743408, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.019867549668874173, |
| "grad_norm": 0.5859375, |
| "learning_rate": 1.4505494505494505e-06, |
| "loss": 1.8570733070373535, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.023178807947019868, |
| "grad_norm": 0.6015625, |
| "learning_rate": 1.7142857142857143e-06, |
| "loss": 1.8826595544815063, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.026490066225165563, |
| "grad_norm": 0.97265625, |
| "learning_rate": 1.9780219780219782e-06, |
| "loss": 1.8526196479797363, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.029801324503311258, |
| "grad_norm": 0.59765625, |
| "learning_rate": 2.2417582417582418e-06, |
| "loss": 1.8230018615722656, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.033112582781456956, |
| "grad_norm": 0.7578125, |
| "learning_rate": 2.5054945054945057e-06, |
| "loss": 2.0143635272979736, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.03642384105960265, |
| "grad_norm": 0.6015625, |
| "learning_rate": 2.7692307692307693e-06, |
| "loss": 1.9134697914123535, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.039735099337748346, |
| "grad_norm": 0.79296875, |
| "learning_rate": 3.032967032967033e-06, |
| "loss": 1.945981740951538, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.04304635761589404, |
| "grad_norm": 0.703125, |
| "learning_rate": 3.296703296703297e-06, |
| "loss": 1.8832194805145264, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.046357615894039736, |
| "grad_norm": 0.5703125, |
| "learning_rate": 3.5604395604395607e-06, |
| "loss": 1.7985222339630127, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.04966887417218543, |
| "grad_norm": 0.56640625, |
| "learning_rate": 3.824175824175824e-06, |
| "loss": 1.8062254190444946, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.052980132450331126, |
| "grad_norm": 4.5, |
| "learning_rate": 4.087912087912088e-06, |
| "loss": 1.8083665370941162, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.056291390728476824, |
| "grad_norm": 0.8046875, |
| "learning_rate": 4.351648351648352e-06, |
| "loss": 1.9273862838745117, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.059602649006622516, |
| "grad_norm": 1.046875, |
| "learning_rate": 4.615384615384616e-06, |
| "loss": 1.8486627340316772, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.06291390728476821, |
| "grad_norm": 0.65625, |
| "learning_rate": 4.879120879120879e-06, |
| "loss": 1.798123836517334, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.06622516556291391, |
| "grad_norm": 0.53125, |
| "learning_rate": 5.142857142857142e-06, |
| "loss": 1.829748272895813, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0695364238410596, |
| "grad_norm": 0.62109375, |
| "learning_rate": 5.406593406593407e-06, |
| "loss": 1.7815451622009277, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.0728476821192053, |
| "grad_norm": 0.484375, |
| "learning_rate": 5.67032967032967e-06, |
| "loss": 1.847506046295166, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.076158940397351, |
| "grad_norm": 0.546875, |
| "learning_rate": 5.934065934065935e-06, |
| "loss": 1.8606122732162476, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.07947019867549669, |
| "grad_norm": 0.5625, |
| "learning_rate": 6.197802197802198e-06, |
| "loss": 1.9397691488265991, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.08278145695364239, |
| "grad_norm": 0.8359375, |
| "learning_rate": 6.461538461538462e-06, |
| "loss": 1.9331084489822388, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.08609271523178808, |
| "grad_norm": 0.56640625, |
| "learning_rate": 6.725274725274725e-06, |
| "loss": 1.732893943786621, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.08940397350993377, |
| "grad_norm": 0.419921875, |
| "learning_rate": 6.98901098901099e-06, |
| "loss": 1.8520346879959106, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.09271523178807947, |
| "grad_norm": 0.36328125, |
| "learning_rate": 7.252747252747252e-06, |
| "loss": 1.7379193305969238, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.09602649006622517, |
| "grad_norm": 0.494140625, |
| "learning_rate": 7.516483516483517e-06, |
| "loss": 1.6829403638839722, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.09933774834437085, |
| "grad_norm": 0.384765625, |
| "learning_rate": 7.78021978021978e-06, |
| "loss": 1.695286512374878, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.10264900662251655, |
| "grad_norm": 0.41796875, |
| "learning_rate": 8.043956043956044e-06, |
| "loss": 1.7086611986160278, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.10596026490066225, |
| "grad_norm": 0.373046875, |
| "learning_rate": 8.307692307692307e-06, |
| "loss": 1.696899175643921, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.10927152317880795, |
| "grad_norm": 0.5, |
| "learning_rate": 8.571428571428573e-06, |
| "loss": 1.7970926761627197, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.11258278145695365, |
| "grad_norm": 11.25, |
| "learning_rate": 8.835164835164836e-06, |
| "loss": 1.7659003734588623, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.11589403973509933, |
| "grad_norm": 0.31640625, |
| "learning_rate": 9.098901098901098e-06, |
| "loss": 1.73307204246521, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.11920529801324503, |
| "grad_norm": 0.5625, |
| "learning_rate": 9.362637362637363e-06, |
| "loss": 1.716630220413208, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.12251655629139073, |
| "grad_norm": 0.53515625, |
| "learning_rate": 9.626373626373627e-06, |
| "loss": 1.699965000152588, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.12582781456953643, |
| "grad_norm": 0.73046875, |
| "learning_rate": 9.89010989010989e-06, |
| "loss": 1.7758498191833496, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.1291390728476821, |
| "grad_norm": 0.421875, |
| "learning_rate": 1.0153846153846154e-05, |
| "loss": 1.640932559967041, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.13245033112582782, |
| "grad_norm": 2.109375, |
| "learning_rate": 1.0417582417582417e-05, |
| "loss": 1.6744168996810913, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.1357615894039735, |
| "grad_norm": 0.5546875, |
| "learning_rate": 1.0681318681318681e-05, |
| "loss": 1.6837286949157715, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.1390728476821192, |
| "grad_norm": 0.8046875, |
| "learning_rate": 1.0945054945054944e-05, |
| "loss": 1.5954879522323608, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.1423841059602649, |
| "grad_norm": 0.6796875, |
| "learning_rate": 1.120879120879121e-05, |
| "loss": 1.6993297338485718, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.1456953642384106, |
| "grad_norm": 0.42578125, |
| "learning_rate": 1.1472527472527473e-05, |
| "loss": 1.50092351436615, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.1490066225165563, |
| "grad_norm": 0.431640625, |
| "learning_rate": 1.1736263736263737e-05, |
| "loss": 1.4782135486602783, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.152317880794702, |
| "grad_norm": 0.384765625, |
| "learning_rate": 1.2e-05, |
| "loss": 1.655004858970642, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.15562913907284767, |
| "grad_norm": 0.357421875, |
| "learning_rate": 1.1999968010404002e-05, |
| "loss": 1.4930201768875122, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.15894039735099338, |
| "grad_norm": 0.3828125, |
| "learning_rate": 1.1999872042042387e-05, |
| "loss": 1.5215712785720825, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.16225165562913907, |
| "grad_norm": 0.71875, |
| "learning_rate": 1.1999712096194322e-05, |
| "loss": 1.5142920017242432, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.16556291390728478, |
| "grad_norm": 0.3046875, |
| "learning_rate": 1.1999488174991726e-05, |
| "loss": 1.5328994989395142, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.16887417218543047, |
| "grad_norm": 0.330078125, |
| "learning_rate": 1.1999200281419243e-05, |
| "loss": 1.573561429977417, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.17218543046357615, |
| "grad_norm": 0.50390625, |
| "learning_rate": 1.1998848419314207e-05, |
| "loss": 1.3963382244110107, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.17549668874172186, |
| "grad_norm": 0.32421875, |
| "learning_rate": 1.1998432593366584e-05, |
| "loss": 1.4885351657867432, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.17880794701986755, |
| "grad_norm": 0.7109375, |
| "learning_rate": 1.199795280911892e-05, |
| "loss": 1.509068489074707, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.18211920529801323, |
| "grad_norm": 0.384765625, |
| "learning_rate": 1.199740907296626e-05, |
| "loss": 1.3925909996032715, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.18543046357615894, |
| "grad_norm": 0.61328125, |
| "learning_rate": 1.1996801392156057e-05, |
| "loss": 1.4695332050323486, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.18874172185430463, |
| "grad_norm": 0.470703125, |
| "learning_rate": 1.1996129774788096e-05, |
| "loss": 1.4908018112182617, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.19205298013245034, |
| "grad_norm": 0.314453125, |
| "learning_rate": 1.1995394229814352e-05, |
| "loss": 1.3704346418380737, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.19536423841059603, |
| "grad_norm": 0.330078125, |
| "learning_rate": 1.1994594767038916e-05, |
| "loss": 1.5010247230529785, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.1986754966887417, |
| "grad_norm": 0.357421875, |
| "learning_rate": 1.1993731397117814e-05, |
| "loss": 1.406250238418579, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.20198675496688742, |
| "grad_norm": 0.84765625, |
| "learning_rate": 1.1992804131558912e-05, |
| "loss": 1.3863422870635986, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.2052980132450331, |
| "grad_norm": 0.32421875, |
| "learning_rate": 1.1991812982721724e-05, |
| "loss": 1.2978781461715698, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.20860927152317882, |
| "grad_norm": 0.59375, |
| "learning_rate": 1.1990757963817274e-05, |
| "loss": 1.4624699354171753, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.2119205298013245, |
| "grad_norm": 0.3046875, |
| "learning_rate": 1.198963908890791e-05, |
| "loss": 1.436671495437622, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.2152317880794702, |
| "grad_norm": 0.400390625, |
| "learning_rate": 1.1988456372907107e-05, |
| "loss": 1.4213171005249023, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.2185430463576159, |
| "grad_norm": 0.400390625, |
| "learning_rate": 1.1987209831579291e-05, |
| "loss": 1.3802193403244019, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.22185430463576158, |
| "grad_norm": 3.734375, |
| "learning_rate": 1.1985899481539603e-05, |
| "loss": 1.4687957763671875, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.2251655629139073, |
| "grad_norm": 0.275390625, |
| "learning_rate": 1.1984525340253706e-05, |
| "loss": 1.3982669115066528, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.22847682119205298, |
| "grad_norm": 0.43359375, |
| "learning_rate": 1.1983087426037516e-05, |
| "loss": 1.4475197792053223, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.23178807947019867, |
| "grad_norm": 0.369140625, |
| "learning_rate": 1.1981585758057e-05, |
| "loss": 1.3907548189163208, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.23509933774834438, |
| "grad_norm": 0.28125, |
| "learning_rate": 1.1980020356327879e-05, |
| "loss": 1.4539854526519775, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.23841059602649006, |
| "grad_norm": 0.8203125, |
| "learning_rate": 1.1978391241715392e-05, |
| "loss": 1.3030190467834473, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.24172185430463577, |
| "grad_norm": 0.263671875, |
| "learning_rate": 1.1976698435934008e-05, |
| "loss": 1.4244205951690674, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.24503311258278146, |
| "grad_norm": 0.435546875, |
| "learning_rate": 1.1974941961547131e-05, |
| "loss": 1.4479776620864868, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.24834437086092714, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.1973121841966806e-05, |
| "loss": 1.4405760765075684, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.25165562913907286, |
| "grad_norm": 0.337890625, |
| "learning_rate": 1.19712381014534e-05, |
| "loss": 1.3684296607971191, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.25496688741721857, |
| "grad_norm": 0.474609375, |
| "learning_rate": 1.1969290765115295e-05, |
| "loss": 1.3721959590911865, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.2582781456953642, |
| "grad_norm": 0.4296875, |
| "learning_rate": 1.1967279858908524e-05, |
| "loss": 1.2832247018814087, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.26158940397350994, |
| "grad_norm": 0.37890625, |
| "learning_rate": 1.196520540963646e-05, |
| "loss": 1.3485926389694214, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.26490066225165565, |
| "grad_norm": 0.48046875, |
| "learning_rate": 1.1963067444949428e-05, |
| "loss": 1.3671433925628662, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.2682119205298013, |
| "grad_norm": 1.34375, |
| "learning_rate": 1.1960865993344364e-05, |
| "loss": 1.3472628593444824, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.271523178807947, |
| "grad_norm": 0.369140625, |
| "learning_rate": 1.1958601084164404e-05, |
| "loss": 1.3688595294952393, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.27483443708609273, |
| "grad_norm": 0.4453125, |
| "learning_rate": 1.1956272747598524e-05, |
| "loss": 1.3366729021072388, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.2781456953642384, |
| "grad_norm": 0.466796875, |
| "learning_rate": 1.1953881014681118e-05, |
| "loss": 1.2818353176116943, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.2814569536423841, |
| "grad_norm": 0.353515625, |
| "learning_rate": 1.1951425917291586e-05, |
| "loss": 1.3180421590805054, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.2847682119205298, |
| "grad_norm": 0.478515625, |
| "learning_rate": 1.194890748815392e-05, |
| "loss": 1.4252783060073853, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.28807947019867547, |
| "grad_norm": 0.353515625, |
| "learning_rate": 1.1946325760836258e-05, |
| "loss": 1.3658868074417114, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.2913907284768212, |
| "grad_norm": 0.33203125, |
| "learning_rate": 1.1943680769750438e-05, |
| "loss": 1.3427958488464355, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.2947019867549669, |
| "grad_norm": 0.6171875, |
| "learning_rate": 1.1940972550151545e-05, |
| "loss": 1.3762881755828857, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.2980132450331126, |
| "grad_norm": 0.318359375, |
| "learning_rate": 1.1938201138137428e-05, |
| "loss": 1.3542152643203735, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.30132450331125826, |
| "grad_norm": 0.3046875, |
| "learning_rate": 1.1935366570648237e-05, |
| "loss": 1.25732421875, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.304635761589404, |
| "grad_norm": 0.353515625, |
| "learning_rate": 1.1932468885465917e-05, |
| "loss": 1.4017279148101807, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.3079470198675497, |
| "grad_norm": 0.353515625, |
| "learning_rate": 1.1929508121213708e-05, |
| "loss": 1.3300182819366455, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.31125827814569534, |
| "grad_norm": 0.3515625, |
| "learning_rate": 1.1926484317355632e-05, |
| "loss": 1.3051140308380127, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.31456953642384106, |
| "grad_norm": 0.28515625, |
| "learning_rate": 1.1923397514195965e-05, |
| "loss": 1.349334955215454, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.31788079470198677, |
| "grad_norm": 0.5546875, |
| "learning_rate": 1.1920247752878701e-05, |
| "loss": 1.3017675876617432, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.3211920529801324, |
| "grad_norm": 0.40625, |
| "learning_rate": 1.1917035075387008e-05, |
| "loss": 1.325446367263794, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.32450331125827814, |
| "grad_norm": 0.44921875, |
| "learning_rate": 1.1913759524542653e-05, |
| "loss": 1.217170238494873, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.32781456953642385, |
| "grad_norm": 0.392578125, |
| "learning_rate": 1.191042114400545e-05, |
| "loss": 1.3321378231048584, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.33112582781456956, |
| "grad_norm": 0.296875, |
| "learning_rate": 1.1907019978272667e-05, |
| "loss": 1.2979496717453003, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.3344370860927152, |
| "grad_norm": 0.859375, |
| "learning_rate": 1.190355607267844e-05, |
| "loss": 1.2881971597671509, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.33774834437086093, |
| "grad_norm": 0.58203125, |
| "learning_rate": 1.1900029473393154e-05, |
| "loss": 1.3050187826156616, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.34105960264900664, |
| "grad_norm": 0.279296875, |
| "learning_rate": 1.1896440227422847e-05, |
| "loss": 1.2708481550216675, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.3443708609271523, |
| "grad_norm": 0.36328125, |
| "learning_rate": 1.1892788382608577e-05, |
| "loss": 1.2817398309707642, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.347682119205298, |
| "grad_norm": 0.26953125, |
| "learning_rate": 1.1889073987625775e-05, |
| "loss": 1.3485984802246094, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.3509933774834437, |
| "grad_norm": 0.36328125, |
| "learning_rate": 1.1885297091983607e-05, |
| "loss": 1.3059399127960205, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.3543046357615894, |
| "grad_norm": 0.2490234375, |
| "learning_rate": 1.1881457746024304e-05, |
| "loss": 1.2323215007781982, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.3576158940397351, |
| "grad_norm": 0.36328125, |
| "learning_rate": 1.1877556000922508e-05, |
| "loss": 1.3672224283218384, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.3609271523178808, |
| "grad_norm": 0.53515625, |
| "learning_rate": 1.1873591908684567e-05, |
| "loss": 1.4012298583984375, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.36423841059602646, |
| "grad_norm": 0.36328125, |
| "learning_rate": 1.1869565522147865e-05, |
| "loss": 1.3226133584976196, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.3675496688741722, |
| "grad_norm": 0.625, |
| "learning_rate": 1.1865476894980099e-05, |
| "loss": 1.2324888706207275, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.3708609271523179, |
| "grad_norm": 0.6953125, |
| "learning_rate": 1.1861326081678575e-05, |
| "loss": 1.3415271043777466, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.3741721854304636, |
| "grad_norm": 0.326171875, |
| "learning_rate": 1.1857113137569471e-05, |
| "loss": 1.2043918371200562, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.37748344370860926, |
| "grad_norm": 0.70703125, |
| "learning_rate": 1.185283811880712e-05, |
| "loss": 1.2683188915252686, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.38079470198675497, |
| "grad_norm": 0.3203125, |
| "learning_rate": 1.1848501082373235e-05, |
| "loss": 1.2712855339050293, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.3841059602649007, |
| "grad_norm": 0.3984375, |
| "learning_rate": 1.184410208607617e-05, |
| "loss": 1.3452003002166748, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.38741721854304634, |
| "grad_norm": 0.53125, |
| "learning_rate": 1.1839641188550139e-05, |
| "loss": 1.3081663846969604, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.39072847682119205, |
| "grad_norm": 0.330078125, |
| "learning_rate": 1.1835118449254436e-05, |
| "loss": 1.3071247339248657, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.39403973509933776, |
| "grad_norm": 0.6640625, |
| "learning_rate": 1.1830533928472643e-05, |
| "loss": 1.3188387155532837, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.3973509933774834, |
| "grad_norm": 0.333984375, |
| "learning_rate": 1.1825887687311838e-05, |
| "loss": 1.3684465885162354, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.40066225165562913, |
| "grad_norm": 0.34765625, |
| "learning_rate": 1.1821179787701752e-05, |
| "loss": 1.3333898782730103, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.40397350993377484, |
| "grad_norm": 0.96484375, |
| "learning_rate": 1.1816410292393981e-05, |
| "loss": 1.2793607711791992, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.40728476821192056, |
| "grad_norm": 0.310546875, |
| "learning_rate": 1.1811579264961115e-05, |
| "loss": 1.279396891593933, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.4105960264900662, |
| "grad_norm": 0.451171875, |
| "learning_rate": 1.1806686769795907e-05, |
| "loss": 1.215667724609375, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.4139072847682119, |
| "grad_norm": 1.15625, |
| "learning_rate": 1.1801732872110422e-05, |
| "loss": 1.313541293144226, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.41721854304635764, |
| "grad_norm": 0.76171875, |
| "learning_rate": 1.1796717637935153e-05, |
| "loss": 1.2715359926223755, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.4205298013245033, |
| "grad_norm": 0.5625, |
| "learning_rate": 1.1791641134118148e-05, |
| "loss": 1.309295892715454, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.423841059602649, |
| "grad_norm": 0.259765625, |
| "learning_rate": 1.1786503428324115e-05, |
| "loss": 1.2521069049835205, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.4271523178807947, |
| "grad_norm": 0.89453125, |
| "learning_rate": 1.1781304589033533e-05, |
| "loss": 1.140201210975647, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.4304635761589404, |
| "grad_norm": 0.65625, |
| "learning_rate": 1.1776044685541714e-05, |
| "loss": 1.2161259651184082, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.4337748344370861, |
| "grad_norm": 0.43359375, |
| "learning_rate": 1.1770723787957905e-05, |
| "loss": 1.326733112335205, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.4370860927152318, |
| "grad_norm": 0.46875, |
| "learning_rate": 1.1765341967204343e-05, |
| "loss": 1.2771416902542114, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.44039735099337746, |
| "grad_norm": 0.390625, |
| "learning_rate": 1.175989929501531e-05, |
| "loss": 1.2998336553573608, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.44370860927152317, |
| "grad_norm": 0.412109375, |
| "learning_rate": 1.1754395843936171e-05, |
| "loss": 1.3085970878601074, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.4470198675496689, |
| "grad_norm": 0.392578125, |
| "learning_rate": 1.1748831687322418e-05, |
| "loss": 1.307025671005249, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.4503311258278146, |
| "grad_norm": 0.349609375, |
| "learning_rate": 1.1743206899338683e-05, |
| "loss": 1.2538138628005981, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.45364238410596025, |
| "grad_norm": 0.74609375, |
| "learning_rate": 1.1737521554957757e-05, |
| "loss": 1.3346917629241943, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.45695364238410596, |
| "grad_norm": 0.3671875, |
| "learning_rate": 1.1731775729959587e-05, |
| "loss": 1.2633906602859497, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.4602649006622517, |
| "grad_norm": 0.33203125, |
| "learning_rate": 1.1725969500930265e-05, |
| "loss": 1.2056422233581543, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.46357615894039733, |
| "grad_norm": 0.48828125, |
| "learning_rate": 1.1720102945261004e-05, |
| "loss": 1.2352993488311768, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.46688741721854304, |
| "grad_norm": 0.376953125, |
| "learning_rate": 1.1714176141147119e-05, |
| "loss": 1.1110432147979736, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.47019867549668876, |
| "grad_norm": 0.412109375, |
| "learning_rate": 1.1708189167586969e-05, |
| "loss": 1.3148404359817505, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.4735099337748344, |
| "grad_norm": 0.95703125, |
| "learning_rate": 1.1702142104380916e-05, |
| "loss": 1.3085788488388062, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.4768211920529801, |
| "grad_norm": 0.55078125, |
| "learning_rate": 1.1696035032130257e-05, |
| "loss": 1.276586651802063, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.48013245033112584, |
| "grad_norm": 0.3515625, |
| "learning_rate": 1.1689868032236147e-05, |
| "loss": 1.1529457569122314, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.48344370860927155, |
| "grad_norm": 0.373046875, |
| "learning_rate": 1.1683641186898514e-05, |
| "loss": 1.2723958492279053, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.4867549668874172, |
| "grad_norm": 0.365234375, |
| "learning_rate": 1.1677354579114973e-05, |
| "loss": 1.2373274564743042, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.4900662251655629, |
| "grad_norm": 0.298828125, |
| "learning_rate": 1.1671008292679711e-05, |
| "loss": 1.264053463935852, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.49337748344370863, |
| "grad_norm": 0.625, |
| "learning_rate": 1.1664602412182362e-05, |
| "loss": 1.2861313819885254, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.4966887417218543, |
| "grad_norm": 0.39453125, |
| "learning_rate": 1.1658137023006901e-05, |
| "loss": 1.246382236480713, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.4296875, |
| "learning_rate": 1.1651612211330492e-05, |
| "loss": 1.278993844985962, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.5033112582781457, |
| "grad_norm": 0.3984375, |
| "learning_rate": 1.1645028064122333e-05, |
| "loss": 1.3227043151855469, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.5066225165562914, |
| "grad_norm": 0.58984375, |
| "learning_rate": 1.1638384669142515e-05, |
| "loss": 1.2452068328857422, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.5099337748344371, |
| "grad_norm": 0.55859375, |
| "learning_rate": 1.1631682114940838e-05, |
| "loss": 1.2535678148269653, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.5132450331125827, |
| "grad_norm": 0.57421875, |
| "learning_rate": 1.1624920490855633e-05, |
| "loss": 1.276329755783081, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.5165562913907285, |
| "grad_norm": 0.53515625, |
| "learning_rate": 1.1618099887012581e-05, |
| "loss": 1.2224030494689941, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.5198675496688742, |
| "grad_norm": 0.322265625, |
| "learning_rate": 1.1611220394323498e-05, |
| "loss": 1.2366602420806885, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.5231788079470199, |
| "grad_norm": 0.50390625, |
| "learning_rate": 1.1604282104485128e-05, |
| "loss": 1.2945020198822021, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.5264900662251656, |
| "grad_norm": 0.3828125, |
| "learning_rate": 1.1597285109977927e-05, |
| "loss": 1.216389536857605, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.5298013245033113, |
| "grad_norm": 0.3125, |
| "learning_rate": 1.159022950406482e-05, |
| "loss": 1.2610507011413574, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.5331125827814569, |
| "grad_norm": 0.31640625, |
| "learning_rate": 1.158311538078997e-05, |
| "loss": 1.2242460250854492, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.5364238410596026, |
| "grad_norm": 0.6015625, |
| "learning_rate": 1.1575942834977512e-05, |
| "loss": 1.2804948091506958, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.5397350993377483, |
| "grad_norm": 0.46484375, |
| "learning_rate": 1.1568711962230302e-05, |
| "loss": 1.2407575845718384, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.543046357615894, |
| "grad_norm": 0.306640625, |
| "learning_rate": 1.1561422858928625e-05, |
| "loss": 1.2760370969772339, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.5463576158940397, |
| "grad_norm": 0.263671875, |
| "learning_rate": 1.1554075622228926e-05, |
| "loss": 1.2385847568511963, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.5496688741721855, |
| "grad_norm": 0.365234375, |
| "learning_rate": 1.1546670350062515e-05, |
| "loss": 1.271156668663025, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.5529801324503312, |
| "grad_norm": 0.369140625, |
| "learning_rate": 1.1539207141134253e-05, |
| "loss": 1.288719654083252, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.5562913907284768, |
| "grad_norm": 0.390625, |
| "learning_rate": 1.1531686094921236e-05, |
| "loss": 1.2762104272842407, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.5596026490066225, |
| "grad_norm": 0.796875, |
| "learning_rate": 1.1524107311671476e-05, |
| "loss": 1.1551339626312256, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.5629139072847682, |
| "grad_norm": 0.28515625, |
| "learning_rate": 1.1516470892402566e-05, |
| "loss": 1.2796684503555298, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.5662251655629139, |
| "grad_norm": 0.74609375, |
| "learning_rate": 1.1508776938900323e-05, |
| "loss": 1.2220215797424316, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.5695364238410596, |
| "grad_norm": 0.2255859375, |
| "learning_rate": 1.150102555371744e-05, |
| "loss": 1.2903807163238525, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.5728476821192053, |
| "grad_norm": 0.3515625, |
| "learning_rate": 1.1493216840172122e-05, |
| "loss": 1.2266340255737305, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.5761589403973509, |
| "grad_norm": 0.365234375, |
| "learning_rate": 1.148535090234669e-05, |
| "loss": 1.2798501253128052, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.5794701986754967, |
| "grad_norm": 0.3203125, |
| "learning_rate": 1.1477427845086227e-05, |
| "loss": 1.2531522512435913, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.5827814569536424, |
| "grad_norm": 0.3125, |
| "learning_rate": 1.1469447773997141e-05, |
| "loss": 1.237846851348877, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.5860927152317881, |
| "grad_norm": 0.33203125, |
| "learning_rate": 1.1461410795445787e-05, |
| "loss": 1.2226859331130981, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.5894039735099338, |
| "grad_norm": 0.310546875, |
| "learning_rate": 1.1453317016557045e-05, |
| "loss": 1.2930818796157837, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.5927152317880795, |
| "grad_norm": 0.703125, |
| "learning_rate": 1.1445166545212877e-05, |
| "loss": 1.3712222576141357, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.5960264900662252, |
| "grad_norm": 0.59765625, |
| "learning_rate": 1.14369594900509e-05, |
| "loss": 1.2117972373962402, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.5993377483443708, |
| "grad_norm": 0.875, |
| "learning_rate": 1.1428695960462942e-05, |
| "loss": 1.1898581981658936, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.6026490066225165, |
| "grad_norm": 0.35546875, |
| "learning_rate": 1.142037606659357e-05, |
| "loss": 1.2242567539215088, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.6059602649006622, |
| "grad_norm": 0.3515625, |
| "learning_rate": 1.1411999919338643e-05, |
| "loss": 1.3001854419708252, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.609271523178808, |
| "grad_norm": 0.353515625, |
| "learning_rate": 1.1403567630343807e-05, |
| "loss": 1.33438241481781, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.6125827814569537, |
| "grad_norm": 0.4921875, |
| "learning_rate": 1.139507931200303e-05, |
| "loss": 1.2074322700500488, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.6158940397350994, |
| "grad_norm": 0.75, |
| "learning_rate": 1.1386535077457095e-05, |
| "loss": 1.1700530052185059, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.6192052980132451, |
| "grad_norm": 0.416015625, |
| "learning_rate": 1.1377935040592088e-05, |
| "loss": 1.1994613409042358, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.6225165562913907, |
| "grad_norm": 0.298828125, |
| "learning_rate": 1.1369279316037887e-05, |
| "loss": 1.3158299922943115, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.6258278145695364, |
| "grad_norm": 0.4453125, |
| "learning_rate": 1.136056801916663e-05, |
| "loss": 1.2066657543182373, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.6291390728476821, |
| "grad_norm": 0.423828125, |
| "learning_rate": 1.1351801266091177e-05, |
| "loss": 1.2929366827011108, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.6324503311258278, |
| "grad_norm": 0.384765625, |
| "learning_rate": 1.1342979173663568e-05, |
| "loss": 1.2256704568862915, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.6357615894039735, |
| "grad_norm": 0.265625, |
| "learning_rate": 1.1334101859473456e-05, |
| "loss": 1.2296499013900757, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.6390728476821192, |
| "grad_norm": 0.34375, |
| "learning_rate": 1.1325169441846548e-05, |
| "loss": 1.2478747367858887, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.6423841059602649, |
| "grad_norm": 0.6875, |
| "learning_rate": 1.1316182039843021e-05, |
| "loss": 1.2903329133987427, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.6456953642384106, |
| "grad_norm": 0.431640625, |
| "learning_rate": 1.1307139773255946e-05, |
| "loss": 1.219477891921997, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.6490066225165563, |
| "grad_norm": 0.546875, |
| "learning_rate": 1.1298042762609677e-05, |
| "loss": 1.256780743598938, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.652317880794702, |
| "grad_norm": 0.447265625, |
| "learning_rate": 1.1288891129158254e-05, |
| "loss": 1.2532964944839478, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.6556291390728477, |
| "grad_norm": 0.279296875, |
| "learning_rate": 1.127968499488378e-05, |
| "loss": 1.2641328573226929, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.6589403973509934, |
| "grad_norm": 0.294921875, |
| "learning_rate": 1.1270424482494814e-05, |
| "loss": 1.1905931234359741, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.6622516556291391, |
| "grad_norm": 0.40234375, |
| "learning_rate": 1.1261109715424701e-05, |
| "loss": 1.2366241216659546, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.6655629139072847, |
| "grad_norm": 0.29296875, |
| "learning_rate": 1.1251740817829957e-05, |
| "loss": 1.119259238243103, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.6688741721854304, |
| "grad_norm": 0.5390625, |
| "learning_rate": 1.1242317914588603e-05, |
| "loss": 1.2134146690368652, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.6721854304635762, |
| "grad_norm": 0.4609375, |
| "learning_rate": 1.12328411312985e-05, |
| "loss": 1.1952847242355347, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.6754966887417219, |
| "grad_norm": 0.38671875, |
| "learning_rate": 1.1223310594275679e-05, |
| "loss": 1.2045927047729492, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.6788079470198676, |
| "grad_norm": 0.427734375, |
| "learning_rate": 1.1213726430552653e-05, |
| "loss": 1.2554584741592407, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.6821192052980133, |
| "grad_norm": 0.41015625, |
| "learning_rate": 1.1204088767876721e-05, |
| "loss": 1.201694130897522, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.6854304635761589, |
| "grad_norm": 0.265625, |
| "learning_rate": 1.1194397734708273e-05, |
| "loss": 1.1945827007293701, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.6887417218543046, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.1184653460219077e-05, |
| "loss": 1.2243486642837524, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.6920529801324503, |
| "grad_norm": 0.306640625, |
| "learning_rate": 1.1174856074290548e-05, |
| "loss": 1.2726621627807617, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.695364238410596, |
| "grad_norm": 0.36328125, |
| "learning_rate": 1.1165005707512026e-05, |
| "loss": 1.205135464668274, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.6986754966887417, |
| "grad_norm": 0.396484375, |
| "learning_rate": 1.1155102491179039e-05, |
| "loss": 1.2586928606033325, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.7019867549668874, |
| "grad_norm": 0.78125, |
| "learning_rate": 1.1145146557291536e-05, |
| "loss": 1.1896426677703857, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.7052980132450332, |
| "grad_norm": 0.458984375, |
| "learning_rate": 1.1135138038552144e-05, |
| "loss": 1.2678966522216797, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.7086092715231788, |
| "grad_norm": 0.58203125, |
| "learning_rate": 1.1125077068364393e-05, |
| "loss": 1.2325910329818726, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.7119205298013245, |
| "grad_norm": 0.287109375, |
| "learning_rate": 1.1114963780830942e-05, |
| "loss": 1.2652318477630615, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.7152317880794702, |
| "grad_norm": 0.357421875, |
| "learning_rate": 1.1104798310751776e-05, |
| "loss": 1.2476012706756592, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.7185430463576159, |
| "grad_norm": 0.423828125, |
| "learning_rate": 1.1094580793622432e-05, |
| "loss": 1.2257249355316162, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.7218543046357616, |
| "grad_norm": 0.66796875, |
| "learning_rate": 1.1084311365632182e-05, |
| "loss": 1.1938482522964478, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.7251655629139073, |
| "grad_norm": 0.287109375, |
| "learning_rate": 1.1073990163662212e-05, |
| "loss": 1.2081587314605713, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.7284768211920529, |
| "grad_norm": 0.408203125, |
| "learning_rate": 1.1063617325283801e-05, |
| "loss": 1.2109920978546143, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.7317880794701986, |
| "grad_norm": 0.392578125, |
| "learning_rate": 1.1053192988756505e-05, |
| "loss": 1.234221339225769, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.7350993377483444, |
| "grad_norm": 0.2490234375, |
| "learning_rate": 1.1042717293026281e-05, |
| "loss": 1.2554912567138672, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.7384105960264901, |
| "grad_norm": 0.67578125, |
| "learning_rate": 1.103219037772366e-05, |
| "loss": 1.2109016180038452, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.7417218543046358, |
| "grad_norm": 0.474609375, |
| "learning_rate": 1.1021612383161882e-05, |
| "loss": 1.171287178993225, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.7450331125827815, |
| "grad_norm": 0.5703125, |
| "learning_rate": 1.1010983450335014e-05, |
| "loss": 1.2758331298828125, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.7483443708609272, |
| "grad_norm": 0.65625, |
| "learning_rate": 1.1000303720916088e-05, |
| "loss": 1.2499141693115234, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.7516556291390728, |
| "grad_norm": 0.318359375, |
| "learning_rate": 1.0989573337255199e-05, |
| "loss": 1.2714009284973145, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.7549668874172185, |
| "grad_norm": 0.32421875, |
| "learning_rate": 1.097879244237761e-05, |
| "loss": 1.3011789321899414, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.7582781456953642, |
| "grad_norm": 0.408203125, |
| "learning_rate": 1.0967961179981856e-05, |
| "loss": 1.2752792835235596, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.7615894039735099, |
| "grad_norm": 2.953125, |
| "learning_rate": 1.095707969443781e-05, |
| "loss": 1.1760289669036865, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.7649006622516556, |
| "grad_norm": 0.5703125, |
| "learning_rate": 1.0946148130784779e-05, |
| "loss": 1.3046189546585083, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.7682119205298014, |
| "grad_norm": 0.330078125, |
| "learning_rate": 1.0935166634729555e-05, |
| "loss": 1.204204797744751, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.7715231788079471, |
| "grad_norm": 0.3828125, |
| "learning_rate": 1.092413535264448e-05, |
| "loss": 1.204719066619873, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.7748344370860927, |
| "grad_norm": 0.28515625, |
| "learning_rate": 1.0913054431565497e-05, |
| "loss": 1.2189865112304688, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.7781456953642384, |
| "grad_norm": 0.66796875, |
| "learning_rate": 1.090192401919018e-05, |
| "loss": 1.130168080329895, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.7814569536423841, |
| "grad_norm": 0.330078125, |
| "learning_rate": 1.0890744263875782e-05, |
| "loss": 1.2293215990066528, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.7847682119205298, |
| "grad_norm": 0.265625, |
| "learning_rate": 1.0879515314637238e-05, |
| "loss": 1.189441442489624, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.7880794701986755, |
| "grad_norm": 0.345703125, |
| "learning_rate": 1.08682373211452e-05, |
| "loss": 1.2883621454238892, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.7913907284768212, |
| "grad_norm": 0.83203125, |
| "learning_rate": 1.0856910433724017e-05, |
| "loss": 1.2602189779281616, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.7947019867549668, |
| "grad_norm": 0.318359375, |
| "learning_rate": 1.0845534803349758e-05, |
| "loss": 1.2539976835250854, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.7980132450331126, |
| "grad_norm": 0.55078125, |
| "learning_rate": 1.0834110581648181e-05, |
| "loss": 1.27140474319458, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.8013245033112583, |
| "grad_norm": 0.427734375, |
| "learning_rate": 1.0822637920892714e-05, |
| "loss": 1.2304598093032837, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.804635761589404, |
| "grad_norm": 0.375, |
| "learning_rate": 1.0811116974002438e-05, |
| "loss": 1.2280398607254028, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.8079470198675497, |
| "grad_norm": 0.62109375, |
| "learning_rate": 1.0799547894540031e-05, |
| "loss": 1.24711012840271, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.8112582781456954, |
| "grad_norm": 0.283203125, |
| "learning_rate": 1.0787930836709738e-05, |
| "loss": 1.1827949285507202, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.8145695364238411, |
| "grad_norm": 0.380859375, |
| "learning_rate": 1.07762659553553e-05, |
| "loss": 1.2213504314422607, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.8178807947019867, |
| "grad_norm": 0.400390625, |
| "learning_rate": 1.0764553405957902e-05, |
| "loss": 1.2925806045532227, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.8211920529801324, |
| "grad_norm": 0.380859375, |
| "learning_rate": 1.0752793344634095e-05, |
| "loss": 1.318811058998108, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.8245033112582781, |
| "grad_norm": 0.419921875, |
| "learning_rate": 1.074098592813372e-05, |
| "loss": 1.301307201385498, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.8278145695364238, |
| "grad_norm": 0.4296875, |
| "learning_rate": 1.0729131313837806e-05, |
| "loss": 1.2577488422393799, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.8311258278145696, |
| "grad_norm": 0.294921875, |
| "learning_rate": 1.071722965975649e-05, |
| "loss": 1.278800368309021, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.8344370860927153, |
| "grad_norm": 0.390625, |
| "learning_rate": 1.0705281124526903e-05, |
| "loss": 1.1996169090270996, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.8377483443708609, |
| "grad_norm": 0.4921875, |
| "learning_rate": 1.069328586741105e-05, |
| "loss": 1.2342064380645752, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.8410596026490066, |
| "grad_norm": 0.52734375, |
| "learning_rate": 1.0681244048293686e-05, |
| "loss": 1.19930899143219, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.8443708609271523, |
| "grad_norm": 0.59375, |
| "learning_rate": 1.0669155827680204e-05, |
| "loss": 1.2355972528457642, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.847682119205298, |
| "grad_norm": 0.35546875, |
| "learning_rate": 1.0657021366694475e-05, |
| "loss": 1.2616443634033203, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.8509933774834437, |
| "grad_norm": 0.50390625, |
| "learning_rate": 1.0644840827076712e-05, |
| "loss": 1.2749875783920288, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.8543046357615894, |
| "grad_norm": 0.322265625, |
| "learning_rate": 1.0632614371181303e-05, |
| "loss": 1.139912486076355, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.8576158940397351, |
| "grad_norm": 0.404296875, |
| "learning_rate": 1.0620342161974664e-05, |
| "loss": 1.2624098062515259, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.8609271523178808, |
| "grad_norm": 0.400390625, |
| "learning_rate": 1.0608024363033044e-05, |
| "loss": 1.2522523403167725, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.8642384105960265, |
| "grad_norm": 0.46875, |
| "learning_rate": 1.059566113854037e-05, |
| "loss": 1.1696981191635132, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.8675496688741722, |
| "grad_norm": 0.455078125, |
| "learning_rate": 1.0583252653286037e-05, |
| "loss": 1.283386468887329, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.8708609271523179, |
| "grad_norm": 0.8671875, |
| "learning_rate": 1.0570799072662724e-05, |
| "loss": 1.2220840454101562, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.8741721854304636, |
| "grad_norm": 0.435546875, |
| "learning_rate": 1.0558300562664192e-05, |
| "loss": 1.2066237926483154, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.8774834437086093, |
| "grad_norm": 0.34375, |
| "learning_rate": 1.0545757289883054e-05, |
| "loss": 1.215103268623352, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.8807947019867549, |
| "grad_norm": 0.322265625, |
| "learning_rate": 1.0533169421508575e-05, |
| "loss": 1.152737021446228, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.8841059602649006, |
| "grad_norm": 0.6328125, |
| "learning_rate": 1.0520537125324432e-05, |
| "loss": 1.2905833721160889, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.8874172185430463, |
| "grad_norm": 1.0, |
| "learning_rate": 1.0507860569706478e-05, |
| "loss": 1.2769966125488281, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.890728476821192, |
| "grad_norm": 0.734375, |
| "learning_rate": 1.04951399236205e-05, |
| "loss": 1.217033863067627, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.8940397350993378, |
| "grad_norm": 0.408203125, |
| "learning_rate": 1.0482375356619973e-05, |
| "loss": 1.2337075471878052, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.8973509933774835, |
| "grad_norm": 0.396484375, |
| "learning_rate": 1.0469567038843786e-05, |
| "loss": 1.2098217010498047, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.9006622516556292, |
| "grad_norm": 0.333984375, |
| "learning_rate": 1.0456715141013988e-05, |
| "loss": 1.1911996603012085, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.9039735099337748, |
| "grad_norm": 0.39453125, |
| "learning_rate": 1.0443819834433498e-05, |
| "loss": 1.2873048782348633, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.9072847682119205, |
| "grad_norm": 0.271484375, |
| "learning_rate": 1.0430881290983842e-05, |
| "loss": 1.1980925798416138, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.9105960264900662, |
| "grad_norm": 0.2373046875, |
| "learning_rate": 1.0417899683122844e-05, |
| "loss": 1.1385635137557983, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.9139072847682119, |
| "grad_norm": 0.296875, |
| "learning_rate": 1.0404875183882331e-05, |
| "loss": 1.2270296812057495, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.9172185430463576, |
| "grad_norm": 1.1015625, |
| "learning_rate": 1.039180796686583e-05, |
| "loss": 1.1677497625350952, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.9205298013245033, |
| "grad_norm": 0.3203125, |
| "learning_rate": 1.0378698206246262e-05, |
| "loss": 1.2157952785491943, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.9238410596026491, |
| "grad_norm": 0.263671875, |
| "learning_rate": 1.0365546076763594e-05, |
| "loss": 1.1570444107055664, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.9271523178807947, |
| "grad_norm": 0.4375, |
| "learning_rate": 1.0352351753722548e-05, |
| "loss": 1.2186548709869385, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.9304635761589404, |
| "grad_norm": 0.380859375, |
| "learning_rate": 1.0339115412990228e-05, |
| "loss": 1.2401282787322998, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.9337748344370861, |
| "grad_norm": 0.365234375, |
| "learning_rate": 1.03258372309938e-05, |
| "loss": 1.2739580869674683, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.9370860927152318, |
| "grad_norm": 0.44140625, |
| "learning_rate": 1.0312517384718124e-05, |
| "loss": 1.2308034896850586, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.9403973509933775, |
| "grad_norm": 0.359375, |
| "learning_rate": 1.0299156051703417e-05, |
| "loss": 1.1893035173416138, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.9437086092715232, |
| "grad_norm": 0.341796875, |
| "learning_rate": 1.0285753410042862e-05, |
| "loss": 1.202154517173767, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.9470198675496688, |
| "grad_norm": 0.33203125, |
| "learning_rate": 1.0272309638380244e-05, |
| "loss": 1.135772943496704, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.9503311258278145, |
| "grad_norm": 0.51953125, |
| "learning_rate": 1.0258824915907578e-05, |
| "loss": 1.15782630443573, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.9536423841059603, |
| "grad_norm": 0.357421875, |
| "learning_rate": 1.0245299422362702e-05, |
| "loss": 1.2023342847824097, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.956953642384106, |
| "grad_norm": 0.3046875, |
| "learning_rate": 1.02317333380269e-05, |
| "loss": 1.2450610399246216, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.9602649006622517, |
| "grad_norm": 0.298828125, |
| "learning_rate": 1.0218126843722487e-05, |
| "loss": 1.1484456062316895, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.9635761589403974, |
| "grad_norm": 0.447265625, |
| "learning_rate": 1.0204480120810406e-05, |
| "loss": 1.2417069673538208, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.9668874172185431, |
| "grad_norm": 0.36328125, |
| "learning_rate": 1.0190793351187798e-05, |
| "loss": 1.257660984992981, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.9701986754966887, |
| "grad_norm": 0.291015625, |
| "learning_rate": 1.0177066717285597e-05, |
| "loss": 1.1812211275100708, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.9735099337748344, |
| "grad_norm": 0.400390625, |
| "learning_rate": 1.0163300402066082e-05, |
| "loss": 1.1695009469985962, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.9768211920529801, |
| "grad_norm": 0.376953125, |
| "learning_rate": 1.0149494589020445e-05, |
| "loss": 1.1857471466064453, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.9801324503311258, |
| "grad_norm": 0.50390625, |
| "learning_rate": 1.0135649462166347e-05, |
| "loss": 1.3397670984268188, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.9834437086092715, |
| "grad_norm": 0.44140625, |
| "learning_rate": 1.012176520604546e-05, |
| "loss": 1.1958118677139282, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.9867549668874173, |
| "grad_norm": 0.431640625, |
| "learning_rate": 1.0107842005721008e-05, |
| "loss": 1.206683874130249, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.9900662251655629, |
| "grad_norm": 0.392578125, |
| "learning_rate": 1.0093880046775308e-05, |
| "loss": 1.3119841814041138, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.9933774834437086, |
| "grad_norm": 0.296875, |
| "learning_rate": 1.0079879515307288e-05, |
| "loss": 1.1980386972427368, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.9966887417218543, |
| "grad_norm": 0.60546875, |
| "learning_rate": 1.0065840597930002e-05, |
| "loss": 1.2894337177276611, |
| "step": 602 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.296875, |
| "learning_rate": 1.0051763481768167e-05, |
| "loss": 1.2193963527679443, |
| "step": 604 |
| }, |
| { |
| "epoch": 1.0033112582781456, |
| "grad_norm": 0.337890625, |
| "learning_rate": 1.0037648354455634e-05, |
| "loss": 1.1995750665664673, |
| "step": 606 |
| }, |
| { |
| "epoch": 1.0066225165562914, |
| "grad_norm": 0.322265625, |
| "learning_rate": 1.0023495404132912e-05, |
| "loss": 1.130192756652832, |
| "step": 608 |
| }, |
| { |
| "epoch": 1.009933774834437, |
| "grad_norm": 0.423828125, |
| "learning_rate": 1.0009304819444656e-05, |
| "loss": 1.068596363067627, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.0132450331125828, |
| "grad_norm": 0.5390625, |
| "learning_rate": 9.995076789537138e-06, |
| "loss": 1.1101475954055786, |
| "step": 612 |
| }, |
| { |
| "epoch": 1.0165562913907285, |
| "grad_norm": 0.5625, |
| "learning_rate": 9.980811504055752e-06, |
| "loss": 1.107115626335144, |
| "step": 614 |
| }, |
| { |
| "epoch": 1.0198675496688743, |
| "grad_norm": 0.28125, |
| "learning_rate": 9.966509153142464e-06, |
| "loss": 1.142488956451416, |
| "step": 616 |
| }, |
| { |
| "epoch": 1.0231788079470199, |
| "grad_norm": 0.375, |
| "learning_rate": 9.952169927433282e-06, |
| "loss": 1.0813229084014893, |
| "step": 618 |
| }, |
| { |
| "epoch": 1.0264900662251655, |
| "grad_norm": 0.263671875, |
| "learning_rate": 9.93779401805572e-06, |
| "loss": 1.107801079750061, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.0298013245033113, |
| "grad_norm": 0.265625, |
| "learning_rate": 9.923381616626257e-06, |
| "loss": 1.1251033544540405, |
| "step": 622 |
| }, |
| { |
| "epoch": 1.033112582781457, |
| "grad_norm": 0.40625, |
| "learning_rate": 9.908932915247765e-06, |
| "loss": 1.1380716562271118, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.0364238410596027, |
| "grad_norm": 0.361328125, |
| "learning_rate": 9.894448106506957e-06, |
| "loss": 1.1164017915725708, |
| "step": 626 |
| }, |
| { |
| "epoch": 1.0397350993377483, |
| "grad_norm": 0.3671875, |
| "learning_rate": 9.879927383471828e-06, |
| "loss": 1.154578447341919, |
| "step": 628 |
| }, |
| { |
| "epoch": 1.0430463576158941, |
| "grad_norm": 0.31640625, |
| "learning_rate": 9.865370939689072e-06, |
| "loss": 1.2191665172576904, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.0463576158940397, |
| "grad_norm": 1.15625, |
| "learning_rate": 9.850778969181497e-06, |
| "loss": 1.0986692905426025, |
| "step": 632 |
| }, |
| { |
| "epoch": 1.0496688741721854, |
| "grad_norm": 0.455078125, |
| "learning_rate": 9.836151666445465e-06, |
| "loss": 1.059933066368103, |
| "step": 634 |
| }, |
| { |
| "epoch": 1.0529801324503312, |
| "grad_norm": 0.40234375, |
| "learning_rate": 9.821489226448259e-06, |
| "loss": 1.0793806314468384, |
| "step": 636 |
| }, |
| { |
| "epoch": 1.0562913907284768, |
| "grad_norm": 0.271484375, |
| "learning_rate": 9.806791844625525e-06, |
| "loss": 1.1094763278961182, |
| "step": 638 |
| }, |
| { |
| "epoch": 1.0596026490066226, |
| "grad_norm": 0.4375, |
| "learning_rate": 9.792059716878636e-06, |
| "loss": 1.1059627532958984, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.0629139072847682, |
| "grad_norm": 0.42578125, |
| "learning_rate": 9.7772930395721e-06, |
| "loss": 1.0791089534759521, |
| "step": 642 |
| }, |
| { |
| "epoch": 1.0662251655629138, |
| "grad_norm": 0.498046875, |
| "learning_rate": 9.762492009530935e-06, |
| "loss": 1.1716126203536987, |
| "step": 644 |
| }, |
| { |
| "epoch": 1.0695364238410596, |
| "grad_norm": 0.466796875, |
| "learning_rate": 9.747656824038044e-06, |
| "loss": 1.0453616380691528, |
| "step": 646 |
| }, |
| { |
| "epoch": 1.0728476821192052, |
| "grad_norm": 0.396484375, |
| "learning_rate": 9.732787680831589e-06, |
| "loss": 1.0998435020446777, |
| "step": 648 |
| }, |
| { |
| "epoch": 1.076158940397351, |
| "grad_norm": 0.28125, |
| "learning_rate": 9.71788477810236e-06, |
| "loss": 1.0633697509765625, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.0794701986754967, |
| "grad_norm": 1.0703125, |
| "learning_rate": 9.702948314491119e-06, |
| "loss": 1.0050365924835205, |
| "step": 652 |
| }, |
| { |
| "epoch": 1.0827814569536425, |
| "grad_norm": 0.3125, |
| "learning_rate": 9.687978489085959e-06, |
| "loss": 1.1256245374679565, |
| "step": 654 |
| }, |
| { |
| "epoch": 1.086092715231788, |
| "grad_norm": 0.474609375, |
| "learning_rate": 9.67297550141967e-06, |
| "loss": 1.0635474920272827, |
| "step": 656 |
| }, |
| { |
| "epoch": 1.0894039735099337, |
| "grad_norm": 0.484375, |
| "learning_rate": 9.657939551467042e-06, |
| "loss": 1.1216498613357544, |
| "step": 658 |
| }, |
| { |
| "epoch": 1.0927152317880795, |
| "grad_norm": 0.328125, |
| "learning_rate": 9.642870839642226e-06, |
| "loss": 1.0962717533111572, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.096026490066225, |
| "grad_norm": 0.322265625, |
| "learning_rate": 9.62776956679606e-06, |
| "loss": 1.1562925577163696, |
| "step": 662 |
| }, |
| { |
| "epoch": 1.099337748344371, |
| "grad_norm": 0.384765625, |
| "learning_rate": 9.612635934213384e-06, |
| "loss": 1.1446951627731323, |
| "step": 664 |
| }, |
| { |
| "epoch": 1.1026490066225165, |
| "grad_norm": 0.431640625, |
| "learning_rate": 9.597470143610362e-06, |
| "loss": 1.1515048742294312, |
| "step": 666 |
| }, |
| { |
| "epoch": 1.1059602649006623, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.58227239713179e-06, |
| "loss": 1.097119688987732, |
| "step": 668 |
| }, |
| { |
| "epoch": 1.109271523178808, |
| "grad_norm": 0.345703125, |
| "learning_rate": 9.567042897348405e-06, |
| "loss": 1.1437350511550903, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.1125827814569536, |
| "grad_norm": 0.5, |
| "learning_rate": 9.551781847254186e-06, |
| "loss": 1.0003553628921509, |
| "step": 672 |
| }, |
| { |
| "epoch": 1.1158940397350994, |
| "grad_norm": 0.77734375, |
| "learning_rate": 9.536489450263647e-06, |
| "loss": 1.175012230873108, |
| "step": 674 |
| }, |
| { |
| "epoch": 1.119205298013245, |
| "grad_norm": 0.5234375, |
| "learning_rate": 9.521165910209115e-06, |
| "loss": 1.0635179281234741, |
| "step": 676 |
| }, |
| { |
| "epoch": 1.1225165562913908, |
| "grad_norm": 0.46484375, |
| "learning_rate": 9.505811431338033e-06, |
| "loss": 1.119577407836914, |
| "step": 678 |
| }, |
| { |
| "epoch": 1.1258278145695364, |
| "grad_norm": 0.515625, |
| "learning_rate": 9.490426218310226e-06, |
| "loss": 1.110081672668457, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.129139072847682, |
| "grad_norm": 0.42578125, |
| "learning_rate": 9.475010476195173e-06, |
| "loss": 1.125766396522522, |
| "step": 682 |
| }, |
| { |
| "epoch": 1.1324503311258278, |
| "grad_norm": 0.294921875, |
| "learning_rate": 9.459564410469273e-06, |
| "loss": 1.1465976238250732, |
| "step": 684 |
| }, |
| { |
| "epoch": 1.1357615894039734, |
| "grad_norm": 0.384765625, |
| "learning_rate": 9.444088227013111e-06, |
| "loss": 1.0712946653366089, |
| "step": 686 |
| }, |
| { |
| "epoch": 1.1390728476821192, |
| "grad_norm": 0.578125, |
| "learning_rate": 9.42858213210871e-06, |
| "loss": 1.1463303565979004, |
| "step": 688 |
| }, |
| { |
| "epoch": 1.1423841059602649, |
| "grad_norm": 0.359375, |
| "learning_rate": 9.413046332436781e-06, |
| "loss": 1.0968739986419678, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.1456953642384107, |
| "grad_norm": 0.35546875, |
| "learning_rate": 9.39748103507398e-06, |
| "loss": 1.1099607944488525, |
| "step": 692 |
| }, |
| { |
| "epoch": 1.1490066225165563, |
| "grad_norm": 0.466796875, |
| "learning_rate": 9.38188644749012e-06, |
| "loss": 1.2194066047668457, |
| "step": 694 |
| }, |
| { |
| "epoch": 1.152317880794702, |
| "grad_norm": 0.53515625, |
| "learning_rate": 9.36626277754544e-06, |
| "loss": 1.0382094383239746, |
| "step": 696 |
| }, |
| { |
| "epoch": 1.1556291390728477, |
| "grad_norm": 0.302734375, |
| "learning_rate": 9.350610233487807e-06, |
| "loss": 1.1106114387512207, |
| "step": 698 |
| }, |
| { |
| "epoch": 1.1589403973509933, |
| "grad_norm": 0.388671875, |
| "learning_rate": 9.334929023949954e-06, |
| "loss": 1.104656457901001, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.1622516556291391, |
| "grad_norm": 0.404296875, |
| "learning_rate": 9.319219357946699e-06, |
| "loss": 1.1264766454696655, |
| "step": 702 |
| }, |
| { |
| "epoch": 1.1655629139072847, |
| "grad_norm": 0.458984375, |
| "learning_rate": 9.303481444872153e-06, |
| "loss": 1.1377006769180298, |
| "step": 704 |
| }, |
| { |
| "epoch": 1.1688741721854305, |
| "grad_norm": 0.376953125, |
| "learning_rate": 9.287715494496932e-06, |
| "loss": 1.0277259349822998, |
| "step": 706 |
| }, |
| { |
| "epoch": 1.1721854304635762, |
| "grad_norm": 0.375, |
| "learning_rate": 9.271921716965366e-06, |
| "loss": 1.1559712886810303, |
| "step": 708 |
| }, |
| { |
| "epoch": 1.1754966887417218, |
| "grad_norm": 0.328125, |
| "learning_rate": 9.256100322792685e-06, |
| "loss": 1.1573880910873413, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.1788079470198676, |
| "grad_norm": 0.337890625, |
| "learning_rate": 9.240251522862228e-06, |
| "loss": 1.0647897720336914, |
| "step": 712 |
| }, |
| { |
| "epoch": 1.1821192052980132, |
| "grad_norm": 0.5, |
| "learning_rate": 9.224375528422625e-06, |
| "loss": 1.1478759050369263, |
| "step": 714 |
| }, |
| { |
| "epoch": 1.185430463576159, |
| "grad_norm": 0.5, |
| "learning_rate": 9.208472551084974e-06, |
| "loss": 1.171141266822815, |
| "step": 716 |
| }, |
| { |
| "epoch": 1.1887417218543046, |
| "grad_norm": 0.33984375, |
| "learning_rate": 9.192542802820039e-06, |
| "loss": 1.1633001565933228, |
| "step": 718 |
| }, |
| { |
| "epoch": 1.1920529801324504, |
| "grad_norm": 0.384765625, |
| "learning_rate": 9.176586495955403e-06, |
| "loss": 1.0764974355697632, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.195364238410596, |
| "grad_norm": 0.484375, |
| "learning_rate": 9.160603843172655e-06, |
| "loss": 1.094746708869934, |
| "step": 722 |
| }, |
| { |
| "epoch": 1.1986754966887416, |
| "grad_norm": 0.37109375, |
| "learning_rate": 9.144595057504552e-06, |
| "loss": 1.1030086278915405, |
| "step": 724 |
| }, |
| { |
| "epoch": 1.2019867549668874, |
| "grad_norm": 0.333984375, |
| "learning_rate": 9.128560352332163e-06, |
| "loss": 1.1092870235443115, |
| "step": 726 |
| }, |
| { |
| "epoch": 1.205298013245033, |
| "grad_norm": 0.455078125, |
| "learning_rate": 9.112499941382057e-06, |
| "loss": 1.0155194997787476, |
| "step": 728 |
| }, |
| { |
| "epoch": 1.2086092715231789, |
| "grad_norm": 0.4296875, |
| "learning_rate": 9.096414038723415e-06, |
| "loss": 1.1965831518173218, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.2119205298013245, |
| "grad_norm": 0.26171875, |
| "learning_rate": 9.080302858765213e-06, |
| "loss": 1.077614665031433, |
| "step": 732 |
| }, |
| { |
| "epoch": 1.2152317880794703, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.06416661625334e-06, |
| "loss": 1.1260370016098022, |
| "step": 734 |
| }, |
| { |
| "epoch": 1.218543046357616, |
| "grad_norm": 0.55859375, |
| "learning_rate": 9.048005526267742e-06, |
| "loss": 1.122714638710022, |
| "step": 736 |
| }, |
| { |
| "epoch": 1.2218543046357615, |
| "grad_norm": 0.3984375, |
| "learning_rate": 9.031819804219566e-06, |
| "loss": 1.1720261573791504, |
| "step": 738 |
| }, |
| { |
| "epoch": 1.2251655629139073, |
| "grad_norm": 0.380859375, |
| "learning_rate": 9.01560966584827e-06, |
| "loss": 1.1976429224014282, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.228476821192053, |
| "grad_norm": 0.435546875, |
| "learning_rate": 8.999375327218765e-06, |
| "loss": 1.1546531915664673, |
| "step": 742 |
| }, |
| { |
| "epoch": 1.2317880794701987, |
| "grad_norm": 0.2890625, |
| "learning_rate": 8.98311700471852e-06, |
| "loss": 1.1697813272476196, |
| "step": 744 |
| }, |
| { |
| "epoch": 1.2350993377483444, |
| "grad_norm": 0.96875, |
| "learning_rate": 8.966834915054694e-06, |
| "loss": 1.1024788618087769, |
| "step": 746 |
| }, |
| { |
| "epoch": 1.23841059602649, |
| "grad_norm": 0.291015625, |
| "learning_rate": 8.950529275251226e-06, |
| "loss": 1.1089746952056885, |
| "step": 748 |
| }, |
| { |
| "epoch": 1.2417218543046358, |
| "grad_norm": 0.88671875, |
| "learning_rate": 8.934200302645968e-06, |
| "loss": 1.0583044290542603, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.2450331125827814, |
| "grad_norm": 1.6796875, |
| "learning_rate": 8.917848214887765e-06, |
| "loss": 1.0897866487503052, |
| "step": 752 |
| }, |
| { |
| "epoch": 1.2483443708609272, |
| "grad_norm": 0.267578125, |
| "learning_rate": 8.901473229933567e-06, |
| "loss": 1.0810881853103638, |
| "step": 754 |
| }, |
| { |
| "epoch": 1.2516556291390728, |
| "grad_norm": 0.8203125, |
| "learning_rate": 8.88507556604552e-06, |
| "loss": 1.0693601369857788, |
| "step": 756 |
| }, |
| { |
| "epoch": 1.2549668874172186, |
| "grad_norm": 0.375, |
| "learning_rate": 8.868655441788064e-06, |
| "loss": 1.0969278812408447, |
| "step": 758 |
| }, |
| { |
| "epoch": 1.2582781456953642, |
| "grad_norm": 0.375, |
| "learning_rate": 8.852213076025005e-06, |
| "loss": 1.154745101928711, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.26158940397351, |
| "grad_norm": 0.30859375, |
| "learning_rate": 8.83574868791661e-06, |
| "loss": 1.1125857830047607, |
| "step": 762 |
| }, |
| { |
| "epoch": 1.2649006622516556, |
| "grad_norm": 0.39453125, |
| "learning_rate": 8.819262496916676e-06, |
| "loss": 1.1467704772949219, |
| "step": 764 |
| }, |
| { |
| "epoch": 1.2682119205298013, |
| "grad_norm": 0.486328125, |
| "learning_rate": 8.80275472276962e-06, |
| "loss": 1.1354914903640747, |
| "step": 766 |
| }, |
| { |
| "epoch": 1.271523178807947, |
| "grad_norm": 0.609375, |
| "learning_rate": 8.786225585507534e-06, |
| "loss": 1.1262799501419067, |
| "step": 768 |
| }, |
| { |
| "epoch": 1.2748344370860927, |
| "grad_norm": 0.87890625, |
| "learning_rate": 8.769675305447267e-06, |
| "loss": 1.0878307819366455, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.2781456953642385, |
| "grad_norm": 0.31640625, |
| "learning_rate": 8.753104103187465e-06, |
| "loss": 1.0384912490844727, |
| "step": 772 |
| }, |
| { |
| "epoch": 1.281456953642384, |
| "grad_norm": 0.345703125, |
| "learning_rate": 8.73651219960566e-06, |
| "loss": 1.1220159530639648, |
| "step": 774 |
| }, |
| { |
| "epoch": 1.2847682119205297, |
| "grad_norm": 0.296875, |
| "learning_rate": 8.719899815855308e-06, |
| "loss": 1.057347059249878, |
| "step": 776 |
| }, |
| { |
| "epoch": 1.2880794701986755, |
| "grad_norm": 0.53515625, |
| "learning_rate": 8.703267173362838e-06, |
| "loss": 1.1674779653549194, |
| "step": 778 |
| }, |
| { |
| "epoch": 1.2913907284768211, |
| "grad_norm": 0.4140625, |
| "learning_rate": 8.686614493824725e-06, |
| "loss": 1.1553187370300293, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.294701986754967, |
| "grad_norm": 0.263671875, |
| "learning_rate": 8.6699419992045e-06, |
| "loss": 1.0362199544906616, |
| "step": 782 |
| }, |
| { |
| "epoch": 1.2980132450331126, |
| "grad_norm": 0.341796875, |
| "learning_rate": 8.653249911729814e-06, |
| "loss": 1.0475202798843384, |
| "step": 784 |
| }, |
| { |
| "epoch": 1.3013245033112582, |
| "grad_norm": 0.263671875, |
| "learning_rate": 8.63653845388948e-06, |
| "loss": 0.9839364886283875, |
| "step": 786 |
| }, |
| { |
| "epoch": 1.304635761589404, |
| "grad_norm": 0.396484375, |
| "learning_rate": 8.619807848430485e-06, |
| "loss": 1.09470534324646, |
| "step": 788 |
| }, |
| { |
| "epoch": 1.3079470198675498, |
| "grad_norm": 0.44140625, |
| "learning_rate": 8.603058318355047e-06, |
| "loss": 1.1766548156738281, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.3112582781456954, |
| "grad_norm": 0.439453125, |
| "learning_rate": 8.586290086917619e-06, |
| "loss": 1.1274049282073975, |
| "step": 792 |
| }, |
| { |
| "epoch": 1.314569536423841, |
| "grad_norm": 0.345703125, |
| "learning_rate": 8.569503377621931e-06, |
| "loss": 1.1291522979736328, |
| "step": 794 |
| }, |
| { |
| "epoch": 1.3178807947019868, |
| "grad_norm": 0.419921875, |
| "learning_rate": 8.552698414218007e-06, |
| "loss": 1.100280523300171, |
| "step": 796 |
| }, |
| { |
| "epoch": 1.3211920529801324, |
| "grad_norm": 0.71484375, |
| "learning_rate": 8.535875420699173e-06, |
| "loss": 1.0891684293746948, |
| "step": 798 |
| }, |
| { |
| "epoch": 1.3245033112582782, |
| "grad_norm": 0.3984375, |
| "learning_rate": 8.519034621299082e-06, |
| "loss": 1.1650758981704712, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.3278145695364238, |
| "grad_norm": 0.361328125, |
| "learning_rate": 8.502176240488724e-06, |
| "loss": 1.0949400663375854, |
| "step": 802 |
| }, |
| { |
| "epoch": 1.3311258278145695, |
| "grad_norm": 0.328125, |
| "learning_rate": 8.485300502973424e-06, |
| "loss": 1.0937325954437256, |
| "step": 804 |
| }, |
| { |
| "epoch": 1.3344370860927153, |
| "grad_norm": 0.447265625, |
| "learning_rate": 8.468407633689864e-06, |
| "loss": 1.0812309980392456, |
| "step": 806 |
| }, |
| { |
| "epoch": 1.3377483443708609, |
| "grad_norm": 0.42578125, |
| "learning_rate": 8.451497857803066e-06, |
| "loss": 1.0209524631500244, |
| "step": 808 |
| }, |
| { |
| "epoch": 1.3410596026490067, |
| "grad_norm": 0.3984375, |
| "learning_rate": 8.434571400703408e-06, |
| "loss": 1.1526941061019897, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.3443708609271523, |
| "grad_norm": 0.53125, |
| "learning_rate": 8.417628488003606e-06, |
| "loss": 1.1507055759429932, |
| "step": 812 |
| }, |
| { |
| "epoch": 1.347682119205298, |
| "grad_norm": 0.263671875, |
| "learning_rate": 8.400669345535718e-06, |
| "loss": 1.0535410642623901, |
| "step": 814 |
| }, |
| { |
| "epoch": 1.3509933774834437, |
| "grad_norm": 0.3515625, |
| "learning_rate": 8.383694199348123e-06, |
| "loss": 1.1291109323501587, |
| "step": 816 |
| }, |
| { |
| "epoch": 1.3543046357615893, |
| "grad_norm": 0.435546875, |
| "learning_rate": 8.366703275702516e-06, |
| "loss": 1.0811841487884521, |
| "step": 818 |
| }, |
| { |
| "epoch": 1.3576158940397351, |
| "grad_norm": 0.2451171875, |
| "learning_rate": 8.349696801070893e-06, |
| "loss": 1.134947657585144, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.3609271523178808, |
| "grad_norm": 0.36328125, |
| "learning_rate": 8.332675002132523e-06, |
| "loss": 1.1577849388122559, |
| "step": 822 |
| }, |
| { |
| "epoch": 1.3642384105960264, |
| "grad_norm": 0.8046875, |
| "learning_rate": 8.315638105770938e-06, |
| "loss": 1.1460561752319336, |
| "step": 824 |
| }, |
| { |
| "epoch": 1.3675496688741722, |
| "grad_norm": 0.69140625, |
| "learning_rate": 8.298586339070901e-06, |
| "loss": 1.1265171766281128, |
| "step": 826 |
| }, |
| { |
| "epoch": 1.370860927152318, |
| "grad_norm": 0.31640625, |
| "learning_rate": 8.28151992931538e-06, |
| "loss": 1.2352640628814697, |
| "step": 828 |
| }, |
| { |
| "epoch": 1.3741721854304636, |
| "grad_norm": 0.36328125, |
| "learning_rate": 8.264439103982522e-06, |
| "loss": 1.1254183053970337, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.3774834437086092, |
| "grad_norm": 0.53125, |
| "learning_rate": 8.247344090742623e-06, |
| "loss": 1.1543939113616943, |
| "step": 832 |
| }, |
| { |
| "epoch": 1.380794701986755, |
| "grad_norm": 0.380859375, |
| "learning_rate": 8.230235117455083e-06, |
| "loss": 1.0869652032852173, |
| "step": 834 |
| }, |
| { |
| "epoch": 1.3841059602649006, |
| "grad_norm": 0.34765625, |
| "learning_rate": 8.213112412165377e-06, |
| "loss": 1.0378072261810303, |
| "step": 836 |
| }, |
| { |
| "epoch": 1.3874172185430464, |
| "grad_norm": 0.287109375, |
| "learning_rate": 8.195976203102018e-06, |
| "loss": 1.1564513444900513, |
| "step": 838 |
| }, |
| { |
| "epoch": 1.390728476821192, |
| "grad_norm": 0.287109375, |
| "learning_rate": 8.178826718673508e-06, |
| "loss": 1.117110252380371, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.3940397350993377, |
| "grad_norm": 0.921875, |
| "learning_rate": 8.161664187465298e-06, |
| "loss": 1.1238434314727783, |
| "step": 842 |
| }, |
| { |
| "epoch": 1.3973509933774835, |
| "grad_norm": 0.384765625, |
| "learning_rate": 8.144488838236734e-06, |
| "loss": 1.1785955429077148, |
| "step": 844 |
| }, |
| { |
| "epoch": 1.400662251655629, |
| "grad_norm": 0.40625, |
| "learning_rate": 8.127300899918021e-06, |
| "loss": 1.2380834817886353, |
| "step": 846 |
| }, |
| { |
| "epoch": 1.403973509933775, |
| "grad_norm": 0.8984375, |
| "learning_rate": 8.110100601607157e-06, |
| "loss": 1.1105948686599731, |
| "step": 848 |
| }, |
| { |
| "epoch": 1.4072847682119205, |
| "grad_norm": 0.3671875, |
| "learning_rate": 8.092888172566887e-06, |
| "loss": 1.1651510000228882, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.410596026490066, |
| "grad_norm": 0.302734375, |
| "learning_rate": 8.075663842221656e-06, |
| "loss": 1.0690885782241821, |
| "step": 852 |
| }, |
| { |
| "epoch": 1.413907284768212, |
| "grad_norm": 1.015625, |
| "learning_rate": 8.058427840154531e-06, |
| "loss": 1.154086947441101, |
| "step": 854 |
| }, |
| { |
| "epoch": 1.4172185430463577, |
| "grad_norm": 0.41015625, |
| "learning_rate": 8.041180396104154e-06, |
| "loss": 1.0742074251174927, |
| "step": 856 |
| }, |
| { |
| "epoch": 1.4205298013245033, |
| "grad_norm": 0.90234375, |
| "learning_rate": 8.02392173996168e-06, |
| "loss": 1.0775120258331299, |
| "step": 858 |
| }, |
| { |
| "epoch": 1.423841059602649, |
| "grad_norm": 0.466796875, |
| "learning_rate": 8.006652101767706e-06, |
| "loss": 1.1411365270614624, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.4271523178807948, |
| "grad_norm": 0.578125, |
| "learning_rate": 7.989371711709214e-06, |
| "loss": 1.159982442855835, |
| "step": 862 |
| }, |
| { |
| "epoch": 1.4304635761589404, |
| "grad_norm": 0.462890625, |
| "learning_rate": 7.97208080011649e-06, |
| "loss": 1.1033796072006226, |
| "step": 864 |
| }, |
| { |
| "epoch": 1.4337748344370862, |
| "grad_norm": 0.419921875, |
| "learning_rate": 7.95477959746007e-06, |
| "loss": 1.1719162464141846, |
| "step": 866 |
| }, |
| { |
| "epoch": 1.4370860927152318, |
| "grad_norm": 0.4140625, |
| "learning_rate": 7.937468334347649e-06, |
| "loss": 1.202003002166748, |
| "step": 868 |
| }, |
| { |
| "epoch": 1.4403973509933774, |
| "grad_norm": 0.75390625, |
| "learning_rate": 7.92014724152103e-06, |
| "loss": 1.1364517211914062, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.4437086092715232, |
| "grad_norm": 0.41796875, |
| "learning_rate": 7.902816549853027e-06, |
| "loss": 1.1248327493667603, |
| "step": 872 |
| }, |
| { |
| "epoch": 1.4470198675496688, |
| "grad_norm": 0.330078125, |
| "learning_rate": 7.885476490344407e-06, |
| "loss": 1.122451663017273, |
| "step": 874 |
| }, |
| { |
| "epoch": 1.4503311258278146, |
| "grad_norm": 0.25390625, |
| "learning_rate": 7.868127294120787e-06, |
| "loss": 1.0662963390350342, |
| "step": 876 |
| }, |
| { |
| "epoch": 1.4536423841059603, |
| "grad_norm": 0.271484375, |
| "learning_rate": 7.850769192429576e-06, |
| "loss": 1.1393651962280273, |
| "step": 878 |
| }, |
| { |
| "epoch": 1.4569536423841059, |
| "grad_norm": 0.361328125, |
| "learning_rate": 7.833402416636885e-06, |
| "loss": 1.051371693611145, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.4602649006622517, |
| "grad_norm": 0.421875, |
| "learning_rate": 7.816027198224437e-06, |
| "loss": 1.134348750114441, |
| "step": 882 |
| }, |
| { |
| "epoch": 1.4635761589403973, |
| "grad_norm": 0.4375, |
| "learning_rate": 7.798643768786491e-06, |
| "loss": 1.1689413785934448, |
| "step": 884 |
| }, |
| { |
| "epoch": 1.466887417218543, |
| "grad_norm": 0.341796875, |
| "learning_rate": 7.781252360026748e-06, |
| "loss": 1.1354305744171143, |
| "step": 886 |
| }, |
| { |
| "epoch": 1.4701986754966887, |
| "grad_norm": 0.443359375, |
| "learning_rate": 7.763853203755266e-06, |
| "loss": 1.0442321300506592, |
| "step": 888 |
| }, |
| { |
| "epoch": 1.4735099337748343, |
| "grad_norm": 0.310546875, |
| "learning_rate": 7.746446531885371e-06, |
| "loss": 1.1418088674545288, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.4768211920529801, |
| "grad_norm": 0.353515625, |
| "learning_rate": 7.729032576430563e-06, |
| "loss": 1.1551663875579834, |
| "step": 892 |
| }, |
| { |
| "epoch": 1.480132450331126, |
| "grad_norm": 0.53125, |
| "learning_rate": 7.711611569501427e-06, |
| "loss": 1.123194694519043, |
| "step": 894 |
| }, |
| { |
| "epoch": 1.4834437086092715, |
| "grad_norm": 0.494140625, |
| "learning_rate": 7.694183743302533e-06, |
| "loss": 1.1747374534606934, |
| "step": 896 |
| }, |
| { |
| "epoch": 1.4867549668874172, |
| "grad_norm": 0.546875, |
| "learning_rate": 7.676749330129348e-06, |
| "loss": 1.0527430772781372, |
| "step": 898 |
| }, |
| { |
| "epoch": 1.490066225165563, |
| "grad_norm": 0.458984375, |
| "learning_rate": 7.659308562365133e-06, |
| "loss": 1.144150733947754, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.4933774834437086, |
| "grad_norm": 0.46875, |
| "learning_rate": 7.64186167247786e-06, |
| "loss": 1.0819345712661743, |
| "step": 902 |
| }, |
| { |
| "epoch": 1.4966887417218544, |
| "grad_norm": 1.0703125, |
| "learning_rate": 7.624408893017088e-06, |
| "loss": 1.1327379941940308, |
| "step": 904 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 0.31640625, |
| "learning_rate": 7.6069504566108895e-06, |
| "loss": 1.0392861366271973, |
| "step": 906 |
| }, |
| { |
| "epoch": 1.5033112582781456, |
| "grad_norm": 1.3984375, |
| "learning_rate": 7.589486595962732e-06, |
| "loss": 1.1170624494552612, |
| "step": 908 |
| }, |
| { |
| "epoch": 1.5066225165562914, |
| "grad_norm": 0.625, |
| "learning_rate": 7.572017543848388e-06, |
| "loss": 1.079361081123352, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.5099337748344372, |
| "grad_norm": 0.76171875, |
| "learning_rate": 7.5545435331128185e-06, |
| "loss": 1.1161906719207764, |
| "step": 912 |
| }, |
| { |
| "epoch": 1.5132450331125828, |
| "grad_norm": 0.29296875, |
| "learning_rate": 7.537064796667088e-06, |
| "loss": 1.131893277168274, |
| "step": 914 |
| }, |
| { |
| "epoch": 1.5165562913907285, |
| "grad_norm": 0.416015625, |
| "learning_rate": 7.519581567485246e-06, |
| "loss": 1.1086527109146118, |
| "step": 916 |
| }, |
| { |
| "epoch": 1.519867549668874, |
| "grad_norm": 0.52734375, |
| "learning_rate": 7.502094078601223e-06, |
| "loss": 1.172413945198059, |
| "step": 918 |
| }, |
| { |
| "epoch": 1.5231788079470199, |
| "grad_norm": 0.275390625, |
| "learning_rate": 7.48460256310573e-06, |
| "loss": 1.0879758596420288, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.5264900662251657, |
| "grad_norm": 0.306640625, |
| "learning_rate": 7.4671072541431455e-06, |
| "loss": 1.1638213396072388, |
| "step": 922 |
| }, |
| { |
| "epoch": 1.5298013245033113, |
| "grad_norm": 0.42578125, |
| "learning_rate": 7.449608384908417e-06, |
| "loss": 1.091423749923706, |
| "step": 924 |
| }, |
| { |
| "epoch": 1.533112582781457, |
| "grad_norm": 0.435546875, |
| "learning_rate": 7.432106188643942e-06, |
| "loss": 1.1669130325317383, |
| "step": 926 |
| }, |
| { |
| "epoch": 1.5364238410596025, |
| "grad_norm": 0.361328125, |
| "learning_rate": 7.414600898636467e-06, |
| "loss": 1.1157746315002441, |
| "step": 928 |
| }, |
| { |
| "epoch": 1.5397350993377483, |
| "grad_norm": 0.34375, |
| "learning_rate": 7.39709274821397e-06, |
| "loss": 1.079698085784912, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.5430463576158941, |
| "grad_norm": 0.333984375, |
| "learning_rate": 7.379581970742558e-06, |
| "loss": 0.983738899230957, |
| "step": 932 |
| }, |
| { |
| "epoch": 1.5463576158940397, |
| "grad_norm": 0.412109375, |
| "learning_rate": 7.3620687996233606e-06, |
| "loss": 1.1889533996582031, |
| "step": 934 |
| }, |
| { |
| "epoch": 1.5496688741721854, |
| "grad_norm": 0.353515625, |
| "learning_rate": 7.344553468289401e-06, |
| "loss": 1.1194508075714111, |
| "step": 936 |
| }, |
| { |
| "epoch": 1.5529801324503312, |
| "grad_norm": 0.37890625, |
| "learning_rate": 7.327036210202505e-06, |
| "loss": 1.1008572578430176, |
| "step": 938 |
| }, |
| { |
| "epoch": 1.5562913907284768, |
| "grad_norm": 0.337890625, |
| "learning_rate": 7.309517258850176e-06, |
| "loss": 1.0888991355895996, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.5596026490066226, |
| "grad_norm": 0.51953125, |
| "learning_rate": 7.291996847742485e-06, |
| "loss": 1.1011091470718384, |
| "step": 942 |
| }, |
| { |
| "epoch": 1.5629139072847682, |
| "grad_norm": 0.5078125, |
| "learning_rate": 7.274475210408963e-06, |
| "loss": 1.0910996198654175, |
| "step": 944 |
| }, |
| { |
| "epoch": 1.5662251655629138, |
| "grad_norm": 0.380859375, |
| "learning_rate": 7.256952580395487e-06, |
| "loss": 1.086224913597107, |
| "step": 946 |
| }, |
| { |
| "epoch": 1.5695364238410596, |
| "grad_norm": 0.404296875, |
| "learning_rate": 7.239429191261161e-06, |
| "loss": 1.1097546815872192, |
| "step": 948 |
| }, |
| { |
| "epoch": 1.5728476821192054, |
| "grad_norm": 0.408203125, |
| "learning_rate": 7.221905276575209e-06, |
| "loss": 1.0408302545547485, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.576158940397351, |
| "grad_norm": 0.361328125, |
| "learning_rate": 7.204381069913862e-06, |
| "loss": 1.1143653392791748, |
| "step": 952 |
| }, |
| { |
| "epoch": 1.5794701986754967, |
| "grad_norm": 0.380859375, |
| "learning_rate": 7.186856804857243e-06, |
| "loss": 1.1487960815429688, |
| "step": 954 |
| }, |
| { |
| "epoch": 1.5827814569536423, |
| "grad_norm": 0.5859375, |
| "learning_rate": 7.1693327149862504e-06, |
| "loss": 1.10153329372406, |
| "step": 956 |
| }, |
| { |
| "epoch": 1.586092715231788, |
| "grad_norm": 0.384765625, |
| "learning_rate": 7.151809033879449e-06, |
| "loss": 1.0794066190719604, |
| "step": 958 |
| }, |
| { |
| "epoch": 1.589403973509934, |
| "grad_norm": 0.96484375, |
| "learning_rate": 7.134285995109952e-06, |
| "loss": 1.1781980991363525, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.5927152317880795, |
| "grad_norm": 0.34765625, |
| "learning_rate": 7.116763832242319e-06, |
| "loss": 1.0936973094940186, |
| "step": 962 |
| }, |
| { |
| "epoch": 1.596026490066225, |
| "grad_norm": 1.203125, |
| "learning_rate": 7.099242778829429e-06, |
| "loss": 1.1570301055908203, |
| "step": 964 |
| }, |
| { |
| "epoch": 1.5993377483443707, |
| "grad_norm": 0.32421875, |
| "learning_rate": 7.081723068409368e-06, |
| "loss": 1.1480774879455566, |
| "step": 966 |
| }, |
| { |
| "epoch": 1.6026490066225165, |
| "grad_norm": 0.416015625, |
| "learning_rate": 7.064204934502333e-06, |
| "loss": 1.1552226543426514, |
| "step": 968 |
| }, |
| { |
| "epoch": 1.6059602649006623, |
| "grad_norm": 0.5234375, |
| "learning_rate": 7.0466886106075e-06, |
| "loss": 1.2289761304855347, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.609271523178808, |
| "grad_norm": 0.345703125, |
| "learning_rate": 7.029174330199921e-06, |
| "loss": 1.1841604709625244, |
| "step": 972 |
| }, |
| { |
| "epoch": 1.6125827814569536, |
| "grad_norm": 0.4765625, |
| "learning_rate": 7.011662326727409e-06, |
| "loss": 1.1432627439498901, |
| "step": 974 |
| }, |
| { |
| "epoch": 1.6158940397350994, |
| "grad_norm": 0.490234375, |
| "learning_rate": 6.994152833607431e-06, |
| "loss": 1.0701535940170288, |
| "step": 976 |
| }, |
| { |
| "epoch": 1.6192052980132452, |
| "grad_norm": 0.51171875, |
| "learning_rate": 6.976646084223992e-06, |
| "loss": 1.0911750793457031, |
| "step": 978 |
| }, |
| { |
| "epoch": 1.6225165562913908, |
| "grad_norm": 0.41796875, |
| "learning_rate": 6.959142311924522e-06, |
| "loss": 1.0632914304733276, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.6258278145695364, |
| "grad_norm": 0.361328125, |
| "learning_rate": 6.9416417500167744e-06, |
| "loss": 1.1161998510360718, |
| "step": 982 |
| }, |
| { |
| "epoch": 1.629139072847682, |
| "grad_norm": 0.306640625, |
| "learning_rate": 6.924144631765715e-06, |
| "loss": 1.0910724401474, |
| "step": 984 |
| }, |
| { |
| "epoch": 1.6324503311258278, |
| "grad_norm": 0.26953125, |
| "learning_rate": 6.906651190390396e-06, |
| "loss": 1.0503889322280884, |
| "step": 986 |
| }, |
| { |
| "epoch": 1.6357615894039736, |
| "grad_norm": 0.384765625, |
| "learning_rate": 6.88916165906087e-06, |
| "loss": 1.2001702785491943, |
| "step": 988 |
| }, |
| { |
| "epoch": 1.6390728476821192, |
| "grad_norm": 0.578125, |
| "learning_rate": 6.8716762708950744e-06, |
| "loss": 1.1387388706207275, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.6423841059602649, |
| "grad_norm": 0.59765625, |
| "learning_rate": 6.854195258955716e-06, |
| "loss": 1.14443039894104, |
| "step": 992 |
| }, |
| { |
| "epoch": 1.6456953642384105, |
| "grad_norm": 0.384765625, |
| "learning_rate": 6.836718856247175e-06, |
| "loss": 1.1768776178359985, |
| "step": 994 |
| }, |
| { |
| "epoch": 1.6490066225165563, |
| "grad_norm": 0.2373046875, |
| "learning_rate": 6.819247295712395e-06, |
| "loss": 1.1593154668807983, |
| "step": 996 |
| }, |
| { |
| "epoch": 1.652317880794702, |
| "grad_norm": 0.275390625, |
| "learning_rate": 6.801780810229773e-06, |
| "loss": 1.1416690349578857, |
| "step": 998 |
| }, |
| { |
| "epoch": 1.6556291390728477, |
| "grad_norm": 0.3515625, |
| "learning_rate": 6.784319632610069e-06, |
| "loss": 1.1714385747909546, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.6589403973509933, |
| "grad_norm": 0.353515625, |
| "learning_rate": 6.766863995593291e-06, |
| "loss": 1.125061273574829, |
| "step": 1002 |
| }, |
| { |
| "epoch": 1.6622516556291391, |
| "grad_norm": 0.3671875, |
| "learning_rate": 6.749414131845594e-06, |
| "loss": 1.0884270668029785, |
| "step": 1004 |
| }, |
| { |
| "epoch": 1.6655629139072847, |
| "grad_norm": 0.23046875, |
| "learning_rate": 6.7319702739561835e-06, |
| "loss": 1.0700236558914185, |
| "step": 1006 |
| }, |
| { |
| "epoch": 1.6688741721854305, |
| "grad_norm": 0.302734375, |
| "learning_rate": 6.714532654434209e-06, |
| "loss": 1.1123465299606323, |
| "step": 1008 |
| }, |
| { |
| "epoch": 1.6721854304635762, |
| "grad_norm": 0.361328125, |
| "learning_rate": 6.697101505705675e-06, |
| "loss": 1.0608749389648438, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.6754966887417218, |
| "grad_norm": 0.349609375, |
| "learning_rate": 6.6796770601103345e-06, |
| "loss": 1.1096681356430054, |
| "step": 1012 |
| }, |
| { |
| "epoch": 1.6788079470198676, |
| "grad_norm": 0.369140625, |
| "learning_rate": 6.662259549898591e-06, |
| "loss": 1.0963759422302246, |
| "step": 1014 |
| }, |
| { |
| "epoch": 1.6821192052980134, |
| "grad_norm": 0.5, |
| "learning_rate": 6.644849207228413e-06, |
| "loss": 1.1321171522140503, |
| "step": 1016 |
| }, |
| { |
| "epoch": 1.685430463576159, |
| "grad_norm": 0.369140625, |
| "learning_rate": 6.627446264162226e-06, |
| "loss": 1.1333951950073242, |
| "step": 1018 |
| }, |
| { |
| "epoch": 1.6887417218543046, |
| "grad_norm": 0.50390625, |
| "learning_rate": 6.6100509526638295e-06, |
| "loss": 1.0682663917541504, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.6920529801324502, |
| "grad_norm": 0.4296875, |
| "learning_rate": 6.5926635045953026e-06, |
| "loss": 1.0695470571517944, |
| "step": 1022 |
| }, |
| { |
| "epoch": 1.695364238410596, |
| "grad_norm": 0.5625, |
| "learning_rate": 6.57528415171391e-06, |
| "loss": 1.1304333209991455, |
| "step": 1024 |
| }, |
| { |
| "epoch": 1.6986754966887418, |
| "grad_norm": 0.3671875, |
| "learning_rate": 6.55791312566902e-06, |
| "loss": 1.1207385063171387, |
| "step": 1026 |
| }, |
| { |
| "epoch": 1.7019867549668874, |
| "grad_norm": 0.64453125, |
| "learning_rate": 6.540550657999007e-06, |
| "loss": 1.0782655477523804, |
| "step": 1028 |
| }, |
| { |
| "epoch": 1.705298013245033, |
| "grad_norm": 0.314453125, |
| "learning_rate": 6.523196980128175e-06, |
| "loss": 1.0895800590515137, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.7086092715231787, |
| "grad_norm": 0.380859375, |
| "learning_rate": 6.5058523233636675e-06, |
| "loss": 1.0197545289993286, |
| "step": 1032 |
| }, |
| { |
| "epoch": 1.7119205298013245, |
| "grad_norm": 0.3984375, |
| "learning_rate": 6.4885169188923835e-06, |
| "loss": 1.1468507051467896, |
| "step": 1034 |
| }, |
| { |
| "epoch": 1.7152317880794703, |
| "grad_norm": 0.453125, |
| "learning_rate": 6.4711909977779034e-06, |
| "loss": 1.1051777601242065, |
| "step": 1036 |
| }, |
| { |
| "epoch": 1.718543046357616, |
| "grad_norm": 0.330078125, |
| "learning_rate": 6.4538747909574e-06, |
| "loss": 1.0961897373199463, |
| "step": 1038 |
| }, |
| { |
| "epoch": 1.7218543046357615, |
| "grad_norm": 0.40625, |
| "learning_rate": 6.436568529238562e-06, |
| "loss": 1.1184409856796265, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.7251655629139073, |
| "grad_norm": 2.53125, |
| "learning_rate": 6.419272443296529e-06, |
| "loss": 1.058405876159668, |
| "step": 1042 |
| }, |
| { |
| "epoch": 1.728476821192053, |
| "grad_norm": 0.396484375, |
| "learning_rate": 6.401986763670795e-06, |
| "loss": 1.1203678846359253, |
| "step": 1044 |
| }, |
| { |
| "epoch": 1.7317880794701987, |
| "grad_norm": 0.5, |
| "learning_rate": 6.3847117207621605e-06, |
| "loss": 1.105993390083313, |
| "step": 1046 |
| }, |
| { |
| "epoch": 1.7350993377483444, |
| "grad_norm": 0.33203125, |
| "learning_rate": 6.367447544829642e-06, |
| "loss": 1.1263104677200317, |
| "step": 1048 |
| }, |
| { |
| "epoch": 1.73841059602649, |
| "grad_norm": 0.380859375, |
| "learning_rate": 6.35019446598741e-06, |
| "loss": 1.2111080884933472, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.7417218543046358, |
| "grad_norm": 0.4921875, |
| "learning_rate": 6.3329527142017255e-06, |
| "loss": 1.1802841424942017, |
| "step": 1052 |
| }, |
| { |
| "epoch": 1.7450331125827816, |
| "grad_norm": 0.4609375, |
| "learning_rate": 6.31572251928787e-06, |
| "loss": 1.1829028129577637, |
| "step": 1054 |
| }, |
| { |
| "epoch": 1.7483443708609272, |
| "grad_norm": 0.5078125, |
| "learning_rate": 6.298504110907079e-06, |
| "loss": 1.1439435482025146, |
| "step": 1056 |
| }, |
| { |
| "epoch": 1.7516556291390728, |
| "grad_norm": 0.4140625, |
| "learning_rate": 6.281297718563491e-06, |
| "loss": 1.0960673093795776, |
| "step": 1058 |
| }, |
| { |
| "epoch": 1.7549668874172184, |
| "grad_norm": 0.34765625, |
| "learning_rate": 6.264103571601083e-06, |
| "loss": 1.1312483549118042, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.7582781456953642, |
| "grad_norm": 0.330078125, |
| "learning_rate": 6.246921899200609e-06, |
| "loss": 1.1018383502960205, |
| "step": 1062 |
| }, |
| { |
| "epoch": 1.76158940397351, |
| "grad_norm": 0.5859375, |
| "learning_rate": 6.229752930376553e-06, |
| "loss": 1.0974431037902832, |
| "step": 1064 |
| }, |
| { |
| "epoch": 1.7649006622516556, |
| "grad_norm": 0.8046875, |
| "learning_rate": 6.212596893974069e-06, |
| "loss": 1.0925394296646118, |
| "step": 1066 |
| }, |
| { |
| "epoch": 1.7682119205298013, |
| "grad_norm": 0.458984375, |
| "learning_rate": 6.195454018665944e-06, |
| "loss": 1.0865404605865479, |
| "step": 1068 |
| }, |
| { |
| "epoch": 1.771523178807947, |
| "grad_norm": 0.5390625, |
| "learning_rate": 6.178324532949529e-06, |
| "loss": 1.0787994861602783, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.7748344370860927, |
| "grad_norm": 0.296875, |
| "learning_rate": 6.16120866514371e-06, |
| "loss": 1.0901260375976562, |
| "step": 1072 |
| }, |
| { |
| "epoch": 1.7781456953642385, |
| "grad_norm": 0.306640625, |
| "learning_rate": 6.1441066433858665e-06, |
| "loss": 1.067402958869934, |
| "step": 1074 |
| }, |
| { |
| "epoch": 1.781456953642384, |
| "grad_norm": 0.2890625, |
| "learning_rate": 6.127018695628808e-06, |
| "loss": 1.0247200727462769, |
| "step": 1076 |
| }, |
| { |
| "epoch": 1.7847682119205297, |
| "grad_norm": 0.494140625, |
| "learning_rate": 6.109945049637773e-06, |
| "loss": 1.0284204483032227, |
| "step": 1078 |
| }, |
| { |
| "epoch": 1.7880794701986755, |
| "grad_norm": 0.3046875, |
| "learning_rate": 6.092885932987351e-06, |
| "loss": 1.0762344598770142, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.7913907284768213, |
| "grad_norm": 1.3125, |
| "learning_rate": 6.0758415730584845e-06, |
| "loss": 1.0676106214523315, |
| "step": 1082 |
| }, |
| { |
| "epoch": 1.794701986754967, |
| "grad_norm": 0.396484375, |
| "learning_rate": 6.058812197035418e-06, |
| "loss": 1.0068953037261963, |
| "step": 1084 |
| }, |
| { |
| "epoch": 1.7980132450331126, |
| "grad_norm": 0.458984375, |
| "learning_rate": 6.041798031902674e-06, |
| "loss": 1.1694005727767944, |
| "step": 1086 |
| }, |
| { |
| "epoch": 1.8013245033112582, |
| "grad_norm": 0.373046875, |
| "learning_rate": 6.024799304442037e-06, |
| "loss": 1.1191160678863525, |
| "step": 1088 |
| }, |
| { |
| "epoch": 1.804635761589404, |
| "grad_norm": 1.546875, |
| "learning_rate": 6.0078162412295115e-06, |
| "loss": 1.0951286554336548, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.8079470198675498, |
| "grad_norm": 0.55078125, |
| "learning_rate": 5.990849068632318e-06, |
| "loss": 1.1190029382705688, |
| "step": 1092 |
| }, |
| { |
| "epoch": 1.8112582781456954, |
| "grad_norm": 0.5, |
| "learning_rate": 5.973898012805875e-06, |
| "loss": 1.0913665294647217, |
| "step": 1094 |
| }, |
| { |
| "epoch": 1.814569536423841, |
| "grad_norm": 0.40234375, |
| "learning_rate": 5.9569632996907755e-06, |
| "loss": 1.069852590560913, |
| "step": 1096 |
| }, |
| { |
| "epoch": 1.8178807947019866, |
| "grad_norm": 0.31640625, |
| "learning_rate": 5.940045155009784e-06, |
| "loss": 1.0911487340927124, |
| "step": 1098 |
| }, |
| { |
| "epoch": 1.8211920529801324, |
| "grad_norm": 0.353515625, |
| "learning_rate": 5.923143804264822e-06, |
| "loss": 1.135908603668213, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.8245033112582782, |
| "grad_norm": 0.384765625, |
| "learning_rate": 5.906259472733964e-06, |
| "loss": 1.1487995386123657, |
| "step": 1102 |
| }, |
| { |
| "epoch": 1.8278145695364238, |
| "grad_norm": 0.419921875, |
| "learning_rate": 5.8893923854684386e-06, |
| "loss": 1.1677968502044678, |
| "step": 1104 |
| }, |
| { |
| "epoch": 1.8311258278145695, |
| "grad_norm": 0.439453125, |
| "learning_rate": 5.872542767289624e-06, |
| "loss": 1.102380633354187, |
| "step": 1106 |
| }, |
| { |
| "epoch": 1.8344370860927153, |
| "grad_norm": 2.78125, |
| "learning_rate": 5.855710842786053e-06, |
| "loss": 1.1117829084396362, |
| "step": 1108 |
| }, |
| { |
| "epoch": 1.8377483443708609, |
| "grad_norm": 0.58984375, |
| "learning_rate": 5.838896836310418e-06, |
| "loss": 1.1342206001281738, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.8410596026490067, |
| "grad_norm": 0.43359375, |
| "learning_rate": 5.822100971976584e-06, |
| "loss": 1.0774588584899902, |
| "step": 1112 |
| }, |
| { |
| "epoch": 1.8443708609271523, |
| "grad_norm": 0.44140625, |
| "learning_rate": 5.805323473656598e-06, |
| "loss": 1.0986900329589844, |
| "step": 1114 |
| }, |
| { |
| "epoch": 1.847682119205298, |
| "grad_norm": 0.91015625, |
| "learning_rate": 5.788564564977708e-06, |
| "loss": 1.1522040367126465, |
| "step": 1116 |
| }, |
| { |
| "epoch": 1.8509933774834437, |
| "grad_norm": 0.330078125, |
| "learning_rate": 5.771824469319379e-06, |
| "loss": 1.0625782012939453, |
| "step": 1118 |
| }, |
| { |
| "epoch": 1.8543046357615895, |
| "grad_norm": 0.56640625, |
| "learning_rate": 5.7551034098103185e-06, |
| "loss": 1.1141998767852783, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.8576158940397351, |
| "grad_norm": 0.7109375, |
| "learning_rate": 5.738401609325501e-06, |
| "loss": 1.0555064678192139, |
| "step": 1122 |
| }, |
| { |
| "epoch": 1.8609271523178808, |
| "grad_norm": 0.431640625, |
| "learning_rate": 5.721719290483198e-06, |
| "loss": 1.1195597648620605, |
| "step": 1124 |
| }, |
| { |
| "epoch": 1.8642384105960264, |
| "grad_norm": 0.41015625, |
| "learning_rate": 5.705056675642008e-06, |
| "loss": 1.1044063568115234, |
| "step": 1126 |
| }, |
| { |
| "epoch": 1.8675496688741722, |
| "grad_norm": 0.3125, |
| "learning_rate": 5.688413986897899e-06, |
| "loss": 1.2024643421173096, |
| "step": 1128 |
| }, |
| { |
| "epoch": 1.870860927152318, |
| "grad_norm": 0.34765625, |
| "learning_rate": 5.671791446081238e-06, |
| "loss": 1.1156952381134033, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.8741721854304636, |
| "grad_norm": 0.6875, |
| "learning_rate": 5.655189274753848e-06, |
| "loss": 1.1415760517120361, |
| "step": 1132 |
| }, |
| { |
| "epoch": 1.8774834437086092, |
| "grad_norm": 0.55859375, |
| "learning_rate": 5.638607694206041e-06, |
| "loss": 1.1045377254486084, |
| "step": 1134 |
| }, |
| { |
| "epoch": 1.8807947019867548, |
| "grad_norm": 0.546875, |
| "learning_rate": 5.622046925453673e-06, |
| "loss": 1.0766429901123047, |
| "step": 1136 |
| }, |
| { |
| "epoch": 1.8841059602649006, |
| "grad_norm": 0.34765625, |
| "learning_rate": 5.605507189235207e-06, |
| "loss": 1.1174238920211792, |
| "step": 1138 |
| }, |
| { |
| "epoch": 1.8874172185430464, |
| "grad_norm": 0.3203125, |
| "learning_rate": 5.5889887060087605e-06, |
| "loss": 1.1253938674926758, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.890728476821192, |
| "grad_norm": 1.203125, |
| "learning_rate": 5.572491695949165e-06, |
| "loss": 1.1572551727294922, |
| "step": 1142 |
| }, |
| { |
| "epoch": 1.8940397350993377, |
| "grad_norm": 0.65625, |
| "learning_rate": 5.556016378945044e-06, |
| "loss": 1.1192970275878906, |
| "step": 1144 |
| }, |
| { |
| "epoch": 1.8973509933774835, |
| "grad_norm": 0.333984375, |
| "learning_rate": 5.539562974595868e-06, |
| "loss": 1.1140743494033813, |
| "step": 1146 |
| }, |
| { |
| "epoch": 1.9006622516556293, |
| "grad_norm": 0.60546875, |
| "learning_rate": 5.523131702209037e-06, |
| "loss": 1.0318939685821533, |
| "step": 1148 |
| }, |
| { |
| "epoch": 1.903973509933775, |
| "grad_norm": 0.47265625, |
| "learning_rate": 5.506722780796953e-06, |
| "loss": 1.0299806594848633, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.9072847682119205, |
| "grad_norm": 0.416015625, |
| "learning_rate": 5.490336429074103e-06, |
| "loss": 1.1678529977798462, |
| "step": 1152 |
| }, |
| { |
| "epoch": 1.910596026490066, |
| "grad_norm": 0.5234375, |
| "learning_rate": 5.4739728654541415e-06, |
| "loss": 1.0364153385162354, |
| "step": 1154 |
| }, |
| { |
| "epoch": 1.913907284768212, |
| "grad_norm": 0.330078125, |
| "learning_rate": 5.457632308046978e-06, |
| "loss": 1.1044658422470093, |
| "step": 1156 |
| }, |
| { |
| "epoch": 1.9172185430463577, |
| "grad_norm": 0.46875, |
| "learning_rate": 5.441314974655879e-06, |
| "loss": 1.1027004718780518, |
| "step": 1158 |
| }, |
| { |
| "epoch": 1.9205298013245033, |
| "grad_norm": 0.431640625, |
| "learning_rate": 5.425021082774551e-06, |
| "loss": 1.1235613822937012, |
| "step": 1160 |
| }, |
| { |
| "epoch": 1.923841059602649, |
| "grad_norm": 0.40234375, |
| "learning_rate": 5.408750849584253e-06, |
| "loss": 1.0770944356918335, |
| "step": 1162 |
| }, |
| { |
| "epoch": 1.9271523178807946, |
| "grad_norm": 0.3671875, |
| "learning_rate": 5.392504491950898e-06, |
| "loss": 1.1669319868087769, |
| "step": 1164 |
| }, |
| { |
| "epoch": 1.9304635761589404, |
| "grad_norm": 0.341796875, |
| "learning_rate": 5.376282226422157e-06, |
| "loss": 1.064618706703186, |
| "step": 1166 |
| }, |
| { |
| "epoch": 1.9337748344370862, |
| "grad_norm": 0.404296875, |
| "learning_rate": 5.360084269224581e-06, |
| "loss": 1.0915920734405518, |
| "step": 1168 |
| }, |
| { |
| "epoch": 1.9370860927152318, |
| "grad_norm": 0.44140625, |
| "learning_rate": 5.343910836260715e-06, |
| "loss": 1.013867974281311, |
| "step": 1170 |
| }, |
| { |
| "epoch": 1.9403973509933774, |
| "grad_norm": 0.5, |
| "learning_rate": 5.327762143106218e-06, |
| "loss": 1.1231892108917236, |
| "step": 1172 |
| }, |
| { |
| "epoch": 1.9437086092715232, |
| "grad_norm": 0.357421875, |
| "learning_rate": 5.3116384050069994e-06, |
| "loss": 1.16791832447052, |
| "step": 1174 |
| }, |
| { |
| "epoch": 1.9470198675496688, |
| "grad_norm": 1.5078125, |
| "learning_rate": 5.295539836876331e-06, |
| "loss": 1.1662765741348267, |
| "step": 1176 |
| }, |
| { |
| "epoch": 1.9503311258278146, |
| "grad_norm": 1.0625, |
| "learning_rate": 5.279466653292004e-06, |
| "loss": 1.0979350805282593, |
| "step": 1178 |
| }, |
| { |
| "epoch": 1.9536423841059603, |
| "grad_norm": 0.314453125, |
| "learning_rate": 5.263419068493455e-06, |
| "loss": 1.1612277030944824, |
| "step": 1180 |
| }, |
| { |
| "epoch": 1.9569536423841059, |
| "grad_norm": 0.48046875, |
| "learning_rate": 5.2473972963789146e-06, |
| "loss": 1.1360162496566772, |
| "step": 1182 |
| }, |
| { |
| "epoch": 1.9602649006622517, |
| "grad_norm": 0.337890625, |
| "learning_rate": 5.231401550502558e-06, |
| "loss": 1.0562247037887573, |
| "step": 1184 |
| }, |
| { |
| "epoch": 1.9635761589403975, |
| "grad_norm": 0.400390625, |
| "learning_rate": 5.215432044071655e-06, |
| "loss": 1.0509085655212402, |
| "step": 1186 |
| }, |
| { |
| "epoch": 1.966887417218543, |
| "grad_norm": 0.28515625, |
| "learning_rate": 5.199488989943729e-06, |
| "loss": 1.0427687168121338, |
| "step": 1188 |
| }, |
| { |
| "epoch": 1.9701986754966887, |
| "grad_norm": 0.462890625, |
| "learning_rate": 5.183572600623721e-06, |
| "loss": 1.105067491531372, |
| "step": 1190 |
| }, |
| { |
| "epoch": 1.9735099337748343, |
| "grad_norm": 0.6171875, |
| "learning_rate": 5.167683088261162e-06, |
| "loss": 1.061687707901001, |
| "step": 1192 |
| }, |
| { |
| "epoch": 1.9768211920529801, |
| "grad_norm": 0.625, |
| "learning_rate": 5.151820664647333e-06, |
| "loss": 1.1441823244094849, |
| "step": 1194 |
| }, |
| { |
| "epoch": 1.980132450331126, |
| "grad_norm": 0.375, |
| "learning_rate": 5.135985541212451e-06, |
| "loss": 1.0769778490066528, |
| "step": 1196 |
| }, |
| { |
| "epoch": 1.9834437086092715, |
| "grad_norm": 0.7890625, |
| "learning_rate": 5.1201779290228525e-06, |
| "loss": 1.0631657838821411, |
| "step": 1198 |
| }, |
| { |
| "epoch": 1.9867549668874172, |
| "grad_norm": 1.234375, |
| "learning_rate": 5.104398038778169e-06, |
| "loss": 1.1530580520629883, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.9900662251655628, |
| "grad_norm": 0.26171875, |
| "learning_rate": 5.088646080808537e-06, |
| "loss": 1.1302320957183838, |
| "step": 1202 |
| }, |
| { |
| "epoch": 1.9933774834437086, |
| "grad_norm": 0.5078125, |
| "learning_rate": 5.072922265071775e-06, |
| "loss": 1.1501787900924683, |
| "step": 1204 |
| }, |
| { |
| "epoch": 1.9966887417218544, |
| "grad_norm": 0.35546875, |
| "learning_rate": 5.057226801150598e-06, |
| "loss": 1.0549312829971313, |
| "step": 1206 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.451171875, |
| "learning_rate": 5.041559898249817e-06, |
| "loss": 1.0718798637390137, |
| "step": 1208 |
| }, |
| { |
| "epoch": 2.0033112582781456, |
| "grad_norm": 0.33984375, |
| "learning_rate": 5.025921765193557e-06, |
| "loss": 1.0916215181350708, |
| "step": 1210 |
| }, |
| { |
| "epoch": 2.006622516556291, |
| "grad_norm": 0.287109375, |
| "learning_rate": 5.010312610422467e-06, |
| "loss": 1.073822259902954, |
| "step": 1212 |
| }, |
| { |
| "epoch": 2.0099337748344372, |
| "grad_norm": 0.4453125, |
| "learning_rate": 4.99473264199094e-06, |
| "loss": 1.042319893836975, |
| "step": 1214 |
| }, |
| { |
| "epoch": 2.013245033112583, |
| "grad_norm": 0.271484375, |
| "learning_rate": 4.979182067564358e-06, |
| "loss": 1.0481133460998535, |
| "step": 1216 |
| }, |
| { |
| "epoch": 2.0165562913907285, |
| "grad_norm": 0.31640625, |
| "learning_rate": 4.963661094416295e-06, |
| "loss": 0.9801614284515381, |
| "step": 1218 |
| }, |
| { |
| "epoch": 2.019867549668874, |
| "grad_norm": 0.91796875, |
| "learning_rate": 4.948169929425778e-06, |
| "loss": 1.0478323698043823, |
| "step": 1220 |
| }, |
| { |
| "epoch": 2.0231788079470197, |
| "grad_norm": 0.66796875, |
| "learning_rate": 4.9327087790745186e-06, |
| "loss": 1.085882306098938, |
| "step": 1222 |
| }, |
| { |
| "epoch": 2.0264900662251657, |
| "grad_norm": 0.498046875, |
| "learning_rate": 4.917277849444166e-06, |
| "loss": 1.0048208236694336, |
| "step": 1224 |
| }, |
| { |
| "epoch": 2.0298013245033113, |
| "grad_norm": 0.5390625, |
| "learning_rate": 4.9018773462135525e-06, |
| "loss": 1.0288286209106445, |
| "step": 1226 |
| }, |
| { |
| "epoch": 2.033112582781457, |
| "grad_norm": 0.3125, |
| "learning_rate": 4.886507474655965e-06, |
| "loss": 1.102409839630127, |
| "step": 1228 |
| }, |
| { |
| "epoch": 2.0364238410596025, |
| "grad_norm": 0.361328125, |
| "learning_rate": 4.87116843963639e-06, |
| "loss": 1.0566478967666626, |
| "step": 1230 |
| }, |
| { |
| "epoch": 2.0397350993377485, |
| "grad_norm": 0.30078125, |
| "learning_rate": 4.855860445608805e-06, |
| "loss": 1.0917094945907593, |
| "step": 1232 |
| }, |
| { |
| "epoch": 2.043046357615894, |
| "grad_norm": 0.431640625, |
| "learning_rate": 4.840583696613433e-06, |
| "loss": 1.0331615209579468, |
| "step": 1234 |
| }, |
| { |
| "epoch": 2.0463576158940397, |
| "grad_norm": 0.45703125, |
| "learning_rate": 4.825338396274038e-06, |
| "loss": 1.0467015504837036, |
| "step": 1236 |
| }, |
| { |
| "epoch": 2.0496688741721854, |
| "grad_norm": 0.359375, |
| "learning_rate": 4.810124747795203e-06, |
| "loss": 1.0292361974716187, |
| "step": 1238 |
| }, |
| { |
| "epoch": 2.052980132450331, |
| "grad_norm": 0.369140625, |
| "learning_rate": 4.7949429539596205e-06, |
| "loss": 0.981902539730072, |
| "step": 1240 |
| }, |
| { |
| "epoch": 2.056291390728477, |
| "grad_norm": 0.408203125, |
| "learning_rate": 4.779793217125398e-06, |
| "loss": 1.0525181293487549, |
| "step": 1242 |
| }, |
| { |
| "epoch": 2.0596026490066226, |
| "grad_norm": 0.380859375, |
| "learning_rate": 4.76467573922335e-06, |
| "loss": 1.0744459629058838, |
| "step": 1244 |
| }, |
| { |
| "epoch": 2.062913907284768, |
| "grad_norm": 0.447265625, |
| "learning_rate": 4.749590721754318e-06, |
| "loss": 1.0127346515655518, |
| "step": 1246 |
| }, |
| { |
| "epoch": 2.066225165562914, |
| "grad_norm": 0.447265625, |
| "learning_rate": 4.734538365786471e-06, |
| "loss": 0.9119741320610046, |
| "step": 1248 |
| }, |
| { |
| "epoch": 2.0695364238410594, |
| "grad_norm": 0.353515625, |
| "learning_rate": 4.71951887195264e-06, |
| "loss": 1.0265223979949951, |
| "step": 1250 |
| }, |
| { |
| "epoch": 2.0728476821192054, |
| "grad_norm": 0.69140625, |
| "learning_rate": 4.704532440447632e-06, |
| "loss": 1.0805940628051758, |
| "step": 1252 |
| }, |
| { |
| "epoch": 2.076158940397351, |
| "grad_norm": 0.462890625, |
| "learning_rate": 4.689579271025565e-06, |
| "loss": 1.0568530559539795, |
| "step": 1254 |
| }, |
| { |
| "epoch": 2.0794701986754967, |
| "grad_norm": 0.423828125, |
| "learning_rate": 4.674659562997213e-06, |
| "loss": 1.1478904485702515, |
| "step": 1256 |
| }, |
| { |
| "epoch": 2.0827814569536423, |
| "grad_norm": 0.4375, |
| "learning_rate": 4.6597735152273365e-06, |
| "loss": 0.9818710088729858, |
| "step": 1258 |
| }, |
| { |
| "epoch": 2.0860927152317883, |
| "grad_norm": 0.515625, |
| "learning_rate": 4.644921326132045e-06, |
| "loss": 1.059829831123352, |
| "step": 1260 |
| }, |
| { |
| "epoch": 2.089403973509934, |
| "grad_norm": 0.70703125, |
| "learning_rate": 4.630103193676136e-06, |
| "loss": 1.0128612518310547, |
| "step": 1262 |
| }, |
| { |
| "epoch": 2.0927152317880795, |
| "grad_norm": 0.33984375, |
| "learning_rate": 4.61531931537048e-06, |
| "loss": 1.0629007816314697, |
| "step": 1264 |
| }, |
| { |
| "epoch": 2.096026490066225, |
| "grad_norm": 0.38671875, |
| "learning_rate": 4.6005698882693595e-06, |
| "loss": 1.0301984548568726, |
| "step": 1266 |
| }, |
| { |
| "epoch": 2.0993377483443707, |
| "grad_norm": 0.349609375, |
| "learning_rate": 4.585855108967869e-06, |
| "loss": 1.0227930545806885, |
| "step": 1268 |
| }, |
| { |
| "epoch": 2.1026490066225167, |
| "grad_norm": 0.54296875, |
| "learning_rate": 4.571175173599276e-06, |
| "loss": 1.0368671417236328, |
| "step": 1270 |
| }, |
| { |
| "epoch": 2.1059602649006623, |
| "grad_norm": 0.396484375, |
| "learning_rate": 4.5565302778324106e-06, |
| "loss": 0.9873349070549011, |
| "step": 1272 |
| }, |
| { |
| "epoch": 2.109271523178808, |
| "grad_norm": 0.72265625, |
| "learning_rate": 4.541920616869069e-06, |
| "loss": 1.007344365119934, |
| "step": 1274 |
| }, |
| { |
| "epoch": 2.1125827814569536, |
| "grad_norm": 0.38671875, |
| "learning_rate": 4.527346385441395e-06, |
| "loss": 1.0187866687774658, |
| "step": 1276 |
| }, |
| { |
| "epoch": 2.115894039735099, |
| "grad_norm": 0.41015625, |
| "learning_rate": 4.512807777809299e-06, |
| "loss": 1.082865595817566, |
| "step": 1278 |
| }, |
| { |
| "epoch": 2.119205298013245, |
| "grad_norm": 0.54296875, |
| "learning_rate": 4.4983049877578516e-06, |
| "loss": 1.0283514261245728, |
| "step": 1280 |
| }, |
| { |
| "epoch": 2.122516556291391, |
| "grad_norm": 0.5078125, |
| "learning_rate": 4.483838208594723e-06, |
| "loss": 1.1053177118301392, |
| "step": 1282 |
| }, |
| { |
| "epoch": 2.1258278145695364, |
| "grad_norm": 0.419921875, |
| "learning_rate": 4.469407633147583e-06, |
| "loss": 1.0836362838745117, |
| "step": 1284 |
| }, |
| { |
| "epoch": 2.129139072847682, |
| "grad_norm": 0.4453125, |
| "learning_rate": 4.455013453761553e-06, |
| "loss": 1.0553747415542603, |
| "step": 1286 |
| }, |
| { |
| "epoch": 2.1324503311258276, |
| "grad_norm": 0.5078125, |
| "learning_rate": 4.440655862296622e-06, |
| "loss": 1.0284078121185303, |
| "step": 1288 |
| }, |
| { |
| "epoch": 2.1357615894039736, |
| "grad_norm": 0.62109375, |
| "learning_rate": 4.426335050125107e-06, |
| "loss": 0.914071798324585, |
| "step": 1290 |
| }, |
| { |
| "epoch": 2.1390728476821192, |
| "grad_norm": 0.33203125, |
| "learning_rate": 4.412051208129087e-06, |
| "loss": 1.0339797735214233, |
| "step": 1292 |
| }, |
| { |
| "epoch": 2.142384105960265, |
| "grad_norm": 0.56640625, |
| "learning_rate": 4.3978045266978655e-06, |
| "loss": 1.0838464498519897, |
| "step": 1294 |
| }, |
| { |
| "epoch": 2.1456953642384105, |
| "grad_norm": 0.734375, |
| "learning_rate": 4.383595195725441e-06, |
| "loss": 1.0508580207824707, |
| "step": 1296 |
| }, |
| { |
| "epoch": 2.1490066225165565, |
| "grad_norm": 0.7734375, |
| "learning_rate": 4.3694234046079586e-06, |
| "loss": 1.0503878593444824, |
| "step": 1298 |
| }, |
| { |
| "epoch": 2.152317880794702, |
| "grad_norm": 0.310546875, |
| "learning_rate": 4.355289342241201e-06, |
| "loss": 1.066772222518921, |
| "step": 1300 |
| }, |
| { |
| "epoch": 2.1556291390728477, |
| "grad_norm": 0.390625, |
| "learning_rate": 4.3411931970180586e-06, |
| "loss": 1.0136038064956665, |
| "step": 1302 |
| }, |
| { |
| "epoch": 2.1589403973509933, |
| "grad_norm": 0.34375, |
| "learning_rate": 4.327135156826031e-06, |
| "loss": 1.0277198553085327, |
| "step": 1304 |
| }, |
| { |
| "epoch": 2.162251655629139, |
| "grad_norm": 0.443359375, |
| "learning_rate": 4.313115409044709e-06, |
| "loss": 1.0602567195892334, |
| "step": 1306 |
| }, |
| { |
| "epoch": 2.165562913907285, |
| "grad_norm": 0.41796875, |
| "learning_rate": 4.299134140543291e-06, |
| "loss": 1.0680055618286133, |
| "step": 1308 |
| }, |
| { |
| "epoch": 2.1688741721854305, |
| "grad_norm": 0.60546875, |
| "learning_rate": 4.285191537678081e-06, |
| "loss": 1.0317363739013672, |
| "step": 1310 |
| }, |
| { |
| "epoch": 2.172185430463576, |
| "grad_norm": 0.56640625, |
| "learning_rate": 4.271287786290006e-06, |
| "loss": 1.0057586431503296, |
| "step": 1312 |
| }, |
| { |
| "epoch": 2.1754966887417218, |
| "grad_norm": 0.443359375, |
| "learning_rate": 4.257423071702149e-06, |
| "loss": 1.0330718755722046, |
| "step": 1314 |
| }, |
| { |
| "epoch": 2.1788079470198674, |
| "grad_norm": 0.306640625, |
| "learning_rate": 4.243597578717265e-06, |
| "loss": 1.0199156999588013, |
| "step": 1316 |
| }, |
| { |
| "epoch": 2.1821192052980134, |
| "grad_norm": 0.361328125, |
| "learning_rate": 4.229811491615329e-06, |
| "loss": 0.9906125664710999, |
| "step": 1318 |
| }, |
| { |
| "epoch": 2.185430463576159, |
| "grad_norm": 0.302734375, |
| "learning_rate": 4.216064994151071e-06, |
| "loss": 1.1189707517623901, |
| "step": 1320 |
| }, |
| { |
| "epoch": 2.1887417218543046, |
| "grad_norm": 0.51953125, |
| "learning_rate": 4.202358269551536e-06, |
| "loss": 0.996783971786499, |
| "step": 1322 |
| }, |
| { |
| "epoch": 2.19205298013245, |
| "grad_norm": 0.31640625, |
| "learning_rate": 4.188691500513628e-06, |
| "loss": 1.0981279611587524, |
| "step": 1324 |
| }, |
| { |
| "epoch": 2.195364238410596, |
| "grad_norm": 0.33984375, |
| "learning_rate": 4.175064869201694e-06, |
| "loss": 1.0822064876556396, |
| "step": 1326 |
| }, |
| { |
| "epoch": 2.198675496688742, |
| "grad_norm": 0.58203125, |
| "learning_rate": 4.161478557245076e-06, |
| "loss": 0.9716603755950928, |
| "step": 1328 |
| }, |
| { |
| "epoch": 2.2019867549668874, |
| "grad_norm": 0.72265625, |
| "learning_rate": 4.147932745735708e-06, |
| "loss": 1.045888900756836, |
| "step": 1330 |
| }, |
| { |
| "epoch": 2.205298013245033, |
| "grad_norm": 0.359375, |
| "learning_rate": 4.134427615225686e-06, |
| "loss": 1.0230597257614136, |
| "step": 1332 |
| }, |
| { |
| "epoch": 2.2086092715231787, |
| "grad_norm": 0.53125, |
| "learning_rate": 4.120963345724871e-06, |
| "loss": 1.0504512786865234, |
| "step": 1334 |
| }, |
| { |
| "epoch": 2.2119205298013247, |
| "grad_norm": 0.39453125, |
| "learning_rate": 4.107540116698491e-06, |
| "loss": 1.048678994178772, |
| "step": 1336 |
| }, |
| { |
| "epoch": 2.2152317880794703, |
| "grad_norm": 0.357421875, |
| "learning_rate": 4.0941581070647416e-06, |
| "loss": 1.0520901679992676, |
| "step": 1338 |
| }, |
| { |
| "epoch": 2.218543046357616, |
| "grad_norm": 0.466796875, |
| "learning_rate": 4.080817495192409e-06, |
| "loss": 1.0306859016418457, |
| "step": 1340 |
| }, |
| { |
| "epoch": 2.2218543046357615, |
| "grad_norm": 0.421875, |
| "learning_rate": 4.067518458898486e-06, |
| "loss": 1.067014217376709, |
| "step": 1342 |
| }, |
| { |
| "epoch": 2.225165562913907, |
| "grad_norm": 0.357421875, |
| "learning_rate": 4.054261175445805e-06, |
| "loss": 0.98519366979599, |
| "step": 1344 |
| }, |
| { |
| "epoch": 2.228476821192053, |
| "grad_norm": 0.89453125, |
| "learning_rate": 4.041045821540672e-06, |
| "loss": 1.0907796621322632, |
| "step": 1346 |
| }, |
| { |
| "epoch": 2.2317880794701987, |
| "grad_norm": 0.310546875, |
| "learning_rate": 4.027872573330523e-06, |
| "loss": 0.9753696918487549, |
| "step": 1348 |
| }, |
| { |
| "epoch": 2.2350993377483444, |
| "grad_norm": 0.40625, |
| "learning_rate": 4.014741606401557e-06, |
| "loss": 1.0596659183502197, |
| "step": 1350 |
| }, |
| { |
| "epoch": 2.23841059602649, |
| "grad_norm": 0.30859375, |
| "learning_rate": 4.001653095776411e-06, |
| "loss": 1.0401997566223145, |
| "step": 1352 |
| }, |
| { |
| "epoch": 2.241721854304636, |
| "grad_norm": 0.79296875, |
| "learning_rate": 3.988607215911823e-06, |
| "loss": 1.0707045793533325, |
| "step": 1354 |
| }, |
| { |
| "epoch": 2.2450331125827816, |
| "grad_norm": 0.310546875, |
| "learning_rate": 3.975604140696301e-06, |
| "loss": 1.0365581512451172, |
| "step": 1356 |
| }, |
| { |
| "epoch": 2.248344370860927, |
| "grad_norm": 0.384765625, |
| "learning_rate": 3.962644043447818e-06, |
| "loss": 1.0854417085647583, |
| "step": 1358 |
| }, |
| { |
| "epoch": 2.251655629139073, |
| "grad_norm": 0.423828125, |
| "learning_rate": 3.9497270969114825e-06, |
| "loss": 1.0579876899719238, |
| "step": 1360 |
| }, |
| { |
| "epoch": 2.2549668874172184, |
| "grad_norm": 0.357421875, |
| "learning_rate": 3.9368534732572605e-06, |
| "loss": 1.1018257141113281, |
| "step": 1362 |
| }, |
| { |
| "epoch": 2.258278145695364, |
| "grad_norm": 0.8671875, |
| "learning_rate": 3.924023344077652e-06, |
| "loss": 1.0348024368286133, |
| "step": 1364 |
| }, |
| { |
| "epoch": 2.26158940397351, |
| "grad_norm": 0.345703125, |
| "learning_rate": 3.911236880385433e-06, |
| "loss": 1.0398342609405518, |
| "step": 1366 |
| }, |
| { |
| "epoch": 2.2649006622516556, |
| "grad_norm": 0.423828125, |
| "learning_rate": 3.898494252611356e-06, |
| "loss": 1.0437736511230469, |
| "step": 1368 |
| }, |
| { |
| "epoch": 2.2682119205298013, |
| "grad_norm": 0.609375, |
| "learning_rate": 3.885795630601883e-06, |
| "loss": 1.0380507707595825, |
| "step": 1370 |
| }, |
| { |
| "epoch": 2.271523178807947, |
| "grad_norm": 0.73046875, |
| "learning_rate": 3.873141183616928e-06, |
| "loss": 1.052344560623169, |
| "step": 1372 |
| }, |
| { |
| "epoch": 2.274834437086093, |
| "grad_norm": 0.30859375, |
| "learning_rate": 3.860531080327592e-06, |
| "loss": 0.9509193897247314, |
| "step": 1374 |
| }, |
| { |
| "epoch": 2.2781456953642385, |
| "grad_norm": 0.65625, |
| "learning_rate": 3.847965488813924e-06, |
| "loss": 1.1158931255340576, |
| "step": 1376 |
| }, |
| { |
| "epoch": 2.281456953642384, |
| "grad_norm": 0.51953125, |
| "learning_rate": 3.8354445765626675e-06, |
| "loss": 1.0930873155593872, |
| "step": 1378 |
| }, |
| { |
| "epoch": 2.2847682119205297, |
| "grad_norm": 0.76171875, |
| "learning_rate": 3.822968510465046e-06, |
| "loss": 1.0825984477996826, |
| "step": 1380 |
| }, |
| { |
| "epoch": 2.2880794701986753, |
| "grad_norm": 0.53515625, |
| "learning_rate": 3.810537456814521e-06, |
| "loss": 1.1007579565048218, |
| "step": 1382 |
| }, |
| { |
| "epoch": 2.2913907284768213, |
| "grad_norm": 0.74609375, |
| "learning_rate": 3.7981515813045858e-06, |
| "loss": 0.9964556097984314, |
| "step": 1384 |
| }, |
| { |
| "epoch": 2.294701986754967, |
| "grad_norm": 0.361328125, |
| "learning_rate": 3.785811049026554e-06, |
| "loss": 1.0913865566253662, |
| "step": 1386 |
| }, |
| { |
| "epoch": 2.2980132450331126, |
| "grad_norm": 0.4296875, |
| "learning_rate": 3.773516024467362e-06, |
| "loss": 1.0170255899429321, |
| "step": 1388 |
| }, |
| { |
| "epoch": 2.301324503311258, |
| "grad_norm": 0.2890625, |
| "learning_rate": 3.76126667150737e-06, |
| "loss": 0.9141421318054199, |
| "step": 1390 |
| }, |
| { |
| "epoch": 2.304635761589404, |
| "grad_norm": 0.357421875, |
| "learning_rate": 3.7490631534181817e-06, |
| "loss": 1.0807045698165894, |
| "step": 1392 |
| }, |
| { |
| "epoch": 2.30794701986755, |
| "grad_norm": 0.95703125, |
| "learning_rate": 3.7369056328604735e-06, |
| "loss": 1.0416104793548584, |
| "step": 1394 |
| }, |
| { |
| "epoch": 2.3112582781456954, |
| "grad_norm": 0.30078125, |
| "learning_rate": 3.7247942718818125e-06, |
| "loss": 0.938892126083374, |
| "step": 1396 |
| }, |
| { |
| "epoch": 2.314569536423841, |
| "grad_norm": 0.447265625, |
| "learning_rate": 3.7127292319145126e-06, |
| "loss": 1.0145360231399536, |
| "step": 1398 |
| }, |
| { |
| "epoch": 2.3178807947019866, |
| "grad_norm": 0.37109375, |
| "learning_rate": 3.7007106737734678e-06, |
| "loss": 1.0101609230041504, |
| "step": 1400 |
| }, |
| { |
| "epoch": 2.321192052980132, |
| "grad_norm": 0.96484375, |
| "learning_rate": 3.688738757654023e-06, |
| "loss": 1.0995274782180786, |
| "step": 1402 |
| }, |
| { |
| "epoch": 2.3245033112582782, |
| "grad_norm": 0.4140625, |
| "learning_rate": 3.6768136431298274e-06, |
| "loss": 1.0471631288528442, |
| "step": 1404 |
| }, |
| { |
| "epoch": 2.327814569536424, |
| "grad_norm": 0.625, |
| "learning_rate": 3.664935489150711e-06, |
| "loss": 1.104356050491333, |
| "step": 1406 |
| }, |
| { |
| "epoch": 2.3311258278145695, |
| "grad_norm": 0.38671875, |
| "learning_rate": 3.653104454040569e-06, |
| "loss": 1.0226653814315796, |
| "step": 1408 |
| }, |
| { |
| "epoch": 2.334437086092715, |
| "grad_norm": 0.6875, |
| "learning_rate": 3.6413206954952474e-06, |
| "loss": 1.0596727132797241, |
| "step": 1410 |
| }, |
| { |
| "epoch": 2.337748344370861, |
| "grad_norm": 0.455078125, |
| "learning_rate": 3.629584370580448e-06, |
| "loss": 1.0580487251281738, |
| "step": 1412 |
| }, |
| { |
| "epoch": 2.3410596026490067, |
| "grad_norm": 0.35546875, |
| "learning_rate": 3.6178956357296224e-06, |
| "loss": 1.0152729749679565, |
| "step": 1414 |
| }, |
| { |
| "epoch": 2.3443708609271523, |
| "grad_norm": 0.4453125, |
| "learning_rate": 3.6062546467419e-06, |
| "loss": 0.9716925024986267, |
| "step": 1416 |
| }, |
| { |
| "epoch": 2.347682119205298, |
| "grad_norm": 0.302734375, |
| "learning_rate": 3.5946615587800015e-06, |
| "loss": 1.0557975769042969, |
| "step": 1418 |
| }, |
| { |
| "epoch": 2.3509933774834435, |
| "grad_norm": 0.30078125, |
| "learning_rate": 3.58311652636818e-06, |
| "loss": 0.9758134484291077, |
| "step": 1420 |
| }, |
| { |
| "epoch": 2.3543046357615895, |
| "grad_norm": 0.390625, |
| "learning_rate": 3.5716197033901525e-06, |
| "loss": 1.0848175287246704, |
| "step": 1422 |
| }, |
| { |
| "epoch": 2.357615894039735, |
| "grad_norm": 0.6484375, |
| "learning_rate": 3.56017124308705e-06, |
| "loss": 1.0970638990402222, |
| "step": 1424 |
| }, |
| { |
| "epoch": 2.3609271523178808, |
| "grad_norm": 0.28515625, |
| "learning_rate": 3.5487712980553854e-06, |
| "loss": 1.0622012615203857, |
| "step": 1426 |
| }, |
| { |
| "epoch": 2.3642384105960264, |
| "grad_norm": 0.63671875, |
| "learning_rate": 3.537420020245004e-06, |
| "loss": 1.0812063217163086, |
| "step": 1428 |
| }, |
| { |
| "epoch": 2.3675496688741724, |
| "grad_norm": 0.365234375, |
| "learning_rate": 3.526117560957071e-06, |
| "loss": 0.9727545380592346, |
| "step": 1430 |
| }, |
| { |
| "epoch": 2.370860927152318, |
| "grad_norm": 0.30078125, |
| "learning_rate": 3.5148640708420447e-06, |
| "loss": 1.0257072448730469, |
| "step": 1432 |
| }, |
| { |
| "epoch": 2.3741721854304636, |
| "grad_norm": 0.388671875, |
| "learning_rate": 3.5036596998976788e-06, |
| "loss": 1.0375983715057373, |
| "step": 1434 |
| }, |
| { |
| "epoch": 2.377483443708609, |
| "grad_norm": 0.51953125, |
| "learning_rate": 3.492504597467012e-06, |
| "loss": 1.144998550415039, |
| "step": 1436 |
| }, |
| { |
| "epoch": 2.380794701986755, |
| "grad_norm": 0.44140625, |
| "learning_rate": 3.4813989122363892e-06, |
| "loss": 1.0561128854751587, |
| "step": 1438 |
| }, |
| { |
| "epoch": 2.384105960264901, |
| "grad_norm": 0.30859375, |
| "learning_rate": 3.4703427922334675e-06, |
| "loss": 1.039366364479065, |
| "step": 1440 |
| }, |
| { |
| "epoch": 2.3874172185430464, |
| "grad_norm": 1.0078125, |
| "learning_rate": 3.459336384825255e-06, |
| "loss": 1.0634366273880005, |
| "step": 1442 |
| }, |
| { |
| "epoch": 2.390728476821192, |
| "grad_norm": 0.376953125, |
| "learning_rate": 3.4483798367161363e-06, |
| "loss": 1.0396180152893066, |
| "step": 1444 |
| }, |
| { |
| "epoch": 2.3940397350993377, |
| "grad_norm": 0.3359375, |
| "learning_rate": 3.4374732939459216e-06, |
| "loss": 1.0524084568023682, |
| "step": 1446 |
| }, |
| { |
| "epoch": 2.3973509933774833, |
| "grad_norm": 0.361328125, |
| "learning_rate": 3.4266169018879037e-06, |
| "loss": 1.1443829536437988, |
| "step": 1448 |
| }, |
| { |
| "epoch": 2.4006622516556293, |
| "grad_norm": 0.380859375, |
| "learning_rate": 3.4158108052469115e-06, |
| "loss": 1.0547349452972412, |
| "step": 1450 |
| }, |
| { |
| "epoch": 2.403973509933775, |
| "grad_norm": 0.59375, |
| "learning_rate": 3.405055148057391e-06, |
| "loss": 1.09640371799469, |
| "step": 1452 |
| }, |
| { |
| "epoch": 2.4072847682119205, |
| "grad_norm": 0.400390625, |
| "learning_rate": 3.394350073681477e-06, |
| "loss": 1.0381419658660889, |
| "step": 1454 |
| }, |
| { |
| "epoch": 2.410596026490066, |
| "grad_norm": 0.67578125, |
| "learning_rate": 3.383695724807092e-06, |
| "loss": 1.0096808671951294, |
| "step": 1456 |
| }, |
| { |
| "epoch": 2.4139072847682117, |
| "grad_norm": 0.3359375, |
| "learning_rate": 3.3730922434460305e-06, |
| "loss": 0.9652847051620483, |
| "step": 1458 |
| }, |
| { |
| "epoch": 2.4172185430463577, |
| "grad_norm": 0.333984375, |
| "learning_rate": 3.3625397709320806e-06, |
| "loss": 1.0267952680587769, |
| "step": 1460 |
| }, |
| { |
| "epoch": 2.4205298013245033, |
| "grad_norm": 0.392578125, |
| "learning_rate": 3.352038447919129e-06, |
| "loss": 1.1289455890655518, |
| "step": 1462 |
| }, |
| { |
| "epoch": 2.423841059602649, |
| "grad_norm": 0.421875, |
| "learning_rate": 3.3415884143792916e-06, |
| "loss": 1.0683571100234985, |
| "step": 1464 |
| }, |
| { |
| "epoch": 2.4271523178807946, |
| "grad_norm": 0.84375, |
| "learning_rate": 3.331189809601052e-06, |
| "loss": 1.0286537408828735, |
| "step": 1466 |
| }, |
| { |
| "epoch": 2.4304635761589406, |
| "grad_norm": 0.6640625, |
| "learning_rate": 3.320842772187393e-06, |
| "loss": 1.026946783065796, |
| "step": 1468 |
| }, |
| { |
| "epoch": 2.433774834437086, |
| "grad_norm": 0.58203125, |
| "learning_rate": 3.3105474400539603e-06, |
| "loss": 1.0303407907485962, |
| "step": 1470 |
| }, |
| { |
| "epoch": 2.437086092715232, |
| "grad_norm": 0.392578125, |
| "learning_rate": 3.3003039504272156e-06, |
| "loss": 0.9855753779411316, |
| "step": 1472 |
| }, |
| { |
| "epoch": 2.4403973509933774, |
| "grad_norm": 1.4140625, |
| "learning_rate": 3.2901124398426203e-06, |
| "loss": 0.9761983752250671, |
| "step": 1474 |
| }, |
| { |
| "epoch": 2.443708609271523, |
| "grad_norm": 0.34765625, |
| "learning_rate": 3.2799730441427975e-06, |
| "loss": 1.0473487377166748, |
| "step": 1476 |
| }, |
| { |
| "epoch": 2.447019867549669, |
| "grad_norm": 0.279296875, |
| "learning_rate": 3.2698858984757387e-06, |
| "loss": 1.0138802528381348, |
| "step": 1478 |
| }, |
| { |
| "epoch": 2.4503311258278146, |
| "grad_norm": 2.4375, |
| "learning_rate": 3.2598511372929895e-06, |
| "loss": 1.146519660949707, |
| "step": 1480 |
| }, |
| { |
| "epoch": 2.4536423841059603, |
| "grad_norm": 1.4921875, |
| "learning_rate": 3.249868894347866e-06, |
| "loss": 1.0525798797607422, |
| "step": 1482 |
| }, |
| { |
| "epoch": 2.456953642384106, |
| "grad_norm": 0.310546875, |
| "learning_rate": 3.239939302693667e-06, |
| "loss": 1.0201683044433594, |
| "step": 1484 |
| }, |
| { |
| "epoch": 2.460264900662252, |
| "grad_norm": 0.39453125, |
| "learning_rate": 3.230062494681905e-06, |
| "loss": 1.0771936178207397, |
| "step": 1486 |
| }, |
| { |
| "epoch": 2.4635761589403975, |
| "grad_norm": 0.62890625, |
| "learning_rate": 3.2202386019605365e-06, |
| "loss": 1.1066436767578125, |
| "step": 1488 |
| }, |
| { |
| "epoch": 2.466887417218543, |
| "grad_norm": 0.35546875, |
| "learning_rate": 3.21046775547221e-06, |
| "loss": 0.9931922554969788, |
| "step": 1490 |
| }, |
| { |
| "epoch": 2.4701986754966887, |
| "grad_norm": 0.5546875, |
| "learning_rate": 3.2007500854525235e-06, |
| "loss": 1.0755813121795654, |
| "step": 1492 |
| }, |
| { |
| "epoch": 2.4735099337748343, |
| "grad_norm": 0.44921875, |
| "learning_rate": 3.1910857214282827e-06, |
| "loss": 0.9923776984214783, |
| "step": 1494 |
| }, |
| { |
| "epoch": 2.47682119205298, |
| "grad_norm": 2.484375, |
| "learning_rate": 3.181474792215783e-06, |
| "loss": 1.1028560400009155, |
| "step": 1496 |
| }, |
| { |
| "epoch": 2.480132450331126, |
| "grad_norm": 0.46484375, |
| "learning_rate": 3.1719174259190804e-06, |
| "loss": 1.0993995666503906, |
| "step": 1498 |
| }, |
| { |
| "epoch": 2.4834437086092715, |
| "grad_norm": 0.337890625, |
| "learning_rate": 3.1624137499282962e-06, |
| "loss": 1.0792993307113647, |
| "step": 1500 |
| }, |
| { |
| "epoch": 2.486754966887417, |
| "grad_norm": 0.4765625, |
| "learning_rate": 3.1529638909179116e-06, |
| "loss": 1.0069727897644043, |
| "step": 1502 |
| }, |
| { |
| "epoch": 2.4900662251655628, |
| "grad_norm": 0.3203125, |
| "learning_rate": 3.1435679748450805e-06, |
| "loss": 1.0655741691589355, |
| "step": 1504 |
| }, |
| { |
| "epoch": 2.493377483443709, |
| "grad_norm": 0.40234375, |
| "learning_rate": 3.1342261269479545e-06, |
| "loss": 1.008230209350586, |
| "step": 1506 |
| }, |
| { |
| "epoch": 2.4966887417218544, |
| "grad_norm": 0.61328125, |
| "learning_rate": 3.124938471744006e-06, |
| "loss": 1.0085946321487427, |
| "step": 1508 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 0.376953125, |
| "learning_rate": 3.1157051330283802e-06, |
| "loss": 1.1044775247573853, |
| "step": 1510 |
| }, |
| { |
| "epoch": 2.5033112582781456, |
| "grad_norm": 0.703125, |
| "learning_rate": 3.1065262338722284e-06, |
| "loss": 1.0872358083724976, |
| "step": 1512 |
| }, |
| { |
| "epoch": 2.506622516556291, |
| "grad_norm": 0.53515625, |
| "learning_rate": 3.0974018966210855e-06, |
| "loss": 1.109554409980774, |
| "step": 1514 |
| }, |
| { |
| "epoch": 2.5099337748344372, |
| "grad_norm": 1.8984375, |
| "learning_rate": 3.088332242893225e-06, |
| "loss": 1.1079566478729248, |
| "step": 1516 |
| }, |
| { |
| "epoch": 2.513245033112583, |
| "grad_norm": 0.70703125, |
| "learning_rate": 3.0793173935780485e-06, |
| "loss": 1.1112803220748901, |
| "step": 1518 |
| }, |
| { |
| "epoch": 2.5165562913907285, |
| "grad_norm": 0.357421875, |
| "learning_rate": 3.070357468834467e-06, |
| "loss": 1.1184546947479248, |
| "step": 1520 |
| }, |
| { |
| "epoch": 2.519867549668874, |
| "grad_norm": 0.59375, |
| "learning_rate": 3.061452588089301e-06, |
| "loss": 1.0849051475524902, |
| "step": 1522 |
| }, |
| { |
| "epoch": 2.52317880794702, |
| "grad_norm": 0.400390625, |
| "learning_rate": 3.0526028700356934e-06, |
| "loss": 1.0355197191238403, |
| "step": 1524 |
| }, |
| { |
| "epoch": 2.5264900662251657, |
| "grad_norm": 1.3515625, |
| "learning_rate": 3.0438084326315195e-06, |
| "loss": 1.0885202884674072, |
| "step": 1526 |
| }, |
| { |
| "epoch": 2.5298013245033113, |
| "grad_norm": 0.96875, |
| "learning_rate": 3.0350693930978228e-06, |
| "loss": 1.0239211320877075, |
| "step": 1528 |
| }, |
| { |
| "epoch": 2.533112582781457, |
| "grad_norm": 0.31640625, |
| "learning_rate": 3.0263858679172452e-06, |
| "loss": 0.9950514435768127, |
| "step": 1530 |
| }, |
| { |
| "epoch": 2.5364238410596025, |
| "grad_norm": 0.7421875, |
| "learning_rate": 3.0177579728324794e-06, |
| "loss": 1.006035327911377, |
| "step": 1532 |
| }, |
| { |
| "epoch": 2.539735099337748, |
| "grad_norm": 0.64453125, |
| "learning_rate": 3.0091858228447235e-06, |
| "loss": 1.0517382621765137, |
| "step": 1534 |
| }, |
| { |
| "epoch": 2.543046357615894, |
| "grad_norm": 0.427734375, |
| "learning_rate": 3.0006695322121533e-06, |
| "loss": 1.0941227674484253, |
| "step": 1536 |
| }, |
| { |
| "epoch": 2.5463576158940397, |
| "grad_norm": 1.140625, |
| "learning_rate": 2.9922092144483897e-06, |
| "loss": 1.1483227014541626, |
| "step": 1538 |
| }, |
| { |
| "epoch": 2.5496688741721854, |
| "grad_norm": 1.3828125, |
| "learning_rate": 2.9838049823209965e-06, |
| "loss": 1.0933794975280762, |
| "step": 1540 |
| }, |
| { |
| "epoch": 2.5529801324503314, |
| "grad_norm": 0.55859375, |
| "learning_rate": 2.9754569478499683e-06, |
| "loss": 1.0416162014007568, |
| "step": 1542 |
| }, |
| { |
| "epoch": 2.556291390728477, |
| "grad_norm": 0.58984375, |
| "learning_rate": 2.9671652223062422e-06, |
| "loss": 1.0642694234848022, |
| "step": 1544 |
| }, |
| { |
| "epoch": 2.5596026490066226, |
| "grad_norm": 0.40234375, |
| "learning_rate": 2.958929916210215e-06, |
| "loss": 1.0878373384475708, |
| "step": 1546 |
| }, |
| { |
| "epoch": 2.562913907284768, |
| "grad_norm": 0.515625, |
| "learning_rate": 2.9507511393302675e-06, |
| "loss": 1.1222093105316162, |
| "step": 1548 |
| }, |
| { |
| "epoch": 2.566225165562914, |
| "grad_norm": 2.359375, |
| "learning_rate": 2.9426290006813043e-06, |
| "loss": 1.0746393203735352, |
| "step": 1550 |
| }, |
| { |
| "epoch": 2.5695364238410594, |
| "grad_norm": 0.43359375, |
| "learning_rate": 2.9345636085232965e-06, |
| "loss": 1.1305041313171387, |
| "step": 1552 |
| }, |
| { |
| "epoch": 2.5728476821192054, |
| "grad_norm": 0.318359375, |
| "learning_rate": 2.9265550703598437e-06, |
| "loss": 1.0597765445709229, |
| "step": 1554 |
| }, |
| { |
| "epoch": 2.576158940397351, |
| "grad_norm": 0.404296875, |
| "learning_rate": 2.9186034929367357e-06, |
| "loss": 1.111279010772705, |
| "step": 1556 |
| }, |
| { |
| "epoch": 2.5794701986754967, |
| "grad_norm": 1.09375, |
| "learning_rate": 2.910708982240538e-06, |
| "loss": 1.0620553493499756, |
| "step": 1558 |
| }, |
| { |
| "epoch": 2.5827814569536423, |
| "grad_norm": 0.39453125, |
| "learning_rate": 2.9028716434971685e-06, |
| "loss": 0.9780864715576172, |
| "step": 1560 |
| }, |
| { |
| "epoch": 2.5860927152317883, |
| "grad_norm": 0.443359375, |
| "learning_rate": 2.895091581170501e-06, |
| "loss": 1.040873408317566, |
| "step": 1562 |
| }, |
| { |
| "epoch": 2.589403973509934, |
| "grad_norm": 0.484375, |
| "learning_rate": 2.887368898960976e-06, |
| "loss": 1.1218167543411255, |
| "step": 1564 |
| }, |
| { |
| "epoch": 2.5927152317880795, |
| "grad_norm": 0.30078125, |
| "learning_rate": 2.87970369980421e-06, |
| "loss": 1.0425817966461182, |
| "step": 1566 |
| }, |
| { |
| "epoch": 2.596026490066225, |
| "grad_norm": 1.03125, |
| "learning_rate": 2.872096085869631e-06, |
| "loss": 1.157275915145874, |
| "step": 1568 |
| }, |
| { |
| "epoch": 2.5993377483443707, |
| "grad_norm": 0.318359375, |
| "learning_rate": 2.8645461585591106e-06, |
| "loss": 1.0908249616622925, |
| "step": 1570 |
| }, |
| { |
| "epoch": 2.6026490066225163, |
| "grad_norm": 0.328125, |
| "learning_rate": 2.8570540185056215e-06, |
| "loss": 1.0348210334777832, |
| "step": 1572 |
| }, |
| { |
| "epoch": 2.6059602649006623, |
| "grad_norm": 0.375, |
| "learning_rate": 2.84961976557188e-06, |
| "loss": 1.02035391330719, |
| "step": 1574 |
| }, |
| { |
| "epoch": 2.609271523178808, |
| "grad_norm": 0.353515625, |
| "learning_rate": 2.8422434988490363e-06, |
| "loss": 1.078000545501709, |
| "step": 1576 |
| }, |
| { |
| "epoch": 2.6125827814569536, |
| "grad_norm": 0.44140625, |
| "learning_rate": 2.8349253166553342e-06, |
| "loss": 1.1093229055404663, |
| "step": 1578 |
| }, |
| { |
| "epoch": 2.6158940397350996, |
| "grad_norm": 0.400390625, |
| "learning_rate": 2.8276653165348124e-06, |
| "loss": 1.1029249429702759, |
| "step": 1580 |
| }, |
| { |
| "epoch": 2.619205298013245, |
| "grad_norm": 0.578125, |
| "learning_rate": 2.820463595256001e-06, |
| "loss": 1.0593039989471436, |
| "step": 1582 |
| }, |
| { |
| "epoch": 2.622516556291391, |
| "grad_norm": 0.416015625, |
| "learning_rate": 2.813320248810631e-06, |
| "loss": 1.1073625087738037, |
| "step": 1584 |
| }, |
| { |
| "epoch": 2.6258278145695364, |
| "grad_norm": 0.8671875, |
| "learning_rate": 2.8062353724123554e-06, |
| "loss": 0.997706413269043, |
| "step": 1586 |
| }, |
| { |
| "epoch": 2.629139072847682, |
| "grad_norm": 0.322265625, |
| "learning_rate": 2.799209060495479e-06, |
| "loss": 1.0507164001464844, |
| "step": 1588 |
| }, |
| { |
| "epoch": 2.6324503311258276, |
| "grad_norm": 0.4296875, |
| "learning_rate": 2.792241406713703e-06, |
| "loss": 1.0913563966751099, |
| "step": 1590 |
| }, |
| { |
| "epoch": 2.6357615894039736, |
| "grad_norm": 0.279296875, |
| "learning_rate": 2.785332503938872e-06, |
| "loss": 1.0073961019515991, |
| "step": 1592 |
| }, |
| { |
| "epoch": 2.6390728476821192, |
| "grad_norm": 0.60546875, |
| "learning_rate": 2.7784824442597397e-06, |
| "loss": 0.9370295405387878, |
| "step": 1594 |
| }, |
| { |
| "epoch": 2.642384105960265, |
| "grad_norm": 0.63671875, |
| "learning_rate": 2.77169131898074e-06, |
| "loss": 1.0220690965652466, |
| "step": 1596 |
| }, |
| { |
| "epoch": 2.6456953642384105, |
| "grad_norm": 0.42578125, |
| "learning_rate": 2.7649592186207713e-06, |
| "loss": 1.018792748451233, |
| "step": 1598 |
| }, |
| { |
| "epoch": 2.6490066225165565, |
| "grad_norm": 0.47265625, |
| "learning_rate": 2.7582862329119876e-06, |
| "loss": 0.9850583076477051, |
| "step": 1600 |
| }, |
| { |
| "epoch": 2.652317880794702, |
| "grad_norm": 0.62109375, |
| "learning_rate": 2.751672450798603e-06, |
| "loss": 1.1173804998397827, |
| "step": 1602 |
| }, |
| { |
| "epoch": 2.6556291390728477, |
| "grad_norm": 0.46875, |
| "learning_rate": 2.745117960435711e-06, |
| "loss": 0.9903439283370972, |
| "step": 1604 |
| }, |
| { |
| "epoch": 2.6589403973509933, |
| "grad_norm": 0.396484375, |
| "learning_rate": 2.7386228491880994e-06, |
| "loss": 1.0194555521011353, |
| "step": 1606 |
| }, |
| { |
| "epoch": 2.662251655629139, |
| "grad_norm": 0.416015625, |
| "learning_rate": 2.7321872036290987e-06, |
| "loss": 1.010141134262085, |
| "step": 1608 |
| }, |
| { |
| "epoch": 2.6655629139072845, |
| "grad_norm": 0.31640625, |
| "learning_rate": 2.725811109539415e-06, |
| "loss": 1.0798771381378174, |
| "step": 1610 |
| }, |
| { |
| "epoch": 2.6688741721854305, |
| "grad_norm": 0.53125, |
| "learning_rate": 2.719494651905997e-06, |
| "loss": 0.9975613355636597, |
| "step": 1612 |
| }, |
| { |
| "epoch": 2.672185430463576, |
| "grad_norm": 0.37109375, |
| "learning_rate": 2.7132379149208987e-06, |
| "loss": 1.0866070985794067, |
| "step": 1614 |
| }, |
| { |
| "epoch": 2.6754966887417218, |
| "grad_norm": 0.421875, |
| "learning_rate": 2.707040981980156e-06, |
| "loss": 1.053512692451477, |
| "step": 1616 |
| }, |
| { |
| "epoch": 2.678807947019868, |
| "grad_norm": 0.3515625, |
| "learning_rate": 2.700903935682677e-06, |
| "loss": 1.0476690530776978, |
| "step": 1618 |
| }, |
| { |
| "epoch": 2.6821192052980134, |
| "grad_norm": 0.78515625, |
| "learning_rate": 2.6948268578291427e-06, |
| "loss": 1.074185848236084, |
| "step": 1620 |
| }, |
| { |
| "epoch": 2.685430463576159, |
| "grad_norm": 3.90625, |
| "learning_rate": 2.688809829420914e-06, |
| "loss": 1.0318957567214966, |
| "step": 1622 |
| }, |
| { |
| "epoch": 2.6887417218543046, |
| "grad_norm": 0.41015625, |
| "learning_rate": 2.682852930658951e-06, |
| "loss": 1.0365896224975586, |
| "step": 1624 |
| }, |
| { |
| "epoch": 2.69205298013245, |
| "grad_norm": 0.3671875, |
| "learning_rate": 2.676956240942749e-06, |
| "loss": 0.9852394461631775, |
| "step": 1626 |
| }, |
| { |
| "epoch": 2.695364238410596, |
| "grad_norm": 0.7265625, |
| "learning_rate": 2.6711198388692742e-06, |
| "loss": 1.0080279111862183, |
| "step": 1628 |
| }, |
| { |
| "epoch": 2.698675496688742, |
| "grad_norm": 0.35546875, |
| "learning_rate": 2.6653438022319214e-06, |
| "loss": 1.0633718967437744, |
| "step": 1630 |
| }, |
| { |
| "epoch": 2.7019867549668874, |
| "grad_norm": 0.625, |
| "learning_rate": 2.659628208019472e-06, |
| "loss": 1.099438190460205, |
| "step": 1632 |
| }, |
| { |
| "epoch": 2.705298013245033, |
| "grad_norm": 0.71875, |
| "learning_rate": 2.6539731324150723e-06, |
| "loss": 0.9704924821853638, |
| "step": 1634 |
| }, |
| { |
| "epoch": 2.7086092715231787, |
| "grad_norm": 0.310546875, |
| "learning_rate": 2.6483786507952167e-06, |
| "loss": 0.9796273708343506, |
| "step": 1636 |
| }, |
| { |
| "epoch": 2.7119205298013247, |
| "grad_norm": 0.431640625, |
| "learning_rate": 2.642844837728739e-06, |
| "loss": 1.0218433141708374, |
| "step": 1638 |
| }, |
| { |
| "epoch": 2.7152317880794703, |
| "grad_norm": 0.5234375, |
| "learning_rate": 2.637371766975826e-06, |
| "loss": 0.9966109395027161, |
| "step": 1640 |
| }, |
| { |
| "epoch": 2.718543046357616, |
| "grad_norm": 0.466796875, |
| "learning_rate": 2.6319595114870276e-06, |
| "loss": 1.0867879390716553, |
| "step": 1642 |
| }, |
| { |
| "epoch": 2.7218543046357615, |
| "grad_norm": 0.40234375, |
| "learning_rate": 2.62660814340229e-06, |
| "loss": 1.021042823791504, |
| "step": 1644 |
| }, |
| { |
| "epoch": 2.725165562913907, |
| "grad_norm": 0.296875, |
| "learning_rate": 2.621317734049987e-06, |
| "loss": 1.0033985376358032, |
| "step": 1646 |
| }, |
| { |
| "epoch": 2.7284768211920527, |
| "grad_norm": 0.490234375, |
| "learning_rate": 2.6160883539459785e-06, |
| "loss": 1.008457064628601, |
| "step": 1648 |
| }, |
| { |
| "epoch": 2.7317880794701987, |
| "grad_norm": 0.5078125, |
| "learning_rate": 2.610920072792662e-06, |
| "loss": 1.0715134143829346, |
| "step": 1650 |
| }, |
| { |
| "epoch": 2.7350993377483444, |
| "grad_norm": 0.390625, |
| "learning_rate": 2.6058129594780482e-06, |
| "loss": 1.0398625135421753, |
| "step": 1652 |
| }, |
| { |
| "epoch": 2.73841059602649, |
| "grad_norm": 0.90625, |
| "learning_rate": 2.600767082074842e-06, |
| "loss": 1.050590991973877, |
| "step": 1654 |
| }, |
| { |
| "epoch": 2.741721854304636, |
| "grad_norm": 0.39453125, |
| "learning_rate": 2.5957825078395354e-06, |
| "loss": 1.068892478942871, |
| "step": 1656 |
| }, |
| { |
| "epoch": 2.7450331125827816, |
| "grad_norm": 0.3984375, |
| "learning_rate": 2.590859303211511e-06, |
| "loss": 1.0792319774627686, |
| "step": 1658 |
| }, |
| { |
| "epoch": 2.748344370860927, |
| "grad_norm": 0.41015625, |
| "learning_rate": 2.5859975338121514e-06, |
| "loss": 1.0801936388015747, |
| "step": 1660 |
| }, |
| { |
| "epoch": 2.751655629139073, |
| "grad_norm": 0.73046875, |
| "learning_rate": 2.581197264443977e-06, |
| "loss": 1.0703206062316895, |
| "step": 1662 |
| }, |
| { |
| "epoch": 2.7549668874172184, |
| "grad_norm": 1.15625, |
| "learning_rate": 2.5764585590897676e-06, |
| "loss": 1.1012338399887085, |
| "step": 1664 |
| }, |
| { |
| "epoch": 2.758278145695364, |
| "grad_norm": 0.439453125, |
| "learning_rate": 2.5717814809117207e-06, |
| "loss": 1.0293129682540894, |
| "step": 1666 |
| }, |
| { |
| "epoch": 2.76158940397351, |
| "grad_norm": 0.47265625, |
| "learning_rate": 2.567166092250602e-06, |
| "loss": 1.0774444341659546, |
| "step": 1668 |
| }, |
| { |
| "epoch": 2.7649006622516556, |
| "grad_norm": 0.36328125, |
| "learning_rate": 2.5626124546249205e-06, |
| "loss": 1.062659740447998, |
| "step": 1670 |
| }, |
| { |
| "epoch": 2.7682119205298013, |
| "grad_norm": 0.7109375, |
| "learning_rate": 2.558120628730104e-06, |
| "loss": 0.9909599423408508, |
| "step": 1672 |
| }, |
| { |
| "epoch": 2.7715231788079473, |
| "grad_norm": 0.76953125, |
| "learning_rate": 2.553690674437692e-06, |
| "loss": 1.0109676122665405, |
| "step": 1674 |
| }, |
| { |
| "epoch": 2.774834437086093, |
| "grad_norm": 1.3671875, |
| "learning_rate": 2.5493226507945386e-06, |
| "loss": 1.0199503898620605, |
| "step": 1676 |
| }, |
| { |
| "epoch": 2.7781456953642385, |
| "grad_norm": 0.34765625, |
| "learning_rate": 2.545016616022023e-06, |
| "loss": 1.0352284908294678, |
| "step": 1678 |
| }, |
| { |
| "epoch": 2.781456953642384, |
| "grad_norm": 0.396484375, |
| "learning_rate": 2.5407726275152766e-06, |
| "loss": 1.067415714263916, |
| "step": 1680 |
| }, |
| { |
| "epoch": 2.7847682119205297, |
| "grad_norm": 0.5703125, |
| "learning_rate": 2.5365907418424134e-06, |
| "loss": 1.017134428024292, |
| "step": 1682 |
| }, |
| { |
| "epoch": 2.7880794701986753, |
| "grad_norm": 0.578125, |
| "learning_rate": 2.5324710147437826e-06, |
| "loss": 1.0925308465957642, |
| "step": 1684 |
| }, |
| { |
| "epoch": 2.7913907284768213, |
| "grad_norm": 0.51953125, |
| "learning_rate": 2.5284135011312176e-06, |
| "loss": 1.0486555099487305, |
| "step": 1686 |
| }, |
| { |
| "epoch": 2.794701986754967, |
| "grad_norm": 0.38671875, |
| "learning_rate": 2.5244182550873128e-06, |
| "loss": 1.0359236001968384, |
| "step": 1688 |
| }, |
| { |
| "epoch": 2.7980132450331126, |
| "grad_norm": 0.330078125, |
| "learning_rate": 2.5204853298646943e-06, |
| "loss": 1.095540165901184, |
| "step": 1690 |
| }, |
| { |
| "epoch": 2.801324503311258, |
| "grad_norm": 0.4375, |
| "learning_rate": 2.5166147778853166e-06, |
| "loss": 1.0864918231964111, |
| "step": 1692 |
| }, |
| { |
| "epoch": 2.804635761589404, |
| "grad_norm": 0.515625, |
| "learning_rate": 2.512806650739761e-06, |
| "loss": 1.026558518409729, |
| "step": 1694 |
| }, |
| { |
| "epoch": 2.80794701986755, |
| "grad_norm": 0.4140625, |
| "learning_rate": 2.5090609991865466e-06, |
| "loss": 1.148924708366394, |
| "step": 1696 |
| }, |
| { |
| "epoch": 2.8112582781456954, |
| "grad_norm": 0.4140625, |
| "learning_rate": 2.5053778731514587e-06, |
| "loss": 0.9244303703308105, |
| "step": 1698 |
| }, |
| { |
| "epoch": 2.814569536423841, |
| "grad_norm": 1.078125, |
| "learning_rate": 2.5017573217268762e-06, |
| "loss": 1.0666385889053345, |
| "step": 1700 |
| }, |
| { |
| "epoch": 2.8178807947019866, |
| "grad_norm": 0.412109375, |
| "learning_rate": 2.4981993931711243e-06, |
| "loss": 1.0128355026245117, |
| "step": 1702 |
| }, |
| { |
| "epoch": 2.821192052980132, |
| "grad_norm": 0.44140625, |
| "learning_rate": 2.494704134907825e-06, |
| "loss": 1.0081136226654053, |
| "step": 1704 |
| }, |
| { |
| "epoch": 2.8245033112582782, |
| "grad_norm": 0.490234375, |
| "learning_rate": 2.491271593525272e-06, |
| "loss": 1.0477370023727417, |
| "step": 1706 |
| }, |
| { |
| "epoch": 2.827814569536424, |
| "grad_norm": 0.9453125, |
| "learning_rate": 2.487901814775802e-06, |
| "loss": 1.1048303842544556, |
| "step": 1708 |
| }, |
| { |
| "epoch": 2.8311258278145695, |
| "grad_norm": 0.546875, |
| "learning_rate": 2.484594843575192e-06, |
| "loss": 1.0124863386154175, |
| "step": 1710 |
| }, |
| { |
| "epoch": 2.8344370860927155, |
| "grad_norm": 0.474609375, |
| "learning_rate": 2.4813507240020545e-06, |
| "loss": 0.9982864260673523, |
| "step": 1712 |
| }, |
| { |
| "epoch": 2.837748344370861, |
| "grad_norm": 0.671875, |
| "learning_rate": 2.478169499297254e-06, |
| "loss": 1.0384469032287598, |
| "step": 1714 |
| }, |
| { |
| "epoch": 2.8410596026490067, |
| "grad_norm": 0.66796875, |
| "learning_rate": 2.475051211863329e-06, |
| "loss": 1.0405954122543335, |
| "step": 1716 |
| }, |
| { |
| "epoch": 2.8443708609271523, |
| "grad_norm": 0.369140625, |
| "learning_rate": 2.471995903263928e-06, |
| "loss": 1.0376397371292114, |
| "step": 1718 |
| }, |
| { |
| "epoch": 2.847682119205298, |
| "grad_norm": 0.6640625, |
| "learning_rate": 2.4690036142232548e-06, |
| "loss": 1.0328714847564697, |
| "step": 1720 |
| }, |
| { |
| "epoch": 2.8509933774834435, |
| "grad_norm": 0.32421875, |
| "learning_rate": 2.4660743846255235e-06, |
| "loss": 1.0731256008148193, |
| "step": 1722 |
| }, |
| { |
| "epoch": 2.8543046357615895, |
| "grad_norm": 0.330078125, |
| "learning_rate": 2.463208253514432e-06, |
| "loss": 0.9934253692626953, |
| "step": 1724 |
| }, |
| { |
| "epoch": 2.857615894039735, |
| "grad_norm": 0.2734375, |
| "learning_rate": 2.460405259092636e-06, |
| "loss": 1.0437015295028687, |
| "step": 1726 |
| }, |
| { |
| "epoch": 2.8609271523178808, |
| "grad_norm": 0.5, |
| "learning_rate": 2.4576654387212443e-06, |
| "loss": 1.0205596685409546, |
| "step": 1728 |
| }, |
| { |
| "epoch": 2.8642384105960264, |
| "grad_norm": 0.392578125, |
| "learning_rate": 2.4549888289193174e-06, |
| "loss": 1.0206191539764404, |
| "step": 1730 |
| }, |
| { |
| "epoch": 2.8675496688741724, |
| "grad_norm": 0.306640625, |
| "learning_rate": 2.4523754653633833e-06, |
| "loss": 1.005540132522583, |
| "step": 1732 |
| }, |
| { |
| "epoch": 2.870860927152318, |
| "grad_norm": 0.56640625, |
| "learning_rate": 2.4498253828869606e-06, |
| "loss": 1.1127885580062866, |
| "step": 1734 |
| }, |
| { |
| "epoch": 2.8741721854304636, |
| "grad_norm": 0.53515625, |
| "learning_rate": 2.447338615480094e-06, |
| "loss": 1.0119414329528809, |
| "step": 1736 |
| }, |
| { |
| "epoch": 2.877483443708609, |
| "grad_norm": 1.203125, |
| "learning_rate": 2.4449151962889023e-06, |
| "loss": 0.9939915537834167, |
| "step": 1738 |
| }, |
| { |
| "epoch": 2.880794701986755, |
| "grad_norm": 0.349609375, |
| "learning_rate": 2.442555157615136e-06, |
| "loss": 1.1005805730819702, |
| "step": 1740 |
| }, |
| { |
| "epoch": 2.8841059602649004, |
| "grad_norm": 0.396484375, |
| "learning_rate": 2.440258530915747e-06, |
| "loss": 1.0766074657440186, |
| "step": 1742 |
| }, |
| { |
| "epoch": 2.8874172185430464, |
| "grad_norm": 0.34375, |
| "learning_rate": 2.438025346802467e-06, |
| "loss": 1.0019570589065552, |
| "step": 1744 |
| }, |
| { |
| "epoch": 2.890728476821192, |
| "grad_norm": 0.3671875, |
| "learning_rate": 2.4358556350414055e-06, |
| "loss": 1.0643962621688843, |
| "step": 1746 |
| }, |
| { |
| "epoch": 2.8940397350993377, |
| "grad_norm": 0.392578125, |
| "learning_rate": 2.433749424552646e-06, |
| "loss": 0.9804382920265198, |
| "step": 1748 |
| }, |
| { |
| "epoch": 2.8973509933774837, |
| "grad_norm": 0.76953125, |
| "learning_rate": 2.4317067434098644e-06, |
| "loss": 1.0500622987747192, |
| "step": 1750 |
| }, |
| { |
| "epoch": 2.9006622516556293, |
| "grad_norm": 0.388671875, |
| "learning_rate": 2.4297276188399545e-06, |
| "loss": 1.082712173461914, |
| "step": 1752 |
| }, |
| { |
| "epoch": 2.903973509933775, |
| "grad_norm": 0.392578125, |
| "learning_rate": 2.4278120772226646e-06, |
| "loss": 0.9876247644424438, |
| "step": 1754 |
| }, |
| { |
| "epoch": 2.9072847682119205, |
| "grad_norm": 0.35546875, |
| "learning_rate": 2.4259601440902454e-06, |
| "loss": 1.0972561836242676, |
| "step": 1756 |
| }, |
| { |
| "epoch": 2.910596026490066, |
| "grad_norm": 0.341796875, |
| "learning_rate": 2.4241718441271104e-06, |
| "loss": 0.9264360666275024, |
| "step": 1758 |
| }, |
| { |
| "epoch": 2.9139072847682117, |
| "grad_norm": 0.35546875, |
| "learning_rate": 2.4224472011695073e-06, |
| "loss": 1.1301442384719849, |
| "step": 1760 |
| }, |
| { |
| "epoch": 2.9172185430463577, |
| "grad_norm": 0.333984375, |
| "learning_rate": 2.420786238205199e-06, |
| "loss": 0.9778857231140137, |
| "step": 1762 |
| }, |
| { |
| "epoch": 2.9205298013245033, |
| "grad_norm": 0.57421875, |
| "learning_rate": 2.419188977373158e-06, |
| "loss": 1.0964045524597168, |
| "step": 1764 |
| }, |
| { |
| "epoch": 2.923841059602649, |
| "grad_norm": 0.380859375, |
| "learning_rate": 2.4176554399632713e-06, |
| "loss": 1.0145188570022583, |
| "step": 1766 |
| }, |
| { |
| "epoch": 2.9271523178807946, |
| "grad_norm": 0.53125, |
| "learning_rate": 2.4161856464160563e-06, |
| "loss": 1.0378607511520386, |
| "step": 1768 |
| }, |
| { |
| "epoch": 2.9304635761589406, |
| "grad_norm": 0.5, |
| "learning_rate": 2.4147796163223895e-06, |
| "loss": 0.9963395595550537, |
| "step": 1770 |
| }, |
| { |
| "epoch": 2.933774834437086, |
| "grad_norm": 0.5703125, |
| "learning_rate": 2.413437368423242e-06, |
| "loss": 1.0542939901351929, |
| "step": 1772 |
| }, |
| { |
| "epoch": 2.937086092715232, |
| "grad_norm": 0.361328125, |
| "learning_rate": 2.4121589206094356e-06, |
| "loss": 1.083484411239624, |
| "step": 1774 |
| }, |
| { |
| "epoch": 2.9403973509933774, |
| "grad_norm": 0.361328125, |
| "learning_rate": 2.4109442899213987e-06, |
| "loss": 0.9940117001533508, |
| "step": 1776 |
| }, |
| { |
| "epoch": 2.943708609271523, |
| "grad_norm": 0.396484375, |
| "learning_rate": 2.4097934925489416e-06, |
| "loss": 1.0698211193084717, |
| "step": 1778 |
| }, |
| { |
| "epoch": 2.9470198675496686, |
| "grad_norm": 0.28125, |
| "learning_rate": 2.40870654383104e-06, |
| "loss": 1.026236653327942, |
| "step": 1780 |
| }, |
| { |
| "epoch": 2.9503311258278146, |
| "grad_norm": 0.296875, |
| "learning_rate": 2.4076834582556318e-06, |
| "loss": 1.0298240184783936, |
| "step": 1782 |
| }, |
| { |
| "epoch": 2.9536423841059603, |
| "grad_norm": 0.45703125, |
| "learning_rate": 2.4067242494594234e-06, |
| "loss": 0.9907808899879456, |
| "step": 1784 |
| }, |
| { |
| "epoch": 2.956953642384106, |
| "grad_norm": 0.328125, |
| "learning_rate": 2.4058289302277066e-06, |
| "loss": 0.9502798914909363, |
| "step": 1786 |
| }, |
| { |
| "epoch": 2.960264900662252, |
| "grad_norm": 0.43359375, |
| "learning_rate": 2.4049975124941905e-06, |
| "loss": 1.0373636484146118, |
| "step": 1788 |
| }, |
| { |
| "epoch": 2.9635761589403975, |
| "grad_norm": 0.796875, |
| "learning_rate": 2.40423000734084e-06, |
| "loss": 1.010378360748291, |
| "step": 1790 |
| }, |
| { |
| "epoch": 2.966887417218543, |
| "grad_norm": 0.3359375, |
| "learning_rate": 2.403526424997731e-06, |
| "loss": 1.0332858562469482, |
| "step": 1792 |
| }, |
| { |
| "epoch": 2.9701986754966887, |
| "grad_norm": 0.75, |
| "learning_rate": 2.4028867748429104e-06, |
| "loss": 1.0457154512405396, |
| "step": 1794 |
| }, |
| { |
| "epoch": 2.9735099337748343, |
| "grad_norm": 0.478515625, |
| "learning_rate": 2.4023110654022747e-06, |
| "loss": 1.130010724067688, |
| "step": 1796 |
| }, |
| { |
| "epoch": 2.97682119205298, |
| "grad_norm": 0.390625, |
| "learning_rate": 2.401799304349454e-06, |
| "loss": 1.1422545909881592, |
| "step": 1798 |
| }, |
| { |
| "epoch": 2.980132450331126, |
| "grad_norm": 0.470703125, |
| "learning_rate": 2.40135149850571e-06, |
| "loss": 0.9642534255981445, |
| "step": 1800 |
| }, |
| { |
| "epoch": 2.9834437086092715, |
| "grad_norm": 0.478515625, |
| "learning_rate": 2.4009676538398466e-06, |
| "loss": 0.9972424507141113, |
| "step": 1802 |
| }, |
| { |
| "epoch": 2.986754966887417, |
| "grad_norm": 0.83984375, |
| "learning_rate": 2.400647775468129e-06, |
| "loss": 1.1147444248199463, |
| "step": 1804 |
| }, |
| { |
| "epoch": 2.9900662251655628, |
| "grad_norm": 0.388671875, |
| "learning_rate": 2.4003918676542145e-06, |
| "loss": 0.9947775602340698, |
| "step": 1806 |
| }, |
| { |
| "epoch": 2.993377483443709, |
| "grad_norm": 0.3828125, |
| "learning_rate": 2.4001999338090986e-06, |
| "loss": 1.022848129272461, |
| "step": 1808 |
| }, |
| { |
| "epoch": 2.9966887417218544, |
| "grad_norm": 0.375, |
| "learning_rate": 2.400071976491067e-06, |
| "loss": 1.0713096857070923, |
| "step": 1810 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.4140625, |
| "learning_rate": 2.4000079974056627e-06, |
| "loss": 1.0778021812438965, |
| "step": 1812 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 1812, |
| "total_flos": 4.871467377855824e+18, |
| "train_loss": 1.1715838705861805, |
| "train_runtime": 22979.7614, |
| "train_samples_per_second": 2.523, |
| "train_steps_per_second": 0.079 |
| } |
| ], |
| "logging_steps": 2, |
| "max_steps": 1812, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.871467377855824e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |