| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 3250, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0012312427856868025, |
| "grad_norm": 1.3125, |
| "learning_rate": 1.0204081632653061e-07, |
| "loss": 1.426961898803711, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.002462485571373605, |
| "grad_norm": 1.6953125, |
| "learning_rate": 3.0612244897959183e-07, |
| "loss": 1.346108078956604, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.003693728357060408, |
| "grad_norm": 2.390625, |
| "learning_rate": 5.102040816326531e-07, |
| "loss": 1.8839138746261597, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.00492497114274721, |
| "grad_norm": 3.765625, |
| "learning_rate": 7.142857142857143e-07, |
| "loss": 1.8666269779205322, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.0061562139284340135, |
| "grad_norm": 1.7109375, |
| "learning_rate": 9.183673469387756e-07, |
| "loss": 1.228968620300293, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.007387456714120816, |
| "grad_norm": 9.3125, |
| "learning_rate": 1.122448979591837e-06, |
| "loss": 1.0329455137252808, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.008618699499807618, |
| "grad_norm": 3.1875, |
| "learning_rate": 1.3265306122448982e-06, |
| "loss": 1.8221150636672974, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.00984994228549442, |
| "grad_norm": 3.234375, |
| "learning_rate": 1.5306122448979593e-06, |
| "loss": 1.8240559101104736, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.011081185071181223, |
| "grad_norm": 6.59375, |
| "learning_rate": 1.7346938775510206e-06, |
| "loss": 2.2386162281036377, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.012312427856868027, |
| "grad_norm": 7.09375, |
| "learning_rate": 1.938775510204082e-06, |
| "loss": 2.164640188217163, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.01354367064255483, |
| "grad_norm": 10.0, |
| "learning_rate": 2.1428571428571427e-06, |
| "loss": 2.490449905395508, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.014774913428241632, |
| "grad_norm": 6.1875, |
| "learning_rate": 2.3469387755102044e-06, |
| "loss": 2.384032726287842, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.016006156213928435, |
| "grad_norm": 2.453125, |
| "learning_rate": 2.5510204081632657e-06, |
| "loss": 1.432393193244934, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.017237398999615235, |
| "grad_norm": 1.4296875, |
| "learning_rate": 2.7551020408163266e-06, |
| "loss": 1.337876558303833, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.01846864178530204, |
| "grad_norm": 1.2890625, |
| "learning_rate": 2.959183673469388e-06, |
| "loss": 1.2051901817321777, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.01969988457098884, |
| "grad_norm": 1.0546875, |
| "learning_rate": 3.1632653061224496e-06, |
| "loss": 1.1560025215148926, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.020931127356675645, |
| "grad_norm": 2.46875, |
| "learning_rate": 3.3673469387755105e-06, |
| "loss": 1.1425062417984009, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.022162370142362445, |
| "grad_norm": 3.8125, |
| "learning_rate": 3.5714285714285718e-06, |
| "loss": 1.2081866264343262, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.02339361292804925, |
| "grad_norm": 8.4375, |
| "learning_rate": 3.7755102040816327e-06, |
| "loss": 2.156994104385376, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.024624855713736054, |
| "grad_norm": 14.375, |
| "learning_rate": 3.979591836734694e-06, |
| "loss": 2.4016313552856445, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.025856098499422855, |
| "grad_norm": 3.140625, |
| "learning_rate": 4.183673469387755e-06, |
| "loss": 1.7849466800689697, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.02708734128510966, |
| "grad_norm": 5.46875, |
| "learning_rate": 4.3877551020408165e-06, |
| "loss": 1.7391504049301147, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.02831858407079646, |
| "grad_norm": 5.96875, |
| "learning_rate": 4.591836734693878e-06, |
| "loss": 2.117762327194214, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.029549826856483264, |
| "grad_norm": 12.5, |
| "learning_rate": 4.795918367346939e-06, |
| "loss": 2.1546037197113037, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.030781069642170065, |
| "grad_norm": 6.375, |
| "learning_rate": 5e-06, |
| "loss": 1.6910151243209839, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.03201231242785687, |
| "grad_norm": 2.875, |
| "learning_rate": 5.204081632653062e-06, |
| "loss": 1.7401823997497559, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.03324355521354367, |
| "grad_norm": 5.3125, |
| "learning_rate": 5.408163265306123e-06, |
| "loss": 2.2959043979644775, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.03447479799923047, |
| "grad_norm": 4.59375, |
| "learning_rate": 5.6122448979591834e-06, |
| "loss": 2.2308778762817383, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.03570604078491728, |
| "grad_norm": 1.5078125, |
| "learning_rate": 5.816326530612246e-06, |
| "loss": 1.1782121658325195, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.03693728357060408, |
| "grad_norm": 1.15625, |
| "learning_rate": 6.020408163265307e-06, |
| "loss": 1.3119515180587769, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.03816852635629088, |
| "grad_norm": 4.21875, |
| "learning_rate": 6.224489795918368e-06, |
| "loss": 1.8275368213653564, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.03939976914197768, |
| "grad_norm": 3.140625, |
| "learning_rate": 6.4285714285714295e-06, |
| "loss": 1.9090592861175537, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.04063101192766449, |
| "grad_norm": 2.203125, |
| "learning_rate": 6.63265306122449e-06, |
| "loss": 1.7946950197219849, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.04186225471335129, |
| "grad_norm": 5.78125, |
| "learning_rate": 6.836734693877551e-06, |
| "loss": 1.8883665800094604, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.04309349749903809, |
| "grad_norm": 1.953125, |
| "learning_rate": 7.0408163265306125e-06, |
| "loss": 1.3253710269927979, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.04432474028472489, |
| "grad_norm": 1.578125, |
| "learning_rate": 7.244897959183675e-06, |
| "loss": 1.5811930894851685, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.0455559830704117, |
| "grad_norm": 6.875, |
| "learning_rate": 7.448979591836736e-06, |
| "loss": 2.326409339904785, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.0467872258560985, |
| "grad_norm": 6.0625, |
| "learning_rate": 7.653061224489796e-06, |
| "loss": 2.18452787399292, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.0480184686417853, |
| "grad_norm": 11.375, |
| "learning_rate": 7.857142857142858e-06, |
| "loss": 2.273136854171753, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.04924971142747211, |
| "grad_norm": 4.28125, |
| "learning_rate": 8.06122448979592e-06, |
| "loss": 2.2342689037323, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.05048095421315891, |
| "grad_norm": 7.84375, |
| "learning_rate": 8.26530612244898e-06, |
| "loss": 2.06551194190979, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.05171219699884571, |
| "grad_norm": 7.28125, |
| "learning_rate": 8.469387755102042e-06, |
| "loss": 2.0828216075897217, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.05294343978453251, |
| "grad_norm": 5.5, |
| "learning_rate": 8.673469387755103e-06, |
| "loss": 1.8301738500595093, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.05417468257021932, |
| "grad_norm": 3.046875, |
| "learning_rate": 8.877551020408163e-06, |
| "loss": 1.733726978302002, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.05540592535590612, |
| "grad_norm": 5.15625, |
| "learning_rate": 9.081632653061225e-06, |
| "loss": 2.214818239212036, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.05663716814159292, |
| "grad_norm": 3.359375, |
| "learning_rate": 9.285714285714288e-06, |
| "loss": 1.9520819187164307, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.05786841092727972, |
| "grad_norm": 1.21875, |
| "learning_rate": 9.489795918367348e-06, |
| "loss": 1.171900987625122, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.05909965371296653, |
| "grad_norm": 1.3203125, |
| "learning_rate": 9.693877551020408e-06, |
| "loss": 1.2222256660461426, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.06033089649865333, |
| "grad_norm": 5.0, |
| "learning_rate": 9.89795918367347e-06, |
| "loss": 1.6900241374969482, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.06156213928434013, |
| "grad_norm": 2.234375, |
| "learning_rate": 9.999998013185654e-06, |
| "loss": 1.8082116842269897, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.06279338207002694, |
| "grad_norm": 4.40625, |
| "learning_rate": 9.999982118682718e-06, |
| "loss": 2.431914806365967, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.06402462485571374, |
| "grad_norm": 10.0, |
| "learning_rate": 9.999950329740007e-06, |
| "loss": 2.414013624191284, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.06525586764140054, |
| "grad_norm": 5.34375, |
| "learning_rate": 9.999902646483837e-06, |
| "loss": 1.9995551109313965, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.06648711042708734, |
| "grad_norm": 3.828125, |
| "learning_rate": 9.999839069103682e-06, |
| "loss": 2.1116445064544678, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.06771835321277414, |
| "grad_norm": 9.3125, |
| "learning_rate": 9.999759597852176e-06, |
| "loss": 1.7808245420455933, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.06894959599846094, |
| "grad_norm": 48.0, |
| "learning_rate": 9.99966423304511e-06, |
| "loss": 1.754453420639038, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.07018083878414776, |
| "grad_norm": 1.8046875, |
| "learning_rate": 9.999552975061427e-06, |
| "loss": 1.0939162969589233, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.07141208156983456, |
| "grad_norm": 1.8828125, |
| "learning_rate": 9.999425824343223e-06, |
| "loss": 1.3613158464431763, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.07264332435552136, |
| "grad_norm": 2.953125, |
| "learning_rate": 9.99928278139575e-06, |
| "loss": 1.8957613706588745, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.07387456714120816, |
| "grad_norm": 2.375, |
| "learning_rate": 9.999123846787406e-06, |
| "loss": 1.8484387397766113, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.07510580992689496, |
| "grad_norm": 1.8515625, |
| "learning_rate": 9.99894902114974e-06, |
| "loss": 1.4110791683197021, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.07633705271258176, |
| "grad_norm": 2.046875, |
| "learning_rate": 9.998758305177443e-06, |
| "loss": 1.6105386018753052, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.07756829549826856, |
| "grad_norm": 1.9375, |
| "learning_rate": 9.998551699628347e-06, |
| "loss": 1.7053779363632202, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.07879953828395536, |
| "grad_norm": 2.796875, |
| "learning_rate": 9.998329205323427e-06, |
| "loss": 1.6974133253097534, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.08003078106964218, |
| "grad_norm": 1.8671875, |
| "learning_rate": 9.998090823146794e-06, |
| "loss": 1.7098684310913086, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.08126202385532898, |
| "grad_norm": 1.90625, |
| "learning_rate": 9.997836554045689e-06, |
| "loss": 1.699587345123291, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.08249326664101578, |
| "grad_norm": 3.734375, |
| "learning_rate": 9.997566399030483e-06, |
| "loss": 2.0180535316467285, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.08372450942670258, |
| "grad_norm": 4.3125, |
| "learning_rate": 9.99728035917467e-06, |
| "loss": 1.9283902645111084, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.08495575221238938, |
| "grad_norm": 3.953125, |
| "learning_rate": 9.996978435614866e-06, |
| "loss": 1.7642266750335693, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.08618699499807618, |
| "grad_norm": 3.59375, |
| "learning_rate": 9.996660629550805e-06, |
| "loss": 1.8200689554214478, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.08741823778376298, |
| "grad_norm": 3.296875, |
| "learning_rate": 9.99632694224533e-06, |
| "loss": 1.5669602155685425, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.08864948056944978, |
| "grad_norm": 2.75, |
| "learning_rate": 9.995977375024389e-06, |
| "loss": 1.6687263250350952, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.0898807233551366, |
| "grad_norm": 6.375, |
| "learning_rate": 9.995611929277029e-06, |
| "loss": 1.7212157249450684, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.0911119661408234, |
| "grad_norm": 2.8125, |
| "learning_rate": 9.9952306064554e-06, |
| "loss": 1.6282907724380493, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.0923432089265102, |
| "grad_norm": 5.6875, |
| "learning_rate": 9.994833408074736e-06, |
| "loss": 1.6188124418258667, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.093574451712197, |
| "grad_norm": 3.359375, |
| "learning_rate": 9.994420335713354e-06, |
| "loss": 1.6069148778915405, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.0948056944978838, |
| "grad_norm": 2.09375, |
| "learning_rate": 9.99399139101265e-06, |
| "loss": 1.7498996257781982, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.0960369372835706, |
| "grad_norm": 4.9375, |
| "learning_rate": 9.99354657567709e-06, |
| "loss": 1.7885342836380005, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.0972681800692574, |
| "grad_norm": 3.109375, |
| "learning_rate": 9.993085891474208e-06, |
| "loss": 1.6744879484176636, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.09849942285494422, |
| "grad_norm": 4.65625, |
| "learning_rate": 9.99260934023459e-06, |
| "loss": 1.71375572681427, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.09973066564063102, |
| "grad_norm": 1.3359375, |
| "learning_rate": 9.992116923851869e-06, |
| "loss": 1.1846431493759155, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.10096190842631782, |
| "grad_norm": 1.5390625, |
| "learning_rate": 9.99160864428273e-06, |
| "loss": 1.2758105993270874, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.10219315121200462, |
| "grad_norm": 0.9765625, |
| "learning_rate": 9.991084503546882e-06, |
| "loss": 1.1898852586746216, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.10342439399769142, |
| "grad_norm": 1.2265625, |
| "learning_rate": 9.99054450372707e-06, |
| "loss": 1.1127582788467407, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.10465563678337822, |
| "grad_norm": 5.375, |
| "learning_rate": 9.989988646969049e-06, |
| "loss": 1.9128127098083496, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.10588687956906502, |
| "grad_norm": 3.8125, |
| "learning_rate": 9.989416935481586e-06, |
| "loss": 1.824889063835144, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.10711812235475182, |
| "grad_norm": 2.28125, |
| "learning_rate": 9.988829371536453e-06, |
| "loss": 1.4687048196792603, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.10834936514043864, |
| "grad_norm": 2.046875, |
| "learning_rate": 9.988225957468409e-06, |
| "loss": 1.603397011756897, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.10958060792612544, |
| "grad_norm": 2.671875, |
| "learning_rate": 9.987606695675196e-06, |
| "loss": 1.3623863458633423, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.11081185071181224, |
| "grad_norm": 1.84375, |
| "learning_rate": 9.986971588617529e-06, |
| "loss": 1.2137137651443481, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.11204309349749904, |
| "grad_norm": 5.65625, |
| "learning_rate": 9.986320638819092e-06, |
| "loss": 1.8477953672409058, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.11327433628318584, |
| "grad_norm": 5.03125, |
| "learning_rate": 9.98565384886651e-06, |
| "loss": 1.6397606134414673, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.11450557906887264, |
| "grad_norm": 1.9453125, |
| "learning_rate": 9.984971221409363e-06, |
| "loss": 1.579763412475586, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.11573682185455944, |
| "grad_norm": 2.03125, |
| "learning_rate": 9.984272759160156e-06, |
| "loss": 1.582782506942749, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.11696806464024626, |
| "grad_norm": 2.0625, |
| "learning_rate": 9.983558464894318e-06, |
| "loss": 1.4831057786941528, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.11819930742593306, |
| "grad_norm": 2.9375, |
| "learning_rate": 9.982828341450193e-06, |
| "loss": 1.6540857553482056, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.11943055021161986, |
| "grad_norm": 1.203125, |
| "learning_rate": 9.982082391729014e-06, |
| "loss": 1.1849217414855957, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.12066179299730666, |
| "grad_norm": 0.92578125, |
| "learning_rate": 9.981320618694908e-06, |
| "loss": 1.2112078666687012, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.12189303578299346, |
| "grad_norm": 1.875, |
| "learning_rate": 9.98054302537488e-06, |
| "loss": 1.2096866369247437, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.12312427856868026, |
| "grad_norm": 2.015625, |
| "learning_rate": 9.979749614858793e-06, |
| "loss": 1.3210318088531494, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.12435552135436706, |
| "grad_norm": 1.2109375, |
| "learning_rate": 9.978940390299372e-06, |
| "loss": 1.232903003692627, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.12558676414005387, |
| "grad_norm": 9.0625, |
| "learning_rate": 9.978115354912168e-06, |
| "loss": 1.129847526550293, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.12681800692574066, |
| "grad_norm": 2.546875, |
| "learning_rate": 9.977274511975564e-06, |
| "loss": 1.5993540287017822, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.12804924971142748, |
| "grad_norm": 2.578125, |
| "learning_rate": 9.976417864830761e-06, |
| "loss": 1.5882292985916138, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.12928049249711426, |
| "grad_norm": 8.3125, |
| "learning_rate": 9.97554541688175e-06, |
| "loss": 0.9923779964447021, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.13051173528280108, |
| "grad_norm": 1.15625, |
| "learning_rate": 9.974657171595321e-06, |
| "loss": 1.1275218725204468, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.1317429780684879, |
| "grad_norm": 2.03125, |
| "learning_rate": 9.973753132501028e-06, |
| "loss": 1.521848201751709, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.13297422085417468, |
| "grad_norm": 3.859375, |
| "learning_rate": 9.972833303191184e-06, |
| "loss": 1.5856269598007202, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.1342054636398615, |
| "grad_norm": 2.84375, |
| "learning_rate": 9.971897687320847e-06, |
| "loss": 1.6335004568099976, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.13543670642554828, |
| "grad_norm": 2.625, |
| "learning_rate": 9.970946288607809e-06, |
| "loss": 1.616341471672058, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.1366679492112351, |
| "grad_norm": 1.421875, |
| "learning_rate": 9.969979110832574e-06, |
| "loss": 1.0930131673812866, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.13789919199692188, |
| "grad_norm": 1.0078125, |
| "learning_rate": 9.968996157838345e-06, |
| "loss": 0.9788625240325928, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.1391304347826087, |
| "grad_norm": 6.4375, |
| "learning_rate": 9.967997433531014e-06, |
| "loss": 1.7790307998657227, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.1403616775682955, |
| "grad_norm": 5.53125, |
| "learning_rate": 9.966982941879135e-06, |
| "loss": 1.9695312976837158, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.1415929203539823, |
| "grad_norm": 2.375, |
| "learning_rate": 9.965952686913926e-06, |
| "loss": 1.5633316040039062, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.1428241631396691, |
| "grad_norm": 2.78125, |
| "learning_rate": 9.964906672729232e-06, |
| "loss": 1.5933005809783936, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.1440554059253559, |
| "grad_norm": 1.0703125, |
| "learning_rate": 9.963844903481525e-06, |
| "loss": 1.1736454963684082, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.14528664871104272, |
| "grad_norm": 1.4140625, |
| "learning_rate": 9.96276738338988e-06, |
| "loss": 1.1599225997924805, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.1465178914967295, |
| "grad_norm": 1.1171875, |
| "learning_rate": 9.961674116735964e-06, |
| "loss": 0.9839186668395996, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.14774913428241632, |
| "grad_norm": 0.921875, |
| "learning_rate": 9.960565107864008e-06, |
| "loss": 1.1017593145370483, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.1489803770681031, |
| "grad_norm": 2.234375, |
| "learning_rate": 9.959440361180803e-06, |
| "loss": 1.6118263006210327, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.15021161985378992, |
| "grad_norm": 1.984375, |
| "learning_rate": 9.958299881155673e-06, |
| "loss": 1.5592412948608398, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.15144286263947673, |
| "grad_norm": 4.84375, |
| "learning_rate": 9.95714367232046e-06, |
| "loss": 1.6599498987197876, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.15267410542516352, |
| "grad_norm": 2.90625, |
| "learning_rate": 9.955971739269507e-06, |
| "loss": 1.6609536409378052, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.15390534821085033, |
| "grad_norm": 5.15625, |
| "learning_rate": 9.95478408665964e-06, |
| "loss": 1.8779211044311523, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.15513659099653712, |
| "grad_norm": 2.203125, |
| "learning_rate": 9.953580719210152e-06, |
| "loss": 2.085038661956787, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.15636783378222394, |
| "grad_norm": 2.328125, |
| "learning_rate": 9.952361641702772e-06, |
| "loss": 1.5933791399002075, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.15759907656791072, |
| "grad_norm": 2.390625, |
| "learning_rate": 9.951126858981663e-06, |
| "loss": 1.5544782876968384, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.15883031935359754, |
| "grad_norm": 1.96875, |
| "learning_rate": 9.94987637595339e-06, |
| "loss": 1.189842939376831, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.16006156213928435, |
| "grad_norm": 0.90625, |
| "learning_rate": 9.94861019758691e-06, |
| "loss": 1.2431635856628418, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.16129280492497114, |
| "grad_norm": 0.490234375, |
| "learning_rate": 9.947328328913541e-06, |
| "loss": 1.260135531425476, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.16252404771065795, |
| "grad_norm": 4.6875, |
| "learning_rate": 9.946030775026954e-06, |
| "loss": 1.277756929397583, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.16375529049634474, |
| "grad_norm": 1.1015625, |
| "learning_rate": 9.944717541083144e-06, |
| "loss": 1.1507606506347656, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.16498653328203156, |
| "grad_norm": 1.875, |
| "learning_rate": 9.943388632300416e-06, |
| "loss": 1.0960922241210938, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.16621777606771834, |
| "grad_norm": 2.8125, |
| "learning_rate": 9.942044053959356e-06, |
| "loss": 1.625441551208496, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.16744901885340516, |
| "grad_norm": 2.3125, |
| "learning_rate": 9.940683811402821e-06, |
| "loss": 1.5875778198242188, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.16868026163909197, |
| "grad_norm": 3.953125, |
| "learning_rate": 9.939307910035902e-06, |
| "loss": 1.6671921014785767, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.16991150442477876, |
| "grad_norm": 2.6875, |
| "learning_rate": 9.937916355325924e-06, |
| "loss": 1.627016544342041, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.17114274721046557, |
| "grad_norm": 2.1875, |
| "learning_rate": 9.936509152802411e-06, |
| "loss": 1.5069929361343384, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.17237398999615236, |
| "grad_norm": 2.328125, |
| "learning_rate": 9.935086308057054e-06, |
| "loss": 1.5513195991516113, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.17360523278183917, |
| "grad_norm": 2.296875, |
| "learning_rate": 9.933647826743712e-06, |
| "loss": 1.5851318836212158, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.17483647556752596, |
| "grad_norm": 2.734375, |
| "learning_rate": 9.932193714578376e-06, |
| "loss": 1.6008888483047485, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.17606771835321278, |
| "grad_norm": 2.453125, |
| "learning_rate": 9.93072397733914e-06, |
| "loss": 1.5561046600341797, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.17729896113889956, |
| "grad_norm": 5.03125, |
| "learning_rate": 9.929238620866201e-06, |
| "loss": 1.55705988407135, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.17853020392458638, |
| "grad_norm": 2.609375, |
| "learning_rate": 9.927737651061806e-06, |
| "loss": 1.5694118738174438, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.1797614467102732, |
| "grad_norm": 2.390625, |
| "learning_rate": 9.926221073890254e-06, |
| "loss": 1.6122347116470337, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.18099268949595998, |
| "grad_norm": 4.21875, |
| "learning_rate": 9.924688895377858e-06, |
| "loss": 1.6376450061798096, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.1822239322816468, |
| "grad_norm": 12.25, |
| "learning_rate": 9.923141121612922e-06, |
| "loss": 1.4908955097198486, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.18345517506733358, |
| "grad_norm": 3.625, |
| "learning_rate": 9.921577758745726e-06, |
| "loss": 1.6120257377624512, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.1846864178530204, |
| "grad_norm": 2.96875, |
| "learning_rate": 9.91999881298849e-06, |
| "loss": 1.654007911682129, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.18591766063870718, |
| "grad_norm": 23.125, |
| "learning_rate": 9.918404290615358e-06, |
| "loss": 1.1257104873657227, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.187148903424394, |
| "grad_norm": 26.875, |
| "learning_rate": 9.916794197962367e-06, |
| "loss": 0.9994939565658569, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.1883801462100808, |
| "grad_norm": 2.328125, |
| "learning_rate": 9.915168541427424e-06, |
| "loss": 1.6332511901855469, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.1896113889957676, |
| "grad_norm": 3.75, |
| "learning_rate": 9.91352732747029e-06, |
| "loss": 1.7764713764190674, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.1908426317814544, |
| "grad_norm": 5.03125, |
| "learning_rate": 9.911870562612528e-06, |
| "loss": 0.8898857831954956, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.1920738745671412, |
| "grad_norm": 4.5625, |
| "learning_rate": 9.910198253437513e-06, |
| "loss": 1.0964455604553223, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.19330511735282802, |
| "grad_norm": 2.578125, |
| "learning_rate": 9.908510406590374e-06, |
| "loss": 1.5099912881851196, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.1945363601385148, |
| "grad_norm": 3.96875, |
| "learning_rate": 9.906807028777992e-06, |
| "loss": 1.5241334438323975, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.19576760292420162, |
| "grad_norm": 6.28125, |
| "learning_rate": 9.90508812676895e-06, |
| "loss": 1.4628498554229736, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.19699884570988843, |
| "grad_norm": 9.125, |
| "learning_rate": 9.903353707393529e-06, |
| "loss": 1.0449775457382202, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.19823008849557522, |
| "grad_norm": 3.359375, |
| "learning_rate": 9.90160377754366e-06, |
| "loss": 1.5512725114822388, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.19946133128126203, |
| "grad_norm": 9.5625, |
| "learning_rate": 9.89983834417292e-06, |
| "loss": 1.4961134195327759, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.20069257406694882, |
| "grad_norm": 2.328125, |
| "learning_rate": 9.898057414296481e-06, |
| "loss": 1.173062801361084, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.20192381685263563, |
| "grad_norm": 1.8984375, |
| "learning_rate": 9.896260994991093e-06, |
| "loss": 1.42746102809906, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.20315505963832242, |
| "grad_norm": 2.359375, |
| "learning_rate": 9.89444909339506e-06, |
| "loss": 1.5326873064041138, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.20438630242400924, |
| "grad_norm": 2.734375, |
| "learning_rate": 9.892621716708204e-06, |
| "loss": 1.5838617086410522, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.20561754520969602, |
| "grad_norm": 2.859375, |
| "learning_rate": 9.89077887219184e-06, |
| "loss": 1.2709892988204956, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.20684878799538284, |
| "grad_norm": 2.46875, |
| "learning_rate": 9.888920567168744e-06, |
| "loss": 1.4641536474227905, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.20808003078106965, |
| "grad_norm": 2.515625, |
| "learning_rate": 9.887046809023133e-06, |
| "loss": 1.2603651285171509, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.20931127356675644, |
| "grad_norm": 1.296875, |
| "learning_rate": 9.88515760520062e-06, |
| "loss": 1.167304277420044, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.21054251635244325, |
| "grad_norm": 3.78125, |
| "learning_rate": 9.883252963208198e-06, |
| "loss": 2.0254769325256348, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.21177375913813004, |
| "grad_norm": 3.421875, |
| "learning_rate": 9.881332890614206e-06, |
| "loss": 2.1020188331604004, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.21300500192381686, |
| "grad_norm": 2.484375, |
| "learning_rate": 9.879397395048298e-06, |
| "loss": 1.5581285953521729, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.21423624470950364, |
| "grad_norm": 11.0625, |
| "learning_rate": 9.877446484201411e-06, |
| "loss": 1.601905345916748, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.21546748749519046, |
| "grad_norm": 3.5, |
| "learning_rate": 9.875480165825742e-06, |
| "loss": 1.564781665802002, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.21669873028087727, |
| "grad_norm": 3.53125, |
| "learning_rate": 9.873498447734707e-06, |
| "loss": 1.535958170890808, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.21792997306656406, |
| "grad_norm": 2.703125, |
| "learning_rate": 9.871501337802914e-06, |
| "loss": 1.5023021697998047, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.21916121585225087, |
| "grad_norm": 3.4375, |
| "learning_rate": 9.869488843966132e-06, |
| "loss": 1.558158278465271, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.22039245863793766, |
| "grad_norm": 3.5, |
| "learning_rate": 9.86746097422127e-06, |
| "loss": 1.9833602905273438, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.22162370142362448, |
| "grad_norm": 3.78125, |
| "learning_rate": 9.865417736626321e-06, |
| "loss": 1.7728450298309326, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.22285494420931126, |
| "grad_norm": 9.625, |
| "learning_rate": 9.863359139300352e-06, |
| "loss": 1.9449533224105835, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.22408618699499808, |
| "grad_norm": 2.984375, |
| "learning_rate": 9.861285190423466e-06, |
| "loss": 1.9914966821670532, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.2253174297806849, |
| "grad_norm": 9.0625, |
| "learning_rate": 9.859195898236758e-06, |
| "loss": 1.5391563177108765, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.22654867256637168, |
| "grad_norm": 3.515625, |
| "learning_rate": 9.857091271042301e-06, |
| "loss": 1.5083098411560059, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.2277799153520585, |
| "grad_norm": 4.6875, |
| "learning_rate": 9.854971317203097e-06, |
| "loss": 1.6200501918792725, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.22901115813774528, |
| "grad_norm": 2.328125, |
| "learning_rate": 9.852836045143056e-06, |
| "loss": 1.5281012058258057, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.2302424009234321, |
| "grad_norm": 2.546875, |
| "learning_rate": 9.850685463346956e-06, |
| "loss": 1.4439934492111206, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.23147364370911888, |
| "grad_norm": 2.921875, |
| "learning_rate": 9.848519580360403e-06, |
| "loss": 1.5237040519714355, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.2327048864948057, |
| "grad_norm": 3.109375, |
| "learning_rate": 9.846338404789812e-06, |
| "loss": 1.8433338403701782, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.2339361292804925, |
| "grad_norm": 3.015625, |
| "learning_rate": 9.844141945302366e-06, |
| "loss": 1.9542083740234375, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.2351673720661793, |
| "grad_norm": 4.65625, |
| "learning_rate": 9.841930210625972e-06, |
| "loss": 1.5500494241714478, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.2363986148518661, |
| "grad_norm": 2.359375, |
| "learning_rate": 9.839703209549246e-06, |
| "loss": 1.4857137203216553, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.2376298576375529, |
| "grad_norm": 2.140625, |
| "learning_rate": 9.837460950921454e-06, |
| "loss": 1.506941556930542, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.23886110042323971, |
| "grad_norm": 3.078125, |
| "learning_rate": 9.835203443652502e-06, |
| "loss": 1.4529392719268799, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.2400923432089265, |
| "grad_norm": 6.09375, |
| "learning_rate": 9.83293069671288e-06, |
| "loss": 0.4469324052333832, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.24132358599461332, |
| "grad_norm": 3.453125, |
| "learning_rate": 9.830642719133646e-06, |
| "loss": 0.5282363891601562, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.2425548287803001, |
| "grad_norm": 17.875, |
| "learning_rate": 9.828339520006363e-06, |
| "loss": 0.8444979190826416, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.24378607156598692, |
| "grad_norm": 6.03125, |
| "learning_rate": 9.826021108483089e-06, |
| "loss": 0.8458063006401062, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.24501731435167373, |
| "grad_norm": 2.0625, |
| "learning_rate": 9.823687493776328e-06, |
| "loss": 1.5211682319641113, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.24624855713736052, |
| "grad_norm": 2.328125, |
| "learning_rate": 9.821338685158996e-06, |
| "loss": 1.4932045936584473, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.24747979992304733, |
| "grad_norm": 3.859375, |
| "learning_rate": 9.818974691964387e-06, |
| "loss": 1.4841368198394775, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.24871104270873412, |
| "grad_norm": 9.875, |
| "learning_rate": 9.816595523586128e-06, |
| "loss": 1.1101207733154297, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.24994228549442093, |
| "grad_norm": 3.59375, |
| "learning_rate": 9.814201189478146e-06, |
| "loss": 1.877555251121521, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.25117352828010775, |
| "grad_norm": 7.625, |
| "learning_rate": 9.811791699154639e-06, |
| "loss": 1.7015639543533325, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.2524047710657945, |
| "grad_norm": 1.84375, |
| "learning_rate": 9.809367062190016e-06, |
| "loss": 1.4604737758636475, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.2536360138514813, |
| "grad_norm": 2.46875, |
| "learning_rate": 9.806927288218888e-06, |
| "loss": 1.491847038269043, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.25486725663716814, |
| "grad_norm": 2.8125, |
| "learning_rate": 9.804472386936008e-06, |
| "loss": 1.5824358463287354, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.25609849942285495, |
| "grad_norm": 2.5, |
| "learning_rate": 9.80200236809624e-06, |
| "loss": 1.495304822921753, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.25732974220854177, |
| "grad_norm": 3.03125, |
| "learning_rate": 9.799517241514516e-06, |
| "loss": 1.4013820886611938, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.2585609849942285, |
| "grad_norm": 2.671875, |
| "learning_rate": 9.797017017065806e-06, |
| "loss": 1.4486945867538452, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.25979222777991534, |
| "grad_norm": 2.203125, |
| "learning_rate": 9.794501704685071e-06, |
| "loss": 1.541428804397583, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.26102347056560216, |
| "grad_norm": 2.078125, |
| "learning_rate": 9.791971314367226e-06, |
| "loss": 1.5093767642974854, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.26225471335128897, |
| "grad_norm": 1.8203125, |
| "learning_rate": 9.789425856167101e-06, |
| "loss": 1.5601611137390137, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.2634859561369758, |
| "grad_norm": 4.46875, |
| "learning_rate": 9.786865340199396e-06, |
| "loss": 1.4437766075134277, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.26471719892266254, |
| "grad_norm": 2.4375, |
| "learning_rate": 9.784289776638653e-06, |
| "loss": 1.5902003049850464, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.26594844170834936, |
| "grad_norm": 9.0625, |
| "learning_rate": 9.7816991757192e-06, |
| "loss": 1.5537315607070923, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.2671796844940362, |
| "grad_norm": 2.53125, |
| "learning_rate": 9.77909354773512e-06, |
| "loss": 1.4479423761367798, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.268410927279723, |
| "grad_norm": 1.8828125, |
| "learning_rate": 9.776472903040208e-06, |
| "loss": 1.5103721618652344, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.26964217006540975, |
| "grad_norm": 5.40625, |
| "learning_rate": 9.773837252047936e-06, |
| "loss": 1.7486127614974976, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.27087341285109656, |
| "grad_norm": 6.5625, |
| "learning_rate": 9.771186605231391e-06, |
| "loss": 1.99478280544281, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.2721046556367834, |
| "grad_norm": 6.0625, |
| "learning_rate": 9.76852097312326e-06, |
| "loss": 1.9923797845840454, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.2733358984224702, |
| "grad_norm": 3.234375, |
| "learning_rate": 9.76584036631578e-06, |
| "loss": 1.859613299369812, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.274567141208157, |
| "grad_norm": 3.203125, |
| "learning_rate": 9.763144795460676e-06, |
| "loss": 1.4815813302993774, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.27579838399384377, |
| "grad_norm": 3.1875, |
| "learning_rate": 9.76043427126914e-06, |
| "loss": 1.5721994638442993, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.2770296267795306, |
| "grad_norm": 10.875, |
| "learning_rate": 9.757708804511798e-06, |
| "loss": 1.4801818132400513, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.2782608695652174, |
| "grad_norm": 4.59375, |
| "learning_rate": 9.754968406018633e-06, |
| "loss": 1.4633471965789795, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.2794921123509042, |
| "grad_norm": 2.3125, |
| "learning_rate": 9.752213086678965e-06, |
| "loss": 1.6192139387130737, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.280723355136591, |
| "grad_norm": 5.0625, |
| "learning_rate": 9.749442857441414e-06, |
| "loss": 1.481449007987976, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.2819545979222778, |
| "grad_norm": 3.640625, |
| "learning_rate": 9.746657729313835e-06, |
| "loss": 0.6401450037956238, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.2831858407079646, |
| "grad_norm": 4.8125, |
| "learning_rate": 9.743857713363294e-06, |
| "loss": 0.5937597155570984, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.2844170834936514, |
| "grad_norm": 1.15625, |
| "learning_rate": 9.741042820716008e-06, |
| "loss": 1.0705316066741943, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.2856483262793382, |
| "grad_norm": 1.3515625, |
| "learning_rate": 9.738213062557315e-06, |
| "loss": 1.071405291557312, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.286879569065025, |
| "grad_norm": 1.796875, |
| "learning_rate": 9.735368450131622e-06, |
| "loss": 1.434294581413269, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.2881108118507118, |
| "grad_norm": 2.0625, |
| "learning_rate": 9.732508994742356e-06, |
| "loss": 1.401355504989624, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.2893420546363986, |
| "grad_norm": 3.328125, |
| "learning_rate": 9.729634707751929e-06, |
| "loss": 1.4860631227493286, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.29057329742208543, |
| "grad_norm": 2.078125, |
| "learning_rate": 9.72674560058169e-06, |
| "loss": 1.5327996015548706, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.29180454020777224, |
| "grad_norm": 3.109375, |
| "learning_rate": 9.723841684711874e-06, |
| "loss": 0.8789864778518677, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.293035782993459, |
| "grad_norm": 4.5625, |
| "learning_rate": 9.72092297168156e-06, |
| "loss": 0.7144789695739746, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.2942670257791458, |
| "grad_norm": 2.40625, |
| "learning_rate": 9.717989473088629e-06, |
| "loss": 1.4282610416412354, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.29549826856483263, |
| "grad_norm": 1.984375, |
| "learning_rate": 9.715041200589709e-06, |
| "loss": 1.4713945388793945, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.29672951135051945, |
| "grad_norm": 2.65625, |
| "learning_rate": 9.712078165900144e-06, |
| "loss": 1.4964369535446167, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.2979607541362062, |
| "grad_norm": 2.15625, |
| "learning_rate": 9.709100380793924e-06, |
| "loss": 1.5220392942428589, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.299191996921893, |
| "grad_norm": 2.140625, |
| "learning_rate": 9.706107857103662e-06, |
| "loss": 1.4790360927581787, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.30042323970757984, |
| "grad_norm": 2.375, |
| "learning_rate": 9.70310060672053e-06, |
| "loss": 1.5163480043411255, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.30165448249326665, |
| "grad_norm": 1.6015625, |
| "learning_rate": 9.700078641594224e-06, |
| "loss": 1.3747470378875732, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.30288572527895347, |
| "grad_norm": 3.203125, |
| "learning_rate": 9.697041973732907e-06, |
| "loss": 1.3642088174819946, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.3041169680646402, |
| "grad_norm": 2.84375, |
| "learning_rate": 9.693990615203169e-06, |
| "loss": 1.56373929977417, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.30534821085032704, |
| "grad_norm": 1.8515625, |
| "learning_rate": 9.69092457812997e-06, |
| "loss": 1.4737236499786377, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.30657945363601385, |
| "grad_norm": 2.359375, |
| "learning_rate": 9.687843874696601e-06, |
| "loss": 1.704555869102478, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.30781069642170067, |
| "grad_norm": 3.515625, |
| "learning_rate": 9.684748517144631e-06, |
| "loss": 1.9481480121612549, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.3090419392073875, |
| "grad_norm": 3.484375, |
| "learning_rate": 9.681638517773857e-06, |
| "loss": 1.8212928771972656, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.31027318199307424, |
| "grad_norm": 12.6875, |
| "learning_rate": 9.67851388894226e-06, |
| "loss": 1.5932549238204956, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.31150442477876106, |
| "grad_norm": 3.015625, |
| "learning_rate": 9.675374643065951e-06, |
| "loss": 1.5155253410339355, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.3127356675644479, |
| "grad_norm": 3.84375, |
| "learning_rate": 9.672220792619126e-06, |
| "loss": 1.6778662204742432, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.3139669103501347, |
| "grad_norm": 3.8125, |
| "learning_rate": 9.669052350134009e-06, |
| "loss": 1.4104807376861572, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.31519815313582145, |
| "grad_norm": 2.796875, |
| "learning_rate": 9.665869328200817e-06, |
| "loss": 1.526164174079895, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.31642939592150826, |
| "grad_norm": 1.625, |
| "learning_rate": 9.662671739467687e-06, |
| "loss": 1.2319751977920532, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.3176606387071951, |
| "grad_norm": 2.9375, |
| "learning_rate": 9.65945959664065e-06, |
| "loss": 1.6358551979064941, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.3188918814928819, |
| "grad_norm": 5.625, |
| "learning_rate": 9.656232912483566e-06, |
| "loss": 1.5214580297470093, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.3201231242785687, |
| "grad_norm": 1.640625, |
| "learning_rate": 9.652991699818075e-06, |
| "loss": 1.448410987854004, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.32135436706425546, |
| "grad_norm": 1.9140625, |
| "learning_rate": 9.64973597152355e-06, |
| "loss": 1.494712471961975, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.3225856098499423, |
| "grad_norm": 1.6796875, |
| "learning_rate": 9.646465740537044e-06, |
| "loss": 1.4891849756240845, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.3238168526356291, |
| "grad_norm": 2.09375, |
| "learning_rate": 9.643181019853237e-06, |
| "loss": 1.5300544500350952, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.3250480954213159, |
| "grad_norm": 2.4375, |
| "learning_rate": 9.639881822524385e-06, |
| "loss": 1.5648609399795532, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.32627933820700267, |
| "grad_norm": 2.859375, |
| "learning_rate": 9.636568161660271e-06, |
| "loss": 1.7806546688079834, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.3275105809926895, |
| "grad_norm": 3.71875, |
| "learning_rate": 9.63324005042815e-06, |
| "loss": 1.717749834060669, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.3287418237783763, |
| "grad_norm": 0.83203125, |
| "learning_rate": 9.629897502052697e-06, |
| "loss": 1.0430840253829956, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.3299730665640631, |
| "grad_norm": 0.8828125, |
| "learning_rate": 9.626540529815954e-06, |
| "loss": 1.063704252243042, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.3312043093497499, |
| "grad_norm": 5.9375, |
| "learning_rate": 9.62316914705728e-06, |
| "loss": 2.059296131134033, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.3324355521354367, |
| "grad_norm": 4.71875, |
| "learning_rate": 9.619783367173293e-06, |
| "loss": 2.1116085052490234, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.3336667949211235, |
| "grad_norm": 0.92578125, |
| "learning_rate": 9.61638320361782e-06, |
| "loss": 1.157242774963379, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.3348980377068103, |
| "grad_norm": 1.4375, |
| "learning_rate": 9.612968669901853e-06, |
| "loss": 1.1667792797088623, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.33612928049249713, |
| "grad_norm": 2.9375, |
| "learning_rate": 9.609539779593472e-06, |
| "loss": 1.5730559825897217, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.33736052327818394, |
| "grad_norm": 3.359375, |
| "learning_rate": 9.60609654631781e-06, |
| "loss": 1.5219664573669434, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.3385917660638707, |
| "grad_norm": 1.6328125, |
| "learning_rate": 9.602638983756993e-06, |
| "loss": 1.00815749168396, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.3398230088495575, |
| "grad_norm": 0.78125, |
| "learning_rate": 9.599167105650093e-06, |
| "loss": 1.1170340776443481, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.34105425163524433, |
| "grad_norm": 2.15625, |
| "learning_rate": 9.595680925793058e-06, |
| "loss": 1.3734591007232666, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.34228549442093115, |
| "grad_norm": 3.15625, |
| "learning_rate": 9.592180458038668e-06, |
| "loss": 1.4375791549682617, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.3435167372066179, |
| "grad_norm": 3.109375, |
| "learning_rate": 9.588665716296481e-06, |
| "loss": 1.5281890630722046, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.3447479799923047, |
| "grad_norm": 2.21875, |
| "learning_rate": 9.58513671453277e-06, |
| "loss": 1.5165098905563354, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.34597922277799154, |
| "grad_norm": 4.8125, |
| "learning_rate": 9.581593466770473e-06, |
| "loss": 1.736721158027649, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.34721046556367835, |
| "grad_norm": 3.625, |
| "learning_rate": 9.578035987089143e-06, |
| "loss": 1.5587836503982544, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.34844170834936516, |
| "grad_norm": 2.171875, |
| "learning_rate": 9.574464289624872e-06, |
| "loss": 1.361844778060913, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.3496729511350519, |
| "grad_norm": 2.0625, |
| "learning_rate": 9.570878388570262e-06, |
| "loss": 1.1876953840255737, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.35090419392073874, |
| "grad_norm": 1.640625, |
| "learning_rate": 9.567278298174348e-06, |
| "loss": 1.1157175302505493, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.35213543670642555, |
| "grad_norm": 1.8671875, |
| "learning_rate": 9.563664032742546e-06, |
| "loss": 1.4032427072525024, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.35336667949211237, |
| "grad_norm": 3.609375, |
| "learning_rate": 9.560035606636603e-06, |
| "loss": 1.4978705644607544, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.3545979222777991, |
| "grad_norm": 2.8125, |
| "learning_rate": 9.556393034274536e-06, |
| "loss": 1.4692853689193726, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.35582916506348594, |
| "grad_norm": 2.6875, |
| "learning_rate": 9.552736330130567e-06, |
| "loss": 1.8530701398849487, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.35706040784917276, |
| "grad_norm": 5.15625, |
| "learning_rate": 9.54906550873508e-06, |
| "loss": 1.5222429037094116, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.35829165063485957, |
| "grad_norm": 4.78125, |
| "learning_rate": 9.54538058467455e-06, |
| "loss": 1.6488416194915771, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.3595228934205464, |
| "grad_norm": 2.953125, |
| "learning_rate": 9.541681572591498e-06, |
| "loss": 1.8949358463287354, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.36075413620623314, |
| "grad_norm": 6.1875, |
| "learning_rate": 9.537968487184417e-06, |
| "loss": 1.937645673751831, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.36198537899191996, |
| "grad_norm": 5.21875, |
| "learning_rate": 9.534241343207726e-06, |
| "loss": 1.7506264448165894, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.3632166217776068, |
| "grad_norm": 3.3125, |
| "learning_rate": 9.530500155471706e-06, |
| "loss": 1.7294695377349854, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.3644478645632936, |
| "grad_norm": 3.09375, |
| "learning_rate": 9.526744938842452e-06, |
| "loss": 1.734103798866272, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.3656791073489804, |
| "grad_norm": 3.484375, |
| "learning_rate": 9.522975708241788e-06, |
| "loss": 1.663370966911316, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.36691035013466716, |
| "grad_norm": 3.125, |
| "learning_rate": 9.51919247864724e-06, |
| "loss": 1.817090630531311, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.368141592920354, |
| "grad_norm": 6.4375, |
| "learning_rate": 9.515395265091948e-06, |
| "loss": 1.9766976833343506, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.3693728357060408, |
| "grad_norm": 5.0625, |
| "learning_rate": 9.511584082664627e-06, |
| "loss": 1.778980016708374, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.3706040784917276, |
| "grad_norm": 2.40625, |
| "learning_rate": 9.5077589465095e-06, |
| "loss": 1.6368603706359863, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.37183532127741437, |
| "grad_norm": 9.8125, |
| "learning_rate": 9.503919871826231e-06, |
| "loss": 1.3770142793655396, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.3730665640631012, |
| "grad_norm": 3.0625, |
| "learning_rate": 9.500066873869873e-06, |
| "loss": 1.151017189025879, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.374297806848788, |
| "grad_norm": 0.671875, |
| "learning_rate": 9.496199967950808e-06, |
| "loss": 1.1048874855041504, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.3755290496344748, |
| "grad_norm": 3.453125, |
| "learning_rate": 9.492319169434678e-06, |
| "loss": 1.4655290842056274, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.3767602924201616, |
| "grad_norm": 1.6953125, |
| "learning_rate": 9.488424493742337e-06, |
| "loss": 1.4659827947616577, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.3779915352058484, |
| "grad_norm": 2.578125, |
| "learning_rate": 9.484515956349767e-06, |
| "loss": 1.3192390203475952, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.3792227779915352, |
| "grad_norm": 2.375, |
| "learning_rate": 9.480593572788048e-06, |
| "loss": 1.4272172451019287, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.380454020777222, |
| "grad_norm": 3.25, |
| "learning_rate": 9.476657358643268e-06, |
| "loss": 1.3437293767929077, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.3816852635629088, |
| "grad_norm": 4.71875, |
| "learning_rate": 9.472707329556478e-06, |
| "loss": 1.0737183094024658, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.3829165063485956, |
| "grad_norm": 2.28125, |
| "learning_rate": 9.468743501223626e-06, |
| "loss": 1.581071376800537, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.3841477491342824, |
| "grad_norm": 2.6875, |
| "learning_rate": 9.464765889395485e-06, |
| "loss": 1.5359126329421997, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.3853789919199692, |
| "grad_norm": 2.078125, |
| "learning_rate": 9.460774509877606e-06, |
| "loss": 1.2157002687454224, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.38661023470565603, |
| "grad_norm": 1.328125, |
| "learning_rate": 9.456769378530246e-06, |
| "loss": 1.188981533050537, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.38784147749134285, |
| "grad_norm": 0.9921875, |
| "learning_rate": 9.452750511268303e-06, |
| "loss": 1.0613259077072144, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.3890727202770296, |
| "grad_norm": 2.921875, |
| "learning_rate": 9.448717924061264e-06, |
| "loss": 1.053981065750122, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.3903039630627164, |
| "grad_norm": 2.046875, |
| "learning_rate": 9.444671632933124e-06, |
| "loss": 1.4811919927597046, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.39153520584840323, |
| "grad_norm": 3.609375, |
| "learning_rate": 9.44061165396234e-06, |
| "loss": 1.3906296491622925, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.39276644863409005, |
| "grad_norm": 1.25, |
| "learning_rate": 9.436538003281759e-06, |
| "loss": 1.1125138998031616, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.39399769141977686, |
| "grad_norm": 1.546875, |
| "learning_rate": 9.432450697078547e-06, |
| "loss": 1.1720834970474243, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.3952289342054636, |
| "grad_norm": 3.015625, |
| "learning_rate": 9.428349751594143e-06, |
| "loss": 1.5647273063659668, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.39646017699115044, |
| "grad_norm": 2.4375, |
| "learning_rate": 9.424235183124176e-06, |
| "loss": 1.5186619758605957, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.39769141977683725, |
| "grad_norm": 2.40625, |
| "learning_rate": 9.420107008018404e-06, |
| "loss": 1.479695439338684, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.39892266256252407, |
| "grad_norm": 1.8828125, |
| "learning_rate": 9.415965242680664e-06, |
| "loss": 1.4690086841583252, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.4001539053482108, |
| "grad_norm": 0.98828125, |
| "learning_rate": 9.41180990356879e-06, |
| "loss": 1.0731900930404663, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.40138514813389764, |
| "grad_norm": 0.87109375, |
| "learning_rate": 9.407641007194547e-06, |
| "loss": 1.0590906143188477, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.40261639091958445, |
| "grad_norm": 3.46875, |
| "learning_rate": 9.403458570123585e-06, |
| "loss": 1.8852177858352661, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.40384763370527127, |
| "grad_norm": 4.5, |
| "learning_rate": 9.399262608975343e-06, |
| "loss": 1.837098479270935, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.4050788764909581, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.395053140423013e-06, |
| "loss": 1.0474339723587036, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.40631011927664484, |
| "grad_norm": 0.95703125, |
| "learning_rate": 9.390830181193458e-06, |
| "loss": 1.0683759450912476, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.40754136206233166, |
| "grad_norm": 2.453125, |
| "learning_rate": 9.386593748067142e-06, |
| "loss": 1.5274485349655151, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.4087726048480185, |
| "grad_norm": 2.546875, |
| "learning_rate": 9.382343857878075e-06, |
| "loss": 1.450246810913086, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.4100038476337053, |
| "grad_norm": 4.4375, |
| "learning_rate": 9.378080527513738e-06, |
| "loss": 1.6065733432769775, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.41123509041939205, |
| "grad_norm": 7.5, |
| "learning_rate": 9.373803773915018e-06, |
| "loss": 1.5195866823196411, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.41246633320507886, |
| "grad_norm": 0.8046875, |
| "learning_rate": 9.369513614076142e-06, |
| "loss": 1.011305570602417, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.4136975759907657, |
| "grad_norm": 1.359375, |
| "learning_rate": 9.365210065044609e-06, |
| "loss": 1.108022689819336, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.4149288187764525, |
| "grad_norm": 3.4375, |
| "learning_rate": 9.360893143921121e-06, |
| "loss": 1.8421379327774048, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.4161600615621393, |
| "grad_norm": 6.96875, |
| "learning_rate": 9.356562867859511e-06, |
| "loss": 1.377231478691101, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.41739130434782606, |
| "grad_norm": 1.390625, |
| "learning_rate": 9.352219254066691e-06, |
| "loss": 1.1049342155456543, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.4186225471335129, |
| "grad_norm": 0.84375, |
| "learning_rate": 9.347862319802558e-06, |
| "loss": 1.141373634338379, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.4198537899191997, |
| "grad_norm": 4.75, |
| "learning_rate": 9.343492082379952e-06, |
| "loss": 1.5442224740982056, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.4210850327048865, |
| "grad_norm": 4.03125, |
| "learning_rate": 9.339108559164567e-06, |
| "loss": 1.4725855588912964, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.4223162754905733, |
| "grad_norm": 1.828125, |
| "learning_rate": 9.334711767574893e-06, |
| "loss": 1.1119123697280884, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.4235475182762601, |
| "grad_norm": 1.4375, |
| "learning_rate": 9.330301725082143e-06, |
| "loss": 0.972973644733429, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.4247787610619469, |
| "grad_norm": 2.796875, |
| "learning_rate": 9.325878449210181e-06, |
| "loss": 1.7179160118103027, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.4260100038476337, |
| "grad_norm": 3.375, |
| "learning_rate": 9.321441957535464e-06, |
| "loss": 1.850766897201538, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.4272412466333205, |
| "grad_norm": 3.1875, |
| "learning_rate": 9.316992267686955e-06, |
| "loss": 1.4963725805282593, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.4284724894190073, |
| "grad_norm": 2.625, |
| "learning_rate": 9.312529397346066e-06, |
| "loss": 1.4499911069869995, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.4297037322046941, |
| "grad_norm": 15.8125, |
| "learning_rate": 9.308053364246581e-06, |
| "loss": 1.814996361732483, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.4309349749903809, |
| "grad_norm": 3.828125, |
| "learning_rate": 9.303564186174593e-06, |
| "loss": 1.6408932209014893, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.43216621777606773, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.299061880968416e-06, |
| "loss": 0.9904305338859558, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.43339746056175454, |
| "grad_norm": 0.93359375, |
| "learning_rate": 9.294546466518544e-06, |
| "loss": 0.979654848575592, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.4346287033474413, |
| "grad_norm": 2.0625, |
| "learning_rate": 9.290017960767545e-06, |
| "loss": 1.3709770441055298, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.4358599461331281, |
| "grad_norm": 1.8359375, |
| "learning_rate": 9.285476381710021e-06, |
| "loss": 1.4823150634765625, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.43709118891881493, |
| "grad_norm": 4.34375, |
| "learning_rate": 9.280921747392515e-06, |
| "loss": 1.5808249711990356, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.43832243170450175, |
| "grad_norm": 3.0625, |
| "learning_rate": 9.276354075913445e-06, |
| "loss": 1.526861310005188, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.43955367449018856, |
| "grad_norm": 2.421875, |
| "learning_rate": 9.271773385423042e-06, |
| "loss": 1.6235942840576172, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.4407849172758753, |
| "grad_norm": 2.3125, |
| "learning_rate": 9.267179694123259e-06, |
| "loss": 1.514561414718628, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.44201616006156214, |
| "grad_norm": 3.25, |
| "learning_rate": 9.26257302026772e-06, |
| "loss": 0.8033193349838257, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.44324740284724895, |
| "grad_norm": 2.546875, |
| "learning_rate": 9.257953382161628e-06, |
| "loss": 0.6848942041397095, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.44447864563293576, |
| "grad_norm": 2.625, |
| "learning_rate": 9.253320798161709e-06, |
| "loss": 1.8998783826828003, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.4457098884186225, |
| "grad_norm": 3.078125, |
| "learning_rate": 9.248675286676126e-06, |
| "loss": 2.021900177001953, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.44694113120430934, |
| "grad_norm": 3.0, |
| "learning_rate": 9.244016866164406e-06, |
| "loss": 1.5490355491638184, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.44817237398999615, |
| "grad_norm": 2.671875, |
| "learning_rate": 9.239345555137387e-06, |
| "loss": 1.566870093345642, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.44940361677568297, |
| "grad_norm": 4.15625, |
| "learning_rate": 9.234661372157114e-06, |
| "loss": 1.9461112022399902, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.4506348595613698, |
| "grad_norm": 3.53125, |
| "learning_rate": 9.22996433583679e-06, |
| "loss": 1.6727873086929321, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.45186610234705654, |
| "grad_norm": 3.75, |
| "learning_rate": 9.225254464840686e-06, |
| "loss": 1.6863263845443726, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.45309734513274336, |
| "grad_norm": 5.71875, |
| "learning_rate": 9.220531777884077e-06, |
| "loss": 1.6813945770263672, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.45432858791843017, |
| "grad_norm": 1.765625, |
| "learning_rate": 9.215796293733162e-06, |
| "loss": 1.3949999809265137, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.455559830704117, |
| "grad_norm": 3.234375, |
| "learning_rate": 9.21104803120499e-06, |
| "loss": 1.4089975357055664, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.45679107348980375, |
| "grad_norm": 2.4375, |
| "learning_rate": 9.206287009167393e-06, |
| "loss": 1.4907400608062744, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.45802231627549056, |
| "grad_norm": 2.015625, |
| "learning_rate": 9.201513246538901e-06, |
| "loss": 1.4012898206710815, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.4592535590611774, |
| "grad_norm": 1.921875, |
| "learning_rate": 9.196726762288662e-06, |
| "loss": 1.4157438278198242, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.4604848018468642, |
| "grad_norm": 1.984375, |
| "learning_rate": 9.191927575436388e-06, |
| "loss": 1.4142546653747559, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.461716044632551, |
| "grad_norm": 4.4375, |
| "learning_rate": 9.187115705052261e-06, |
| "loss": 0.5696741342544556, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.46294728741823776, |
| "grad_norm": 6.78125, |
| "learning_rate": 9.18229117025686e-06, |
| "loss": 0.30877745151519775, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.4641785302039246, |
| "grad_norm": 3.890625, |
| "learning_rate": 9.177453990221092e-06, |
| "loss": 1.320806860923767, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.4654097729896114, |
| "grad_norm": 2.015625, |
| "learning_rate": 9.17260418416611e-06, |
| "loss": 1.4618957042694092, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.4666410157752982, |
| "grad_norm": 11.4375, |
| "learning_rate": 9.167741771363234e-06, |
| "loss": 0.4812394082546234, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.467872258560985, |
| "grad_norm": 5.46875, |
| "learning_rate": 9.162866771133888e-06, |
| "loss": 0.23717719316482544, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.4691035013466718, |
| "grad_norm": 2.765625, |
| "learning_rate": 9.157979202849505e-06, |
| "loss": 1.6795170307159424, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.4703347441323586, |
| "grad_norm": 16.5, |
| "learning_rate": 9.15307908593146e-06, |
| "loss": 1.8667967319488525, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.4715659869180454, |
| "grad_norm": 3.6875, |
| "learning_rate": 9.148166439850996e-06, |
| "loss": 1.3985189199447632, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.4727972297037322, |
| "grad_norm": 2.453125, |
| "learning_rate": 9.143241284129136e-06, |
| "loss": 1.398207664489746, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.474028472489419, |
| "grad_norm": 4.34375, |
| "learning_rate": 9.138303638336623e-06, |
| "loss": 1.7506206035614014, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.4752597152751058, |
| "grad_norm": 3.09375, |
| "learning_rate": 9.133353522093815e-06, |
| "loss": 1.8263590335845947, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.4764909580607926, |
| "grad_norm": 2.78125, |
| "learning_rate": 9.128390955070634e-06, |
| "loss": 1.791210412979126, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.47772220084647943, |
| "grad_norm": 4.90625, |
| "learning_rate": 9.123415956986475e-06, |
| "loss": 1.8291805982589722, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.47895344363216624, |
| "grad_norm": 1.484375, |
| "learning_rate": 9.118428547610125e-06, |
| "loss": 1.1543712615966797, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.480184686417853, |
| "grad_norm": 1.3984375, |
| "learning_rate": 9.113428746759696e-06, |
| "loss": 1.1177196502685547, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.4814159292035398, |
| "grad_norm": 2.578125, |
| "learning_rate": 9.108416574302534e-06, |
| "loss": 1.8869653940200806, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.48264717198922663, |
| "grad_norm": 3.53125, |
| "learning_rate": 9.103392050155145e-06, |
| "loss": 1.6388249397277832, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.48387841477491345, |
| "grad_norm": 1.75, |
| "learning_rate": 9.09835519428312e-06, |
| "loss": 1.3413951396942139, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.4851096575606002, |
| "grad_norm": 1.9296875, |
| "learning_rate": 9.093306026701043e-06, |
| "loss": 1.431657075881958, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.486340900346287, |
| "grad_norm": 2.765625, |
| "learning_rate": 9.088244567472433e-06, |
| "loss": 1.496319055557251, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.48757214313197383, |
| "grad_norm": 1.984375, |
| "learning_rate": 9.083170836709643e-06, |
| "loss": 1.4392622709274292, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.48880338591766065, |
| "grad_norm": 5.15625, |
| "learning_rate": 9.078084854573788e-06, |
| "loss": 0.24732553958892822, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.49003462870334746, |
| "grad_norm": 3.015625, |
| "learning_rate": 9.072986641274668e-06, |
| "loss": 0.38419798016548157, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.4912658714890342, |
| "grad_norm": 3.140625, |
| "learning_rate": 9.067876217070686e-06, |
| "loss": 1.5040916204452515, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.49249711427472104, |
| "grad_norm": 2.734375, |
| "learning_rate": 9.062753602268766e-06, |
| "loss": 1.4809836149215698, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.49372835706040785, |
| "grad_norm": 3.203125, |
| "learning_rate": 9.057618817224268e-06, |
| "loss": 1.7967525720596313, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.49495959984609467, |
| "grad_norm": 3.203125, |
| "learning_rate": 9.05247188234092e-06, |
| "loss": 1.9494469165802002, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.4961908426317815, |
| "grad_norm": 1.7578125, |
| "learning_rate": 9.047312818070726e-06, |
| "loss": 1.2794251441955566, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.49742208541746824, |
| "grad_norm": 1.953125, |
| "learning_rate": 9.04214164491388e-06, |
| "loss": 1.5466313362121582, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.49865332820315506, |
| "grad_norm": 1.90625, |
| "learning_rate": 9.036958383418708e-06, |
| "loss": 1.1338438987731934, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.49988457098884187, |
| "grad_norm": 0.9765625, |
| "learning_rate": 9.031763054181554e-06, |
| "loss": 1.1572006940841675, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.5011158137745286, |
| "grad_norm": 4.4375, |
| "learning_rate": 9.026555677846726e-06, |
| "loss": 1.489051103591919, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.5023470565602155, |
| "grad_norm": 3.5, |
| "learning_rate": 9.021336275106397e-06, |
| "loss": 1.4597687721252441, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.5035782993459023, |
| "grad_norm": 1.3046875, |
| "learning_rate": 9.016104866700535e-06, |
| "loss": 0.971706211566925, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.504809542131589, |
| "grad_norm": 1.9453125, |
| "learning_rate": 9.010861473416803e-06, |
| "loss": 1.1148239374160767, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.5060407849172759, |
| "grad_norm": 3.125, |
| "learning_rate": 9.005606116090499e-06, |
| "loss": 1.4972327947616577, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.5072720277029626, |
| "grad_norm": 4.125, |
| "learning_rate": 9.000338815604452e-06, |
| "loss": 1.5357601642608643, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.5085032704886495, |
| "grad_norm": 1.3046875, |
| "learning_rate": 8.995059592888957e-06, |
| "loss": 1.3044366836547852, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.5097345132743363, |
| "grad_norm": 1.5, |
| "learning_rate": 8.989768468921675e-06, |
| "loss": 1.261732816696167, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.510965756060023, |
| "grad_norm": 4.75, |
| "learning_rate": 8.984465464727567e-06, |
| "loss": 1.6821751594543457, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.5121969988457099, |
| "grad_norm": 3.578125, |
| "learning_rate": 8.979150601378798e-06, |
| "loss": 2.098515033721924, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.5134282416313967, |
| "grad_norm": 2.40625, |
| "learning_rate": 8.973823899994653e-06, |
| "loss": 1.4232302904129028, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.5146594844170835, |
| "grad_norm": 1.171875, |
| "learning_rate": 8.968485381741464e-06, |
| "loss": 1.1819924116134644, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.5158907272027703, |
| "grad_norm": 1.9453125, |
| "learning_rate": 8.963135067832509e-06, |
| "loss": 1.5029240846633911, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.517121969988457, |
| "grad_norm": 3.125, |
| "learning_rate": 8.95777297952795e-06, |
| "loss": 1.506210207939148, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.5183532127741439, |
| "grad_norm": 1.7734375, |
| "learning_rate": 8.952399138134724e-06, |
| "loss": 1.4535478353500366, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.5195844555598307, |
| "grad_norm": 2.765625, |
| "learning_rate": 8.947013565006482e-06, |
| "loss": 1.3872270584106445, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.5208156983455176, |
| "grad_norm": 0.71484375, |
| "learning_rate": 8.941616281543484e-06, |
| "loss": 1.1330012083053589, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.5220469411312043, |
| "grad_norm": 1.59375, |
| "learning_rate": 8.936207309192522e-06, |
| "loss": 1.159617304801941, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.5232781839168911, |
| "grad_norm": 6.84375, |
| "learning_rate": 8.930786669446843e-06, |
| "loss": 1.4233540296554565, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.5245094267025779, |
| "grad_norm": 6.9375, |
| "learning_rate": 8.925354383846048e-06, |
| "loss": 1.1373634338378906, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.5257406694882647, |
| "grad_norm": 3.1875, |
| "learning_rate": 8.919910473976022e-06, |
| "loss": 1.413563847541809, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.5269719122739516, |
| "grad_norm": 2.234375, |
| "learning_rate": 8.914454961468828e-06, |
| "loss": 1.413554310798645, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.5282031550596383, |
| "grad_norm": 0.765625, |
| "learning_rate": 8.90898786800265e-06, |
| "loss": 1.1362459659576416, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.5294343978453251, |
| "grad_norm": 1.1015625, |
| "learning_rate": 8.903509215301677e-06, |
| "loss": 1.0461921691894531, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.530665640631012, |
| "grad_norm": 3.359375, |
| "learning_rate": 8.89801902513604e-06, |
| "loss": 1.4780032634735107, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.5318968834166987, |
| "grad_norm": 2.171875, |
| "learning_rate": 8.892517319321705e-06, |
| "loss": 1.5155971050262451, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.5331281262023856, |
| "grad_norm": 2.109375, |
| "learning_rate": 8.887004119720408e-06, |
| "loss": 1.4275978803634644, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.5343593689880723, |
| "grad_norm": 3.828125, |
| "learning_rate": 8.881479448239546e-06, |
| "loss": 1.4534516334533691, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.5355906117737591, |
| "grad_norm": 1.1171875, |
| "learning_rate": 8.875943326832113e-06, |
| "loss": 1.1164848804473877, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.536821854559446, |
| "grad_norm": 20.875, |
| "learning_rate": 8.87039577749659e-06, |
| "loss": 1.2804282903671265, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.5380530973451327, |
| "grad_norm": 3.828125, |
| "learning_rate": 8.864836822276872e-06, |
| "loss": 1.0548572540283203, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.5392843401308195, |
| "grad_norm": 1.953125, |
| "learning_rate": 8.859266483262183e-06, |
| "loss": 1.043743371963501, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.5405155829165064, |
| "grad_norm": 1.2578125, |
| "learning_rate": 8.853684782586971e-06, |
| "loss": 1.0042893886566162, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.5417468257021931, |
| "grad_norm": 0.65234375, |
| "learning_rate": 8.848091742430837e-06, |
| "loss": 1.0357824563980103, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.54297806848788, |
| "grad_norm": 2.28125, |
| "learning_rate": 8.842487385018443e-06, |
| "loss": 1.8888530731201172, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.5442093112735668, |
| "grad_norm": 3.171875, |
| "learning_rate": 8.836871732619419e-06, |
| "loss": 1.7589383125305176, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.5454405540592535, |
| "grad_norm": 2.375, |
| "learning_rate": 8.831244807548274e-06, |
| "loss": 1.6239700317382812, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.5466717968449404, |
| "grad_norm": 2.625, |
| "learning_rate": 8.825606632164314e-06, |
| "loss": 1.501517653465271, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.5479030396306271, |
| "grad_norm": 2.046875, |
| "learning_rate": 8.819957228871553e-06, |
| "loss": 1.0660182237625122, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.549134282416314, |
| "grad_norm": 1.921875, |
| "learning_rate": 8.81429662011861e-06, |
| "loss": 1.187867283821106, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.5503655252020008, |
| "grad_norm": 2.265625, |
| "learning_rate": 8.80862482839864e-06, |
| "loss": 1.75932776927948, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.5515967679876875, |
| "grad_norm": 3.625, |
| "learning_rate": 8.802941876249233e-06, |
| "loss": 1.7781065702438354, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.5528280107733744, |
| "grad_norm": 1.703125, |
| "learning_rate": 8.797247786252322e-06, |
| "loss": 1.0148627758026123, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.5540592535590612, |
| "grad_norm": 1.21875, |
| "learning_rate": 8.791542581034107e-06, |
| "loss": 0.9595763087272644, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.555290496344748, |
| "grad_norm": 2.25, |
| "learning_rate": 8.785826283264942e-06, |
| "loss": 1.3790762424468994, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.5565217391304348, |
| "grad_norm": 3.375, |
| "learning_rate": 8.780098915659272e-06, |
| "loss": 1.5391640663146973, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.5577529819161215, |
| "grad_norm": 4.46875, |
| "learning_rate": 8.774360500975518e-06, |
| "loss": 1.2881464958190918, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.5589842247018084, |
| "grad_norm": 1.8828125, |
| "learning_rate": 8.768611062016008e-06, |
| "loss": 1.4236103296279907, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.5602154674874952, |
| "grad_norm": 1.3671875, |
| "learning_rate": 8.76285062162687e-06, |
| "loss": 1.1012616157531738, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.561446710273182, |
| "grad_norm": 3.609375, |
| "learning_rate": 8.757079202697951e-06, |
| "loss": 0.9929218292236328, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.5626779530588688, |
| "grad_norm": 4.25, |
| "learning_rate": 8.751296828162721e-06, |
| "loss": 1.483315348625183, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.5639091958445556, |
| "grad_norm": 4.84375, |
| "learning_rate": 8.745503520998181e-06, |
| "loss": 1.8858379125595093, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.5651404386302424, |
| "grad_norm": 2.15625, |
| "learning_rate": 8.739699304224781e-06, |
| "loss": 1.0241905450820923, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.5663716814159292, |
| "grad_norm": 0.859375, |
| "learning_rate": 8.733884200906312e-06, |
| "loss": 1.160780429840088, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.567602924201616, |
| "grad_norm": 1.0390625, |
| "learning_rate": 8.728058234149836e-06, |
| "loss": 1.3259217739105225, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.5688341669873028, |
| "grad_norm": 0.828125, |
| "learning_rate": 8.722221427105573e-06, |
| "loss": 1.1867862939834595, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.5700654097729896, |
| "grad_norm": 3.984375, |
| "learning_rate": 8.71637380296682e-06, |
| "loss": 1.7894140481948853, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.5712966525586765, |
| "grad_norm": 4.0625, |
| "learning_rate": 8.71051538496986e-06, |
| "loss": 1.620642066001892, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.5725278953443632, |
| "grad_norm": 3.109375, |
| "learning_rate": 8.704646196393864e-06, |
| "loss": 1.5504050254821777, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.57375913813005, |
| "grad_norm": 4.125, |
| "learning_rate": 8.698766260560803e-06, |
| "loss": 1.5462700128555298, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.5749903809157368, |
| "grad_norm": 2.609375, |
| "learning_rate": 8.692875600835355e-06, |
| "loss": 1.4665104150772095, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.5762216237014236, |
| "grad_norm": 2.765625, |
| "learning_rate": 8.686974240624803e-06, |
| "loss": 1.4654189348220825, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.5774528664871105, |
| "grad_norm": 2.9375, |
| "learning_rate": 8.681062203378963e-06, |
| "loss": 1.7840183973312378, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.5786841092727972, |
| "grad_norm": 2.875, |
| "learning_rate": 8.675139512590063e-06, |
| "loss": 1.550964593887329, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.579915352058484, |
| "grad_norm": 1.0859375, |
| "learning_rate": 8.669206191792676e-06, |
| "loss": 1.1342413425445557, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.5811465948441709, |
| "grad_norm": 1.2578125, |
| "learning_rate": 8.663262264563607e-06, |
| "loss": 1.0562883615493774, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.5823778376298576, |
| "grad_norm": 2.71875, |
| "learning_rate": 8.657307754521811e-06, |
| "loss": 1.5223665237426758, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.5836090804155445, |
| "grad_norm": 2.5, |
| "learning_rate": 8.651342685328294e-06, |
| "loss": 1.2530782222747803, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.5848403232012312, |
| "grad_norm": 2.15625, |
| "learning_rate": 8.645367080686022e-06, |
| "loss": 1.4140348434448242, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.586071565986918, |
| "grad_norm": 1.65625, |
| "learning_rate": 8.63938096433982e-06, |
| "loss": 1.4089921712875366, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.5873028087726049, |
| "grad_norm": 2.703125, |
| "learning_rate": 8.633384360076288e-06, |
| "loss": 1.5375595092773438, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.5885340515582916, |
| "grad_norm": 3.125, |
| "learning_rate": 8.6273772917237e-06, |
| "loss": 1.456397533416748, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.5897652943439785, |
| "grad_norm": 2.765625, |
| "learning_rate": 8.621359783151906e-06, |
| "loss": 0.9872013330459595, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.5909965371296653, |
| "grad_norm": 1.0, |
| "learning_rate": 8.615331858272245e-06, |
| "loss": 1.005414605140686, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.592227779915352, |
| "grad_norm": 1.5, |
| "learning_rate": 8.609293541037448e-06, |
| "loss": 1.4516929388046265, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.5934590227010389, |
| "grad_norm": 4.21875, |
| "learning_rate": 8.603244855441541e-06, |
| "loss": 1.4895005226135254, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.5946902654867257, |
| "grad_norm": 1.9765625, |
| "learning_rate": 8.597185825519746e-06, |
| "loss": 1.4036403894424438, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.5959215082724124, |
| "grad_norm": 2.96875, |
| "learning_rate": 8.591116475348393e-06, |
| "loss": 1.4434735774993896, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.5971527510580993, |
| "grad_norm": 2.109375, |
| "learning_rate": 8.585036829044819e-06, |
| "loss": 1.4209600687026978, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.598383993843786, |
| "grad_norm": 2.703125, |
| "learning_rate": 8.578946910767277e-06, |
| "loss": 1.5273462533950806, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.5996152366294729, |
| "grad_norm": 1.53125, |
| "learning_rate": 8.572846744714833e-06, |
| "loss": 1.4886844158172607, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.6008464794151597, |
| "grad_norm": 3.46875, |
| "learning_rate": 8.566736355127278e-06, |
| "loss": 1.7636457681655884, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.6020777222008464, |
| "grad_norm": 2.59375, |
| "learning_rate": 8.560615766285025e-06, |
| "loss": 1.7612440586090088, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.6033089649865333, |
| "grad_norm": 11.3125, |
| "learning_rate": 8.554485002509015e-06, |
| "loss": 1.7582465410232544, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.6045402077722201, |
| "grad_norm": 1.875, |
| "learning_rate": 8.54834408816062e-06, |
| "loss": 1.5206481218338013, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.6057714505579069, |
| "grad_norm": 3.21875, |
| "learning_rate": 8.542193047641548e-06, |
| "loss": 1.4135831594467163, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.6070026933435937, |
| "grad_norm": 1.7421875, |
| "learning_rate": 8.536031905393742e-06, |
| "loss": 1.2139555215835571, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.6082339361292805, |
| "grad_norm": 2.828125, |
| "learning_rate": 8.529860685899291e-06, |
| "loss": 1.0709203481674194, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.6094651789149673, |
| "grad_norm": 3.15625, |
| "learning_rate": 8.523679413680324e-06, |
| "loss": 1.780793309211731, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.6106964217006541, |
| "grad_norm": 4.09375, |
| "learning_rate": 8.51748811329891e-06, |
| "loss": 1.6577130556106567, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.611927664486341, |
| "grad_norm": 2.703125, |
| "learning_rate": 8.51128680935698e-06, |
| "loss": 1.820251703262329, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.6131589072720277, |
| "grad_norm": 4.15625, |
| "learning_rate": 8.5050755264962e-06, |
| "loss": 1.7748041152954102, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.6143901500577145, |
| "grad_norm": 2.90625, |
| "learning_rate": 8.4988542893979e-06, |
| "loss": 1.4135329723358154, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.6156213928434013, |
| "grad_norm": 3.234375, |
| "learning_rate": 8.492623122782957e-06, |
| "loss": 1.8253610134124756, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.6168526356290881, |
| "grad_norm": 2.109375, |
| "learning_rate": 8.48638205141171e-06, |
| "loss": 1.2383848428726196, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.618083878414775, |
| "grad_norm": 1.703125, |
| "learning_rate": 8.480131100083853e-06, |
| "loss": 1.306466817855835, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.6193151212004617, |
| "grad_norm": 2.34375, |
| "learning_rate": 8.473870293638335e-06, |
| "loss": 1.4259259700775146, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.6205463639861485, |
| "grad_norm": 1.9375, |
| "learning_rate": 8.467599656953276e-06, |
| "loss": 1.386904001235962, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.6217776067718354, |
| "grad_norm": 1.4296875, |
| "learning_rate": 8.461319214945847e-06, |
| "loss": 1.241986632347107, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.6230088495575221, |
| "grad_norm": 1.5703125, |
| "learning_rate": 8.455028992572189e-06, |
| "loss": 1.3007842302322388, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.6242400923432089, |
| "grad_norm": 1.296875, |
| "learning_rate": 8.448729014827305e-06, |
| "loss": 1.1606676578521729, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.6254713351288957, |
| "grad_norm": 1.015625, |
| "learning_rate": 8.442419306744958e-06, |
| "loss": 1.0195151567459106, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.6267025779145825, |
| "grad_norm": 1.1328125, |
| "learning_rate": 8.436099893397582e-06, |
| "loss": 1.0201420783996582, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.6279338207002694, |
| "grad_norm": 1.8984375, |
| "learning_rate": 8.429770799896168e-06, |
| "loss": 1.2197397947311401, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.6291650634859561, |
| "grad_norm": 1.9453125, |
| "learning_rate": 8.423432051390184e-06, |
| "loss": 1.1003873348236084, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.6303963062716429, |
| "grad_norm": 2.53125, |
| "learning_rate": 8.417083673067452e-06, |
| "loss": 1.231971025466919, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.6316275490573298, |
| "grad_norm": 4.1875, |
| "learning_rate": 8.410725690154067e-06, |
| "loss": 1.5416268110275269, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.6328587918430165, |
| "grad_norm": 2.53125, |
| "learning_rate": 8.404358127914281e-06, |
| "loss": 1.7177008390426636, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.6340900346287034, |
| "grad_norm": 1.296875, |
| "learning_rate": 8.39798101165042e-06, |
| "loss": 1.1199826002120972, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.6353212774143902, |
| "grad_norm": 1.4296875, |
| "learning_rate": 8.391594366702772e-06, |
| "loss": 1.1079318523406982, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.6365525202000769, |
| "grad_norm": 1.234375, |
| "learning_rate": 8.385198218449479e-06, |
| "loss": 1.0160592794418335, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.6377837629857638, |
| "grad_norm": 1.4453125, |
| "learning_rate": 8.37879259230646e-06, |
| "loss": 0.9635307788848877, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.6390150057714505, |
| "grad_norm": 1.0546875, |
| "learning_rate": 8.372377513727283e-06, |
| "loss": 1.0464245080947876, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.6402462485571374, |
| "grad_norm": 0.8203125, |
| "learning_rate": 8.365953008203088e-06, |
| "loss": 0.9715243577957153, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.6414774913428242, |
| "grad_norm": 4.40625, |
| "learning_rate": 8.359519101262464e-06, |
| "loss": 1.554203748703003, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.6427087341285109, |
| "grad_norm": 3.34375, |
| "learning_rate": 8.353075818471362e-06, |
| "loss": 1.7830839157104492, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.6439399769141978, |
| "grad_norm": 2.890625, |
| "learning_rate": 8.34662318543299e-06, |
| "loss": 0.3349166512489319, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.6451712196998846, |
| "grad_norm": 2.8125, |
| "learning_rate": 8.340161227787709e-06, |
| "loss": 0.26277410984039307, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.6464024624855714, |
| "grad_norm": 2.609375, |
| "learning_rate": 8.333689971212932e-06, |
| "loss": 1.475071907043457, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.6476337052712582, |
| "grad_norm": 12.5625, |
| "learning_rate": 8.327209441423025e-06, |
| "loss": 1.4347758293151855, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.648864948056945, |
| "grad_norm": 4.21875, |
| "learning_rate": 8.320719664169203e-06, |
| "loss": 1.9238454103469849, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.6500961908426318, |
| "grad_norm": 5.03125, |
| "learning_rate": 8.314220665239418e-06, |
| "loss": 1.5550142526626587, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.6513274336283186, |
| "grad_norm": 5.9375, |
| "learning_rate": 8.30771247045828e-06, |
| "loss": 1.9398154020309448, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.6525586764140053, |
| "grad_norm": 2.765625, |
| "learning_rate": 8.301195105686927e-06, |
| "loss": 1.9076368808746338, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.6537899191996922, |
| "grad_norm": 2.625, |
| "learning_rate": 8.294668596822941e-06, |
| "loss": 1.4107842445373535, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.655021161985379, |
| "grad_norm": 5.3125, |
| "learning_rate": 8.28813296980024e-06, |
| "loss": 1.4180490970611572, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.6562524047710658, |
| "grad_norm": 2.40625, |
| "learning_rate": 8.28158825058897e-06, |
| "loss": 1.4826152324676514, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.6574836475567526, |
| "grad_norm": 2.484375, |
| "learning_rate": 8.275034465195413e-06, |
| "loss": 1.4656550884246826, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.6587148903424394, |
| "grad_norm": 2.6875, |
| "learning_rate": 8.268471639661868e-06, |
| "loss": 1.479034662246704, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.6599461331281262, |
| "grad_norm": 1.8828125, |
| "learning_rate": 8.261899800066561e-06, |
| "loss": 1.4503613710403442, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.661177375913813, |
| "grad_norm": 2.21875, |
| "learning_rate": 8.255318972523538e-06, |
| "loss": 1.4242353439331055, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.6624086186994999, |
| "grad_norm": 17.5, |
| "learning_rate": 8.248729183182556e-06, |
| "loss": 1.3858964443206787, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.6636398614851866, |
| "grad_norm": 7.46875, |
| "learning_rate": 8.242130458228986e-06, |
| "loss": 1.691213607788086, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.6648711042708734, |
| "grad_norm": 5.125, |
| "learning_rate": 8.235522823883702e-06, |
| "loss": 1.9530844688415527, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.6661023470565602, |
| "grad_norm": 1.6328125, |
| "learning_rate": 8.228906306402984e-06, |
| "loss": 1.4718236923217773, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.667333589842247, |
| "grad_norm": 2.15625, |
| "learning_rate": 8.22228093207841e-06, |
| "loss": 1.478079080581665, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.6685648326279339, |
| "grad_norm": 2.0, |
| "learning_rate": 8.21564672723675e-06, |
| "loss": 1.423277497291565, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.6697960754136206, |
| "grad_norm": 2.328125, |
| "learning_rate": 8.209003718239865e-06, |
| "loss": 1.4079033136367798, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.6710273181993074, |
| "grad_norm": 2.5625, |
| "learning_rate": 8.2023519314846e-06, |
| "loss": 1.510083794593811, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.6722585609849943, |
| "grad_norm": 1.96875, |
| "learning_rate": 8.195691393402676e-06, |
| "loss": 1.4157885313034058, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.673489803770681, |
| "grad_norm": 2.25, |
| "learning_rate": 8.189022130460595e-06, |
| "loss": 1.2089054584503174, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.6747210465563679, |
| "grad_norm": 2.0625, |
| "learning_rate": 8.182344169159527e-06, |
| "loss": 1.254252552986145, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.6759522893420546, |
| "grad_norm": 2.890625, |
| "learning_rate": 8.175657536035195e-06, |
| "loss": 1.513046383857727, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.6771835321277414, |
| "grad_norm": 3.359375, |
| "learning_rate": 8.1689622576578e-06, |
| "loss": 1.5234147310256958, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.6784147749134283, |
| "grad_norm": 0.78515625, |
| "learning_rate": 8.16225836063188e-06, |
| "loss": 1.077025294303894, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.679646017699115, |
| "grad_norm": 2.25, |
| "learning_rate": 8.155545871596228e-06, |
| "loss": 1.1133958101272583, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.6808772604848018, |
| "grad_norm": 1.25, |
| "learning_rate": 8.148824817223775e-06, |
| "loss": 1.2056477069854736, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.6821085032704887, |
| "grad_norm": 1.0234375, |
| "learning_rate": 8.14209522422149e-06, |
| "loss": 1.15602707862854, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.6833397460561754, |
| "grad_norm": 3.734375, |
| "learning_rate": 8.13535711933027e-06, |
| "loss": 1.6869274377822876, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.6845709888418623, |
| "grad_norm": 3.703125, |
| "learning_rate": 8.128610529324837e-06, |
| "loss": 1.7017213106155396, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.685802231627549, |
| "grad_norm": 10.0, |
| "learning_rate": 8.121855481013624e-06, |
| "loss": 1.94403076171875, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.6870334744132358, |
| "grad_norm": 2.9375, |
| "learning_rate": 8.11509200123868e-06, |
| "loss": 1.6434037685394287, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.6882647171989227, |
| "grad_norm": 2.03125, |
| "learning_rate": 8.108320116875557e-06, |
| "loss": 1.0867962837219238, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.6894959599846094, |
| "grad_norm": 1.0625, |
| "learning_rate": 8.101539854833201e-06, |
| "loss": 1.0537822246551514, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.6907272027702963, |
| "grad_norm": 2.359375, |
| "learning_rate": 8.094751242053846e-06, |
| "loss": 1.4303661584854126, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.6919584455559831, |
| "grad_norm": 2.796875, |
| "learning_rate": 8.087954305512923e-06, |
| "loss": 1.4762662649154663, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.6931896883416698, |
| "grad_norm": 6.28125, |
| "learning_rate": 8.08114907221891e-06, |
| "loss": 1.4999399185180664, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.6944209311273567, |
| "grad_norm": 2.578125, |
| "learning_rate": 8.074335569213287e-06, |
| "loss": 1.445244312286377, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.6956521739130435, |
| "grad_norm": 2.9375, |
| "learning_rate": 8.067513823570368e-06, |
| "loss": 1.4429575204849243, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.6968834166987303, |
| "grad_norm": 3.203125, |
| "learning_rate": 8.060683862397236e-06, |
| "loss": 1.4105318784713745, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.6981146594844171, |
| "grad_norm": 2.265625, |
| "learning_rate": 8.05384571283361e-06, |
| "loss": 1.4015456438064575, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.6993459022701038, |
| "grad_norm": 2.75, |
| "learning_rate": 8.046999402051754e-06, |
| "loss": 1.5193300247192383, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.7005771450557907, |
| "grad_norm": 6.28125, |
| "learning_rate": 8.040144957256357e-06, |
| "loss": 1.298558235168457, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.7018083878414775, |
| "grad_norm": 2.359375, |
| "learning_rate": 8.033282405684428e-06, |
| "loss": 1.5250219106674194, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.7030396306271643, |
| "grad_norm": 2.234375, |
| "learning_rate": 8.026411774605198e-06, |
| "loss": 1.498106837272644, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.7042708734128511, |
| "grad_norm": 2.109375, |
| "learning_rate": 8.019533091319991e-06, |
| "loss": 1.4430673122406006, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.7055021161985379, |
| "grad_norm": 2.0, |
| "learning_rate": 8.012646383162138e-06, |
| "loss": 1.4169411659240723, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.7067333589842247, |
| "grad_norm": 1.9453125, |
| "learning_rate": 8.00575167749685e-06, |
| "loss": 1.4503501653671265, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.7079646017699115, |
| "grad_norm": 1.4765625, |
| "learning_rate": 7.998849001721123e-06, |
| "loss": 1.3543637990951538, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.7091958445555983, |
| "grad_norm": 3.8125, |
| "learning_rate": 7.991938383263617e-06, |
| "loss": 1.388875126838684, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.7104270873412851, |
| "grad_norm": 1.921875, |
| "learning_rate": 7.98501984958456e-06, |
| "loss": 1.1109329462051392, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.7116583301269719, |
| "grad_norm": 2.25, |
| "learning_rate": 7.978093428175632e-06, |
| "loss": 1.2034087181091309, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.7128895729126588, |
| "grad_norm": 1.1328125, |
| "learning_rate": 7.971159146559848e-06, |
| "loss": 1.061422348022461, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.7141208156983455, |
| "grad_norm": 0.8828125, |
| "learning_rate": 7.964217032291463e-06, |
| "loss": 1.0195786952972412, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.7153520584840323, |
| "grad_norm": 0.85546875, |
| "learning_rate": 7.957267112955856e-06, |
| "loss": 0.9217634201049805, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.7165833012697191, |
| "grad_norm": 1.2421875, |
| "learning_rate": 7.950309416169415e-06, |
| "loss": 1.1007217168807983, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.7178145440554059, |
| "grad_norm": 3.546875, |
| "learning_rate": 7.943343969579443e-06, |
| "loss": 1.9357012510299683, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.7190457868410928, |
| "grad_norm": 3.25, |
| "learning_rate": 7.936370800864026e-06, |
| "loss": 1.784494400024414, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.7202770296267795, |
| "grad_norm": 1.796875, |
| "learning_rate": 7.929389937731942e-06, |
| "loss": 1.5166871547698975, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.7215082724124663, |
| "grad_norm": 5.28125, |
| "learning_rate": 7.922401407922546e-06, |
| "loss": 1.3804179430007935, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.7227395151981532, |
| "grad_norm": 3.328125, |
| "learning_rate": 7.915405239205647e-06, |
| "loss": 1.4463698863983154, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.7239707579838399, |
| "grad_norm": 2.09375, |
| "learning_rate": 7.90840145938142e-06, |
| "loss": 1.4229485988616943, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.7252020007695268, |
| "grad_norm": 2.046875, |
| "learning_rate": 7.901390096280276e-06, |
| "loss": 1.3788976669311523, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.7264332435552135, |
| "grad_norm": 2.828125, |
| "learning_rate": 7.894371177762765e-06, |
| "loss": 1.4840986728668213, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.7276644863409003, |
| "grad_norm": 1.7734375, |
| "learning_rate": 7.88734473171945e-06, |
| "loss": 1.4703840017318726, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.7288957291265872, |
| "grad_norm": 2.796875, |
| "learning_rate": 7.880310786070818e-06, |
| "loss": 1.4176890850067139, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.7301269719122739, |
| "grad_norm": 4.625, |
| "learning_rate": 7.873269368767147e-06, |
| "loss": 0.6599590182304382, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.7313582146979608, |
| "grad_norm": 8.0625, |
| "learning_rate": 7.866220507788409e-06, |
| "loss": 0.6873646378517151, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.7325894574836476, |
| "grad_norm": 2.703125, |
| "learning_rate": 7.859164231144152e-06, |
| "loss": 1.4601305723190308, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.7338207002693343, |
| "grad_norm": 2.65625, |
| "learning_rate": 7.852100566873394e-06, |
| "loss": 1.380963683128357, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.7350519430550212, |
| "grad_norm": 7.5625, |
| "learning_rate": 7.845029543044506e-06, |
| "loss": 1.0763018131256104, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.736283185840708, |
| "grad_norm": 2.640625, |
| "learning_rate": 7.837951187755106e-06, |
| "loss": 1.0719996690750122, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.7375144286263947, |
| "grad_norm": 3.1875, |
| "learning_rate": 7.830865529131939e-06, |
| "loss": 1.4080944061279297, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.7387456714120816, |
| "grad_norm": 3.859375, |
| "learning_rate": 7.82377259533078e-06, |
| "loss": 1.4090288877487183, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.7399769141977683, |
| "grad_norm": 0.8984375, |
| "learning_rate": 7.816672414536299e-06, |
| "loss": 1.0860710144042969, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.7412081569834552, |
| "grad_norm": 0.609375, |
| "learning_rate": 7.80956501496198e-06, |
| "loss": 1.0601508617401123, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.742439399769142, |
| "grad_norm": 3.265625, |
| "learning_rate": 7.802450424849975e-06, |
| "loss": 1.6900361776351929, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.7436706425548287, |
| "grad_norm": 3.484375, |
| "learning_rate": 7.795328672471024e-06, |
| "loss": 1.7971972227096558, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.7449018853405156, |
| "grad_norm": 6.5, |
| "learning_rate": 7.788199786124316e-06, |
| "loss": 1.9263969659805298, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.7461331281262024, |
| "grad_norm": 3.28125, |
| "learning_rate": 7.78106379413739e-06, |
| "loss": 1.4539012908935547, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.7473643709118892, |
| "grad_norm": 3.359375, |
| "learning_rate": 7.773920724866022e-06, |
| "loss": 1.5632342100143433, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.748595613697576, |
| "grad_norm": 2.9375, |
| "learning_rate": 7.766770606694109e-06, |
| "loss": 1.2990467548370361, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.7498268564832627, |
| "grad_norm": 0.83984375, |
| "learning_rate": 7.759613468033564e-06, |
| "loss": 1.1596078872680664, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.7510580992689496, |
| "grad_norm": 0.734375, |
| "learning_rate": 7.752449337324188e-06, |
| "loss": 1.0468875169754028, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.7522893420546364, |
| "grad_norm": 1.671875, |
| "learning_rate": 7.74527824303357e-06, |
| "loss": 1.1754250526428223, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.7535205848403232, |
| "grad_norm": 1.1875, |
| "learning_rate": 7.738100213656962e-06, |
| "loss": 1.1995749473571777, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.75475182762601, |
| "grad_norm": 3.921875, |
| "learning_rate": 7.730915277717192e-06, |
| "loss": 1.4692225456237793, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.7559830704116968, |
| "grad_norm": 4.75, |
| "learning_rate": 7.723723463764515e-06, |
| "loss": 1.7058963775634766, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.7572143131973836, |
| "grad_norm": 2.515625, |
| "learning_rate": 7.716524800376521e-06, |
| "loss": 1.4868927001953125, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.7584455559830704, |
| "grad_norm": 2.65625, |
| "learning_rate": 7.709319316158017e-06, |
| "loss": 1.4232137203216553, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.7596767987687573, |
| "grad_norm": 2.828125, |
| "learning_rate": 7.70210703974092e-06, |
| "loss": 1.4825751781463623, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.760908041554444, |
| "grad_norm": 2.046875, |
| "learning_rate": 7.69488799978413e-06, |
| "loss": 1.4381661415100098, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.7621392843401308, |
| "grad_norm": 1.984375, |
| "learning_rate": 7.68766222497342e-06, |
| "loss": 1.4844237565994263, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.7633705271258177, |
| "grad_norm": 2.359375, |
| "learning_rate": 7.680429744021333e-06, |
| "loss": 1.7116321325302124, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.7646017699115044, |
| "grad_norm": 2.078125, |
| "learning_rate": 7.673190585667056e-06, |
| "loss": 1.1763395071029663, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.7658330126971912, |
| "grad_norm": 2.1875, |
| "learning_rate": 7.665944778676307e-06, |
| "loss": 1.1598796844482422, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.767064255482878, |
| "grad_norm": 3.46875, |
| "learning_rate": 7.658692351841226e-06, |
| "loss": 1.3642587661743164, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.7682954982685648, |
| "grad_norm": 2.0, |
| "learning_rate": 7.651433333980256e-06, |
| "loss": 1.430611491203308, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.7695267410542517, |
| "grad_norm": 4.21875, |
| "learning_rate": 7.644167753938035e-06, |
| "loss": 1.4081594944000244, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.7707579838399384, |
| "grad_norm": 3.953125, |
| "learning_rate": 7.636895640585271e-06, |
| "loss": 1.8488107919692993, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.7719892266256252, |
| "grad_norm": 5.59375, |
| "learning_rate": 7.629617022818634e-06, |
| "loss": 1.9246183633804321, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.7732204694113121, |
| "grad_norm": 3.625, |
| "learning_rate": 7.622331929560643e-06, |
| "loss": 1.4888027906417847, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.7744517121969988, |
| "grad_norm": 0.96484375, |
| "learning_rate": 7.615040389759547e-06, |
| "loss": 1.1719835996627808, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.7756829549826857, |
| "grad_norm": 1.671875, |
| "learning_rate": 7.607742432389207e-06, |
| "loss": 1.0067853927612305, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.7769141977683724, |
| "grad_norm": 4.4375, |
| "learning_rate": 7.600438086448993e-06, |
| "loss": 1.4551739692687988, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.7781454405540592, |
| "grad_norm": 4.1875, |
| "learning_rate": 7.593127380963654e-06, |
| "loss": 1.7276794910430908, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.7793766833397461, |
| "grad_norm": 2.9375, |
| "learning_rate": 7.5858103449832135e-06, |
| "loss": 1.536874771118164, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.7806079261254328, |
| "grad_norm": 2.0625, |
| "learning_rate": 7.5784870075828446e-06, |
| "loss": 1.4394667148590088, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.7818391689111197, |
| "grad_norm": 1.7578125, |
| "learning_rate": 7.571157397862767e-06, |
| "loss": 1.2508525848388672, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.7830704116968065, |
| "grad_norm": 1.171875, |
| "learning_rate": 7.563821544948123e-06, |
| "loss": 1.0572394132614136, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.7843016544824932, |
| "grad_norm": 3.0, |
| "learning_rate": 7.556479477988856e-06, |
| "loss": 1.6088207960128784, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.7855328972681801, |
| "grad_norm": 3.328125, |
| "learning_rate": 7.54913122615961e-06, |
| "loss": 1.9735209941864014, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.7867641400538669, |
| "grad_norm": 3.03125, |
| "learning_rate": 7.5417768186596006e-06, |
| "loss": 1.5228854417800903, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.7879953828395537, |
| "grad_norm": 1.84375, |
| "learning_rate": 7.534416284712504e-06, |
| "loss": 1.4340687990188599, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.7892266256252405, |
| "grad_norm": 1.890625, |
| "learning_rate": 7.527049653566347e-06, |
| "loss": 1.4551016092300415, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.7904578684109272, |
| "grad_norm": 2.828125, |
| "learning_rate": 7.519676954493373e-06, |
| "loss": 1.3765220642089844, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.7916891111966141, |
| "grad_norm": 1.2421875, |
| "learning_rate": 7.512298216789948e-06, |
| "loss": 1.229244589805603, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.7929203539823009, |
| "grad_norm": 1.375, |
| "learning_rate": 7.504913469776427e-06, |
| "loss": 1.0655384063720703, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.7941515967679876, |
| "grad_norm": 5.6875, |
| "learning_rate": 7.497522742797046e-06, |
| "loss": 1.4802930355072021, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.7953828395536745, |
| "grad_norm": 4.40625, |
| "learning_rate": 7.490126065219798e-06, |
| "loss": 1.3891750574111938, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.7966140823393613, |
| "grad_norm": 2.734375, |
| "learning_rate": 7.482723466436333e-06, |
| "loss": 1.4222067594528198, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.7978453251250481, |
| "grad_norm": 2.71875, |
| "learning_rate": 7.475314975861816e-06, |
| "loss": 1.4236258268356323, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.7990765679107349, |
| "grad_norm": 12.3125, |
| "learning_rate": 7.467900622934834e-06, |
| "loss": 1.8899813890457153, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.8003078106964217, |
| "grad_norm": 5.1875, |
| "learning_rate": 7.460480437117266e-06, |
| "loss": 1.1637026071548462, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.8015390534821085, |
| "grad_norm": 2.171875, |
| "learning_rate": 7.453054447894168e-06, |
| "loss": 1.5327718257904053, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.8027702962677953, |
| "grad_norm": 2.484375, |
| "learning_rate": 7.445622684773652e-06, |
| "loss": 1.3591474294662476, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.8040015390534822, |
| "grad_norm": 0.96875, |
| "learning_rate": 7.438185177286785e-06, |
| "loss": 1.1956864595413208, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.8052327818391689, |
| "grad_norm": 1.390625, |
| "learning_rate": 7.430741954987446e-06, |
| "loss": 0.9828561544418335, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.8064640246248557, |
| "grad_norm": 6.875, |
| "learning_rate": 7.423293047452234e-06, |
| "loss": 1.4108320474624634, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.8076952674105425, |
| "grad_norm": 2.859375, |
| "learning_rate": 7.415838484280331e-06, |
| "loss": 1.5532580614089966, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.8089265101962293, |
| "grad_norm": 1.0, |
| "learning_rate": 7.408378295093399e-06, |
| "loss": 0.9883386492729187, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.8101577529819162, |
| "grad_norm": 0.78515625, |
| "learning_rate": 7.4009125095354494e-06, |
| "loss": 1.1321879625320435, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.8113889957676029, |
| "grad_norm": 3.671875, |
| "learning_rate": 7.393441157272738e-06, |
| "loss": 1.622273325920105, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.8126202385532897, |
| "grad_norm": 5.15625, |
| "learning_rate": 7.385964267993635e-06, |
| "loss": 1.7767966985702515, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.8138514813389766, |
| "grad_norm": 4.65625, |
| "learning_rate": 7.3784818714085136e-06, |
| "loss": 1.9980825185775757, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.8150827241246633, |
| "grad_norm": 7.5, |
| "learning_rate": 7.370993997249634e-06, |
| "loss": 1.5187777280807495, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.8163139669103502, |
| "grad_norm": 2.125, |
| "learning_rate": 7.36350067527102e-06, |
| "loss": 1.4495576620101929, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.817545209696037, |
| "grad_norm": 2.359375, |
| "learning_rate": 7.3560019352483444e-06, |
| "loss": 1.4711284637451172, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.8187764524817237, |
| "grad_norm": 2.03125, |
| "learning_rate": 7.3484978069788075e-06, |
| "loss": 1.474364161491394, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.8200076952674106, |
| "grad_norm": 2.25, |
| "learning_rate": 7.34098832028102e-06, |
| "loss": 1.4535422325134277, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.8212389380530973, |
| "grad_norm": 2.484375, |
| "learning_rate": 7.333473504994888e-06, |
| "loss": 1.6823538541793823, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.8224701808387841, |
| "grad_norm": 2.640625, |
| "learning_rate": 7.3259533909814905e-06, |
| "loss": 1.761359691619873, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.823701423624471, |
| "grad_norm": 6.78125, |
| "learning_rate": 7.318428008122958e-06, |
| "loss": 2.0572004318237305, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.8249326664101577, |
| "grad_norm": 3.875, |
| "learning_rate": 7.310897386322362e-06, |
| "loss": 1.606192946434021, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.8261639091958446, |
| "grad_norm": 2.296875, |
| "learning_rate": 7.303361555503592e-06, |
| "loss": 1.5423405170440674, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.8273951519815314, |
| "grad_norm": 2.859375, |
| "learning_rate": 7.295820545611232e-06, |
| "loss": 1.3942408561706543, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.8286263947672181, |
| "grad_norm": 0.765625, |
| "learning_rate": 7.288274386610446e-06, |
| "loss": 1.0209629535675049, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.829857637552905, |
| "grad_norm": 1.3046875, |
| "learning_rate": 7.280723108486863e-06, |
| "loss": 1.0453615188598633, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.8310888803385917, |
| "grad_norm": 2.859375, |
| "learning_rate": 7.273166741246449e-06, |
| "loss": 1.1256355047225952, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.8323201231242786, |
| "grad_norm": 1.203125, |
| "learning_rate": 7.265605314915399e-06, |
| "loss": 1.0713417530059814, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.8335513659099654, |
| "grad_norm": 2.6875, |
| "learning_rate": 7.258038859540002e-06, |
| "loss": 1.2602061033248901, |
| "step": 1354 |
| }, |
| { |
| "epoch": 0.8347826086956521, |
| "grad_norm": 2.765625, |
| "learning_rate": 7.250467405186534e-06, |
| "loss": 1.3749901056289673, |
| "step": 1356 |
| }, |
| { |
| "epoch": 0.836013851481339, |
| "grad_norm": 2.859375, |
| "learning_rate": 7.242890981941137e-06, |
| "loss": 1.319159984588623, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.8372450942670258, |
| "grad_norm": 2.328125, |
| "learning_rate": 7.235309619909698e-06, |
| "loss": 1.401831865310669, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.8384763370527126, |
| "grad_norm": 2.765625, |
| "learning_rate": 7.227723349217728e-06, |
| "loss": 1.5969266891479492, |
| "step": 1362 |
| }, |
| { |
| "epoch": 0.8397075798383994, |
| "grad_norm": 2.609375, |
| "learning_rate": 7.220132200010237e-06, |
| "loss": 1.5466394424438477, |
| "step": 1364 |
| }, |
| { |
| "epoch": 0.8409388226240861, |
| "grad_norm": 3.046875, |
| "learning_rate": 7.21253620245163e-06, |
| "loss": 1.8190059661865234, |
| "step": 1366 |
| }, |
| { |
| "epoch": 0.842170065409773, |
| "grad_norm": 2.65625, |
| "learning_rate": 7.204935386725573e-06, |
| "loss": 1.8231192827224731, |
| "step": 1368 |
| }, |
| { |
| "epoch": 0.8434013081954598, |
| "grad_norm": 2.046875, |
| "learning_rate": 7.197329783034879e-06, |
| "loss": 1.52850341796875, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.8446325509811466, |
| "grad_norm": 1.9375, |
| "learning_rate": 7.18971942160138e-06, |
| "loss": 1.413024663925171, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.8458637937668334, |
| "grad_norm": 5.28125, |
| "learning_rate": 7.182104332665827e-06, |
| "loss": 1.7647671699523926, |
| "step": 1374 |
| }, |
| { |
| "epoch": 0.8470950365525202, |
| "grad_norm": 3.34375, |
| "learning_rate": 7.174484546487743e-06, |
| "loss": 1.7847025394439697, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.848326279338207, |
| "grad_norm": 2.515625, |
| "learning_rate": 7.1668600933453225e-06, |
| "loss": 1.5937633514404297, |
| "step": 1378 |
| }, |
| { |
| "epoch": 0.8495575221238938, |
| "grad_norm": 2.109375, |
| "learning_rate": 7.159231003535305e-06, |
| "loss": 1.3834004402160645, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.8507887649095806, |
| "grad_norm": 2.8125, |
| "learning_rate": 7.151597307372853e-06, |
| "loss": 1.671330213546753, |
| "step": 1382 |
| }, |
| { |
| "epoch": 0.8520200076952674, |
| "grad_norm": 2.765625, |
| "learning_rate": 7.143959035191432e-06, |
| "loss": 1.8127377033233643, |
| "step": 1384 |
| }, |
| { |
| "epoch": 0.8532512504809542, |
| "grad_norm": 2.765625, |
| "learning_rate": 7.136316217342691e-06, |
| "loss": 1.3590033054351807, |
| "step": 1386 |
| }, |
| { |
| "epoch": 0.854482493266641, |
| "grad_norm": 7.9375, |
| "learning_rate": 7.128668884196346e-06, |
| "loss": 1.1065455675125122, |
| "step": 1388 |
| }, |
| { |
| "epoch": 0.8557137360523278, |
| "grad_norm": 8.5, |
| "learning_rate": 7.12101706614005e-06, |
| "loss": 1.72328782081604, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.8569449788380146, |
| "grad_norm": 9.375, |
| "learning_rate": 7.113360793579281e-06, |
| "loss": 1.730324387550354, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.8581762216237014, |
| "grad_norm": 6.09375, |
| "learning_rate": 7.105700096937211e-06, |
| "loss": 2.003936529159546, |
| "step": 1394 |
| }, |
| { |
| "epoch": 0.8594074644093882, |
| "grad_norm": 2.984375, |
| "learning_rate": 7.0980350066546e-06, |
| "loss": 1.5877940654754639, |
| "step": 1396 |
| }, |
| { |
| "epoch": 0.8606387071950751, |
| "grad_norm": 3.0625, |
| "learning_rate": 7.090365553189664e-06, |
| "loss": 1.6757268905639648, |
| "step": 1398 |
| }, |
| { |
| "epoch": 0.8618699499807618, |
| "grad_norm": 3.0, |
| "learning_rate": 7.082691767017955e-06, |
| "loss": 1.928758144378662, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.8631011927664486, |
| "grad_norm": 3.453125, |
| "learning_rate": 7.075013678632239e-06, |
| "loss": 1.6859486103057861, |
| "step": 1402 |
| }, |
| { |
| "epoch": 0.8643324355521355, |
| "grad_norm": 7.15625, |
| "learning_rate": 7.067331318542388e-06, |
| "loss": 2.008883237838745, |
| "step": 1404 |
| }, |
| { |
| "epoch": 0.8655636783378222, |
| "grad_norm": 4.0, |
| "learning_rate": 7.059644717275234e-06, |
| "loss": 1.767155647277832, |
| "step": 1406 |
| }, |
| { |
| "epoch": 0.8667949211235091, |
| "grad_norm": 3.09375, |
| "learning_rate": 7.051953905374471e-06, |
| "loss": 1.611624002456665, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.8680261639091958, |
| "grad_norm": 2.828125, |
| "learning_rate": 7.044258913400521e-06, |
| "loss": 1.3147724866867065, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.8692574066948826, |
| "grad_norm": 0.984375, |
| "learning_rate": 7.036559771930422e-06, |
| "loss": 1.1153672933578491, |
| "step": 1412 |
| }, |
| { |
| "epoch": 0.8704886494805695, |
| "grad_norm": 2.75, |
| "learning_rate": 7.028856511557692e-06, |
| "loss": 1.1882882118225098, |
| "step": 1414 |
| }, |
| { |
| "epoch": 0.8717198922662562, |
| "grad_norm": 5.0, |
| "learning_rate": 7.02114916289222e-06, |
| "loss": 1.5670009851455688, |
| "step": 1416 |
| }, |
| { |
| "epoch": 0.8729511350519431, |
| "grad_norm": 3.8125, |
| "learning_rate": 7.013437756560139e-06, |
| "loss": 1.4105370044708252, |
| "step": 1418 |
| }, |
| { |
| "epoch": 0.8741823778376299, |
| "grad_norm": 2.703125, |
| "learning_rate": 7.005722323203712e-06, |
| "loss": 1.5349271297454834, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.8754136206233166, |
| "grad_norm": 13.0, |
| "learning_rate": 6.998002893481193e-06, |
| "loss": 1.1651748418807983, |
| "step": 1422 |
| }, |
| { |
| "epoch": 0.8766448634090035, |
| "grad_norm": 0.9609375, |
| "learning_rate": 6.990279498066726e-06, |
| "loss": 1.072435975074768, |
| "step": 1424 |
| }, |
| { |
| "epoch": 0.8778761061946903, |
| "grad_norm": 1.921875, |
| "learning_rate": 6.9825521676502076e-06, |
| "loss": 1.2697044610977173, |
| "step": 1426 |
| }, |
| { |
| "epoch": 0.8791073489803771, |
| "grad_norm": 5.0, |
| "learning_rate": 6.97482093293717e-06, |
| "loss": 1.482898473739624, |
| "step": 1428 |
| }, |
| { |
| "epoch": 0.8803385917660639, |
| "grad_norm": 2.265625, |
| "learning_rate": 6.967085824648663e-06, |
| "loss": 1.3979582786560059, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.8815698345517506, |
| "grad_norm": 2.34375, |
| "learning_rate": 6.959346873521129e-06, |
| "loss": 1.4735959768295288, |
| "step": 1432 |
| }, |
| { |
| "epoch": 0.8828010773374375, |
| "grad_norm": 0.82421875, |
| "learning_rate": 6.951604110306278e-06, |
| "loss": 1.3152323961257935, |
| "step": 1434 |
| }, |
| { |
| "epoch": 0.8840323201231243, |
| "grad_norm": 1.6328125, |
| "learning_rate": 6.943857565770966e-06, |
| "loss": 1.1805187463760376, |
| "step": 1436 |
| }, |
| { |
| "epoch": 0.885263562908811, |
| "grad_norm": 11.125, |
| "learning_rate": 6.936107270697079e-06, |
| "loss": 1.6937435865402222, |
| "step": 1438 |
| }, |
| { |
| "epoch": 0.8864948056944979, |
| "grad_norm": 3.0, |
| "learning_rate": 6.928353255881406e-06, |
| "loss": 1.6152817010879517, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.8877260484801847, |
| "grad_norm": 1.7265625, |
| "learning_rate": 6.920595552135509e-06, |
| "loss": 1.4991744756698608, |
| "step": 1442 |
| }, |
| { |
| "epoch": 0.8889572912658715, |
| "grad_norm": 2.015625, |
| "learning_rate": 6.912834190285621e-06, |
| "loss": 1.431369423866272, |
| "step": 1444 |
| }, |
| { |
| "epoch": 0.8901885340515583, |
| "grad_norm": 1.0703125, |
| "learning_rate": 6.905069201172501e-06, |
| "loss": 1.1146457195281982, |
| "step": 1446 |
| }, |
| { |
| "epoch": 0.891419776837245, |
| "grad_norm": 0.94140625, |
| "learning_rate": 6.897300615651328e-06, |
| "loss": 1.0204036235809326, |
| "step": 1448 |
| }, |
| { |
| "epoch": 0.8926510196229319, |
| "grad_norm": 2.25, |
| "learning_rate": 6.889528464591566e-06, |
| "loss": 1.4429939985275269, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.8938822624086187, |
| "grad_norm": 1.7578125, |
| "learning_rate": 6.881752778876849e-06, |
| "loss": 1.6094764471054077, |
| "step": 1452 |
| }, |
| { |
| "epoch": 0.8951135051943055, |
| "grad_norm": 3.8125, |
| "learning_rate": 6.873973589404861e-06, |
| "loss": 1.1291427612304688, |
| "step": 1454 |
| }, |
| { |
| "epoch": 0.8963447479799923, |
| "grad_norm": 1.7109375, |
| "learning_rate": 6.8661909270872014e-06, |
| "loss": 1.1262400150299072, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.8975759907656791, |
| "grad_norm": 0.7109375, |
| "learning_rate": 6.858404822849272e-06, |
| "loss": 1.190704107284546, |
| "step": 1458 |
| }, |
| { |
| "epoch": 0.8988072335513659, |
| "grad_norm": 1.1015625, |
| "learning_rate": 6.850615307630158e-06, |
| "loss": 1.0832605361938477, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.9000384763370527, |
| "grad_norm": 3.96875, |
| "learning_rate": 6.842822412382486e-06, |
| "loss": 1.4920941591262817, |
| "step": 1462 |
| }, |
| { |
| "epoch": 0.9012697191227396, |
| "grad_norm": 6.15625, |
| "learning_rate": 6.8350261680723254e-06, |
| "loss": 1.636549711227417, |
| "step": 1464 |
| }, |
| { |
| "epoch": 0.9025009619084263, |
| "grad_norm": 1.8359375, |
| "learning_rate": 6.827226605679045e-06, |
| "loss": 1.2701830863952637, |
| "step": 1466 |
| }, |
| { |
| "epoch": 0.9037322046941131, |
| "grad_norm": 1.4296875, |
| "learning_rate": 6.819423756195205e-06, |
| "loss": 1.0739316940307617, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.9049634474798, |
| "grad_norm": 2.515625, |
| "learning_rate": 6.811617650626423e-06, |
| "loss": 1.635694980621338, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.9061946902654867, |
| "grad_norm": 4.71875, |
| "learning_rate": 6.8038083199912574e-06, |
| "loss": 1.6989467144012451, |
| "step": 1472 |
| }, |
| { |
| "epoch": 0.9074259330511736, |
| "grad_norm": 6.3125, |
| "learning_rate": 6.795995795321079e-06, |
| "loss": 1.4359147548675537, |
| "step": 1474 |
| }, |
| { |
| "epoch": 0.9086571758368603, |
| "grad_norm": 2.3125, |
| "learning_rate": 6.788180107659954e-06, |
| "loss": 1.4350411891937256, |
| "step": 1476 |
| }, |
| { |
| "epoch": 0.9098884186225471, |
| "grad_norm": 2.1875, |
| "learning_rate": 6.780361288064514e-06, |
| "loss": 1.3674818277359009, |
| "step": 1478 |
| }, |
| { |
| "epoch": 0.911119661408234, |
| "grad_norm": 2.734375, |
| "learning_rate": 6.772539367603839e-06, |
| "loss": 1.445178508758545, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.9123509041939207, |
| "grad_norm": 3.390625, |
| "learning_rate": 6.764714377359327e-06, |
| "loss": 1.8486201763153076, |
| "step": 1482 |
| }, |
| { |
| "epoch": 0.9135821469796075, |
| "grad_norm": 2.46875, |
| "learning_rate": 6.756886348424575e-06, |
| "loss": 1.7258269786834717, |
| "step": 1484 |
| }, |
| { |
| "epoch": 0.9148133897652944, |
| "grad_norm": 2.078125, |
| "learning_rate": 6.749055311905259e-06, |
| "loss": 1.5248911380767822, |
| "step": 1486 |
| }, |
| { |
| "epoch": 0.9160446325509811, |
| "grad_norm": 2.671875, |
| "learning_rate": 6.741221298919002e-06, |
| "loss": 1.4051947593688965, |
| "step": 1488 |
| }, |
| { |
| "epoch": 0.917275875336668, |
| "grad_norm": 2.703125, |
| "learning_rate": 6.7333843405952525e-06, |
| "loss": 1.2086796760559082, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.9185071181223547, |
| "grad_norm": 2.140625, |
| "learning_rate": 6.7255444680751684e-06, |
| "loss": 1.1943715810775757, |
| "step": 1492 |
| }, |
| { |
| "epoch": 0.9197383609080415, |
| "grad_norm": 2.328125, |
| "learning_rate": 6.717701712511482e-06, |
| "loss": 1.3514329195022583, |
| "step": 1494 |
| }, |
| { |
| "epoch": 0.9209696036937284, |
| "grad_norm": 2.609375, |
| "learning_rate": 6.7098561050683854e-06, |
| "loss": 1.5922294855117798, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.9222008464794151, |
| "grad_norm": 5.0625, |
| "learning_rate": 6.7020076769214014e-06, |
| "loss": 1.4178842306137085, |
| "step": 1498 |
| }, |
| { |
| "epoch": 0.923432089265102, |
| "grad_norm": 2.3125, |
| "learning_rate": 6.694156459257259e-06, |
| "loss": 1.4526511430740356, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.9246633320507888, |
| "grad_norm": 1.90625, |
| "learning_rate": 6.686302483273781e-06, |
| "loss": 1.2742807865142822, |
| "step": 1502 |
| }, |
| { |
| "epoch": 0.9258945748364755, |
| "grad_norm": 1.2890625, |
| "learning_rate": 6.678445780179738e-06, |
| "loss": 1.2045783996582031, |
| "step": 1504 |
| }, |
| { |
| "epoch": 0.9271258176221624, |
| "grad_norm": 6.21875, |
| "learning_rate": 6.670586381194744e-06, |
| "loss": 0.8458617329597473, |
| "step": 1506 |
| }, |
| { |
| "epoch": 0.9283570604078492, |
| "grad_norm": 4.0, |
| "learning_rate": 6.662724317549125e-06, |
| "loss": 0.547357439994812, |
| "step": 1508 |
| }, |
| { |
| "epoch": 0.929588303193536, |
| "grad_norm": 1.0234375, |
| "learning_rate": 6.654859620483798e-06, |
| "loss": 1.0537328720092773, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.9308195459792228, |
| "grad_norm": 1.1953125, |
| "learning_rate": 6.646992321250136e-06, |
| "loss": 1.317865014076233, |
| "step": 1512 |
| }, |
| { |
| "epoch": 0.9320507887649095, |
| "grad_norm": 1.984375, |
| "learning_rate": 6.639122451109861e-06, |
| "loss": 1.3471020460128784, |
| "step": 1514 |
| }, |
| { |
| "epoch": 0.9332820315505964, |
| "grad_norm": 2.796875, |
| "learning_rate": 6.631250041334905e-06, |
| "loss": 1.4489096403121948, |
| "step": 1516 |
| }, |
| { |
| "epoch": 0.9345132743362832, |
| "grad_norm": 1.1328125, |
| "learning_rate": 6.623375123207295e-06, |
| "loss": 1.2243424654006958, |
| "step": 1518 |
| }, |
| { |
| "epoch": 0.93574451712197, |
| "grad_norm": 1.578125, |
| "learning_rate": 6.6154977280190225e-06, |
| "loss": 1.1166824102401733, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.9369757599076568, |
| "grad_norm": 5.21875, |
| "learning_rate": 6.607617887071924e-06, |
| "loss": 1.246292233467102, |
| "step": 1522 |
| }, |
| { |
| "epoch": 0.9382070026933436, |
| "grad_norm": 1.421875, |
| "learning_rate": 6.599735631677555e-06, |
| "loss": 1.030418038368225, |
| "step": 1524 |
| }, |
| { |
| "epoch": 0.9394382454790304, |
| "grad_norm": 2.15625, |
| "learning_rate": 6.591850993157063e-06, |
| "loss": 1.056132197380066, |
| "step": 1526 |
| }, |
| { |
| "epoch": 0.9406694882647172, |
| "grad_norm": 0.81640625, |
| "learning_rate": 6.5839640028410635e-06, |
| "loss": 1.0492579936981201, |
| "step": 1528 |
| }, |
| { |
| "epoch": 0.941900731050404, |
| "grad_norm": 1.4453125, |
| "learning_rate": 6.5760746920695225e-06, |
| "loss": 1.1138197183609009, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.9431319738360908, |
| "grad_norm": 1.1015625, |
| "learning_rate": 6.568183092191624e-06, |
| "loss": 1.1705691814422607, |
| "step": 1532 |
| }, |
| { |
| "epoch": 0.9443632166217776, |
| "grad_norm": 2.109375, |
| "learning_rate": 6.560289234565649e-06, |
| "loss": 1.4367778301239014, |
| "step": 1534 |
| }, |
| { |
| "epoch": 0.9455944594074644, |
| "grad_norm": 2.078125, |
| "learning_rate": 6.552393150558847e-06, |
| "loss": 1.3702847957611084, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.9468257021931512, |
| "grad_norm": 2.203125, |
| "learning_rate": 6.544494871547316e-06, |
| "loss": 1.416054606437683, |
| "step": 1538 |
| }, |
| { |
| "epoch": 0.948056944978838, |
| "grad_norm": 2.609375, |
| "learning_rate": 6.536594428915875e-06, |
| "loss": 1.4559407234191895, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.9492881877645248, |
| "grad_norm": 3.375, |
| "learning_rate": 6.528691854057945e-06, |
| "loss": 1.782531976699829, |
| "step": 1542 |
| }, |
| { |
| "epoch": 0.9505194305502116, |
| "grad_norm": 7.71875, |
| "learning_rate": 6.520787178375415e-06, |
| "loss": 1.6748610734939575, |
| "step": 1544 |
| }, |
| { |
| "epoch": 0.9517506733358985, |
| "grad_norm": 3.671875, |
| "learning_rate": 6.5128804332785235e-06, |
| "loss": 1.773606777191162, |
| "step": 1546 |
| }, |
| { |
| "epoch": 0.9529819161215852, |
| "grad_norm": 21.5, |
| "learning_rate": 6.504971650185732e-06, |
| "loss": 1.8693180084228516, |
| "step": 1548 |
| }, |
| { |
| "epoch": 0.954213158907272, |
| "grad_norm": 2.40625, |
| "learning_rate": 6.497060860523598e-06, |
| "loss": 1.5108202695846558, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.9554444016929589, |
| "grad_norm": 3.1875, |
| "learning_rate": 6.4891480957266585e-06, |
| "loss": 1.4236609935760498, |
| "step": 1552 |
| }, |
| { |
| "epoch": 0.9566756444786456, |
| "grad_norm": 4.4375, |
| "learning_rate": 6.481233387237292e-06, |
| "loss": 1.7978931665420532, |
| "step": 1554 |
| }, |
| { |
| "epoch": 0.9579068872643325, |
| "grad_norm": 2.84375, |
| "learning_rate": 6.473316766505604e-06, |
| "loss": 1.8097015619277954, |
| "step": 1556 |
| }, |
| { |
| "epoch": 0.9591381300500192, |
| "grad_norm": 2.0625, |
| "learning_rate": 6.465398264989298e-06, |
| "loss": 1.521843433380127, |
| "step": 1558 |
| }, |
| { |
| "epoch": 0.960369372835706, |
| "grad_norm": 3.515625, |
| "learning_rate": 6.4574779141535515e-06, |
| "loss": 1.451346516609192, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.9616006156213929, |
| "grad_norm": 2.84375, |
| "learning_rate": 6.44955574547089e-06, |
| "loss": 1.4029566049575806, |
| "step": 1562 |
| }, |
| { |
| "epoch": 0.9628318584070796, |
| "grad_norm": 5.1875, |
| "learning_rate": 6.44163179042106e-06, |
| "loss": 1.5579237937927246, |
| "step": 1564 |
| }, |
| { |
| "epoch": 0.9640631011927665, |
| "grad_norm": 0.94140625, |
| "learning_rate": 6.433706080490913e-06, |
| "loss": 1.1235785484313965, |
| "step": 1566 |
| }, |
| { |
| "epoch": 0.9652943439784533, |
| "grad_norm": 0.796875, |
| "learning_rate": 6.425778647174267e-06, |
| "loss": 1.038987398147583, |
| "step": 1568 |
| }, |
| { |
| "epoch": 0.96652558676414, |
| "grad_norm": 2.984375, |
| "learning_rate": 6.417849521971793e-06, |
| "loss": 1.796567678451538, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.9677568295498269, |
| "grad_norm": 8.125, |
| "learning_rate": 6.409918736390879e-06, |
| "loss": 1.7613965272903442, |
| "step": 1572 |
| }, |
| { |
| "epoch": 0.9689880723355137, |
| "grad_norm": 3.546875, |
| "learning_rate": 6.401986321945518e-06, |
| "loss": 1.4940305948257446, |
| "step": 1574 |
| }, |
| { |
| "epoch": 0.9702193151212004, |
| "grad_norm": 3.453125, |
| "learning_rate": 6.3940523101561695e-06, |
| "loss": 1.6017310619354248, |
| "step": 1576 |
| }, |
| { |
| "epoch": 0.9714505579068873, |
| "grad_norm": 2.515625, |
| "learning_rate": 6.386116732549641e-06, |
| "loss": 1.7846119403839111, |
| "step": 1578 |
| }, |
| { |
| "epoch": 0.972681800692574, |
| "grad_norm": 3.25, |
| "learning_rate": 6.378179620658967e-06, |
| "loss": 1.8666661977767944, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.9739130434782609, |
| "grad_norm": 2.796875, |
| "learning_rate": 6.370241006023274e-06, |
| "loss": 1.5027897357940674, |
| "step": 1582 |
| }, |
| { |
| "epoch": 0.9751442862639477, |
| "grad_norm": 2.375, |
| "learning_rate": 6.362300920187663e-06, |
| "loss": 1.422347068786621, |
| "step": 1584 |
| }, |
| { |
| "epoch": 0.9763755290496344, |
| "grad_norm": 1.1328125, |
| "learning_rate": 6.354359394703076e-06, |
| "loss": 1.1453440189361572, |
| "step": 1586 |
| }, |
| { |
| "epoch": 0.9776067718353213, |
| "grad_norm": 0.85546875, |
| "learning_rate": 6.346416461126177e-06, |
| "loss": 0.971987247467041, |
| "step": 1588 |
| }, |
| { |
| "epoch": 0.9788380146210081, |
| "grad_norm": 4.125, |
| "learning_rate": 6.3384721510192326e-06, |
| "loss": 1.5504741668701172, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.9800692574066949, |
| "grad_norm": 3.296875, |
| "learning_rate": 6.330526495949969e-06, |
| "loss": 1.597891092300415, |
| "step": 1592 |
| }, |
| { |
| "epoch": 0.9813005001923817, |
| "grad_norm": 3.640625, |
| "learning_rate": 6.322579527491465e-06, |
| "loss": 1.8136931657791138, |
| "step": 1594 |
| }, |
| { |
| "epoch": 0.9825317429780684, |
| "grad_norm": 2.953125, |
| "learning_rate": 6.314631277222012e-06, |
| "loss": 1.7769173383712769, |
| "step": 1596 |
| }, |
| { |
| "epoch": 0.9837629857637553, |
| "grad_norm": 2.046875, |
| "learning_rate": 6.306681776724997e-06, |
| "loss": 1.513192892074585, |
| "step": 1598 |
| }, |
| { |
| "epoch": 0.9849942285494421, |
| "grad_norm": 2.546875, |
| "learning_rate": 6.2987310575887775e-06, |
| "loss": 1.431948184967041, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.986225471335129, |
| "grad_norm": 4.0625, |
| "learning_rate": 6.29077915140655e-06, |
| "loss": 1.4942731857299805, |
| "step": 1602 |
| }, |
| { |
| "epoch": 0.9874567141208157, |
| "grad_norm": 2.671875, |
| "learning_rate": 6.282826089776231e-06, |
| "loss": 1.4677563905715942, |
| "step": 1604 |
| }, |
| { |
| "epoch": 0.9886879569065025, |
| "grad_norm": 1.3046875, |
| "learning_rate": 6.2748719043003236e-06, |
| "loss": 1.2760637998580933, |
| "step": 1606 |
| }, |
| { |
| "epoch": 0.9899191996921893, |
| "grad_norm": 1.1640625, |
| "learning_rate": 6.266916626585802e-06, |
| "loss": 1.0633869171142578, |
| "step": 1608 |
| }, |
| { |
| "epoch": 0.9911504424778761, |
| "grad_norm": 3.515625, |
| "learning_rate": 6.258960288243977e-06, |
| "loss": 1.0499076843261719, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.992381685263563, |
| "grad_norm": 4.9375, |
| "learning_rate": 6.251002920890377e-06, |
| "loss": 1.0250937938690186, |
| "step": 1612 |
| }, |
| { |
| "epoch": 0.9936129280492497, |
| "grad_norm": 5.09375, |
| "learning_rate": 6.243044556144614e-06, |
| "loss": 1.390341877937317, |
| "step": 1614 |
| }, |
| { |
| "epoch": 0.9948441708349365, |
| "grad_norm": 3.296875, |
| "learning_rate": 6.2350852256302695e-06, |
| "loss": 1.8683525323867798, |
| "step": 1616 |
| }, |
| { |
| "epoch": 0.9960754136206234, |
| "grad_norm": 1.5546875, |
| "learning_rate": 6.227124960974758e-06, |
| "loss": 1.2446808815002441, |
| "step": 1618 |
| }, |
| { |
| "epoch": 0.9973066564063101, |
| "grad_norm": 3.078125, |
| "learning_rate": 6.219163793809209e-06, |
| "loss": 1.4699010848999023, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.9985378991919969, |
| "grad_norm": 1.671875, |
| "learning_rate": 6.2112017557683364e-06, |
| "loss": 1.0956699848175049, |
| "step": 1622 |
| }, |
| { |
| "epoch": 0.9997691419776837, |
| "grad_norm": 0.890625, |
| "learning_rate": 6.203238878490316e-06, |
| "loss": 1.0894286632537842, |
| "step": 1624 |
| }, |
| { |
| "epoch": 1.0006156213928434, |
| "grad_norm": 0.46875, |
| "learning_rate": 6.195275193616654e-06, |
| "loss": 1.1105027198791504, |
| "step": 1626 |
| }, |
| { |
| "epoch": 1.0018468641785303, |
| "grad_norm": 0.609375, |
| "learning_rate": 6.187310732792075e-06, |
| "loss": 1.3475432395935059, |
| "step": 1628 |
| }, |
| { |
| "epoch": 1.003078106964217, |
| "grad_norm": 2.0625, |
| "learning_rate": 6.179345527664378e-06, |
| "loss": 1.3273013830184937, |
| "step": 1630 |
| }, |
| { |
| "epoch": 1.0043093497499038, |
| "grad_norm": 1.6953125, |
| "learning_rate": 6.171379609884323e-06, |
| "loss": 1.4933701753616333, |
| "step": 1632 |
| }, |
| { |
| "epoch": 1.0055405925355907, |
| "grad_norm": 0.9296875, |
| "learning_rate": 6.163413011105499e-06, |
| "loss": 1.3138911724090576, |
| "step": 1634 |
| }, |
| { |
| "epoch": 1.0067718353212773, |
| "grad_norm": 0.9921875, |
| "learning_rate": 6.155445762984209e-06, |
| "loss": 0.9952437281608582, |
| "step": 1636 |
| }, |
| { |
| "epoch": 1.0080030781069642, |
| "grad_norm": 2.53125, |
| "learning_rate": 6.147477897179328e-06, |
| "loss": 1.2402042150497437, |
| "step": 1638 |
| }, |
| { |
| "epoch": 1.009234320892651, |
| "grad_norm": 3.65625, |
| "learning_rate": 6.1395094453521875e-06, |
| "loss": 1.377193570137024, |
| "step": 1640 |
| }, |
| { |
| "epoch": 1.0104655636783377, |
| "grad_norm": 5.03125, |
| "learning_rate": 6.13154043916645e-06, |
| "loss": 1.6374868154525757, |
| "step": 1642 |
| }, |
| { |
| "epoch": 1.0116968064640246, |
| "grad_norm": 2.015625, |
| "learning_rate": 6.123570910287979e-06, |
| "loss": 1.6595889329910278, |
| "step": 1644 |
| }, |
| { |
| "epoch": 1.0129280492497115, |
| "grad_norm": 3.140625, |
| "learning_rate": 6.1156008903847164e-06, |
| "loss": 1.840135097503662, |
| "step": 1646 |
| }, |
| { |
| "epoch": 1.0141592920353983, |
| "grad_norm": 3.515625, |
| "learning_rate": 6.10763041112655e-06, |
| "loss": 1.8915746212005615, |
| "step": 1648 |
| }, |
| { |
| "epoch": 1.015390534821085, |
| "grad_norm": 1.015625, |
| "learning_rate": 6.0996595041852e-06, |
| "loss": 1.5289441347122192, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.0166217776067719, |
| "grad_norm": 1.203125, |
| "learning_rate": 6.091688201234085e-06, |
| "loss": 1.2102296352386475, |
| "step": 1652 |
| }, |
| { |
| "epoch": 1.0178530203924587, |
| "grad_norm": 0.8125, |
| "learning_rate": 6.083716533948192e-06, |
| "loss": 1.161301612854004, |
| "step": 1654 |
| }, |
| { |
| "epoch": 1.0190842631781454, |
| "grad_norm": 0.8515625, |
| "learning_rate": 6.07574453400396e-06, |
| "loss": 1.096801519393921, |
| "step": 1656 |
| }, |
| { |
| "epoch": 1.0203155059638322, |
| "grad_norm": 0.74609375, |
| "learning_rate": 6.06777223307915e-06, |
| "loss": 1.015903115272522, |
| "step": 1658 |
| }, |
| { |
| "epoch": 1.0215467487495191, |
| "grad_norm": 0.80078125, |
| "learning_rate": 6.0597996628527155e-06, |
| "loss": 1.0650248527526855, |
| "step": 1660 |
| }, |
| { |
| "epoch": 1.0227779915352058, |
| "grad_norm": 5.40625, |
| "learning_rate": 6.051826855004683e-06, |
| "loss": 1.2022972106933594, |
| "step": 1662 |
| }, |
| { |
| "epoch": 1.0240092343208926, |
| "grad_norm": 4.46875, |
| "learning_rate": 6.043853841216025e-06, |
| "loss": 1.3405088186264038, |
| "step": 1664 |
| }, |
| { |
| "epoch": 1.0252404771065795, |
| "grad_norm": 1.9921875, |
| "learning_rate": 6.035880653168529e-06, |
| "loss": 1.6969496011734009, |
| "step": 1666 |
| }, |
| { |
| "epoch": 1.0264717198922662, |
| "grad_norm": 2.265625, |
| "learning_rate": 6.027907322544675e-06, |
| "loss": 1.3863646984100342, |
| "step": 1668 |
| }, |
| { |
| "epoch": 1.027702962677953, |
| "grad_norm": 4.28125, |
| "learning_rate": 6.019933881027508e-06, |
| "loss": 1.4729995727539062, |
| "step": 1670 |
| }, |
| { |
| "epoch": 1.02893420546364, |
| "grad_norm": 3.03125, |
| "learning_rate": 6.0119603603005235e-06, |
| "loss": 1.8226011991500854, |
| "step": 1672 |
| }, |
| { |
| "epoch": 1.0301654482493268, |
| "grad_norm": 1.7578125, |
| "learning_rate": 6.0039867920475185e-06, |
| "loss": 1.4725655317306519, |
| "step": 1674 |
| }, |
| { |
| "epoch": 1.0313966910350134, |
| "grad_norm": 3.921875, |
| "learning_rate": 5.996013207952484e-06, |
| "loss": 1.3765413761138916, |
| "step": 1676 |
| }, |
| { |
| "epoch": 1.0326279338207003, |
| "grad_norm": 4.65625, |
| "learning_rate": 5.9880396396994785e-06, |
| "loss": 1.5706751346588135, |
| "step": 1678 |
| }, |
| { |
| "epoch": 1.0338591766063872, |
| "grad_norm": 6.65625, |
| "learning_rate": 5.980066118972492e-06, |
| "loss": 1.954294204711914, |
| "step": 1680 |
| }, |
| { |
| "epoch": 1.0350904193920738, |
| "grad_norm": 0.734375, |
| "learning_rate": 5.972092677455326e-06, |
| "loss": 1.3637402057647705, |
| "step": 1682 |
| }, |
| { |
| "epoch": 1.0363216621777607, |
| "grad_norm": 4.75, |
| "learning_rate": 5.964119346831474e-06, |
| "loss": 1.143527626991272, |
| "step": 1684 |
| }, |
| { |
| "epoch": 1.0375529049634475, |
| "grad_norm": 1.96875, |
| "learning_rate": 5.956146158783977e-06, |
| "loss": 1.2583506107330322, |
| "step": 1686 |
| }, |
| { |
| "epoch": 1.0387841477491342, |
| "grad_norm": 2.0625, |
| "learning_rate": 5.948173144995318e-06, |
| "loss": 1.4587152004241943, |
| "step": 1688 |
| }, |
| { |
| "epoch": 1.040015390534821, |
| "grad_norm": 2.625, |
| "learning_rate": 5.940200337147286e-06, |
| "loss": 1.4821643829345703, |
| "step": 1690 |
| }, |
| { |
| "epoch": 1.041246633320508, |
| "grad_norm": 2.375, |
| "learning_rate": 5.9322277669208526e-06, |
| "loss": 1.3718185424804688, |
| "step": 1692 |
| }, |
| { |
| "epoch": 1.0424778761061946, |
| "grad_norm": 1.4453125, |
| "learning_rate": 5.92425546599604e-06, |
| "loss": 1.3705888986587524, |
| "step": 1694 |
| }, |
| { |
| "epoch": 1.0437091188918814, |
| "grad_norm": 1.609375, |
| "learning_rate": 5.916283466051808e-06, |
| "loss": 1.1750788688659668, |
| "step": 1696 |
| }, |
| { |
| "epoch": 1.0449403616775683, |
| "grad_norm": 4.0625, |
| "learning_rate": 5.908311798765918e-06, |
| "loss": 1.7212889194488525, |
| "step": 1698 |
| }, |
| { |
| "epoch": 1.0461716044632552, |
| "grad_norm": 4.59375, |
| "learning_rate": 5.900340495814802e-06, |
| "loss": 1.6617670059204102, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.0474028472489418, |
| "grad_norm": 13.125, |
| "learning_rate": 5.892369588873452e-06, |
| "loss": 1.8269574642181396, |
| "step": 1702 |
| }, |
| { |
| "epoch": 1.0486340900346287, |
| "grad_norm": 4.03125, |
| "learning_rate": 5.884399109615286e-06, |
| "loss": 1.8086248636245728, |
| "step": 1704 |
| }, |
| { |
| "epoch": 1.0498653328203156, |
| "grad_norm": 5.125, |
| "learning_rate": 5.876429089712021e-06, |
| "loss": 1.7379831075668335, |
| "step": 1706 |
| }, |
| { |
| "epoch": 1.0510965756060022, |
| "grad_norm": 3.828125, |
| "learning_rate": 5.86845956083355e-06, |
| "loss": 1.6424190998077393, |
| "step": 1708 |
| }, |
| { |
| "epoch": 1.052327818391689, |
| "grad_norm": 2.21875, |
| "learning_rate": 5.860490554647813e-06, |
| "loss": 1.4312442541122437, |
| "step": 1710 |
| }, |
| { |
| "epoch": 1.053559061177376, |
| "grad_norm": 2.328125, |
| "learning_rate": 5.8525221028206735e-06, |
| "loss": 1.5711675882339478, |
| "step": 1712 |
| }, |
| { |
| "epoch": 1.0547903039630626, |
| "grad_norm": 8.1875, |
| "learning_rate": 5.844554237015793e-06, |
| "loss": 1.5525487661361694, |
| "step": 1714 |
| }, |
| { |
| "epoch": 1.0560215467487495, |
| "grad_norm": 3.875, |
| "learning_rate": 5.8365869888945015e-06, |
| "loss": 1.7739177942276, |
| "step": 1716 |
| }, |
| { |
| "epoch": 1.0572527895344364, |
| "grad_norm": 1.5625, |
| "learning_rate": 5.82862039011568e-06, |
| "loss": 1.198196530342102, |
| "step": 1718 |
| }, |
| { |
| "epoch": 1.0584840323201232, |
| "grad_norm": 1.703125, |
| "learning_rate": 5.820654472335624e-06, |
| "loss": 1.0741544961929321, |
| "step": 1720 |
| }, |
| { |
| "epoch": 1.0597152751058099, |
| "grad_norm": 2.203125, |
| "learning_rate": 5.812689267207925e-06, |
| "loss": 1.2043887376785278, |
| "step": 1722 |
| }, |
| { |
| "epoch": 1.0609465178914967, |
| "grad_norm": 2.046875, |
| "learning_rate": 5.804724806383346e-06, |
| "loss": 1.3711163997650146, |
| "step": 1724 |
| }, |
| { |
| "epoch": 1.0621777606771836, |
| "grad_norm": 4.75, |
| "learning_rate": 5.796761121509686e-06, |
| "loss": 1.7536944150924683, |
| "step": 1726 |
| }, |
| { |
| "epoch": 1.0634090034628703, |
| "grad_norm": 4.5625, |
| "learning_rate": 5.7887982442316656e-06, |
| "loss": 1.9879385232925415, |
| "step": 1728 |
| }, |
| { |
| "epoch": 1.0646402462485571, |
| "grad_norm": 2.84375, |
| "learning_rate": 5.780836206190793e-06, |
| "loss": 1.881171464920044, |
| "step": 1730 |
| }, |
| { |
| "epoch": 1.065871489034244, |
| "grad_norm": 6.09375, |
| "learning_rate": 5.772875039025244e-06, |
| "loss": 1.7399423122406006, |
| "step": 1732 |
| }, |
| { |
| "epoch": 1.0671027318199306, |
| "grad_norm": 5.0625, |
| "learning_rate": 5.764914774369732e-06, |
| "loss": 1.1776556968688965, |
| "step": 1734 |
| }, |
| { |
| "epoch": 1.0683339746056175, |
| "grad_norm": 2.359375, |
| "learning_rate": 5.756955443855388e-06, |
| "loss": 0.3018825352191925, |
| "step": 1736 |
| }, |
| { |
| "epoch": 1.0695652173913044, |
| "grad_norm": 0.92578125, |
| "learning_rate": 5.748997079109625e-06, |
| "loss": 0.6248791217803955, |
| "step": 1738 |
| }, |
| { |
| "epoch": 1.0707964601769913, |
| "grad_norm": 4.90625, |
| "learning_rate": 5.741039711756025e-06, |
| "loss": 1.0930382013320923, |
| "step": 1740 |
| }, |
| { |
| "epoch": 1.072027702962678, |
| "grad_norm": 2.484375, |
| "learning_rate": 5.733083373414201e-06, |
| "loss": 1.4368818998336792, |
| "step": 1742 |
| }, |
| { |
| "epoch": 1.0732589457483648, |
| "grad_norm": 6.125, |
| "learning_rate": 5.725128095699678e-06, |
| "loss": 1.4669115543365479, |
| "step": 1744 |
| }, |
| { |
| "epoch": 1.0744901885340516, |
| "grad_norm": 3.609375, |
| "learning_rate": 5.717173910223772e-06, |
| "loss": 1.3312410116195679, |
| "step": 1746 |
| }, |
| { |
| "epoch": 1.0757214313197383, |
| "grad_norm": 1.78125, |
| "learning_rate": 5.709220848593452e-06, |
| "loss": 1.3409535884857178, |
| "step": 1748 |
| }, |
| { |
| "epoch": 1.0769526741054252, |
| "grad_norm": 3.125, |
| "learning_rate": 5.7012689424112245e-06, |
| "loss": 1.3486981391906738, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.078183916891112, |
| "grad_norm": 2.90625, |
| "learning_rate": 5.6933182232750036e-06, |
| "loss": 1.460854172706604, |
| "step": 1752 |
| }, |
| { |
| "epoch": 1.0794151596767987, |
| "grad_norm": 1.8125, |
| "learning_rate": 5.685368722777991e-06, |
| "loss": 1.3764128684997559, |
| "step": 1754 |
| }, |
| { |
| "epoch": 1.0806464024624856, |
| "grad_norm": 1.9453125, |
| "learning_rate": 5.677420472508537e-06, |
| "loss": 1.4338797330856323, |
| "step": 1756 |
| }, |
| { |
| "epoch": 1.0818776452481724, |
| "grad_norm": 3.671875, |
| "learning_rate": 5.669473504050033e-06, |
| "loss": 1.5261905193328857, |
| "step": 1758 |
| }, |
| { |
| "epoch": 1.083108888033859, |
| "grad_norm": 2.71875, |
| "learning_rate": 5.6615278489807694e-06, |
| "loss": 1.8875455856323242, |
| "step": 1760 |
| }, |
| { |
| "epoch": 1.084340130819546, |
| "grad_norm": 3.9375, |
| "learning_rate": 5.6535835388738235e-06, |
| "loss": 1.7816144227981567, |
| "step": 1762 |
| }, |
| { |
| "epoch": 1.0855713736052328, |
| "grad_norm": 3.5, |
| "learning_rate": 5.645640605296927e-06, |
| "loss": 1.541454792022705, |
| "step": 1764 |
| }, |
| { |
| "epoch": 1.0868026163909197, |
| "grad_norm": 2.0625, |
| "learning_rate": 5.6376990798123385e-06, |
| "loss": 1.4051401615142822, |
| "step": 1766 |
| }, |
| { |
| "epoch": 1.0880338591766063, |
| "grad_norm": 2.359375, |
| "learning_rate": 5.629758993976727e-06, |
| "loss": 1.4245781898498535, |
| "step": 1768 |
| }, |
| { |
| "epoch": 1.0892651019622932, |
| "grad_norm": 1.7265625, |
| "learning_rate": 5.6218203793410346e-06, |
| "loss": 1.4115514755249023, |
| "step": 1770 |
| }, |
| { |
| "epoch": 1.09049634474798, |
| "grad_norm": 2.96875, |
| "learning_rate": 5.61388326745036e-06, |
| "loss": 1.4192876815795898, |
| "step": 1772 |
| }, |
| { |
| "epoch": 1.0917275875336667, |
| "grad_norm": 2.125, |
| "learning_rate": 5.605947689843833e-06, |
| "loss": 1.346639633178711, |
| "step": 1774 |
| }, |
| { |
| "epoch": 1.0929588303193536, |
| "grad_norm": 1.90625, |
| "learning_rate": 5.598013678054484e-06, |
| "loss": 1.359352707862854, |
| "step": 1776 |
| }, |
| { |
| "epoch": 1.0941900731050405, |
| "grad_norm": 3.453125, |
| "learning_rate": 5.590081263609122e-06, |
| "loss": 1.602827548980713, |
| "step": 1778 |
| }, |
| { |
| "epoch": 1.095421315890727, |
| "grad_norm": 3.421875, |
| "learning_rate": 5.5821504780282086e-06, |
| "loss": 1.5239148139953613, |
| "step": 1780 |
| }, |
| { |
| "epoch": 1.096652558676414, |
| "grad_norm": 3.28125, |
| "learning_rate": 5.574221352825735e-06, |
| "loss": 1.657865047454834, |
| "step": 1782 |
| }, |
| { |
| "epoch": 1.0978838014621009, |
| "grad_norm": 3.734375, |
| "learning_rate": 5.566293919509089e-06, |
| "loss": 1.5900053977966309, |
| "step": 1784 |
| }, |
| { |
| "epoch": 1.0991150442477875, |
| "grad_norm": 1.328125, |
| "learning_rate": 5.558368209578941e-06, |
| "loss": 1.2565845251083374, |
| "step": 1786 |
| }, |
| { |
| "epoch": 1.1003462870334744, |
| "grad_norm": 1.015625, |
| "learning_rate": 5.550444254529113e-06, |
| "loss": 1.064510464668274, |
| "step": 1788 |
| }, |
| { |
| "epoch": 1.1015775298191612, |
| "grad_norm": 6.09375, |
| "learning_rate": 5.542522085846451e-06, |
| "loss": 1.0777498483657837, |
| "step": 1790 |
| }, |
| { |
| "epoch": 1.102808772604848, |
| "grad_norm": 1.515625, |
| "learning_rate": 5.534601735010703e-06, |
| "loss": 1.164319396018982, |
| "step": 1792 |
| }, |
| { |
| "epoch": 1.1040400153905348, |
| "grad_norm": 2.796875, |
| "learning_rate": 5.5266832334943975e-06, |
| "loss": 1.3410156965255737, |
| "step": 1794 |
| }, |
| { |
| "epoch": 1.1052712581762216, |
| "grad_norm": 3.890625, |
| "learning_rate": 5.518766612762712e-06, |
| "loss": 1.8330609798431396, |
| "step": 1796 |
| }, |
| { |
| "epoch": 1.1065025009619085, |
| "grad_norm": 2.234375, |
| "learning_rate": 5.510851904273344e-06, |
| "loss": 1.292941689491272, |
| "step": 1798 |
| }, |
| { |
| "epoch": 1.1077337437475951, |
| "grad_norm": 2.171875, |
| "learning_rate": 5.502939139476403e-06, |
| "loss": 1.4467616081237793, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.108964986533282, |
| "grad_norm": 1.5, |
| "learning_rate": 5.495028349814271e-06, |
| "loss": 1.3929224014282227, |
| "step": 1802 |
| }, |
| { |
| "epoch": 1.1101962293189689, |
| "grad_norm": 1.3828125, |
| "learning_rate": 5.487119566721477e-06, |
| "loss": 1.1502635478973389, |
| "step": 1804 |
| }, |
| { |
| "epoch": 1.1114274721046555, |
| "grad_norm": 6.03125, |
| "learning_rate": 5.479212821624587e-06, |
| "loss": 1.2217051982879639, |
| "step": 1806 |
| }, |
| { |
| "epoch": 1.1126587148903424, |
| "grad_norm": 4.09375, |
| "learning_rate": 5.4713081459420555e-06, |
| "loss": 1.8544467687606812, |
| "step": 1808 |
| }, |
| { |
| "epoch": 1.1138899576760293, |
| "grad_norm": 3.375, |
| "learning_rate": 5.463405571084127e-06, |
| "loss": 1.2114042043685913, |
| "step": 1810 |
| }, |
| { |
| "epoch": 1.1151212004617161, |
| "grad_norm": 2.4375, |
| "learning_rate": 5.455505128452687e-06, |
| "loss": 1.358185052871704, |
| "step": 1812 |
| }, |
| { |
| "epoch": 1.1163524432474028, |
| "grad_norm": 2.25, |
| "learning_rate": 5.447606849441156e-06, |
| "loss": 1.330447793006897, |
| "step": 1814 |
| }, |
| { |
| "epoch": 1.1175836860330897, |
| "grad_norm": 4.65625, |
| "learning_rate": 5.4397107654343515e-06, |
| "loss": 1.4095932245254517, |
| "step": 1816 |
| }, |
| { |
| "epoch": 1.1188149288187765, |
| "grad_norm": 1.0703125, |
| "learning_rate": 5.431816907808376e-06, |
| "loss": 1.2272474765777588, |
| "step": 1818 |
| }, |
| { |
| "epoch": 1.1200461716044632, |
| "grad_norm": 0.8359375, |
| "learning_rate": 5.423925307930478e-06, |
| "loss": 1.1478188037872314, |
| "step": 1820 |
| }, |
| { |
| "epoch": 1.12127741439015, |
| "grad_norm": 1.2578125, |
| "learning_rate": 5.416035997158937e-06, |
| "loss": 1.2050681114196777, |
| "step": 1822 |
| }, |
| { |
| "epoch": 1.122508657175837, |
| "grad_norm": 0.98046875, |
| "learning_rate": 5.408149006842941e-06, |
| "loss": 1.1752591133117676, |
| "step": 1824 |
| }, |
| { |
| "epoch": 1.1237398999615236, |
| "grad_norm": 2.671875, |
| "learning_rate": 5.400264368322448e-06, |
| "loss": 1.2104952335357666, |
| "step": 1826 |
| }, |
| { |
| "epoch": 1.1249711427472104, |
| "grad_norm": 0.828125, |
| "learning_rate": 5.392382112928078e-06, |
| "loss": 1.0541216135025024, |
| "step": 1828 |
| }, |
| { |
| "epoch": 1.1262023855328973, |
| "grad_norm": 4.0, |
| "learning_rate": 5.38450227198098e-06, |
| "loss": 1.2567408084869385, |
| "step": 1830 |
| }, |
| { |
| "epoch": 1.1274336283185842, |
| "grad_norm": 3.546875, |
| "learning_rate": 5.376624876792706e-06, |
| "loss": 1.3701972961425781, |
| "step": 1832 |
| }, |
| { |
| "epoch": 1.1286648711042708, |
| "grad_norm": 4.125, |
| "learning_rate": 5.368749958665096e-06, |
| "loss": 1.1659176349639893, |
| "step": 1834 |
| }, |
| { |
| "epoch": 1.1298961138899577, |
| "grad_norm": 1.703125, |
| "learning_rate": 5.36087754889014e-06, |
| "loss": 1.0371299982070923, |
| "step": 1836 |
| }, |
| { |
| "epoch": 1.1311273566756446, |
| "grad_norm": 3.265625, |
| "learning_rate": 5.353007678749867e-06, |
| "loss": 1.2042335271835327, |
| "step": 1838 |
| }, |
| { |
| "epoch": 1.1323585994613312, |
| "grad_norm": 2.1875, |
| "learning_rate": 5.345140379516205e-06, |
| "loss": 1.3575973510742188, |
| "step": 1840 |
| }, |
| { |
| "epoch": 1.133589842247018, |
| "grad_norm": 4.46875, |
| "learning_rate": 5.337275682450875e-06, |
| "loss": 1.4255024194717407, |
| "step": 1842 |
| }, |
| { |
| "epoch": 1.134821085032705, |
| "grad_norm": 2.328125, |
| "learning_rate": 5.329413618805257e-06, |
| "loss": 1.4426860809326172, |
| "step": 1844 |
| }, |
| { |
| "epoch": 1.1360523278183916, |
| "grad_norm": 2.46875, |
| "learning_rate": 5.321554219820264e-06, |
| "loss": 1.2452255487442017, |
| "step": 1846 |
| }, |
| { |
| "epoch": 1.1372835706040785, |
| "grad_norm": 1.375, |
| "learning_rate": 5.313697516726219e-06, |
| "loss": 0.993253767490387, |
| "step": 1848 |
| }, |
| { |
| "epoch": 1.1385148133897653, |
| "grad_norm": 4.6875, |
| "learning_rate": 5.305843540742741e-06, |
| "loss": 1.4551208019256592, |
| "step": 1850 |
| }, |
| { |
| "epoch": 1.139746056175452, |
| "grad_norm": 3.9375, |
| "learning_rate": 5.297992323078602e-06, |
| "loss": 1.603740930557251, |
| "step": 1852 |
| }, |
| { |
| "epoch": 1.1409772989611389, |
| "grad_norm": 2.140625, |
| "learning_rate": 5.2901438949316166e-06, |
| "loss": 1.4689736366271973, |
| "step": 1854 |
| }, |
| { |
| "epoch": 1.1422085417468257, |
| "grad_norm": 2.4375, |
| "learning_rate": 5.282298287488521e-06, |
| "loss": 1.4280515909194946, |
| "step": 1856 |
| }, |
| { |
| "epoch": 1.1434397845325126, |
| "grad_norm": 0.9453125, |
| "learning_rate": 5.2744555319248336e-06, |
| "loss": 1.2956243753433228, |
| "step": 1858 |
| }, |
| { |
| "epoch": 1.1446710273181993, |
| "grad_norm": 0.7734375, |
| "learning_rate": 5.2666156594047495e-06, |
| "loss": 1.0923516750335693, |
| "step": 1860 |
| }, |
| { |
| "epoch": 1.1459022701038861, |
| "grad_norm": 1.515625, |
| "learning_rate": 5.258778701081e-06, |
| "loss": 1.063520073890686, |
| "step": 1862 |
| }, |
| { |
| "epoch": 1.147133512889573, |
| "grad_norm": 1.3984375, |
| "learning_rate": 5.250944688094741e-06, |
| "loss": 0.9760624170303345, |
| "step": 1864 |
| }, |
| { |
| "epoch": 1.1483647556752596, |
| "grad_norm": 2.65625, |
| "learning_rate": 5.243113651575426e-06, |
| "loss": 1.2185200452804565, |
| "step": 1866 |
| }, |
| { |
| "epoch": 1.1495959984609465, |
| "grad_norm": 3.09375, |
| "learning_rate": 5.2352856226406765e-06, |
| "loss": 1.452656626701355, |
| "step": 1868 |
| }, |
| { |
| "epoch": 1.1508272412466334, |
| "grad_norm": 2.15625, |
| "learning_rate": 5.227460632396164e-06, |
| "loss": 1.4224770069122314, |
| "step": 1870 |
| }, |
| { |
| "epoch": 1.15205848403232, |
| "grad_norm": 2.5, |
| "learning_rate": 5.219638711935489e-06, |
| "loss": 1.4585014581680298, |
| "step": 1872 |
| }, |
| { |
| "epoch": 1.153289726818007, |
| "grad_norm": 3.5, |
| "learning_rate": 5.2118198923400485e-06, |
| "loss": 1.6782886981964111, |
| "step": 1874 |
| }, |
| { |
| "epoch": 1.1545209696036938, |
| "grad_norm": 6.09375, |
| "learning_rate": 5.2040042046789215e-06, |
| "loss": 1.87554132938385, |
| "step": 1876 |
| }, |
| { |
| "epoch": 1.1557522123893804, |
| "grad_norm": 3.28125, |
| "learning_rate": 5.1961916800087446e-06, |
| "loss": 1.6450105905532837, |
| "step": 1878 |
| }, |
| { |
| "epoch": 1.1569834551750673, |
| "grad_norm": 2.234375, |
| "learning_rate": 5.18838234937358e-06, |
| "loss": 1.4129842519760132, |
| "step": 1880 |
| }, |
| { |
| "epoch": 1.1582146979607542, |
| "grad_norm": 0.80078125, |
| "learning_rate": 5.180576243804796e-06, |
| "loss": 1.189549207687378, |
| "step": 1882 |
| }, |
| { |
| "epoch": 1.159445940746441, |
| "grad_norm": 1.65625, |
| "learning_rate": 5.172773394320956e-06, |
| "loss": 1.2712268829345703, |
| "step": 1884 |
| }, |
| { |
| "epoch": 1.1606771835321277, |
| "grad_norm": 0.59375, |
| "learning_rate": 5.1649738319276766e-06, |
| "loss": 1.157703161239624, |
| "step": 1886 |
| }, |
| { |
| "epoch": 1.1619084263178145, |
| "grad_norm": 0.5703125, |
| "learning_rate": 5.1571775876175154e-06, |
| "loss": 1.3145527839660645, |
| "step": 1888 |
| }, |
| { |
| "epoch": 1.1631396691035014, |
| "grad_norm": 2.046875, |
| "learning_rate": 5.149384692369845e-06, |
| "loss": 1.08966863155365, |
| "step": 1890 |
| }, |
| { |
| "epoch": 1.164370911889188, |
| "grad_norm": 2.859375, |
| "learning_rate": 5.1415951771507285e-06, |
| "loss": 1.065926194190979, |
| "step": 1892 |
| }, |
| { |
| "epoch": 1.165602154674875, |
| "grad_norm": 3.09375, |
| "learning_rate": 5.1338090729128006e-06, |
| "loss": 1.2760896682739258, |
| "step": 1894 |
| }, |
| { |
| "epoch": 1.1668333974605618, |
| "grad_norm": 2.546875, |
| "learning_rate": 5.126026410595142e-06, |
| "loss": 1.4395681619644165, |
| "step": 1896 |
| }, |
| { |
| "epoch": 1.1680646402462487, |
| "grad_norm": 2.0, |
| "learning_rate": 5.1182472211231514e-06, |
| "loss": 1.4737911224365234, |
| "step": 1898 |
| }, |
| { |
| "epoch": 1.1692958830319353, |
| "grad_norm": 2.140625, |
| "learning_rate": 5.110471535408437e-06, |
| "loss": 1.4659451246261597, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.1705271258176222, |
| "grad_norm": 3.515625, |
| "learning_rate": 5.102699384348672e-06, |
| "loss": 1.361763596534729, |
| "step": 1902 |
| }, |
| { |
| "epoch": 1.1717583686033088, |
| "grad_norm": 2.296875, |
| "learning_rate": 5.0949307988275e-06, |
| "loss": 1.3668726682662964, |
| "step": 1904 |
| }, |
| { |
| "epoch": 1.1729896113889957, |
| "grad_norm": 2.796875, |
| "learning_rate": 5.087165809714381e-06, |
| "loss": 1.4556688070297241, |
| "step": 1906 |
| }, |
| { |
| "epoch": 1.1742208541746826, |
| "grad_norm": 2.828125, |
| "learning_rate": 5.0794044478644934e-06, |
| "loss": 1.443922758102417, |
| "step": 1908 |
| }, |
| { |
| "epoch": 1.1754520969603695, |
| "grad_norm": 2.65625, |
| "learning_rate": 5.071646744118598e-06, |
| "loss": 1.4225409030914307, |
| "step": 1910 |
| }, |
| { |
| "epoch": 1.176683339746056, |
| "grad_norm": 7.21875, |
| "learning_rate": 5.063892729302924e-06, |
| "loss": 1.427457571029663, |
| "step": 1912 |
| }, |
| { |
| "epoch": 1.177914582531743, |
| "grad_norm": 2.8125, |
| "learning_rate": 5.056142434229037e-06, |
| "loss": 1.4073694944381714, |
| "step": 1914 |
| }, |
| { |
| "epoch": 1.1791458253174298, |
| "grad_norm": 7.9375, |
| "learning_rate": 5.048395889693725e-06, |
| "loss": 1.456547498703003, |
| "step": 1916 |
| }, |
| { |
| "epoch": 1.1803770681031165, |
| "grad_norm": 2.140625, |
| "learning_rate": 5.040653126478871e-06, |
| "loss": 1.4025368690490723, |
| "step": 1918 |
| }, |
| { |
| "epoch": 1.1816083108888034, |
| "grad_norm": 1.765625, |
| "learning_rate": 5.032914175351337e-06, |
| "loss": 1.4511933326721191, |
| "step": 1920 |
| }, |
| { |
| "epoch": 1.1828395536744902, |
| "grad_norm": 2.234375, |
| "learning_rate": 5.025179067062833e-06, |
| "loss": 1.3442623615264893, |
| "step": 1922 |
| }, |
| { |
| "epoch": 1.184070796460177, |
| "grad_norm": 6.40625, |
| "learning_rate": 5.017447832349795e-06, |
| "loss": 1.610106110572815, |
| "step": 1924 |
| }, |
| { |
| "epoch": 1.1853020392458637, |
| "grad_norm": 7.9375, |
| "learning_rate": 5.009720501933276e-06, |
| "loss": 0.8935953974723816, |
| "step": 1926 |
| }, |
| { |
| "epoch": 1.1865332820315506, |
| "grad_norm": 8.375, |
| "learning_rate": 5.001997106518808e-06, |
| "loss": 0.2732890248298645, |
| "step": 1928 |
| }, |
| { |
| "epoch": 1.1877645248172375, |
| "grad_norm": 27.5, |
| "learning_rate": 4.9942776767962894e-06, |
| "loss": 0.7279981374740601, |
| "step": 1930 |
| }, |
| { |
| "epoch": 1.1889957676029241, |
| "grad_norm": 3.3125, |
| "learning_rate": 4.986562243439861e-06, |
| "loss": 1.6977769136428833, |
| "step": 1932 |
| }, |
| { |
| "epoch": 1.190227010388611, |
| "grad_norm": 3.96875, |
| "learning_rate": 4.978850837107782e-06, |
| "loss": 1.213844895362854, |
| "step": 1934 |
| }, |
| { |
| "epoch": 1.1914582531742979, |
| "grad_norm": 4.875, |
| "learning_rate": 4.971143488442311e-06, |
| "loss": 0.7499221563339233, |
| "step": 1936 |
| }, |
| { |
| "epoch": 1.1926894959599845, |
| "grad_norm": 5.15625, |
| "learning_rate": 4.96344022806958e-06, |
| "loss": 1.049713373184204, |
| "step": 1938 |
| }, |
| { |
| "epoch": 1.1939207387456714, |
| "grad_norm": 3.1875, |
| "learning_rate": 4.955741086599481e-06, |
| "loss": 1.568014144897461, |
| "step": 1940 |
| }, |
| { |
| "epoch": 1.1951519815313583, |
| "grad_norm": 4.125, |
| "learning_rate": 4.948046094625532e-06, |
| "loss": 1.2914780378341675, |
| "step": 1942 |
| }, |
| { |
| "epoch": 1.196383224317045, |
| "grad_norm": 2.65625, |
| "learning_rate": 4.940355282724769e-06, |
| "loss": 1.070427656173706, |
| "step": 1944 |
| }, |
| { |
| "epoch": 1.1976144671027318, |
| "grad_norm": 7.1875, |
| "learning_rate": 4.932668681457615e-06, |
| "loss": 1.1766752004623413, |
| "step": 1946 |
| }, |
| { |
| "epoch": 1.1988457098884187, |
| "grad_norm": 2.84375, |
| "learning_rate": 4.9249863213677615e-06, |
| "loss": 1.4076998233795166, |
| "step": 1948 |
| }, |
| { |
| "epoch": 1.2000769526741055, |
| "grad_norm": 3.40625, |
| "learning_rate": 4.917308232982048e-06, |
| "loss": 1.2345445156097412, |
| "step": 1950 |
| }, |
| { |
| "epoch": 1.2013081954597922, |
| "grad_norm": 2.484375, |
| "learning_rate": 4.909634446810339e-06, |
| "loss": 1.2328537702560425, |
| "step": 1952 |
| }, |
| { |
| "epoch": 1.202539438245479, |
| "grad_norm": 2.25, |
| "learning_rate": 4.901964993345402e-06, |
| "loss": 1.3884059190750122, |
| "step": 1954 |
| }, |
| { |
| "epoch": 1.203770681031166, |
| "grad_norm": 2.28125, |
| "learning_rate": 4.8942999030627915e-06, |
| "loss": 1.4215402603149414, |
| "step": 1956 |
| }, |
| { |
| "epoch": 1.2050019238168526, |
| "grad_norm": 2.125, |
| "learning_rate": 4.886639206420722e-06, |
| "loss": 1.2735439538955688, |
| "step": 1958 |
| }, |
| { |
| "epoch": 1.2062331666025394, |
| "grad_norm": 3.421875, |
| "learning_rate": 4.878982933859951e-06, |
| "loss": 1.3453803062438965, |
| "step": 1960 |
| }, |
| { |
| "epoch": 1.2074644093882263, |
| "grad_norm": 4.28125, |
| "learning_rate": 4.8713311158036544e-06, |
| "loss": 1.2609363794326782, |
| "step": 1962 |
| }, |
| { |
| "epoch": 1.208695652173913, |
| "grad_norm": 1.21875, |
| "learning_rate": 4.863683782657311e-06, |
| "loss": 1.1702247858047485, |
| "step": 1964 |
| }, |
| { |
| "epoch": 1.2099268949595998, |
| "grad_norm": 4.28125, |
| "learning_rate": 4.8560409648085706e-06, |
| "loss": 1.4844826459884644, |
| "step": 1966 |
| }, |
| { |
| "epoch": 1.2111581377452867, |
| "grad_norm": 4.28125, |
| "learning_rate": 4.84840269262715e-06, |
| "loss": 1.9572813510894775, |
| "step": 1968 |
| }, |
| { |
| "epoch": 1.2123893805309733, |
| "grad_norm": 2.328125, |
| "learning_rate": 4.840768996464696e-06, |
| "loss": 1.7173551321029663, |
| "step": 1970 |
| }, |
| { |
| "epoch": 1.2136206233166602, |
| "grad_norm": 2.265625, |
| "learning_rate": 4.8331399066546795e-06, |
| "loss": 1.4655386209487915, |
| "step": 1972 |
| }, |
| { |
| "epoch": 1.214851866102347, |
| "grad_norm": 7.46875, |
| "learning_rate": 4.825515453512259e-06, |
| "loss": 1.461625337600708, |
| "step": 1974 |
| }, |
| { |
| "epoch": 1.216083108888034, |
| "grad_norm": 2.28125, |
| "learning_rate": 4.8178956673341745e-06, |
| "loss": 1.3926430940628052, |
| "step": 1976 |
| }, |
| { |
| "epoch": 1.2173143516737206, |
| "grad_norm": 2.953125, |
| "learning_rate": 4.810280578398621e-06, |
| "loss": 1.408326506614685, |
| "step": 1978 |
| }, |
| { |
| "epoch": 1.2185455944594075, |
| "grad_norm": 2.703125, |
| "learning_rate": 4.802670216965125e-06, |
| "loss": 1.4212825298309326, |
| "step": 1980 |
| }, |
| { |
| "epoch": 1.2197768372450943, |
| "grad_norm": 4.1875, |
| "learning_rate": 4.79506461327443e-06, |
| "loss": 1.7488731145858765, |
| "step": 1982 |
| }, |
| { |
| "epoch": 1.221008080030781, |
| "grad_norm": 3.453125, |
| "learning_rate": 4.787463797548373e-06, |
| "loss": 1.6178581714630127, |
| "step": 1984 |
| }, |
| { |
| "epoch": 1.2222393228164679, |
| "grad_norm": 3.609375, |
| "learning_rate": 4.779867799989765e-06, |
| "loss": 1.9020529985427856, |
| "step": 1986 |
| }, |
| { |
| "epoch": 1.2234705656021547, |
| "grad_norm": 4.78125, |
| "learning_rate": 4.772276650782275e-06, |
| "loss": 1.8441529273986816, |
| "step": 1988 |
| }, |
| { |
| "epoch": 1.2247018083878416, |
| "grad_norm": 5.0, |
| "learning_rate": 4.764690380090303e-06, |
| "loss": 1.6788111925125122, |
| "step": 1990 |
| }, |
| { |
| "epoch": 1.2259330511735282, |
| "grad_norm": 2.375, |
| "learning_rate": 4.757109018058865e-06, |
| "loss": 1.3692415952682495, |
| "step": 1992 |
| }, |
| { |
| "epoch": 1.2271642939592151, |
| "grad_norm": 3.6875, |
| "learning_rate": 4.749532594813469e-06, |
| "loss": 1.4384074211120605, |
| "step": 1994 |
| }, |
| { |
| "epoch": 1.2283955367449018, |
| "grad_norm": 3.359375, |
| "learning_rate": 4.7419611404600005e-06, |
| "loss": 1.49677312374115, |
| "step": 1996 |
| }, |
| { |
| "epoch": 1.2296267795305886, |
| "grad_norm": 1.8359375, |
| "learning_rate": 4.734394685084603e-06, |
| "loss": 1.3461604118347168, |
| "step": 1998 |
| }, |
| { |
| "epoch": 1.2308580223162755, |
| "grad_norm": 1.9453125, |
| "learning_rate": 4.726833258753552e-06, |
| "loss": 1.3872590065002441, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.2320892651019624, |
| "grad_norm": 3.046875, |
| "learning_rate": 4.719276891513139e-06, |
| "loss": 1.5854167938232422, |
| "step": 2002 |
| }, |
| { |
| "epoch": 1.233320507887649, |
| "grad_norm": 3.859375, |
| "learning_rate": 4.711725613389557e-06, |
| "loss": 1.751989722251892, |
| "step": 2004 |
| }, |
| { |
| "epoch": 1.234551750673336, |
| "grad_norm": 2.15625, |
| "learning_rate": 4.704179454388773e-06, |
| "loss": 1.7362277507781982, |
| "step": 2006 |
| }, |
| { |
| "epoch": 1.2357829934590228, |
| "grad_norm": 2.96875, |
| "learning_rate": 4.696638444496411e-06, |
| "loss": 1.4224112033843994, |
| "step": 2008 |
| }, |
| { |
| "epoch": 1.2370142362447094, |
| "grad_norm": 2.234375, |
| "learning_rate": 4.68910261367764e-06, |
| "loss": 1.4005614519119263, |
| "step": 2010 |
| }, |
| { |
| "epoch": 1.2382454790303963, |
| "grad_norm": 2.375, |
| "learning_rate": 4.681571991877043e-06, |
| "loss": 1.3552170991897583, |
| "step": 2012 |
| }, |
| { |
| "epoch": 1.2394767218160831, |
| "grad_norm": 4.15625, |
| "learning_rate": 4.674046609018512e-06, |
| "loss": 0.833554208278656, |
| "step": 2014 |
| }, |
| { |
| "epoch": 1.24070796460177, |
| "grad_norm": 5.875, |
| "learning_rate": 4.666526495005115e-06, |
| "loss": 0.39474761486053467, |
| "step": 2016 |
| }, |
| { |
| "epoch": 1.2419392073874567, |
| "grad_norm": 5.03125, |
| "learning_rate": 4.659011679718981e-06, |
| "loss": 0.4157543480396271, |
| "step": 2018 |
| }, |
| { |
| "epoch": 1.2431704501731435, |
| "grad_norm": 2.96875, |
| "learning_rate": 4.651502193021195e-06, |
| "loss": 0.6850585341453552, |
| "step": 2020 |
| }, |
| { |
| "epoch": 1.2444016929588304, |
| "grad_norm": 1.78125, |
| "learning_rate": 4.643998064751658e-06, |
| "loss": 1.0783367156982422, |
| "step": 2022 |
| }, |
| { |
| "epoch": 1.245632935744517, |
| "grad_norm": 2.328125, |
| "learning_rate": 4.636499324728982e-06, |
| "loss": 1.420037031173706, |
| "step": 2024 |
| }, |
| { |
| "epoch": 1.246864178530204, |
| "grad_norm": 4.875, |
| "learning_rate": 4.629006002750368e-06, |
| "loss": 1.3482452630996704, |
| "step": 2026 |
| }, |
| { |
| "epoch": 1.2480954213158908, |
| "grad_norm": 9.375, |
| "learning_rate": 4.6215181285914884e-06, |
| "loss": 1.141502022743225, |
| "step": 2028 |
| }, |
| { |
| "epoch": 1.2493266641015774, |
| "grad_norm": 2.40625, |
| "learning_rate": 4.614035732006368e-06, |
| "loss": 1.4624545574188232, |
| "step": 2030 |
| }, |
| { |
| "epoch": 1.2505579068872643, |
| "grad_norm": 6.1875, |
| "learning_rate": 4.606558842727265e-06, |
| "loss": 1.8598597049713135, |
| "step": 2032 |
| }, |
| { |
| "epoch": 1.2517891496729512, |
| "grad_norm": 2.21875, |
| "learning_rate": 4.599087490464553e-06, |
| "loss": 1.3582333326339722, |
| "step": 2034 |
| }, |
| { |
| "epoch": 1.2530203924586378, |
| "grad_norm": 1.8984375, |
| "learning_rate": 4.591621704906603e-06, |
| "loss": 1.374417781829834, |
| "step": 2036 |
| }, |
| { |
| "epoch": 1.2542516352443247, |
| "grad_norm": 2.125, |
| "learning_rate": 4.584161515719672e-06, |
| "loss": 1.430912971496582, |
| "step": 2038 |
| }, |
| { |
| "epoch": 1.2554828780300116, |
| "grad_norm": 3.265625, |
| "learning_rate": 4.576706952547769e-06, |
| "loss": 1.5239249467849731, |
| "step": 2040 |
| }, |
| { |
| "epoch": 1.2567141208156984, |
| "grad_norm": 1.6796875, |
| "learning_rate": 4.569258045012557e-06, |
| "loss": 1.2920567989349365, |
| "step": 2042 |
| }, |
| { |
| "epoch": 1.257945363601385, |
| "grad_norm": 2.3125, |
| "learning_rate": 4.561814822713218e-06, |
| "loss": 1.380325436592102, |
| "step": 2044 |
| }, |
| { |
| "epoch": 1.259176606387072, |
| "grad_norm": 2.859375, |
| "learning_rate": 4.554377315226348e-06, |
| "loss": 1.4241161346435547, |
| "step": 2046 |
| }, |
| { |
| "epoch": 1.2604078491727588, |
| "grad_norm": 2.84375, |
| "learning_rate": 4.546945552105836e-06, |
| "loss": 1.4067350625991821, |
| "step": 2048 |
| }, |
| { |
| "epoch": 1.2616390919584455, |
| "grad_norm": 2.96875, |
| "learning_rate": 4.539519562882736e-06, |
| "loss": 1.4366530179977417, |
| "step": 2050 |
| }, |
| { |
| "epoch": 1.2628703347441324, |
| "grad_norm": 2.328125, |
| "learning_rate": 4.532099377065168e-06, |
| "loss": 1.4625372886657715, |
| "step": 2052 |
| }, |
| { |
| "epoch": 1.2641015775298192, |
| "grad_norm": 2.640625, |
| "learning_rate": 4.524685024138187e-06, |
| "loss": 1.398501992225647, |
| "step": 2054 |
| }, |
| { |
| "epoch": 1.265332820315506, |
| "grad_norm": 2.453125, |
| "learning_rate": 4.51727653356367e-06, |
| "loss": 1.5378193855285645, |
| "step": 2056 |
| }, |
| { |
| "epoch": 1.2665640631011927, |
| "grad_norm": 3.765625, |
| "learning_rate": 4.509873934780204e-06, |
| "loss": 1.3529696464538574, |
| "step": 2058 |
| }, |
| { |
| "epoch": 1.2677953058868796, |
| "grad_norm": 5.25, |
| "learning_rate": 4.502477257202957e-06, |
| "loss": 1.392851710319519, |
| "step": 2060 |
| }, |
| { |
| "epoch": 1.2690265486725663, |
| "grad_norm": 10.3125, |
| "learning_rate": 4.495086530223576e-06, |
| "loss": 1.705520510673523, |
| "step": 2062 |
| }, |
| { |
| "epoch": 1.2702577914582531, |
| "grad_norm": 3.859375, |
| "learning_rate": 4.487701783210054e-06, |
| "loss": 1.6694080829620361, |
| "step": 2064 |
| }, |
| { |
| "epoch": 1.27148903424394, |
| "grad_norm": 3.84375, |
| "learning_rate": 4.480323045506628e-06, |
| "loss": 1.893826961517334, |
| "step": 2066 |
| }, |
| { |
| "epoch": 1.2727202770296269, |
| "grad_norm": 2.828125, |
| "learning_rate": 4.472950346433655e-06, |
| "loss": 1.817057490348816, |
| "step": 2068 |
| }, |
| { |
| "epoch": 1.2739515198153135, |
| "grad_norm": 2.125, |
| "learning_rate": 4.465583715287496e-06, |
| "loss": 1.6186152696609497, |
| "step": 2070 |
| }, |
| { |
| "epoch": 1.2751827626010004, |
| "grad_norm": 3.078125, |
| "learning_rate": 4.4582231813404014e-06, |
| "loss": 1.4072115421295166, |
| "step": 2072 |
| }, |
| { |
| "epoch": 1.2764140053866873, |
| "grad_norm": 9.625, |
| "learning_rate": 4.450868773840392e-06, |
| "loss": 1.1940281391143799, |
| "step": 2074 |
| }, |
| { |
| "epoch": 1.277645248172374, |
| "grad_norm": 3.40625, |
| "learning_rate": 4.443520522011146e-06, |
| "loss": 1.7899526357650757, |
| "step": 2076 |
| }, |
| { |
| "epoch": 1.2788764909580608, |
| "grad_norm": 2.25, |
| "learning_rate": 4.43617845505188e-06, |
| "loss": 1.3512349128723145, |
| "step": 2078 |
| }, |
| { |
| "epoch": 1.2801077337437476, |
| "grad_norm": 2.40625, |
| "learning_rate": 4.428842602137235e-06, |
| "loss": 1.4610764980316162, |
| "step": 2080 |
| }, |
| { |
| "epoch": 1.2813389765294345, |
| "grad_norm": 4.34375, |
| "learning_rate": 4.421512992417158e-06, |
| "loss": 0.9584015011787415, |
| "step": 2082 |
| }, |
| { |
| "epoch": 1.2825702193151212, |
| "grad_norm": 5.375, |
| "learning_rate": 4.414189655016789e-06, |
| "loss": 0.5581737756729126, |
| "step": 2084 |
| }, |
| { |
| "epoch": 1.283801462100808, |
| "grad_norm": 2.234375, |
| "learning_rate": 4.406872619036348e-06, |
| "loss": 0.6756957769393921, |
| "step": 2086 |
| }, |
| { |
| "epoch": 1.2850327048864947, |
| "grad_norm": 2.203125, |
| "learning_rate": 4.399561913551009e-06, |
| "loss": 1.0549678802490234, |
| "step": 2088 |
| }, |
| { |
| "epoch": 1.2862639476721816, |
| "grad_norm": 2.078125, |
| "learning_rate": 4.392257567610794e-06, |
| "loss": 1.180040717124939, |
| "step": 2090 |
| }, |
| { |
| "epoch": 1.2874951904578684, |
| "grad_norm": 2.234375, |
| "learning_rate": 4.384959610240456e-06, |
| "loss": 1.3322702646255493, |
| "step": 2092 |
| }, |
| { |
| "epoch": 1.2887264332435553, |
| "grad_norm": 2.828125, |
| "learning_rate": 4.377668070439359e-06, |
| "loss": 1.3996449708938599, |
| "step": 2094 |
| }, |
| { |
| "epoch": 1.289957676029242, |
| "grad_norm": 2.4375, |
| "learning_rate": 4.3703829771813685e-06, |
| "loss": 1.412023901939392, |
| "step": 2096 |
| }, |
| { |
| "epoch": 1.2911889188149288, |
| "grad_norm": 8.5625, |
| "learning_rate": 4.363104359414732e-06, |
| "loss": 1.0849536657333374, |
| "step": 2098 |
| }, |
| { |
| "epoch": 1.2924201616006157, |
| "grad_norm": 2.625, |
| "learning_rate": 4.3558322460619666e-06, |
| "loss": 0.7259770631790161, |
| "step": 2100 |
| }, |
| { |
| "epoch": 1.2936514043863023, |
| "grad_norm": 2.0625, |
| "learning_rate": 4.3485666660197445e-06, |
| "loss": 0.9858816266059875, |
| "step": 2102 |
| }, |
| { |
| "epoch": 1.2948826471719892, |
| "grad_norm": 2.484375, |
| "learning_rate": 4.3413076481587755e-06, |
| "loss": 1.346143364906311, |
| "step": 2104 |
| }, |
| { |
| "epoch": 1.296113889957676, |
| "grad_norm": 1.8515625, |
| "learning_rate": 4.334055221323696e-06, |
| "loss": 1.4789454936981201, |
| "step": 2106 |
| }, |
| { |
| "epoch": 1.297345132743363, |
| "grad_norm": 2.40625, |
| "learning_rate": 4.326809414332947e-06, |
| "loss": 1.4302010536193848, |
| "step": 2108 |
| }, |
| { |
| "epoch": 1.2985763755290496, |
| "grad_norm": 2.09375, |
| "learning_rate": 4.319570255978668e-06, |
| "loss": 1.4737108945846558, |
| "step": 2110 |
| }, |
| { |
| "epoch": 1.2998076183147365, |
| "grad_norm": 1.890625, |
| "learning_rate": 4.3123377750265804e-06, |
| "loss": 1.4115092754364014, |
| "step": 2112 |
| }, |
| { |
| "epoch": 1.301038861100423, |
| "grad_norm": 8.1875, |
| "learning_rate": 4.305112000215872e-06, |
| "loss": 1.4483823776245117, |
| "step": 2114 |
| }, |
| { |
| "epoch": 1.30227010388611, |
| "grad_norm": 3.328125, |
| "learning_rate": 4.297892960259081e-06, |
| "loss": 1.314262866973877, |
| "step": 2116 |
| }, |
| { |
| "epoch": 1.3035013466717968, |
| "grad_norm": 4.03125, |
| "learning_rate": 4.290680683841983e-06, |
| "loss": 1.3376444578170776, |
| "step": 2118 |
| }, |
| { |
| "epoch": 1.3047325894574837, |
| "grad_norm": 4.25, |
| "learning_rate": 4.283475199623483e-06, |
| "loss": 1.4766546487808228, |
| "step": 2120 |
| }, |
| { |
| "epoch": 1.3059638322431706, |
| "grad_norm": 2.546875, |
| "learning_rate": 4.276276536235488e-06, |
| "loss": 1.6876254081726074, |
| "step": 2122 |
| }, |
| { |
| "epoch": 1.3071950750288572, |
| "grad_norm": 2.421875, |
| "learning_rate": 4.2690847222828105e-06, |
| "loss": 1.5043630599975586, |
| "step": 2124 |
| }, |
| { |
| "epoch": 1.308426317814544, |
| "grad_norm": 4.625, |
| "learning_rate": 4.261899786343038e-06, |
| "loss": 1.9094319343566895, |
| "step": 2126 |
| }, |
| { |
| "epoch": 1.3096575606002308, |
| "grad_norm": 4.6875, |
| "learning_rate": 4.254721756966434e-06, |
| "loss": 1.5988795757293701, |
| "step": 2128 |
| }, |
| { |
| "epoch": 1.3108888033859176, |
| "grad_norm": 4.15625, |
| "learning_rate": 4.247550662675814e-06, |
| "loss": 1.7517191171646118, |
| "step": 2130 |
| }, |
| { |
| "epoch": 1.3121200461716045, |
| "grad_norm": 6.09375, |
| "learning_rate": 4.240386531966436e-06, |
| "loss": 1.20930814743042, |
| "step": 2132 |
| }, |
| { |
| "epoch": 1.3133512889572914, |
| "grad_norm": 3.0, |
| "learning_rate": 4.233229393305891e-06, |
| "loss": 1.524349570274353, |
| "step": 2134 |
| }, |
| { |
| "epoch": 1.314582531742978, |
| "grad_norm": 4.6875, |
| "learning_rate": 4.226079275133981e-06, |
| "loss": 1.5060465335845947, |
| "step": 2136 |
| }, |
| { |
| "epoch": 1.3158137745286649, |
| "grad_norm": 2.921875, |
| "learning_rate": 4.218936205862614e-06, |
| "loss": 1.264224648475647, |
| "step": 2138 |
| }, |
| { |
| "epoch": 1.3170450173143518, |
| "grad_norm": 2.859375, |
| "learning_rate": 4.211800213875687e-06, |
| "loss": 1.440643310546875, |
| "step": 2140 |
| }, |
| { |
| "epoch": 1.3182762601000384, |
| "grad_norm": 2.34375, |
| "learning_rate": 4.204671327528978e-06, |
| "loss": 1.4793394804000854, |
| "step": 2142 |
| }, |
| { |
| "epoch": 1.3195075028857253, |
| "grad_norm": 2.609375, |
| "learning_rate": 4.197549575150026e-06, |
| "loss": 1.4133528470993042, |
| "step": 2144 |
| }, |
| { |
| "epoch": 1.3207387456714121, |
| "grad_norm": 2.609375, |
| "learning_rate": 4.190434985038023e-06, |
| "loss": 1.4079787731170654, |
| "step": 2146 |
| }, |
| { |
| "epoch": 1.321969988457099, |
| "grad_norm": 2.53125, |
| "learning_rate": 4.183327585463704e-06, |
| "loss": 1.4368443489074707, |
| "step": 2148 |
| }, |
| { |
| "epoch": 1.3232012312427857, |
| "grad_norm": 2.0, |
| "learning_rate": 4.1762274046692244e-06, |
| "loss": 1.4060487747192383, |
| "step": 2150 |
| }, |
| { |
| "epoch": 1.3244324740284725, |
| "grad_norm": 2.921875, |
| "learning_rate": 4.1691344708680634e-06, |
| "loss": 1.4592519998550415, |
| "step": 2152 |
| }, |
| { |
| "epoch": 1.3256637168141592, |
| "grad_norm": 4.6875, |
| "learning_rate": 4.162048812244897e-06, |
| "loss": 1.6927399635314941, |
| "step": 2154 |
| }, |
| { |
| "epoch": 1.326894959599846, |
| "grad_norm": 2.1875, |
| "learning_rate": 4.154970456955495e-06, |
| "loss": 1.5677540302276611, |
| "step": 2156 |
| }, |
| { |
| "epoch": 1.328126202385533, |
| "grad_norm": 0.8984375, |
| "learning_rate": 4.147899433126607e-06, |
| "loss": 1.36515474319458, |
| "step": 2158 |
| }, |
| { |
| "epoch": 1.3293574451712198, |
| "grad_norm": 4.75, |
| "learning_rate": 4.140835768855848e-06, |
| "loss": 0.9946126341819763, |
| "step": 2160 |
| }, |
| { |
| "epoch": 1.3305886879569064, |
| "grad_norm": 3.90625, |
| "learning_rate": 4.133779492211595e-06, |
| "loss": 1.485393762588501, |
| "step": 2162 |
| }, |
| { |
| "epoch": 1.3318199307425933, |
| "grad_norm": 4.3125, |
| "learning_rate": 4.126730631232855e-06, |
| "loss": 1.871983289718628, |
| "step": 2164 |
| }, |
| { |
| "epoch": 1.3330511735282802, |
| "grad_norm": 0.90625, |
| "learning_rate": 4.1196892139291836e-06, |
| "loss": 1.5479357242584229, |
| "step": 2166 |
| }, |
| { |
| "epoch": 1.3342824163139668, |
| "grad_norm": 1.8046875, |
| "learning_rate": 4.112655268280551e-06, |
| "loss": 1.1007100343704224, |
| "step": 2168 |
| }, |
| { |
| "epoch": 1.3355136590996537, |
| "grad_norm": 3.09375, |
| "learning_rate": 4.1056288222372385e-06, |
| "loss": 1.3004124164581299, |
| "step": 2170 |
| }, |
| { |
| "epoch": 1.3367449018853406, |
| "grad_norm": 2.6875, |
| "learning_rate": 4.098609903719724e-06, |
| "loss": 1.4792126417160034, |
| "step": 2172 |
| }, |
| { |
| "epoch": 1.3379761446710274, |
| "grad_norm": 0.83203125, |
| "learning_rate": 4.0915985406185815e-06, |
| "loss": 1.221348762512207, |
| "step": 2174 |
| }, |
| { |
| "epoch": 1.339207387456714, |
| "grad_norm": 0.82421875, |
| "learning_rate": 4.084594760794356e-06, |
| "loss": 1.0107485055923462, |
| "step": 2176 |
| }, |
| { |
| "epoch": 1.340438630242401, |
| "grad_norm": 3.140625, |
| "learning_rate": 4.077598592077458e-06, |
| "loss": 1.1750391721725464, |
| "step": 2178 |
| }, |
| { |
| "epoch": 1.3416698730280876, |
| "grad_norm": 2.421875, |
| "learning_rate": 4.070610062268059e-06, |
| "loss": 1.3896641731262207, |
| "step": 2180 |
| }, |
| { |
| "epoch": 1.3429011158137745, |
| "grad_norm": 2.390625, |
| "learning_rate": 4.063629199135977e-06, |
| "loss": 1.4235678911209106, |
| "step": 2182 |
| }, |
| { |
| "epoch": 1.3441323585994613, |
| "grad_norm": 2.96875, |
| "learning_rate": 4.056656030420561e-06, |
| "loss": 1.469269037246704, |
| "step": 2184 |
| }, |
| { |
| "epoch": 1.3453636013851482, |
| "grad_norm": 8.25, |
| "learning_rate": 4.049690583830588e-06, |
| "loss": 1.6964664459228516, |
| "step": 2186 |
| }, |
| { |
| "epoch": 1.3465948441708349, |
| "grad_norm": 4.71875, |
| "learning_rate": 4.042732887044146e-06, |
| "loss": 1.4317083358764648, |
| "step": 2188 |
| }, |
| { |
| "epoch": 1.3478260869565217, |
| "grad_norm": 2.71875, |
| "learning_rate": 4.03578296770854e-06, |
| "loss": 1.290687918663025, |
| "step": 2190 |
| }, |
| { |
| "epoch": 1.3490573297422086, |
| "grad_norm": 1.0234375, |
| "learning_rate": 4.028840853440155e-06, |
| "loss": 1.2132869958877563, |
| "step": 2192 |
| }, |
| { |
| "epoch": 1.3502885725278952, |
| "grad_norm": 0.72265625, |
| "learning_rate": 4.021906571824371e-06, |
| "loss": 1.1403286457061768, |
| "step": 2194 |
| }, |
| { |
| "epoch": 1.3515198153135821, |
| "grad_norm": 1.2265625, |
| "learning_rate": 4.01498015041544e-06, |
| "loss": 1.2438344955444336, |
| "step": 2196 |
| }, |
| { |
| "epoch": 1.352751058099269, |
| "grad_norm": 2.78125, |
| "learning_rate": 4.008061616736384e-06, |
| "loss": 1.3489577770233154, |
| "step": 2198 |
| }, |
| { |
| "epoch": 1.3539823008849559, |
| "grad_norm": 4.28125, |
| "learning_rate": 4.00115099827888e-06, |
| "loss": 1.4866937398910522, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.3552135436706425, |
| "grad_norm": 3.265625, |
| "learning_rate": 3.994248322503152e-06, |
| "loss": 1.4686030149459839, |
| "step": 2202 |
| }, |
| { |
| "epoch": 1.3564447864563294, |
| "grad_norm": 3.296875, |
| "learning_rate": 3.987353616837864e-06, |
| "loss": 1.7023189067840576, |
| "step": 2204 |
| }, |
| { |
| "epoch": 1.357676029242016, |
| "grad_norm": 6.3125, |
| "learning_rate": 3.980466908680011e-06, |
| "loss": 1.3478009700775146, |
| "step": 2206 |
| }, |
| { |
| "epoch": 1.358907272027703, |
| "grad_norm": 6.4375, |
| "learning_rate": 3.973588225394804e-06, |
| "loss": 1.93203604221344, |
| "step": 2208 |
| }, |
| { |
| "epoch": 1.3601385148133898, |
| "grad_norm": 4.28125, |
| "learning_rate": 3.966717594315573e-06, |
| "loss": 1.782301425933838, |
| "step": 2210 |
| }, |
| { |
| "epoch": 1.3613697575990766, |
| "grad_norm": 5.03125, |
| "learning_rate": 3.959855042743644e-06, |
| "loss": 1.6604218482971191, |
| "step": 2212 |
| }, |
| { |
| "epoch": 1.3626010003847635, |
| "grad_norm": 6.0, |
| "learning_rate": 3.953000597948246e-06, |
| "loss": 1.6294022798538208, |
| "step": 2214 |
| }, |
| { |
| "epoch": 1.3638322431704502, |
| "grad_norm": 3.1875, |
| "learning_rate": 3.946154287166391e-06, |
| "loss": 1.921142578125, |
| "step": 2216 |
| }, |
| { |
| "epoch": 1.365063485956137, |
| "grad_norm": 3.359375, |
| "learning_rate": 3.939316137602767e-06, |
| "loss": 1.590577244758606, |
| "step": 2218 |
| }, |
| { |
| "epoch": 1.3662947287418237, |
| "grad_norm": 3.0625, |
| "learning_rate": 3.932486176429633e-06, |
| "loss": 1.6776155233383179, |
| "step": 2220 |
| }, |
| { |
| "epoch": 1.3675259715275105, |
| "grad_norm": 3.234375, |
| "learning_rate": 3.925664430786715e-06, |
| "loss": 1.7855265140533447, |
| "step": 2222 |
| }, |
| { |
| "epoch": 1.3687572143131974, |
| "grad_norm": 4.1875, |
| "learning_rate": 3.918850927781091e-06, |
| "loss": 1.6806097030639648, |
| "step": 2224 |
| }, |
| { |
| "epoch": 1.3699884570988843, |
| "grad_norm": 3.6875, |
| "learning_rate": 3.912045694487083e-06, |
| "loss": 1.7721450328826904, |
| "step": 2226 |
| }, |
| { |
| "epoch": 1.371219699884571, |
| "grad_norm": 3.515625, |
| "learning_rate": 3.905248757946154e-06, |
| "loss": 1.5471529960632324, |
| "step": 2228 |
| }, |
| { |
| "epoch": 1.3724509426702578, |
| "grad_norm": 1.1640625, |
| "learning_rate": 3.898460145166802e-06, |
| "loss": 1.17193603515625, |
| "step": 2230 |
| }, |
| { |
| "epoch": 1.3736821854559447, |
| "grad_norm": 1.890625, |
| "learning_rate": 3.891679883124446e-06, |
| "loss": 1.124650239944458, |
| "step": 2232 |
| }, |
| { |
| "epoch": 1.3749134282416313, |
| "grad_norm": 2.15625, |
| "learning_rate": 3.884907998761323e-06, |
| "loss": 1.1825627088546753, |
| "step": 2234 |
| }, |
| { |
| "epoch": 1.3761446710273182, |
| "grad_norm": 4.125, |
| "learning_rate": 3.8781445189863784e-06, |
| "loss": 1.3950858116149902, |
| "step": 2236 |
| }, |
| { |
| "epoch": 1.377375913813005, |
| "grad_norm": 2.078125, |
| "learning_rate": 3.871389470675166e-06, |
| "loss": 1.1823573112487793, |
| "step": 2238 |
| }, |
| { |
| "epoch": 1.378607156598692, |
| "grad_norm": 2.640625, |
| "learning_rate": 3.864642880669731e-06, |
| "loss": 1.4916805028915405, |
| "step": 2240 |
| }, |
| { |
| "epoch": 1.3798383993843786, |
| "grad_norm": 4.0625, |
| "learning_rate": 3.857904775778511e-06, |
| "loss": 1.5051604509353638, |
| "step": 2242 |
| }, |
| { |
| "epoch": 1.3810696421700654, |
| "grad_norm": 2.53125, |
| "learning_rate": 3.851175182776226e-06, |
| "loss": 0.89178866147995, |
| "step": 2244 |
| }, |
| { |
| "epoch": 1.382300884955752, |
| "grad_norm": 2.328125, |
| "learning_rate": 3.844454128403774e-06, |
| "loss": 1.2950024604797363, |
| "step": 2246 |
| }, |
| { |
| "epoch": 1.383532127741439, |
| "grad_norm": 1.828125, |
| "learning_rate": 3.837741639368122e-06, |
| "loss": 1.510188102722168, |
| "step": 2248 |
| }, |
| { |
| "epoch": 1.3847633705271258, |
| "grad_norm": 2.609375, |
| "learning_rate": 3.831037742342203e-06, |
| "loss": 1.3180665969848633, |
| "step": 2250 |
| }, |
| { |
| "epoch": 1.3859946133128127, |
| "grad_norm": 2.875, |
| "learning_rate": 3.824342463964806e-06, |
| "loss": 1.1624681949615479, |
| "step": 2252 |
| }, |
| { |
| "epoch": 1.3872258560984994, |
| "grad_norm": 1.0625, |
| "learning_rate": 3.817655830840477e-06, |
| "loss": 1.0815974473953247, |
| "step": 2254 |
| }, |
| { |
| "epoch": 1.3884570988841862, |
| "grad_norm": 1.8359375, |
| "learning_rate": 3.8109778695394064e-06, |
| "loss": 0.9283170700073242, |
| "step": 2256 |
| }, |
| { |
| "epoch": 1.389688341669873, |
| "grad_norm": 2.375, |
| "learning_rate": 3.8043086065973258e-06, |
| "loss": 1.3523941040039062, |
| "step": 2258 |
| }, |
| { |
| "epoch": 1.3909195844555597, |
| "grad_norm": 2.28125, |
| "learning_rate": 3.7976480685154028e-06, |
| "loss": 1.3457260131835938, |
| "step": 2260 |
| }, |
| { |
| "epoch": 1.3921508272412466, |
| "grad_norm": 1.578125, |
| "learning_rate": 3.7909962817601377e-06, |
| "loss": 1.2352582216262817, |
| "step": 2262 |
| }, |
| { |
| "epoch": 1.3933820700269335, |
| "grad_norm": 1.6875, |
| "learning_rate": 3.7843532727632525e-06, |
| "loss": 1.0768884420394897, |
| "step": 2264 |
| }, |
| { |
| "epoch": 1.3946133128126204, |
| "grad_norm": 2.796875, |
| "learning_rate": 3.7777190679215927e-06, |
| "loss": 1.3136581182479858, |
| "step": 2266 |
| }, |
| { |
| "epoch": 1.395844555598307, |
| "grad_norm": 2.25, |
| "learning_rate": 3.7710936935970173e-06, |
| "loss": 1.4727461338043213, |
| "step": 2268 |
| }, |
| { |
| "epoch": 1.3970757983839939, |
| "grad_norm": 7.75, |
| "learning_rate": 3.7644771761163e-06, |
| "loss": 1.4279669523239136, |
| "step": 2270 |
| }, |
| { |
| "epoch": 1.3983070411696805, |
| "grad_norm": 2.296875, |
| "learning_rate": 3.7578695417710164e-06, |
| "loss": 1.4044800996780396, |
| "step": 2272 |
| }, |
| { |
| "epoch": 1.3995382839553674, |
| "grad_norm": 1.40625, |
| "learning_rate": 3.7512708168174457e-06, |
| "loss": 1.259403109550476, |
| "step": 2274 |
| }, |
| { |
| "epoch": 1.4007695267410543, |
| "grad_norm": 1.609375, |
| "learning_rate": 3.744681027476464e-06, |
| "loss": 1.0513827800750732, |
| "step": 2276 |
| }, |
| { |
| "epoch": 1.4020007695267411, |
| "grad_norm": 3.28125, |
| "learning_rate": 3.738100199933441e-06, |
| "loss": 1.343108057975769, |
| "step": 2278 |
| }, |
| { |
| "epoch": 1.4032320123124278, |
| "grad_norm": 3.03125, |
| "learning_rate": 3.731528360338135e-06, |
| "loss": 1.8145740032196045, |
| "step": 2280 |
| }, |
| { |
| "epoch": 1.4044632550981146, |
| "grad_norm": 0.62890625, |
| "learning_rate": 3.724965534804588e-06, |
| "loss": 1.4862263202667236, |
| "step": 2282 |
| }, |
| { |
| "epoch": 1.4056944978838015, |
| "grad_norm": 1.4296875, |
| "learning_rate": 3.71841174941103e-06, |
| "loss": 1.0445220470428467, |
| "step": 2284 |
| }, |
| { |
| "epoch": 1.4069257406694882, |
| "grad_norm": 2.84375, |
| "learning_rate": 3.7118670301997613e-06, |
| "loss": 1.2141282558441162, |
| "step": 2286 |
| }, |
| { |
| "epoch": 1.408156983455175, |
| "grad_norm": 2.109375, |
| "learning_rate": 3.7053314031770617e-06, |
| "loss": 1.4393140077590942, |
| "step": 2288 |
| }, |
| { |
| "epoch": 1.409388226240862, |
| "grad_norm": 5.0625, |
| "learning_rate": 3.698804894313075e-06, |
| "loss": 1.3356846570968628, |
| "step": 2290 |
| }, |
| { |
| "epoch": 1.4106194690265488, |
| "grad_norm": 4.15625, |
| "learning_rate": 3.6922875295417222e-06, |
| "loss": 1.8149614334106445, |
| "step": 2292 |
| }, |
| { |
| "epoch": 1.4118507118122354, |
| "grad_norm": 0.7890625, |
| "learning_rate": 3.6857793347605824e-06, |
| "loss": 1.0744565725326538, |
| "step": 2294 |
| }, |
| { |
| "epoch": 1.4130819545979223, |
| "grad_norm": 0.99609375, |
| "learning_rate": 3.6792803358308e-06, |
| "loss": 1.0420913696289062, |
| "step": 2296 |
| }, |
| { |
| "epoch": 1.414313197383609, |
| "grad_norm": 4.8125, |
| "learning_rate": 3.672790558576975e-06, |
| "loss": 1.4709570407867432, |
| "step": 2298 |
| }, |
| { |
| "epoch": 1.4155444401692958, |
| "grad_norm": 5.375, |
| "learning_rate": 3.666310028787069e-06, |
| "loss": 1.267836093902588, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.4167756829549827, |
| "grad_norm": 1.265625, |
| "learning_rate": 3.6598387722122945e-06, |
| "loss": 1.403648853302002, |
| "step": 2302 |
| }, |
| { |
| "epoch": 1.4180069257406696, |
| "grad_norm": 1.234375, |
| "learning_rate": 3.653376814567014e-06, |
| "loss": 1.0699892044067383, |
| "step": 2304 |
| }, |
| { |
| "epoch": 1.4192381685263564, |
| "grad_norm": 8.625, |
| "learning_rate": 3.646924181528641e-06, |
| "loss": 1.3329198360443115, |
| "step": 2306 |
| }, |
| { |
| "epoch": 1.420469411312043, |
| "grad_norm": 3.640625, |
| "learning_rate": 3.6404808987375388e-06, |
| "loss": 1.5011190176010132, |
| "step": 2308 |
| }, |
| { |
| "epoch": 1.42170065409773, |
| "grad_norm": 0.99609375, |
| "learning_rate": 3.6340469917969146e-06, |
| "loss": 1.1454980373382568, |
| "step": 2310 |
| }, |
| { |
| "epoch": 1.4229318968834166, |
| "grad_norm": 1.3359375, |
| "learning_rate": 3.627622486272718e-06, |
| "loss": 1.0486154556274414, |
| "step": 2312 |
| }, |
| { |
| "epoch": 1.4241631396691035, |
| "grad_norm": 3.125, |
| "learning_rate": 3.6212074076935417e-06, |
| "loss": 1.3061400651931763, |
| "step": 2314 |
| }, |
| { |
| "epoch": 1.4253943824547903, |
| "grad_norm": 3.8125, |
| "learning_rate": 3.6148017815505232e-06, |
| "loss": 1.856020450592041, |
| "step": 2316 |
| }, |
| { |
| "epoch": 1.4266256252404772, |
| "grad_norm": 1.859375, |
| "learning_rate": 3.6084056332972324e-06, |
| "loss": 1.5413720607757568, |
| "step": 2318 |
| }, |
| { |
| "epoch": 1.4278568680261639, |
| "grad_norm": 2.96875, |
| "learning_rate": 3.602018988349582e-06, |
| "loss": 1.4303263425827026, |
| "step": 2320 |
| }, |
| { |
| "epoch": 1.4290881108118507, |
| "grad_norm": 4.03125, |
| "learning_rate": 3.59564187208572e-06, |
| "loss": 1.5900418758392334, |
| "step": 2322 |
| }, |
| { |
| "epoch": 1.4303193535975376, |
| "grad_norm": 4.15625, |
| "learning_rate": 3.589274309845936e-06, |
| "loss": 1.7773805856704712, |
| "step": 2324 |
| }, |
| { |
| "epoch": 1.4315505963832242, |
| "grad_norm": 1.1796875, |
| "learning_rate": 3.5829163269325496e-06, |
| "loss": 1.1708474159240723, |
| "step": 2326 |
| }, |
| { |
| "epoch": 1.432781839168911, |
| "grad_norm": 3.546875, |
| "learning_rate": 3.576567948609817e-06, |
| "loss": 0.9795735478401184, |
| "step": 2328 |
| }, |
| { |
| "epoch": 1.434013081954598, |
| "grad_norm": 2.5, |
| "learning_rate": 3.570229200103832e-06, |
| "loss": 1.0455710887908936, |
| "step": 2330 |
| }, |
| { |
| "epoch": 1.4352443247402848, |
| "grad_norm": 2.203125, |
| "learning_rate": 3.5639001066024205e-06, |
| "loss": 1.4353384971618652, |
| "step": 2332 |
| }, |
| { |
| "epoch": 1.4364755675259715, |
| "grad_norm": 2.609375, |
| "learning_rate": 3.557580693255043e-06, |
| "loss": 1.44502854347229, |
| "step": 2334 |
| }, |
| { |
| "epoch": 1.4377068103116584, |
| "grad_norm": 2.3125, |
| "learning_rate": 3.5512709851726968e-06, |
| "loss": 1.4663447141647339, |
| "step": 2336 |
| }, |
| { |
| "epoch": 1.438938053097345, |
| "grad_norm": 2.25, |
| "learning_rate": 3.544971007427811e-06, |
| "loss": 1.5666968822479248, |
| "step": 2338 |
| }, |
| { |
| "epoch": 1.4401692958830319, |
| "grad_norm": 3.421875, |
| "learning_rate": 3.538680785054154e-06, |
| "loss": 1.4446769952774048, |
| "step": 2340 |
| }, |
| { |
| "epoch": 1.4414005386687188, |
| "grad_norm": 3.21875, |
| "learning_rate": 3.5324003430467265e-06, |
| "loss": 1.1215463876724243, |
| "step": 2342 |
| }, |
| { |
| "epoch": 1.4426317814544056, |
| "grad_norm": 2.390625, |
| "learning_rate": 3.526129706361668e-06, |
| "loss": 0.6768141984939575, |
| "step": 2344 |
| }, |
| { |
| "epoch": 1.4438630242400923, |
| "grad_norm": 4.65625, |
| "learning_rate": 3.5198688999161507e-06, |
| "loss": 1.2538286447525024, |
| "step": 2346 |
| }, |
| { |
| "epoch": 1.4450942670257791, |
| "grad_norm": 4.59375, |
| "learning_rate": 3.5136179485882928e-06, |
| "loss": 1.9145801067352295, |
| "step": 2348 |
| }, |
| { |
| "epoch": 1.446325509811466, |
| "grad_norm": 2.875, |
| "learning_rate": 3.5073768772170454e-06, |
| "loss": 1.7400861978530884, |
| "step": 2350 |
| }, |
| { |
| "epoch": 1.4475567525971527, |
| "grad_norm": 2.4375, |
| "learning_rate": 3.501145710602103e-06, |
| "loss": 1.4013432264328003, |
| "step": 2352 |
| }, |
| { |
| "epoch": 1.4487879953828395, |
| "grad_norm": 4.71875, |
| "learning_rate": 3.494924473503801e-06, |
| "loss": 1.7890344858169556, |
| "step": 2354 |
| }, |
| { |
| "epoch": 1.4500192381685264, |
| "grad_norm": 2.625, |
| "learning_rate": 3.4887131906430216e-06, |
| "loss": 1.8322502374649048, |
| "step": 2356 |
| }, |
| { |
| "epoch": 1.4512504809542133, |
| "grad_norm": 11.375, |
| "learning_rate": 3.482511886701091e-06, |
| "loss": 1.65338933467865, |
| "step": 2358 |
| }, |
| { |
| "epoch": 1.4524817237399, |
| "grad_norm": 5.875, |
| "learning_rate": 3.4763205863196795e-06, |
| "loss": 1.3839627504348755, |
| "step": 2360 |
| }, |
| { |
| "epoch": 1.4537129665255868, |
| "grad_norm": 1.7734375, |
| "learning_rate": 3.4701393141007102e-06, |
| "loss": 1.5974239110946655, |
| "step": 2362 |
| }, |
| { |
| "epoch": 1.4549442093112734, |
| "grad_norm": 18.75, |
| "learning_rate": 3.4639680946062595e-06, |
| "loss": 1.3460378646850586, |
| "step": 2364 |
| }, |
| { |
| "epoch": 1.4561754520969603, |
| "grad_norm": 2.109375, |
| "learning_rate": 3.457806952358456e-06, |
| "loss": 1.401890516281128, |
| "step": 2366 |
| }, |
| { |
| "epoch": 1.4574066948826472, |
| "grad_norm": 1.6640625, |
| "learning_rate": 3.4516559118393827e-06, |
| "loss": 1.390883445739746, |
| "step": 2368 |
| }, |
| { |
| "epoch": 1.458637937668334, |
| "grad_norm": 1.8828125, |
| "learning_rate": 3.4455149974909864e-06, |
| "loss": 1.3334836959838867, |
| "step": 2370 |
| }, |
| { |
| "epoch": 1.4598691804540207, |
| "grad_norm": 1.8515625, |
| "learning_rate": 3.4393842337149775e-06, |
| "loss": 1.331444501876831, |
| "step": 2372 |
| }, |
| { |
| "epoch": 1.4611004232397076, |
| "grad_norm": 2.859375, |
| "learning_rate": 3.433263644872724e-06, |
| "loss": 0.9283863306045532, |
| "step": 2374 |
| }, |
| { |
| "epoch": 1.4623316660253944, |
| "grad_norm": 5.0625, |
| "learning_rate": 3.4271532552851692e-06, |
| "loss": 0.4115677773952484, |
| "step": 2376 |
| }, |
| { |
| "epoch": 1.463562908811081, |
| "grad_norm": 2.0, |
| "learning_rate": 3.421053089232725e-06, |
| "loss": 0.6582207679748535, |
| "step": 2378 |
| }, |
| { |
| "epoch": 1.464794151596768, |
| "grad_norm": 2.046875, |
| "learning_rate": 3.4149631709551833e-06, |
| "loss": 1.38728928565979, |
| "step": 2380 |
| }, |
| { |
| "epoch": 1.4660253943824548, |
| "grad_norm": 2.0, |
| "learning_rate": 3.4088835246516098e-06, |
| "loss": 0.8790442943572998, |
| "step": 2382 |
| }, |
| { |
| "epoch": 1.4672566371681417, |
| "grad_norm": 18.875, |
| "learning_rate": 3.402814174480257e-06, |
| "loss": 0.3224247694015503, |
| "step": 2384 |
| }, |
| { |
| "epoch": 1.4684878799538283, |
| "grad_norm": 3.40625, |
| "learning_rate": 3.3967551445584617e-06, |
| "loss": 0.8255885243415833, |
| "step": 2386 |
| }, |
| { |
| "epoch": 1.4697191227395152, |
| "grad_norm": 6.78125, |
| "learning_rate": 3.3907064589625538e-06, |
| "loss": 1.838360071182251, |
| "step": 2388 |
| }, |
| { |
| "epoch": 1.4709503655252019, |
| "grad_norm": 2.5625, |
| "learning_rate": 3.384668141727757e-06, |
| "loss": 1.5447924137115479, |
| "step": 2390 |
| }, |
| { |
| "epoch": 1.4721816083108887, |
| "grad_norm": 1.8203125, |
| "learning_rate": 3.3786402168480976e-06, |
| "loss": 1.358646035194397, |
| "step": 2392 |
| }, |
| { |
| "epoch": 1.4734128510965756, |
| "grad_norm": 2.90625, |
| "learning_rate": 3.372622708276302e-06, |
| "loss": 1.4267942905426025, |
| "step": 2394 |
| }, |
| { |
| "epoch": 1.4746440938822625, |
| "grad_norm": 4.4375, |
| "learning_rate": 3.3666156399237125e-06, |
| "loss": 1.7402775287628174, |
| "step": 2396 |
| }, |
| { |
| "epoch": 1.4758753366679493, |
| "grad_norm": 2.5, |
| "learning_rate": 3.360619035660181e-06, |
| "loss": 1.8984918594360352, |
| "step": 2398 |
| }, |
| { |
| "epoch": 1.477106579453636, |
| "grad_norm": 3.546875, |
| "learning_rate": 3.354632919313979e-06, |
| "loss": 1.718995451927185, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.4783378222393229, |
| "grad_norm": 1.6484375, |
| "learning_rate": 3.3486573146717066e-06, |
| "loss": 1.5234237909317017, |
| "step": 2402 |
| }, |
| { |
| "epoch": 1.4795690650250095, |
| "grad_norm": 1.1328125, |
| "learning_rate": 3.3426922454781907e-06, |
| "loss": 1.084274172782898, |
| "step": 2404 |
| }, |
| { |
| "epoch": 1.4808003078106964, |
| "grad_norm": 5.625, |
| "learning_rate": 3.336737735436395e-06, |
| "loss": 1.345269799232483, |
| "step": 2406 |
| }, |
| { |
| "epoch": 1.4820315505963833, |
| "grad_norm": 2.359375, |
| "learning_rate": 3.3307938082073256e-06, |
| "loss": 1.7669227123260498, |
| "step": 2408 |
| }, |
| { |
| "epoch": 1.4832627933820701, |
| "grad_norm": 1.671875, |
| "learning_rate": 3.3248604874099377e-06, |
| "loss": 1.3955857753753662, |
| "step": 2410 |
| }, |
| { |
| "epoch": 1.4844940361677568, |
| "grad_norm": 2.015625, |
| "learning_rate": 3.318937796621039e-06, |
| "loss": 1.4464383125305176, |
| "step": 2412 |
| }, |
| { |
| "epoch": 1.4857252789534436, |
| "grad_norm": 3.078125, |
| "learning_rate": 3.313025759375198e-06, |
| "loss": 1.3337277173995972, |
| "step": 2414 |
| }, |
| { |
| "epoch": 1.4869565217391305, |
| "grad_norm": 2.03125, |
| "learning_rate": 3.3071243991646473e-06, |
| "loss": 1.4842052459716797, |
| "step": 2416 |
| }, |
| { |
| "epoch": 1.4881877645248172, |
| "grad_norm": 1.71875, |
| "learning_rate": 3.301233739439198e-06, |
| "loss": 0.7694661617279053, |
| "step": 2418 |
| }, |
| { |
| "epoch": 1.489419007310504, |
| "grad_norm": 9.1875, |
| "learning_rate": 3.2953538036061373e-06, |
| "loss": 0.2676663398742676, |
| "step": 2420 |
| }, |
| { |
| "epoch": 1.490650250096191, |
| "grad_norm": 3.515625, |
| "learning_rate": 3.289484615030142e-06, |
| "loss": 0.8886584043502808, |
| "step": 2422 |
| }, |
| { |
| "epoch": 1.4918814928818778, |
| "grad_norm": 2.75, |
| "learning_rate": 3.2836261970331807e-06, |
| "loss": 1.4449005126953125, |
| "step": 2424 |
| }, |
| { |
| "epoch": 1.4931127356675644, |
| "grad_norm": 3.796875, |
| "learning_rate": 3.2777785728944282e-06, |
| "loss": 1.6887394189834595, |
| "step": 2426 |
| }, |
| { |
| "epoch": 1.4943439784532513, |
| "grad_norm": 4.71875, |
| "learning_rate": 3.2719417658501663e-06, |
| "loss": 1.7763824462890625, |
| "step": 2428 |
| }, |
| { |
| "epoch": 1.495575221238938, |
| "grad_norm": 1.328125, |
| "learning_rate": 3.2661157990936897e-06, |
| "loss": 1.5290007591247559, |
| "step": 2430 |
| }, |
| { |
| "epoch": 1.4968064640246248, |
| "grad_norm": 1.9609375, |
| "learning_rate": 3.2603006957752215e-06, |
| "loss": 1.2895843982696533, |
| "step": 2432 |
| }, |
| { |
| "epoch": 1.4980377068103117, |
| "grad_norm": 1.0390625, |
| "learning_rate": 3.2544964790018207e-06, |
| "loss": 1.420303225517273, |
| "step": 2434 |
| }, |
| { |
| "epoch": 1.4992689495959985, |
| "grad_norm": 1.875, |
| "learning_rate": 3.248703171837282e-06, |
| "loss": 1.1444916725158691, |
| "step": 2436 |
| }, |
| { |
| "epoch": 1.5005001923816854, |
| "grad_norm": 2.1875, |
| "learning_rate": 3.242920797302051e-06, |
| "loss": 1.2554428577423096, |
| "step": 2438 |
| }, |
| { |
| "epoch": 1.501731435167372, |
| "grad_norm": 1.7578125, |
| "learning_rate": 3.23714937837313e-06, |
| "loss": 1.4247585535049438, |
| "step": 2440 |
| }, |
| { |
| "epoch": 1.5029626779530587, |
| "grad_norm": 0.84375, |
| "learning_rate": 3.2313889379839945e-06, |
| "loss": 1.1883597373962402, |
| "step": 2442 |
| }, |
| { |
| "epoch": 1.5041939207387456, |
| "grad_norm": 0.65625, |
| "learning_rate": 3.2256394990244842e-06, |
| "loss": 1.0033619403839111, |
| "step": 2444 |
| }, |
| { |
| "epoch": 1.5054251635244325, |
| "grad_norm": 3.28125, |
| "learning_rate": 3.2199010843407317e-06, |
| "loss": 1.2857462167739868, |
| "step": 2446 |
| }, |
| { |
| "epoch": 1.5066564063101193, |
| "grad_norm": 2.96875, |
| "learning_rate": 3.214173716735059e-06, |
| "loss": 1.4051223993301392, |
| "step": 2448 |
| }, |
| { |
| "epoch": 1.5078876490958062, |
| "grad_norm": 1.984375, |
| "learning_rate": 3.208457418965895e-06, |
| "loss": 1.3864938020706177, |
| "step": 2450 |
| }, |
| { |
| "epoch": 1.5091188918814928, |
| "grad_norm": 1.0859375, |
| "learning_rate": 3.202752213747678e-06, |
| "loss": 1.2609646320343018, |
| "step": 2452 |
| }, |
| { |
| "epoch": 1.5103501346671797, |
| "grad_norm": 3.90625, |
| "learning_rate": 3.197058123750769e-06, |
| "loss": 1.5066922903060913, |
| "step": 2454 |
| }, |
| { |
| "epoch": 1.5115813774528664, |
| "grad_norm": 39.75, |
| "learning_rate": 3.191375171601362e-06, |
| "loss": 1.7706074714660645, |
| "step": 2456 |
| }, |
| { |
| "epoch": 1.5128126202385532, |
| "grad_norm": 1.0859375, |
| "learning_rate": 3.185703379881393e-06, |
| "loss": 1.743719458580017, |
| "step": 2458 |
| }, |
| { |
| "epoch": 1.51404386302424, |
| "grad_norm": 8.125, |
| "learning_rate": 3.1800427711284516e-06, |
| "loss": 1.3321908712387085, |
| "step": 2460 |
| }, |
| { |
| "epoch": 1.515275105809927, |
| "grad_norm": 1.6171875, |
| "learning_rate": 3.1743933678356884e-06, |
| "loss": 1.2848858833312988, |
| "step": 2462 |
| }, |
| { |
| "epoch": 1.5165063485956138, |
| "grad_norm": 2.09375, |
| "learning_rate": 3.1687551924517283e-06, |
| "loss": 1.413031816482544, |
| "step": 2464 |
| }, |
| { |
| "epoch": 1.5177375913813005, |
| "grad_norm": 8.9375, |
| "learning_rate": 3.1631282673805838e-06, |
| "loss": 1.4807164669036865, |
| "step": 2466 |
| }, |
| { |
| "epoch": 1.5189688341669871, |
| "grad_norm": 1.8046875, |
| "learning_rate": 3.1575126149815584e-06, |
| "loss": 1.3795205354690552, |
| "step": 2468 |
| }, |
| { |
| "epoch": 1.520200076952674, |
| "grad_norm": 0.8515625, |
| "learning_rate": 3.1519082575691647e-06, |
| "loss": 1.2714817523956299, |
| "step": 2470 |
| }, |
| { |
| "epoch": 1.5214313197383609, |
| "grad_norm": 1.0546875, |
| "learning_rate": 3.1463152174130318e-06, |
| "loss": 1.0713388919830322, |
| "step": 2472 |
| }, |
| { |
| "epoch": 1.5226625625240477, |
| "grad_norm": 3.203125, |
| "learning_rate": 3.1407335167378194e-06, |
| "loss": 1.0308473110198975, |
| "step": 2474 |
| }, |
| { |
| "epoch": 1.5238938053097346, |
| "grad_norm": 14.5, |
| "learning_rate": 3.1351631777231288e-06, |
| "loss": 1.3537421226501465, |
| "step": 2476 |
| }, |
| { |
| "epoch": 1.5251250480954213, |
| "grad_norm": 1.75, |
| "learning_rate": 3.1296042225034128e-06, |
| "loss": 1.3415385484695435, |
| "step": 2478 |
| }, |
| { |
| "epoch": 1.5263562908811081, |
| "grad_norm": 2.109375, |
| "learning_rate": 3.1240566731678884e-06, |
| "loss": 1.407192587852478, |
| "step": 2480 |
| }, |
| { |
| "epoch": 1.5275875336667948, |
| "grad_norm": 1.4765625, |
| "learning_rate": 3.118520551760454e-06, |
| "loss": 1.2609741687774658, |
| "step": 2482 |
| }, |
| { |
| "epoch": 1.5288187764524817, |
| "grad_norm": 1.7890625, |
| "learning_rate": 3.112995880279594e-06, |
| "loss": 1.061118245124817, |
| "step": 2484 |
| }, |
| { |
| "epoch": 1.5300500192381685, |
| "grad_norm": 5.0, |
| "learning_rate": 3.107482680678297e-06, |
| "loss": 1.2044938802719116, |
| "step": 2486 |
| }, |
| { |
| "epoch": 1.5312812620238554, |
| "grad_norm": 2.875, |
| "learning_rate": 3.1019809748639617e-06, |
| "loss": 1.4915175437927246, |
| "step": 2488 |
| }, |
| { |
| "epoch": 1.5325125048095423, |
| "grad_norm": 2.109375, |
| "learning_rate": 3.096490784698323e-06, |
| "loss": 1.3962339162826538, |
| "step": 2490 |
| }, |
| { |
| "epoch": 1.533743747595229, |
| "grad_norm": 3.9375, |
| "learning_rate": 3.091012131997352e-06, |
| "loss": 1.4024924039840698, |
| "step": 2492 |
| }, |
| { |
| "epoch": 1.5349749903809158, |
| "grad_norm": 1.6328125, |
| "learning_rate": 3.0855450385311736e-06, |
| "loss": 1.266303539276123, |
| "step": 2494 |
| }, |
| { |
| "epoch": 1.5362062331666024, |
| "grad_norm": 1.578125, |
| "learning_rate": 3.0800895260239815e-06, |
| "loss": 1.1494388580322266, |
| "step": 2496 |
| }, |
| { |
| "epoch": 1.5374374759522893, |
| "grad_norm": 1.0859375, |
| "learning_rate": 3.0746456161539534e-06, |
| "loss": 1.2219388484954834, |
| "step": 2498 |
| }, |
| { |
| "epoch": 1.5386687187379762, |
| "grad_norm": 1.734375, |
| "learning_rate": 3.06921333055316e-06, |
| "loss": 0.9378917813301086, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.539899961523663, |
| "grad_norm": 0.96875, |
| "learning_rate": 3.063792690807481e-06, |
| "loss": 1.071752667427063, |
| "step": 2502 |
| }, |
| { |
| "epoch": 1.54113120430935, |
| "grad_norm": 1.6328125, |
| "learning_rate": 3.0583837184565192e-06, |
| "loss": 1.0082283020019531, |
| "step": 2504 |
| }, |
| { |
| "epoch": 1.5423624470950366, |
| "grad_norm": 7.21875, |
| "learning_rate": 3.0529864349935196e-06, |
| "loss": 1.4392098188400269, |
| "step": 2506 |
| }, |
| { |
| "epoch": 1.5435936898807232, |
| "grad_norm": 5.8125, |
| "learning_rate": 3.047600861865277e-06, |
| "loss": 1.6693429946899414, |
| "step": 2508 |
| }, |
| { |
| "epoch": 1.54482493266641, |
| "grad_norm": 1.8046875, |
| "learning_rate": 3.0422270204720528e-06, |
| "loss": 1.7065497636795044, |
| "step": 2510 |
| }, |
| { |
| "epoch": 1.546056175452097, |
| "grad_norm": 3.65625, |
| "learning_rate": 3.0368649321674914e-06, |
| "loss": 1.5884374380111694, |
| "step": 2512 |
| }, |
| { |
| "epoch": 1.5472874182377838, |
| "grad_norm": 6.75, |
| "learning_rate": 3.03151461825854e-06, |
| "loss": 1.2509660720825195, |
| "step": 2514 |
| }, |
| { |
| "epoch": 1.5485186610234707, |
| "grad_norm": 1.0078125, |
| "learning_rate": 3.026176100005349e-06, |
| "loss": 1.0895438194274902, |
| "step": 2516 |
| }, |
| { |
| "epoch": 1.5497499038091573, |
| "grad_norm": 2.140625, |
| "learning_rate": 3.020849398621204e-06, |
| "loss": 1.5385931730270386, |
| "step": 2518 |
| }, |
| { |
| "epoch": 1.5509811465948442, |
| "grad_norm": 3.359375, |
| "learning_rate": 3.015534535272433e-06, |
| "loss": 1.6532469987869263, |
| "step": 2520 |
| }, |
| { |
| "epoch": 1.5522123893805309, |
| "grad_norm": 1.375, |
| "learning_rate": 3.0102315310783257e-06, |
| "loss": 1.404496669769287, |
| "step": 2522 |
| }, |
| { |
| "epoch": 1.5534436321662177, |
| "grad_norm": 1.6484375, |
| "learning_rate": 3.004940407111046e-06, |
| "loss": 0.9684979915618896, |
| "step": 2524 |
| }, |
| { |
| "epoch": 1.5546748749519046, |
| "grad_norm": 2.609375, |
| "learning_rate": 2.9996611843955505e-06, |
| "loss": 1.1497083902359009, |
| "step": 2526 |
| }, |
| { |
| "epoch": 1.5559061177375915, |
| "grad_norm": 5.71875, |
| "learning_rate": 2.9943938839095038e-06, |
| "loss": 1.3859484195709229, |
| "step": 2528 |
| }, |
| { |
| "epoch": 1.5571373605232783, |
| "grad_norm": 1.2734375, |
| "learning_rate": 2.9891385265831984e-06, |
| "loss": 1.2651927471160889, |
| "step": 2530 |
| }, |
| { |
| "epoch": 1.558368603308965, |
| "grad_norm": 2.28125, |
| "learning_rate": 2.9838951332994676e-06, |
| "loss": 1.4481619596481323, |
| "step": 2532 |
| }, |
| { |
| "epoch": 1.5595998460946516, |
| "grad_norm": 1.0625, |
| "learning_rate": 2.9786637248936025e-06, |
| "loss": 1.2641582489013672, |
| "step": 2534 |
| }, |
| { |
| "epoch": 1.5608310888803385, |
| "grad_norm": 1.3515625, |
| "learning_rate": 2.973444322153275e-06, |
| "loss": 0.9729331731796265, |
| "step": 2536 |
| }, |
| { |
| "epoch": 1.5620623316660254, |
| "grad_norm": 2.734375, |
| "learning_rate": 2.968236945818447e-06, |
| "loss": 1.1042187213897705, |
| "step": 2538 |
| }, |
| { |
| "epoch": 1.5632935744517122, |
| "grad_norm": 4.25, |
| "learning_rate": 2.9630416165812946e-06, |
| "loss": 1.7580437660217285, |
| "step": 2540 |
| }, |
| { |
| "epoch": 1.5645248172373991, |
| "grad_norm": 1.265625, |
| "learning_rate": 2.9578583550861207e-06, |
| "loss": 1.3896489143371582, |
| "step": 2542 |
| }, |
| { |
| "epoch": 1.5657560600230858, |
| "grad_norm": 0.9765625, |
| "learning_rate": 2.9526871819292774e-06, |
| "loss": 1.0869468450546265, |
| "step": 2544 |
| }, |
| { |
| "epoch": 1.5669873028087726, |
| "grad_norm": 24.125, |
| "learning_rate": 2.947528117659082e-06, |
| "loss": 1.2554739713668823, |
| "step": 2546 |
| }, |
| { |
| "epoch": 1.5682185455944593, |
| "grad_norm": 2.65625, |
| "learning_rate": 2.9423811827757336e-06, |
| "loss": 1.2301050424575806, |
| "step": 2548 |
| }, |
| { |
| "epoch": 1.5694497883801461, |
| "grad_norm": 6.5625, |
| "learning_rate": 2.9372463977312364e-06, |
| "loss": 1.5220141410827637, |
| "step": 2550 |
| }, |
| { |
| "epoch": 1.570681031165833, |
| "grad_norm": 3.890625, |
| "learning_rate": 2.932123782929315e-06, |
| "loss": 1.6439367532730103, |
| "step": 2552 |
| }, |
| { |
| "epoch": 1.57191227395152, |
| "grad_norm": 10.0625, |
| "learning_rate": 2.927013358725333e-06, |
| "loss": 1.434930682182312, |
| "step": 2554 |
| }, |
| { |
| "epoch": 1.5731435167372068, |
| "grad_norm": 3.953125, |
| "learning_rate": 2.9219151454262152e-06, |
| "loss": 1.3206510543823242, |
| "step": 2556 |
| }, |
| { |
| "epoch": 1.5743747595228934, |
| "grad_norm": 3.359375, |
| "learning_rate": 2.9168291632903593e-06, |
| "loss": 1.6884095668792725, |
| "step": 2558 |
| }, |
| { |
| "epoch": 1.5756060023085803, |
| "grad_norm": 3.53125, |
| "learning_rate": 2.911755432527568e-06, |
| "loss": 1.4005990028381348, |
| "step": 2560 |
| }, |
| { |
| "epoch": 1.576837245094267, |
| "grad_norm": 2.6875, |
| "learning_rate": 2.906693973298958e-06, |
| "loss": 1.689469575881958, |
| "step": 2562 |
| }, |
| { |
| "epoch": 1.5780684878799538, |
| "grad_norm": 2.203125, |
| "learning_rate": 2.901644805716884e-06, |
| "loss": 1.4509550333023071, |
| "step": 2564 |
| }, |
| { |
| "epoch": 1.5792997306656407, |
| "grad_norm": 0.8359375, |
| "learning_rate": 2.8966079498448564e-06, |
| "loss": 1.4247888326644897, |
| "step": 2566 |
| }, |
| { |
| "epoch": 1.5805309734513275, |
| "grad_norm": 0.8046875, |
| "learning_rate": 2.891583425697467e-06, |
| "loss": 1.0635548830032349, |
| "step": 2568 |
| }, |
| { |
| "epoch": 1.5817622162370142, |
| "grad_norm": 4.3125, |
| "learning_rate": 2.8865712532403056e-06, |
| "loss": 1.169121265411377, |
| "step": 2570 |
| }, |
| { |
| "epoch": 1.582993459022701, |
| "grad_norm": 2.171875, |
| "learning_rate": 2.881571452389877e-06, |
| "loss": 1.51327383518219, |
| "step": 2572 |
| }, |
| { |
| "epoch": 1.5842247018083877, |
| "grad_norm": 2.375, |
| "learning_rate": 2.876584043013527e-06, |
| "loss": 1.2776228189468384, |
| "step": 2574 |
| }, |
| { |
| "epoch": 1.5854559445940746, |
| "grad_norm": 2.46875, |
| "learning_rate": 2.8716090449293675e-06, |
| "loss": 1.3650319576263428, |
| "step": 2576 |
| }, |
| { |
| "epoch": 1.5866871873797614, |
| "grad_norm": 2.59375, |
| "learning_rate": 2.8666464779061878e-06, |
| "loss": 1.4736026525497437, |
| "step": 2578 |
| }, |
| { |
| "epoch": 1.5879184301654483, |
| "grad_norm": 2.109375, |
| "learning_rate": 2.8616963616633796e-06, |
| "loss": 1.456394076347351, |
| "step": 2580 |
| }, |
| { |
| "epoch": 1.5891496729511352, |
| "grad_norm": 1.7421875, |
| "learning_rate": 2.856758715870863e-06, |
| "loss": 1.1904287338256836, |
| "step": 2582 |
| }, |
| { |
| "epoch": 1.5903809157368218, |
| "grad_norm": 2.125, |
| "learning_rate": 2.851833560149007e-06, |
| "loss": 0.9508844017982483, |
| "step": 2584 |
| }, |
| { |
| "epoch": 1.5916121585225087, |
| "grad_norm": 3.328125, |
| "learning_rate": 2.846920914068543e-06, |
| "loss": 1.1540579795837402, |
| "step": 2586 |
| }, |
| { |
| "epoch": 1.5928434013081954, |
| "grad_norm": 1.8125, |
| "learning_rate": 2.8420207971504983e-06, |
| "loss": 1.4928700923919678, |
| "step": 2588 |
| }, |
| { |
| "epoch": 1.5940746440938822, |
| "grad_norm": 2.765625, |
| "learning_rate": 2.8371332288661134e-06, |
| "loss": 1.4172389507293701, |
| "step": 2590 |
| }, |
| { |
| "epoch": 1.595305886879569, |
| "grad_norm": 2.734375, |
| "learning_rate": 2.832258228636766e-06, |
| "loss": 1.3628898859024048, |
| "step": 2592 |
| }, |
| { |
| "epoch": 1.596537129665256, |
| "grad_norm": 2.5, |
| "learning_rate": 2.8273958158338925e-06, |
| "loss": 1.4539599418640137, |
| "step": 2594 |
| }, |
| { |
| "epoch": 1.5977683724509428, |
| "grad_norm": 1.8671875, |
| "learning_rate": 2.82254600977891e-06, |
| "loss": 1.3956893682479858, |
| "step": 2596 |
| }, |
| { |
| "epoch": 1.5989996152366295, |
| "grad_norm": 2.734375, |
| "learning_rate": 2.817708829743142e-06, |
| "loss": 1.6055045127868652, |
| "step": 2598 |
| }, |
| { |
| "epoch": 1.6002308580223161, |
| "grad_norm": 2.5, |
| "learning_rate": 2.812884294947742e-06, |
| "loss": 1.4774987697601318, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.601462100808003, |
| "grad_norm": 2.828125, |
| "learning_rate": 2.8080724245636142e-06, |
| "loss": 1.6516894102096558, |
| "step": 2602 |
| }, |
| { |
| "epoch": 1.6026933435936899, |
| "grad_norm": 5.21875, |
| "learning_rate": 2.8032732377113414e-06, |
| "loss": 1.8592480421066284, |
| "step": 2604 |
| }, |
| { |
| "epoch": 1.6039245863793767, |
| "grad_norm": 2.4375, |
| "learning_rate": 2.798486753461103e-06, |
| "loss": 1.561418056488037, |
| "step": 2606 |
| }, |
| { |
| "epoch": 1.6051558291650636, |
| "grad_norm": 2.875, |
| "learning_rate": 2.7937129908326083e-06, |
| "loss": 1.4258840084075928, |
| "step": 2608 |
| }, |
| { |
| "epoch": 1.6063870719507503, |
| "grad_norm": 1.1796875, |
| "learning_rate": 2.7889519687950113e-06, |
| "loss": 1.3105173110961914, |
| "step": 2610 |
| }, |
| { |
| "epoch": 1.6076183147364371, |
| "grad_norm": 1.5703125, |
| "learning_rate": 2.7842037062668425e-06, |
| "loss": 1.2106075286865234, |
| "step": 2612 |
| }, |
| { |
| "epoch": 1.6088495575221238, |
| "grad_norm": 4.9375, |
| "learning_rate": 2.7794682221159266e-06, |
| "loss": 1.4307101964950562, |
| "step": 2614 |
| }, |
| { |
| "epoch": 1.6100808003078106, |
| "grad_norm": 3.609375, |
| "learning_rate": 2.7747455351593167e-06, |
| "loss": 1.6504584550857544, |
| "step": 2616 |
| }, |
| { |
| "epoch": 1.6113120430934975, |
| "grad_norm": 3.46875, |
| "learning_rate": 2.7700356641632126e-06, |
| "loss": 1.661940574645996, |
| "step": 2618 |
| }, |
| { |
| "epoch": 1.6125432858791844, |
| "grad_norm": 4.25, |
| "learning_rate": 2.765338627842887e-06, |
| "loss": 1.7384159564971924, |
| "step": 2620 |
| }, |
| { |
| "epoch": 1.6137745286648713, |
| "grad_norm": 75.0, |
| "learning_rate": 2.7606544448626137e-06, |
| "loss": 1.7225772142410278, |
| "step": 2622 |
| }, |
| { |
| "epoch": 1.615005771450558, |
| "grad_norm": 4.6875, |
| "learning_rate": 2.7559831338355943e-06, |
| "loss": 1.4637809991836548, |
| "step": 2624 |
| }, |
| { |
| "epoch": 1.6162370142362446, |
| "grad_norm": 2.625, |
| "learning_rate": 2.7513247133238787e-06, |
| "loss": 1.4859167337417603, |
| "step": 2626 |
| }, |
| { |
| "epoch": 1.6174682570219314, |
| "grad_norm": 2.625, |
| "learning_rate": 2.746679201838294e-06, |
| "loss": 1.2690590620040894, |
| "step": 2628 |
| }, |
| { |
| "epoch": 1.6186994998076183, |
| "grad_norm": 3.296875, |
| "learning_rate": 2.7420466178383726e-06, |
| "loss": 1.333600640296936, |
| "step": 2630 |
| }, |
| { |
| "epoch": 1.6199307425933052, |
| "grad_norm": 2.671875, |
| "learning_rate": 2.7374269797322824e-06, |
| "loss": 1.341450572013855, |
| "step": 2632 |
| }, |
| { |
| "epoch": 1.621161985378992, |
| "grad_norm": 1.703125, |
| "learning_rate": 2.7328203058767424e-06, |
| "loss": 1.355420708656311, |
| "step": 2634 |
| }, |
| { |
| "epoch": 1.6223932281646787, |
| "grad_norm": 1.84375, |
| "learning_rate": 2.728226614576961e-06, |
| "loss": 1.169846534729004, |
| "step": 2636 |
| }, |
| { |
| "epoch": 1.6236244709503656, |
| "grad_norm": 2.03125, |
| "learning_rate": 2.7236459240865555e-06, |
| "loss": 1.2084237337112427, |
| "step": 2638 |
| }, |
| { |
| "epoch": 1.6248557137360522, |
| "grad_norm": 1.3984375, |
| "learning_rate": 2.7190782526074885e-06, |
| "loss": 1.0683963298797607, |
| "step": 2640 |
| }, |
| { |
| "epoch": 1.626086956521739, |
| "grad_norm": 1.171875, |
| "learning_rate": 2.7145236182899817e-06, |
| "loss": 0.9492171406745911, |
| "step": 2642 |
| }, |
| { |
| "epoch": 1.627318199307426, |
| "grad_norm": 0.984375, |
| "learning_rate": 2.7099820392324572e-06, |
| "loss": 1.1876254081726074, |
| "step": 2644 |
| }, |
| { |
| "epoch": 1.6285494420931128, |
| "grad_norm": 1.15625, |
| "learning_rate": 2.705453533481459e-06, |
| "loss": 1.1485795974731445, |
| "step": 2646 |
| }, |
| { |
| "epoch": 1.6297806848787997, |
| "grad_norm": 1.5859375, |
| "learning_rate": 2.700938119031586e-06, |
| "loss": 1.128466248512268, |
| "step": 2648 |
| }, |
| { |
| "epoch": 1.6310119276644863, |
| "grad_norm": 3.671875, |
| "learning_rate": 2.696435813825411e-06, |
| "loss": 1.4734625816345215, |
| "step": 2650 |
| }, |
| { |
| "epoch": 1.6322431704501732, |
| "grad_norm": 3.296875, |
| "learning_rate": 2.69194663575342e-06, |
| "loss": 1.65286123752594, |
| "step": 2652 |
| }, |
| { |
| "epoch": 1.6334744132358598, |
| "grad_norm": 1.765625, |
| "learning_rate": 2.687470602653936e-06, |
| "loss": 1.244763731956482, |
| "step": 2654 |
| }, |
| { |
| "epoch": 1.6347056560215467, |
| "grad_norm": 1.65625, |
| "learning_rate": 2.683007732313046e-06, |
| "loss": 1.109053134918213, |
| "step": 2656 |
| }, |
| { |
| "epoch": 1.6359368988072336, |
| "grad_norm": 1.6484375, |
| "learning_rate": 2.6785580424645376e-06, |
| "loss": 0.9938783645629883, |
| "step": 2658 |
| }, |
| { |
| "epoch": 1.6371681415929205, |
| "grad_norm": 1.21875, |
| "learning_rate": 2.674121550789819e-06, |
| "loss": 0.9947565793991089, |
| "step": 2660 |
| }, |
| { |
| "epoch": 1.638399384378607, |
| "grad_norm": 0.74609375, |
| "learning_rate": 2.6696982749178596e-06, |
| "loss": 0.9547367691993713, |
| "step": 2662 |
| }, |
| { |
| "epoch": 1.639630627164294, |
| "grad_norm": 2.28125, |
| "learning_rate": 2.6652882324251095e-06, |
| "loss": 1.0342196226119995, |
| "step": 2664 |
| }, |
| { |
| "epoch": 1.6408618699499806, |
| "grad_norm": 4.625, |
| "learning_rate": 2.6608914408354355e-06, |
| "loss": 1.2955328226089478, |
| "step": 2666 |
| }, |
| { |
| "epoch": 1.6420931127356675, |
| "grad_norm": 3.96875, |
| "learning_rate": 2.6565079176200503e-06, |
| "loss": 1.697257399559021, |
| "step": 2668 |
| }, |
| { |
| "epoch": 1.6433243555213544, |
| "grad_norm": 6.46875, |
| "learning_rate": 2.6521376801974437e-06, |
| "loss": 1.003428339958191, |
| "step": 2670 |
| }, |
| { |
| "epoch": 1.6445555983070412, |
| "grad_norm": 59.25, |
| "learning_rate": 2.647780745933312e-06, |
| "loss": 0.1837526559829712, |
| "step": 2672 |
| }, |
| { |
| "epoch": 1.645786841092728, |
| "grad_norm": 2.296875, |
| "learning_rate": 2.64343713214049e-06, |
| "loss": 0.8456532955169678, |
| "step": 2674 |
| }, |
| { |
| "epoch": 1.6470180838784148, |
| "grad_norm": 4.5625, |
| "learning_rate": 2.6391068560788814e-06, |
| "loss": 1.4209729433059692, |
| "step": 2676 |
| }, |
| { |
| "epoch": 1.6482493266641016, |
| "grad_norm": 3.46875, |
| "learning_rate": 2.6347899349553916e-06, |
| "loss": 1.708794355392456, |
| "step": 2678 |
| }, |
| { |
| "epoch": 1.6494805694497883, |
| "grad_norm": 3.328125, |
| "learning_rate": 2.630486385923858e-06, |
| "loss": 1.7653048038482666, |
| "step": 2680 |
| }, |
| { |
| "epoch": 1.6507118122354751, |
| "grad_norm": 7.75, |
| "learning_rate": 2.6261962260849845e-06, |
| "loss": 1.6216604709625244, |
| "step": 2682 |
| }, |
| { |
| "epoch": 1.651943055021162, |
| "grad_norm": 3.6875, |
| "learning_rate": 2.6219194724862636e-06, |
| "loss": 1.884316325187683, |
| "step": 2684 |
| }, |
| { |
| "epoch": 1.6531742978068489, |
| "grad_norm": 2.53125, |
| "learning_rate": 2.617656142121927e-06, |
| "loss": 1.6673423051834106, |
| "step": 2686 |
| }, |
| { |
| "epoch": 1.6544055405925358, |
| "grad_norm": 2.140625, |
| "learning_rate": 2.6134062519328596e-06, |
| "loss": 1.3633517026901245, |
| "step": 2688 |
| }, |
| { |
| "epoch": 1.6556367833782224, |
| "grad_norm": 2.640625, |
| "learning_rate": 2.609169818806544e-06, |
| "loss": 1.4055132865905762, |
| "step": 2690 |
| }, |
| { |
| "epoch": 1.656868026163909, |
| "grad_norm": 2.109375, |
| "learning_rate": 2.6049468595769866e-06, |
| "loss": 1.4592944383621216, |
| "step": 2692 |
| }, |
| { |
| "epoch": 1.658099268949596, |
| "grad_norm": 3.0625, |
| "learning_rate": 2.6007373910246586e-06, |
| "loss": 1.468887448310852, |
| "step": 2694 |
| }, |
| { |
| "epoch": 1.6593305117352828, |
| "grad_norm": 3.0, |
| "learning_rate": 2.596541429876419e-06, |
| "loss": 1.43437659740448, |
| "step": 2696 |
| }, |
| { |
| "epoch": 1.6605617545209697, |
| "grad_norm": 4.15625, |
| "learning_rate": 2.5923589928054545e-06, |
| "loss": 1.3961412906646729, |
| "step": 2698 |
| }, |
| { |
| "epoch": 1.6617929973066565, |
| "grad_norm": 2.21875, |
| "learning_rate": 2.588190096431212e-06, |
| "loss": 1.3536356687545776, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.6630242400923432, |
| "grad_norm": 7.09375, |
| "learning_rate": 2.5840347573193364e-06, |
| "loss": 1.5625301599502563, |
| "step": 2702 |
| }, |
| { |
| "epoch": 1.66425548287803, |
| "grad_norm": 3.75, |
| "learning_rate": 2.579892991981597e-06, |
| "loss": 1.809969186782837, |
| "step": 2704 |
| }, |
| { |
| "epoch": 1.6654867256637167, |
| "grad_norm": 2.578125, |
| "learning_rate": 2.5757648168758277e-06, |
| "loss": 1.6495802402496338, |
| "step": 2706 |
| }, |
| { |
| "epoch": 1.6667179684494036, |
| "grad_norm": 2.890625, |
| "learning_rate": 2.571650248405858e-06, |
| "loss": 1.403519630432129, |
| "step": 2708 |
| }, |
| { |
| "epoch": 1.6679492112350904, |
| "grad_norm": 1.7734375, |
| "learning_rate": 2.5675493029214544e-06, |
| "loss": 1.4832587242126465, |
| "step": 2710 |
| }, |
| { |
| "epoch": 1.6691804540207773, |
| "grad_norm": 2.203125, |
| "learning_rate": 2.563461996718244e-06, |
| "loss": 1.3312305212020874, |
| "step": 2712 |
| }, |
| { |
| "epoch": 1.6704116968064642, |
| "grad_norm": 3.71875, |
| "learning_rate": 2.559388346037662e-06, |
| "loss": 1.5110353231430054, |
| "step": 2714 |
| }, |
| { |
| "epoch": 1.6716429395921508, |
| "grad_norm": 1.8125, |
| "learning_rate": 2.5553283670668783e-06, |
| "loss": 1.3926719427108765, |
| "step": 2716 |
| }, |
| { |
| "epoch": 1.6728741823778375, |
| "grad_norm": 2.921875, |
| "learning_rate": 2.551282075938739e-06, |
| "loss": 1.2947973012924194, |
| "step": 2718 |
| }, |
| { |
| "epoch": 1.6741054251635243, |
| "grad_norm": 1.765625, |
| "learning_rate": 2.547249488731698e-06, |
| "loss": 1.1865016222000122, |
| "step": 2720 |
| }, |
| { |
| "epoch": 1.6753366679492112, |
| "grad_norm": 3.609375, |
| "learning_rate": 2.5432306214697565e-06, |
| "loss": 1.3613277673721313, |
| "step": 2722 |
| }, |
| { |
| "epoch": 1.676567910734898, |
| "grad_norm": 6.15625, |
| "learning_rate": 2.5392254901223955e-06, |
| "loss": 1.4786993265151978, |
| "step": 2724 |
| }, |
| { |
| "epoch": 1.677799153520585, |
| "grad_norm": 1.4453125, |
| "learning_rate": 2.535234110604517e-06, |
| "loss": 1.3303709030151367, |
| "step": 2726 |
| }, |
| { |
| "epoch": 1.6790303963062716, |
| "grad_norm": 1.0859375, |
| "learning_rate": 2.531256498776376e-06, |
| "loss": 1.0553029775619507, |
| "step": 2728 |
| }, |
| { |
| "epoch": 1.6802616390919585, |
| "grad_norm": 1.4765625, |
| "learning_rate": 2.5272926704435224e-06, |
| "loss": 1.0534037351608276, |
| "step": 2730 |
| }, |
| { |
| "epoch": 1.6814928818776451, |
| "grad_norm": 0.98828125, |
| "learning_rate": 2.523342641356733e-06, |
| "loss": 1.185265064239502, |
| "step": 2732 |
| }, |
| { |
| "epoch": 1.682724124663332, |
| "grad_norm": 2.734375, |
| "learning_rate": 2.519406427211954e-06, |
| "loss": 1.3950031995773315, |
| "step": 2734 |
| }, |
| { |
| "epoch": 1.6839553674490189, |
| "grad_norm": 3.265625, |
| "learning_rate": 2.5154840436502343e-06, |
| "loss": 1.6567586660385132, |
| "step": 2736 |
| }, |
| { |
| "epoch": 1.6851866102347057, |
| "grad_norm": 4.53125, |
| "learning_rate": 2.5115755062576675e-06, |
| "loss": 1.7554434537887573, |
| "step": 2738 |
| }, |
| { |
| "epoch": 1.6864178530203926, |
| "grad_norm": 6.9375, |
| "learning_rate": 2.5076808305653223e-06, |
| "loss": 1.8998305797576904, |
| "step": 2740 |
| }, |
| { |
| "epoch": 1.6876490958060792, |
| "grad_norm": 0.69921875, |
| "learning_rate": 2.503800032049194e-06, |
| "loss": 1.280155897140503, |
| "step": 2742 |
| }, |
| { |
| "epoch": 1.6888803385917661, |
| "grad_norm": 1.7109375, |
| "learning_rate": 2.499933126130129e-06, |
| "loss": 1.109460473060608, |
| "step": 2744 |
| }, |
| { |
| "epoch": 1.6901115813774528, |
| "grad_norm": 2.640625, |
| "learning_rate": 2.4960801281737722e-06, |
| "loss": 1.156549096107483, |
| "step": 2746 |
| }, |
| { |
| "epoch": 1.6913428241631396, |
| "grad_norm": 1.9921875, |
| "learning_rate": 2.492241053490502e-06, |
| "loss": 1.4689304828643799, |
| "step": 2748 |
| }, |
| { |
| "epoch": 1.6925740669488265, |
| "grad_norm": 9.125, |
| "learning_rate": 2.488415917335374e-06, |
| "loss": 1.446475625038147, |
| "step": 2750 |
| }, |
| { |
| "epoch": 1.6938053097345134, |
| "grad_norm": 2.578125, |
| "learning_rate": 2.4846047349080552e-06, |
| "loss": 1.4750339984893799, |
| "step": 2752 |
| }, |
| { |
| "epoch": 1.6950365525202, |
| "grad_norm": 2.921875, |
| "learning_rate": 2.480807521352764e-06, |
| "loss": 1.373404622077942, |
| "step": 2754 |
| }, |
| { |
| "epoch": 1.696267795305887, |
| "grad_norm": 2.34375, |
| "learning_rate": 2.4770242917582134e-06, |
| "loss": 1.4248535633087158, |
| "step": 2756 |
| }, |
| { |
| "epoch": 1.6974990380915735, |
| "grad_norm": 2.03125, |
| "learning_rate": 2.4732550611575503e-06, |
| "loss": 1.3744779825210571, |
| "step": 2758 |
| }, |
| { |
| "epoch": 1.6987302808772604, |
| "grad_norm": 2.5625, |
| "learning_rate": 2.4694998445282937e-06, |
| "loss": 1.3755619525909424, |
| "step": 2760 |
| }, |
| { |
| "epoch": 1.6999615236629473, |
| "grad_norm": 2.234375, |
| "learning_rate": 2.4657586567922766e-06, |
| "loss": 1.4600454568862915, |
| "step": 2762 |
| }, |
| { |
| "epoch": 1.7011927664486342, |
| "grad_norm": 2.671875, |
| "learning_rate": 2.462031512815585e-06, |
| "loss": 1.3545056581497192, |
| "step": 2764 |
| }, |
| { |
| "epoch": 1.702424009234321, |
| "grad_norm": 8.6875, |
| "learning_rate": 2.4583184274085044e-06, |
| "loss": 1.5093567371368408, |
| "step": 2766 |
| }, |
| { |
| "epoch": 1.7036552520200077, |
| "grad_norm": 1.78125, |
| "learning_rate": 2.4546194153254504e-06, |
| "loss": 1.4223270416259766, |
| "step": 2768 |
| }, |
| { |
| "epoch": 1.7048864948056945, |
| "grad_norm": 2.421875, |
| "learning_rate": 2.4509344912649222e-06, |
| "loss": 1.4608110189437866, |
| "step": 2770 |
| }, |
| { |
| "epoch": 1.7061177375913812, |
| "grad_norm": 2.234375, |
| "learning_rate": 2.447263669869434e-06, |
| "loss": 1.4189188480377197, |
| "step": 2772 |
| }, |
| { |
| "epoch": 1.707348980377068, |
| "grad_norm": 2.609375, |
| "learning_rate": 2.443606965725466e-06, |
| "loss": 1.347614049911499, |
| "step": 2774 |
| }, |
| { |
| "epoch": 1.708580223162755, |
| "grad_norm": 1.8984375, |
| "learning_rate": 2.4399643933633977e-06, |
| "loss": 1.2820149660110474, |
| "step": 2776 |
| }, |
| { |
| "epoch": 1.7098114659484418, |
| "grad_norm": 1.3984375, |
| "learning_rate": 2.4363359672574557e-06, |
| "loss": 1.3065999746322632, |
| "step": 2778 |
| }, |
| { |
| "epoch": 1.7110427087341287, |
| "grad_norm": 1.8046875, |
| "learning_rate": 2.4327217018256545e-06, |
| "loss": 1.141822338104248, |
| "step": 2780 |
| }, |
| { |
| "epoch": 1.7122739515198153, |
| "grad_norm": 0.91796875, |
| "learning_rate": 2.4291216114297395e-06, |
| "loss": 1.1083295345306396, |
| "step": 2782 |
| }, |
| { |
| "epoch": 1.713505194305502, |
| "grad_norm": 1.046875, |
| "learning_rate": 2.4255357103751298e-06, |
| "loss": 1.0195274353027344, |
| "step": 2784 |
| }, |
| { |
| "epoch": 1.7147364370911888, |
| "grad_norm": 1.375, |
| "learning_rate": 2.42196401291086e-06, |
| "loss": 0.925314724445343, |
| "step": 2786 |
| }, |
| { |
| "epoch": 1.7159676798768757, |
| "grad_norm": 1.0703125, |
| "learning_rate": 2.4184065332295276e-06, |
| "loss": 1.0302790403366089, |
| "step": 2788 |
| }, |
| { |
| "epoch": 1.7171989226625626, |
| "grad_norm": 15.875, |
| "learning_rate": 2.414863285467232e-06, |
| "loss": 1.4890433549880981, |
| "step": 2790 |
| }, |
| { |
| "epoch": 1.7184301654482494, |
| "grad_norm": 6.625, |
| "learning_rate": 2.4113342837035206e-06, |
| "loss": 1.8335363864898682, |
| "step": 2792 |
| }, |
| { |
| "epoch": 1.719661408233936, |
| "grad_norm": 5.6875, |
| "learning_rate": 2.4078195419613325e-06, |
| "loss": 1.7545239925384521, |
| "step": 2794 |
| }, |
| { |
| "epoch": 1.720892651019623, |
| "grad_norm": 2.453125, |
| "learning_rate": 2.4043190742069432e-06, |
| "loss": 1.3311874866485596, |
| "step": 2796 |
| }, |
| { |
| "epoch": 1.7221238938053096, |
| "grad_norm": 4.46875, |
| "learning_rate": 2.4008328943499077e-06, |
| "loss": 1.400950312614441, |
| "step": 2798 |
| }, |
| { |
| "epoch": 1.7233551365909965, |
| "grad_norm": 7.3125, |
| "learning_rate": 2.397361016243007e-06, |
| "loss": 1.402868390083313, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.7245863793766834, |
| "grad_norm": 3.03125, |
| "learning_rate": 2.3939034536821925e-06, |
| "loss": 1.3764790296554565, |
| "step": 2802 |
| }, |
| { |
| "epoch": 1.7258176221623702, |
| "grad_norm": 2.65625, |
| "learning_rate": 2.390460220406531e-06, |
| "loss": 1.4015588760375977, |
| "step": 2804 |
| }, |
| { |
| "epoch": 1.727048864948057, |
| "grad_norm": 8.5625, |
| "learning_rate": 2.3870313300981492e-06, |
| "loss": 1.4938013553619385, |
| "step": 2806 |
| }, |
| { |
| "epoch": 1.7282801077337437, |
| "grad_norm": 2.125, |
| "learning_rate": 2.383616796382181e-06, |
| "loss": 1.3828613758087158, |
| "step": 2808 |
| }, |
| { |
| "epoch": 1.7295113505194304, |
| "grad_norm": 4.75, |
| "learning_rate": 2.3802166328267104e-06, |
| "loss": 1.0383518934249878, |
| "step": 2810 |
| }, |
| { |
| "epoch": 1.7307425933051173, |
| "grad_norm": 3.59375, |
| "learning_rate": 2.3768308529427235e-06, |
| "loss": 0.5984392762184143, |
| "step": 2812 |
| }, |
| { |
| "epoch": 1.7319738360908041, |
| "grad_norm": 2.84375, |
| "learning_rate": 2.3734594701840484e-06, |
| "loss": 1.0914403200149536, |
| "step": 2814 |
| }, |
| { |
| "epoch": 1.733205078876491, |
| "grad_norm": 2.46875, |
| "learning_rate": 2.370102497947305e-06, |
| "loss": 1.3296228647232056, |
| "step": 2816 |
| }, |
| { |
| "epoch": 1.7344363216621779, |
| "grad_norm": 1.203125, |
| "learning_rate": 2.366759949571851e-06, |
| "loss": 1.2498970031738281, |
| "step": 2818 |
| }, |
| { |
| "epoch": 1.7356675644478645, |
| "grad_norm": 1.1796875, |
| "learning_rate": 2.3634318383397303e-06, |
| "loss": 1.0500065088272095, |
| "step": 2820 |
| }, |
| { |
| "epoch": 1.7368988072335514, |
| "grad_norm": 2.46875, |
| "learning_rate": 2.3601181774756173e-06, |
| "loss": 1.252281665802002, |
| "step": 2822 |
| }, |
| { |
| "epoch": 1.738130050019238, |
| "grad_norm": 4.4375, |
| "learning_rate": 2.3568189801467657e-06, |
| "loss": 1.373681664466858, |
| "step": 2824 |
| }, |
| { |
| "epoch": 1.739361292804925, |
| "grad_norm": 0.734375, |
| "learning_rate": 2.353534259462958e-06, |
| "loss": 1.2320070266723633, |
| "step": 2826 |
| }, |
| { |
| "epoch": 1.7405925355906118, |
| "grad_norm": 1.40625, |
| "learning_rate": 2.350264028476452e-06, |
| "loss": 1.0676991939544678, |
| "step": 2828 |
| }, |
| { |
| "epoch": 1.7418237783762986, |
| "grad_norm": 3.796875, |
| "learning_rate": 2.3470083001819276e-06, |
| "loss": 1.2847479581832886, |
| "step": 2830 |
| }, |
| { |
| "epoch": 1.7430550211619855, |
| "grad_norm": 4.78125, |
| "learning_rate": 2.343767087516437e-06, |
| "loss": 1.801426649093628, |
| "step": 2832 |
| }, |
| { |
| "epoch": 1.7442862639476722, |
| "grad_norm": 4.3125, |
| "learning_rate": 2.3405404033593516e-06, |
| "loss": 1.8096853494644165, |
| "step": 2834 |
| }, |
| { |
| "epoch": 1.745517506733359, |
| "grad_norm": 3.0625, |
| "learning_rate": 2.3373282605323154e-06, |
| "loss": 1.4246339797973633, |
| "step": 2836 |
| }, |
| { |
| "epoch": 1.7467487495190457, |
| "grad_norm": 9.4375, |
| "learning_rate": 2.3341306717991864e-06, |
| "loss": 1.9062341451644897, |
| "step": 2838 |
| }, |
| { |
| "epoch": 1.7479799923047326, |
| "grad_norm": 5.5, |
| "learning_rate": 2.330947649865992e-06, |
| "loss": 1.4905970096588135, |
| "step": 2840 |
| }, |
| { |
| "epoch": 1.7492112350904194, |
| "grad_norm": 0.9375, |
| "learning_rate": 2.327779207380876e-06, |
| "loss": 0.9517359137535095, |
| "step": 2842 |
| }, |
| { |
| "epoch": 1.7504424778761063, |
| "grad_norm": 2.671875, |
| "learning_rate": 2.3246253569340506e-06, |
| "loss": 1.1234976053237915, |
| "step": 2844 |
| }, |
| { |
| "epoch": 1.751673720661793, |
| "grad_norm": 2.671875, |
| "learning_rate": 2.3214861110577416e-06, |
| "loss": 1.0962259769439697, |
| "step": 2846 |
| }, |
| { |
| "epoch": 1.7529049634474798, |
| "grad_norm": 1.46875, |
| "learning_rate": 2.318361482226145e-06, |
| "loss": 1.1401879787445068, |
| "step": 2848 |
| }, |
| { |
| "epoch": 1.7541362062331665, |
| "grad_norm": 4.09375, |
| "learning_rate": 2.3152514828553716e-06, |
| "loss": 1.123967170715332, |
| "step": 2850 |
| }, |
| { |
| "epoch": 1.7553674490188533, |
| "grad_norm": 4.0625, |
| "learning_rate": 2.312156125303401e-06, |
| "loss": 1.9278596639633179, |
| "step": 2852 |
| }, |
| { |
| "epoch": 1.7565986918045402, |
| "grad_norm": 2.359375, |
| "learning_rate": 2.309075421870032e-06, |
| "loss": 1.457679033279419, |
| "step": 2854 |
| }, |
| { |
| "epoch": 1.757829934590227, |
| "grad_norm": 3.046875, |
| "learning_rate": 2.3060093847968333e-06, |
| "loss": 1.4130196571350098, |
| "step": 2856 |
| }, |
| { |
| "epoch": 1.759061177375914, |
| "grad_norm": 2.984375, |
| "learning_rate": 2.302958026267094e-06, |
| "loss": 1.3906093835830688, |
| "step": 2858 |
| }, |
| { |
| "epoch": 1.7602924201616006, |
| "grad_norm": 2.265625, |
| "learning_rate": 2.299921358405778e-06, |
| "loss": 1.4181544780731201, |
| "step": 2860 |
| }, |
| { |
| "epoch": 1.7615236629472875, |
| "grad_norm": 2.3125, |
| "learning_rate": 2.2968993932794724e-06, |
| "loss": 1.576407551765442, |
| "step": 2862 |
| }, |
| { |
| "epoch": 1.762754905732974, |
| "grad_norm": 2.734375, |
| "learning_rate": 2.293892142896341e-06, |
| "loss": 1.506213903427124, |
| "step": 2864 |
| }, |
| { |
| "epoch": 1.763986148518661, |
| "grad_norm": 2.296875, |
| "learning_rate": 2.290899619206078e-06, |
| "loss": 1.5014433860778809, |
| "step": 2866 |
| }, |
| { |
| "epoch": 1.7652173913043478, |
| "grad_norm": 1.6875, |
| "learning_rate": 2.2879218340998575e-06, |
| "loss": 1.075061559677124, |
| "step": 2868 |
| }, |
| { |
| "epoch": 1.7664486340900347, |
| "grad_norm": 1.7109375, |
| "learning_rate": 2.2849587994102908e-06, |
| "loss": 1.2030928134918213, |
| "step": 2870 |
| }, |
| { |
| "epoch": 1.7676798768757216, |
| "grad_norm": 2.796875, |
| "learning_rate": 2.2820105269113725e-06, |
| "loss": 1.3955581188201904, |
| "step": 2872 |
| }, |
| { |
| "epoch": 1.7689111196614082, |
| "grad_norm": 6.625, |
| "learning_rate": 2.2790770283184404e-06, |
| "loss": 1.318220853805542, |
| "step": 2874 |
| }, |
| { |
| "epoch": 1.7701423624470949, |
| "grad_norm": 3.40625, |
| "learning_rate": 2.276158315288127e-06, |
| "loss": 1.6458066701889038, |
| "step": 2876 |
| }, |
| { |
| "epoch": 1.7713736052327818, |
| "grad_norm": 4.6875, |
| "learning_rate": 2.2732543994183104e-06, |
| "loss": 1.8615339994430542, |
| "step": 2878 |
| }, |
| { |
| "epoch": 1.7726048480184686, |
| "grad_norm": 4.28125, |
| "learning_rate": 2.2703652922480716e-06, |
| "loss": 1.877263069152832, |
| "step": 2880 |
| }, |
| { |
| "epoch": 1.7738360908041555, |
| "grad_norm": 2.8125, |
| "learning_rate": 2.2674910052576456e-06, |
| "loss": 1.2448543310165405, |
| "step": 2882 |
| }, |
| { |
| "epoch": 1.7750673335898424, |
| "grad_norm": 1.6015625, |
| "learning_rate": 2.26463154986838e-06, |
| "loss": 0.9634664058685303, |
| "step": 2884 |
| }, |
| { |
| "epoch": 1.776298576375529, |
| "grad_norm": 3.796875, |
| "learning_rate": 2.261786937442686e-06, |
| "loss": 0.9872276186943054, |
| "step": 2886 |
| }, |
| { |
| "epoch": 1.7775298191612159, |
| "grad_norm": 4.40625, |
| "learning_rate": 2.2589571792839933e-06, |
| "loss": 1.7003523111343384, |
| "step": 2888 |
| }, |
| { |
| "epoch": 1.7787610619469025, |
| "grad_norm": 3.234375, |
| "learning_rate": 2.256142286636708e-06, |
| "loss": 1.7901766300201416, |
| "step": 2890 |
| }, |
| { |
| "epoch": 1.7799923047325894, |
| "grad_norm": 2.140625, |
| "learning_rate": 2.2533422706861665e-06, |
| "loss": 1.3721871376037598, |
| "step": 2892 |
| }, |
| { |
| "epoch": 1.7812235475182763, |
| "grad_norm": 1.921875, |
| "learning_rate": 2.2505571425585893e-06, |
| "loss": 1.4578273296356201, |
| "step": 2894 |
| }, |
| { |
| "epoch": 1.7824547903039631, |
| "grad_norm": 1.875, |
| "learning_rate": 2.247786913321037e-06, |
| "loss": 1.0729336738586426, |
| "step": 2896 |
| }, |
| { |
| "epoch": 1.78368603308965, |
| "grad_norm": 2.4375, |
| "learning_rate": 2.245031593981371e-06, |
| "loss": 1.1757901906967163, |
| "step": 2898 |
| }, |
| { |
| "epoch": 1.7849172758753367, |
| "grad_norm": 4.34375, |
| "learning_rate": 2.242291195488204e-06, |
| "loss": 1.8881491422653198, |
| "step": 2900 |
| }, |
| { |
| "epoch": 1.7861485186610233, |
| "grad_norm": 3.046875, |
| "learning_rate": 2.2395657287308597e-06, |
| "loss": 1.7370243072509766, |
| "step": 2902 |
| }, |
| { |
| "epoch": 1.7873797614467102, |
| "grad_norm": 2.984375, |
| "learning_rate": 2.2368552045393277e-06, |
| "loss": 1.454010009765625, |
| "step": 2904 |
| }, |
| { |
| "epoch": 1.788611004232397, |
| "grad_norm": 2.890625, |
| "learning_rate": 2.2341596336842223e-06, |
| "loss": 1.459133267402649, |
| "step": 2906 |
| }, |
| { |
| "epoch": 1.789842247018084, |
| "grad_norm": 2.375, |
| "learning_rate": 2.2314790268767393e-06, |
| "loss": 1.3337326049804688, |
| "step": 2908 |
| }, |
| { |
| "epoch": 1.7910734898037708, |
| "grad_norm": 5.875, |
| "learning_rate": 2.2288133947686115e-06, |
| "loss": 1.2916251420974731, |
| "step": 2910 |
| }, |
| { |
| "epoch": 1.7923047325894574, |
| "grad_norm": 1.8046875, |
| "learning_rate": 2.226162747952068e-06, |
| "loss": 1.0963714122772217, |
| "step": 2912 |
| }, |
| { |
| "epoch": 1.7935359753751443, |
| "grad_norm": 1.9609375, |
| "learning_rate": 2.223527096959793e-06, |
| "loss": 1.1820143461227417, |
| "step": 2914 |
| }, |
| { |
| "epoch": 1.794767218160831, |
| "grad_norm": 2.984375, |
| "learning_rate": 2.220906452264882e-06, |
| "loss": 1.2962974309921265, |
| "step": 2916 |
| }, |
| { |
| "epoch": 1.7959984609465178, |
| "grad_norm": 16.125, |
| "learning_rate": 2.2183008242808025e-06, |
| "loss": 1.6060822010040283, |
| "step": 2918 |
| }, |
| { |
| "epoch": 1.7972297037322047, |
| "grad_norm": 3.125, |
| "learning_rate": 2.215710223361349e-06, |
| "loss": 1.433869481086731, |
| "step": 2920 |
| }, |
| { |
| "epoch": 1.7984609465178916, |
| "grad_norm": 4.65625, |
| "learning_rate": 2.2131346598006046e-06, |
| "loss": 1.6119310855865479, |
| "step": 2922 |
| }, |
| { |
| "epoch": 1.7996921893035784, |
| "grad_norm": 3.640625, |
| "learning_rate": 2.210574143832902e-06, |
| "loss": 1.397140622138977, |
| "step": 2924 |
| }, |
| { |
| "epoch": 1.800923432089265, |
| "grad_norm": 2.546875, |
| "learning_rate": 2.208028685632776e-06, |
| "loss": 1.4758628606796265, |
| "step": 2926 |
| }, |
| { |
| "epoch": 1.802154674874952, |
| "grad_norm": 2.796875, |
| "learning_rate": 2.205498295314931e-06, |
| "loss": 1.3613635301589966, |
| "step": 2928 |
| }, |
| { |
| "epoch": 1.8033859176606386, |
| "grad_norm": 4.09375, |
| "learning_rate": 2.2029829829341963e-06, |
| "loss": 1.2514557838439941, |
| "step": 2930 |
| }, |
| { |
| "epoch": 1.8046171604463255, |
| "grad_norm": 0.9296875, |
| "learning_rate": 2.200482758485486e-06, |
| "loss": 1.0420831441879272, |
| "step": 2932 |
| }, |
| { |
| "epoch": 1.8058484032320123, |
| "grad_norm": 1.671875, |
| "learning_rate": 2.197997631903763e-06, |
| "loss": 1.0888323783874512, |
| "step": 2934 |
| }, |
| { |
| "epoch": 1.8070796460176992, |
| "grad_norm": 3.484375, |
| "learning_rate": 2.1955276130639934e-06, |
| "loss": 1.4852358102798462, |
| "step": 2936 |
| }, |
| { |
| "epoch": 1.8083108888033859, |
| "grad_norm": 1.171875, |
| "learning_rate": 2.1930727117811135e-06, |
| "loss": 1.3741511106491089, |
| "step": 2938 |
| }, |
| { |
| "epoch": 1.8095421315890727, |
| "grad_norm": 1.1328125, |
| "learning_rate": 2.1906329378099856e-06, |
| "loss": 1.0535756349563599, |
| "step": 2940 |
| }, |
| { |
| "epoch": 1.8107733743747594, |
| "grad_norm": 8.625, |
| "learning_rate": 2.1882083008453653e-06, |
| "loss": 1.3297520875930786, |
| "step": 2942 |
| }, |
| { |
| "epoch": 1.8120046171604463, |
| "grad_norm": 4.25, |
| "learning_rate": 2.185798810521855e-06, |
| "loss": 1.585155963897705, |
| "step": 2944 |
| }, |
| { |
| "epoch": 1.8132358599461331, |
| "grad_norm": 3.03125, |
| "learning_rate": 2.183404476413874e-06, |
| "loss": 1.9898563623428345, |
| "step": 2946 |
| }, |
| { |
| "epoch": 1.81446710273182, |
| "grad_norm": 3.640625, |
| "learning_rate": 2.181025308035614e-06, |
| "loss": 1.9423363208770752, |
| "step": 2948 |
| }, |
| { |
| "epoch": 1.8156983455175069, |
| "grad_norm": 42.0, |
| "learning_rate": 2.178661314841005e-06, |
| "loss": 1.3260115385055542, |
| "step": 2950 |
| }, |
| { |
| "epoch": 1.8169295883031935, |
| "grad_norm": 1.875, |
| "learning_rate": 2.1763125062236744e-06, |
| "loss": 1.3768151998519897, |
| "step": 2952 |
| }, |
| { |
| "epoch": 1.8181608310888804, |
| "grad_norm": 2.03125, |
| "learning_rate": 2.1739788915169138e-06, |
| "loss": 1.415548324584961, |
| "step": 2954 |
| }, |
| { |
| "epoch": 1.819392073874567, |
| "grad_norm": 3.65625, |
| "learning_rate": 2.17166047999364e-06, |
| "loss": 1.480286955833435, |
| "step": 2956 |
| }, |
| { |
| "epoch": 1.820623316660254, |
| "grad_norm": 4.5, |
| "learning_rate": 2.1693572808663567e-06, |
| "loss": 1.5904080867767334, |
| "step": 2958 |
| }, |
| { |
| "epoch": 1.8218545594459408, |
| "grad_norm": 5.625, |
| "learning_rate": 2.167069303287119e-06, |
| "loss": 1.708762288093567, |
| "step": 2960 |
| }, |
| { |
| "epoch": 1.8230858022316276, |
| "grad_norm": 3.46875, |
| "learning_rate": 2.1647965563474997e-06, |
| "loss": 1.8747527599334717, |
| "step": 2962 |
| }, |
| { |
| "epoch": 1.8243170450173145, |
| "grad_norm": 2.671875, |
| "learning_rate": 2.1625390490785485e-06, |
| "loss": 1.7154184579849243, |
| "step": 2964 |
| }, |
| { |
| "epoch": 1.8255482878030012, |
| "grad_norm": 4.15625, |
| "learning_rate": 2.1602967904507578e-06, |
| "loss": 1.6695775985717773, |
| "step": 2966 |
| }, |
| { |
| "epoch": 1.8267795305886878, |
| "grad_norm": 2.484375, |
| "learning_rate": 2.1580697893740287e-06, |
| "loss": 1.418200969696045, |
| "step": 2968 |
| }, |
| { |
| "epoch": 1.8280107733743747, |
| "grad_norm": 3.484375, |
| "learning_rate": 2.1558580546976363e-06, |
| "loss": 1.2916027307510376, |
| "step": 2970 |
| }, |
| { |
| "epoch": 1.8292420161600615, |
| "grad_norm": 0.7578125, |
| "learning_rate": 2.1536615952101886e-06, |
| "loss": 0.9269257187843323, |
| "step": 2972 |
| }, |
| { |
| "epoch": 1.8304732589457484, |
| "grad_norm": 0.984375, |
| "learning_rate": 2.151480419639599e-06, |
| "loss": 1.0978361368179321, |
| "step": 2974 |
| }, |
| { |
| "epoch": 1.8317045017314353, |
| "grad_norm": 5.28125, |
| "learning_rate": 2.1493145366530464e-06, |
| "loss": 1.0932646989822388, |
| "step": 2976 |
| }, |
| { |
| "epoch": 1.832935744517122, |
| "grad_norm": 0.92578125, |
| "learning_rate": 2.1471639548569448e-06, |
| "loss": 1.1995317935943604, |
| "step": 2978 |
| }, |
| { |
| "epoch": 1.8341669873028088, |
| "grad_norm": 1.015625, |
| "learning_rate": 2.1450286827969046e-06, |
| "loss": 1.2731231451034546, |
| "step": 2980 |
| }, |
| { |
| "epoch": 1.8353982300884955, |
| "grad_norm": 2.0, |
| "learning_rate": 2.1429087289577018e-06, |
| "loss": 1.3012076616287231, |
| "step": 2982 |
| }, |
| { |
| "epoch": 1.8366294728741823, |
| "grad_norm": 2.8125, |
| "learning_rate": 2.1408041017632443e-06, |
| "loss": 1.3655699491500854, |
| "step": 2984 |
| }, |
| { |
| "epoch": 1.8378607156598692, |
| "grad_norm": 3.046875, |
| "learning_rate": 2.1387148095765366e-06, |
| "loss": 1.5270923376083374, |
| "step": 2986 |
| }, |
| { |
| "epoch": 1.839091958445556, |
| "grad_norm": 3.21875, |
| "learning_rate": 2.1366408606996488e-06, |
| "loss": 1.518768548965454, |
| "step": 2988 |
| }, |
| { |
| "epoch": 1.840323201231243, |
| "grad_norm": 3.65625, |
| "learning_rate": 2.1345822633736804e-06, |
| "loss": 1.5318608283996582, |
| "step": 2990 |
| }, |
| { |
| "epoch": 1.8415544440169296, |
| "grad_norm": 4.84375, |
| "learning_rate": 2.1325390257787324e-06, |
| "loss": 2.0122017860412598, |
| "step": 2992 |
| }, |
| { |
| "epoch": 1.8427856868026162, |
| "grad_norm": 2.484375, |
| "learning_rate": 2.1305111560338686e-06, |
| "loss": 1.6048575639724731, |
| "step": 2994 |
| }, |
| { |
| "epoch": 1.844016929588303, |
| "grad_norm": 2.96875, |
| "learning_rate": 2.1284986621970894e-06, |
| "loss": 1.4295393228530884, |
| "step": 2996 |
| }, |
| { |
| "epoch": 1.84524817237399, |
| "grad_norm": 8.125, |
| "learning_rate": 2.126501552265296e-06, |
| "loss": 1.5011441707611084, |
| "step": 2998 |
| }, |
| { |
| "epoch": 1.8464794151596768, |
| "grad_norm": 3.71875, |
| "learning_rate": 2.1245198341742587e-06, |
| "loss": 1.8417302370071411, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.8477106579453637, |
| "grad_norm": 2.859375, |
| "learning_rate": 2.1225535157985893e-06, |
| "loss": 1.7056752443313599, |
| "step": 3002 |
| }, |
| { |
| "epoch": 1.8489419007310504, |
| "grad_norm": 3.65625, |
| "learning_rate": 2.120602604951704e-06, |
| "loss": 1.4045121669769287, |
| "step": 3004 |
| }, |
| { |
| "epoch": 1.8501731435167372, |
| "grad_norm": 3.65625, |
| "learning_rate": 2.118667109385796e-06, |
| "loss": 1.4713330268859863, |
| "step": 3006 |
| }, |
| { |
| "epoch": 1.8514043863024239, |
| "grad_norm": 3.640625, |
| "learning_rate": 2.1167470367918042e-06, |
| "loss": 1.7798014879226685, |
| "step": 3008 |
| }, |
| { |
| "epoch": 1.8526356290881107, |
| "grad_norm": 2.625, |
| "learning_rate": 2.114842394799382e-06, |
| "loss": 1.5931851863861084, |
| "step": 3010 |
| }, |
| { |
| "epoch": 1.8538668718737976, |
| "grad_norm": 1.7734375, |
| "learning_rate": 2.112953190976869e-06, |
| "loss": 1.183977484703064, |
| "step": 3012 |
| }, |
| { |
| "epoch": 1.8550981146594845, |
| "grad_norm": 2.34375, |
| "learning_rate": 2.111079432831256e-06, |
| "loss": 1.3038005828857422, |
| "step": 3014 |
| }, |
| { |
| "epoch": 1.8563293574451714, |
| "grad_norm": 3.0625, |
| "learning_rate": 2.109221127808161e-06, |
| "loss": 1.7839789390563965, |
| "step": 3016 |
| }, |
| { |
| "epoch": 1.857560600230858, |
| "grad_norm": 4.09375, |
| "learning_rate": 2.1073782832917974e-06, |
| "loss": 1.9610928297042847, |
| "step": 3018 |
| }, |
| { |
| "epoch": 1.8587918430165449, |
| "grad_norm": 6.25, |
| "learning_rate": 2.105550906604942e-06, |
| "loss": 1.7422419786453247, |
| "step": 3020 |
| }, |
| { |
| "epoch": 1.8600230858022315, |
| "grad_norm": 3.296875, |
| "learning_rate": 2.103739005008909e-06, |
| "loss": 1.6052628755569458, |
| "step": 3022 |
| }, |
| { |
| "epoch": 1.8612543285879184, |
| "grad_norm": 17.75, |
| "learning_rate": 2.101942585703522e-06, |
| "loss": 1.737350583076477, |
| "step": 3024 |
| }, |
| { |
| "epoch": 1.8624855713736053, |
| "grad_norm": 8.4375, |
| "learning_rate": 2.1001616558270812e-06, |
| "loss": 1.894784688949585, |
| "step": 3026 |
| }, |
| { |
| "epoch": 1.8637168141592921, |
| "grad_norm": 4.34375, |
| "learning_rate": 2.098396222456341e-06, |
| "loss": 1.7377785444259644, |
| "step": 3028 |
| }, |
| { |
| "epoch": 1.8649480569449788, |
| "grad_norm": 3.578125, |
| "learning_rate": 2.0966462926064745e-06, |
| "loss": 1.8874013423919678, |
| "step": 3030 |
| }, |
| { |
| "epoch": 1.8661792997306657, |
| "grad_norm": 3.328125, |
| "learning_rate": 2.0949118732310513e-06, |
| "loss": 1.5274291038513184, |
| "step": 3032 |
| }, |
| { |
| "epoch": 1.8674105425163523, |
| "grad_norm": 3.0625, |
| "learning_rate": 2.09319297122201e-06, |
| "loss": 1.6374666690826416, |
| "step": 3034 |
| }, |
| { |
| "epoch": 1.8686417853020392, |
| "grad_norm": 4.1875, |
| "learning_rate": 2.0914895934096256e-06, |
| "loss": 1.0928645133972168, |
| "step": 3036 |
| }, |
| { |
| "epoch": 1.869873028087726, |
| "grad_norm": 3.09375, |
| "learning_rate": 2.089801746562489e-06, |
| "loss": 1.419162392616272, |
| "step": 3038 |
| }, |
| { |
| "epoch": 1.871104270873413, |
| "grad_norm": 4.6875, |
| "learning_rate": 2.088129437387473e-06, |
| "loss": 0.9393894076347351, |
| "step": 3040 |
| }, |
| { |
| "epoch": 1.8723355136590998, |
| "grad_norm": 2.859375, |
| "learning_rate": 2.086472672529713e-06, |
| "loss": 1.576371431350708, |
| "step": 3042 |
| }, |
| { |
| "epoch": 1.8735667564447864, |
| "grad_norm": 2.484375, |
| "learning_rate": 2.0848314585725764e-06, |
| "loss": 1.6301262378692627, |
| "step": 3044 |
| }, |
| { |
| "epoch": 1.8747979992304733, |
| "grad_norm": 1.4453125, |
| "learning_rate": 2.0832058020376353e-06, |
| "loss": 1.2185789346694946, |
| "step": 3046 |
| }, |
| { |
| "epoch": 1.87602924201616, |
| "grad_norm": 0.9296875, |
| "learning_rate": 2.081595709384644e-06, |
| "loss": 1.14590585231781, |
| "step": 3048 |
| }, |
| { |
| "epoch": 1.8772604848018468, |
| "grad_norm": 1.8203125, |
| "learning_rate": 2.080001187011511e-06, |
| "loss": 1.0869334936141968, |
| "step": 3050 |
| }, |
| { |
| "epoch": 1.8784917275875337, |
| "grad_norm": 4.4375, |
| "learning_rate": 2.0784222412542755e-06, |
| "loss": 1.4626398086547852, |
| "step": 3052 |
| }, |
| { |
| "epoch": 1.8797229703732206, |
| "grad_norm": 3.734375, |
| "learning_rate": 2.0768588783870785e-06, |
| "loss": 1.3795206546783447, |
| "step": 3054 |
| }, |
| { |
| "epoch": 1.8809542131589074, |
| "grad_norm": 3.53125, |
| "learning_rate": 2.0753111046221435e-06, |
| "loss": 1.4011050462722778, |
| "step": 3056 |
| }, |
| { |
| "epoch": 1.882185455944594, |
| "grad_norm": 1.5234375, |
| "learning_rate": 2.073778926109746e-06, |
| "loss": 1.3742821216583252, |
| "step": 3058 |
| }, |
| { |
| "epoch": 1.8834166987302807, |
| "grad_norm": 1.6328125, |
| "learning_rate": 2.0722623489381943e-06, |
| "loss": 1.282598614692688, |
| "step": 3060 |
| }, |
| { |
| "epoch": 1.8846479415159676, |
| "grad_norm": 2.765625, |
| "learning_rate": 2.0707613791338006e-06, |
| "loss": 1.3038278818130493, |
| "step": 3062 |
| }, |
| { |
| "epoch": 1.8858791843016545, |
| "grad_norm": 3.265625, |
| "learning_rate": 2.06927602266086e-06, |
| "loss": 1.6639541387557983, |
| "step": 3064 |
| }, |
| { |
| "epoch": 1.8871104270873413, |
| "grad_norm": 2.734375, |
| "learning_rate": 2.0678062854216265e-06, |
| "loss": 1.669793725013733, |
| "step": 3066 |
| }, |
| { |
| "epoch": 1.8883416698730282, |
| "grad_norm": 2.109375, |
| "learning_rate": 2.0663521732562896e-06, |
| "loss": 1.4260603189468384, |
| "step": 3068 |
| }, |
| { |
| "epoch": 1.8895729126587149, |
| "grad_norm": 1.84375, |
| "learning_rate": 2.064913691942947e-06, |
| "loss": 1.317846417427063, |
| "step": 3070 |
| }, |
| { |
| "epoch": 1.8908041554444017, |
| "grad_norm": 1.21875, |
| "learning_rate": 2.063490847197591e-06, |
| "loss": 0.9934130907058716, |
| "step": 3072 |
| }, |
| { |
| "epoch": 1.8920353982300884, |
| "grad_norm": 2.484375, |
| "learning_rate": 2.062083644674075e-06, |
| "loss": 1.2410752773284912, |
| "step": 3074 |
| }, |
| { |
| "epoch": 1.8932666410157752, |
| "grad_norm": 5.46875, |
| "learning_rate": 2.0606920899640993e-06, |
| "loss": 1.5223228931427002, |
| "step": 3076 |
| }, |
| { |
| "epoch": 1.8944978838014621, |
| "grad_norm": 1.7109375, |
| "learning_rate": 2.059316188597182e-06, |
| "loss": 1.3340084552764893, |
| "step": 3078 |
| }, |
| { |
| "epoch": 1.895729126587149, |
| "grad_norm": 0.8515625, |
| "learning_rate": 2.057955946040645e-06, |
| "loss": 1.0385233163833618, |
| "step": 3080 |
| }, |
| { |
| "epoch": 1.8969603693728359, |
| "grad_norm": 1.09375, |
| "learning_rate": 2.0566113676995854e-06, |
| "loss": 1.1956368684768677, |
| "step": 3082 |
| }, |
| { |
| "epoch": 1.8981916121585225, |
| "grad_norm": 0.7109375, |
| "learning_rate": 2.055282458916856e-06, |
| "loss": 1.0864170789718628, |
| "step": 3084 |
| }, |
| { |
| "epoch": 1.8994228549442091, |
| "grad_norm": 3.25, |
| "learning_rate": 2.0539692249730468e-06, |
| "loss": 1.3644320964813232, |
| "step": 3086 |
| }, |
| { |
| "epoch": 1.900654097729896, |
| "grad_norm": 12.625, |
| "learning_rate": 2.05267167108646e-06, |
| "loss": 1.8048946857452393, |
| "step": 3088 |
| }, |
| { |
| "epoch": 1.901885340515583, |
| "grad_norm": 4.5625, |
| "learning_rate": 2.051389802413092e-06, |
| "loss": 1.1744332313537598, |
| "step": 3090 |
| }, |
| { |
| "epoch": 1.9031165833012698, |
| "grad_norm": 1.1953125, |
| "learning_rate": 2.050123624046611e-06, |
| "loss": 1.1367355585098267, |
| "step": 3092 |
| }, |
| { |
| "epoch": 1.9043478260869566, |
| "grad_norm": 2.109375, |
| "learning_rate": 2.0488731410183385e-06, |
| "loss": 1.2757030725479126, |
| "step": 3094 |
| }, |
| { |
| "epoch": 1.9055790688726433, |
| "grad_norm": 3.5, |
| "learning_rate": 2.0476383582972295e-06, |
| "loss": 1.6981068849563599, |
| "step": 3096 |
| }, |
| { |
| "epoch": 1.9068103116583301, |
| "grad_norm": 2.34375, |
| "learning_rate": 2.04641928078985e-06, |
| "loss": 1.582550287246704, |
| "step": 3098 |
| }, |
| { |
| "epoch": 1.9080415544440168, |
| "grad_norm": 3.46875, |
| "learning_rate": 2.04521591334036e-06, |
| "loss": 1.4223381280899048, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.9092727972297037, |
| "grad_norm": 2.0, |
| "learning_rate": 2.044028260730494e-06, |
| "loss": 1.3338019847869873, |
| "step": 3102 |
| }, |
| { |
| "epoch": 1.9105040400153905, |
| "grad_norm": 3.109375, |
| "learning_rate": 2.042856327679542e-06, |
| "loss": 1.4374977350234985, |
| "step": 3104 |
| }, |
| { |
| "epoch": 1.9117352828010774, |
| "grad_norm": 2.875, |
| "learning_rate": 2.041700118844329e-06, |
| "loss": 1.6318196058273315, |
| "step": 3106 |
| }, |
| { |
| "epoch": 1.9129665255867643, |
| "grad_norm": 2.890625, |
| "learning_rate": 2.0405596388191977e-06, |
| "loss": 1.8509365320205688, |
| "step": 3108 |
| }, |
| { |
| "epoch": 1.914197768372451, |
| "grad_norm": 2.71875, |
| "learning_rate": 2.0394348921359923e-06, |
| "loss": 1.6199047565460205, |
| "step": 3110 |
| }, |
| { |
| "epoch": 1.9154290111581378, |
| "grad_norm": 2.234375, |
| "learning_rate": 2.0383258832640375e-06, |
| "loss": 1.4047884941101074, |
| "step": 3112 |
| }, |
| { |
| "epoch": 1.9166602539438244, |
| "grad_norm": 1.84375, |
| "learning_rate": 2.0372326166101207e-06, |
| "loss": 1.2699506282806396, |
| "step": 3114 |
| }, |
| { |
| "epoch": 1.9178914967295113, |
| "grad_norm": 1.0859375, |
| "learning_rate": 2.036155096518477e-06, |
| "loss": 1.227724313735962, |
| "step": 3116 |
| }, |
| { |
| "epoch": 1.9191227395151982, |
| "grad_norm": 1.734375, |
| "learning_rate": 2.035093327270771e-06, |
| "loss": 1.1748602390289307, |
| "step": 3118 |
| }, |
| { |
| "epoch": 1.920353982300885, |
| "grad_norm": 2.296875, |
| "learning_rate": 2.0340473130860763e-06, |
| "loss": 1.5392735004425049, |
| "step": 3120 |
| }, |
| { |
| "epoch": 1.921585225086572, |
| "grad_norm": 2.703125, |
| "learning_rate": 2.0330170581208658e-06, |
| "loss": 1.4597396850585938, |
| "step": 3122 |
| }, |
| { |
| "epoch": 1.9228164678722586, |
| "grad_norm": 2.4375, |
| "learning_rate": 2.032002566468988e-06, |
| "loss": 1.4114183187484741, |
| "step": 3124 |
| }, |
| { |
| "epoch": 1.9240477106579452, |
| "grad_norm": 1.9140625, |
| "learning_rate": 2.031003842161656e-06, |
| "loss": 1.3674333095550537, |
| "step": 3126 |
| }, |
| { |
| "epoch": 1.925278953443632, |
| "grad_norm": 2.046875, |
| "learning_rate": 2.0300208891674274e-06, |
| "loss": 1.216900110244751, |
| "step": 3128 |
| }, |
| { |
| "epoch": 1.926510196229319, |
| "grad_norm": 2.015625, |
| "learning_rate": 2.0290537113921924e-06, |
| "loss": 1.058232307434082, |
| "step": 3130 |
| }, |
| { |
| "epoch": 1.9277414390150058, |
| "grad_norm": 3.578125, |
| "learning_rate": 2.028102312679155e-06, |
| "loss": 0.627967119216919, |
| "step": 3132 |
| }, |
| { |
| "epoch": 1.9289726818006927, |
| "grad_norm": 3.765625, |
| "learning_rate": 2.027166696808819e-06, |
| "loss": 0.7637472152709961, |
| "step": 3134 |
| }, |
| { |
| "epoch": 1.9302039245863793, |
| "grad_norm": 1.4765625, |
| "learning_rate": 2.0262468674989744e-06, |
| "loss": 1.1989485025405884, |
| "step": 3136 |
| }, |
| { |
| "epoch": 1.9314351673720662, |
| "grad_norm": 1.28125, |
| "learning_rate": 2.0253428284046796e-06, |
| "loss": 1.3709150552749634, |
| "step": 3138 |
| }, |
| { |
| "epoch": 1.9326664101577529, |
| "grad_norm": 2.203125, |
| "learning_rate": 2.0244545831182504e-06, |
| "loss": 1.3663153648376465, |
| "step": 3140 |
| }, |
| { |
| "epoch": 1.9338976529434397, |
| "grad_norm": 2.421875, |
| "learning_rate": 2.0235821351692415e-06, |
| "loss": 1.3322885036468506, |
| "step": 3142 |
| }, |
| { |
| "epoch": 1.9351288957291266, |
| "grad_norm": 1.1796875, |
| "learning_rate": 2.022725488024437e-06, |
| "loss": 1.0995405912399292, |
| "step": 3144 |
| }, |
| { |
| "epoch": 1.9363601385148135, |
| "grad_norm": 1.1640625, |
| "learning_rate": 2.021884645087835e-06, |
| "loss": 1.2344316244125366, |
| "step": 3146 |
| }, |
| { |
| "epoch": 1.9375913813005003, |
| "grad_norm": 2.109375, |
| "learning_rate": 2.02105960970063e-06, |
| "loss": 1.1434847116470337, |
| "step": 3148 |
| }, |
| { |
| "epoch": 1.938822624086187, |
| "grad_norm": 1.390625, |
| "learning_rate": 2.0202503851412066e-06, |
| "loss": 1.007233738899231, |
| "step": 3150 |
| }, |
| { |
| "epoch": 1.9400538668718736, |
| "grad_norm": 3.5, |
| "learning_rate": 2.019456974625122e-06, |
| "loss": 1.0748920440673828, |
| "step": 3152 |
| }, |
| { |
| "epoch": 1.9412851096575605, |
| "grad_norm": 4.0, |
| "learning_rate": 2.0186793813050944e-06, |
| "loss": 1.0976916551589966, |
| "step": 3154 |
| }, |
| { |
| "epoch": 1.9425163524432474, |
| "grad_norm": 0.9296875, |
| "learning_rate": 2.0179176082709897e-06, |
| "loss": 1.1037846803665161, |
| "step": 3156 |
| }, |
| { |
| "epoch": 1.9437475952289343, |
| "grad_norm": 2.390625, |
| "learning_rate": 2.0171716585498098e-06, |
| "loss": 1.2665685415267944, |
| "step": 3158 |
| }, |
| { |
| "epoch": 1.9449788380146211, |
| "grad_norm": 2.03125, |
| "learning_rate": 2.0164415351056825e-06, |
| "loss": 1.4245879650115967, |
| "step": 3160 |
| }, |
| { |
| "epoch": 1.9462100808003078, |
| "grad_norm": 3.9375, |
| "learning_rate": 2.0157272408398456e-06, |
| "loss": 1.3685814142227173, |
| "step": 3162 |
| }, |
| { |
| "epoch": 1.9474413235859946, |
| "grad_norm": 2.546875, |
| "learning_rate": 2.0150287785906396e-06, |
| "loss": 1.4000155925750732, |
| "step": 3164 |
| }, |
| { |
| "epoch": 1.9486725663716813, |
| "grad_norm": 2.640625, |
| "learning_rate": 2.0143461511334917e-06, |
| "loss": 1.5712698698043823, |
| "step": 3166 |
| }, |
| { |
| "epoch": 1.9499038091573682, |
| "grad_norm": 3.453125, |
| "learning_rate": 2.0136793611809104e-06, |
| "loss": 1.8835021257400513, |
| "step": 3168 |
| }, |
| { |
| "epoch": 1.951135051943055, |
| "grad_norm": 2.453125, |
| "learning_rate": 2.0130284113824712e-06, |
| "loss": 1.5735080242156982, |
| "step": 3170 |
| }, |
| { |
| "epoch": 1.952366294728742, |
| "grad_norm": 3.484375, |
| "learning_rate": 2.0123933043248056e-06, |
| "loss": 1.8424159288406372, |
| "step": 3172 |
| }, |
| { |
| "epoch": 1.9535975375144288, |
| "grad_norm": 2.875, |
| "learning_rate": 2.0117740425315924e-06, |
| "loss": 1.6976765394210815, |
| "step": 3174 |
| }, |
| { |
| "epoch": 1.9548287803001154, |
| "grad_norm": 2.21875, |
| "learning_rate": 2.0111706284635478e-06, |
| "loss": 1.4014792442321777, |
| "step": 3176 |
| }, |
| { |
| "epoch": 1.956060023085802, |
| "grad_norm": 3.8125, |
| "learning_rate": 2.0105830645184145e-06, |
| "loss": 1.4296321868896484, |
| "step": 3178 |
| }, |
| { |
| "epoch": 1.957291265871489, |
| "grad_norm": 2.625, |
| "learning_rate": 2.010011353030953e-06, |
| "loss": 1.9288471937179565, |
| "step": 3180 |
| }, |
| { |
| "epoch": 1.9585225086571758, |
| "grad_norm": 2.4375, |
| "learning_rate": 2.0094554962729317e-06, |
| "loss": 1.6823272705078125, |
| "step": 3182 |
| }, |
| { |
| "epoch": 1.9597537514428627, |
| "grad_norm": 3.265625, |
| "learning_rate": 2.008915496453119e-06, |
| "loss": 1.494765043258667, |
| "step": 3184 |
| }, |
| { |
| "epoch": 1.9609849942285495, |
| "grad_norm": 2.359375, |
| "learning_rate": 2.0083913557172724e-06, |
| "loss": 1.4550701379776, |
| "step": 3186 |
| }, |
| { |
| "epoch": 1.9622162370142362, |
| "grad_norm": 2.484375, |
| "learning_rate": 2.007883076148133e-06, |
| "loss": 1.4107712507247925, |
| "step": 3188 |
| }, |
| { |
| "epoch": 1.963447479799923, |
| "grad_norm": 2.09375, |
| "learning_rate": 2.0073906597654126e-06, |
| "loss": 1.3898992538452148, |
| "step": 3190 |
| }, |
| { |
| "epoch": 1.9646787225856097, |
| "grad_norm": 0.58984375, |
| "learning_rate": 2.0069141085257925e-06, |
| "loss": 0.9835103750228882, |
| "step": 3192 |
| }, |
| { |
| "epoch": 1.9659099653712966, |
| "grad_norm": 4.03125, |
| "learning_rate": 2.00645342432291e-06, |
| "loss": 1.4404178857803345, |
| "step": 3194 |
| }, |
| { |
| "epoch": 1.9671412081569835, |
| "grad_norm": 4.25, |
| "learning_rate": 2.006008608987351e-06, |
| "loss": 1.7752964496612549, |
| "step": 3196 |
| }, |
| { |
| "epoch": 1.9683724509426703, |
| "grad_norm": 3.375, |
| "learning_rate": 2.0055796642866476e-06, |
| "loss": 1.7659649848937988, |
| "step": 3198 |
| }, |
| { |
| "epoch": 1.9696036937283572, |
| "grad_norm": 2.453125, |
| "learning_rate": 2.0051665919252657e-06, |
| "loss": 1.4808554649353027, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.9708349365140438, |
| "grad_norm": 3.140625, |
| "learning_rate": 2.004769393544601e-06, |
| "loss": 1.6572133302688599, |
| "step": 3202 |
| }, |
| { |
| "epoch": 1.9720661792997307, |
| "grad_norm": 2.578125, |
| "learning_rate": 2.004388070722972e-06, |
| "loss": 1.6950914859771729, |
| "step": 3204 |
| }, |
| { |
| "epoch": 1.9732974220854174, |
| "grad_norm": 2.578125, |
| "learning_rate": 2.0040226249756135e-06, |
| "loss": 1.7044365406036377, |
| "step": 3206 |
| }, |
| { |
| "epoch": 1.9745286648711042, |
| "grad_norm": 2.5, |
| "learning_rate": 2.0036730577546716e-06, |
| "loss": 1.5040702819824219, |
| "step": 3208 |
| }, |
| { |
| "epoch": 1.975759907656791, |
| "grad_norm": 2.859375, |
| "learning_rate": 2.0033393704491954e-06, |
| "loss": 1.2767277956008911, |
| "step": 3210 |
| }, |
| { |
| "epoch": 1.976991150442478, |
| "grad_norm": 2.328125, |
| "learning_rate": 2.003021564385135e-06, |
| "loss": 1.0338983535766602, |
| "step": 3212 |
| }, |
| { |
| "epoch": 1.9782223932281648, |
| "grad_norm": 4.3125, |
| "learning_rate": 2.002719640825332e-06, |
| "loss": 1.162734866142273, |
| "step": 3214 |
| }, |
| { |
| "epoch": 1.9794536360138515, |
| "grad_norm": 6.96875, |
| "learning_rate": 2.00243360096952e-06, |
| "loss": 1.7887216806411743, |
| "step": 3216 |
| }, |
| { |
| "epoch": 1.9806848787995381, |
| "grad_norm": 4.78125, |
| "learning_rate": 2.0021634459543126e-06, |
| "loss": 1.5415432453155518, |
| "step": 3218 |
| }, |
| { |
| "epoch": 1.981916121585225, |
| "grad_norm": 7.5625, |
| "learning_rate": 2.0019091768532075e-06, |
| "loss": 1.8647511005401611, |
| "step": 3220 |
| }, |
| { |
| "epoch": 1.9831473643709119, |
| "grad_norm": 4.90625, |
| "learning_rate": 2.001670794676574e-06, |
| "loss": 1.5809669494628906, |
| "step": 3222 |
| }, |
| { |
| "epoch": 1.9843786071565988, |
| "grad_norm": 4.4375, |
| "learning_rate": 2.0014483003716547e-06, |
| "loss": 1.4375505447387695, |
| "step": 3224 |
| }, |
| { |
| "epoch": 1.9856098499422856, |
| "grad_norm": 1.984375, |
| "learning_rate": 2.001241694822559e-06, |
| "loss": 1.4724972248077393, |
| "step": 3226 |
| }, |
| { |
| "epoch": 1.9868410927279723, |
| "grad_norm": 2.640625, |
| "learning_rate": 2.0010509788502606e-06, |
| "loss": 1.4315625429153442, |
| "step": 3228 |
| }, |
| { |
| "epoch": 1.9880723355136591, |
| "grad_norm": 2.21875, |
| "learning_rate": 2.0008761532125942e-06, |
| "loss": 1.3821440935134888, |
| "step": 3230 |
| }, |
| { |
| "epoch": 1.9893035782993458, |
| "grad_norm": 1.015625, |
| "learning_rate": 2.000717218604251e-06, |
| "loss": 1.0991805791854858, |
| "step": 3232 |
| }, |
| { |
| "epoch": 1.9905348210850327, |
| "grad_norm": 2.28125, |
| "learning_rate": 2.0005741756567775e-06, |
| "loss": 1.0305452346801758, |
| "step": 3234 |
| }, |
| { |
| "epoch": 1.9917660638707195, |
| "grad_norm": 7.09375, |
| "learning_rate": 2.000447024938574e-06, |
| "loss": 1.0397870540618896, |
| "step": 3236 |
| }, |
| { |
| "epoch": 1.9929973066564064, |
| "grad_norm": 5.375, |
| "learning_rate": 2.000335766954891e-06, |
| "loss": 0.9876999258995056, |
| "step": 3238 |
| }, |
| { |
| "epoch": 1.9942285494420933, |
| "grad_norm": 2.84375, |
| "learning_rate": 2.0002404021478243e-06, |
| "loss": 1.9116934537887573, |
| "step": 3240 |
| }, |
| { |
| "epoch": 1.99545979222778, |
| "grad_norm": 4.53125, |
| "learning_rate": 2.0001609308963195e-06, |
| "loss": 1.5504405498504639, |
| "step": 3242 |
| }, |
| { |
| "epoch": 1.9966910350134666, |
| "grad_norm": 2.421875, |
| "learning_rate": 2.000097353516165e-06, |
| "loss": 1.3082109689712524, |
| "step": 3244 |
| }, |
| { |
| "epoch": 1.9979222777991534, |
| "grad_norm": 1.5703125, |
| "learning_rate": 2.000049670259995e-06, |
| "loss": 1.2466901540756226, |
| "step": 3246 |
| }, |
| { |
| "epoch": 1.9991535205848403, |
| "grad_norm": 1.2578125, |
| "learning_rate": 2.0000178813172827e-06, |
| "loss": 1.0891525745391846, |
| "step": 3248 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 5.8125, |
| "learning_rate": 2.0000019868143473e-06, |
| "loss": 0.9806809425354004, |
| "step": 3250 |
| }, |
| { |
| "epoch": 2.0, |
| "step": 3250, |
| "total_flos": 2.574483891351978e+18, |
| "train_loss": 1.4189628758430481, |
| "train_runtime": 74292.9455, |
| "train_samples_per_second": 0.35, |
| "train_steps_per_second": 0.044 |
| } |
| ], |
| "logging_steps": 2, |
| "max_steps": 3250, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 9999999, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.574483891351978e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |