| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 1410, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.004259850905218318, |
| "grad_norm": 0.80859375, |
| "learning_rate": 4.225352112676057e-07, |
| "loss": 1.8638979196548462, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.008519701810436636, |
| "grad_norm": 0.69921875, |
| "learning_rate": 1.267605633802817e-06, |
| "loss": 1.9382712841033936, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.012779552715654952, |
| "grad_norm": 1.09375, |
| "learning_rate": 2.1126760563380285e-06, |
| "loss": 1.8919719457626343, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.01703940362087327, |
| "grad_norm": 0.6875, |
| "learning_rate": 2.957746478873239e-06, |
| "loss": 1.9754539728164673, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.021299254526091587, |
| "grad_norm": 0.8046875, |
| "learning_rate": 3.8028169014084508e-06, |
| "loss": 1.9735431671142578, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.025559105431309903, |
| "grad_norm": 0.490234375, |
| "learning_rate": 4.6478873239436615e-06, |
| "loss": 1.962188959121704, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.029818956336528223, |
| "grad_norm": 0.796875, |
| "learning_rate": 5.492957746478874e-06, |
| "loss": 1.8216444253921509, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.03407880724174654, |
| "grad_norm": 0.65234375, |
| "learning_rate": 6.338028169014085e-06, |
| "loss": 1.879197359085083, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.038338658146964855, |
| "grad_norm": 0.58203125, |
| "learning_rate": 7.183098591549295e-06, |
| "loss": 1.9074592590332031, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.042598509052183174, |
| "grad_norm": 0.5625, |
| "learning_rate": 8.028169014084507e-06, |
| "loss": 1.8535538911819458, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.046858359957401494, |
| "grad_norm": 0.70703125, |
| "learning_rate": 8.873239436619718e-06, |
| "loss": 1.7652872800827026, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.051118210862619806, |
| "grad_norm": 0.470703125, |
| "learning_rate": 9.71830985915493e-06, |
| "loss": 1.7537739276885986, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.055378061767838126, |
| "grad_norm": 0.4375, |
| "learning_rate": 1.056338028169014e-05, |
| "loss": 1.6868540048599243, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.059637912673056445, |
| "grad_norm": 0.419921875, |
| "learning_rate": 1.1408450704225351e-05, |
| "loss": 1.694838523864746, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.06389776357827476, |
| "grad_norm": 0.6953125, |
| "learning_rate": 1.2253521126760564e-05, |
| "loss": 1.7118496894836426, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.06815761448349308, |
| "grad_norm": 0.5, |
| "learning_rate": 1.3098591549295775e-05, |
| "loss": 1.7367674112319946, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.0724174653887114, |
| "grad_norm": 0.625, |
| "learning_rate": 1.3943661971830985e-05, |
| "loss": 1.6931723356246948, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.07667731629392971, |
| "grad_norm": 0.87109375, |
| "learning_rate": 1.4788732394366198e-05, |
| "loss": 1.8322988748550415, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.08093716719914804, |
| "grad_norm": 0.71875, |
| "learning_rate": 1.563380281690141e-05, |
| "loss": 1.723473072052002, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.08519701810436635, |
| "grad_norm": 0.765625, |
| "learning_rate": 1.6478873239436623e-05, |
| "loss": 1.584614872932434, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.08945686900958466, |
| "grad_norm": 0.458984375, |
| "learning_rate": 1.7323943661971833e-05, |
| "loss": 1.757703423500061, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.09371671991480299, |
| "grad_norm": 0.384765625, |
| "learning_rate": 1.816901408450704e-05, |
| "loss": 1.5781840085983276, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.0979765708200213, |
| "grad_norm": 0.98828125, |
| "learning_rate": 1.9014084507042255e-05, |
| "loss": 1.6014955043792725, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.10223642172523961, |
| "grad_norm": 0.73828125, |
| "learning_rate": 1.9859154929577465e-05, |
| "loss": 1.674490213394165, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.10649627263045794, |
| "grad_norm": 0.61328125, |
| "learning_rate": 2.0704225352112676e-05, |
| "loss": 1.393676519393921, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.11075612353567625, |
| "grad_norm": 0.69921875, |
| "learning_rate": 2.154929577464789e-05, |
| "loss": 1.495132327079773, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.11501597444089456, |
| "grad_norm": 0.5703125, |
| "learning_rate": 2.23943661971831e-05, |
| "loss": 1.5442848205566406, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.11927582534611289, |
| "grad_norm": 0.396484375, |
| "learning_rate": 2.3239436619718308e-05, |
| "loss": 1.459320068359375, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.1235356762513312, |
| "grad_norm": 0.7578125, |
| "learning_rate": 2.4084507042253522e-05, |
| "loss": 1.4848005771636963, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.12779552715654952, |
| "grad_norm": 0.310546875, |
| "learning_rate": 2.4929577464788733e-05, |
| "loss": 1.3594304323196411, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.13205537806176784, |
| "grad_norm": 0.6640625, |
| "learning_rate": 2.5774647887323944e-05, |
| "loss": 1.4234025478363037, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.13631522896698617, |
| "grad_norm": 0.27734375, |
| "learning_rate": 2.6619718309859158e-05, |
| "loss": 1.3102433681488037, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.14057507987220447, |
| "grad_norm": 0.3046875, |
| "learning_rate": 2.746478873239437e-05, |
| "loss": 1.3784610033035278, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.1448349307774228, |
| "grad_norm": 0.5625, |
| "learning_rate": 2.8309859154929576e-05, |
| "loss": 1.325601577758789, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.14909478168264112, |
| "grad_norm": 0.36328125, |
| "learning_rate": 2.915492957746479e-05, |
| "loss": 1.3749815225601196, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.15335463258785942, |
| "grad_norm": 0.2890625, |
| "learning_rate": 3e-05, |
| "loss": 1.4330412149429321, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.15761448349307774, |
| "grad_norm": 1.609375, |
| "learning_rate": 2.9999867885940888e-05, |
| "loss": 1.4420013427734375, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.16187433439829607, |
| "grad_norm": 0.345703125, |
| "learning_rate": 2.999947154667255e-05, |
| "loss": 1.3299309015274048, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.16613418530351437, |
| "grad_norm": 0.3125, |
| "learning_rate": 2.9998810990921997e-05, |
| "loss": 1.2926124334335327, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.1703940362087327, |
| "grad_norm": 0.34765625, |
| "learning_rate": 2.999788623323402e-05, |
| "loss": 1.4053008556365967, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.17465388711395102, |
| "grad_norm": 0.341796875, |
| "learning_rate": 2.999669729397085e-05, |
| "loss": 1.3341435194015503, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.17891373801916932, |
| "grad_norm": 1.1171875, |
| "learning_rate": 2.999524419931176e-05, |
| "loss": 1.3351154327392578, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.18317358892438765, |
| "grad_norm": 0.275390625, |
| "learning_rate": 2.9993526981252465e-05, |
| "loss": 1.279821515083313, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.18743343982960597, |
| "grad_norm": 0.29296875, |
| "learning_rate": 2.999154567760439e-05, |
| "loss": 1.2992417812347412, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.19169329073482427, |
| "grad_norm": 0.43359375, |
| "learning_rate": 2.998930033199389e-05, |
| "loss": 1.2671334743499756, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.1959531416400426, |
| "grad_norm": 0.396484375, |
| "learning_rate": 2.9986790993861245e-05, |
| "loss": 1.4086840152740479, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.20021299254526093, |
| "grad_norm": 0.4765625, |
| "learning_rate": 2.9984017718459603e-05, |
| "loss": 1.260171890258789, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.20447284345047922, |
| "grad_norm": 0.4453125, |
| "learning_rate": 2.998098056685374e-05, |
| "loss": 1.337839126586914, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.20873269435569755, |
| "grad_norm": 0.44921875, |
| "learning_rate": 2.9977679605918732e-05, |
| "loss": 1.3128660917282104, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.21299254526091588, |
| "grad_norm": 0.3515625, |
| "learning_rate": 2.9974114908338454e-05, |
| "loss": 1.3010621070861816, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.21725239616613418, |
| "grad_norm": 0.2451171875, |
| "learning_rate": 2.9970286552604036e-05, |
| "loss": 1.3007886409759521, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.2215122470713525, |
| "grad_norm": 0.251953125, |
| "learning_rate": 2.996619462301207e-05, |
| "loss": 1.3102291822433472, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.22577209797657083, |
| "grad_norm": 0.515625, |
| "learning_rate": 2.9961839209662808e-05, |
| "loss": 1.4082542657852173, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.23003194888178913, |
| "grad_norm": 0.447265625, |
| "learning_rate": 2.9957220408458118e-05, |
| "loss": 1.220694899559021, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.23429179978700745, |
| "grad_norm": 0.494140625, |
| "learning_rate": 2.9952338321099435e-05, |
| "loss": 1.3067396879196167, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.23855165069222578, |
| "grad_norm": 0.34765625, |
| "learning_rate": 2.9947193055085505e-05, |
| "loss": 1.283180594444275, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.24281150159744408, |
| "grad_norm": 0.255859375, |
| "learning_rate": 2.9941784723709973e-05, |
| "loss": 1.273285150527954, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.2470713525026624, |
| "grad_norm": 0.4453125, |
| "learning_rate": 2.993611344605895e-05, |
| "loss": 1.2464628219604492, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.25133120340788073, |
| "grad_norm": 0.255859375, |
| "learning_rate": 2.9930179347008347e-05, |
| "loss": 1.2899105548858643, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.25559105431309903, |
| "grad_norm": 0.71484375, |
| "learning_rate": 2.9923982557221154e-05, |
| "loss": 1.166140079498291, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.2598509052183174, |
| "grad_norm": 0.359375, |
| "learning_rate": 2.9917523213144554e-05, |
| "loss": 1.3073238134384155, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.2641107561235357, |
| "grad_norm": 0.251953125, |
| "learning_rate": 2.9910801457006897e-05, |
| "loss": 1.2734519243240356, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.268370607028754, |
| "grad_norm": 0.2470703125, |
| "learning_rate": 2.9903817436814603e-05, |
| "loss": 1.190434455871582, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.27263045793397234, |
| "grad_norm": 0.41796875, |
| "learning_rate": 2.9896571306348874e-05, |
| "loss": 1.2509433031082153, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.27689030883919064, |
| "grad_norm": 0.48828125, |
| "learning_rate": 2.9889063225162337e-05, |
| "loss": 1.3253034353256226, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.28115015974440893, |
| "grad_norm": 0.318359375, |
| "learning_rate": 2.98812933585755e-05, |
| "loss": 1.2004448175430298, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.2854100106496273, |
| "grad_norm": 0.2890625, |
| "learning_rate": 2.9873261877673142e-05, |
| "loss": 1.1677073240280151, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.2896698615548456, |
| "grad_norm": 0.48046875, |
| "learning_rate": 2.9864968959300505e-05, |
| "loss": 1.2530547380447388, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.2939297124600639, |
| "grad_norm": 0.482421875, |
| "learning_rate": 2.985641478605945e-05, |
| "loss": 1.2573705911636353, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.29818956336528224, |
| "grad_norm": 0.435546875, |
| "learning_rate": 2.9847599546304395e-05, |
| "loss": 1.3057535886764526, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.30244941427050054, |
| "grad_norm": 0.2734375, |
| "learning_rate": 2.9838523434138204e-05, |
| "loss": 1.2737255096435547, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.30670926517571884, |
| "grad_norm": 0.419921875, |
| "learning_rate": 2.982918664940787e-05, |
| "loss": 1.2411197423934937, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.3109691160809372, |
| "grad_norm": 0.3203125, |
| "learning_rate": 2.9819589397700148e-05, |
| "loss": 1.2790652513504028, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.3152289669861555, |
| "grad_norm": 0.408203125, |
| "learning_rate": 2.9809731890337017e-05, |
| "loss": 1.2759779691696167, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.3194888178913738, |
| "grad_norm": 0.337890625, |
| "learning_rate": 2.979961434437103e-05, |
| "loss": 1.3018522262573242, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.32374866879659214, |
| "grad_norm": 0.365234375, |
| "learning_rate": 2.9789236982580538e-05, |
| "loss": 1.3175352811813354, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.32800851970181044, |
| "grad_norm": 0.32421875, |
| "learning_rate": 2.9778600033464767e-05, |
| "loss": 1.291448712348938, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.33226837060702874, |
| "grad_norm": 0.275390625, |
| "learning_rate": 2.97677037312388e-05, |
| "loss": 1.2841699123382568, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.3365282215122471, |
| "grad_norm": 0.33203125, |
| "learning_rate": 2.975654831582843e-05, |
| "loss": 1.3081012964248657, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.3407880724174654, |
| "grad_norm": 0.2734375, |
| "learning_rate": 2.9745134032864862e-05, |
| "loss": 1.2524945735931396, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.3450479233226837, |
| "grad_norm": 0.228515625, |
| "learning_rate": 2.973346113367929e-05, |
| "loss": 1.1932268142700195, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.34930777422790205, |
| "grad_norm": 0.453125, |
| "learning_rate": 2.972152987529741e-05, |
| "loss": 1.2276166677474976, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.35356762513312034, |
| "grad_norm": 0.2734375, |
| "learning_rate": 2.9709340520433722e-05, |
| "loss": 1.2343382835388184, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.35782747603833864, |
| "grad_norm": 0.96875, |
| "learning_rate": 2.9696893337485734e-05, |
| "loss": 1.3475210666656494, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.362087326943557, |
| "grad_norm": 0.69921875, |
| "learning_rate": 2.9684188600528098e-05, |
| "loss": 1.2921943664550781, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.3663471778487753, |
| "grad_norm": 0.515625, |
| "learning_rate": 2.967122658930654e-05, |
| "loss": 1.1600617170333862, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.3706070287539936, |
| "grad_norm": 0.341796875, |
| "learning_rate": 2.9658007589231723e-05, |
| "loss": 1.3178966045379639, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.37486687965921195, |
| "grad_norm": 0.33203125, |
| "learning_rate": 2.9644531891372925e-05, |
| "loss": 1.3098689317703247, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.37912673056443025, |
| "grad_norm": 0.234375, |
| "learning_rate": 2.9630799792451687e-05, |
| "loss": 1.1713343858718872, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.38338658146964855, |
| "grad_norm": 0.2109375, |
| "learning_rate": 2.9616811594835214e-05, |
| "loss": 1.2428940534591675, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.3876464323748669, |
| "grad_norm": 0.359375, |
| "learning_rate": 2.9602567606529776e-05, |
| "loss": 1.2774041891098022, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.3919062832800852, |
| "grad_norm": 0.33984375, |
| "learning_rate": 2.9588068141173888e-05, |
| "loss": 1.2816710472106934, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.3961661341853035, |
| "grad_norm": 0.349609375, |
| "learning_rate": 2.9573313518031424e-05, |
| "loss": 1.1415907144546509, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.40042598509052185, |
| "grad_norm": 0.396484375, |
| "learning_rate": 2.955830406198458e-05, |
| "loss": 1.232388973236084, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.40468583599574015, |
| "grad_norm": 0.3125, |
| "learning_rate": 2.95430401035267e-05, |
| "loss": 1.250954031944275, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.40894568690095845, |
| "grad_norm": 0.310546875, |
| "learning_rate": 2.9527521978755053e-05, |
| "loss": 1.205154299736023, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.4132055378061768, |
| "grad_norm": 0.26171875, |
| "learning_rate": 2.9511750029363377e-05, |
| "loss": 1.2991074323654175, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.4174653887113951, |
| "grad_norm": 0.296875, |
| "learning_rate": 2.949572460263438e-05, |
| "loss": 1.2244809865951538, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.4217252396166134, |
| "grad_norm": 0.41796875, |
| "learning_rate": 2.947944605143208e-05, |
| "loss": 1.2750012874603271, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.42598509052183176, |
| "grad_norm": 0.376953125, |
| "learning_rate": 2.9462914734194078e-05, |
| "loss": 1.3423129320144653, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.43024494142705005, |
| "grad_norm": 0.35546875, |
| "learning_rate": 2.9446131014923593e-05, |
| "loss": 1.280989646911621, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.43450479233226835, |
| "grad_norm": 0.26171875, |
| "learning_rate": 2.9429095263181514e-05, |
| "loss": 1.2904020547866821, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.4387646432374867, |
| "grad_norm": 0.427734375, |
| "learning_rate": 2.9411807854078226e-05, |
| "loss": 1.2392964363098145, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.443024494142705, |
| "grad_norm": 0.3046875, |
| "learning_rate": 2.9394269168265358e-05, |
| "loss": 1.2662076950073242, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.4472843450479233, |
| "grad_norm": 0.3046875, |
| "learning_rate": 2.9376479591927408e-05, |
| "loss": 1.2238541841506958, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.45154419595314166, |
| "grad_norm": 0.263671875, |
| "learning_rate": 2.935843951677323e-05, |
| "loss": 1.2052886486053467, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.45580404685835996, |
| "grad_norm": 0.3515625, |
| "learning_rate": 2.9340149340027412e-05, |
| "loss": 1.2680332660675049, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.46006389776357826, |
| "grad_norm": 0.318359375, |
| "learning_rate": 2.9321609464421546e-05, |
| "loss": 1.233550786972046, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.4643237486687966, |
| "grad_norm": 0.578125, |
| "learning_rate": 2.930282029818533e-05, |
| "loss": 1.2138795852661133, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.4685835995740149, |
| "grad_norm": 0.2578125, |
| "learning_rate": 2.92837822550376e-05, |
| "loss": 1.1213371753692627, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.4728434504792332, |
| "grad_norm": 0.404296875, |
| "learning_rate": 2.9264495754177225e-05, |
| "loss": 1.2127740383148193, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.47710330138445156, |
| "grad_norm": 1.5078125, |
| "learning_rate": 2.924496122027384e-05, |
| "loss": 1.3384878635406494, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.48136315228966986, |
| "grad_norm": 0.345703125, |
| "learning_rate": 2.9225179083458555e-05, |
| "loss": 1.1937229633331299, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.48562300319488816, |
| "grad_norm": 0.52734375, |
| "learning_rate": 2.9205149779314425e-05, |
| "loss": 1.2608391046524048, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.4898828541001065, |
| "grad_norm": 0.31640625, |
| "learning_rate": 2.918487374886691e-05, |
| "loss": 1.2325993776321411, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.4941427050053248, |
| "grad_norm": 0.287109375, |
| "learning_rate": 2.91643514385741e-05, |
| "loss": 1.2050917148590088, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.4984025559105431, |
| "grad_norm": 0.296875, |
| "learning_rate": 2.9143583300316975e-05, |
| "loss": 1.2299753427505493, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.5026624068157615, |
| "grad_norm": 0.322265625, |
| "learning_rate": 2.9122569791389354e-05, |
| "loss": 1.2500553131103516, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.5069222577209798, |
| "grad_norm": 0.46875, |
| "learning_rate": 2.9101311374487908e-05, |
| "loss": 1.3044551610946655, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.5111821086261981, |
| "grad_norm": 0.28125, |
| "learning_rate": 2.907980851770193e-05, |
| "loss": 1.1923537254333496, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.5154419595314164, |
| "grad_norm": 0.419921875, |
| "learning_rate": 2.905806169450303e-05, |
| "loss": 1.2567352056503296, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.5197018104366348, |
| "grad_norm": 0.369140625, |
| "learning_rate": 2.9036071383734716e-05, |
| "loss": 1.2812081575393677, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.5239616613418531, |
| "grad_norm": 0.4609375, |
| "learning_rate": 2.9013838069601874e-05, |
| "loss": 1.2612706422805786, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.5282215122470714, |
| "grad_norm": 0.27734375, |
| "learning_rate": 2.8991362241660053e-05, |
| "loss": 1.2162076234817505, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.5324813631522897, |
| "grad_norm": 0.267578125, |
| "learning_rate": 2.8968644394804736e-05, |
| "loss": 1.2357534170150757, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.536741214057508, |
| "grad_norm": 0.255859375, |
| "learning_rate": 2.894568502926042e-05, |
| "loss": 1.144363284111023, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.5410010649627263, |
| "grad_norm": 0.68359375, |
| "learning_rate": 2.8922484650569597e-05, |
| "loss": 1.1998339891433716, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.5452609158679447, |
| "grad_norm": 0.435546875, |
| "learning_rate": 2.8899043769581627e-05, |
| "loss": 1.1842751502990723, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.549520766773163, |
| "grad_norm": 0.78515625, |
| "learning_rate": 2.8875362902441517e-05, |
| "loss": 1.1901715993881226, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.5537806176783813, |
| "grad_norm": 0.447265625, |
| "learning_rate": 2.885144257057849e-05, |
| "loss": 1.3038347959518433, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.5580404685835996, |
| "grad_norm": 0.46484375, |
| "learning_rate": 2.8827283300694593e-05, |
| "loss": 1.2350062131881714, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.5623003194888179, |
| "grad_norm": 0.443359375, |
| "learning_rate": 2.8802885624753013e-05, |
| "loss": 1.2469710111618042, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.5665601703940362, |
| "grad_norm": 0.38671875, |
| "learning_rate": 2.8778250079966417e-05, |
| "loss": 1.2484819889068604, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.5708200212992546, |
| "grad_norm": 0.48046875, |
| "learning_rate": 2.875337720878512e-05, |
| "loss": 1.213232159614563, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.5750798722044729, |
| "grad_norm": 0.349609375, |
| "learning_rate": 2.8728267558885102e-05, |
| "loss": 1.1985093355178833, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.5793397231096912, |
| "grad_norm": 0.28125, |
| "learning_rate": 2.8702921683156e-05, |
| "loss": 1.2459266185760498, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.5835995740149095, |
| "grad_norm": 0.251953125, |
| "learning_rate": 2.867734013968891e-05, |
| "loss": 1.3075346946716309, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.5878594249201278, |
| "grad_norm": 0.63671875, |
| "learning_rate": 2.8651523491764074e-05, |
| "loss": 1.254473090171814, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.5921192758253461, |
| "grad_norm": 0.5703125, |
| "learning_rate": 2.8625472307838518e-05, |
| "loss": 1.2639200687408447, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.5963791267305645, |
| "grad_norm": 0.703125, |
| "learning_rate": 2.8599187161533533e-05, |
| "loss": 1.23056161403656, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.6006389776357828, |
| "grad_norm": 0.53125, |
| "learning_rate": 2.8572668631622e-05, |
| "loss": 1.2778501510620117, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.6048988285410011, |
| "grad_norm": 0.376953125, |
| "learning_rate": 2.8545917302015693e-05, |
| "loss": 1.240308403968811, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.6091586794462194, |
| "grad_norm": 0.2734375, |
| "learning_rate": 2.851893376175241e-05, |
| "loss": 1.3061432838439941, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.6134185303514377, |
| "grad_norm": 0.326171875, |
| "learning_rate": 2.849171860498298e-05, |
| "loss": 1.1693536043167114, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.617678381256656, |
| "grad_norm": 0.396484375, |
| "learning_rate": 2.8464272430958208e-05, |
| "loss": 1.3255276679992676, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.6219382321618744, |
| "grad_norm": 0.4140625, |
| "learning_rate": 2.843659584401568e-05, |
| "loss": 1.1839312314987183, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.6261980830670927, |
| "grad_norm": 0.400390625, |
| "learning_rate": 2.840868945356643e-05, |
| "loss": 1.2237545251846313, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.630457933972311, |
| "grad_norm": 0.279296875, |
| "learning_rate": 2.8380553874081544e-05, |
| "loss": 1.219810962677002, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.6347177848775293, |
| "grad_norm": 0.2099609375, |
| "learning_rate": 2.8352189725078623e-05, |
| "loss": 1.148103952407837, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.6389776357827476, |
| "grad_norm": 0.70703125, |
| "learning_rate": 2.8323597631108148e-05, |
| "loss": 1.266182780265808, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.6432374866879659, |
| "grad_norm": 0.9765625, |
| "learning_rate": 2.829477822173972e-05, |
| "loss": 1.1832197904586792, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.6474973375931843, |
| "grad_norm": 0.322265625, |
| "learning_rate": 2.8265732131548185e-05, |
| "loss": 1.2743726968765259, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.6517571884984026, |
| "grad_norm": 0.2216796875, |
| "learning_rate": 2.82364600000997e-05, |
| "loss": 1.2408907413482666, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.6560170394036209, |
| "grad_norm": 0.46875, |
| "learning_rate": 2.8206962471937612e-05, |
| "loss": 1.2314817905426025, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.6602768903088392, |
| "grad_norm": 0.279296875, |
| "learning_rate": 2.817724019656829e-05, |
| "loss": 1.0730669498443604, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.6645367412140575, |
| "grad_norm": 0.3203125, |
| "learning_rate": 2.81472938284468e-05, |
| "loss": 1.250943660736084, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.6687965921192758, |
| "grad_norm": 0.98046875, |
| "learning_rate": 2.811712402696252e-05, |
| "loss": 1.1586111783981323, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.6730564430244942, |
| "grad_norm": 0.267578125, |
| "learning_rate": 2.808673145642461e-05, |
| "loss": 1.2091357707977295, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.6773162939297125, |
| "grad_norm": 0.7890625, |
| "learning_rate": 2.805611678604737e-05, |
| "loss": 1.219393253326416, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.6815761448349308, |
| "grad_norm": 0.302734375, |
| "learning_rate": 2.8025280689935538e-05, |
| "loss": 1.2416179180145264, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.6858359957401491, |
| "grad_norm": 0.46875, |
| "learning_rate": 2.7994223847069417e-05, |
| "loss": 1.2236298322677612, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.6900958466453674, |
| "grad_norm": 1.109375, |
| "learning_rate": 2.7962946941289932e-05, |
| "loss": 1.1898835897445679, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.6943556975505857, |
| "grad_norm": 0.357421875, |
| "learning_rate": 2.7931450661283587e-05, |
| "loss": 1.1595722436904907, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.6986155484558041, |
| "grad_norm": 0.53125, |
| "learning_rate": 2.7899735700567272e-05, |
| "loss": 1.221711277961731, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.7028753993610224, |
| "grad_norm": 0.2275390625, |
| "learning_rate": 2.7867802757473023e-05, |
| "loss": 1.2105400562286377, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.7071352502662407, |
| "grad_norm": 0.30078125, |
| "learning_rate": 2.7835652535132635e-05, |
| "loss": 1.2640867233276367, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.711395101171459, |
| "grad_norm": 0.330078125, |
| "learning_rate": 2.780328574146216e-05, |
| "loss": 1.259413480758667, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.7156549520766773, |
| "grad_norm": 0.4453125, |
| "learning_rate": 2.7770703089146355e-05, |
| "loss": 1.3237056732177734, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.7199148029818956, |
| "grad_norm": 0.400390625, |
| "learning_rate": 2.7737905295622957e-05, |
| "loss": 1.2199316024780273, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.724174653887114, |
| "grad_norm": 0.51953125, |
| "learning_rate": 2.7704893083066906e-05, |
| "loss": 1.1969261169433594, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.7284345047923323, |
| "grad_norm": 0.466796875, |
| "learning_rate": 2.7671667178374443e-05, |
| "loss": 1.2693402767181396, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.7326943556975506, |
| "grad_norm": 0.265625, |
| "learning_rate": 2.7638228313147083e-05, |
| "loss": 1.230875015258789, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.7369542066027689, |
| "grad_norm": 0.375, |
| "learning_rate": 2.760457722367553e-05, |
| "loss": 1.1558018922805786, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.7412140575079872, |
| "grad_norm": 2.40625, |
| "learning_rate": 2.7570714650923446e-05, |
| "loss": 1.3312543630599976, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.7454739084132055, |
| "grad_norm": 0.283203125, |
| "learning_rate": 2.7536641340511177e-05, |
| "loss": 1.1423282623291016, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.7497337593184239, |
| "grad_norm": 0.41796875, |
| "learning_rate": 2.7502358042699257e-05, |
| "loss": 1.1751903295516968, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.7539936102236422, |
| "grad_norm": 0.384765625, |
| "learning_rate": 2.7467865512371974e-05, |
| "loss": 1.2713823318481445, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.7582534611288605, |
| "grad_norm": 0.330078125, |
| "learning_rate": 2.7433164509020684e-05, |
| "loss": 1.2887362241744995, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.7625133120340788, |
| "grad_norm": 0.341796875, |
| "learning_rate": 2.7398255796727127e-05, |
| "loss": 1.2369112968444824, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.7667731629392971, |
| "grad_norm": 0.34375, |
| "learning_rate": 2.7363140144146578e-05, |
| "loss": 1.150454044342041, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.7710330138445154, |
| "grad_norm": 0.275390625, |
| "learning_rate": 2.7327818324490938e-05, |
| "loss": 1.2185767889022827, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.7752928647497338, |
| "grad_norm": 0.6953125, |
| "learning_rate": 2.729229111551171e-05, |
| "loss": 1.2292591333389282, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.7795527156549521, |
| "grad_norm": 0.236328125, |
| "learning_rate": 2.725655929948285e-05, |
| "loss": 1.2185684442520142, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.7838125665601704, |
| "grad_norm": 0.30078125, |
| "learning_rate": 2.722062366318357e-05, |
| "loss": 1.1981046199798584, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.7880724174653887, |
| "grad_norm": 0.2197265625, |
| "learning_rate": 2.7184484997881e-05, |
| "loss": 1.1411432027816772, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.792332268370607, |
| "grad_norm": 0.85546875, |
| "learning_rate": 2.7148144099312765e-05, |
| "loss": 1.2738561630249023, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.7965921192758253, |
| "grad_norm": 0.275390625, |
| "learning_rate": 2.7111601767669473e-05, |
| "loss": 1.1942780017852783, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.8008519701810437, |
| "grad_norm": 0.212890625, |
| "learning_rate": 2.7074858807577084e-05, |
| "loss": 1.1684967279434204, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.805111821086262, |
| "grad_norm": 0.171875, |
| "learning_rate": 2.7037916028079198e-05, |
| "loss": 1.1836313009262085, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.8093716719914803, |
| "grad_norm": 0.365234375, |
| "learning_rate": 2.7000774242619235e-05, |
| "loss": 1.2047457695007324, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.8136315228966986, |
| "grad_norm": 0.2294921875, |
| "learning_rate": 2.696343426902254e-05, |
| "loss": 1.186992883682251, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.8178913738019169, |
| "grad_norm": 0.376953125, |
| "learning_rate": 2.6925896929478355e-05, |
| "loss": 1.1887181997299194, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.8221512247071352, |
| "grad_norm": 0.328125, |
| "learning_rate": 2.6888163050521734e-05, |
| "loss": 1.2181212902069092, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.8264110756123536, |
| "grad_norm": 0.55078125, |
| "learning_rate": 2.6850233463015334e-05, |
| "loss": 1.1820951700210571, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.8306709265175719, |
| "grad_norm": 0.267578125, |
| "learning_rate": 2.6812109002131106e-05, |
| "loss": 1.1575113534927368, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.8349307774227902, |
| "grad_norm": 0.40625, |
| "learning_rate": 2.6773790507331936e-05, |
| "loss": 1.1017088890075684, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.8391906283280085, |
| "grad_norm": 0.310546875, |
| "learning_rate": 2.673527882235314e-05, |
| "loss": 1.1889958381652832, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.8434504792332268, |
| "grad_norm": 0.373046875, |
| "learning_rate": 2.6696574795183882e-05, |
| "loss": 1.1406269073486328, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.8477103301384451, |
| "grad_norm": 0.453125, |
| "learning_rate": 2.665767927804852e-05, |
| "loss": 1.172967791557312, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.8519701810436635, |
| "grad_norm": 0.23046875, |
| "learning_rate": 2.661859312738783e-05, |
| "loss": 1.2290892601013184, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.8562300319488818, |
| "grad_norm": 0.255859375, |
| "learning_rate": 2.6579317203840154e-05, |
| "loss": 1.0655782222747803, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.8604898828541001, |
| "grad_norm": 0.208984375, |
| "learning_rate": 2.6539852372222434e-05, |
| "loss": 1.1730587482452393, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.8647497337593184, |
| "grad_norm": 0.283203125, |
| "learning_rate": 2.6500199501511184e-05, |
| "loss": 1.2667183876037598, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.8690095846645367, |
| "grad_norm": 0.3125, |
| "learning_rate": 2.646035946482336e-05, |
| "loss": 1.2611602544784546, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.873269435569755, |
| "grad_norm": 0.234375, |
| "learning_rate": 2.6420333139397122e-05, |
| "loss": 1.2684861421585083, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.8775292864749734, |
| "grad_norm": 0.2333984375, |
| "learning_rate": 2.638012140657252e-05, |
| "loss": 1.2144488096237183, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.8817891373801917, |
| "grad_norm": 0.44921875, |
| "learning_rate": 2.6339725151772095e-05, |
| "loss": 1.2024558782577515, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.88604898828541, |
| "grad_norm": 0.478515625, |
| "learning_rate": 2.6299145264481386e-05, |
| "loss": 1.2472572326660156, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.8903088391906283, |
| "grad_norm": 0.310546875, |
| "learning_rate": 2.625838263822932e-05, |
| "loss": 1.15989351272583, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.8945686900958466, |
| "grad_norm": 0.236328125, |
| "learning_rate": 2.621743817056858e-05, |
| "loss": 1.2214092016220093, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.898828541001065, |
| "grad_norm": 0.37109375, |
| "learning_rate": 2.6176312763055795e-05, |
| "loss": 1.1031744480133057, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.9030883919062833, |
| "grad_norm": 0.251953125, |
| "learning_rate": 2.6135007321231715e-05, |
| "loss": 1.0990759134292603, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.9073482428115016, |
| "grad_norm": 0.41015625, |
| "learning_rate": 2.6093522754601284e-05, |
| "loss": 1.180249810218811, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.9116080937167199, |
| "grad_norm": 0.185546875, |
| "learning_rate": 2.6051859976613564e-05, |
| "loss": 1.1679967641830444, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.9158679446219382, |
| "grad_norm": 0.294921875, |
| "learning_rate": 2.601001990464169e-05, |
| "loss": 1.1675636768341064, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.9201277955271565, |
| "grad_norm": 0.93359375, |
| "learning_rate": 2.5968003459962608e-05, |
| "loss": 1.187214732170105, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.9243876464323749, |
| "grad_norm": 0.23828125, |
| "learning_rate": 2.592581156773684e-05, |
| "loss": 1.1574485301971436, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.9286474973375932, |
| "grad_norm": 0.33203125, |
| "learning_rate": 2.588344515698806e-05, |
| "loss": 1.207824468612671, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.9329073482428115, |
| "grad_norm": 0.275390625, |
| "learning_rate": 2.58409051605827e-05, |
| "loss": 1.160508155822754, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.9371671991480298, |
| "grad_norm": 0.3125, |
| "learning_rate": 2.5798192515209343e-05, |
| "loss": 1.1380846500396729, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.9414270500532481, |
| "grad_norm": 0.1884765625, |
| "learning_rate": 2.5755308161358166e-05, |
| "loss": 1.1430374383926392, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.9456869009584664, |
| "grad_norm": 1.0625, |
| "learning_rate": 2.5712253043300174e-05, |
| "loss": 1.1965644359588623, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.9499467518636848, |
| "grad_norm": 0.302734375, |
| "learning_rate": 2.5669028109066426e-05, |
| "loss": 1.2050869464874268, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.9542066027689031, |
| "grad_norm": 0.380859375, |
| "learning_rate": 2.5625634310427188e-05, |
| "loss": 1.1945817470550537, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.9584664536741214, |
| "grad_norm": 0.275390625, |
| "learning_rate": 2.558207260287093e-05, |
| "loss": 1.1947966814041138, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.9627263045793397, |
| "grad_norm": 0.279296875, |
| "learning_rate": 2.553834394558332e-05, |
| "loss": 1.134352445602417, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.966986155484558, |
| "grad_norm": 1.0859375, |
| "learning_rate": 2.5494449301426102e-05, |
| "loss": 1.2251217365264893, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.9712460063897763, |
| "grad_norm": 0.2177734375, |
| "learning_rate": 2.5450389636915867e-05, |
| "loss": 1.081860899925232, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.9755058572949947, |
| "grad_norm": 0.1943359375, |
| "learning_rate": 2.540616592220281e-05, |
| "loss": 1.182367205619812, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.979765708200213, |
| "grad_norm": 0.2451171875, |
| "learning_rate": 2.5361779131049344e-05, |
| "loss": 1.158174991607666, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.9840255591054313, |
| "grad_norm": 0.2001953125, |
| "learning_rate": 2.5317230240808656e-05, |
| "loss": 1.1436811685562134, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.9882854100106496, |
| "grad_norm": 0.31640625, |
| "learning_rate": 2.527252023240319e-05, |
| "loss": 1.1009982824325562, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.9925452609158679, |
| "grad_norm": 0.39453125, |
| "learning_rate": 2.5227650090303083e-05, |
| "loss": 1.2242732048034668, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.9968051118210862, |
| "grad_norm": 0.255859375, |
| "learning_rate": 2.5182620802504415e-05, |
| "loss": 1.1412031650543213, |
| "step": 468 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.453125, |
| "learning_rate": 2.513743336050753e-05, |
| "loss": 1.3368866443634033, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.0042598509052183, |
| "grad_norm": 0.23828125, |
| "learning_rate": 2.5092088759295147e-05, |
| "loss": 0.9358726739883423, |
| "step": 472 |
| }, |
| { |
| "epoch": 1.0085197018104366, |
| "grad_norm": 0.189453125, |
| "learning_rate": 2.5046587997310503e-05, |
| "loss": 0.9842238426208496, |
| "step": 474 |
| }, |
| { |
| "epoch": 1.012779552715655, |
| "grad_norm": 0.2314453125, |
| "learning_rate": 2.500093207643532e-05, |
| "loss": 0.909864068031311, |
| "step": 476 |
| }, |
| { |
| "epoch": 1.0170394036208732, |
| "grad_norm": 0.248046875, |
| "learning_rate": 2.4955122001967757e-05, |
| "loss": 0.8217376470565796, |
| "step": 478 |
| }, |
| { |
| "epoch": 1.0212992545260915, |
| "grad_norm": 0.2236328125, |
| "learning_rate": 2.4909158782600303e-05, |
| "loss": 0.9412868618965149, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.0255591054313098, |
| "grad_norm": 0.423828125, |
| "learning_rate": 2.4863043430397546e-05, |
| "loss": 0.9232436418533325, |
| "step": 482 |
| }, |
| { |
| "epoch": 1.0298189563365283, |
| "grad_norm": 0.1708984375, |
| "learning_rate": 2.481677696077387e-05, |
| "loss": 0.9075867533683777, |
| "step": 484 |
| }, |
| { |
| "epoch": 1.0340788072417466, |
| "grad_norm": 0.3203125, |
| "learning_rate": 2.477036039247113e-05, |
| "loss": 0.9229554533958435, |
| "step": 486 |
| }, |
| { |
| "epoch": 1.038338658146965, |
| "grad_norm": 0.28515625, |
| "learning_rate": 2.4723794747536204e-05, |
| "loss": 0.8909753561019897, |
| "step": 488 |
| }, |
| { |
| "epoch": 1.0425985090521832, |
| "grad_norm": 0.2578125, |
| "learning_rate": 2.4677081051298473e-05, |
| "loss": 0.8516156077384949, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.0468583599574015, |
| "grad_norm": 0.4375, |
| "learning_rate": 2.4630220332347293e-05, |
| "loss": 0.910189151763916, |
| "step": 492 |
| }, |
| { |
| "epoch": 1.0511182108626198, |
| "grad_norm": 0.2314453125, |
| "learning_rate": 2.458321362250928e-05, |
| "loss": 0.8809674978256226, |
| "step": 494 |
| }, |
| { |
| "epoch": 1.055378061767838, |
| "grad_norm": 0.337890625, |
| "learning_rate": 2.4536061956825653e-05, |
| "loss": 0.9545248746871948, |
| "step": 496 |
| }, |
| { |
| "epoch": 1.0596379126730564, |
| "grad_norm": 0.359375, |
| "learning_rate": 2.44887663735294e-05, |
| "loss": 0.8128166794776917, |
| "step": 498 |
| }, |
| { |
| "epoch": 1.0638977635782747, |
| "grad_norm": 0.271484375, |
| "learning_rate": 2.4441327914022435e-05, |
| "loss": 0.7933678030967712, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.068157614483493, |
| "grad_norm": 0.5859375, |
| "learning_rate": 2.4393747622852666e-05, |
| "loss": 0.845329761505127, |
| "step": 502 |
| }, |
| { |
| "epoch": 1.0724174653887113, |
| "grad_norm": 0.291015625, |
| "learning_rate": 2.4346026547690983e-05, |
| "loss": 0.8825768232345581, |
| "step": 504 |
| }, |
| { |
| "epoch": 1.0766773162939298, |
| "grad_norm": 0.322265625, |
| "learning_rate": 2.4298165739308227e-05, |
| "loss": 0.9173828959465027, |
| "step": 506 |
| }, |
| { |
| "epoch": 1.0809371671991481, |
| "grad_norm": 0.625, |
| "learning_rate": 2.4250166251551998e-05, |
| "loss": 0.9571421146392822, |
| "step": 508 |
| }, |
| { |
| "epoch": 1.0851970181043664, |
| "grad_norm": 0.29296875, |
| "learning_rate": 2.4202029141323492e-05, |
| "loss": 0.8474833369255066, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.0894568690095847, |
| "grad_norm": 0.34765625, |
| "learning_rate": 2.415375546855422e-05, |
| "loss": 0.8801344633102417, |
| "step": 512 |
| }, |
| { |
| "epoch": 1.093716719914803, |
| "grad_norm": 0.33984375, |
| "learning_rate": 2.4105346296182648e-05, |
| "loss": 0.8761341571807861, |
| "step": 514 |
| }, |
| { |
| "epoch": 1.0979765708200213, |
| "grad_norm": 0.921875, |
| "learning_rate": 2.4056802690130826e-05, |
| "loss": 0.8511140942573547, |
| "step": 516 |
| }, |
| { |
| "epoch": 1.1022364217252396, |
| "grad_norm": 0.2470703125, |
| "learning_rate": 2.4008125719280893e-05, |
| "loss": 0.8243319392204285, |
| "step": 518 |
| }, |
| { |
| "epoch": 1.106496272630458, |
| "grad_norm": 0.2353515625, |
| "learning_rate": 2.395931645545155e-05, |
| "loss": 0.9023821949958801, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.1107561235356762, |
| "grad_norm": 0.5859375, |
| "learning_rate": 2.391037597337446e-05, |
| "loss": 0.8977804183959961, |
| "step": 522 |
| }, |
| { |
| "epoch": 1.1150159744408945, |
| "grad_norm": 0.271484375, |
| "learning_rate": 2.3861305350670564e-05, |
| "loss": 0.8644490242004395, |
| "step": 524 |
| }, |
| { |
| "epoch": 1.1192758253461128, |
| "grad_norm": 0.220703125, |
| "learning_rate": 2.381210566782642e-05, |
| "loss": 0.8652825951576233, |
| "step": 526 |
| }, |
| { |
| "epoch": 1.123535676251331, |
| "grad_norm": 0.48828125, |
| "learning_rate": 2.3762778008170296e-05, |
| "loss": 0.9315000176429749, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.1277955271565494, |
| "grad_norm": 0.298828125, |
| "learning_rate": 2.3713323457848425e-05, |
| "loss": 0.8627546429634094, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.132055378061768, |
| "grad_norm": 0.3515625, |
| "learning_rate": 2.366374310580106e-05, |
| "loss": 0.8466436266899109, |
| "step": 532 |
| }, |
| { |
| "epoch": 1.1363152289669862, |
| "grad_norm": 0.234375, |
| "learning_rate": 2.3614038043738432e-05, |
| "loss": 0.8433495163917542, |
| "step": 534 |
| }, |
| { |
| "epoch": 1.1405750798722045, |
| "grad_norm": 0.671875, |
| "learning_rate": 2.35642093661168e-05, |
| "loss": 0.9653686285018921, |
| "step": 536 |
| }, |
| { |
| "epoch": 1.1448349307774228, |
| "grad_norm": 0.46484375, |
| "learning_rate": 2.351425817011432e-05, |
| "loss": 0.9155454039573669, |
| "step": 538 |
| }, |
| { |
| "epoch": 1.1490947816826411, |
| "grad_norm": 0.2333984375, |
| "learning_rate": 2.3464185555606854e-05, |
| "loss": 0.8044310212135315, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.1533546325878594, |
| "grad_norm": 0.2451171875, |
| "learning_rate": 2.3413992625143808e-05, |
| "loss": 0.8448784947395325, |
| "step": 542 |
| }, |
| { |
| "epoch": 1.1576144834930777, |
| "grad_norm": 0.51171875, |
| "learning_rate": 2.3363680483923794e-05, |
| "loss": 0.9145954251289368, |
| "step": 544 |
| }, |
| { |
| "epoch": 1.161874334398296, |
| "grad_norm": 0.251953125, |
| "learning_rate": 2.3313250239770364e-05, |
| "loss": 0.8059402108192444, |
| "step": 546 |
| }, |
| { |
| "epoch": 1.1661341853035143, |
| "grad_norm": 0.32421875, |
| "learning_rate": 2.326270300310756e-05, |
| "loss": 0.910370945930481, |
| "step": 548 |
| }, |
| { |
| "epoch": 1.1703940362087326, |
| "grad_norm": 0.2060546875, |
| "learning_rate": 2.3212039886935464e-05, |
| "loss": 0.8459041118621826, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.174653887113951, |
| "grad_norm": 0.23046875, |
| "learning_rate": 2.3161262006805744e-05, |
| "loss": 0.8679651618003845, |
| "step": 552 |
| }, |
| { |
| "epoch": 1.1789137380191694, |
| "grad_norm": 0.353515625, |
| "learning_rate": 2.3110370480797046e-05, |
| "loss": 0.8998923897743225, |
| "step": 554 |
| }, |
| { |
| "epoch": 1.1831735889243877, |
| "grad_norm": 0.380859375, |
| "learning_rate": 2.3059366429490382e-05, |
| "loss": 0.9410486817359924, |
| "step": 556 |
| }, |
| { |
| "epoch": 1.187433439829606, |
| "grad_norm": 0.294921875, |
| "learning_rate": 2.3008250975944458e-05, |
| "loss": 0.8485605120658875, |
| "step": 558 |
| }, |
| { |
| "epoch": 1.1916932907348243, |
| "grad_norm": 0.240234375, |
| "learning_rate": 2.2957025245670945e-05, |
| "loss": 0.8777744770050049, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.1959531416400426, |
| "grad_norm": 0.40234375, |
| "learning_rate": 2.2905690366609703e-05, |
| "loss": 0.9006752967834473, |
| "step": 562 |
| }, |
| { |
| "epoch": 1.200212992545261, |
| "grad_norm": 0.3828125, |
| "learning_rate": 2.2854247469103943e-05, |
| "loss": 0.8309807181358337, |
| "step": 564 |
| }, |
| { |
| "epoch": 1.2044728434504792, |
| "grad_norm": 0.26953125, |
| "learning_rate": 2.280269768587534e-05, |
| "loss": 0.9057250618934631, |
| "step": 566 |
| }, |
| { |
| "epoch": 1.2087326943556975, |
| "grad_norm": 0.2080078125, |
| "learning_rate": 2.2751042151999064e-05, |
| "loss": 0.829549252986908, |
| "step": 568 |
| }, |
| { |
| "epoch": 1.2129925452609158, |
| "grad_norm": 0.2734375, |
| "learning_rate": 2.2699282004878834e-05, |
| "loss": 0.9091805219650269, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.2172523961661341, |
| "grad_norm": 0.2490234375, |
| "learning_rate": 2.264741838422183e-05, |
| "loss": 0.8178958296775818, |
| "step": 572 |
| }, |
| { |
| "epoch": 1.2215122470713524, |
| "grad_norm": 0.62109375, |
| "learning_rate": 2.2595452432013637e-05, |
| "loss": 0.9319694638252258, |
| "step": 574 |
| }, |
| { |
| "epoch": 1.225772097976571, |
| "grad_norm": 0.255859375, |
| "learning_rate": 2.2543385292493068e-05, |
| "loss": 0.8054318428039551, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.230031948881789, |
| "grad_norm": 0.37890625, |
| "learning_rate": 2.2491218112126974e-05, |
| "loss": 0.8717759847640991, |
| "step": 578 |
| }, |
| { |
| "epoch": 1.2342917997870075, |
| "grad_norm": 0.28515625, |
| "learning_rate": 2.2438952039585023e-05, |
| "loss": 0.9084351062774658, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.2385516506922258, |
| "grad_norm": 0.25390625, |
| "learning_rate": 2.238658822571437e-05, |
| "loss": 0.877246618270874, |
| "step": 582 |
| }, |
| { |
| "epoch": 1.2428115015974441, |
| "grad_norm": 0.380859375, |
| "learning_rate": 2.2334127823514353e-05, |
| "loss": 0.8917878866195679, |
| "step": 584 |
| }, |
| { |
| "epoch": 1.2470713525026624, |
| "grad_norm": 0.2392578125, |
| "learning_rate": 2.2281571988111087e-05, |
| "loss": 0.9018102884292603, |
| "step": 586 |
| }, |
| { |
| "epoch": 1.2513312034078807, |
| "grad_norm": 0.181640625, |
| "learning_rate": 2.222892187673203e-05, |
| "loss": 0.8929234147071838, |
| "step": 588 |
| }, |
| { |
| "epoch": 1.255591054313099, |
| "grad_norm": 0.291015625, |
| "learning_rate": 2.2176178648680504e-05, |
| "loss": 0.9248031973838806, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.2598509052183173, |
| "grad_norm": 0.275390625, |
| "learning_rate": 2.2123343465310163e-05, |
| "loss": 0.9204663038253784, |
| "step": 592 |
| }, |
| { |
| "epoch": 1.2641107561235356, |
| "grad_norm": 0.2021484375, |
| "learning_rate": 2.2070417489999427e-05, |
| "loss": 0.8040061593055725, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.268370607028754, |
| "grad_norm": 0.349609375, |
| "learning_rate": 2.201740188812588e-05, |
| "loss": 0.9146944880485535, |
| "step": 596 |
| }, |
| { |
| "epoch": 1.2726304579339724, |
| "grad_norm": 0.2578125, |
| "learning_rate": 2.196429782704057e-05, |
| "loss": 0.8526248931884766, |
| "step": 598 |
| }, |
| { |
| "epoch": 1.2768903088391905, |
| "grad_norm": 0.328125, |
| "learning_rate": 2.191110647604235e-05, |
| "loss": 0.8366101384162903, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.281150159744409, |
| "grad_norm": 0.2333984375, |
| "learning_rate": 2.1857829006352092e-05, |
| "loss": 0.8716267347335815, |
| "step": 602 |
| }, |
| { |
| "epoch": 1.2854100106496273, |
| "grad_norm": 0.333984375, |
| "learning_rate": 2.180446659108693e-05, |
| "loss": 0.9040926694869995, |
| "step": 604 |
| }, |
| { |
| "epoch": 1.2896698615548456, |
| "grad_norm": 0.251953125, |
| "learning_rate": 2.1751020405234427e-05, |
| "loss": 0.8583382368087769, |
| "step": 606 |
| }, |
| { |
| "epoch": 1.293929712460064, |
| "grad_norm": 0.314453125, |
| "learning_rate": 2.1697491625626652e-05, |
| "loss": 0.8685941696166992, |
| "step": 608 |
| }, |
| { |
| "epoch": 1.2981895633652822, |
| "grad_norm": 0.21484375, |
| "learning_rate": 2.1643881430914343e-05, |
| "loss": 0.8654310703277588, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.3024494142705005, |
| "grad_norm": 0.2265625, |
| "learning_rate": 2.1590191001540903e-05, |
| "loss": 0.8943390846252441, |
| "step": 612 |
| }, |
| { |
| "epoch": 1.3067092651757188, |
| "grad_norm": 0.26171875, |
| "learning_rate": 2.153642151971643e-05, |
| "loss": 0.8576252460479736, |
| "step": 614 |
| }, |
| { |
| "epoch": 1.3109691160809371, |
| "grad_norm": 0.4375, |
| "learning_rate": 2.1482574169391664e-05, |
| "loss": 0.8761968612670898, |
| "step": 616 |
| }, |
| { |
| "epoch": 1.3152289669861554, |
| "grad_norm": 0.232421875, |
| "learning_rate": 2.1428650136231948e-05, |
| "loss": 0.8207455277442932, |
| "step": 618 |
| }, |
| { |
| "epoch": 1.3194888178913737, |
| "grad_norm": 0.22265625, |
| "learning_rate": 2.1374650607591106e-05, |
| "loss": 0.8694437742233276, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.323748668796592, |
| "grad_norm": 0.408203125, |
| "learning_rate": 2.1320576772485284e-05, |
| "loss": 0.872995138168335, |
| "step": 622 |
| }, |
| { |
| "epoch": 1.3280085197018106, |
| "grad_norm": 0.30859375, |
| "learning_rate": 2.126642982156679e-05, |
| "loss": 0.9666632413864136, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.3322683706070286, |
| "grad_norm": 0.3828125, |
| "learning_rate": 2.1212210947097873e-05, |
| "loss": 0.8025370836257935, |
| "step": 626 |
| }, |
| { |
| "epoch": 1.3365282215122471, |
| "grad_norm": 0.349609375, |
| "learning_rate": 2.1157921342924457e-05, |
| "loss": 0.8531129956245422, |
| "step": 628 |
| }, |
| { |
| "epoch": 1.3407880724174654, |
| "grad_norm": 0.2734375, |
| "learning_rate": 2.1103562204449876e-05, |
| "loss": 0.8310921788215637, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.3450479233226837, |
| "grad_norm": 0.291015625, |
| "learning_rate": 2.1049134728608537e-05, |
| "loss": 0.903289794921875, |
| "step": 632 |
| }, |
| { |
| "epoch": 1.349307774227902, |
| "grad_norm": 0.28125, |
| "learning_rate": 2.0994640113839568e-05, |
| "loss": 0.8707770705223083, |
| "step": 634 |
| }, |
| { |
| "epoch": 1.3535676251331203, |
| "grad_norm": 0.267578125, |
| "learning_rate": 2.0940079560060427e-05, |
| "loss": 0.8999609351158142, |
| "step": 636 |
| }, |
| { |
| "epoch": 1.3578274760383386, |
| "grad_norm": 0.2119140625, |
| "learning_rate": 2.088545426864048e-05, |
| "loss": 0.8670209646224976, |
| "step": 638 |
| }, |
| { |
| "epoch": 1.362087326943557, |
| "grad_norm": 0.349609375, |
| "learning_rate": 2.0830765442374563e-05, |
| "loss": 0.8102102279663086, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.3663471778487752, |
| "grad_norm": 0.2041015625, |
| "learning_rate": 2.077601428545648e-05, |
| "loss": 0.8202542662620544, |
| "step": 642 |
| }, |
| { |
| "epoch": 1.3706070287539935, |
| "grad_norm": 0.234375, |
| "learning_rate": 2.0721202003452496e-05, |
| "loss": 0.8944796323776245, |
| "step": 644 |
| }, |
| { |
| "epoch": 1.374866879659212, |
| "grad_norm": 0.208984375, |
| "learning_rate": 2.066632980327478e-05, |
| "loss": 0.9467480778694153, |
| "step": 646 |
| }, |
| { |
| "epoch": 1.3791267305644301, |
| "grad_norm": 0.482421875, |
| "learning_rate": 2.061139889315486e-05, |
| "loss": 0.8729652762413025, |
| "step": 648 |
| }, |
| { |
| "epoch": 1.3833865814696487, |
| "grad_norm": 0.275390625, |
| "learning_rate": 2.0556410482616977e-05, |
| "loss": 0.8954660892486572, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.387646432374867, |
| "grad_norm": 0.2734375, |
| "learning_rate": 2.050136578245149e-05, |
| "loss": 0.870725691318512, |
| "step": 652 |
| }, |
| { |
| "epoch": 1.3919062832800853, |
| "grad_norm": 0.251953125, |
| "learning_rate": 2.0446266004688197e-05, |
| "loss": 0.8651110529899597, |
| "step": 654 |
| }, |
| { |
| "epoch": 1.3961661341853036, |
| "grad_norm": 0.240234375, |
| "learning_rate": 2.039111236256964e-05, |
| "loss": 0.8937119841575623, |
| "step": 656 |
| }, |
| { |
| "epoch": 1.4004259850905219, |
| "grad_norm": 0.2333984375, |
| "learning_rate": 2.0335906070524416e-05, |
| "loss": 0.8803120851516724, |
| "step": 658 |
| }, |
| { |
| "epoch": 1.4046858359957402, |
| "grad_norm": 0.2236328125, |
| "learning_rate": 2.02806483441404e-05, |
| "loss": 0.8514755368232727, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.4089456869009584, |
| "grad_norm": 0.1630859375, |
| "learning_rate": 2.0225340400138033e-05, |
| "loss": 0.8654860258102417, |
| "step": 662 |
| }, |
| { |
| "epoch": 1.4132055378061767, |
| "grad_norm": 0.1904296875, |
| "learning_rate": 2.0169983456343464e-05, |
| "loss": 0.861249566078186, |
| "step": 664 |
| }, |
| { |
| "epoch": 1.417465388711395, |
| "grad_norm": 0.48046875, |
| "learning_rate": 2.011457873166179e-05, |
| "loss": 0.8996407389640808, |
| "step": 666 |
| }, |
| { |
| "epoch": 1.4217252396166133, |
| "grad_norm": 0.390625, |
| "learning_rate": 2.005912744605019e-05, |
| "loss": 0.822201132774353, |
| "step": 668 |
| }, |
| { |
| "epoch": 1.4259850905218316, |
| "grad_norm": 0.294921875, |
| "learning_rate": 2.0003630820491066e-05, |
| "loss": 0.8432199358940125, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.4302449414270502, |
| "grad_norm": 0.263671875, |
| "learning_rate": 1.9948090076965163e-05, |
| "loss": 0.8672274351119995, |
| "step": 672 |
| }, |
| { |
| "epoch": 1.4345047923322682, |
| "grad_norm": 0.25, |
| "learning_rate": 1.9892506438424666e-05, |
| "loss": 0.8486787676811218, |
| "step": 674 |
| }, |
| { |
| "epoch": 1.4387646432374868, |
| "grad_norm": 0.279296875, |
| "learning_rate": 1.9836881128766248e-05, |
| "loss": 0.8892148733139038, |
| "step": 676 |
| }, |
| { |
| "epoch": 1.443024494142705, |
| "grad_norm": 0.27734375, |
| "learning_rate": 1.9781215372804158e-05, |
| "loss": 0.8915472030639648, |
| "step": 678 |
| }, |
| { |
| "epoch": 1.4472843450479234, |
| "grad_norm": 0.3203125, |
| "learning_rate": 1.9725510396243226e-05, |
| "loss": 0.8767306804656982, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.4515441959531417, |
| "grad_norm": 0.421875, |
| "learning_rate": 1.9669767425651873e-05, |
| "loss": 1.0251777172088623, |
| "step": 682 |
| }, |
| { |
| "epoch": 1.45580404685836, |
| "grad_norm": 0.32421875, |
| "learning_rate": 1.9613987688435132e-05, |
| "loss": 0.8821164965629578, |
| "step": 684 |
| }, |
| { |
| "epoch": 1.4600638977635783, |
| "grad_norm": 0.2099609375, |
| "learning_rate": 1.955817241280757e-05, |
| "loss": 0.8836470246315002, |
| "step": 686 |
| }, |
| { |
| "epoch": 1.4643237486687966, |
| "grad_norm": 0.173828125, |
| "learning_rate": 1.9502322827766297e-05, |
| "loss": 0.9067674279212952, |
| "step": 688 |
| }, |
| { |
| "epoch": 1.4685835995740149, |
| "grad_norm": 0.2294921875, |
| "learning_rate": 1.9446440163063875e-05, |
| "loss": 0.9052207469940186, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.4728434504792332, |
| "grad_norm": 0.271484375, |
| "learning_rate": 1.939052564918126e-05, |
| "loss": 0.8458245396614075, |
| "step": 692 |
| }, |
| { |
| "epoch": 1.4771033013844517, |
| "grad_norm": 0.328125, |
| "learning_rate": 1.9334580517300668e-05, |
| "loss": 0.9541709423065186, |
| "step": 694 |
| }, |
| { |
| "epoch": 1.4813631522896697, |
| "grad_norm": 0.2255859375, |
| "learning_rate": 1.9278605999278513e-05, |
| "loss": 0.9391557574272156, |
| "step": 696 |
| }, |
| { |
| "epoch": 1.4856230031948883, |
| "grad_norm": 0.18359375, |
| "learning_rate": 1.922260332761827e-05, |
| "loss": 0.9119634628295898, |
| "step": 698 |
| }, |
| { |
| "epoch": 1.4898828541001066, |
| "grad_norm": 0.3671875, |
| "learning_rate": 1.9166573735443302e-05, |
| "loss": 0.872115433216095, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.4941427050053249, |
| "grad_norm": 2.609375, |
| "learning_rate": 1.9110518456469764e-05, |
| "loss": 0.9491547346115112, |
| "step": 702 |
| }, |
| { |
| "epoch": 1.4984025559105432, |
| "grad_norm": 0.1826171875, |
| "learning_rate": 1.905443872497939e-05, |
| "loss": 0.8039662837982178, |
| "step": 704 |
| }, |
| { |
| "epoch": 1.5026624068157615, |
| "grad_norm": 0.26953125, |
| "learning_rate": 1.8998335775792343e-05, |
| "loss": 0.8376708030700684, |
| "step": 706 |
| }, |
| { |
| "epoch": 1.5069222577209798, |
| "grad_norm": 0.2275390625, |
| "learning_rate": 1.894221084424001e-05, |
| "loss": 0.8669439554214478, |
| "step": 708 |
| }, |
| { |
| "epoch": 1.511182108626198, |
| "grad_norm": 0.1962890625, |
| "learning_rate": 1.888606516613781e-05, |
| "loss": 0.8526804447174072, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.5154419595314164, |
| "grad_norm": 0.2060546875, |
| "learning_rate": 1.8829899977757996e-05, |
| "loss": 0.838132381439209, |
| "step": 712 |
| }, |
| { |
| "epoch": 1.5197018104366347, |
| "grad_norm": 0.294921875, |
| "learning_rate": 1.8773716515802387e-05, |
| "loss": 0.9030261635780334, |
| "step": 714 |
| }, |
| { |
| "epoch": 1.5239616613418532, |
| "grad_norm": 0.298828125, |
| "learning_rate": 1.8717516017375192e-05, |
| "loss": 0.8684689998626709, |
| "step": 716 |
| }, |
| { |
| "epoch": 1.5282215122470713, |
| "grad_norm": 0.208984375, |
| "learning_rate": 1.866129971995575e-05, |
| "loss": 0.950151264667511, |
| "step": 718 |
| }, |
| { |
| "epoch": 1.5324813631522898, |
| "grad_norm": 0.4765625, |
| "learning_rate": 1.8605068861371255e-05, |
| "loss": 0.9864886403083801, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.5367412140575079, |
| "grad_norm": 0.2421875, |
| "learning_rate": 1.8548824679769538e-05, |
| "loss": 0.9203893542289734, |
| "step": 722 |
| }, |
| { |
| "epoch": 1.5410010649627264, |
| "grad_norm": 0.2490234375, |
| "learning_rate": 1.8492568413591787e-05, |
| "loss": 0.8589147329330444, |
| "step": 724 |
| }, |
| { |
| "epoch": 1.5452609158679447, |
| "grad_norm": 0.2265625, |
| "learning_rate": 1.8436301301545282e-05, |
| "loss": 0.7150123119354248, |
| "step": 726 |
| }, |
| { |
| "epoch": 1.549520766773163, |
| "grad_norm": 0.1796875, |
| "learning_rate": 1.8380024582576128e-05, |
| "loss": 0.843291163444519, |
| "step": 728 |
| }, |
| { |
| "epoch": 1.5537806176783813, |
| "grad_norm": 0.267578125, |
| "learning_rate": 1.8323739495841943e-05, |
| "loss": 0.8748659491539001, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.5580404685835996, |
| "grad_norm": 0.2314453125, |
| "learning_rate": 1.8267447280684607e-05, |
| "loss": 0.8816359043121338, |
| "step": 732 |
| }, |
| { |
| "epoch": 1.5623003194888179, |
| "grad_norm": 0.1611328125, |
| "learning_rate": 1.8211149176602964e-05, |
| "loss": 0.9086512923240662, |
| "step": 734 |
| }, |
| { |
| "epoch": 1.5665601703940362, |
| "grad_norm": 0.19140625, |
| "learning_rate": 1.8154846423225515e-05, |
| "loss": 0.9282605648040771, |
| "step": 736 |
| }, |
| { |
| "epoch": 1.5708200212992547, |
| "grad_norm": 0.486328125, |
| "learning_rate": 1.8098540260283158e-05, |
| "loss": 0.8508008122444153, |
| "step": 738 |
| }, |
| { |
| "epoch": 1.5750798722044728, |
| "grad_norm": 0.2021484375, |
| "learning_rate": 1.8042231927581833e-05, |
| "loss": 0.7999932169914246, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.5793397231096913, |
| "grad_norm": 0.341796875, |
| "learning_rate": 1.7985922664975274e-05, |
| "loss": 0.9391716718673706, |
| "step": 742 |
| }, |
| { |
| "epoch": 1.5835995740149094, |
| "grad_norm": 0.201171875, |
| "learning_rate": 1.79296137123377e-05, |
| "loss": 0.8545106649398804, |
| "step": 744 |
| }, |
| { |
| "epoch": 1.5878594249201279, |
| "grad_norm": 0.197265625, |
| "learning_rate": 1.7873306309536485e-05, |
| "loss": 0.8491992950439453, |
| "step": 746 |
| }, |
| { |
| "epoch": 1.592119275825346, |
| "grad_norm": 0.306640625, |
| "learning_rate": 1.7817001696404894e-05, |
| "loss": 0.8515585064888, |
| "step": 748 |
| }, |
| { |
| "epoch": 1.5963791267305645, |
| "grad_norm": 0.212890625, |
| "learning_rate": 1.7760701112714742e-05, |
| "loss": 0.8558241128921509, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.6006389776357828, |
| "grad_norm": 0.28125, |
| "learning_rate": 1.7704405798149154e-05, |
| "loss": 0.8748922944068909, |
| "step": 752 |
| }, |
| { |
| "epoch": 1.604898828541001, |
| "grad_norm": 0.45703125, |
| "learning_rate": 1.764811699227521e-05, |
| "loss": 0.881086528301239, |
| "step": 754 |
| }, |
| { |
| "epoch": 1.6091586794462194, |
| "grad_norm": 0.2373046875, |
| "learning_rate": 1.7591835934516677e-05, |
| "loss": 0.8601434230804443, |
| "step": 756 |
| }, |
| { |
| "epoch": 1.6134185303514377, |
| "grad_norm": 0.27734375, |
| "learning_rate": 1.7535563864126723e-05, |
| "loss": 0.925481915473938, |
| "step": 758 |
| }, |
| { |
| "epoch": 1.617678381256656, |
| "grad_norm": 0.224609375, |
| "learning_rate": 1.7479302020160627e-05, |
| "loss": 0.8856874108314514, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.6219382321618743, |
| "grad_norm": 0.6875, |
| "learning_rate": 1.7423051641448478e-05, |
| "loss": 0.9088162779808044, |
| "step": 762 |
| }, |
| { |
| "epoch": 1.6261980830670928, |
| "grad_norm": 0.2734375, |
| "learning_rate": 1.7366813966567914e-05, |
| "loss": 0.7893877029418945, |
| "step": 764 |
| }, |
| { |
| "epoch": 1.6304579339723109, |
| "grad_norm": 0.3046875, |
| "learning_rate": 1.7310590233816868e-05, |
| "loss": 0.8651562929153442, |
| "step": 766 |
| }, |
| { |
| "epoch": 1.6347177848775294, |
| "grad_norm": 0.2470703125, |
| "learning_rate": 1.7254381681186248e-05, |
| "loss": 0.8518175482749939, |
| "step": 768 |
| }, |
| { |
| "epoch": 1.6389776357827475, |
| "grad_norm": 0.306640625, |
| "learning_rate": 1.7198189546332738e-05, |
| "loss": 0.8798878192901611, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.643237486687966, |
| "grad_norm": 0.248046875, |
| "learning_rate": 1.7142015066551515e-05, |
| "loss": 0.815255343914032, |
| "step": 772 |
| }, |
| { |
| "epoch": 1.6474973375931843, |
| "grad_norm": 0.2373046875, |
| "learning_rate": 1.7085859478748988e-05, |
| "loss": 0.936029314994812, |
| "step": 774 |
| }, |
| { |
| "epoch": 1.6517571884984026, |
| "grad_norm": 0.2060546875, |
| "learning_rate": 1.7029724019415604e-05, |
| "loss": 0.9097844362258911, |
| "step": 776 |
| }, |
| { |
| "epoch": 1.6560170394036209, |
| "grad_norm": 0.29296875, |
| "learning_rate": 1.6973609924598605e-05, |
| "loss": 0.8360726833343506, |
| "step": 778 |
| }, |
| { |
| "epoch": 1.6602768903088392, |
| "grad_norm": 0.31640625, |
| "learning_rate": 1.691751842987478e-05, |
| "loss": 0.7691276669502258, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.6645367412140575, |
| "grad_norm": 0.412109375, |
| "learning_rate": 1.6861450770323317e-05, |
| "loss": 0.9032488465309143, |
| "step": 782 |
| }, |
| { |
| "epoch": 1.6687965921192758, |
| "grad_norm": 0.30859375, |
| "learning_rate": 1.680540818049856e-05, |
| "loss": 0.8317678570747375, |
| "step": 784 |
| }, |
| { |
| "epoch": 1.6730564430244943, |
| "grad_norm": 0.455078125, |
| "learning_rate": 1.674939189440285e-05, |
| "loss": 0.8583813905715942, |
| "step": 786 |
| }, |
| { |
| "epoch": 1.6773162939297124, |
| "grad_norm": 0.2314453125, |
| "learning_rate": 1.6693403145459335e-05, |
| "loss": 0.8612514138221741, |
| "step": 788 |
| }, |
| { |
| "epoch": 1.681576144834931, |
| "grad_norm": 0.19921875, |
| "learning_rate": 1.6637443166484836e-05, |
| "loss": 0.8975757360458374, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.685835995740149, |
| "grad_norm": 0.2734375, |
| "learning_rate": 1.6581513189662684e-05, |
| "loss": 0.8868735432624817, |
| "step": 792 |
| }, |
| { |
| "epoch": 1.6900958466453675, |
| "grad_norm": 0.251953125, |
| "learning_rate": 1.652561444651558e-05, |
| "loss": 0.887550950050354, |
| "step": 794 |
| }, |
| { |
| "epoch": 1.6943556975505856, |
| "grad_norm": 0.25, |
| "learning_rate": 1.6469748167878502e-05, |
| "loss": 0.8832526803016663, |
| "step": 796 |
| }, |
| { |
| "epoch": 1.698615548455804, |
| "grad_norm": 0.203125, |
| "learning_rate": 1.64139155838716e-05, |
| "loss": 0.8911911845207214, |
| "step": 798 |
| }, |
| { |
| "epoch": 1.7028753993610224, |
| "grad_norm": 0.181640625, |
| "learning_rate": 1.635811792387308e-05, |
| "loss": 0.8105019927024841, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.7071352502662407, |
| "grad_norm": 0.337890625, |
| "learning_rate": 1.630235641649217e-05, |
| "loss": 0.8116901516914368, |
| "step": 802 |
| }, |
| { |
| "epoch": 1.711395101171459, |
| "grad_norm": 0.2353515625, |
| "learning_rate": 1.6246632289542054e-05, |
| "loss": 0.936326801776886, |
| "step": 804 |
| }, |
| { |
| "epoch": 1.7156549520766773, |
| "grad_norm": 0.251953125, |
| "learning_rate": 1.6190946770012838e-05, |
| "loss": 0.7342237234115601, |
| "step": 806 |
| }, |
| { |
| "epoch": 1.7199148029818956, |
| "grad_norm": 0.314453125, |
| "learning_rate": 1.613530108404451e-05, |
| "loss": 0.8804312944412231, |
| "step": 808 |
| }, |
| { |
| "epoch": 1.7241746538871139, |
| "grad_norm": 0.255859375, |
| "learning_rate": 1.6079696456899987e-05, |
| "loss": 0.900128960609436, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.7284345047923324, |
| "grad_norm": 0.2353515625, |
| "learning_rate": 1.6024134112938102e-05, |
| "loss": 0.9259054660797119, |
| "step": 812 |
| }, |
| { |
| "epoch": 1.7326943556975505, |
| "grad_norm": 0.353515625, |
| "learning_rate": 1.5968615275586648e-05, |
| "loss": 0.7679681777954102, |
| "step": 814 |
| }, |
| { |
| "epoch": 1.736954206602769, |
| "grad_norm": 0.267578125, |
| "learning_rate": 1.5913141167315455e-05, |
| "loss": 0.8207501173019409, |
| "step": 816 |
| }, |
| { |
| "epoch": 1.741214057507987, |
| "grad_norm": 0.69140625, |
| "learning_rate": 1.5857713009609468e-05, |
| "loss": 0.8840711116790771, |
| "step": 818 |
| }, |
| { |
| "epoch": 1.7454739084132056, |
| "grad_norm": 1.03125, |
| "learning_rate": 1.5802332022941827e-05, |
| "loss": 0.87161785364151, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.749733759318424, |
| "grad_norm": 0.1552734375, |
| "learning_rate": 1.5746999426747028e-05, |
| "loss": 0.8653435111045837, |
| "step": 822 |
| }, |
| { |
| "epoch": 1.7539936102236422, |
| "grad_norm": 0.39453125, |
| "learning_rate": 1.5691716439394043e-05, |
| "loss": 0.8810278177261353, |
| "step": 824 |
| }, |
| { |
| "epoch": 1.7582534611288605, |
| "grad_norm": 0.29296875, |
| "learning_rate": 1.563648427815953e-05, |
| "loss": 0.8902249336242676, |
| "step": 826 |
| }, |
| { |
| "epoch": 1.7625133120340788, |
| "grad_norm": 0.32421875, |
| "learning_rate": 1.558130415920098e-05, |
| "loss": 0.8972048163414001, |
| "step": 828 |
| }, |
| { |
| "epoch": 1.766773162939297, |
| "grad_norm": 0.2412109375, |
| "learning_rate": 1.552617729752998e-05, |
| "loss": 0.8320347666740417, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.7710330138445154, |
| "grad_norm": 0.1982421875, |
| "learning_rate": 1.5471104906985447e-05, |
| "loss": 0.8805668354034424, |
| "step": 832 |
| }, |
| { |
| "epoch": 1.775292864749734, |
| "grad_norm": 0.146484375, |
| "learning_rate": 1.5416088200206873e-05, |
| "loss": 0.8669639229774475, |
| "step": 834 |
| }, |
| { |
| "epoch": 1.779552715654952, |
| "grad_norm": 0.2412109375, |
| "learning_rate": 1.5361128388607685e-05, |
| "loss": 0.8641019463539124, |
| "step": 836 |
| }, |
| { |
| "epoch": 1.7838125665601705, |
| "grad_norm": 0.2080078125, |
| "learning_rate": 1.5306226682348513e-05, |
| "loss": 0.8257539868354797, |
| "step": 838 |
| }, |
| { |
| "epoch": 1.7880724174653886, |
| "grad_norm": 0.2353515625, |
| "learning_rate": 1.525138429031056e-05, |
| "loss": 0.8225594758987427, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.792332268370607, |
| "grad_norm": 0.2734375, |
| "learning_rate": 1.5196602420068995e-05, |
| "loss": 0.8701678514480591, |
| "step": 842 |
| }, |
| { |
| "epoch": 1.7965921192758252, |
| "grad_norm": 0.28515625, |
| "learning_rate": 1.514188227786637e-05, |
| "loss": 0.8979432582855225, |
| "step": 844 |
| }, |
| { |
| "epoch": 1.8008519701810437, |
| "grad_norm": 0.26171875, |
| "learning_rate": 1.5087225068586032e-05, |
| "loss": 0.8577451109886169, |
| "step": 846 |
| }, |
| { |
| "epoch": 1.805111821086262, |
| "grad_norm": 0.375, |
| "learning_rate": 1.5032631995725602e-05, |
| "loss": 0.7677904367446899, |
| "step": 848 |
| }, |
| { |
| "epoch": 1.8093716719914803, |
| "grad_norm": 0.2138671875, |
| "learning_rate": 1.4978104261370499e-05, |
| "loss": 0.8740429878234863, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.8136315228966986, |
| "grad_norm": 0.2080078125, |
| "learning_rate": 1.4923643066167442e-05, |
| "loss": 0.8772373795509338, |
| "step": 852 |
| }, |
| { |
| "epoch": 1.817891373801917, |
| "grad_norm": 0.1923828125, |
| "learning_rate": 1.4869249609298016e-05, |
| "loss": 0.8475224375724792, |
| "step": 854 |
| }, |
| { |
| "epoch": 1.8221512247071352, |
| "grad_norm": 0.2294921875, |
| "learning_rate": 1.4814925088452294e-05, |
| "loss": 0.8336386680603027, |
| "step": 856 |
| }, |
| { |
| "epoch": 1.8264110756123535, |
| "grad_norm": 0.26171875, |
| "learning_rate": 1.4760670699802433e-05, |
| "loss": 0.8594604730606079, |
| "step": 858 |
| }, |
| { |
| "epoch": 1.830670926517572, |
| "grad_norm": 0.345703125, |
| "learning_rate": 1.4706487637976349e-05, |
| "loss": 0.8947794437408447, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.83493077742279, |
| "grad_norm": 0.181640625, |
| "learning_rate": 1.4652377096031413e-05, |
| "loss": 0.8090410828590393, |
| "step": 862 |
| }, |
| { |
| "epoch": 1.8391906283280086, |
| "grad_norm": 0.255859375, |
| "learning_rate": 1.4598340265428186e-05, |
| "loss": 0.8447999954223633, |
| "step": 864 |
| }, |
| { |
| "epoch": 1.8434504792332267, |
| "grad_norm": 0.1728515625, |
| "learning_rate": 1.4544378336004174e-05, |
| "loss": 0.8753990530967712, |
| "step": 866 |
| }, |
| { |
| "epoch": 1.8477103301384452, |
| "grad_norm": 0.291015625, |
| "learning_rate": 1.4490492495947626e-05, |
| "loss": 0.8337631225585938, |
| "step": 868 |
| }, |
| { |
| "epoch": 1.8519701810436635, |
| "grad_norm": 0.515625, |
| "learning_rate": 1.4436683931771386e-05, |
| "loss": 0.8855006098747253, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.8562300319488818, |
| "grad_norm": 0.25, |
| "learning_rate": 1.4382953828286769e-05, |
| "loss": 0.8446431756019592, |
| "step": 872 |
| }, |
| { |
| "epoch": 1.8604898828541, |
| "grad_norm": 0.1845703125, |
| "learning_rate": 1.4329303368577442e-05, |
| "loss": 0.9195294976234436, |
| "step": 874 |
| }, |
| { |
| "epoch": 1.8647497337593184, |
| "grad_norm": 0.2197265625, |
| "learning_rate": 1.4275733733973408e-05, |
| "loss": 0.8846089243888855, |
| "step": 876 |
| }, |
| { |
| "epoch": 1.8690095846645367, |
| "grad_norm": 0.310546875, |
| "learning_rate": 1.4222246104024985e-05, |
| "loss": 0.8711283802986145, |
| "step": 878 |
| }, |
| { |
| "epoch": 1.873269435569755, |
| "grad_norm": 0.2421875, |
| "learning_rate": 1.4168841656476817e-05, |
| "loss": 0.8777478337287903, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.8775292864749735, |
| "grad_norm": 0.2060546875, |
| "learning_rate": 1.411552156724196e-05, |
| "loss": 0.9211516976356506, |
| "step": 882 |
| }, |
| { |
| "epoch": 1.8817891373801916, |
| "grad_norm": 0.2060546875, |
| "learning_rate": 1.4062287010375991e-05, |
| "loss": 0.7991109490394592, |
| "step": 884 |
| }, |
| { |
| "epoch": 1.8860489882854101, |
| "grad_norm": 0.267578125, |
| "learning_rate": 1.4009139158051142e-05, |
| "loss": 0.7523772120475769, |
| "step": 886 |
| }, |
| { |
| "epoch": 1.8903088391906282, |
| "grad_norm": 0.306640625, |
| "learning_rate": 1.3956079180530488e-05, |
| "loss": 0.8029102087020874, |
| "step": 888 |
| }, |
| { |
| "epoch": 1.8945686900958467, |
| "grad_norm": 1.09375, |
| "learning_rate": 1.3903108246142204e-05, |
| "loss": 0.9185020923614502, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.898828541001065, |
| "grad_norm": 0.185546875, |
| "learning_rate": 1.3850227521253819e-05, |
| "loss": 0.8490954041481018, |
| "step": 892 |
| }, |
| { |
| "epoch": 1.9030883919062833, |
| "grad_norm": 0.2490234375, |
| "learning_rate": 1.379743817024653e-05, |
| "loss": 0.9293335676193237, |
| "step": 894 |
| }, |
| { |
| "epoch": 1.9073482428115016, |
| "grad_norm": 0.26953125, |
| "learning_rate": 1.3744741355489573e-05, |
| "loss": 0.83982253074646, |
| "step": 896 |
| }, |
| { |
| "epoch": 1.91160809371672, |
| "grad_norm": 1.0703125, |
| "learning_rate": 1.3692138237314642e-05, |
| "loss": 0.8462101817131042, |
| "step": 898 |
| }, |
| { |
| "epoch": 1.9158679446219382, |
| "grad_norm": 0.296875, |
| "learning_rate": 1.3639629973990308e-05, |
| "loss": 0.8812525272369385, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.9201277955271565, |
| "grad_norm": 0.2021484375, |
| "learning_rate": 1.3587217721696534e-05, |
| "loss": 0.8216854929924011, |
| "step": 902 |
| }, |
| { |
| "epoch": 1.924387646432375, |
| "grad_norm": 0.267578125, |
| "learning_rate": 1.3534902634499233e-05, |
| "loss": 0.8462478518486023, |
| "step": 904 |
| }, |
| { |
| "epoch": 1.928647497337593, |
| "grad_norm": 0.2099609375, |
| "learning_rate": 1.3482685864324816e-05, |
| "loss": 0.8769442439079285, |
| "step": 906 |
| }, |
| { |
| "epoch": 1.9329073482428116, |
| "grad_norm": 0.2890625, |
| "learning_rate": 1.3430568560934854e-05, |
| "loss": 0.8453910946846008, |
| "step": 908 |
| }, |
| { |
| "epoch": 1.9371671991480297, |
| "grad_norm": 0.236328125, |
| "learning_rate": 1.3378551871900778e-05, |
| "loss": 0.7549237012863159, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.9414270500532482, |
| "grad_norm": 0.2734375, |
| "learning_rate": 1.332663694257857e-05, |
| "loss": 0.8484979867935181, |
| "step": 912 |
| }, |
| { |
| "epoch": 1.9456869009584663, |
| "grad_norm": 0.33203125, |
| "learning_rate": 1.3274824916083569e-05, |
| "loss": 0.8290879130363464, |
| "step": 914 |
| }, |
| { |
| "epoch": 1.9499467518636848, |
| "grad_norm": 0.314453125, |
| "learning_rate": 1.3223116933265295e-05, |
| "loss": 0.880619466304779, |
| "step": 916 |
| }, |
| { |
| "epoch": 1.9542066027689031, |
| "grad_norm": 0.205078125, |
| "learning_rate": 1.3171514132682338e-05, |
| "loss": 0.8705392479896545, |
| "step": 918 |
| }, |
| { |
| "epoch": 1.9584664536741214, |
| "grad_norm": 0.2333984375, |
| "learning_rate": 1.3120017650577267e-05, |
| "loss": 0.849368691444397, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.9627263045793397, |
| "grad_norm": 1.046875, |
| "learning_rate": 1.3068628620851627e-05, |
| "loss": 0.8190315961837769, |
| "step": 922 |
| }, |
| { |
| "epoch": 1.966986155484558, |
| "grad_norm": 0.390625, |
| "learning_rate": 1.3017348175040983e-05, |
| "loss": 0.8338907361030579, |
| "step": 924 |
| }, |
| { |
| "epoch": 1.9712460063897763, |
| "grad_norm": 0.2294921875, |
| "learning_rate": 1.2966177442289958e-05, |
| "loss": 0.783728837966919, |
| "step": 926 |
| }, |
| { |
| "epoch": 1.9755058572949946, |
| "grad_norm": 0.16796875, |
| "learning_rate": 1.2915117549327428e-05, |
| "loss": 0.8934606313705444, |
| "step": 928 |
| }, |
| { |
| "epoch": 1.9797657082002131, |
| "grad_norm": 0.458984375, |
| "learning_rate": 1.2864169620441688e-05, |
| "loss": 0.8038821220397949, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.9840255591054312, |
| "grad_norm": 0.349609375, |
| "learning_rate": 1.2813334777455677e-05, |
| "loss": 0.9299109578132629, |
| "step": 932 |
| }, |
| { |
| "epoch": 1.9882854100106497, |
| "grad_norm": 0.1748046875, |
| "learning_rate": 1.27626141397023e-05, |
| "loss": 0.7765668034553528, |
| "step": 934 |
| }, |
| { |
| "epoch": 1.9925452609158678, |
| "grad_norm": 0.224609375, |
| "learning_rate": 1.2712008823999787e-05, |
| "loss": 0.8893784284591675, |
| "step": 936 |
| }, |
| { |
| "epoch": 1.9968051118210863, |
| "grad_norm": 0.2275390625, |
| "learning_rate": 1.2661519944627085e-05, |
| "loss": 0.8529191017150879, |
| "step": 938 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.365234375, |
| "learning_rate": 1.2611148613299316e-05, |
| "loss": 0.8112186789512634, |
| "step": 940 |
| }, |
| { |
| "epoch": 2.0042598509052185, |
| "grad_norm": 0.1748046875, |
| "learning_rate": 1.2560895939143335e-05, |
| "loss": 0.6377139687538147, |
| "step": 942 |
| }, |
| { |
| "epoch": 2.0085197018104366, |
| "grad_norm": 0.15625, |
| "learning_rate": 1.2510763028673259e-05, |
| "loss": 0.5881322026252747, |
| "step": 944 |
| }, |
| { |
| "epoch": 2.012779552715655, |
| "grad_norm": 0.16015625, |
| "learning_rate": 1.2460750985766133e-05, |
| "loss": 0.5497787594795227, |
| "step": 946 |
| }, |
| { |
| "epoch": 2.017039403620873, |
| "grad_norm": 0.27734375, |
| "learning_rate": 1.2410860911637633e-05, |
| "loss": 0.6513974070549011, |
| "step": 948 |
| }, |
| { |
| "epoch": 2.0212992545260917, |
| "grad_norm": 0.16796875, |
| "learning_rate": 1.2361093904817794e-05, |
| "loss": 0.6880634427070618, |
| "step": 950 |
| }, |
| { |
| "epoch": 2.02555910543131, |
| "grad_norm": 0.2470703125, |
| "learning_rate": 1.2311451061126825e-05, |
| "loss": 0.669802188873291, |
| "step": 952 |
| }, |
| { |
| "epoch": 2.0298189563365283, |
| "grad_norm": 0.2451171875, |
| "learning_rate": 1.2261933473650986e-05, |
| "loss": 0.6532925963401794, |
| "step": 954 |
| }, |
| { |
| "epoch": 2.0340788072417464, |
| "grad_norm": 0.216796875, |
| "learning_rate": 1.2212542232718526e-05, |
| "loss": 0.6424761414527893, |
| "step": 956 |
| }, |
| { |
| "epoch": 2.038338658146965, |
| "grad_norm": 0.3046875, |
| "learning_rate": 1.2163278425875673e-05, |
| "loss": 0.599922776222229, |
| "step": 958 |
| }, |
| { |
| "epoch": 2.042598509052183, |
| "grad_norm": 0.361328125, |
| "learning_rate": 1.211414313786267e-05, |
| "loss": 0.5999573469161987, |
| "step": 960 |
| }, |
| { |
| "epoch": 2.0468583599574015, |
| "grad_norm": 0.341796875, |
| "learning_rate": 1.2065137450589902e-05, |
| "loss": 0.5664547681808472, |
| "step": 962 |
| }, |
| { |
| "epoch": 2.0511182108626196, |
| "grad_norm": 0.27734375, |
| "learning_rate": 1.2016262443114092e-05, |
| "loss": 0.6771121025085449, |
| "step": 964 |
| }, |
| { |
| "epoch": 2.055378061767838, |
| "grad_norm": 0.251953125, |
| "learning_rate": 1.19675191916145e-05, |
| "loss": 0.6011976003646851, |
| "step": 966 |
| }, |
| { |
| "epoch": 2.0596379126730566, |
| "grad_norm": 0.45703125, |
| "learning_rate": 1.1918908769369263e-05, |
| "loss": 0.624125599861145, |
| "step": 968 |
| }, |
| { |
| "epoch": 2.0638977635782747, |
| "grad_norm": 0.52734375, |
| "learning_rate": 1.187043224673176e-05, |
| "loss": 0.5838209390640259, |
| "step": 970 |
| }, |
| { |
| "epoch": 2.0681576144834932, |
| "grad_norm": 0.4765625, |
| "learning_rate": 1.1822090691107007e-05, |
| "loss": 0.6163349151611328, |
| "step": 972 |
| }, |
| { |
| "epoch": 2.0724174653887113, |
| "grad_norm": 0.2578125, |
| "learning_rate": 1.1773885166928193e-05, |
| "loss": 0.6664748787879944, |
| "step": 974 |
| }, |
| { |
| "epoch": 2.07667731629393, |
| "grad_norm": 0.26171875, |
| "learning_rate": 1.1725816735633235e-05, |
| "loss": 0.6090631484985352, |
| "step": 976 |
| }, |
| { |
| "epoch": 2.080937167199148, |
| "grad_norm": 0.2734375, |
| "learning_rate": 1.1677886455641398e-05, |
| "loss": 0.6150251030921936, |
| "step": 978 |
| }, |
| { |
| "epoch": 2.0851970181043664, |
| "grad_norm": 0.59375, |
| "learning_rate": 1.1630095382329988e-05, |
| "loss": 0.6834192872047424, |
| "step": 980 |
| }, |
| { |
| "epoch": 2.0894568690095845, |
| "grad_norm": 0.263671875, |
| "learning_rate": 1.158244456801111e-05, |
| "loss": 0.5855680108070374, |
| "step": 982 |
| }, |
| { |
| "epoch": 2.093716719914803, |
| "grad_norm": 0.26171875, |
| "learning_rate": 1.1534935061908528e-05, |
| "loss": 0.6290924549102783, |
| "step": 984 |
| }, |
| { |
| "epoch": 2.097976570820021, |
| "grad_norm": 0.27734375, |
| "learning_rate": 1.1487567910134513e-05, |
| "loss": 0.5710505247116089, |
| "step": 986 |
| }, |
| { |
| "epoch": 2.1022364217252396, |
| "grad_norm": 0.3203125, |
| "learning_rate": 1.1440344155666851e-05, |
| "loss": 0.6610984802246094, |
| "step": 988 |
| }, |
| { |
| "epoch": 2.106496272630458, |
| "grad_norm": 0.2138671875, |
| "learning_rate": 1.1393264838325865e-05, |
| "loss": 0.6294957995414734, |
| "step": 990 |
| }, |
| { |
| "epoch": 2.110756123535676, |
| "grad_norm": 0.46484375, |
| "learning_rate": 1.1346330994751497e-05, |
| "loss": 0.6489307880401611, |
| "step": 992 |
| }, |
| { |
| "epoch": 2.1150159744408947, |
| "grad_norm": 0.287109375, |
| "learning_rate": 1.1299543658380509e-05, |
| "loss": 0.5717250108718872, |
| "step": 994 |
| }, |
| { |
| "epoch": 2.119275825346113, |
| "grad_norm": 0.236328125, |
| "learning_rate": 1.1252903859423728e-05, |
| "loss": 0.5853033065795898, |
| "step": 996 |
| }, |
| { |
| "epoch": 2.1235356762513313, |
| "grad_norm": 0.2216796875, |
| "learning_rate": 1.120641262484335e-05, |
| "loss": 0.608925461769104, |
| "step": 998 |
| }, |
| { |
| "epoch": 2.1277955271565494, |
| "grad_norm": 0.216796875, |
| "learning_rate": 1.1160070978330323e-05, |
| "loss": 0.6262862086296082, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.132055378061768, |
| "grad_norm": 0.30078125, |
| "learning_rate": 1.1113879940281813e-05, |
| "loss": 0.5531333088874817, |
| "step": 1002 |
| }, |
| { |
| "epoch": 2.136315228966986, |
| "grad_norm": 0.302734375, |
| "learning_rate": 1.1067840527778752e-05, |
| "loss": 0.6142609119415283, |
| "step": 1004 |
| }, |
| { |
| "epoch": 2.1405750798722045, |
| "grad_norm": 0.271484375, |
| "learning_rate": 1.1021953754563406e-05, |
| "loss": 0.6254585981369019, |
| "step": 1006 |
| }, |
| { |
| "epoch": 2.1448349307774226, |
| "grad_norm": 0.224609375, |
| "learning_rate": 1.0976220631017094e-05, |
| "loss": 0.648613691329956, |
| "step": 1008 |
| }, |
| { |
| "epoch": 2.149094781682641, |
| "grad_norm": 0.400390625, |
| "learning_rate": 1.0930642164137922e-05, |
| "loss": 0.4957270324230194, |
| "step": 1010 |
| }, |
| { |
| "epoch": 2.1533546325878596, |
| "grad_norm": 0.2353515625, |
| "learning_rate": 1.0885219357518583e-05, |
| "loss": 0.6625660061836243, |
| "step": 1012 |
| }, |
| { |
| "epoch": 2.1576144834930777, |
| "grad_norm": 0.21875, |
| "learning_rate": 1.0839953211324313e-05, |
| "loss": 0.6448312401771545, |
| "step": 1014 |
| }, |
| { |
| "epoch": 2.1618743343982962, |
| "grad_norm": 0.310546875, |
| "learning_rate": 1.0794844722270831e-05, |
| "loss": 0.6265139579772949, |
| "step": 1016 |
| }, |
| { |
| "epoch": 2.1661341853035143, |
| "grad_norm": 0.3515625, |
| "learning_rate": 1.0749894883602406e-05, |
| "loss": 0.58893221616745, |
| "step": 1018 |
| }, |
| { |
| "epoch": 2.170394036208733, |
| "grad_norm": 0.5234375, |
| "learning_rate": 1.0705104685069973e-05, |
| "loss": 0.524358332157135, |
| "step": 1020 |
| }, |
| { |
| "epoch": 2.174653887113951, |
| "grad_norm": 0.392578125, |
| "learning_rate": 1.0660475112909354e-05, |
| "loss": 0.6041074395179749, |
| "step": 1022 |
| }, |
| { |
| "epoch": 2.1789137380191694, |
| "grad_norm": 0.25, |
| "learning_rate": 1.0616007149819543e-05, |
| "loss": 0.6296215653419495, |
| "step": 1024 |
| }, |
| { |
| "epoch": 2.1831735889243875, |
| "grad_norm": 0.37890625, |
| "learning_rate": 1.057170177494105e-05, |
| "loss": 0.6504489779472351, |
| "step": 1026 |
| }, |
| { |
| "epoch": 2.187433439829606, |
| "grad_norm": 0.361328125, |
| "learning_rate": 1.052755996383437e-05, |
| "loss": 0.6803485155105591, |
| "step": 1028 |
| }, |
| { |
| "epoch": 2.191693290734824, |
| "grad_norm": 0.2333984375, |
| "learning_rate": 1.0483582688458472e-05, |
| "loss": 0.6579641699790955, |
| "step": 1030 |
| }, |
| { |
| "epoch": 2.1959531416400426, |
| "grad_norm": 0.5234375, |
| "learning_rate": 1.0439770917149414e-05, |
| "loss": 0.6605786085128784, |
| "step": 1032 |
| }, |
| { |
| "epoch": 2.2002129925452607, |
| "grad_norm": 0.189453125, |
| "learning_rate": 1.0396125614599018e-05, |
| "loss": 0.6570585370063782, |
| "step": 1034 |
| }, |
| { |
| "epoch": 2.2044728434504792, |
| "grad_norm": 0.337890625, |
| "learning_rate": 1.0352647741833637e-05, |
| "loss": 0.6363896131515503, |
| "step": 1036 |
| }, |
| { |
| "epoch": 2.2087326943556977, |
| "grad_norm": 0.296875, |
| "learning_rate": 1.0309338256192982e-05, |
| "loss": 0.6393426656723022, |
| "step": 1038 |
| }, |
| { |
| "epoch": 2.212992545260916, |
| "grad_norm": 0.345703125, |
| "learning_rate": 1.0266198111309041e-05, |
| "loss": 0.7091052532196045, |
| "step": 1040 |
| }, |
| { |
| "epoch": 2.2172523961661343, |
| "grad_norm": 0.75, |
| "learning_rate": 1.0223228257085083e-05, |
| "loss": 0.6515456438064575, |
| "step": 1042 |
| }, |
| { |
| "epoch": 2.2215122470713524, |
| "grad_norm": 0.244140625, |
| "learning_rate": 1.0180429639674761e-05, |
| "loss": 0.6235453486442566, |
| "step": 1044 |
| }, |
| { |
| "epoch": 2.225772097976571, |
| "grad_norm": 0.263671875, |
| "learning_rate": 1.0137803201461248e-05, |
| "loss": 0.5850796699523926, |
| "step": 1046 |
| }, |
| { |
| "epoch": 2.230031948881789, |
| "grad_norm": 0.443359375, |
| "learning_rate": 1.0095349881036508e-05, |
| "loss": 0.5203170776367188, |
| "step": 1048 |
| }, |
| { |
| "epoch": 2.2342917997870075, |
| "grad_norm": 0.72265625, |
| "learning_rate": 1.0053070613180625e-05, |
| "loss": 0.6159985065460205, |
| "step": 1050 |
| }, |
| { |
| "epoch": 2.2385516506922256, |
| "grad_norm": 0.203125, |
| "learning_rate": 1.0010966328841206e-05, |
| "loss": 0.6239602565765381, |
| "step": 1052 |
| }, |
| { |
| "epoch": 2.242811501597444, |
| "grad_norm": 0.265625, |
| "learning_rate": 9.969037955112908e-06, |
| "loss": 0.6027981042861938, |
| "step": 1054 |
| }, |
| { |
| "epoch": 2.247071352502662, |
| "grad_norm": 0.30078125, |
| "learning_rate": 9.927286415217005e-06, |
| "loss": 0.591469407081604, |
| "step": 1056 |
| }, |
| { |
| "epoch": 2.2513312034078807, |
| "grad_norm": 0.1982421875, |
| "learning_rate": 9.88571262848107e-06, |
| "loss": 0.5683766007423401, |
| "step": 1058 |
| }, |
| { |
| "epoch": 2.255591054313099, |
| "grad_norm": 0.35546875, |
| "learning_rate": 9.844317510318719e-06, |
| "loss": 0.6158217191696167, |
| "step": 1060 |
| }, |
| { |
| "epoch": 2.2598509052183173, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.803101972209462e-06, |
| "loss": 0.5769312381744385, |
| "step": 1062 |
| }, |
| { |
| "epoch": 2.264110756123536, |
| "grad_norm": 0.24609375, |
| "learning_rate": 9.762066921678647e-06, |
| "loss": 0.5810741186141968, |
| "step": 1064 |
| }, |
| { |
| "epoch": 2.268370607028754, |
| "grad_norm": 0.28125, |
| "learning_rate": 9.721213262277447e-06, |
| "loss": 0.5853366255760193, |
| "step": 1066 |
| }, |
| { |
| "epoch": 2.2726304579339724, |
| "grad_norm": 0.2314453125, |
| "learning_rate": 9.680541893563e-06, |
| "loss": 0.5754764676094055, |
| "step": 1068 |
| }, |
| { |
| "epoch": 2.2768903088391905, |
| "grad_norm": 0.376953125, |
| "learning_rate": 9.640053711078571e-06, |
| "loss": 0.6414265632629395, |
| "step": 1070 |
| }, |
| { |
| "epoch": 2.281150159744409, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.599749606333844e-06, |
| "loss": 0.5730122327804565, |
| "step": 1072 |
| }, |
| { |
| "epoch": 2.285410010649627, |
| "grad_norm": 0.224609375, |
| "learning_rate": 9.559630466785301e-06, |
| "loss": 0.6548243761062622, |
| "step": 1074 |
| }, |
| { |
| "epoch": 2.2896698615548456, |
| "grad_norm": 0.359375, |
| "learning_rate": 9.519697175816675e-06, |
| "loss": 0.6757615804672241, |
| "step": 1076 |
| }, |
| { |
| "epoch": 2.2939297124600637, |
| "grad_norm": 0.271484375, |
| "learning_rate": 9.4799506127195e-06, |
| "loss": 0.6540831923484802, |
| "step": 1078 |
| }, |
| { |
| "epoch": 2.2981895633652822, |
| "grad_norm": 0.5390625, |
| "learning_rate": 9.44039165267372e-06, |
| "loss": 0.5985897779464722, |
| "step": 1080 |
| }, |
| { |
| "epoch": 2.3024494142705008, |
| "grad_norm": 0.318359375, |
| "learning_rate": 9.40102116672848e-06, |
| "loss": 0.6373129487037659, |
| "step": 1082 |
| }, |
| { |
| "epoch": 2.306709265175719, |
| "grad_norm": 0.32421875, |
| "learning_rate": 9.361840021782899e-06, |
| "loss": 0.5798696279525757, |
| "step": 1084 |
| }, |
| { |
| "epoch": 2.3109691160809374, |
| "grad_norm": 0.53125, |
| "learning_rate": 9.322849080566986e-06, |
| "loss": 0.6472339034080505, |
| "step": 1086 |
| }, |
| { |
| "epoch": 2.3152289669861554, |
| "grad_norm": 0.2236328125, |
| "learning_rate": 9.284049201622668e-06, |
| "loss": 0.5931280851364136, |
| "step": 1088 |
| }, |
| { |
| "epoch": 2.319488817891374, |
| "grad_norm": 0.26171875, |
| "learning_rate": 9.245441239284858e-06, |
| "loss": 0.6150895953178406, |
| "step": 1090 |
| }, |
| { |
| "epoch": 2.323748668796592, |
| "grad_norm": 0.37890625, |
| "learning_rate": 9.207026043662654e-06, |
| "loss": 0.5743486285209656, |
| "step": 1092 |
| }, |
| { |
| "epoch": 2.3280085197018106, |
| "grad_norm": 0.302734375, |
| "learning_rate": 9.168804460620634e-06, |
| "loss": 0.6586934328079224, |
| "step": 1094 |
| }, |
| { |
| "epoch": 2.3322683706070286, |
| "grad_norm": 0.333984375, |
| "learning_rate": 9.130777331760208e-06, |
| "loss": 0.581457793712616, |
| "step": 1096 |
| }, |
| { |
| "epoch": 2.336528221512247, |
| "grad_norm": 0.236328125, |
| "learning_rate": 9.092945494401107e-06, |
| "loss": 0.602104663848877, |
| "step": 1098 |
| }, |
| { |
| "epoch": 2.3407880724174652, |
| "grad_norm": 0.318359375, |
| "learning_rate": 9.055309781562922e-06, |
| "loss": 0.5987313985824585, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.3450479233226837, |
| "grad_norm": 0.212890625, |
| "learning_rate": 9.017871021946787e-06, |
| "loss": 0.5123194456100464, |
| "step": 1102 |
| }, |
| { |
| "epoch": 2.349307774227902, |
| "grad_norm": 0.494140625, |
| "learning_rate": 8.980630039917124e-06, |
| "loss": 0.5810441374778748, |
| "step": 1104 |
| }, |
| { |
| "epoch": 2.3535676251331203, |
| "grad_norm": 0.5390625, |
| "learning_rate": 8.943587655483478e-06, |
| "loss": 0.5871768593788147, |
| "step": 1106 |
| }, |
| { |
| "epoch": 2.357827476038339, |
| "grad_norm": 0.2412109375, |
| "learning_rate": 8.906744684282483e-06, |
| "loss": 0.6104775667190552, |
| "step": 1108 |
| }, |
| { |
| "epoch": 2.362087326943557, |
| "grad_norm": 0.296875, |
| "learning_rate": 8.870101937559877e-06, |
| "loss": 0.6351394653320312, |
| "step": 1110 |
| }, |
| { |
| "epoch": 2.3663471778487755, |
| "grad_norm": 0.52734375, |
| "learning_rate": 8.833660222152663e-06, |
| "loss": 0.6355900168418884, |
| "step": 1112 |
| }, |
| { |
| "epoch": 2.3706070287539935, |
| "grad_norm": 0.2294921875, |
| "learning_rate": 8.797420340471334e-06, |
| "loss": 0.5833765268325806, |
| "step": 1114 |
| }, |
| { |
| "epoch": 2.374866879659212, |
| "grad_norm": 0.2216796875, |
| "learning_rate": 8.761383090482205e-06, |
| "loss": 0.6019313931465149, |
| "step": 1116 |
| }, |
| { |
| "epoch": 2.37912673056443, |
| "grad_norm": 0.2265625, |
| "learning_rate": 8.725549265689833e-06, |
| "loss": 0.5999468564987183, |
| "step": 1118 |
| }, |
| { |
| "epoch": 2.3833865814696487, |
| "grad_norm": 0.484375, |
| "learning_rate": 8.689919655119559e-06, |
| "loss": 0.6521666646003723, |
| "step": 1120 |
| }, |
| { |
| "epoch": 2.3876464323748667, |
| "grad_norm": 0.224609375, |
| "learning_rate": 8.654495043300129e-06, |
| "loss": 0.612395703792572, |
| "step": 1122 |
| }, |
| { |
| "epoch": 2.3919062832800853, |
| "grad_norm": 0.466796875, |
| "learning_rate": 8.619276210246427e-06, |
| "loss": 0.5964239239692688, |
| "step": 1124 |
| }, |
| { |
| "epoch": 2.3961661341853033, |
| "grad_norm": 0.26171875, |
| "learning_rate": 8.584263931442275e-06, |
| "loss": 0.6384221911430359, |
| "step": 1126 |
| }, |
| { |
| "epoch": 2.400425985090522, |
| "grad_norm": 0.28125, |
| "learning_rate": 8.549458977823395e-06, |
| "loss": 0.6933798789978027, |
| "step": 1128 |
| }, |
| { |
| "epoch": 2.40468583599574, |
| "grad_norm": 0.1962890625, |
| "learning_rate": 8.514862115760396e-06, |
| "loss": 0.5889874696731567, |
| "step": 1130 |
| }, |
| { |
| "epoch": 2.4089456869009584, |
| "grad_norm": 0.30859375, |
| "learning_rate": 8.480474107041925e-06, |
| "loss": 0.6254542469978333, |
| "step": 1132 |
| }, |
| { |
| "epoch": 2.413205537806177, |
| "grad_norm": 0.314453125, |
| "learning_rate": 8.446295708857888e-06, |
| "loss": 0.6616327166557312, |
| "step": 1134 |
| }, |
| { |
| "epoch": 2.417465388711395, |
| "grad_norm": 0.375, |
| "learning_rate": 8.412327673782774e-06, |
| "loss": 0.6202198266983032, |
| "step": 1136 |
| }, |
| { |
| "epoch": 2.4217252396166136, |
| "grad_norm": 0.267578125, |
| "learning_rate": 8.378570749759076e-06, |
| "loss": 0.6176246404647827, |
| "step": 1138 |
| }, |
| { |
| "epoch": 2.4259850905218316, |
| "grad_norm": 0.369140625, |
| "learning_rate": 8.345025680080836e-06, |
| "loss": 0.5884604454040527, |
| "step": 1140 |
| }, |
| { |
| "epoch": 2.43024494142705, |
| "grad_norm": 0.4453125, |
| "learning_rate": 8.311693203377277e-06, |
| "loss": 0.5704495906829834, |
| "step": 1142 |
| }, |
| { |
| "epoch": 2.4345047923322682, |
| "grad_norm": 0.357421875, |
| "learning_rate": 8.278574053596534e-06, |
| "loss": 0.5104537606239319, |
| "step": 1144 |
| }, |
| { |
| "epoch": 2.4387646432374868, |
| "grad_norm": 0.375, |
| "learning_rate": 8.245668959989489e-06, |
| "loss": 0.6920484900474548, |
| "step": 1146 |
| }, |
| { |
| "epoch": 2.443024494142705, |
| "grad_norm": 0.3359375, |
| "learning_rate": 8.212978647093724e-06, |
| "loss": 0.605790376663208, |
| "step": 1148 |
| }, |
| { |
| "epoch": 2.4472843450479234, |
| "grad_norm": 0.431640625, |
| "learning_rate": 8.180503834717563e-06, |
| "loss": 0.6005589962005615, |
| "step": 1150 |
| }, |
| { |
| "epoch": 2.451544195953142, |
| "grad_norm": 0.30078125, |
| "learning_rate": 8.148245237924212e-06, |
| "loss": 0.6908122301101685, |
| "step": 1152 |
| }, |
| { |
| "epoch": 2.45580404685836, |
| "grad_norm": 0.392578125, |
| "learning_rate": 8.116203567016035e-06, |
| "loss": 0.5939027667045593, |
| "step": 1154 |
| }, |
| { |
| "epoch": 2.460063897763578, |
| "grad_norm": 0.357421875, |
| "learning_rate": 8.084379527518908e-06, |
| "loss": 0.6245042681694031, |
| "step": 1156 |
| }, |
| { |
| "epoch": 2.4643237486687966, |
| "grad_norm": 0.25390625, |
| "learning_rate": 8.05277382016666e-06, |
| "loss": 0.5638337731361389, |
| "step": 1158 |
| }, |
| { |
| "epoch": 2.468583599574015, |
| "grad_norm": 0.2890625, |
| "learning_rate": 8.021387140885672e-06, |
| "loss": 0.665945291519165, |
| "step": 1160 |
| }, |
| { |
| "epoch": 2.472843450479233, |
| "grad_norm": 0.2255859375, |
| "learning_rate": 7.99022018077955e-06, |
| "loss": 0.5603002309799194, |
| "step": 1162 |
| }, |
| { |
| "epoch": 2.4771033013844517, |
| "grad_norm": 0.287109375, |
| "learning_rate": 7.959273626113896e-06, |
| "loss": 0.5992410182952881, |
| "step": 1164 |
| }, |
| { |
| "epoch": 2.4813631522896697, |
| "grad_norm": 0.216796875, |
| "learning_rate": 7.9285481583012e-06, |
| "loss": 0.6628497242927551, |
| "step": 1166 |
| }, |
| { |
| "epoch": 2.4856230031948883, |
| "grad_norm": 0.984375, |
| "learning_rate": 7.898044453885837e-06, |
| "loss": 0.5260273218154907, |
| "step": 1168 |
| }, |
| { |
| "epoch": 2.4898828541001063, |
| "grad_norm": 0.265625, |
| "learning_rate": 7.867763184529182e-06, |
| "loss": 0.6244964599609375, |
| "step": 1170 |
| }, |
| { |
| "epoch": 2.494142705005325, |
| "grad_norm": 0.369140625, |
| "learning_rate": 7.837705016994796e-06, |
| "loss": 0.6657370328903198, |
| "step": 1172 |
| }, |
| { |
| "epoch": 2.498402555910543, |
| "grad_norm": 0.333984375, |
| "learning_rate": 7.80787061313377e-06, |
| "loss": 0.6410002708435059, |
| "step": 1174 |
| }, |
| { |
| "epoch": 2.5026624068157615, |
| "grad_norm": 0.1953125, |
| "learning_rate": 7.77826062987014e-06, |
| "loss": 0.5408449769020081, |
| "step": 1176 |
| }, |
| { |
| "epoch": 2.50692225772098, |
| "grad_norm": 0.35546875, |
| "learning_rate": 7.748875719186413e-06, |
| "loss": 0.5735031962394714, |
| "step": 1178 |
| }, |
| { |
| "epoch": 2.511182108626198, |
| "grad_norm": 0.322265625, |
| "learning_rate": 7.71971652810923e-06, |
| "loss": 0.6153873801231384, |
| "step": 1180 |
| }, |
| { |
| "epoch": 2.515441959531416, |
| "grad_norm": 0.33984375, |
| "learning_rate": 7.690783698695106e-06, |
| "loss": 0.5873544216156006, |
| "step": 1182 |
| }, |
| { |
| "epoch": 2.5197018104366347, |
| "grad_norm": 0.259765625, |
| "learning_rate": 7.662077868016297e-06, |
| "loss": 0.6717422604560852, |
| "step": 1184 |
| }, |
| { |
| "epoch": 2.523961661341853, |
| "grad_norm": 0.94921875, |
| "learning_rate": 7.633599668146775e-06, |
| "loss": 0.6083505153656006, |
| "step": 1186 |
| }, |
| { |
| "epoch": 2.5282215122470713, |
| "grad_norm": 0.30078125, |
| "learning_rate": 7.605349726148296e-06, |
| "loss": 0.6134154200553894, |
| "step": 1188 |
| }, |
| { |
| "epoch": 2.5324813631522898, |
| "grad_norm": 0.275390625, |
| "learning_rate": 7.577328664056617e-06, |
| "loss": 0.589963972568512, |
| "step": 1190 |
| }, |
| { |
| "epoch": 2.536741214057508, |
| "grad_norm": 0.51171875, |
| "learning_rate": 7.549537098867776e-06, |
| "loss": 0.5288025140762329, |
| "step": 1192 |
| }, |
| { |
| "epoch": 2.5410010649627264, |
| "grad_norm": 0.2451171875, |
| "learning_rate": 7.521975642524525e-06, |
| "loss": 0.616111159324646, |
| "step": 1194 |
| }, |
| { |
| "epoch": 2.545260915867945, |
| "grad_norm": 0.3046875, |
| "learning_rate": 7.494644901902843e-06, |
| "loss": 0.6015118360519409, |
| "step": 1196 |
| }, |
| { |
| "epoch": 2.549520766773163, |
| "grad_norm": 0.30078125, |
| "learning_rate": 7.467545478798574e-06, |
| "loss": 0.5770639777183533, |
| "step": 1198 |
| }, |
| { |
| "epoch": 2.553780617678381, |
| "grad_norm": 0.365234375, |
| "learning_rate": 7.440677969914182e-06, |
| "loss": 0.6590741872787476, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.5580404685835996, |
| "grad_norm": 0.2392578125, |
| "learning_rate": 7.4140429668456115e-06, |
| "loss": 0.47983720898628235, |
| "step": 1202 |
| }, |
| { |
| "epoch": 2.562300319488818, |
| "grad_norm": 0.31640625, |
| "learning_rate": 7.38764105606926e-06, |
| "loss": 0.549656093120575, |
| "step": 1204 |
| }, |
| { |
| "epoch": 2.566560170394036, |
| "grad_norm": 1.1640625, |
| "learning_rate": 7.361472818929058e-06, |
| "loss": 0.5793447494506836, |
| "step": 1206 |
| }, |
| { |
| "epoch": 2.5708200212992547, |
| "grad_norm": 0.251953125, |
| "learning_rate": 7.335538831623676e-06, |
| "loss": 0.637956976890564, |
| "step": 1208 |
| }, |
| { |
| "epoch": 2.5750798722044728, |
| "grad_norm": 0.5625, |
| "learning_rate": 7.309839665193839e-06, |
| "loss": 0.5784144401550293, |
| "step": 1210 |
| }, |
| { |
| "epoch": 2.5793397231096913, |
| "grad_norm": 0.2275390625, |
| "learning_rate": 7.284375885509741e-06, |
| "loss": 0.6299670338630676, |
| "step": 1212 |
| }, |
| { |
| "epoch": 2.5835995740149094, |
| "grad_norm": 0.322265625, |
| "learning_rate": 7.259148053258603e-06, |
| "loss": 0.674586296081543, |
| "step": 1214 |
| }, |
| { |
| "epoch": 2.587859424920128, |
| "grad_norm": 0.3671875, |
| "learning_rate": 7.234156723932312e-06, |
| "loss": 0.6188330054283142, |
| "step": 1216 |
| }, |
| { |
| "epoch": 2.592119275825346, |
| "grad_norm": 0.271484375, |
| "learning_rate": 7.20940244781519e-06, |
| "loss": 0.6208375096321106, |
| "step": 1218 |
| }, |
| { |
| "epoch": 2.5963791267305645, |
| "grad_norm": 0.388671875, |
| "learning_rate": 7.184885769971888e-06, |
| "loss": 0.6017476916313171, |
| "step": 1220 |
| }, |
| { |
| "epoch": 2.600638977635783, |
| "grad_norm": 0.333984375, |
| "learning_rate": 7.160607230235378e-06, |
| "loss": 0.6354559659957886, |
| "step": 1222 |
| }, |
| { |
| "epoch": 2.604898828541001, |
| "grad_norm": 0.31640625, |
| "learning_rate": 7.136567363195069e-06, |
| "loss": 0.6745753884315491, |
| "step": 1224 |
| }, |
| { |
| "epoch": 2.609158679446219, |
| "grad_norm": 0.427734375, |
| "learning_rate": 7.112766698185027e-06, |
| "loss": 0.5988171100616455, |
| "step": 1226 |
| }, |
| { |
| "epoch": 2.6134185303514377, |
| "grad_norm": 0.25390625, |
| "learning_rate": 7.089205759272327e-06, |
| "loss": 0.6004793643951416, |
| "step": 1228 |
| }, |
| { |
| "epoch": 2.617678381256656, |
| "grad_norm": 0.31640625, |
| "learning_rate": 7.06588506524552e-06, |
| "loss": 0.5850980877876282, |
| "step": 1230 |
| }, |
| { |
| "epoch": 2.6219382321618743, |
| "grad_norm": 0.33203125, |
| "learning_rate": 7.042805129603193e-06, |
| "loss": 0.5615159869194031, |
| "step": 1232 |
| }, |
| { |
| "epoch": 2.626198083067093, |
| "grad_norm": 0.333984375, |
| "learning_rate": 7.019966460542681e-06, |
| "loss": 0.6120025515556335, |
| "step": 1234 |
| }, |
| { |
| "epoch": 2.630457933972311, |
| "grad_norm": 0.35546875, |
| "learning_rate": 6.997369560948859e-06, |
| "loss": 0.6796953082084656, |
| "step": 1236 |
| }, |
| { |
| "epoch": 2.6347177848775294, |
| "grad_norm": 0.306640625, |
| "learning_rate": 6.975014928383083e-06, |
| "loss": 0.5794081091880798, |
| "step": 1238 |
| }, |
| { |
| "epoch": 2.6389776357827475, |
| "grad_norm": 0.28515625, |
| "learning_rate": 6.952903055072226e-06, |
| "loss": 0.5920906066894531, |
| "step": 1240 |
| }, |
| { |
| "epoch": 2.643237486687966, |
| "grad_norm": 0.2412109375, |
| "learning_rate": 6.9310344278978505e-06, |
| "loss": 0.5745714902877808, |
| "step": 1242 |
| }, |
| { |
| "epoch": 2.647497337593184, |
| "grad_norm": 0.2578125, |
| "learning_rate": 6.909409528385466e-06, |
| "loss": 0.5876143574714661, |
| "step": 1244 |
| }, |
| { |
| "epoch": 2.6517571884984026, |
| "grad_norm": 0.2734375, |
| "learning_rate": 6.888028832693953e-06, |
| "loss": 0.586786150932312, |
| "step": 1246 |
| }, |
| { |
| "epoch": 2.656017039403621, |
| "grad_norm": 0.380859375, |
| "learning_rate": 6.86689281160506e-06, |
| "loss": 0.5594542622566223, |
| "step": 1248 |
| }, |
| { |
| "epoch": 2.660276890308839, |
| "grad_norm": 0.23046875, |
| "learning_rate": 6.846001930513041e-06, |
| "loss": 0.6434107422828674, |
| "step": 1250 |
| }, |
| { |
| "epoch": 2.6645367412140573, |
| "grad_norm": 0.361328125, |
| "learning_rate": 6.825356649414415e-06, |
| "loss": 0.6385661959648132, |
| "step": 1252 |
| }, |
| { |
| "epoch": 2.668796592119276, |
| "grad_norm": 0.291015625, |
| "learning_rate": 6.80495742289783e-06, |
| "loss": 0.6039466261863708, |
| "step": 1254 |
| }, |
| { |
| "epoch": 2.6730564430244943, |
| "grad_norm": 0.298828125, |
| "learning_rate": 6.784804700134056e-06, |
| "loss": 0.6025973558425903, |
| "step": 1256 |
| }, |
| { |
| "epoch": 2.6773162939297124, |
| "grad_norm": 0.296875, |
| "learning_rate": 6.764898924866091e-06, |
| "loss": 0.6119323372840881, |
| "step": 1258 |
| }, |
| { |
| "epoch": 2.681576144834931, |
| "grad_norm": 0.32421875, |
| "learning_rate": 6.7452405353993985e-06, |
| "loss": 0.617369532585144, |
| "step": 1260 |
| }, |
| { |
| "epoch": 2.685835995740149, |
| "grad_norm": 0.330078125, |
| "learning_rate": 6.72582996459225e-06, |
| "loss": 0.6640692949295044, |
| "step": 1262 |
| }, |
| { |
| "epoch": 2.6900958466453675, |
| "grad_norm": 0.3359375, |
| "learning_rate": 6.706667639846196e-06, |
| "loss": 0.6609706282615662, |
| "step": 1264 |
| }, |
| { |
| "epoch": 2.6943556975505856, |
| "grad_norm": 0.2490234375, |
| "learning_rate": 6.687753983096654e-06, |
| "loss": 0.53211909532547, |
| "step": 1266 |
| }, |
| { |
| "epoch": 2.698615548455804, |
| "grad_norm": 0.455078125, |
| "learning_rate": 6.669089410803617e-06, |
| "loss": 0.667971134185791, |
| "step": 1268 |
| }, |
| { |
| "epoch": 2.702875399361022, |
| "grad_norm": 0.265625, |
| "learning_rate": 6.650674333942487e-06, |
| "loss": 0.5798393487930298, |
| "step": 1270 |
| }, |
| { |
| "epoch": 2.7071352502662407, |
| "grad_norm": 0.439453125, |
| "learning_rate": 6.632509157995023e-06, |
| "loss": 0.6258153915405273, |
| "step": 1272 |
| }, |
| { |
| "epoch": 2.711395101171459, |
| "grad_norm": 0.392578125, |
| "learning_rate": 6.614594282940414e-06, |
| "loss": 0.624832272529602, |
| "step": 1274 |
| }, |
| { |
| "epoch": 2.7156549520766773, |
| "grad_norm": 0.349609375, |
| "learning_rate": 6.596930103246468e-06, |
| "loss": 0.5772223472595215, |
| "step": 1276 |
| }, |
| { |
| "epoch": 2.7199148029818954, |
| "grad_norm": 0.61328125, |
| "learning_rate": 6.579517007860933e-06, |
| "loss": 0.5936267971992493, |
| "step": 1278 |
| }, |
| { |
| "epoch": 2.724174653887114, |
| "grad_norm": 0.2197265625, |
| "learning_rate": 6.562355380202927e-06, |
| "loss": 0.668041467666626, |
| "step": 1280 |
| }, |
| { |
| "epoch": 2.7284345047923324, |
| "grad_norm": 0.25, |
| "learning_rate": 6.5454455981545e-06, |
| "loss": 0.5487772226333618, |
| "step": 1282 |
| }, |
| { |
| "epoch": 2.7326943556975505, |
| "grad_norm": 0.205078125, |
| "learning_rate": 6.528788034052311e-06, |
| "loss": 0.6349499225616455, |
| "step": 1284 |
| }, |
| { |
| "epoch": 2.736954206602769, |
| "grad_norm": 0.23828125, |
| "learning_rate": 6.512383054679422e-06, |
| "loss": 0.5938593149185181, |
| "step": 1286 |
| }, |
| { |
| "epoch": 2.741214057507987, |
| "grad_norm": 0.263671875, |
| "learning_rate": 6.496231021257242e-06, |
| "loss": 0.6245843172073364, |
| "step": 1288 |
| }, |
| { |
| "epoch": 2.7454739084132056, |
| "grad_norm": 0.357421875, |
| "learning_rate": 6.480332289437552e-06, |
| "loss": 0.5823163390159607, |
| "step": 1290 |
| }, |
| { |
| "epoch": 2.749733759318424, |
| "grad_norm": 0.3359375, |
| "learning_rate": 6.464687209294682e-06, |
| "loss": 0.5846402049064636, |
| "step": 1292 |
| }, |
| { |
| "epoch": 2.753993610223642, |
| "grad_norm": 0.412109375, |
| "learning_rate": 6.44929612531781e-06, |
| "loss": 0.6277037262916565, |
| "step": 1294 |
| }, |
| { |
| "epoch": 2.7582534611288603, |
| "grad_norm": 0.20703125, |
| "learning_rate": 6.434159376403363e-06, |
| "loss": 0.6208704113960266, |
| "step": 1296 |
| }, |
| { |
| "epoch": 2.762513312034079, |
| "grad_norm": 0.32421875, |
| "learning_rate": 6.419277295847563e-06, |
| "loss": 0.5632691979408264, |
| "step": 1298 |
| }, |
| { |
| "epoch": 2.7667731629392973, |
| "grad_norm": 0.296875, |
| "learning_rate": 6.404650211339093e-06, |
| "loss": 0.6156328320503235, |
| "step": 1300 |
| }, |
| { |
| "epoch": 2.7710330138445154, |
| "grad_norm": 0.259765625, |
| "learning_rate": 6.390278444951868e-06, |
| "loss": 0.6689990758895874, |
| "step": 1302 |
| }, |
| { |
| "epoch": 2.775292864749734, |
| "grad_norm": 0.25, |
| "learning_rate": 6.376162313137955e-06, |
| "loss": 0.6374217867851257, |
| "step": 1304 |
| }, |
| { |
| "epoch": 2.779552715654952, |
| "grad_norm": 0.439453125, |
| "learning_rate": 6.3623021267205975e-06, |
| "loss": 0.6087695360183716, |
| "step": 1306 |
| }, |
| { |
| "epoch": 2.7838125665601705, |
| "grad_norm": 0.2734375, |
| "learning_rate": 6.348698190887377e-06, |
| "loss": 0.5766043066978455, |
| "step": 1308 |
| }, |
| { |
| "epoch": 2.7880724174653886, |
| "grad_norm": 0.359375, |
| "learning_rate": 6.3353508051834924e-06, |
| "loss": 0.6857935786247253, |
| "step": 1310 |
| }, |
| { |
| "epoch": 2.792332268370607, |
| "grad_norm": 0.259765625, |
| "learning_rate": 6.322260263505159e-06, |
| "loss": 0.6080771684646606, |
| "step": 1312 |
| }, |
| { |
| "epoch": 2.796592119275825, |
| "grad_norm": 0.232421875, |
| "learning_rate": 6.309426854093147e-06, |
| "loss": 0.5428948402404785, |
| "step": 1314 |
| }, |
| { |
| "epoch": 2.8008519701810437, |
| "grad_norm": 0.609375, |
| "learning_rate": 6.2968508595264195e-06, |
| "loss": 0.6500948667526245, |
| "step": 1316 |
| }, |
| { |
| "epoch": 2.8051118210862622, |
| "grad_norm": 0.306640625, |
| "learning_rate": 6.284532556715927e-06, |
| "loss": 0.6038864850997925, |
| "step": 1318 |
| }, |
| { |
| "epoch": 2.8093716719914803, |
| "grad_norm": 0.404296875, |
| "learning_rate": 6.272472216898501e-06, |
| "loss": 0.6369448304176331, |
| "step": 1320 |
| }, |
| { |
| "epoch": 2.8136315228966984, |
| "grad_norm": 0.32421875, |
| "learning_rate": 6.260670105630885e-06, |
| "loss": 0.6288717985153198, |
| "step": 1322 |
| }, |
| { |
| "epoch": 2.817891373801917, |
| "grad_norm": 0.3984375, |
| "learning_rate": 6.2491264827838775e-06, |
| "loss": 0.6535931825637817, |
| "step": 1324 |
| }, |
| { |
| "epoch": 2.8221512247071354, |
| "grad_norm": 0.28515625, |
| "learning_rate": 6.237841602536627e-06, |
| "loss": 0.6414341330528259, |
| "step": 1326 |
| }, |
| { |
| "epoch": 2.8264110756123535, |
| "grad_norm": 0.2578125, |
| "learning_rate": 6.226815713371023e-06, |
| "loss": 0.5740489959716797, |
| "step": 1328 |
| }, |
| { |
| "epoch": 2.830670926517572, |
| "grad_norm": 0.2890625, |
| "learning_rate": 6.216049058066229e-06, |
| "loss": 0.5453130602836609, |
| "step": 1330 |
| }, |
| { |
| "epoch": 2.83493077742279, |
| "grad_norm": 0.228515625, |
| "learning_rate": 6.205541873693331e-06, |
| "loss": 0.531428873538971, |
| "step": 1332 |
| }, |
| { |
| "epoch": 2.8391906283280086, |
| "grad_norm": 0.3046875, |
| "learning_rate": 6.195294391610128e-06, |
| "loss": 0.6185562014579773, |
| "step": 1334 |
| }, |
| { |
| "epoch": 2.8434504792332267, |
| "grad_norm": 0.3671875, |
| "learning_rate": 6.185306837456027e-06, |
| "loss": 0.6069992184638977, |
| "step": 1336 |
| }, |
| { |
| "epoch": 2.847710330138445, |
| "grad_norm": 0.26953125, |
| "learning_rate": 6.1755794311470824e-06, |
| "loss": 0.5699736475944519, |
| "step": 1338 |
| }, |
| { |
| "epoch": 2.8519701810436633, |
| "grad_norm": 0.875, |
| "learning_rate": 6.166112386871149e-06, |
| "loss": 0.5937331318855286, |
| "step": 1340 |
| }, |
| { |
| "epoch": 2.856230031948882, |
| "grad_norm": 0.30078125, |
| "learning_rate": 6.15690591308317e-06, |
| "loss": 0.5383535623550415, |
| "step": 1342 |
| }, |
| { |
| "epoch": 2.8604898828541003, |
| "grad_norm": 0.2119140625, |
| "learning_rate": 6.14796021250058e-06, |
| "loss": 0.5439456701278687, |
| "step": 1344 |
| }, |
| { |
| "epoch": 2.8647497337593184, |
| "grad_norm": 0.2578125, |
| "learning_rate": 6.139275482098847e-06, |
| "loss": 0.5950272083282471, |
| "step": 1346 |
| }, |
| { |
| "epoch": 2.8690095846645365, |
| "grad_norm": 0.255859375, |
| "learning_rate": 6.130851913107137e-06, |
| "loss": 0.5372447967529297, |
| "step": 1348 |
| }, |
| { |
| "epoch": 2.873269435569755, |
| "grad_norm": 0.2294921875, |
| "learning_rate": 6.122689691004103e-06, |
| "loss": 0.5755343437194824, |
| "step": 1350 |
| }, |
| { |
| "epoch": 2.8775292864749735, |
| "grad_norm": 0.28515625, |
| "learning_rate": 6.114788995513787e-06, |
| "loss": 0.6370142102241516, |
| "step": 1352 |
| }, |
| { |
| "epoch": 2.8817891373801916, |
| "grad_norm": 0.28125, |
| "learning_rate": 6.107150000601684e-06, |
| "loss": 0.5815765261650085, |
| "step": 1354 |
| }, |
| { |
| "epoch": 2.88604898828541, |
| "grad_norm": 0.2490234375, |
| "learning_rate": 6.099772874470899e-06, |
| "loss": 0.6185727715492249, |
| "step": 1356 |
| }, |
| { |
| "epoch": 2.890308839190628, |
| "grad_norm": 0.36328125, |
| "learning_rate": 6.092657779558442e-06, |
| "loss": 0.5713162422180176, |
| "step": 1358 |
| }, |
| { |
| "epoch": 2.8945686900958467, |
| "grad_norm": 0.283203125, |
| "learning_rate": 6.08580487253166e-06, |
| "loss": 0.6169173121452332, |
| "step": 1360 |
| }, |
| { |
| "epoch": 2.8988285410010652, |
| "grad_norm": 0.34375, |
| "learning_rate": 6.079214304284781e-06, |
| "loss": 0.5929686427116394, |
| "step": 1362 |
| }, |
| { |
| "epoch": 2.9030883919062833, |
| "grad_norm": 0.294921875, |
| "learning_rate": 6.072886219935593e-06, |
| "loss": 0.5761704444885254, |
| "step": 1364 |
| }, |
| { |
| "epoch": 2.9073482428115014, |
| "grad_norm": 0.25390625, |
| "learning_rate": 6.066820758822244e-06, |
| "loss": 0.5787940621376038, |
| "step": 1366 |
| }, |
| { |
| "epoch": 2.91160809371672, |
| "grad_norm": 0.26171875, |
| "learning_rate": 6.0610180545001845e-06, |
| "loss": 0.5501613020896912, |
| "step": 1368 |
| }, |
| { |
| "epoch": 2.9158679446219384, |
| "grad_norm": 0.1943359375, |
| "learning_rate": 6.055478234739217e-06, |
| "loss": 0.5612152218818665, |
| "step": 1370 |
| }, |
| { |
| "epoch": 2.9201277955271565, |
| "grad_norm": 0.255859375, |
| "learning_rate": 6.050201421520689e-06, |
| "loss": 0.6078463792800903, |
| "step": 1372 |
| }, |
| { |
| "epoch": 2.924387646432375, |
| "grad_norm": 0.1953125, |
| "learning_rate": 6.045187731034801e-06, |
| "loss": 0.5890936255455017, |
| "step": 1374 |
| }, |
| { |
| "epoch": 2.928647497337593, |
| "grad_norm": 0.302734375, |
| "learning_rate": 6.040437273678055e-06, |
| "loss": 0.6533024311065674, |
| "step": 1376 |
| }, |
| { |
| "epoch": 2.9329073482428116, |
| "grad_norm": 4.3125, |
| "learning_rate": 6.0359501540508174e-06, |
| "loss": 0.6827770471572876, |
| "step": 1378 |
| }, |
| { |
| "epoch": 2.9371671991480297, |
| "grad_norm": 0.3359375, |
| "learning_rate": 6.0317264709550185e-06, |
| "loss": 0.6418617963790894, |
| "step": 1380 |
| }, |
| { |
| "epoch": 2.9414270500532482, |
| "grad_norm": 0.2431640625, |
| "learning_rate": 6.02776631739198e-06, |
| "loss": 0.5774567127227783, |
| "step": 1382 |
| }, |
| { |
| "epoch": 2.9456869009584663, |
| "grad_norm": 0.38671875, |
| "learning_rate": 6.0240697805603594e-06, |
| "loss": 0.6014460921287537, |
| "step": 1384 |
| }, |
| { |
| "epoch": 2.949946751863685, |
| "grad_norm": 0.51171875, |
| "learning_rate": 6.020636941854242e-06, |
| "loss": 0.5642235279083252, |
| "step": 1386 |
| }, |
| { |
| "epoch": 2.9542066027689033, |
| "grad_norm": 0.333984375, |
| "learning_rate": 6.017467876861333e-06, |
| "loss": 0.5891353487968445, |
| "step": 1388 |
| }, |
| { |
| "epoch": 2.9584664536741214, |
| "grad_norm": 0.244140625, |
| "learning_rate": 6.014562655361307e-06, |
| "loss": 0.5744375586509705, |
| "step": 1390 |
| }, |
| { |
| "epoch": 2.9627263045793395, |
| "grad_norm": 0.255859375, |
| "learning_rate": 6.011921341324265e-06, |
| "loss": 0.5458447933197021, |
| "step": 1392 |
| }, |
| { |
| "epoch": 2.966986155484558, |
| "grad_norm": 0.23828125, |
| "learning_rate": 6.009543992909327e-06, |
| "loss": 0.6621728539466858, |
| "step": 1394 |
| }, |
| { |
| "epoch": 2.9712460063897765, |
| "grad_norm": 0.236328125, |
| "learning_rate": 6.007430662463352e-06, |
| "loss": 0.5778822898864746, |
| "step": 1396 |
| }, |
| { |
| "epoch": 2.9755058572949946, |
| "grad_norm": 0.5390625, |
| "learning_rate": 6.005581396519782e-06, |
| "loss": 0.5913535952568054, |
| "step": 1398 |
| }, |
| { |
| "epoch": 2.979765708200213, |
| "grad_norm": 1.1328125, |
| "learning_rate": 6.0039962357976234e-06, |
| "loss": 0.5911454558372498, |
| "step": 1400 |
| }, |
| { |
| "epoch": 2.984025559105431, |
| "grad_norm": 0.2197265625, |
| "learning_rate": 6.002675215200546e-06, |
| "loss": 0.5291861295700073, |
| "step": 1402 |
| }, |
| { |
| "epoch": 2.9882854100106497, |
| "grad_norm": 0.29296875, |
| "learning_rate": 6.001618363816112e-06, |
| "loss": 0.577559232711792, |
| "step": 1404 |
| }, |
| { |
| "epoch": 2.992545260915868, |
| "grad_norm": 0.28125, |
| "learning_rate": 6.000825704915147e-06, |
| "loss": 0.5995616912841797, |
| "step": 1406 |
| }, |
| { |
| "epoch": 2.9968051118210863, |
| "grad_norm": 0.251953125, |
| "learning_rate": 6.000297255951213e-06, |
| "loss": 0.5123644471168518, |
| "step": 1408 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.2890625, |
| "learning_rate": 6.000033028560234e-06, |
| "loss": 0.584560215473175, |
| "step": 1410 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 1410, |
| "total_flos": 3.6491913262740275e+18, |
| "train_loss": 0.9255026633857836, |
| "train_runtime": 18814.6678, |
| "train_samples_per_second": 2.396, |
| "train_steps_per_second": 0.075 |
| } |
| ], |
| "logging_steps": 2, |
| "max_steps": 1410, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 99999, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.6491913262740275e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |